2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
/* Forward declarations for this translation unit: the simple-loop-peeling
   utilities and the vectorizer's analysis / transformation helpers whose
   definitions follow.  Non-static declarations (slpeel_tree_peel_loop_to_edge,
   new_loop_vec_info, destroy_loop_vec_info, new_stmt_vec_info) are part of
   the module's external interface.
   NOTE(review): vect_analyze_pointer_ref_access appears to be declared twice
   in this section (once near vect_can_force_dr_alignment_p and again a few
   lines below) — confirm against the full file and remove the duplicate.
   NOTE(review): this listing has dropped lines (parameter continuation lines
   of several prototypes are missing); do not treat it as compilable text.  */
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type *, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment (struct data_reference *);
218 static bool vect_analyze_data_ref_access (struct data_reference *);
219 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
220 static struct data_reference * vect_analyze_pointer_ref_access
222 static bool vect_can_advance_ivs_p (struct loop *);
223 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
224 loop_vec_info, tree *, tree *, tree *,
226 static struct data_reference * vect_analyze_pointer_ref_access
228 static tree vect_get_ptr_offset (tree, tree, tree *);
229 static tree vect_get_memtag_and_dr
230 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
231 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
233 static tree vect_strip_conversion (tree);
235 /* Utility functions for the code transformation. */
236 static tree vect_create_destination_var (tree, tree);
237 static tree vect_create_data_ref_ptr
238 (tree, block_stmt_iterator *, tree, tree *, bool);
239 static tree vect_create_index_for_vector_ref
240 (struct loop *, block_stmt_iterator *);
241 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
242 static tree get_vectype_for_scalar_type (tree);
243 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
244 static tree vect_get_vec_def_for_operand (tree, tree);
245 static tree vect_init_vector (tree, tree);
246 static void vect_finish_stmt_generation
247 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
249 /* Utility function dealing with loop peeling (not peeling itself). */
250 static void vect_generate_tmps_on_preheader
251 (loop_vec_info, tree *, tree *, tree *);
252 static tree vect_build_loop_niters (loop_vec_info);
253 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
254 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
255 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
256 static void vect_update_inits_of_drs (loop_vec_info, tree);
257 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
258 static void vect_do_peeling_for_loop_bound
259 (loop_vec_info, tree *, struct loops *);
261 /* Utilities for creation and deletion of vec_info structs. */
262 loop_vec_info new_loop_vec_info (struct loop *loop);
263 void destroy_loop_vec_info (loop_vec_info);
264 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
266 static bool vect_debug_stats (struct loop *loop);
267 static bool vect_debug_details (struct loop *loop);
270 /*************************************************************************
271 Simple Loop Peeling Utilities
273 Utilities to support loop peeling for vectorization purposes.
274 *************************************************************************/
/* allocate_new_names (DEFINITIONS):
   For every SSA version set in the DEFINITIONS bitmap, duplicate the
   SSA name (preserving its occurs-in-abnormal-phi flag) and store a
   heap-allocated pointer to the duplicate in the original name's
   SSA_NAME_AUX field.  The duplicates are later installed by the
   rename_* helpers and released by free_new_names.
   NOTE(review): this listing is missing interior lines (braces and
   the declarations of the iteration variables) — confirm against the
   complete file before editing.  */
277 /* For each definition in DEFINITIONS this function allocates
281 allocate_new_names (bitmap definitions)
286   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
288       tree def = ssa_name (ver);
289       tree *new_name_ptr = xmalloc (sizeof (tree));
291       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
293       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
294       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
296       SSA_NAME_AUX (def) = new_name_ptr;
/* rename_use_op (OP_P):
   If the use *OP_P is an SSA name that has a replacement recorded in
   SSA_NAME_AUX (set up by allocate_new_names), rewrite the use to the
   new name; uses of names defined outside the loop (no AUX entry) are
   left untouched.
   NOTE(review): the early-return lines for the non-SSA-name and
   no-AUX cases are missing from this listing.  */
301 /* Renames the use *OP_P. */
304 rename_use_op (use_operand_p op_p)
308   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
311   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
313   /* Something defined outside of the loop. */
317   /* An ordinary ssa name defined in the loop. */
319   SET_USE (op_p, *new_name_ptr);
/* rename_def_op (OP_P, STMT):
   Like rename_use_op, but for a definition: replace the defined SSA
   name with its recorded duplicate and make STMT the defining
   statement of the new name.  Names with no SSA_NAME_AUX replacement
   (defined outside the loop) are left alone.
   NOTE(review): the guard/early-return lines are missing from this
   listing.  */
323 /* Renames the def *OP_P in statement STMT. */
326 rename_def_op (def_operand_p op_p, tree stmt)
330   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
333   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
335   /* Something defined outside of the loop. */
339   /* An ordinary ssa name defined in the loop. */
341   SET_DEF (op_p, *new_name_ptr);
342   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
/* rename_variables_in_bb (BB):
   Rename every operand in BB to the duplicated SSA names:
     - the results of BB's phi nodes;
     - for each statement: real uses/defs, virtual uses (VUSE),
       may-defs (V_MAY_DEF op + result) and must-defs (kill + result);
     - finally, for each successor edge leaving BB's loop, the phi
       arguments in the destination block that flow along that edge
       (loop-closed uses of names defined in the loop).
   NOTE(review): several declaration lines (phi, stmt, ann, uses,
   defs, vuses, edge iterator, i) and some braces are missing from
   this listing.  */
346 /* Renames the variables in basic block BB. */
349 rename_variables_in_bb (basic_block bb)
352   block_stmt_iterator bsi;
358   v_may_def_optype v_may_defs;
359   v_must_def_optype v_must_defs;
363   struct loop *loop = bb->loop_father;
365   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
366     rename_def_op (PHI_RESULT_PTR (phi), phi);
368   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
370       stmt = bsi_stmt (bsi);
371       get_stmt_operands (stmt);
372       ann = stmt_ann (stmt);
374       uses = USE_OPS (ann);
375       for (i = 0; i < NUM_USES (uses); i++)
376         rename_use_op (USE_OP_PTR (uses, i));
378       defs = DEF_OPS (ann);
379       for (i = 0; i < NUM_DEFS (defs); i++)
380         rename_def_op (DEF_OP_PTR (defs, i), stmt);
382       vuses = VUSE_OPS (ann);
383       for (i = 0; i < NUM_VUSES (vuses); i++)
384         rename_use_op (VUSE_OP_PTR (vuses, i));
386       v_may_defs = V_MAY_DEF_OPS (ann);
387       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
389           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
390           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
393       v_must_defs = V_MUST_DEF_OPS (ann);
394       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
396           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
397           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
401   FOR_EACH_EDGE (e, ei, bb->succs)
403       if (!flow_bb_inside_loop_p (loop, e->dest))
405         for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
406           rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
/* free_new_names (DEFINITIONS):
   Counterpart of allocate_new_names: for each SSA version in
   DEFINITIONS, free the xmalloc'ed pointer stored in SSA_NAME_AUX
   and clear the field.  The `if (SSA_NAME_AUX (def))` guard is for
   the field, not for free() — NULL AUX means no duplicate was made
   for this name.  */
411 /* Releases the structures holding the new ssa names. */
414 free_new_names (bitmap definitions)
419   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
421       tree def = ssa_name (ver);
423       if (SSA_NAME_AUX (def))
425           free (SSA_NAME_AUX (def));
426           SSA_NAME_AUX (def) = NULL;
/* rename_variables_in_loop (LOOP):
   Apply rename_variables_in_bb to every basic block of LOOP.
   NOTE(review): the declarations of `bbs`/`i` and the free of the
   get_loop_body array are not visible in this listing — confirm the
   array is released in the full file.  */
432 /* Renames variables in new generated LOOP. */
435 rename_variables_in_loop (struct loop *loop)
440   bbs = get_loop_body (loop);
442   for (i = 0; i < loop->num_nodes; i++)
443     rename_variables_in_bb (bbs[i]);
/* slpeel_update_phis_for_duplicate_loop (ORIG_LOOP, NEW_LOOP, AFTER):
   Fix up the phi nodes of NEW_LOOP, a fresh copy of ORIG_LOOP.
   For each header phi of NEW_LOOP it adds (1) the entry argument —
   taken from ORIG_LOOP's latch edge when NEW_LOOP runs after
   ORIG_LOOP, else from ORIG_LOOP's preheader edge — and (2) the latch
   argument, using the duplicated SSA name from SSA_NAME_AUX when the
   latch def was defined inside the loop.  When NEW_LOOP precedes
   ORIG_LOOP (step 3, case 1) it also rewrites ORIG_LOOP's header phi
   argument on the edge coming from NEW_LOOP's exit.
   NOTE(review): loop-body braces, the `def` declaration, and the
   final SET_PHI_ARG_DEF argument line are missing from this
   listing.  */
449 /* Update the PHI nodes of NEW_LOOP.
451 NEW_LOOP is a duplicate of ORIG_LOOP.
452 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
453 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
454 executes before it. */
457 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
458 struct loop *new_loop, bool after)
460   tree *new_name_ptr, new_ssa_name;
461   tree phi_new, phi_orig;
463   edge orig_loop_latch = loop_latch_edge (orig_loop);
464   edge orig_entry_e = loop_preheader_edge (orig_loop);
465   edge new_loop_exit_e = new_loop->exit_edges[0];
466   edge new_loop_entry_e = loop_preheader_edge (new_loop);
467   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
470 step 1. For each loop-header-phi:
471 Add the first phi argument for the phi in NEW_LOOP
472 (the one associated with the entry of NEW_LOOP)
474 step 2. For each loop-header-phi:
475 Add the second phi argument for the phi in NEW_LOOP
476 (the one associated with the latch of NEW_LOOP)
478 step 3. Update the phis in the successor block of NEW_LOOP.
480 case 1: NEW_LOOP was placed before ORIG_LOOP:
481 The successor block of NEW_LOOP is the header of ORIG_LOOP.
482 Updating the phis in the successor block can therefore be done
483 along with the scanning of the loop header phis, because the
484 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
485 phi nodes, organized in the same order.
487 case 2: NEW_LOOP was placed after ORIG_LOOP:
488 The successor block of NEW_LOOP is the original exit block of
489 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
490 We postpone updating these phis to a later stage (when
491 loop guards are added).
495   /* Scan the phis in the headers of the old and new loops
496 (they are organized in exactly the same order). */
498   for (phi_new = phi_nodes (new_loop->header),
499 phi_orig = phi_nodes (orig_loop->header);
501 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
504       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
505       add_phi_arg (phi_new, def, new_loop_entry_e);
508       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
509       if (TREE_CODE (def) != SSA_NAME)
512       new_name_ptr = SSA_NAME_AUX (def);
514       /* Something defined outside of the loop. */
517       /* An ordinary ssa name defined in the loop. */
518       new_ssa_name = *new_name_ptr;
519       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
521       /* step 3 (case 1). */
524       gcc_assert (new_loop_exit_e == orig_entry_e);
525       SET_PHI_ARG_DEF (phi_orig,
526 new_loop_exit_e->dest_idx,
/* slpeel_update_phi_nodes_for_guard (GUARD_EDGE, LOOP, ENTRY_PHIS,
   IS_NEW_LOOP):
   After a guard branch is added around LOOP, its old exit block
   becomes a merge point (NEW_MERGE_BB) of two paths: the LOOP exit
   edge and GUARD_EDGE.  For each phi in the reference block (LOOP's
   header when ENTRY_PHIS, otherwise UPDATE_BB itself) this creates a
   phi in NEW_MERGE_BB, sets its two arguments (the loop-path value
   and the guard-path value — which of the original/duplicated SSA
   names goes where depends on IS_NEW_LOOP), and rewires the matching
   phi in UPDATE_BB to use the new phi's result.  The phi list of
   NEW_MERGE_BB is reversed at the end because create_phi_node
   prepends.
   NOTE(review): the tail of the parameter list, several braces, and
   the else-branches of the listed conditionals are missing from this
   listing.  */
533 /* Update PHI nodes for a guard of the LOOP.
536 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
537 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
538 originates from the guard-bb, skips LOOP and reaches the (unique) exit
539 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
540 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
541 LOOP header) before the guard code was added, and now it became a merge
542 point of two paths - the path that ends with the LOOP exit-edge, and
543 the path that ends with GUARD_EDGE.
545 This function creates and updates the relevant phi nodes to account for
546 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
547 1. Create phi nodes at NEW_MERGE_BB.
548 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
549 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
552 ===> The CFG before the guard-code was added:
554 if (exit_loop) goto update_bb : LOOP_header_bb
557 ==> The CFG after the guard-code was added:
559 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
561 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
566 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
567 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
568 organized in the same order.
569 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
572 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
573 "original" loop). FALSE if LOOP is an original loop (not a newly
574 created copy). The SSA_NAME_AUX fields of the defs in the original
575 loop are the corresponding new ssa-names used in the new duplicated
576 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
577 nodes in UPDATE_BB takes the original ssa-name, and which takes the
578 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
579 the LOOP-exit-edge takes the new-name, and the phi-arg that is
580 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
581 FALSE, it's the other way around.
585 slpeel_update_phi_nodes_for_guard (edge guard_edge,
590   tree orig_phi, new_phi, update_phi;
591   tree guard_arg, loop_arg;
592   basic_block new_merge_bb = guard_edge->dest;
593   edge e = EDGE_SUCC (new_merge_bb, 0);
594   basic_block update_bb = e->dest;
595   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
597   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
598 orig_phi && update_phi;
599 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
601       /* 1. Generate new phi node in NEW_MERGE_BB: */
602       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
605       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
606 of LOOP. Set the two phi args in NEW_PHI for these edges: */
609           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
610 EDGE_SUCC (loop->latch, 0));
611           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
615           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
616           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
620             new_name = *new_name_ptr;
622             /* Something defined outside of the loop */
627               guard_arg = orig_def;
632               guard_arg = new_name;
636       add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
637       add_phi_arg (new_phi, guard_arg, guard_edge);
639       /* 3. Update phi in successor block. */
640       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
641 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
642       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
645   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
/* slpeel_make_loop_iterate_ntimes (LOOP, NITERS):
   Replace LOOP's exit test with a fresh counted-loop test: create an
   IV starting at 0 with step 1 (create_iv), then build a new
   COND_EXPR comparing the incremented IV against NITERS.  The
   comparison direction (GE to exit vs. LT to continue) is chosen by
   whether the 'then' edge of the existing exit condition leaves the
   loop (EDGE_TRUE_VALUE on exit_edge).  The old exit condition is
   removed and loop->nb_iterations is recorded as NITERS.
   The bsi_next after create_iv repositions the iterator back onto
   the original COND_EXPR, because create_iv inserts with
   TSI_NEW_STMT.
   NOTE(review): declarations of orig_cond/then_label/else_label and
   the if/else braces are missing from this listing.  */
649 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
650 that starts at zero, increases by one and its limit is NITERS.
652 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
655 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
657   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
659   edge exit_edge = loop->exit_edges[0];
660   block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
661   tree begin_label = tree_block_label (loop->latch);
662   tree exit_label = tree_block_label (loop->single_exit->dest);
663   tree init = build_int_cst (TREE_TYPE (niters), 0);
664   tree step = build_int_cst (TREE_TYPE (niters), 1);
668   orig_cond = get_loop_exit_condition (loop);
669   gcc_assert (orig_cond);
670   create_iv (init, step, NULL_TREE, loop,
671 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
673   /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
674 back to the exit condition statement. */
675   bsi_next (&loop_exit_bsi);
676   gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
678   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
680       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
681       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
682       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
684   else /* 'then' edge loops back. */
686       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
687       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
688       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
691   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
692 then_label, else_label);
693   bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
695   /* Remove old loop exit test: */
696   bsi_remove (&loop_exit_bsi);
698   if (vect_debug_stats (loop) || vect_debug_details (loop))
699     print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
701   loop->nb_iterations = niters;
/* slpeel_tree_duplicate_loop_to_edge_cfg (LOOP, LOOPS, E):
   Duplicate LOOP (CFG + loop structure via duplicate_loop/copy_bbs)
   and splice the copy onto edge E, which must be LOOP's preheader
   edge or its single exit edge; otherwise the function bails out.
   For the at-exit case, E is redirected into the copy's header and
   dominators are updated; for the at-entry case, the copy's exit is
   redirected into LOOP's header, with LOOP's header phis given
   arguments for the new incoming edge.  Both loops are rescanned
   with flow_loop_scan at the end.
   NOTE(review): several lines are missing from this listing —
   declarations (phi, phi_arg, was_imm_dom, at_exit, new_exit_e),
   early `return NULL` statements after the failure fprintf's, the
   ternary completion after was_imm_dom, free()s of the bb arrays,
   and the final `return new_loop`.  Confirm against the full
   file.  */
705 /* Given LOOP this function generates a new copy of it and puts it
706 on E which is either the entry or exit of LOOP. */
709 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
712   struct loop *new_loop;
713   basic_block *new_bbs, *bbs;
716   basic_block exit_dest;
719   at_exit = (e == loop->exit_edges[0]);
720   if (!at_exit && e != loop_preheader_edge (loop))
722       if (dump_file && (dump_flags & TDF_DETAILS))
723         fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
727   bbs = get_loop_body (loop);
729   /* Check whether duplication is possible. */
730   if (!can_copy_bbs_p (bbs, loop->num_nodes))
732       if (vect_debug_stats (loop) || vect_debug_details (loop))
733         fprintf (dump_file, "Cannot copy basic blocks.\n");
738   /* Generate new loop structure. */
739   new_loop = duplicate_loop (loops, loop, loop->outer);
742       if (vect_debug_stats (loop) || vect_debug_details (loop))
743         fprintf (dump_file, "duplicate_loop returns NULL.\n");
748   exit_dest = loop->exit_edges[0]->dest;
749   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
750 exit_dest) == loop->header ?
753   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
755   copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
757   /* Duplicating phi args at exit bbs as coming
758 also from exit of duplicated loop. */
759   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
761       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
764           edge new_loop_exit_edge;
766           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
767             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
769             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
771           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
775   if (at_exit) /* Add the loop copy at exit. */
777       redirect_edge_and_branch_force (e, new_loop->header);
778       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
780         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
782   else /* Add the copy at entry. */
785       edge entry_e = loop_preheader_edge (loop);
786       basic_block preheader = entry_e->src;
788       if (!flow_bb_inside_loop_p (new_loop,
789 EDGE_SUCC (new_loop->header, 0)->dest))
790         new_exit_e = EDGE_SUCC (new_loop->header, 0);
792         new_exit_e = EDGE_SUCC (new_loop->header, 1);
794       redirect_edge_and_branch_force (new_exit_e, loop->header);
795       set_immediate_dominator (CDI_DOMINATORS, loop->header,
798       /* We have to add phi args to the loop->header here as coming
799 from new_exit_e edge. */
800       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
802           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
804             add_phi_arg (phi, phi_arg, new_exit_e);
807       redirect_edge_and_branch_force (entry_e, new_loop->header);
808       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
811   flow_loop_scan (new_loop, LOOP_ALL);
812   flow_loop_scan (loop, LOOP_ALL);
/* slpeel_add_loop_guard (GUARD_BB, COND, EXIT_BB, DOM_BB):
   Append a COND_EXPR at the end of GUARD_BB whose 'then' branch jumps
   to EXIT_BB (skipping the guarded loop) and whose 'else' branch
   falls into the existing successor.  The previously-fallthrough
   successor edge is re-flagged as the FALSE edge, a new TRUE edge
   GUARD_BB->EXIT_BB is created, and EXIT_BB's immediate dominator is
   set to DOM_BB.  Returns the newly created skip edge.
   NOTE(review): the trailing parameter (dom_bb), the declarations of
   enter_e/new_e, and the final `return new_e;` are missing from this
   listing.  */
820 /* Given the condition statement COND, put it as the last statement
821 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
822 Assumes that this is the single exit of the guarded loop.
823 Returns the skip edge. */
826 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
829   block_stmt_iterator bsi;
831   tree cond_stmt, then_label, else_label;
833   enter_e = EDGE_SUCC (guard_bb, 0);
834   enter_e->flags &= ~EDGE_FALLTHRU;
835   enter_e->flags |= EDGE_FALSE_VALUE;
836   bsi = bsi_last (guard_bb);
838   then_label = build1 (GOTO_EXPR, void_type_node,
839 tree_block_label (exit_bb));
840   else_label = build1 (GOTO_EXPR, void_type_node,
841 tree_block_label (enter_e->dest));
842   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
843 then_label, else_label);
844   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
845   /* Add new edge to connect entry block to the second loop. */
846   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
847   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
/* slpeel_can_duplicate_loop_p (LOOP, E):
   Predicate used before peeling: LOOP must have exactly 2 blocks
   (header + empty latch), a single entry and single exit, an exit
   condition that is the last statement before the exit edge, and E
   must be that entry or exit edge.  Also refuses when any SSA names
   are marked for rewrite (any_marked_for_rewrite_p), since the
   SSA_NAME_AUX renaming scheme would clash.
   NOTE(review): the early `return false`, the start of the big
   disjunction (presumably `if (!loop->outer ...`), and the final
   return are missing from this listing.  */
852 /* This function verifies that the following restrictions apply to LOOP:
854 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
855 (3) it is single entry, single exit
856 (4) its exit condition is the last stmt in the header
857 (5) E is the entry/exit edge of LOOP.
861 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
863   edge exit_e = loop->exit_edges [0];
864   edge entry_e = loop_preheader_edge (loop);
865   tree orig_cond = get_loop_exit_condition (loop);
866   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
868   if (any_marked_for_rewrite_p ())
872   /* All loops have an outer scope; the only case loop->outer is NULL is for
873 the function itself. */
875       || loop->num_nodes != 2
876       || !empty_block_p (loop->latch)
877       || loop->num_exits != 1
878       || loop->num_entries != 1
879       /* Verify that new loop exit condition can be trivially modified. */
880       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
881       || (e != exit_e && e != entry_e))
/* slpeel_verify_cfg_after_peeling (FIRST_LOOP, SECOND_LOOP)
   [ENABLE_CHECKING only]:
   Sanity-check the CFG produced by peeling: FIRST_LOOP's exit block
   must have exactly two successors (the guard), and SECOND_LOOP's
   preheader must have exactly two predecessors — FIRST_LOOP's exit
   block and the block preceding FIRST_LOOP — in either order.
   NOTE(review): "first_loopt" and "predessors" in the original
   comments are typos for "first_loop" / "predecessors"; left as-is
   since this is a byte-preserving review pass.  The function's tail
   (further assertions and closing brace) is missing from this
   listing.  */
887 #ifdef ENABLE_CHECKING
889 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
890 struct loop *second_loop)
892   basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
893   basic_block loop2_entry_bb = second_loop->pre_header;
894   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
896   /* A guard that controls whether the second_loop is to be executed or skipped
897 is placed in first_loop->exit. first_loopt->exit therefore has two
898 successors - one is the preheader of second_loop, and the other is a bb
901   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
904   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
907   /* The preheader of new_loop is expected to have two predessors:
908 first_loop->exit and the block that precedes first_loop. */
910   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
911 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
912 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
913 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
914 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
916   /* Verify that the other successor of first_loopt->exit is after the
922 /* Function slpeel_tree_peel_loop_to_edge.
924 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
925 that is placed on the entry (exit) edge E of LOOP. After this transformation
926 we have two loops one after the other - first-loop iterates FIRST_NITERS
927 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
930 - LOOP: the loop to be peeled.
931 - E: the exit or entry edge of LOOP.
932 If it is the entry edge, we peel the first iterations of LOOP. In this
933 case first-loop is LOOP, and second-loop is the newly created loop.
934 If it is the exit edge, we peel the last iterations of LOOP. In this
935 case, first-loop is the newly created loop, and second-loop is LOOP.
936 - NITERS: the number of iterations that LOOP iterates.
937 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
938 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
939 for updating the loop bound of the first-loop to FIRST_NITERS. If it
940 is false, the caller of this function may want to take care of this
941 (this can be useful if we don't want new stmts added to first-loop).
944 The function returns a pointer to the new loop-copy, or NULL if it failed
945 to perform the transformation.
947 The function generates two if-then-else guards: one before the first loop,
948 and the other before the second loop:
950 if (FIRST_NITERS == 0) then skip the first loop,
951 and go directly to the second loop.
953 if (FIRST_NITERS == NITERS) then skip the second loop.
955 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
956 FORNOW the resulting code will not be in loop-closed-ssa form.
/* Peel off the first FIRST_NITERS iterations of LOOP: duplicate LOOP onto
   edge E and wire up the two guards described in the block comment above.
   NOTE(review): the embedded line numbers in this listing show gaps, so some
   statements (declarations, early returns, closing braces) are elided here;
   comments below describe only the visible code.  */
960 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
961 edge e, tree first_niters,
962 tree niters, bool update_first_loop_count)
964 struct loop *new_loop = NULL, *first_loop, *second_loop;
968 basic_block bb_before_second_loop, bb_after_second_loop;
969 basic_block bb_before_first_loop;
970 basic_block bb_between_loops;
971 edge exit_e = loop->exit_edges [0];
/* Reject loops that don't have the simple single-exit shape the peeler
   supports (the failure consequent is on an elided line).  */
973 if (!slpeel_can_duplicate_loop_p (loop, e))
976 /* We have to initialize cfg_hooks. Then, when calling
977 cfg_hooks->split_edge, the function tree_split_edge
978 is actually called and, when calling cfg_hooks->duplicate_block,
979 the function tree_duplicate_bb is called. */
980 tree_register_cfg_hooks ();
983 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
984 Resulting CFG would be:
997 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
999 if (vect_debug_stats (loop) || vect_debug_details (loop))
1000 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1006 /* NEW_LOOP was placed after LOOP. */
1008 second_loop = new_loop;
1012 /* NEW_LOOP was placed before LOOP. */
1013 first_loop = new_loop;
/* Give the duplicated loop fresh SSA names and fix up the phi nodes that
   connect the original loop and its copy.  */
1017 definitions = marked_ssa_names ();
1018 allocate_new_names (definitions);
1019 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1020 rename_variables_in_loop (new_loop);
1023 /* 2. Add the guard that controls whether the first loop is executed.
1024 Resulting CFG would be:
1026 bb_before_first_loop:
1027 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1034 bb_before_second_loop:
1043 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1044 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1045 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1046 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1047 flow_loop_scan (first_loop, LOOP_ALL);
1048 flow_loop_scan (second_loop, LOOP_ALL);
/* Guard condition FIRST_NITERS <= 0 skips the first loop (the assignment
   of PRE_CONDITION is on an elided line).  */
1051 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1052 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1053 bb_before_second_loop, bb_before_first_loop);
1054 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1055 first_loop == new_loop);
1058 /* 3. Add the guard that controls whether the second loop is executed.
1059 Resulting CFG would be:
1061 bb_before_first_loop:
1062 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1070 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1071 GOTO bb_before_second_loop
1073 bb_before_second_loop:
1079 bb_after_second_loop:
1084 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1085 add_bb_to_loop (bb_between_loops, first_loop->outer);
1086 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1087 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1088 flow_loop_scan (first_loop, LOOP_ALL);
1089 flow_loop_scan (second_loop, LOOP_ALL);
1091 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1092 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1093 bb_after_second_loop, bb_before_first_loop);
1094 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1095 second_loop == new_loop);
1097 /* Flow loop scan does not update loop->single_exit field. */
1098 first_loop->single_exit = first_loop->exit_edges[0];
1099 second_loop->single_exit = second_loop->exit_edges[0];
1101 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1103 if (update_first_loop_count)
1104 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
/* Release the temporary SSA-name bookkeeping allocated above.  */
1106 free_new_names (definitions);
1107 BITMAP_XFREE (definitions);
1108 unmark_all_for_rewrite ();
1114 /* Here the proper Vectorizer starts. */
1116 /*************************************************************************
1117 Vectorization Utilities.
1118 *************************************************************************/
1120 /* Function new_stmt_vec_info.
1122 Create and initialize a new stmt_vec_info struct for STMT. */
1125 new_stmt_vec_info (tree stmt, struct loop *loop)
/* xcalloc zero-fills the struct; the assignments below spell out each
   field's intended default explicitly for readability.  */
1128 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1130 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1131 STMT_VINFO_STMT (res) = stmt;
1132 STMT_VINFO_LOOP (res) = loop;
1133 STMT_VINFO_RELEVANT_P (res) = 0;
1134 STMT_VINFO_VECTYPE (res) = NULL;
1135 STMT_VINFO_VEC_STMT (res) = NULL;
1136 STMT_VINFO_DATA_REF (res) = NULL;
1137 STMT_VINFO_MEMTAG (res) = NULL;
1138 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1139 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1140 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1141 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1142 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1148 /* Function new_loop_vec_info.
1150 Create and initialize a new loop_vec_info struct for LOOP, as well as
1151 stmt_vec_info structs for all the stmts in LOOP. */
1154 new_loop_vec_info (struct loop *loop)
1158 block_stmt_iterator si;
1161 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1163 bbs = get_loop_body (loop);
1165 /* Create stmt_info for all stmts in the loop. */
1166 for (i = 0; i < loop->num_nodes; i++)
1168 basic_block bb = bbs[i];
1169 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1171 tree stmt = bsi_stmt (si);
/* Make sure operand info is up to date before hanging the new
   stmt_vec_info off the stmt's annotation.  */
1174 get_stmt_operands (stmt);
1175 ann = stmt_ann (stmt);
1176 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
/* Initialize the loop-level fields; the two varrays collect the
   write/read data references found during analysis.  */
1180 LOOP_VINFO_LOOP (res) = loop;
1181 LOOP_VINFO_BBS (res) = bbs;
1182 LOOP_VINFO_EXIT_COND (res) = NULL;
1183 LOOP_VINFO_NITERS (res) = NULL;
1184 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1185 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1186 LOOP_VINFO_VECT_FACTOR (res) = 0;
1187 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1188 "loop_write_datarefs");
1189 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1190 "loop_read_datarefs");
1191 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1197 /* Function destroy_loop_vec_info.
1199 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1200 stmts in the loop. */
1203 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1208 block_stmt_iterator si;
1214 loop = LOOP_VINFO_LOOP (loop_vinfo);
1216 bbs = LOOP_VINFO_BBS (loop_vinfo);
1217 nbbs = loop->num_nodes;
/* Detach and release the per-stmt vectorization info in every block.  */
1219 for (j = 0; j < nbbs; j++)
1221 basic_block bb = bbs[j];
1222 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1224 tree stmt = bsi_stmt (si);
1225 stmt_ann_t ann = stmt_ann (stmt);
1226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* NOTE(review): STMT_INFO is fetched above but its free() is on a line
   elided from this listing — confirm it is released there.  */
1228 set_stmt_info (ann, NULL);
1232 free (LOOP_VINFO_BBS (loop_vinfo));
1233 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1234 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1240 /* Function debug_loop_stats.
1242 For vectorization statistics dumps. */
/* Predicate for TDF_STATS dumping; when enabled, also prints a
   "loop at file:line" header taken from the first stmt in LOOP that
   carries a source location.  The return statements are on lines elided
   from this listing.  */
1245 vect_debug_stats (struct loop *loop)
1248 block_stmt_iterator si;
1249 tree node = NULL_TREE;
/* Nothing to do unless a dump file is open with -stats requested.  */
1251 if (!dump_file || !(dump_flags & TDF_STATS))
1256 fprintf (dump_file, "\n");
/* Scan for a stmt with usable location info.  */
1265 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1267 node = bsi_stmt (si);
1268 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1272 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1273 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1275 fprintf (dump_file, "\nloop at %s:%d: ",
1276 EXPR_FILENAME (node), EXPR_LINENO (node));
1284 /* Function debug_loop_details.
1286 For vectorization debug dumps. */
/* Predicate for TDF_DETAILS dumping; mirrors vect_debug_stats but gated
   on the -details flag.  The return statements are on lines elided from
   this listing.  */
1289 vect_debug_details (struct loop *loop)
1292 block_stmt_iterator si;
1293 tree node = NULL_TREE;
/* Nothing to do unless a dump file is open with -details requested.  */
1295 if (!dump_file || !(dump_flags & TDF_DETAILS))
1300 fprintf (dump_file, "\n");
/* Scan for a stmt with usable location info.  */
1309 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1311 node = bsi_stmt (si);
1312 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1316 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1317 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1319 fprintf (dump_file, "\nloop at %s:%d: ",
1320 EXPR_FILENAME (node), EXPR_LINENO (node));
1328 /* Function vect_get_ptr_offset
1330 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
/* Stub: all parameters are marked unused and alignment information is not
   consulted yet; the (elided) body presumably returns a "don't know"
   result — confirm against the full source.  */
1333 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1334 tree vectype ATTRIBUTE_UNUSED,
1335 tree *offset ATTRIBUTE_UNUSED)
1337 /* TODO: Use alignment information. */
1342 /* Function vect_strip_conversion
1344 Strip conversions that don't narrow the mode. */
1347 vect_strip_conversion (tree expr)
1349 tree to, ti, oprnd0;
/* Peel off NOP/CONVERT wrappers one at a time, as long as both sides are
   integral and the conversion does not narrow the mode.  */
1351 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1353 to = TREE_TYPE (expr);
1354 oprnd0 = TREE_OPERAND (expr, 0);
1355 ti = TREE_TYPE (oprnd0);
/* The consequents of the two tests below are on elided lines; a
   non-integral type or a narrowing conversion stops the stripping.  */
1357 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1359 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1368 /* Function vect_analyze_offset_expr
1370 Given an offset expression EXPR received from get_inner_reference, analyze
1371 it and create an expression for INITIAL_OFFSET by substituting the variables
1372 of EXPR with initial_condition of the corresponding access_fn in the loop.
1375 for (j = 3; j < N; j++)
1378 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1379 substituted, since its access_fn in the inner loop is i. 'j' will be
1380 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1383 Compute MISALIGN (the misalignment of the data reference initial access from
1384 its base) if possible. Misalignment can be calculated only if all the
1385 variables can be substituted with constants, or if a variable is multiplied
1386 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1387 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1388 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1389 VECTYPE_ALIGNMENT computation in the caller of this function).
1391 STEP is an evolution of the data reference in this loop in bytes.
1392 In the above example, STEP is C_j.
1394 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1395 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1396 are NULL_TREEs. Otherwise, return TRUE.
1401 vect_analyze_offset_expr (tree expr,
1403 tree vectype_alignment,
1404 tree *initial_offset,
/* LEFT_*/RIGHT_* accumulate the recursive results for the two operands of
   a binary EXPR; they default to zero.  */
1410 tree left_offset = size_zero_node;
1411 tree right_offset = size_zero_node;
1412 tree left_misalign = size_zero_node;
1413 tree right_misalign = size_zero_node;
1414 tree left_step = size_zero_node;
1415 tree right_step = size_zero_node;
1416 enum tree_code code;
1417 tree init, evolution;
/* Outputs default to "unknown" until proven computable.  */
1420 *misalign = NULL_TREE;
1421 *initial_offset = NULL_TREE;
1423 /* Strip conversions that don't narrow the mode. */
1424 expr = vect_strip_conversion (expr);
/* 1. Constant: offset and misalignment are the constant itself; no
   evolution in the loop.  */
1430 if (TREE_CODE (expr) == INTEGER_CST)
1432 *initial_offset = fold_convert (sizetype, expr);
1433 *misalign = fold_convert (sizetype, expr);
1434 *step = size_zero_node;
1438 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1439 access_fn in the current loop. */
1440 if (SSA_VAR_P (expr))
1442 tree access_fn = analyze_scalar_evolution (loop, expr);
1444 if (access_fn == chrec_dont_know)
1448 init = initial_condition_in_loop_num (access_fn, loop->num);
1449 if (init == expr && !expr_invariant_in_loop_p (loop, init))
1450 /* Not enough information: may be not loop invariant.
1451 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1452 initial_condition is D, but it depends on i - loop's induction
1456 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1457 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1458 /* Evolution is not constant. */
1461 if (TREE_CODE (init) == INTEGER_CST)
1462 *misalign = fold_convert (sizetype, init);
1464 /* Not constant, misalignment cannot be calculated. */
1465 *misalign = NULL_TREE;
1467 *initial_offset = fold_convert (sizetype, init);
1469 *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
1473 /* Recursive computation. */
1474 if (!BINARY_CLASS_P (expr))
1476 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
1477 if (vect_debug_details (NULL))
1479 fprintf (dump_file, "Not binary expression ");
1480 print_generic_expr (dump_file, expr, TDF_SLIM);
/* Analyze both operands recursively; failure of either aborts.  */
1484 oprnd0 = TREE_OPERAND (expr, 0);
1485 oprnd1 = TREE_OPERAND (expr, 1);
1487 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1488 &left_misalign, &left_step)
1489 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1490 &right_offset, &right_misalign, &right_step))
1493 /* The type of the operation: plus, minus or mult. */
1494 code = TREE_CODE (expr);
/* The MULT path (the case labels themselves are on elided lines) requires
   a constant right operand.  */
1498 if (TREE_CODE (right_offset) != INTEGER_CST)
1499 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1501 FORNOW: We don't support such cases. */
1504 /* Strip conversions that don't narrow the mode. */
1505 left_offset = vect_strip_conversion (left_offset);
1508 /* Misalignment computation. */
1509 if (SSA_VAR_P (left_offset))
1511 /* If the left side contains variable that cannot be substituted with
1512 constant, we check if the right side is a multiple of ALIGNMENT. */
1513 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1514 vectype_alignment)))
1515 *misalign = size_zero_node;
1517 /* If the remainder is not zero or the right side isn't constant, we
1518 can't compute misalignment. */
1519 *misalign = NULL_TREE;
1523 /* The left operand was successfully substituted with constant. */
1525 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1527 *misalign = size_binop (code, left_misalign, right_misalign);
1529 *misalign = NULL_TREE;
1532 /* Step calculation. */
1533 /* Multiply the step by the right operand. */
1534 *step = size_binop (MULT_EXPR, left_step, right_offset);
1539 /* Combine the recursive calculations for step and misalignment. */
1540 *step = size_binop (code, left_step, right_step);
/* Misalignment combines only when both sides are known.  */
1542 if (left_misalign && right_misalign)
1543 *misalign = size_binop (code, left_misalign, right_misalign);
1545 *misalign = NULL_TREE;
1553 /* Compute offset. */
1554 *initial_offset = fold_convert (sizetype,
1555 fold (build2 (code, TREE_TYPE (left_offset),
1562 /* Function vect_get_base_and_offset
1564 Return the BASE of the data reference EXPR.
1565 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1567 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1568 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1569 instantiated with initial_conditions of access_functions of variables,
1570 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1572 Function get_inner_reference is used for the above in case of ARRAY_REF and
1576 EXPR - the memory reference that is being analyzed
1577 DR - the data_reference struct of the _original_ memory reference
1578 (Note: DR_REF (DR) is not necessarily EXPR)
1579 VECTYPE - the type that defines the alignment (i.e, we compute
1580 alignment relative to TYPE_ALIGN(VECTYPE))
1583 BASE (returned value) - the base of the data reference EXPR.
1584 E.g, if EXPR is a.b[k].c[i][j] the returned
1586 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1587 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1588 computation is impossible
1589 STEP - evolution of the DR_REF in the loop
1590 BASE_ALIGNED_P - indicates if BASE is aligned
1592 If something unexpected is encountered (an unsupported form of data-ref),
1593 then NULL_TREE is returned. */
1596 vect_get_base_and_offset (struct data_reference *dr,
1599 loop_vec_info loop_vinfo,
1600 tree *initial_offset,
1603 bool *base_aligned_p)
/* Per-level contributions of this recursion step; combined with the
   recursive results near the end.  */
1605 tree this_offset = size_zero_node;
1606 tree this_misalign = size_zero_node;
1607 tree this_step = size_zero_node;
1608 tree base = NULL_TREE;
1610 tree oprnd0, oprnd1;
1611 enum tree_code code = TREE_CODE (expr);
1612 HOST_WIDE_INT pbitsize;
1613 HOST_WIDE_INT pbitpos;
1615 enum machine_mode pmode;
1616 int punsignedp, pvolatilep;
1617 tree bit_pos_in_bytes;
1618 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1620 *base_aligned_p = false;
/* NOTE(review): this body dispatches on CODE, but the switch/case labels
   are on lines elided from this listing; the groups below appear to
   correspond to decls, pointer SSA names, constants, and the recursive
   component cases — confirm against the full source.  */
1624 /* These cases end the recursion: */
/* A decl base: offsets are zero; base is aligned if the decl's own
   alignment covers the vector type's alignment.  */
1627 *initial_offset = size_zero_node;
1628 *step = size_zero_node;
1629 *misalign = size_zero_node;
1630 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1631 *base_aligned_p = true;
/* A pointer base: only POINTER_TYPEs are handled; if the pointed-to type
   is under-aligned, fall back to vect_get_ptr_offset.  */
1635 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1638 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1640 base = vect_get_ptr_offset (expr, vectype, misalign);
1642 *base_aligned_p = true;
1646 *base_aligned_p = true;
1647 *misalign = size_zero_node;
1649 *initial_offset = size_zero_node;
1650 *step = size_zero_node;
/* A constant: it contributes directly to offset and misalignment.  */
1654 *initial_offset = fold_convert (sizetype, expr);
1655 *misalign = fold_convert (sizetype, expr);
1656 *step = size_zero_node;
1659 /* These cases continue the recursion: */
1661 oprnd0 = TREE_OPERAND (expr, 0);
1666 oprnd0 = TREE_OPERAND (expr, 0);
1672 oprnd0 = TREE_OPERAND (expr, 0);
1673 oprnd1 = TREE_OPERAND (expr, 1);
1675 /* In case we have a PLUS_EXPR of the form
1676 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1677 This is verified in vect_get_memtag_and_dr. */
1678 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1679 &this_offset, &this_misalign,
1680 &this_step, base_aligned_p);
1681 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1682 this_offset = size_zero_node;
1685 this_misalign = NULL_TREE;
1691 if (!handled_component_p (expr))
1692 /* Unsupported expression. */
1695 /* Find the base and the offset from it. */
1696 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1697 &pmode, &punsignedp, &pvolatilep, false);
1702 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1703 &this_offset, &this_misalign,
1706 /* Failed to compute offset or step. */
1708 *initial_offset = NULL_TREE;
1709 *misalign = NULL_TREE;
1713 /* Add bit position to OFFSET and MISALIGN. */
1715 bit_pos_in_bytes = size_int (pbitpos/BITS_PER_UNIT);
1716 /* Check that there is no remainder in bits. */
1717 if (pbitpos%BITS_PER_UNIT)
1719 if (vect_debug_details (NULL))
1720 fprintf (dump_file, "bit offset alignment.");
1723 this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1724 fold_convert (sizetype, this_offset)));
1726 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1728 /* Continue the recursion to refine the base (get_inner_reference returns
1729 &a for &a[i], and not a). */
1733 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1734 initial_offset, misalign, step,
1738 /* Combine the results. */
/* Misalignment is propagated only when both this level's and the
   recursion's contributions are known.  */
1739 if (this_misalign && *misalign)
1740 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1742 *misalign = NULL_TREE;
1744 *step = size_binop (PLUS_EXPR, *step, this_step);
1746 *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1747 *initial_offset, this_offset));
1749 if (vect_debug_details (NULL))
1751 print_generic_expr (dump_file, expr, TDF_SLIM);
1752 fprintf (dump_file, "\n --> total offset for ref: ");
1753 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1754 fprintf (dump_file, "\n --> total misalign for ref: ");
1755 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1756 fprintf (dump_file, "\n --> total step for ref: ");
1757 print_generic_expr (dump_file, *step, TDF_SLIM);
1764 /* Function vect_can_force_dr_alignment_p.
1766 Returns whether the alignment of a DECL can be forced to be aligned
1767 on ALIGNMENT bit boundary. */
1770 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
/* Guard clauses: only VAR_DECLs whose definition we own and that have not
   been emitted yet can be realigned.  The consequents of the first three
   tests are on elided lines (presumably "return false").  */
1772 if (TREE_CODE (decl) != VAR_DECL)
1775 if (DECL_EXTERNAL (decl))
1778 if (TREE_ASM_WRITTEN (decl))
/* Static storage: bounded by what the object file format supports.  */
1781 if (TREE_STATIC (decl))
1782 return (alignment <= MAX_OFILE_ALIGNMENT);
1784 /* This is not 100% correct. The absolute correct stack alignment
1785 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1786 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1787 However, until someone implements forced stack alignment, SSE
1788 isn't really usable without this. */
1789 return (alignment <= PREFERRED_STACK_BOUNDARY);
1793 /* Function vect_get_new_vect_var.
1795 Returns a name for a new variable. The current naming scheme appends the
1796 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1797 the name of vectorizer generated variables, and appends that to NAME if
1801 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
/* The prefix selection ("vect_" vs "vect_p") happens here; the assignment
   lines are elided in this listing.  */
1807 if (var_kind == vect_simple_var)
1812 prefix_len = strlen (prefix);
/* With a NAME: temp named prefix+NAME; without: just the prefix.  */
1815 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1817 new_vect_var = create_tmp_var (type, prefix);
1819 return new_vect_var;
1823 /* Function vect_create_index_for_vector_ref.
1825 Create (and return) an index variable, along with its update chain in the
1826 loop. This variable will be used to access a memory location in a vector
1830 LOOP: The loop being vectorized.
1831 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1832 function can be added here, or in the loop pre-header.
1835 Return an index that will be used to index a vector array. It is expected
1836 that a pointer to the first vector will be used as the base address for the
1839 FORNOW: we are not trying to be efficient, just creating a new index each
1840 time from scratch. At this time all vector references could use the same
1843 TODO: create only one index to be used by all vector references. Record
1844 the index in the LOOP_VINFO the first time this procedure is called and
1845 return it on subsequent calls. The increment of this index must be placed
1846 just before the conditional expression that ends the single block loop. */
1849 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1852 tree indx_before_incr, indx_after_incr;
1854 /* It is assumed that the base pointer used for vectorized access contains
1855 the address of the first vector. Therefore the index used for vectorized
1856 access must be initialized to zero and incremented by 1. */
/* IV runs 0, 1, 2, ... — one step per vector iteration.  */
1858 init = integer_zero_node;
1859 step = integer_one_node;
1861 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1862 create_iv (init, step, NULL_TREE, loop, bsi, false,
1863 &indx_before_incr, &indx_after_incr);
/* Return the pre-increment value, i.e. the index valid at the top of the
   loop body.  */
1865 return indx_before_incr;
1869 /* Function vect_create_addr_base_for_vector_ref.
1871 Create an expression that computes the address of the first memory location
1872 that will be accessed for a data reference.
1875 STMT: The statement containing the data reference.
1876 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1877 OFFSET: Optional. If supplied, it is added to the initial address.
1880 1. Return an SSA_NAME whose value is the address of the memory location of
1881 the first vector of the data reference.
1882 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1883 these statement(s) which define the returned SSA_NAME.
1885 FORNOW: We are only handling array accesses with step 1. */
1888 vect_create_addr_base_for_vector_ref (tree stmt,
1889 tree *new_stmt_list,
1892 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1893 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
/* Unshare the trees taken from the stmt info so the stmts we build below
   do not share structure with the originals.  */
1894 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1895 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1896 tree ref = DR_REF (dr);
1897 tree scalar_type = TREE_TYPE (ref);
1898 tree scalar_ptr_type = build_pointer_type (scalar_type);
1901 tree addr_base, addr_expr;
1902 tree dest, new_stmt;
1903 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
/* Non-pointer base (e.g. an array/record object): take its address.  */
1905 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1906 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1908 /* Add '&' to ref_base. */
1909 data_ref_base = build_fold_addr_expr (data_ref_base);
1912 /* Create '(scalar_type*) base' for pointers. */
/* NOTE(review): DEST and NEW_STMT declared here shadow the declarations
   at function scope above.  */
1913 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1914 tree scalar_array_type = build_array_type (scalar_type, 0);
1915 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1916 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1917 add_referenced_tmp_var (array_ptr);
/* Gimplify the base into DEST, emitting any helper stmts onto the
   caller-supplied NEW_STMT_LIST.  */
1919 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1920 add_referenced_tmp_var (dest);
1921 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1922 append_to_statement_list_force (new_stmt, new_stmt_list);
1924 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1925 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1926 new_temp = make_ssa_name (array_ptr, vec_stmt);
1927 TREE_OPERAND (vec_stmt, 0) = new_temp;
1928 append_to_statement_list_force (vec_stmt, new_stmt_list);
1929 data_ref_base = new_temp;
1932 /* Create base_offset */
1933 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1934 add_referenced_tmp_var (dest);
1935 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1936 append_to_statement_list_force (new_stmt, new_stmt_list);
/* If the caller supplied an extra OFFSET, scale it by the dataref's step
   and fold it into BASE_OFFSET.  */
1940 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1941 add_referenced_tmp_var (tmp);
1942 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1943 STMT_VINFO_VECT_STEP (stmt_info)));
1944 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1946 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1947 append_to_statement_list_force (new_stmt, new_stmt_list);
1950 /* base + base_offset */
1951 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1954 /* addr_expr = addr_base */
1955 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1956 get_name (base_name));
1957 add_referenced_tmp_var (addr_expr);
1958 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1959 new_temp = make_ssa_name (addr_expr, vec_stmt);
1960 TREE_OPERAND (vec_stmt, 0) = new_temp;
1961 append_to_statement_list_force (vec_stmt, new_stmt_list);
1963 if (vect_debug_details (NULL))
1965 fprintf (dump_file, "created ");
1966 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1967 fprintf (dump_file, "\n");
1973 /* Function get_vectype_for_scalar_type.
1975 Returns the vector type corresponding to SCALAR_TYPE as supported
1979 get_vectype_for_scalar_type (tree scalar_type)
1981 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1982 int nbytes = GET_MODE_SIZE (inner_mode);
1989 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
/* NOTE(review): NBYTES is used as a divisor here; the guard against a
   zero or oversized NBYTES appears to be on elided lines — confirm.  */
1991 nunits = UNITS_PER_SIMD_WORD / nbytes;
1993 vectype = build_vector_type (scalar_type, nunits);
1994 if (vect_debug_details (NULL))
1996 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1997 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2003 if (vect_debug_details (NULL))
2005 fprintf (dump_file, "vectype: ");
2006 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject vector types the target has no machine mode for.  */
2009 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2011 /* TODO: tree-complex.c sometimes can parallelize operations
2012 on generic vectors. We can vectorize the loop in that case,
2013 but then we should re-run the lowering pass. */
2014 if (vect_debug_details (NULL))
2015 fprintf (dump_file, "mode not supported by target.");
2023 /* Function vect_align_data_ref.
2025 Handle misalignment of memory accesses.
2027 FORNOW: Can't handle misaligned accesses.
2028 Make sure that the dataref is aligned. */
/* Enforce (via assertion) that the stmt's data reference is aligned; no
   realignment code is generated yet.  */
2031 vect_align_data_ref (tree stmt)
2033 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2034 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2036 /* FORNOW: can't handle misaligned accesses;
2037 all accesses expected to be aligned. */
2038 gcc_assert (aligned_access_p (dr));
2042 /* Function vect_create_data_ref_ptr.
2044 Create a memory reference expression for vector access, to be used in a
2045 vector load/store stmt. The reference is based on a new pointer to vector
2049 1. STMT: a stmt that references memory. Expected to be of the form
2050 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2051 2. BSI: block_stmt_iterator where new stmts can be added.
2052 3. OFFSET (optional): an offset to be added to the initial address accessed
2053 by the data-ref in STMT.
2054 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2055 pointing to the initial address.
2058 1. Declare a new ptr to vector_type, and have it point to the base of the
2059 data reference (initial addressed accessed by the data reference).
2060 For example, for vector of type V8HI, the following code is generated:
2063 vp = (v8hi *)initial_address;
2065 if OFFSET is not supplied:
2066 initial_address = &a[init];
2067 if OFFSET is supplied:
2068 initial_address = &a[init + OFFSET];
2070 Return the initial_address in INITIAL_ADDRESS.
2072 2. Create a data-reference in the loop based on the new vector pointer vp,
2073 and using a new index variable 'idx' as follows:
2077 where if ONLY_INIT is true:
2080 update = idx + vector_type_size
2082 Return the pointer vp'.
2085 FORNOW: handle only aligned and consecutive accesses. */
2088 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2089 tree *initial_address, bool only_init)
2092 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2093 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2094 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2095 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Virtual operands of STMT — used below to mark aliased variables for
   SSA renaming.  */
2099 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2100 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2101 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2102 int nvuses, nv_may_defs, nv_must_defs;
2106 tree new_stmt_list = NULL_TREE;
2108 edge pe = loop_preheader_edge (loop);
2114 tree type, tmp, size;
2116 base_name = unshare_expr (DR_BASE_NAME (dr));
/* Debug dump only: classify the base by tree code.  */
2117 if (vect_debug_details (NULL))
2119 tree data_ref_base = base_name;
2120 fprintf (dump_file, "create array_ref of type: ");
2121 print_generic_expr (dump_file, vectype, TDF_SLIM);
2122 if (TREE_CODE (data_ref_base) == VAR_DECL)
2123 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2124 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2125 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2126 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2127 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2128 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2129 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2130 print_generic_expr (dump_file, base_name, TDF_SLIM);
2133 /** (1) Create the new vector-pointer variable: **/
2135 vect_ptr_type = build_pointer_type (vectype);
2136 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2137 get_name (base_name));
2138 add_referenced_tmp_var (vect_ptr);
2141 /** (2) Handle aliasing information of the new vector-pointer: **/
/* The new pointer inherits the original dataref's type memory tag so it
   aliases the same memory.  */
2143 tag = STMT_VINFO_MEMTAG (stmt_info);
2145 get_var_ann (vect_ptr)->type_mem_tag = tag;
2147 /* Mark for renaming all aliased variables
2148 (i.e, the may-aliases of the type-mem-tag). */
2149 nvuses = NUM_VUSES (vuses);
2150 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2151 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2152 for (i = 0; i < nvuses; i++)
2154 tree use = VUSE_OP (vuses, i);
2155 if (TREE_CODE (use) == SSA_NAME)
2156 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2158 for (i = 0; i < nv_may_defs; i++)
2160 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2161 if (TREE_CODE (def) == SSA_NAME)
2162 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2164 for (i = 0; i < nv_must_defs; i++)
2166 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2167 if (TREE_CODE (def) == SSA_NAME)
2168 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2172 /** (3) Calculate the initial address the vector-pointer, and set
2173 the vector-pointer to point to it before the loop: **/
2175 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2176 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
/* The address computation must land on the preheader edge; the asserts
   check that inserting there did not need to split a new block.  */
2178 pe = loop_preheader_edge (loop);
2179 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2180 gcc_assert (!new_bb);
2181 *initial_address = new_temp;
2183 /* Create: p = (vectype *) initial_base */
2184 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2185 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2186 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2187 TREE_OPERAND (vec_stmt, 0) = new_temp;
2188 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2189 gcc_assert (!new_bb);
2190 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2193 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2195 if (only_init) /* No update in loop is required. */
2196 return vect_ptr_init;
2198 idx = vect_create_index_for_vector_ref (loop, bsi);
2200 /* Create: update = idx * vectype_size */
2201 tmp = create_tmp_var (integer_type_node, "update");
2202 add_referenced_tmp_var (tmp);
/* The byte update is computed in an integer type of pointer width before
   being added to the pointer.  */
2203 size = TYPE_SIZE (vect_ptr_type);
2204 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2205 ptr_update = create_tmp_var (type, "update");
2206 add_referenced_tmp_var (ptr_update);
2207 vectype_size = TYPE_SIZE_UNIT (vectype);
2208 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2209 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2210 new_temp = make_ssa_name (tmp, vec_stmt);
2211 TREE_OPERAND (vec_stmt, 0) = new_temp;
2212 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2213 vec_stmt = fold_convert (type, new_temp);
2214 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2215 new_temp = make_ssa_name (ptr_update, vec_stmt);
2216 TREE_OPERAND (vec_stmt, 0) = new_temp;
2217 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2219 /* Create: data_ref_ptr = vect_ptr_init + update */
2220 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2221 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2222 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2223 TREE_OPERAND (vec_stmt, 0) = new_temp;
2224 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2225 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2227 return data_ref_ptr;
2231 /* Function vect_create_destination_var.
2233 Create a new temporary of type VECTYPE. */
2236 vect_create_destination_var (tree scalar_dest, tree vectype)
2239 const char *new_name;
2241 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2243 new_name = get_name (scalar_dest);
2246 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2247 add_referenced_tmp_var (vec_dest);
2253 /* Function vect_init_vector.
2255 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2256 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2257 used in the vectorization of STMT. */
2260 vect_init_vector (tree stmt, tree vector_var)
2262 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2263 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2266 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2272 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2273 add_referenced_tmp_var (new_var);
2275 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2276 new_temp = make_ssa_name (new_var, init_stmt);
2277 TREE_OPERAND (init_stmt, 0) = new_temp;
2279 pe = loop_preheader_edge (loop);
2280 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2281 gcc_assert (!new_bb);
2283 if (vect_debug_details (NULL))
2285 fprintf (dump_file, "created new init_stmt: ");
2286 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2289 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2294 /* Function vect_get_vec_def_for_operand.
2296 OP is an operand in STMT. This function returns a (vector) def that will be
2297 used in the vectorized stmt for STMT.
2299 In the case that OP is an SSA_NAME which is defined in the loop, then
2300 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2302 In case OP is an invariant or constant, a new stmt that creates a vector def
2303 needs to be introduced. */
/* NOTE(review): this fragment was extracted with blank/brace-only lines
   dropped (see the gaps in the embedded line numbers): the return-type
   line, braces, several local declarations and the switch case labels
   below are missing.  The remaining code lines are untouched.  */
2306 vect_get_vec_def_for_operand (tree op, tree stmt)
2311 stmt_vec_info def_stmt_info = NULL;
2312 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2313 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2314 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2315 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2322 if (vect_debug_details (NULL))
2324 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2325 print_generic_expr (dump_file, op, TDF_SLIM);
2328 /** ===> Case 1: operand is a constant. **/
2330 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2332 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2336 /* Build a tree with vector elements. */
2337 if (vect_debug_details (NULL))
2338 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits)
/* Replicate the scalar constant NUNITS times; the element list is built
   back-to-front because tree_cons prepends.  */
2340 for (i = nunits - 1; i >= 0; --i)
2342 t = tree_cons (NULL_TREE, op, t);
2344 vec_cst = build_vector (vectype, t);
2345 return vect_init_vector (stmt, vec_cst);
2348 gcc_assert (TREE_CODE (op) == SSA_NAME);
2350 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2352 def_stmt = SSA_NAME_DEF_STMT (op);
2353 def_stmt_info = vinfo_for_stmt (def_stmt);
2355 if (vect_debug_details (NULL))
2357 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2358 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2362 /** ==> Case 2.1: operand is defined inside the loop. **/
2366 /* Get the def from the vectorized stmt. */
2368 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2369 gcc_assert (vec_stmt);
2370 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2375 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2376 it is a reduction/induction. **/
2378 bb = bb_for_stmt (def_stmt);
2379 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2381 if (vect_debug_details (NULL))
2382 fprintf (dump_file, "reduction/induction - unsupported.");
2383 internal_error ("no support for reduction/induction"); /* FORNOW */
2387 /** ==> Case 2.3: operand is defined outside the loop -
2388 it is a loop invariant. */
/* NOTE(review): the case labels of this switch were lost in extraction;
   judging by the bodies they dispatch on PHI_NODE / assignment / empty
   stmt — confirm against the original file before relying on this.  */
2390 switch (TREE_CODE (def_stmt))
2393 def = PHI_RESULT (def_stmt);
2396 def = TREE_OPERAND (def_stmt, 0);
2399 def = TREE_OPERAND (def_stmt, 0);
2400 gcc_assert (IS_EMPTY_STMT (def_stmt));
2404 if (vect_debug_details (NULL))
2406 fprintf (dump_file, "unsupported defining stmt: ");
2407 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2409 internal_error ("unsupported defining stmt");
2412 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2414 if (vect_debug_details (NULL))
2415 fprintf (dump_file, "Create vector_inv.");
2417 for (i = nunits - 1; i >= 0; --i)
2419 t = tree_cons (NULL_TREE, def, t);
/* Invariants are runtime values, so a CONSTRUCTOR is used here rather
   than the VECTOR_CST used for compile-time constants above.  */
2422 vec_inv = build_constructor (vectype, t);
2423 return vect_init_vector (stmt, vec_inv);
2427 /* Function vect_finish_stmt_generation.
2429 Insert a new stmt. */
2432 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2434 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2436 if (vect_debug_details (NULL))
2438 fprintf (dump_file, "add new stmt: ");
2439 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2442 /* Make sure bsi points to the stmt that is being vectorized. */
2444 /* Assumption: any stmts created for the vectorization of stmt S were
2445 inserted before S. BSI is expected to point to S or some new stmt before S.
2448 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2450 gcc_assert (stmt == bsi_stmt (*bsi));
2454 /* Function vectorizable_assignment.
2456 Check if STMT performs an assignment (copy) that can be vectorized.
2457 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2458 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2459 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2462 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2468 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2469 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2470 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2473 /* Is vectorizable assignment? */
2475 if (TREE_CODE (stmt) != MODIFY_EXPR)
2478 scalar_dest = TREE_OPERAND (stmt, 0);
2479 if (TREE_CODE (scalar_dest) != SSA_NAME)
2482 op = TREE_OPERAND (stmt, 1);
2483 if (!vect_is_simple_use (op, loop, NULL))
2485 if (vect_debug_details (NULL))
2486 fprintf (dump_file, "use not simple.");
2490 if (!vec_stmt) /* transformation not required. */
2492 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2497 if (vect_debug_details (NULL))
2498 fprintf (dump_file, "transform assignment.");
2501 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2504 op = TREE_OPERAND (stmt, 1);
2505 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2507 /* Arguments are ready. create the new vector stmt. */
2508 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2509 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2510 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2511 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2517 /* Function vectorizable_operation.
2519 Check if STMT performs a binary or unary operation that can be vectorized.
2520 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2521 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2522 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): blank/brace-only lines and some declarations (optab,
   loop index, 'return false' statements) were dropped when this fragment
   was extracted -- the embedded line-number gaps mark them.  The code
   lines below are untouched.  */
2525 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2530 tree op0, op1 = NULL;
2531 tree vec_oprnd0, vec_oprnd1=NULL;
2532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2536 enum tree_code code;
2537 enum machine_mode vec_mode;
2543 /* Is STMT a vectorizable binary/unary operation? */
2544 if (TREE_CODE (stmt) != MODIFY_EXPR)
2547 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2550 operation = TREE_OPERAND (stmt, 1);
2551 code = TREE_CODE (operation);
/* Map the scalar tree code to the optab that implements it on vectors.  */
2552 optab = optab_for_tree_code (code, vectype);
2554 /* Support only unary or binary operations. */
2555 op_type = TREE_CODE_LENGTH (code);
2556 if (op_type != unary_op && op_type != binary_op)
2558 if (vect_debug_details (NULL))
2559 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* All operands must be loop invariants or defs of vectorizable stmts.  */
2563 for (i = 0; i < op_type; i++)
2565 op = TREE_OPERAND (operation, i);
2566 if (!vect_is_simple_use (op, loop, NULL))
2568 if (vect_debug_details (NULL))
2569 fprintf (dump_file, "use not simple.");
2574 /* Supportable by target? */
/* NOTE(review): the guard that precedes this dump (presumably
   'if (!optab)') was dropped in extraction -- confirm upstream.  */
2577 if (vect_debug_details (NULL))
2578 fprintf (dump_file, "no optab.");
2581 vec_mode = TYPE_MODE (vectype);
/* CODE_FOR_nothing means the target has no insn for this op/mode.  */
2582 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2584 if (vect_debug_details (NULL))
2585 fprintf (dump_file, "op not supported by target.");
2589 if (!vec_stmt) /* transformation not required. */
2591 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2597 if (vect_debug_details (NULL))
2598 fprintf (dump_file, "transform binary/unary operation.");
/* Handle def: the vector variable that will hold the result.  */
2601 scalar_dest = TREE_OPERAND (stmt, 0);
2602 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle uses: vectorized defs for the one or two operands.  */
2605 op0 = TREE_OPERAND (operation, 0);
2606 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2608 if (op_type == binary_op)
2610 op1 = TREE_OPERAND (operation, 1);
2611 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2614 /* Arguments are ready. create the new vector stmt. */
2616 if (op_type == binary_op)
2617 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2618 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
/* Unary case (the 'else' line was dropped in extraction).  */
2620 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2621 build1 (code, vectype, vec_oprnd0));
2622 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2623 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2624 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2630 /* Function vectorizable_store.
2632 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2634 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2635 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2636 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2639 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2645 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2646 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2647 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2648 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2649 enum machine_mode vec_mode;
2651 enum dr_alignment_support alignment_support_cheme;
2653 /* Is vectorizable store? */
2655 if (TREE_CODE (stmt) != MODIFY_EXPR)
2658 scalar_dest = TREE_OPERAND (stmt, 0);
2659 if (TREE_CODE (scalar_dest) != ARRAY_REF
2660 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2663 op = TREE_OPERAND (stmt, 1);
2664 if (!vect_is_simple_use (op, loop, NULL))
2666 if (vect_debug_details (NULL))
2667 fprintf (dump_file, "use not simple.");
2671 vec_mode = TYPE_MODE (vectype);
2672 /* FORNOW. In some cases can vectorize even if data-type not supported
2673 (e.g. - array initialization with 0). */
2674 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2677 if (!STMT_VINFO_DATA_REF (stmt_info))
2681 if (!vec_stmt) /* transformation not required. */
2683 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2689 if (vect_debug_details (NULL))
2690 fprintf (dump_file, "transform store");
2692 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2693 gcc_assert (alignment_support_cheme);
2694 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2696 /* Handle use - get the vectorized def from the defining stmt. */
2697 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2700 /* FORNOW: make sure the data reference is aligned. */
2701 vect_align_data_ref (stmt);
2702 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2703 data_ref = build_fold_indirect_ref (data_ref);
2705 /* Arguments are ready. create the new vector stmt. */
2706 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2707 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2713 /* vectorizable_load.
2715 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2717 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2718 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2719 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this fragment was extracted with blank/brace-only lines,
   several declarations, else-branches and part of a pseudo-code comment
   dropped (see the embedded line-number gaps).  Code lines untouched.  */
2722 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2725 tree vec_dest = NULL;
2726 tree data_ref = NULL;
2728 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2729 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2730 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2737 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2738 edge pe = loop_preheader_edge (loop);
2739 enum dr_alignment_support alignment_support_cheme;
2741 /* Is vectorizable load? */
2743 if (TREE_CODE (stmt) != MODIFY_EXPR)
2746 scalar_dest = TREE_OPERAND (stmt, 0);
2747 if (TREE_CODE (scalar_dest) != SSA_NAME)
2750 op = TREE_OPERAND (stmt, 1);
2751 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2754 if (!STMT_VINFO_DATA_REF (stmt_info))
2757 mode = (int) TYPE_MODE (vectype);
2759 /* FORNOW. In some cases can vectorize even if data-type not supported
2760 (e.g. - data copies). */
2761 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2763 if (vect_debug_details (loop))
2764 fprintf (dump_file, "Aligned load, but unsupported type.");
2768 if (!vec_stmt) /* transformation not required. */
2770 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2776 if (vect_debug_details (NULL))
2777 fprintf (dump_file, "transform load.");
2779 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2780 gcc_assert (alignment_support_cheme);
/* Simple case: the target can do this load directly, either aligned or
   via a misaligned-move insn.  */
2782 if (alignment_support_cheme == dr_aligned
2783 || alignment_support_cheme == dr_unaligned_supported)
2794 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2795 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2796 if (aligned_access_p (dr))
2797 data_ref = build_fold_indirect_ref (data_ref);
/* (else-branch: wrap the access in a MISALIGNED_INDIRECT_REF carrying the
   known misalignment in bits; -1 misalignment means unknown.)  */
2800 int mis = DR_MISALIGNMENT (dr);
2801 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2802 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2803 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2805 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2806 new_temp = make_ssa_name (vec_dest, new_stmt);
2807 TREE_OPERAND (new_stmt, 0) = new_temp;
2808 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Hard case: realign-load software pipeline — load two aligned vectors
   (msq/lsq) that straddle the misaligned address and combine them with
   REALIGN_LOAD.  The dropped comment lines sketched the generated code: */
2810 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2814 msq_init = *(floor(p1))
2815 p2 = initial_addr + VS - 1;
2816 magic = have_builtin ? builtin_result : initial_address;
2819 p2' = p2 + indx * vectype_size
2821 vec_dest = realign_load (msq, lsq, magic)
2835 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2836 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2837 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2839 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2840 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2841 new_temp = make_ssa_name (vec_dest, new_stmt);
2842 TREE_OPERAND (new_stmt, 0) = new_temp;
2843 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2844 gcc_assert (!new_bb);
2845 msq_init = TREE_OPERAND (new_stmt, 0);
2848 /* <2> Create lsq = *(floor(p2')) in the loop */
/* Offset = nunits - 1 elements, so p2 points into the next vector.  */
2849 offset = build_int_cst (integer_type_node,
2850 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2851 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2852 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2853 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2854 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2855 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2856 new_temp = make_ssa_name (vec_dest, new_stmt);
2857 TREE_OPERAND (new_stmt, 0) = new_temp;
2858 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2859 lsq = TREE_OPERAND (new_stmt, 0);
2863 if (targetm.vectorize.builtin_mask_for_load)
2865 /* Create permutation mask, if required, in loop preheader. */
2867 params = build_tree_list (NULL_TREE, init_addr);
2868 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2869 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2870 new_stmt = build_function_call_expr (builtin_decl, params);
2871 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2872 new_temp = make_ssa_name (vec_dest, new_stmt);
2873 TREE_OPERAND (new_stmt, 0) = new_temp;
2874 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2875 gcc_assert (!new_bb);
2876 magic = TREE_OPERAND (new_stmt, 0);
2878 /* Since we have just created a CALL_EXPR, we may need to
2879 rename call-clobbered variables. */
2880 mark_call_clobbered_vars_to_rename ();
/* (else-branch: no target mask builtin, use the data-ref pointer.)  */
2884 /* Use current address instead of init_addr for reduced reg pressure.
2886 magic = dataref_ptr;
2890 /* <4> Create msq = phi <msq_init, lsq> in loop */
2891 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2892 msq = make_ssa_name (vec_dest, NULL_TREE);
2893 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2894 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2895 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2896 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2899 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2900 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2901 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2902 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2903 new_temp = make_ssa_name (vec_dest, new_stmt);
2904 TREE_OPERAND (new_stmt, 0) = new_temp;
2905 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2910 *vec_stmt = new_stmt;
2915 /* Function vect_supportable_dr_alignment
2917 Return whether the data reference DR is supported with respect to its
2920 static enum dr_alignment_support
2921 vect_supportable_dr_alignment (struct data_reference *dr)
2923 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2924 enum machine_mode mode = (int) TYPE_MODE (vectype);
2926 if (aligned_access_p (dr))
2929 /* Possibly unaligned access. */
2931 if (DR_IS_READ (dr))
2933 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2934 && (!targetm.vectorize.builtin_mask_for_load
2935 || targetm.vectorize.builtin_mask_for_load ()))
2936 return dr_unaligned_software_pipeline;
2938 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2939 /* Can't software pipeline the loads, but can at least do them. */
2940 return dr_unaligned_supported;
2944 return dr_unaligned_unsupported;
2948 /* Function vect_transform_stmt.
2950 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* NOTE(review): the 'break' statements, the 'default:' label, the
   declaration of 'done', the store-flag update and the final return were
   dropped when this fragment was extracted.  Code lines untouched.
   Presumably returns is_store so the caller can remove the scalar store
   — confirm against the original file.  */
2953 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2955 bool is_store = false;
2956 tree vec_stmt = NULL_TREE;
2957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the vectorization kind recorded during analysis.  */
2960 switch (STMT_VINFO_TYPE (stmt_info))
2962 case op_vec_info_type:
2963 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2967 case assignment_vec_info_type:
2968 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2972 case load_vec_info_type:
2973 done = vectorizable_load (stmt, bsi, &vec_stmt);
2977 case store_vec_info_type:
2978 done = vectorizable_store (stmt, bsi, &vec_stmt);
2983 if (vect_debug_details (NULL))
2984 fprintf (dump_file, "stmt not supported.");
/* Record the vectorized stmt so later uses of STMT's def find it.  */
2988 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2994 /* This function builds ni_name = number of iterations loop executes
2995 on the loop preheader. */
/* NOTE(review): the return-type line, braces, the 'edge pe;' declaration,
   the 'if (stmt)' guard around the insertion, and the final
   'return ni_name;' were dropped in extraction.  Code lines untouched.  */
2998 vect_build_loop_niters (loop_vec_info loop_vinfo)
3000 tree ni_name, stmt, var;
3002 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3003 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
/* Force the (possibly compound) iteration-count expression into a
   GIMPLE operand; STMT receives any statements that had to be created.  */
3005 var = create_tmp_var (TREE_TYPE (ni), "niters");
3006 add_referenced_tmp_var (var);
3007 ni_name = force_gimple_operand (ni, &stmt, false, var);
3009 pe = loop_preheader_edge (loop);
3012 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3013 gcc_assert (!new_bb);
3020 /* This function generates the following statements:
3022 ni_name = number of iterations loop executes
3023 ratio = ni_name / vf
3024 ratio_mult_vf_name = ratio * vf
3026 and places them at the loop preheader edge. */
/* NOTE(review): the return-type line, braces and several declarations
   (pe, new_bb, stmt, var, ni_name, ratio_name) were dropped when this
   fragment was extracted.  Code lines untouched.  */
3029 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3031 tree *ratio_mult_vf_name_ptr,
3032 tree *ratio_name_ptr)
3040 tree ratio_mult_vf_name;
3041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3042 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3043 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* The vectorization factor is a power of two, so division and
   multiplication by VF are emitted as shifts by log2(vf).  */
3044 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3046 pe = loop_preheader_edge (loop);
3048 /* Generate temporary variable that contains
3049 number of iterations loop executes. */
3051 ni_name = vect_build_loop_niters (loop_vinfo);
3053 /* Create: ratio = ni >> log2(vf) */
3055 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3056 add_referenced_tmp_var (var);
3057 ratio_name = make_ssa_name (var, NULL_TREE);
3058 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3059 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3060 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3062 pe = loop_preheader_edge (loop);
3063 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3064 gcc_assert (!new_bb);
3066 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3068 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3069 add_referenced_tmp_var (var);
3070 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3071 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3072 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3073 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3075 pe = loop_preheader_edge (loop);
3076 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3077 gcc_assert (!new_bb);
/* Return all three values through the out-parameters.  */
3079 *ni_name_ptr = ni_name;
3080 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3081 *ratio_name_ptr = ratio_name;
3087 /* Function vect_update_ivs_after_vectorizer.
3089 "Advance" the induction variables of LOOP to the value they should take
3090 after the execution of LOOP. This is currently necessary because the
3091 vectorizer does not handle induction variables that are used after the
3092 loop. Such a situation occurs when the last iterations of LOOP are
3094 1. We introduced new uses after LOOP for IVs that were not originally used
3095 after LOOP: the IVs of LOOP are now used by an epilog loop.
3096 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3097 times, whereas the loop IVs should be bumped N times.
3100 - LOOP - a loop that is going to be vectorized. The last few iterations
3101 of LOOP were peeled.
3102 - NITERS - the number of iterations that LOOP executes (before it is
3103 vectorized). i.e, the number of times the ivs should be bumped.
3104 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3105 coming out from LOOP on which there are uses of the LOOP ivs
3106 (this is the path from LOOP->exit to epilog_loop->preheader).
3108 The new definitions of the ivs are placed in LOOP->exit.
3109 The phi args associated with the edge UPDATE_E in the bb
3110 UPDATE_E->dest are updated accordingly.
3112 Assumption 1: Like the rest of the vectorizer, this function assumes
3113 a single loop exit that has a single predecessor.
3115 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3116 organized in the same order.
3118 Assumption 3: The access function of the ivs is simple enough (see
3119 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3121 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3122 coming out of LOOP on which the ivs of LOOP are used (this is the path
3123 that leads to the epilog loop; other paths skip the epilog loop). This
3124 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3125 needs to have its phis updated.
/* NOTE(review): return-type line, braces and some declarations (phi,
   phi1, step_expr, init_expr) were dropped in extraction.  Code lines
   untouched.  */
3129 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3131 basic_block exit_bb = loop->exit_edges[0]->dest;
3133 basic_block update_bb = update_e->dest;
3135 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3137 /* Make sure there exists a single-predecessor exit bb: */
3138 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
/* Walk the header phis and the update_bb phis in lock step
   (Assumption 2 guarantees matching order).  */
3140 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3142 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3144 tree access_fn = NULL;
3145 tree evolution_part;
3148 tree var, stmt, ni, ni_name;
3149 block_stmt_iterator last_bsi;
3151 /* Skip virtual phi's. */
3152 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3154 if (vect_debug_details (NULL))
3155 fprintf (dump_file, "virtual phi. skip.");
3159 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3160 gcc_assert (access_fn);
/* NOTE(review): the assignment head 'evolution_part =' that precedes
   this unshare_expr call was dropped in extraction — confirm upstream.  */
3162 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3163 gcc_assert (evolution_part != NULL_TREE);
3165 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3166 of degree >= 2 or exponential. */
3167 gcc_assert (!tree_is_chrec (evolution_part));
3169 step_expr = evolution_part;
3170 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* ni = init + niters * step: the value the IV has after NITERS bumps.  */
3173 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3174 build2 (MULT_EXPR, TREE_TYPE (niters),
3175 niters, step_expr), init_expr);
3177 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3178 add_referenced_tmp_var (var);
3180 ni_name = force_gimple_operand (ni, &stmt, false, var);
3182 /* Insert stmt into exit_bb. */
3183 last_bsi = bsi_last (exit_bb);
3185 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3187 /* Fix phi expressions in the successor bb. */
3188 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3189 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3190 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
3195 /* Function vect_do_peeling_for_loop_bound
3197 Peel the last iterations of the loop represented by LOOP_VINFO.
3198 The peeled iterations form a new epilog loop. Given that the loop now
3199 iterates NITERS times, the new epilog loop iterates
3200 NITERS % VECTORIZATION_FACTOR times.
3202 The original loop will later be made to iterate
3203 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
/* NOTE(review): return-type line, braces, the '#endif' lines closing the
   ENABLE_CHECKING sections, and the 'edge update_e' / 'int loop_num'
   declarations were dropped in extraction.  Code lines untouched.
   Also note the dump string below says "vect_transtorm..." — a typo in
   the original message text ("transform"); left as-is because it is a
   runtime string.  */
3206 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3207 struct loops *loops)
3210 tree ni_name, ratio_mult_vf_name;
3211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3212 struct loop *new_loop;
3214 #ifdef ENABLE_CHECKING
3218 if (vect_debug_details (NULL))
3219 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3221 /* Generate the following variables on the preheader of original loop:
3223 ni_name = number of iteration the original loop executes
3224 ratio = ni_name / vf
3225 ratio_mult_vf_name = ratio * vf */
3226 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3227 &ratio_mult_vf_name, ratio);
3229 /* Update loop info. */
3230 loop->pre_header = loop_preheader_edge (loop)->src;
3231 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3233 #ifdef ENABLE_CHECKING
3234 loop_num = loop->num;
/* Peel NITERS - ratio*vf iterations into an epilog loop after LOOP.  */
3236 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3237 ratio_mult_vf_name, ni_name, false);
3238 #ifdef ENABLE_CHECKING
3239 gcc_assert (new_loop);
3240 gcc_assert (loop_num == loop->num);
3241 slpeel_verify_cfg_after_peeling (loop, new_loop);
3244 /* A guard that controls whether the new_loop is to be executed or skipped
3245 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3246 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3247 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3248 is on the path where the LOOP IVs are used and need to be updated. */
3250 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3251 update_e = EDGE_PRED (new_loop->pre_header, 0);
3253 update_e = EDGE_PRED (new_loop->pre_header, 1);
3255 /* Update IVs of original loop as if they were advanced
3256 by ratio_mult_vf_name steps. */
3257 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3259 /* After peeling we have to reset scalar evolution analyzer. */
3266 /* Function vect_gen_niters_for_prolog_loop
3268 Set the number of iterations for the loop represented by LOOP_VINFO
3269 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3270 and the misalignment of DR - the first data reference recorded in
3271 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3272 this loop, the data reference DR will refer to an aligned location.
3274 The following computation is generated:
3276 compute address misalignment in bytes:
3277 addr_mis = addr & (vectype_size - 1)
3279 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3281 (elem_size = element type size; an element is the scalar element
3282 whose type is the inner type of the vectype) */
/* NOTE(review): return-type line, braces and several declarations were
   dropped in extraction; in particular the head 'tree start_addr ='
   of the vect_create_addr_base_for_vector_ref call below is missing
   (start_addr is used two lines later) — confirm upstream.  Code lines
   untouched.  */
3285 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3287 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3288 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3291 tree iters, iters_name;
3294 tree dr_stmt = DR_STMT (dr);
3295 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3296 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3297 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3300 tree new_stmts = NULL_TREE;
3302 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3303 tree ptr_type = TREE_TYPE (start_addr);
3304 tree size = TYPE_SIZE (ptr_type);
3305 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3306 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3307 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3308 tree niters_type = TREE_TYPE (loop_niters);
/* log2(element size); vectype_align/vf is the size of one scalar element.  */
3309 tree elem_size_log =
3310 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3311 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3313 pe = loop_preheader_edge (loop);
3314 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3315 gcc_assert (!new_bb);
3317 /* Create: byte_misalign = addr & (vectype_size - 1) */
3318 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3320 /* Create: elem_misalign = byte_misalign / element_size */
3322 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3324 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3325 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3326 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3327 iters = fold_convert (niters_type, iters);
3329 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3330 /* If the loop bound is known at compile time we already verified that it is
3331 greater than vf; since the misalignment ('iters') is at most vf, there's
3332 no need to generate the MIN_EXPR in this case. */
3333 if (TREE_CODE (loop_niters) != INTEGER_CST)
3334 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3336 var = create_tmp_var (niters_type, "prolog_loop_niters");
3337 add_referenced_tmp_var (var);
3338 iters_name = force_gimple_operand (iters, &stmt, false, var);
3340 /* Insert stmt on loop preheader edge. */
3341 pe = loop_preheader_edge (loop);
3344 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3345 gcc_assert (!new_bb);
3352 /* Function vect_update_inits_of_dr
3354 NITERS iterations were peeled from LOOP. DR represents a data reference
3355 in LOOP. This function updates the information recorded in DR to
3356 account for the fact that the first NITERS iterations had already been
3357 executed. Specifically, it updates the OFFSET field of stmt_info. */
/* NOTE(review): several structural lines (return type, braces) are absent
   from this view of the file; the visible statements advance the recorded
   initial offset of DR by NITERS * STEP.  */
3360 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3362 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3363 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
/* Distance skipped by the peeled prolog: niters * step (folded at
   compile time when both operands are constants).  */
3365 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3366 STMT_VINFO_VECT_STEP (stmt_info)));
/* New initial offset = old offset + niters * step.  */
3367 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3368 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3372 /* Function vect_update_inits_of_drs
3374 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3375 This function updates the information recorded for the data references in
3376 the loop to account for the fact that the first NITERS iterations had
3377 already been executed. Specifically, it updates the initial_condition of the
3378 access_function of all the data_references in the loop. */
3381 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3384 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3385 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3387 if (dump_file && (dump_flags & TDF_DETAILS))
3388 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n")
/* Advance the recorded start offset of every write data-ref...  */
3390 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3392 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3393 vect_update_inits_of_dr (dr, niters);
/* ...and of every read data-ref, so both sides see the peeled start.  */
3396 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3398 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3399 vect_update_inits_of_dr (dr, niters);
3404 /* Function vect_do_peeling_for_alignment
3406 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3407 'niters' is set to the misalignment of one of the data references in the
3408 loop, thereby forcing it to refer to an aligned location at the beginning
3409 of the execution of this loop. The data reference for which we are
3410 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3413 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3415 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3416 tree niters_of_prolog_loop, ni_name;
3418 struct loop *new_loop;
3420 if (vect_debug_details (NULL))
3421 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
/* Build an SSA name holding the loop's iteration count, then derive the
   number of prolog (peeled) iterations from the misalignment of the
   unaligned data-ref.  */
3423 ni_name = vect_build_loop_niters (loop_vinfo);
3424 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3426 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3428 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3429 niters_of_prolog_loop, ni_name, true);
3430 #ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by peeling (checking builds only).  */
3431 gcc_assert (new_loop);
3432 slpeel_verify_cfg_after_peeling (new_loop, loop);
3435 /* Update number of times loop executes. */
3436 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
/* The main loop now runs (original niters - prolog niters) times.  */
3437 LOOP_VINFO_NITERS (loop_vinfo) =
3438 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3440 /* Update the init conditions of the access functions of all data refs. */
3441 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3443 /* After peeling we have to reset scalar evolution analyzer. */
3450 /* Function vect_transform_loop.
3452 The analysis phase has determined that the loop is vectorizable.
3453 Vectorize the loop - created vectorized stmts to replace the scalar
3454 stmts in the loop, and update the loop exit condition. */
3457 vect_transform_loop (loop_vec_info loop_vinfo,
3458 struct loops *loops ATTRIBUTE_UNUSED)
3460 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3461 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3462 int nbbs = loop->num_nodes;
3463 block_stmt_iterator si;
3466 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3468 if (vect_debug_details (NULL))
3469 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3472 /* Peel the loop if there are data refs with unknown alignment.
3473 Only one data ref with unknown store is allowed. */
3475 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3476 vect_do_peeling_for_alignment (loop_vinfo, loops);
3478 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3479 compile time constant), or it is a constant that doesn't divide by the
3480 vectorization factor, then an epilog loop needs to be created.
3481 We therefore duplicate the loop: the original loop will be vectorized,
3482 and will compute the first (n/VF) iterations. The second copy of the loop
3483 will remain scalar and will compute the remaining (n%VF) iterations.
3484 (VF is the vectorization factor). */
3486 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3487 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3488 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3489 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
/* Constant trip count divisible by VF: the vector loop runs exactly
   niters / VF times, no epilog needed.  */
3491 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3492 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3494 /* 1) Make sure the loop header has exactly two entries
3495 2) Make sure we have a preheader basic block. */
3497 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3499 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3502 /* FORNOW: the vectorizer supports only loops which body consist
3503 of one basic block (header + empty latch). When the vectorizer will
3504 support more involved loop forms, the order by which the BBs are
3505 traversed need to be reconsidered. */
3507 for (i = 0; i < nbbs; i++)
3509 basic_block bb = bbs[i];
/* NOTE(review): the iterator is advanced inside the body (not in the
   for-header), presumably because store statements are removed in place.  */
3511 for (si = bsi_start (bb); !bsi_end_p (si);)
3513 tree stmt = bsi_stmt (si);
3514 stmt_vec_info stmt_info;
3517 if (vect_debug_details (NULL))
3519 fprintf (dump_file, "------>vectorizing statement: ");
3520 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Statements marked irrelevant during analysis are left as-is.  */
3522 stmt_info = vinfo_for_stmt (stmt);
3523 gcc_assert (stmt_info);
3524 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3529 #ifdef ENABLE_CHECKING
3530 /* FORNOW: Verify that all stmts operate on the same number of
3531 units and no inner unrolling is necessary. */
3533 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3534 == vectorization_factor);
3536 /* -------- vectorize statement ------------ */
3537 if (vect_debug_details (NULL))
3538 fprintf (dump_file, "transform statement.");
3540 is_store = vect_transform_stmt (stmt, &si);
3543 /* free the attached stmt_vec_info and remove the stmt. */
3544 stmt_ann_t ann = stmt_ann (stmt);
3546 set_stmt_info (ann, NULL);
/* Rewrite the exit test so the vector loop iterates 'ratio' times.  */
3555 slpeel_make_loop_iterate_ntimes (loop, ratio);
3557 if (vect_debug_details (loop))
3558 fprintf (dump_file,"Success! loop vectorized.");
3559 if (vect_debug_stats (loop))
3560 fprintf (dump_file, "LOOP VECTORIZED.");
3564 /* Function vect_is_simple_use.
3567 LOOP - the loop that is being vectorized.
3568 OPERAND - operand of a stmt in LOOP.
3569 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3571 Returns whether a stmt with OPERAND can be vectorized.
3572 Supportable operands are constants, loop invariants, and operands that are
3573 defined by the current iteration of the loop. Unsupportable operands are
3574 those that are defined by a previous iteration of the loop (as is the case
3575 in reduction/induction computations). */
3578 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
/* Integer/real constants are trivially fine as vector operands.  */
3586 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3589 if (TREE_CODE (operand) != SSA_NAME)
3592 def_stmt = SSA_NAME_DEF_STMT (operand);
3593 if (def_stmt == NULL_TREE )
3595 if (vect_debug_details (NULL))
3596 fprintf (dump_file, "no def_stmt.");
3600 /* empty stmt is expected only in case of a function argument.
3601 (Otherwise - we expect a phi_node or a modify_expr). */
3602 if (IS_EMPTY_STMT (def_stmt))
3604 tree arg = TREE_OPERAND (def_stmt, 0);
3605 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3607 if (vect_debug_details (NULL))
3609 fprintf (dump_file, "Unexpected empty stmt: ");
3610 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3615 /* phi_node inside the loop indicates an induction/reduction pattern.
3616 This is not supported yet. */
3617 bb = bb_for_stmt (def_stmt);
3618 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3620 if (vect_debug_details (NULL))
3621 fprintf (dump_file, "reduction/induction - unsupported.");
3622 return false; /* FORNOW: not supported yet. */
3625 /* Expecting a modify_expr or a phi_node. */
/* A PHI here is outside the loop (checked above), i.e. a loop invariant.  */
3626 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3627 || TREE_CODE (def_stmt) == PHI_NODE)
3638 /* Function vect_analyze_operations.
3640 Scan the loop stmts and make sure they are all vectorizable. */
3643 vect_analyze_operations (loop_vec_info loop_vinfo)
3645 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3646 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3647 int nbbs = loop->num_nodes;
3648 block_stmt_iterator si;
3649 unsigned int vectorization_factor = 0;
3654 if (vect_debug_details (NULL))
3655 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3657 for (i = 0; i < nbbs; i++)
3659 basic_block bb = bbs[i];
3661 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3663 tree stmt = bsi_stmt (si);
3664 unsigned int nunits;
3665 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3668 if (vect_debug_details (NULL))
3670 fprintf (dump_file, "==> examining statement: ");
3671 print_generic_expr (dump_file, stmt, TDF_SLIM);
3674 gcc_assert (stmt_info);
3676 /* skip stmts which do not need to be vectorized.
3677 this is expected to include:
3678 - the COND_EXPR which is the loop exit condition
3679 - any LABEL_EXPRs in the loop
3680 - computations that are used only for array indexing or loop
3683 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3685 if (vect_debug_details (NULL))
3686 fprintf (dump_file, "irrelevant.");
/* A statement that is already in vector form cannot be re-vectorized.  */
3690 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3692 if (vect_debug_stats (loop) || vect_debug_details (loop))
3694 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3695 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Determine the scalar type of the stmt: from the data-ref if there is
   one, otherwise from the lhs of a MODIFY_EXPR, otherwise the stmt.  */
3700 if (STMT_VINFO_DATA_REF (stmt_info))
3701 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3702 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3703 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3705 scalar_type = TREE_TYPE (stmt);
3707 if (vect_debug_details (NULL))
3709 fprintf (dump_file, "get vectype for scalar type: ");
3710 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3713 vectype = get_vectype_for_scalar_type (scalar_type);
3716 if (vect_debug_stats (loop) || vect_debug_details (loop))
3718 fprintf (dump_file, "not vectorized: unsupported data-type ");
3719 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3724 if (vect_debug_details (NULL))
3726 fprintf (dump_file, "vectype: ");
3727 print_generic_expr (dump_file, vectype, TDF_SLIM);
3729 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* The stmt must be handled by one of the supported stmt kinds.  */
3731 ok = (vectorizable_operation (stmt, NULL, NULL)
3732 || vectorizable_assignment (stmt, NULL, NULL)
3733 || vectorizable_load (stmt, NULL, NULL)
3734 || vectorizable_store (stmt, NULL, NULL));
3738 if (vect_debug_stats (loop) || vect_debug_details (loop))
3740 fprintf (dump_file, "not vectorized: stmt not supported: ");
3741 print_generic_expr (dump_file, stmt, TDF_SLIM);
3746 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3747 if (vect_debug_details (NULL))
3748 fprintf (dump_file, "nunits = %d", nunits);
3750 if (vectorization_factor)
3752 /* FORNOW: don't allow mixed units.
3753 This restriction will be relaxed in the future. */
3754 if (nunits != vectorization_factor)
3756 if (vect_debug_stats (loop) || vect_debug_details (loop))
3757 fprintf (dump_file, "not vectorized: mixed data-types");
/* First relevant stmt establishes the loop's vectorization factor.  */
3762 vectorization_factor = nunits;
3764 #ifdef ENABLE_CHECKING
3765 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3766 * vectorization_factor == UNITS_PER_SIMD_WORD);
3771 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3773 if (vectorization_factor <= 1)
3775 if (vect_debug_stats (loop) || vect_debug_details (loop))
3776 fprintf (dump_file, "not vectorized: unsupported data-type");
3779 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3781 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3783 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3784 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* A known trip count smaller than VF means not even one vector
   iteration would execute.  */
3786 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3787 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3789 if (vect_debug_stats (loop) || vect_debug_details (loop))
3790 fprintf (dump_file, "not vectorized: iteration count too small.");
/* When an epilog loop will be needed, verify that one can be created.  */
3794 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3795 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3797 if (vect_debug_stats (loop) || vect_debug_details (loop))
3798 fprintf (dump_file, "epilog loop required.");
3799 if (!vect_can_advance_ivs_p (loop))
3801 if (vect_debug_stats (loop) || vect_debug_details (loop))
3802 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3805 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3807 if (vect_debug_stats (loop) || vect_debug_details (loop))
3808 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3817 /* Function exist_non_indexing_operands_for_use_p
3819 USE is one of the uses attached to STMT. Check if USE is
3820 used in STMT for anything other than indexing an array. */
3823 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3826 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3828 /* USE corresponds to some operand in STMT. If there is no data
3829 reference in STMT, then any operand that corresponds to USE
3830 is not indexing an array. */
3831 if (!STMT_VINFO_DATA_REF (stmt_info))
3834 /* STMT has a data_ref. FORNOW this means that its of one of
3835 the following forms:
3838 (This should have been verified in analyze_data_refs).
3840 'var' in the second case corresponds to a def, not a use,
3841 so USE cannot correspond to any operands that are not used
3844 Therefore, all we need to check is if STMT falls into the
3845 first case, and whether var corresponds to USE. */
/* If the lhs is an SSA_NAME the stmt is a load (var = array_ref)...  */
3847 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
/* ...otherwise it is a store; its rhs must be the stored SSA_NAME.  */
3850 operand = TREE_OPERAND (stmt, 1);
3852 if (TREE_CODE (operand) != SSA_NAME)
3862 /* Function vect_is_simple_iv_evolution.
3864 FORNOW: A simple evolution of an induction variables in the loop is
3865 considered a polynomial evolution with constant step. */
3868 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3869 tree * step, bool strict)
3874 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3876 /* When there is no evolution in this loop, the evolution function
3878 if (evolution_part == NULL_TREE)
3881 /* When the evolution is a polynomial of degree >= 2
3882 the evolution function is not "simple". */
3883 if (tree_is_chrec (evolution_part))
3886 step_expr = evolution_part;
/* Unshare so callers can safely reuse the initial condition tree.  */
3887 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
3889 if (vect_debug_details (NULL))
3891 fprintf (dump_file, "step: ");
3892 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3893 fprintf (dump_file, ", init: ");
3894 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* Only constant steps are considered simple.  */
3900 if (TREE_CODE (step_expr) != INTEGER_CST)
3902 if (vect_debug_details (NULL))
3903 fprintf (dump_file, "step unknown.");
/* NOTE(review): this check presumably applies under 'strict' mode only —
   a step other than 1 is rejected; confirm against the missing context.  */
3908 if (!integer_onep (step_expr))
3910 if (vect_debug_details (NULL))
3911 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3919 /* Function vect_analyze_scalar_cycles.
3921 Examine the cross iteration def-use cycles of scalar variables, by
3922 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3923 cycles that they represent do not impede vectorization.
3925 FORNOW: Reduction as in the following loop, is not supported yet:
3929 The cross-iteration cycle corresponding to variable 'sum' will be
3930 considered too complicated and will impede vectorization.
3932 FORNOW: Induction as in the following loop, is not supported yet:
3937 However, the following loop *is* vectorizable:
3942 In both loops there exists a def-use cycle for the variable i:
3943 loop: i_2 = PHI (i_0, i_1)
3948 The evolution of the above cycle is considered simple enough,
3949 however, we also check that the cycle does not need to be
3950 vectorized, i.e - we check that the variable that this cycle
3951 defines is only used for array indexing or in stmts that do not
3952 need to be vectorized. This is not the case in loop2, but it
3953 *is* the case in loop3. */
3956 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3959 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3960 basic_block bb = loop->header;
3963 if (vect_debug_details (NULL))
3964 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Walk all PHI nodes in the loop header — each represents a potential
   cross-iteration scalar cycle.  */
3966 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3968 tree access_fn = NULL;
3970 if (vect_debug_details (NULL))
3972 fprintf (dump_file, "Analyze phi: ");
3973 print_generic_expr (dump_file, phi, TDF_SLIM);
3976 /* Skip virtual phi's. The data dependences that are associated with
3977 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3979 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3981 if (vect_debug_details (NULL))
3982 fprintf (dump_file, "virtual phi. skip.");
3986 /* Analyze the evolution function. */
3988 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3989 those of loop induction variables; This property is verified here.
3991 Furthermore, if that induction variable is used in an operation
3992 that needs to be vectorized (i.e, is not solely used to index
3993 arrays and check the exit condition) - we do not support its
3994 vectorization yet. This property is verified in vect_is_simple_use,
3995 during vect_analyze_operations. */
3997 access_fn = /* instantiate_parameters
3999 analyze_scalar_evolution (loop, PHI_RESULT (phi));
/* No access function could be computed — reject the loop.  */
4003 if (vect_debug_stats (loop) || vect_debug_details (loop))
4004 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4008 if (vect_debug_details (NULL))
4010 fprintf (dump_file, "Access function of PHI: ");
4011 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Require the cycle to be a simple (constant-step) IV evolution.  */
4014 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4017 if (vect_debug_stats (loop) || vect_debug_details (loop))
4018 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4027 /* Function vect_analyze_data_ref_dependence.
4029 Return TRUE if there (might) exist a dependence between a memory-reference
4030 DRA and a memory-reference DRB. */
4033 vect_analyze_data_ref_dependence (struct data_reference *dra,
4034 struct data_reference *drb,
4038 struct data_dependence_relation *ddr;
/* If we cannot even prove the two base names differ, be conservative.  */
4040 if (!array_base_name_differ_p (dra, drb, &differ_p))
4042 if (vect_debug_stats (loop) || vect_debug_details (loop))
4045 "not vectorized: can't determine dependence between: ");
4046 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4047 fprintf (dump_file, " and ");
4048 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Run the affine dependence test on the pair.  */
4056 ddr = initialize_data_dependence_relation (dra, drb);
4057 compute_affine_dependence (ddr);
/* chrec_known here means the analyzer proved the refs independent;
   any other answer is treated as a possible dependence.  */
4059 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4062 if (vect_debug_stats (loop) || vect_debug_details (loop))
4065 "not vectorized: possible dependence between data-refs ");
4066 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4067 fprintf (dump_file, " and ");
4068 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4075 /* Function vect_analyze_data_ref_dependences.
4077 Examine all the data references in the loop, and make sure there do not
4078 exist any data dependences between them.
4080 TODO: dependences which distance is greater than the vectorization factor
4084 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4087 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4088 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4089 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4091 /* Examine store-store (output) dependences. */
4093 if (vect_debug_details (NULL))
4094 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
4096 if (vect_debug_details (NULL))
4097 fprintf (dump_file, "compare all store-store pairs.");
/* All unordered pairs of writes (j starts at i+1 to avoid duplicates).  */
4099 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4101 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4103 struct data_reference *dra =
4104 VARRAY_GENERIC_PTR (loop_write_refs, i);
4105 struct data_reference *drb =
4106 VARRAY_GENERIC_PTR (loop_write_refs, j);
4107 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4112 /* Examine load-store (true/anti) dependences. */
4114 if (vect_debug_details (NULL))
4115 fprintf (dump_file, "compare all load-store pairs.");
/* Every read against every write (full cross product).  */
4117 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4119 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4121 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4122 struct data_reference *drb =
4123 VARRAY_GENERIC_PTR (loop_write_refs, j);
4124 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4133 /* Function vect_compute_data_ref_alignment
4135 Compute the misalignment of the data reference DR.
4138 1. If during the misalignment computation it is found that the data reference
4139 cannot be vectorized then false is returned.
4140 2. DR_MISALIGNMENT (DR) is defined.
4142 FOR NOW: No analysis is actually performed. Misalignment is calculated
4143 only for trivial cases. TODO. */
4146 vect_compute_data_ref_alignment (struct data_reference *dr)
4148 tree stmt = DR_STMT (dr);
4149 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4150 tree ref = DR_REF (dr);
4152 tree base, alignment;
4153 bool base_aligned_p;
4156 if (vect_debug_details (NULL))
4157 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4159 /* Initialize misalignment to unknown. */
4160 DR_MISALIGNMENT (dr) = -1;
/* Fetch alignment facts recorded during data-ref analysis.  */
4162 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4163 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4164 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4165 vectype = STMT_VINFO_VECTYPE (stmt_info);
4169 if (vect_debug_details (NULL))
4171 fprintf (dump_file, "Unknown alignment for access: ");
4172 print_generic_expr (dump_file, base, TDF_SLIM);
/* If the base is not known to be aligned, try forcing the alignment
   of the underlying decl to the vector type's alignment.  */
4177 if (!base_aligned_p)
4179 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4181 if (vect_debug_details (NULL))
4183 fprintf (dump_file, "can't force alignment of ref: ");
4184 print_generic_expr (dump_file, ref, TDF_SLIM);
4189 /* Force the alignment of the decl.
4190 NOTE: This is the only change to the code we make during
4191 the analysis phase, before deciding to vectorize the loop. */
4192 if (vect_debug_details (NULL))
4193 fprintf (dump_file, "force alignment");
4194 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4195 DECL_USER_ALIGN (base) = 1;
4198 /* At this point we assume that the base is aligned. */
4199 gcc_assert (base_aligned_p
4200 || (TREE_CODE (base) == VAR_DECL
4201 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4203 /* Alignment required, in bytes: */
4204 alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4206 /* Modulo alignment. */
4207 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4208 if (tree_int_cst_sgn (misalign) < 0)
4210 /* Negative misalignment value. */
4211 if (vect_debug_details (NULL))
4212 fprintf (dump_file, "unexpected misalign value");
/* Record the byte misalignment (0 .. vector-alignment - 1).  */
4216 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4218 if (vect_debug_details (NULL))
4219 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4225 /* Function vect_compute_data_refs_alignment
4227 Compute the misalignment of data references in the loop.
4228 This pass may take place at function granularity instead of at loop
4231 FOR NOW: No analysis is actually performed. Misalignment is calculated
4232 only for trivial cases. TODO. */
4235 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4237 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4238 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* Compute misalignment for each write data-ref; fail fast on the first
   data-ref whose alignment cannot be determined.  */
4241 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4243 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4244 if (!vect_compute_data_ref_alignment (dr))
/* Same for the read data-refs.  */
4248 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4250 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4251 if (!vect_compute_data_ref_alignment (dr))
4259 /* Function vect_enhance_data_refs_alignment
4261 This pass will use loop versioning and loop peeling in order to enhance
4262 the alignment of data references in the loop.
4264 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4265 original loop is to be vectorized; Any other loops that are created by
4266 the transformations performed in this pass - are not supposed to be
4267 vectorized. This restriction will be relaxed. */
4270 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4272 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4273 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4274 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4278 This pass will require a cost model to guide it whether to apply peeling
4279 or versioning or a combination of the two. For example, the scheme that
4280 intel uses when given a loop with several memory accesses, is as follows:
4281 choose one memory access ('p') which alignment you want to force by doing
4282 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4283 other accesses are not necessarily aligned, or (2) use loop versioning to
4284 generate one loop in which all accesses are aligned, and another loop in
4285 which only 'p' is necessarily aligned.
4287 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4288 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4289 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4291 Devising a cost model is the most critical aspect of this work. It will
4292 guide us on which access to peel for, whether to use loop versioning, how
4293 many versions to create, etc. The cost model will probably consist of
4294 generic considerations as well as target specific considerations (on
4295 powerpc for example, misaligned stores are more painful than misaligned
4298 Here is the general steps involved in alignment enhancements:
4300 -- original loop, before alignment analysis:
4301 for (i=0; i<N; i++){
4302 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4303 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4306 -- After vect_compute_data_refs_alignment:
4307 for (i=0; i<N; i++){
4308 x = q[i]; # DR_MISALIGNMENT(q) = 3
4309 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4312 -- Possibility 1: we do loop versioning:
4314 for (i=0; i<N; i++){ # loop 1A
4315 x = q[i]; # DR_MISALIGNMENT(q) = 3
4316 p[i] = y; # DR_MISALIGNMENT(p) = 0
4320 for (i=0; i<N; i++){ # loop 1B
4321 x = q[i]; # DR_MISALIGNMENT(q) = 3
4322 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4326 -- Possibility 2: we do loop peeling:
4327 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4331 for (i = 3; i < N; i++){ # loop 2A
4332 x = q[i]; # DR_MISALIGNMENT(q) = 0
4333 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4336 -- Possibility 3: combination of loop peeling and versioning:
4337 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4342 for (i = 3; i<N; i++){ # loop 3A
4343 x = q[i]; # DR_MISALIGNMENT(q) = 0
4344 p[i] = y; # DR_MISALIGNMENT(p) = 0
4348 for (i = 3; i<N; i++){ # loop 3B
4349 x = q[i]; # DR_MISALIGNMENT(q) = 0
4350 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4354 These loops are later passed to loop_transform to be vectorized. The
4355 vectorizer will use the alignment information to guide the transformation
4356 (whether to generate regular loads/stores, or with special handling for
4360 /* (1) Peeling to force alignment. */
4362 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4364 + How many accesses will become aligned due to the peeling
4365 - How many accesses will become unaligned due to the peeling,
4366 and the cost of misaligned accesses.
4367 - The cost of peeling (the extra runtime checks, the increase
4370 The scheme we use FORNOW: peel to force the alignment of the first
4371 misaligned store in the loop.
4372 Rationale: misaligned stores are not yet supported.
4374 TODO: Use a better cost model. */
/* Pick the first unaligned write as the data-ref to peel for.  */
4376 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4378 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4379 if (!aligned_access_p (dr))
4381 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4382 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
/* No unaligned write was found — nothing to peel for.  */
4387 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4389 if (vect_debug_details (loop))
4390 fprintf (dump_file, "Peeling for alignment will not be applied.");
4394 if (vect_debug_details (loop))
4395 fprintf (dump_file, "Peeling for alignment will be applied.");
4398 /* (1.2) Update the alignment info according to the peeling factor.
4399 If the misalignment of the DR we peel for is M, then the
4400 peeling factor is VF - M, and the misalignment of each access DR_i
4401 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4402 If the misalignment of the DR we peel for is unknown, then the
4403 misalignment of each access DR_i in the loop is also unknown.
4405 FORNOW: set the misalignment of the accesses to unknown even
4406 if the peeling factor is known at compile time.
4408 TODO: - if the peeling factor is known at compile time, use that
4409 when updating the misalignment info of the loop DRs.
4410 - consider accesses that are known to have the same
4411 alignment, even if that alignment is unknown. */
/* The peeled-for DR becomes aligned (0); every other write becomes
   unknown (-1) under the FORNOW policy above.  */
4413 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4415 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4416 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4418 DR_MISALIGNMENT (dr) = 0;
4419 if (vect_debug_details (loop) || vect_debug_stats (loop))
4420 fprintf (dump_file, "Alignment of access forced using peeling.");
4423 DR_MISALIGNMENT (dr) = -1;
/* Likewise for the reads.  */
4425 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4427 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4428 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4430 DR_MISALIGNMENT (dr) = 0;
4431 if (vect_debug_details (loop) || vect_debug_stats (loop))
4432 fprintf (dump_file, "Alignment of access forced using peeling.");
4435 DR_MISALIGNMENT (dr) = -1;
4440 /* Function vect_analyze_data_refs_alignment
4442 Analyze the alignment of the data-references in the loop.
4443 FOR NOW: Until support for misliagned accesses is in place, only if all
4444 accesses are aligned can the loop be vectorized. This restriction will be
4448 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4450 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4451 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4452 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4453 enum dr_alignment_support supportable_dr_alignment;
4456 if (vect_debug_details (NULL))
4457 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4460 /* This pass may take place at function granularity instead of at loop
4463 if (!vect_compute_data_refs_alignment (loop_vinfo))
4465 if (vect_debug_details (loop) || vect_debug_stats (loop))
4467 "not vectorized: can't calculate alignment for data ref.");
4472 /* This pass will decide on using loop versioning and/or loop peeling in
4473 order to enhance the alignment of data references in the loop. */
4475 vect_enhance_data_refs_alignment (loop_vinfo);
4478 /* Finally, check that all the data references in the loop can be
4479 handled with respect to their alignment. */
/* Reads: the target must support each load at its (possibly unknown)
   misalignment.  */
4481 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4483 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4484 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4485 if (!supportable_dr_alignment)
4487 if (vect_debug_details (loop) || vect_debug_stats (loop))
4488 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4491 if (supportable_dr_alignment != dr_aligned
4492 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4493 fprintf (dump_file, "Vectorizing an unaligned access.");
/* Writes: same check for each store.  */
4495 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4497 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4498 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4499 if (!supportable_dr_alignment)
4501 if (vect_debug_details (loop) || vect_debug_stats (loop))
4502 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4505 if (supportable_dr_alignment != dr_aligned
4506 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4507 fprintf (dump_file, "Vectorizing an unaligned access.");
4514 /* Function vect_analyze_data_ref_access.
4516 Analyze the access pattern of the data-reference DR. For now, a data access
4517 has to be consecutive to be considered vectorizable. */
4520 vect_analyze_data_ref_access (struct data_reference *dr)
4522 tree stmt = DR_STMT (dr);
4523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4524 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4525 tree scalar_type = TREE_TYPE (DR_REF (dr));
/* A consecutive access advances by exactly one scalar element per
   iteration: the recorded step must equal the element size.
   tree_int_cst_compare returns nonzero when the two constants differ.  */
4527 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4529 if (vect_debug_details (NULL))
4530 fprintf (dump_file, "not consecutive access");
4537 /* Function vect_analyze_data_ref_accesses.
4539 Analyze the access pattern of all the data references in the loop.
4541 FORNOW: the only access pattern that is considered vectorizable is a
4542 simple step 1 (consecutive) access.
4544 FORNOW: handle only arrays and pointer accesses. */
4547 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4550 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4551 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4553 if (vect_debug_details (NULL))
4554 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Delegate the per-data-ref check to vect_analyze_data_ref_access;
   any write with a non-consecutive pattern rejects the loop.  */
4556 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4558 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4559 bool ok = vect_analyze_data_ref_access (dr);
4562 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4563 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4564 fprintf (dump_file, "not vectorized: complicated access pattern.");
/* Likewise for the reads.  */
4569 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4571 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4572 bool ok = vect_analyze_data_ref_access (dr);
4575 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4576 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4577 fprintf (dump_file, "not vectorized: complicated access pattern.");
4586 /* Function vect_analyze_pointer_ref_access.
4589 STMT - a stmt that contains a data-ref
4590 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4592 If the data-ref access is vectorizable, return a data_reference structure
4593 that represents it (DR). Otherwise - return NULL. */
4595 static struct data_reference *
4596 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4598 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4599 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
/* The evolution of the pointer itself (operand 0 of the INDIRECT_REF)
   in this loop, as computed by the scev analyzer.  */
4600 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4602 tree reftype, innertype;
4603 tree indx_access_fn;
4604 int loopnum = loop->num;
4605 struct data_reference *dr;
4609 if (vect_debug_stats (loop) || vect_debug_details (loop))
4610 fprintf (dump_file, "not vectorized: complicated pointer access.");
4614 if (vect_debug_details (NULL))
4616 fprintf (dump_file, "Access function of ptr: ");
4617 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Require a simple IV evolution {init, +, step}.  */
4620 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4622 if (vect_debug_stats (loop) || vect_debug_details (loop))
4623 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* The base address must not vary inside the loop.  */
4629 if (!expr_invariant_in_loop_p (loop, init))
4631 if (vect_debug_stats (loop) || vect_debug_details (loop))
4633 "not vectorized: initial condition is not loop invariant.");
/* The step must be a compile-time constant.  */
4637 if (TREE_CODE (step) != INTEGER_CST)
4639 if (vect_debug_stats (loop) || vect_debug_details (loop))
4641 "not vectorized: non constant step for pointer access.");
4645 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4646 if (TREE_CODE (reftype) != POINTER_TYPE)
4648 if (vect_debug_stats (loop) || vect_debug_details (loop))
4649 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4653 reftype = TREE_TYPE (init);
4654 if (TREE_CODE (reftype) != POINTER_TYPE)
4656 if (vect_debug_stats (loop) || vect_debug_details (loop))
4657 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* Step must equal the pointed-to element size: consecutive access.  */
4661 innertype = TREE_TYPE (reftype);
4662 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4664 /* FORNOW: support only consecutive access */
4665 if (vect_debug_stats (loop) || vect_debug_details (loop))
4666 fprintf (dump_file, "not vectorized: non consecutive access.");
4670 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
/* If INIT is "base +/- offset", record the offset part separately so
   later analysis can combine it with offsets found elsewhere.  */
4671 if (TREE_CODE (init) == PLUS_EXPR
4672 || TREE_CODE (init) == MINUS_EXPR)
4673 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4674 fold (size_binop (TREE_CODE (init), size_zero_node,
4675 fold_convert (sizetype, TREE_OPERAND (init, 1))));
4677 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
/* Build a unit-stride index access function {0, +, 1} for the DR.  */
4680 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4681 if (vect_debug_details (NULL))
4683 fprintf (dump_file, "Access function of ptr indx: ");
4684 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4686 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4691 /* Function vect_get_memtag_and_dr.
4693 The function returns the relevant variable for memory tag (for aliasing
4694 purposes). Also data reference structure DR is created.
4696 This function handles three kinds of MEMREF:
4698 It is called from vect_analyze_data_refs with a MEMREF that is either an
4699 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4700 It builds a DR for them using vect_get_base_and_offset, and calls itself
4701 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4702 MEMREF along the way. During the recursive calls, the function may be called
4703 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4704 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4705 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4706 and SSA_NAME (this is category 3 - "recursion stop condition").
4708 When the MEMREF falls into category 1 there is still no data reference struct
4709 (DR) available. It is created by this function, and then, along the recursion,
4710 MEMREF will fall into category 2 or 3, in which case a DR will have already
4711 been created, but the analysis continues to retrieve the MEMTAG.
4714 MEMREF - data reference in STMT
4715 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4718 DR - data_reference struct for MEMREF
4719 return value - the relevant variable for memory tag (for aliasing purposes).
/* NOTE(review): the excerpt omits several lines (case labels, braces);
   the comments below describe only the visible code.  */
4724 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4725 loop_vec_info loop_vinfo,
4726 tree vectype, struct data_reference **dr)
4728 tree symbl, oprnd0, oprnd1;
4729 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4730 tree offset, misalign, step;
4731 tree ref_to_be_analyzed, tag, dr_base;
4732 struct data_reference *new_dr;
4733 bool base_aligned_p;
4737 /* Category 3: recursion stop condition. */
4738 /* (1) A DR already exists. We only need to get the relevant memtag for
4739 MEMREF, the rest of the data was already initialized. */
4741 switch (TREE_CODE (memref))
4743 /* (1.1) Stop condition: find the relevant memtag and return. */
/* SSA_NAME: the type memory tag hangs off the underlying variable.  */
4745 symbl = SSA_NAME_VAR (memref);
4746 tag = get_var_ann (symbl)->type_mem_tag;
/* Fallback: try the pointer operand of the already-built DR.  */
4749 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4750 if (TREE_CODE (ptr) == SSA_NAME)
4751 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4755 if (vect_debug_details (NULL))
4756 fprintf (dump_file, "not vectorized: no memtag for ref.");
4765 /* Category 2: recursion continues. */
4766 /* (1.2) A recursive call to find the relevant memtag is required. */
/* INDIRECT_REF (presumably): peel one level and recurse on the pointer.  */
4768 symbl = TREE_OPERAND (memref, 0);
4769 break; /* For recursive call. */
4772 /* Could have recorded more accurate information -
4773 i.e, the actual FIELD_DECL that is being referenced -
4774 but later passes expect VAR_DECL as the nmt. */
4778 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4779 break; /* For recursive call. */
4783 /* Although DR exists, we have to call the function recursively to
4784 build MEMTAG for such expression. This is handled below. */
4785 oprnd0 = TREE_OPERAND (memref, 0);
4786 oprnd1 = TREE_OPERAND (memref, 1);
4788 STRIP_NOPS (oprnd1);
4789 /* Supported plus/minus expressions are of the form
4790 {address_base + offset}, such that address_base is of type
4791 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4792 or it's not of type POINTER/ARRAY.
4793 TODO: swap operands if {offset + address_base}. */
4794 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4795 && TREE_CODE (oprnd1) != INTEGER_CST)
4796 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4800 break; /* For recursive call. */
4808 /* Category 1: recursion begins. */
4809 /* (2) A DR does not exist yet and must be built, followed by a
4810 recursive call to get the relevant memtag for MEMREF. */
4812 switch (TREE_CODE (memref))
/* INDIRECT_REF (presumably): build a DR via the pointer-access analysis.  */
4815 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4819 symbl = DR_BASE_NAME (new_dr);
4820 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
/* ARRAY_REF (presumably): build a DR via the array analysis.  */
4824 new_dr = analyze_array (stmt, memref, is_read);
4826 symbl = DR_BASE_NAME (new_dr);
4827 ref_to_be_analyzed = memref;
4831 /* TODO: Support data-refs of form a[i].p for unions and single
4832 field structures. */
4836 offset = size_zero_node;
4837 misalign = size_zero_node;
4838 step = size_zero_node;
4840 /* Analyze data-ref, find its base, initial offset from the base, step,
4842 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4843 vectype, loop_vinfo, &offset,
4844 &misalign, &step, &base_aligned_p);
4848 /* Initialize information according to above analysis. */
4849 /* Since offset and step of a pointer can be also set in
4850 vect_analyze_pointer_ref_access, we combine the values here. */
4851 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4852 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4853 fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4854 STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4856 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
/* Steps likewise accumulate across the two analyses.  */
4858 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4859 STMT_VINFO_VECT_STEP (stmt_info) =
4860 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4862 STMT_VINFO_VECT_STEP (stmt_info) = step;
4864 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4865 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4866 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4871 /* Recursive call to retrieve the relevant memtag. */
4872 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4878 /* Function vect_analyze_data_refs.
4880 Find all the data references in the loop.
4882 The general structure of the analysis of data refs in the vectorizer is as
4884 1- vect_analyze_data_refs(loop):
4885 Find and analyze all data-refs in the loop:
4887 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4888 1.1- vect_get_memtag_and_dr(ref):
4889 Analyze ref, and build a DR (data_reference struct) for it;
4890 call vect_get_base_and_offset to compute base, initial_offset,
4891 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4892 ref_stmt.alignment, and ref_stmt.step accordingly.
4893 1.1.1- vect_get_base_and_offset():
4894 Calculate base, initial_offset, step and alignment.
4895 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4896 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4897 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4898 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4900 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4901 which base is really an array (not a pointer) and which alignment
4902 can be forced. This restriction will be relaxed. */
4905 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4907 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4908 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4909 int nbbs = loop->num_nodes;
4910 block_stmt_iterator si;
4912 struct data_reference *dr;
4914 if (vect_debug_details (NULL))
4915 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
/* Walk every statement of every basic block in the loop.  */
4917 for (j = 0; j < nbbs; j++)
4919 basic_block bb = bbs[j];
4920 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4922 bool is_read = false;
4923 tree stmt = bsi_stmt (si);
4924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4925 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4926 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4927 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4928 varray_type *datarefs = NULL;
4929 int nvuses, nv_may_defs, nv_must_defs;
4932 tree scalar_type, vectype;
4934 /* Assumption: there exists a data-ref in stmt, if and only if
4935 it has vuses/vdefs. */
4937 if (!vuses && !v_may_defs && !v_must_defs)
4940 nvuses = NUM_VUSES (vuses);
4941 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4942 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A statement with both vuses and vdefs (e.g. a call) is not handled.  */
4944 if (nvuses && (nv_may_defs || nv_must_defs))
4946 if (vect_debug_details (NULL))
4948 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4949 print_generic_expr (dump_file, stmt, TDF_SLIM);
4954 if (TREE_CODE (stmt) != MODIFY_EXPR)
4956 if (vect_debug_details (NULL))
4958 fprintf (dump_file, "unexpected vops in stmt: ");
4959 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Vuses only: the memory reference is on the RHS (a load)...  */
4966 memref = TREE_OPERAND (stmt, 1);
4967 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
/* ...otherwise it is on the LHS (a store).  */
4972 memref = TREE_OPERAND (stmt, 0);
4973 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4977 scalar_type = TREE_TYPE (memref);
4978 vectype = get_vectype_for_scalar_type (scalar_type);
4981 if (vect_debug_details (NULL))
4983 fprintf (dump_file, "no vectype for stmt: ");
4984 print_generic_expr (dump_file, stmt, TDF_SLIM);
4985 fprintf (dump_file, " scalar_type: ");
4986 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4988 /* It is not possible to vectorize this data reference. */
4991 /* Analyze MEMREF. If it is of a supported form, build data_reference
4992 struct for it (DR) and find memtag for aliasing purposes. */
4994 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4998 if (vect_debug_stats (loop) || vect_debug_details (loop))
5000 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5001 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Record the results in the stmt info and the per-loop dataref list.  */
5005 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5006 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5007 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5008 STMT_VINFO_DATA_REF (stmt_info) = dr;
5016 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5018 /* Function vect_mark_relevant.
5020 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5023 vect_mark_relevant (varray_type *worklist, tree stmt)
5025 stmt_vec_info stmt_info;
5027 if (vect_debug_details (NULL))
5028 fprintf (dump_file, "mark relevant.");
/* PHI nodes carry no stmt_vec_info here; just queue them directly.  */
5030 if (TREE_CODE (stmt) == PHI_NODE)
5032 VARRAY_PUSH_TREE (*worklist, stmt);
5036 stmt_info = vinfo_for_stmt (stmt);
5040 if (vect_debug_details (NULL))
5042 fprintf (dump_file, "mark relevant: no stmt info!!.");
5043 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Avoid re-queueing a statement that was already marked.  */
5048 if (STMT_VINFO_RELEVANT_P (stmt_info))
5050 if (vect_debug_details (NULL))
5051 fprintf (dump_file, "already marked relevant.");
5055 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5056 VARRAY_PUSH_TREE (*worklist, stmt);
5060 /* Function vect_stmt_relevant_p.
5062 Return true if STMT in loop that is represented by LOOP_VINFO is
5063 "relevant for vectorization".
5065 A stmt is considered "relevant for vectorization" if:
5066 - it has uses outside the loop.
5067 - it has vdefs (it alters memory).
5068 - control stmts in the loop (except for the exit condition).
5070 CHECKME: what other side effects would the vectorizer allow? */
5073 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5075 v_may_def_optype v_may_defs;
5076 v_must_def_optype v_must_defs;
5077 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5082 /* cond stmt other than loop exit cond. */
5083 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5086 /* changing memory. */
5087 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5088 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5089 if (v_may_defs || v_must_defs)
5091 if (vect_debug_details (NULL))
5092 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5096 /* uses outside the loop. */
/* Scan the immediate uses of STMT's result; any use in a block outside
   the loop makes the stmt relevant.  */
5097 df = get_immediate_uses (stmt);
5098 num_uses = num_immediate_uses (df);
5099 for (i = 0; i < num_uses; i++)
5101 tree use = immediate_use (df, i);
5102 basic_block bb = bb_for_stmt (use);
5103 if (!flow_bb_inside_loop_p (loop, bb))
5105 if (vect_debug_details (NULL))
5106 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5115 /* Function vect_mark_stmts_to_be_vectorized.
5117 Not all stmts in the loop need to be vectorized. For example:
5126 Stmt 1 and 3 do not need to be vectorized, because loop control and
5127 addressing of vectorized data-refs are handled differently.
5129 This pass detects such stmts. */
5132 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5134 varray_type worklist;
5135 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5136 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5137 unsigned int nbbs = loop->num_nodes;
5138 block_stmt_iterator si;
5144 stmt_vec_info stmt_info;
5146 if (vect_debug_details (NULL))
5147 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5149 VARRAY_TREE_INIT (worklist, 64, "work list");
5151 /* 1. Init worklist. */
/* Seed the worklist with every stmt that is relevant on its own
   (vdefs, uses outside the loop, control stmts).  */
5153 for (i = 0; i < nbbs; i++)
5155 basic_block bb = bbs[i];
5156 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5158 stmt = bsi_stmt (si);
5160 if (vect_debug_details (NULL))
5162 fprintf (dump_file, "init: stmt relevant? ");
5163 print_generic_expr (dump_file, stmt, TDF_SLIM);
5166 stmt_info = vinfo_for_stmt (stmt);
5167 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5169 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5170 vect_mark_relevant (&worklist, stmt);
5175 /* 2. Process_worklist */
/* Propagate relevance backwards along def-use chains: the defining
   stmts of a relevant stmt's uses become relevant too (if inside the
   loop).  */
5177 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5179 stmt = VARRAY_TOP_TREE (worklist);
5180 VARRAY_POP (worklist);
5182 if (vect_debug_details (NULL))
5184 fprintf (dump_file, "worklist: examine stmt: ");
5185 print_generic_expr (dump_file, stmt, TDF_SLIM);
5188 /* Examine the USES in this statement. Mark all the statements which
5189 feed this statement's uses as "relevant", unless the USE is used as
5192 if (TREE_CODE (stmt) == PHI_NODE)
5194 /* follow the def-use chain inside the loop. */
5195 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5197 tree arg = PHI_ARG_DEF (stmt, j);
5198 tree def_stmt = NULL_TREE;
/* An unsupported use aborts the whole analysis (returns failure -
   the return is outside this excerpt).  */
5200 if (!vect_is_simple_use (arg, loop, &def_stmt))
5202 if (vect_debug_details (NULL))
5203 fprintf (dump_file, "worklist: unsupported use.");
5204 varray_clear (worklist);
5210 if (vect_debug_details (NULL))
5212 fprintf (dump_file, "worklist: def_stmt: ");
5213 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5216 bb = bb_for_stmt (def_stmt);
5217 if (flow_bb_inside_loop_p (loop, bb))
5218 vect_mark_relevant (&worklist, def_stmt);
5222 ann = stmt_ann (stmt);
5223 use_ops = USE_OPS (ann);
5225 for (i = 0; i < NUM_USES (use_ops); i++)
5227 tree use = USE_OP (use_ops, i);
5229 /* We are only interested in uses that need to be vectorized. Uses
5230 that are used for address computation are not considered relevant.
5232 if (exist_non_indexing_operands_for_use_p (use, stmt))
5234 tree def_stmt = NULL_TREE;
5236 if (!vect_is_simple_use (use, loop, &def_stmt))
5238 if (vect_debug_details (NULL))
5239 fprintf (dump_file, "worklist: unsupported use.");
5240 varray_clear (worklist);
5247 if (vect_debug_details (NULL))
5249 fprintf (dump_file, "worklist: examine use %d: ", i);
5250 print_generic_expr (dump_file, use, TDF_SLIM);
5253 bb = bb_for_stmt (def_stmt);
5254 if (flow_bb_inside_loop_p (loop, bb))
5255 vect_mark_relevant (&worklist, def_stmt);
5258 } /* while worklist */
5260 varray_clear (worklist);
5265 /* Function vect_can_advance_ivs_p
5267 In case the number of iterations that LOOP iterates is unknown at compile
5268 time, an epilog loop will be generated, and the loop induction variables
5269 (IVs) will be "advanced" to the value they are supposed to take just before
5270 the epilog loop. Here we check that the access function of the loop IVs
5271 and the expression that represents the loop bound are simple enough.
5272 These restrictions will be relaxed in the future. */
5275 vect_can_advance_ivs_p (struct loop *loop)
5277 basic_block bb = loop->header;
5280 /* Analyze phi functions of the loop header. */
5282 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5284 tree access_fn = NULL;
5285 tree evolution_part;
5287 if (vect_debug_details (NULL))
5289 fprintf (dump_file, "Analyze phi: ");
5290 print_generic_expr (dump_file, phi, TDF_SLIM);
5293 /* Skip virtual phi's. The data dependences that are associated with
5294 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5296 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5298 if (vect_debug_details (NULL))
5299 fprintf (dump_file, "virtual phi. skip.")
5303 /* Analyze the evolution function. */
5305 access_fn = instantiate_parameters
5306 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5310 if (vect_debug_details (NULL))
5311 fprintf (dump_file, "No Access function.");
5315 if (vect_debug_details (NULL))
5317 fprintf (dump_file, "Access function of PHI: ");
5318 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5321 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
/* A NULL evolution means we cannot advance this IV.  */
5323 if (evolution_part == NULL_TREE)
5326 /* FORNOW: We do not transform initial conditions of IVs
5327 which evolution functions are a polynomial of degree >= 2. */
5329 if (tree_is_chrec (evolution_part))
5337 /* Function vect_get_loop_niters.
5339 Determine how many iterations the loop is executed.
5340 If an expression that represents the number of iterations
5341 can be constructed, place it in NUMBER_OF_ITERATIONS.
5342 Return the loop exit condition. */
5345 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5349 if (vect_debug_details (NULL))
5350 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5352 niters = number_of_iterations_in_loop (loop);
/* Only record a usable iteration count; chrec_dont_know means the
   analyzer could not determine it.  */
5354 if (niters != NULL_TREE
5355 && niters != chrec_dont_know)
5357 *number_of_iterations = niters;
5359 if (vect_debug_details (NULL))
5361 fprintf (dump_file, "==> get_loop_niters:" );
5362 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5366 return get_loop_exit_condition (loop);
5370 /* Function vect_analyze_loop_form.
5372 Verify the following restrictions (some may be relaxed in the future):
5373 - it's an inner-most loop
5374 - number of BBs = 2 (which are the loop header and the latch)
5375 - the loop has a pre-header
5376 - the loop has a single entry and exit
5377 - the loop exit condition is simple enough, and the number of iterations
5378 can be analyzed (a countable loop). */
5380 static loop_vec_info
5381 vect_analyze_loop_form (struct loop *loop)
5383 loop_vec_info loop_vinfo;
5385 tree number_of_iterations = NULL;
5386 bool rescan = false;
5388 if (vect_debug_details (loop))
5389 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
/* Reject loops with the wrong shape: nested, multi-exit, more than two
   blocks, extra incoming edges, or multiple entries.  */
5392 || !loop->single_exit
5393 || loop->num_nodes != 2
5394 || EDGE_COUNT (loop->header->preds) != 2
5395 || loop->num_entries != 1)
5397 if (vect_debug_stats (loop) || vect_debug_details (loop))
5399 fprintf (dump_file, "not vectorized: bad loop form. ");
5401 fprintf (dump_file, "nested loop.");
5402 else if (!loop->single_exit)
5403 fprintf (dump_file, "multiple exits.");
5404 else if (loop->num_nodes != 2)
5405 fprintf (dump_file, "too many BBs in loop.");
5406 else if (EDGE_COUNT (loop->header->preds) != 2)
5407 fprintf (dump_file, "too many incoming edges.");
5408 else if (loop->num_entries != 1)
5409 fprintf (dump_file, "too many entries.");
5415 /* We assume that the loop exit condition is at the end of the loop. i.e,
5416 that the loop is represented as a do-while (with a proper if-guard
5417 before the loop if needed), where the loop header contains all the
5418 executable statements, and the latch is empty. */
5419 if (!empty_block_p (loop->latch))
5421 if (vect_debug_stats (loop) || vect_debug_details (loop))
5422 fprintf (dump_file, "not vectorized: unexpected loop form.");
5426 /* Make sure we have a preheader basic block. */
5427 if (!loop->pre_header)
5430 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5433 /* Make sure there exists a single-predecessor exit bb: */
5434 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5437 loop_split_edge_with (loop->exit_edges[0], NULL);
/* Edges were split above; rescan so the loop structure is up to date.  */
5442 flow_loop_scan (loop, LOOP_ALL);
5443 /* Flow loop scan does not update loop->single_exit field. */
5444 loop->single_exit = loop->exit_edges[0];
5447 if (empty_block_p (loop->header))
5449 if (vect_debug_stats (loop) || vect_debug_details (loop))
5450 fprintf (dump_file, "not vectorized: empty loop.");
/* The loop must be countable: a recognizable exit condition and a
   computable (possibly symbolic) iteration count.  */
5454 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5457 if (vect_debug_stats (loop) || vect_debug_details (loop))
5458 fprintf (dump_file, "not vectorized: complicated exit condition.");
5462 if (!number_of_iterations)
5464 if (vect_debug_stats (loop) || vect_debug_details (loop))
5466 "not vectorized: number of iterations cannot be computed.");
5470 if (chrec_contains_undetermined (number_of_iterations))
5472 if (vect_debug_details (NULL))
5473 fprintf (dump_file, "Infinite number of iterations.");
/* All checks passed: build the loop_vec_info and record the results.  */
5477 loop_vinfo = new_loop_vec_info (loop);
5478 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5480 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5482 if (vect_debug_details (loop))
5484 fprintf (dump_file, "loop bound unknown.\n");
5485 fprintf (dump_file, "Symbolic number of iterations is ");
5486 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5490 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5492 if (vect_debug_stats (loop) || vect_debug_details (loop))
5493 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5497 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5503 /* Function vect_analyze_loop.
5505 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5506 for it. The different analyses will record information in the
5507 loop_vec_info struct. */
5509 static loop_vec_info
5510 vect_analyze_loop (struct loop *loop)
5513 loop_vec_info loop_vinfo;
5515 if (vect_debug_details (NULL))
5516 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5518 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5520 loop_vinfo = vect_analyze_loop_form (loop);
5523 if (vect_debug_details (loop))
5524 fprintf (dump_file, "bad loop form.")
5528 /* Find all data references in the loop (which correspond to vdefs/vuses)
5529 and analyze their evolution in the loop.
5531 FORNOW: Handle only simple, array references, which
5532 alignment can be forced, and aligned pointer-references. */
5534 ok = vect_analyze_data_refs (loop_vinfo);
5537 if (vect_debug_details (loop))
5538 fprintf (dump_file, "bad data references.");
5539 destroy_loop_vec_info (loop_vinfo);
5543 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5545 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
/* Emit a single diagnostic on failure, matching the style of the other
   failure paths (previously this path printed two near-duplicate
   messages under the same guard).  */
5550 if (vect_debug_details (loop))
5551 fprintf (dump_file, "not vectorized: unexpected pattern.");
5552 destroy_loop_vec_info (loop_vinfo);
5556 /* Check that all cross-iteration scalar data-flow cycles are OK.
5557 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5559 ok = vect_analyze_scalar_cycles (loop_vinfo);
5562 if (vect_debug_details (loop))
5563 fprintf (dump_file, "bad scalar cycle.");
5564 destroy_loop_vec_info (loop_vinfo);
5568 /* Analyze data dependences between the data-refs in the loop.
5569 FORNOW: fail at the first data dependence that we encounter. */
5571 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5574 if (vect_debug_details (loop))
5575 fprintf (dump_file, "bad data dependence.");
5576 destroy_loop_vec_info (loop_vinfo);
5580 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5581 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5583 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5586 if (vect_debug_details (loop))
5587 fprintf (dump_file, "bad data access.");
5588 destroy_loop_vec_info (loop_vinfo);
5592 /* Analyze the alignment of the data-refs in the loop.
5593 FORNOW: Only aligned accesses are handled. */
5595 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5598 if (vect_debug_details (loop))
5599 fprintf (dump_file, "bad data alignment.");
5600 destroy_loop_vec_info (loop_vinfo);
5604 /* Scan all the operations in the loop and make sure they are
5607 ok = vect_analyze_operations (loop_vinfo);
5610 if (vect_debug_details (loop))
5611 fprintf (dump_file, "bad operation or unsupported loop bound.");
5612 destroy_loop_vec_info (loop_vinfo);
/* Every analysis succeeded: the loop can be vectorized.  */
5616 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5622 /* Function need_imm_uses_for.
5624 Return whether we ought to include information for 'var'
5625 when calculating immediate uses. For this pass we only want use
5626 information for non-virtual variables. */
5629 need_imm_uses_for (tree var)
/* Non-virtual == a GIMPLE register; virtual (memory) operands are
   excluded.  */
5631 return is_gimple_reg (var);
5635 /* Function vectorize_loops.
5637 Entry Point to loop vectorization phase. */
5640 vectorize_loops (struct loops *loops)
5642 unsigned int i, loops_num;
5643 unsigned int num_vectorized_loops = 0;
5645 /* Does the target support SIMD? */
5646 /* FORNOW: until more sophisticated machine modelling is in place. */
5647 if (!UNITS_PER_SIMD_WORD)
5649 if (vect_debug_details (NULL))
5650 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5654 #ifdef ENABLE_CHECKING
5655 verify_loop_closed_ssa ();
5658 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5660 /* ----------- Analyze loops. ----------- */
5662 /* If some loop was duplicated, it gets bigger number
5663 than all previously defined loops. This fact allows us to run
5664 only over initial loops skipping newly generated ones. */
5665 loops_num = loops->num;
5666 for (i = 1; i < loops_num; i++)
5668 loop_vec_info loop_vinfo;
5669 struct loop *loop = loops->parray[i];
5674 loop_vinfo = vect_analyze_loop (loop);
5675 loop->aux = loop_vinfo;
5677 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5680 vect_transform_loop (loop_vinfo, loops);
5681 num_vectorized_loops++;
5684 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5685 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5686 num_vectorized_loops);
5688 /* ----------- Finalize. ----------- */
5691 for (i = 1; i < loops_num; i++)
5693 struct loop *loop = loops->parray[i];
5694 loop_vec_info loop_vinfo;
5698 loop_vinfo = loop->aux;
5699 destroy_loop_vec_info (loop_vinfo);
5703 rewrite_into_ssa (false);
5704 rewrite_into_loop_closed_ssa (); /* FORNOW */
5705 bitmap_clear (vars_to_rename);