2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
147 #include "tree-vectorizer.h"
148 #include "tree-pass.h"
149 #include "langhooks.h"
152 /*************************************************************************
153 Simple Loop Peeling Utilities
154 *************************************************************************/
156 /* Entry point for peeling of simple loops.
157 Peel the first/last iterations of a loop.
158 It can be used outside of the vectorizer for loops that are simple enough
159 (see function documentation). In the vectorizer it is used to peel the
160 last few iterations when the loop bound is unknown or does not evenly
161 divide by the vectorization factor, and to peel the first few iterations
162 to force the alignment of data references in the loop. */
163 struct loop *slpeel_tree_peel_loop_to_edge
164 (struct loop *, struct loops *, edge, tree, tree, bool);
165 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
166 (struct loop *, struct loops *, edge);
167 static void slpeel_update_phis_for_duplicate_loop
168 (struct loop *, struct loop *, bool after);
169 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
170 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
171 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
172 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
173 static void allocate_new_names (bitmap);
174 static void rename_use_op (use_operand_p);
175 static void rename_def_op (def_operand_p, tree);
176 static void rename_variables_in_bb (basic_block);
177 static void free_new_names (bitmap);
178 static void rename_variables_in_loop (struct loop *);
179 #ifdef ENABLE_CHECKING
180 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
182 static LOC find_loop_location (struct loop *);
185 /*************************************************************************
186 Vectorization Utilities.
187 *************************************************************************/
189 /* Main analysis functions. */
190 static loop_vec_info vect_analyze_loop (struct loop *);
191 static loop_vec_info vect_analyze_loop_form (struct loop *);
192 static bool vect_analyze_data_refs (loop_vec_info);
193 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
194 static bool vect_analyze_scalar_cycles (loop_vec_info);
195 static bool vect_analyze_data_ref_accesses (loop_vec_info);
196 static bool vect_analyze_data_ref_dependence
197 (struct data_reference *, struct data_reference *, loop_vec_info);
198 static bool vect_analyze_data_ref_dependences (loop_vec_info);
199 static bool vect_analyze_data_refs_alignment (loop_vec_info);
200 static bool vect_compute_data_refs_alignment (loop_vec_info);
201 static bool vect_analyze_operations (loop_vec_info);
203 /* Main code transformation functions. */
204 static void vect_transform_loop (loop_vec_info, struct loops *);
205 static bool vect_transform_stmt (tree, block_stmt_iterator *);
206 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
207 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
208 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
209 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
210 static enum dr_alignment_support vect_supportable_dr_alignment
211 (struct data_reference *);
212 static void vect_align_data_ref (tree);
213 static void vect_enhance_data_refs_alignment (loop_vec_info);
215 /* Utility functions for the analyses. */
216 static bool vect_is_simple_use (tree , loop_vec_info, tree *);
217 static bool exist_non_indexing_operands_for_use_p (tree, tree);
218 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
219 static void vect_mark_relevant (varray_type *, tree);
220 static bool vect_stmt_relevant_p (tree, loop_vec_info);
221 static tree vect_get_loop_niters (struct loop *, tree *);
222 static bool vect_compute_data_ref_alignment (struct data_reference *);
223 static bool vect_analyze_data_ref_access (struct data_reference *);
224 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
225 static struct data_reference * vect_analyze_pointer_ref_access
226 (tree, tree, bool, tree*);
227 static bool vect_can_advance_ivs_p (loop_vec_info);
228 static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
229 loop_vec_info, tree *, tree *, tree *,
231 static tree vect_get_ptr_offset (tree, tree, tree *);
232 static tree vect_get_memtag_and_dr
233 (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
234 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
236 static tree vect_strip_conversion (tree);
237 static bool vect_base_addr_differ_p (struct data_reference *,
238 struct data_reference *drb, bool *);
241 /* Utility functions for the code transformation. */
242 static tree vect_create_destination_var (tree, tree);
243 static tree vect_create_data_ref_ptr
244 (tree, block_stmt_iterator *, tree, tree *, bool);
245 static tree vect_create_index_for_vector_ref (loop_vec_info);
246 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
247 static tree get_vectype_for_scalar_type (tree);
248 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
249 static tree vect_get_vec_def_for_operand (tree, tree);
250 static tree vect_init_vector (tree, tree);
251 static void vect_finish_stmt_generation
252 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
254 /* Utility function dealing with loop peeling (not peeling itself). */
255 static void vect_generate_tmps_on_preheader
256 (loop_vec_info, tree *, tree *, tree *);
257 static tree vect_build_loop_niters (loop_vec_info);
258 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
259 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
260 static void vect_update_inits_of_dr (struct data_reference *, tree niters);
261 static void vect_update_inits_of_drs (loop_vec_info, tree);
262 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
263 static void vect_do_peeling_for_loop_bound
264 (loop_vec_info, tree *, struct loops *);
266 /* Utilities for creation and deletion of vec_info structs. */
267 loop_vec_info new_loop_vec_info (struct loop *loop);
268 void destroy_loop_vec_info (loop_vec_info);
269 stmt_vec_info new_stmt_vec_info (tree, loop_vec_info);
271 /*************************************************************************
272 Vectorization Debug Information.
273 *************************************************************************/
275 /* vect_verbosity_level set to invalid verbosity level to mark that it's
277 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
279 /* vect_dump will be set to stderr or dump_file if exist. */
282 /* Utilities for output formatting. */
283 static bool vect_print_dump_info (enum verbosity_levels, LOC);
284 static void vect_set_dump_settings (void);
285 void vect_set_verbosity_level (const char *);
289 /*************************************************************************
290 Simple Loop Peeling Utilities
292 Utilities to support loop peeling for vectorization purposes.
293 *************************************************************************/
296 /* For each definition in DEFINITIONS this function allocates
300 allocate_new_names (bitmap definitions)
305 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
307 tree def = ssa_name (ver);
308 tree *new_name_ptr = xmalloc (sizeof (tree));
310 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
312 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
313 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
315 SSA_NAME_AUX (def) = new_name_ptr;
320 /* Renames the use *OP_P. */
323 rename_use_op (use_operand_p op_p)
327 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
330 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
332 /* Something defined outside of the loop. */
336 /* An ordinary ssa name defined in the loop. */
338 SET_USE (op_p, *new_name_ptr);
342 /* Renames the def *OP_P in statement STMT. */
345 rename_def_op (def_operand_p op_p, tree stmt)
349 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
352 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
354 /* Something defined outside of the loop. */
358 /* An ordinary ssa name defined in the loop. */
360 SET_DEF (op_p, *new_name_ptr);
361 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
365 /* Renames the variables in basic block BB. */
368 rename_variables_in_bb (basic_block bb)
371 block_stmt_iterator bsi;
377 v_may_def_optype v_may_defs;
378 v_must_def_optype v_must_defs;
382 struct loop *loop = bb->loop_father;
384 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
385 rename_def_op (PHI_RESULT_PTR (phi), phi);
387 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
389 stmt = bsi_stmt (bsi);
390 get_stmt_operands (stmt);
391 ann = stmt_ann (stmt);
393 uses = USE_OPS (ann);
394 for (i = 0; i < NUM_USES (uses); i++)
395 rename_use_op (USE_OP_PTR (uses, i));
397 defs = DEF_OPS (ann);
398 for (i = 0; i < NUM_DEFS (defs); i++)
399 rename_def_op (DEF_OP_PTR (defs, i), stmt);
401 vuses = VUSE_OPS (ann);
402 for (i = 0; i < NUM_VUSES (vuses); i++)
403 rename_use_op (VUSE_OP_PTR (vuses, i));
405 v_may_defs = V_MAY_DEF_OPS (ann);
406 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
408 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
409 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
412 v_must_defs = V_MUST_DEF_OPS (ann);
413 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
415 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
416 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
420 FOR_EACH_EDGE (e, ei, bb->succs)
422 if (!flow_bb_inside_loop_p (loop, e->dest))
424 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
425 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
430 /* Releases the structures holding the new ssa names. */
433 free_new_names (bitmap definitions)
438 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
440 tree def = ssa_name (ver);
442 if (SSA_NAME_AUX (def))
444 free (SSA_NAME_AUX (def));
445 SSA_NAME_AUX (def) = NULL;
451 /* Renames variables in new generated LOOP. */
454 rename_variables_in_loop (struct loop *loop)
459 bbs = get_loop_body (loop);
461 for (i = 0; i < loop->num_nodes; i++)
462 rename_variables_in_bb (bbs[i]);
468 /* Update the PHI nodes of NEW_LOOP.
470 NEW_LOOP is a duplicate of ORIG_LOOP.
471 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
472 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
473 executes before it. */
476 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
477 struct loop *new_loop, bool after)
479 tree *new_name_ptr, new_ssa_name;
480 tree phi_new, phi_orig;
482 edge orig_loop_latch = loop_latch_edge (orig_loop);
483 edge orig_entry_e = loop_preheader_edge (orig_loop);
484 edge new_loop_exit_e = new_loop->exit_edges[0];
485 edge new_loop_entry_e = loop_preheader_edge (new_loop);
486 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
489 step 1. For each loop-header-phi:
490 Add the first phi argument for the phi in NEW_LOOP
491 (the one associated with the entry of NEW_LOOP)
493 step 2. For each loop-header-phi:
494 Add the second phi argument for the phi in NEW_LOOP
495 (the one associated with the latch of NEW_LOOP)
497 step 3. Update the phis in the successor block of NEW_LOOP.
499 case 1: NEW_LOOP was placed before ORIG_LOOP:
500 The successor block of NEW_LOOP is the header of ORIG_LOOP.
501 Updating the phis in the successor block can therefore be done
502 along with the scanning of the loop header phis, because the
503 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
504 phi nodes, organized in the same order.
506 case 2: NEW_LOOP was placed after ORIG_LOOP:
507 The successor block of NEW_LOOP is the original exit block of
508 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
509 We postpone updating these phis to a later stage (when
510 loop guards are added).
514 /* Scan the phis in the headers of the old and new loops
515 (they are organized in exactly the same order). */
517 for (phi_new = phi_nodes (new_loop->header),
518 phi_orig = phi_nodes (orig_loop->header);
520 phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
523 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
524 add_phi_arg (phi_new, def, new_loop_entry_e);
527 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
528 if (TREE_CODE (def) != SSA_NAME)
531 new_name_ptr = SSA_NAME_AUX (def);
533 /* Something defined outside of the loop. */
536 /* An ordinary ssa name defined in the loop. */
537 new_ssa_name = *new_name_ptr;
538 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
540 /* step 3 (case 1). */
543 gcc_assert (new_loop_exit_e == orig_entry_e);
544 SET_PHI_ARG_DEF (phi_orig,
545 new_loop_exit_e->dest_idx,
552 /* Update PHI nodes for a guard of the LOOP.
555 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
556 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
557 originates from the guard-bb, skips LOOP and reaches the (unique) exit
558 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
559 We denote this bb NEW_MERGE_BB because it had a single predecessor (the
560 LOOP header) before the guard code was added, and now it became a merge
561 point of two paths - the path that ends with the LOOP exit-edge, and
562 the path that ends with GUARD_EDGE.
564 This function creates and updates the relevant phi nodes to account for
565 the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
566 1. Create phi nodes at NEW_MERGE_BB.
567 2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
568 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
571 ===> The CFG before the guard-code was added:
573 if (exit_loop) goto update_bb : LOOP_header_bb
576 ==> The CFG after the guard-code was added:
578 if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
580 if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
585 - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
586 UPDATE_BB are loop entry phis, like the phis in the LOOP header,
587 organized in the same order.
588 If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
591 - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
592 "original" loop). FALSE if LOOP is an original loop (not a newly
593 created copy). The SSA_NAME_AUX fields of the defs in the original
594 loop are the corresponding new ssa-names used in the new duplicated
595 loop copy. IS_NEW_LOOP indicates which of the two args of the phi
596 nodes in UPDATE_BB takes the original ssa-name, and which takes the
597 new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
598 the LOOP-exit-edge takes the new-name, and the phi-arg that is
599 associated with GUARD_EDGE takes the original name. If IS_NEW_LOOP is
600 FALSE, it's the other way around.
604 slpeel_update_phi_nodes_for_guard (edge guard_edge,
609 tree orig_phi, new_phi, update_phi;
610 tree guard_arg, loop_arg;
611 basic_block new_merge_bb = guard_edge->dest;
612 edge e = EDGE_SUCC (new_merge_bb, 0);
613 basic_block update_bb = e->dest;
614 basic_block orig_bb = (entry_phis ? loop->header : update_bb);
616 for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
617 orig_phi && update_phi;
618 orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
620 /* 1. Generate new phi node in NEW_MERGE_BB: */
621 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
624 /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
625 of LOOP. Set the two phi args in NEW_PHI for these edges: */
628 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
629 EDGE_SUCC (loop->latch, 0));
630 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
634 tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
635 tree *new_name_ptr = SSA_NAME_AUX (orig_def);
639 new_name = *new_name_ptr;
641 /* Something defined outside of the loop */
646 guard_arg = orig_def;
651 guard_arg = new_name;
655 add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
656 add_phi_arg (new_phi, guard_arg, guard_edge);
658 /* 3. Update phi in successor block. */
659 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
660 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
661 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
664 set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
668 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
669 that starts at zero, increases by one and its limit is NITERS.
671 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
674 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
676 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
678 edge exit_edge = loop->exit_edges[0];
679 block_stmt_iterator loop_cond_bsi;
680 block_stmt_iterator incr_bsi;
682 tree begin_label = tree_block_label (loop->latch);
683 tree exit_label = tree_block_label (loop->single_exit->dest);
684 tree init = build_int_cst (TREE_TYPE (niters), 0);
685 tree step = build_int_cst (TREE_TYPE (niters), 1);
690 orig_cond = get_loop_exit_condition (loop);
691 #ifdef ENABLE_CHECKING
692 gcc_assert (orig_cond);
694 loop_cond_bsi = bsi_for_stmt (orig_cond);
696 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
697 create_iv (init, step, NULL_TREE, loop,
698 &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
700 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
702 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
703 then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
704 else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
706 else /* 'then' edge loops back. */
708 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
709 then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
710 else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
713 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
714 then_label, else_label);
715 bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
717 /* Remove old loop exit test: */
718 bsi_remove (&loop_cond_bsi);
720 loop_loc = find_loop_location (loop);
721 if (dump_file && (dump_flags & TDF_DETAILS))
723 if (loop_loc != UNKNOWN_LOC)
724 fprintf (dump_file, "\nloop at %s:%d: ",
725 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
726 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
729 loop->nb_iterations = niters;
733 /* Given LOOP this function generates a new copy of it and puts it
734 on E which is either the entry or exit of LOOP. */
737 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
740 struct loop *new_loop;
741 basic_block *new_bbs, *bbs;
744 basic_block exit_dest;
747 at_exit = (e == loop->exit_edges[0]);
748 if (!at_exit && e != loop_preheader_edge (loop))
751 bbs = get_loop_body (loop);
753 /* Check whether duplication is possible. */
754 if (!can_copy_bbs_p (bbs, loop->num_nodes))
760 /* Generate new loop structure. */
761 new_loop = duplicate_loop (loops, loop, loop->outer);
768 exit_dest = loop->exit_edges[0]->dest;
769 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
770 exit_dest) == loop->header ?
773 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
775 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
777 /* Duplicating phi args at exit bbs as coming
778 also from exit of duplicated loop. */
779 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
781 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
784 edge new_loop_exit_edge;
786 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
787 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
789 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
791 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
795 if (at_exit) /* Add the loop copy at exit. */
797 redirect_edge_and_branch_force (e, new_loop->header);
798 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
800 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
802 else /* Add the copy at entry. */
805 edge entry_e = loop_preheader_edge (loop);
806 basic_block preheader = entry_e->src;
808 if (!flow_bb_inside_loop_p (new_loop,
809 EDGE_SUCC (new_loop->header, 0)->dest))
810 new_exit_e = EDGE_SUCC (new_loop->header, 0);
812 new_exit_e = EDGE_SUCC (new_loop->header, 1);
814 redirect_edge_and_branch_force (new_exit_e, loop->header);
815 set_immediate_dominator (CDI_DOMINATORS, loop->header,
818 /* We have to add phi args to the loop->header here as coming
819 from new_exit_e edge. */
820 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
822 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
824 add_phi_arg (phi, phi_arg, new_exit_e);
827 redirect_edge_and_branch_force (entry_e, new_loop->header);
828 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
831 flow_loop_scan (new_loop, LOOP_ALL);
832 flow_loop_scan (loop, LOOP_ALL);
840 /* Given the condition statement COND, put it as the last statement
841 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
842 Assumes that this is the single exit of the guarded loop.
843 Returns the skip edge. */
846 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
849 block_stmt_iterator bsi;
851 tree cond_stmt, then_label, else_label;
853 enter_e = EDGE_SUCC (guard_bb, 0);
854 enter_e->flags &= ~EDGE_FALLTHRU;
855 enter_e->flags |= EDGE_FALSE_VALUE;
856 bsi = bsi_last (guard_bb);
858 then_label = build1 (GOTO_EXPR, void_type_node,
859 tree_block_label (exit_bb));
860 else_label = build1 (GOTO_EXPR, void_type_node,
861 tree_block_label (enter_e->dest));
862 cond_stmt = build3 (COND_EXPR, void_type_node, cond,
863 then_label, else_label);
864 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
865 /* Add new edge to connect entry block to the second loop. */
866 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
867 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
872 /* This function verifies that the following restrictions apply to LOOP:
874 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
875 (3) it is single entry, single exit
876 (4) its exit condition is the last stmt in the header
877 (5) E is the entry/exit edge of LOOP.
881 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
883 edge exit_e = loop->exit_edges [0];
884 edge entry_e = loop_preheader_edge (loop);
885 tree orig_cond = get_loop_exit_condition (loop);
886 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
888 if (any_marked_for_rewrite_p ())
892 /* All loops have an outer scope; the only case loop->outer is NULL is for
893 the function itself. */
895 || loop->num_nodes != 2
896 || !empty_block_p (loop->latch)
897 || loop->num_exits != 1
898 || loop->num_entries != 1
899 /* Verify that new loop exit condition can be trivially modified. */
900 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
901 || (e != exit_e && e != entry_e))
907 #ifdef ENABLE_CHECKING
909 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
910 struct loop *second_loop)
912 basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
913 basic_block loop2_entry_bb = second_loop->pre_header;
914 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
916 /* A guard that controls whether the second_loop is to be executed or skipped
917 is placed in first_loop->exit. first_loopt->exit therefore has two
918 successors - one is the preheader of second_loop, and the other is a bb
921 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
924 /* 1. Verify that one of the successors of first_loopt->exit is the preheader
927 /* The preheader of new_loop is expected to have two predessors:
928 first_loop->exit and the block that precedes first_loop. */
930 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
931 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
932 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
933 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
934 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
936 /* Verify that the other successor of first_loopt->exit is after the
942 /* Function slpeel_tree_peel_loop_to_edge.
944 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
945 that is placed on the entry (exit) edge E of LOOP. After this transformation
946 we have two loops one after the other - first-loop iterates FIRST_NITERS
947 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
950 - LOOP: the loop to be peeled.
951 - E: the exit or entry edge of LOOP.
952 If it is the entry edge, we peel the first iterations of LOOP. In this
953 case first-loop is LOOP, and second-loop is the newly created loop.
954 If it is the exit edge, we peel the last iterations of LOOP. In this
955 case, first-loop is the newly created loop, and second-loop is LOOP.
956 - NITERS: the number of iterations that LOOP iterates.
957 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
958 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
959 for updating the loop bound of the first-loop to FIRST_NITERS. If it
960 is false, the caller of this function may want to take care of this
961 (this can be useful if we don't want new stmts added to first-loop).
964 The function returns a pointer to the new loop-copy, or NULL if it failed
965 to perform the transformation.
967 The function generates two if-then-else guards: one before the first loop,
968 and the other before the second loop:
970 if (FIRST_NITERS == 0) then skip the first loop,
971 and go directly to the second loop.
973 if (FIRST_NITERS == NITERS) then skip the second loop.
975 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
976 FORNOW the resulting code will not be in loop-closed-ssa form.
980 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
981 edge e, tree first_niters,
982 tree niters, bool update_first_loop_count)
984 struct loop *new_loop = NULL, *first_loop, *second_loop;
988 basic_block bb_before_second_loop, bb_after_second_loop;
989 basic_block bb_before_first_loop;
990 basic_block bb_between_loops;
991 edge exit_e = loop->exit_edges [0];
994 if (!slpeel_can_duplicate_loop_p (loop, e))
997 /* We have to initialize cfg_hooks. Then, when calling
998 cfg_hooks->split_edge, the function tree_split_edge
999 is actually called and, when calling cfg_hooks->duplicate_block,
1000 the function tree_duplicate_bb is called. */
1001 tree_register_cfg_hooks ();
1004 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1005 Resulting CFG would be:
1018 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1020 loop_loc = find_loop_location (loop);
1021 if (dump_file && (dump_flags & TDF_DETAILS))
1023 if (loop_loc != UNKNOWN_LOC)
1024 fprintf (dump_file, "\n%s:%d: note: ",
1025 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1026 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1033 /* NEW_LOOP was placed after LOOP. */
1035 second_loop = new_loop;
1039 /* NEW_LOOP was placed before LOOP. */
1040 first_loop = new_loop;
1044 definitions = marked_ssa_names ();
1045 allocate_new_names (definitions);
1046 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1047 rename_variables_in_loop (new_loop);
1050 /* 2. Add the guard that controls whether the first loop is executed.
1051 Resulting CFG would be:
1053 bb_before_first_loop:
1054 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1061 bb_before_second_loop:
1070 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1071 add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1072 bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
1073 add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1074 flow_loop_scan (first_loop, LOOP_ALL);
1075 flow_loop_scan (second_loop, LOOP_ALL);
1078 build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
1079 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1080 bb_before_second_loop, bb_before_first_loop);
1081 slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
1082 first_loop == new_loop);
1085 /* 3. Add the guard that controls whether the second loop is executed.
1086 Resulting CFG would be:
1088 bb_before_first_loop:
1089 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1097 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1098 GOTO bb_before_second_loop
1100 bb_before_second_loop:
1106 bb_after_second_loop:
1111 bb_between_loops = split_edge (first_loop->exit_edges[0]);
1112 add_bb_to_loop (bb_between_loops, first_loop->outer);
1113 bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
1114 add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1115 flow_loop_scan (first_loop, LOOP_ALL);
1116 flow_loop_scan (second_loop, LOOP_ALL);
1118 pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1119 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1120 bb_after_second_loop, bb_before_first_loop);
1121 slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1122 second_loop == new_loop);
1124 /* Flow loop scan does not update loop->single_exit field. */
1125 first_loop->single_exit = first_loop->exit_edges[0];
1126 second_loop->single_exit = second_loop->exit_edges[0];
1128 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1130 if (update_first_loop_count)
1131 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1133 free_new_names (definitions);
1134 BITMAP_XFREE (definitions);
1135 unmark_all_for_rewrite ();
1140 /* Function vect_get_loop_location.
1142 Extract the location of the loop in the source code.
1143 If the loop is not well formed for vectorization, an estimated
1144 location is calculated.
1145 Return the loop location if succeed and NULL if not. */
1148 find_loop_location (struct loop *loop)
1150 tree node = NULL_TREE;
1152 block_stmt_iterator si;
1157 node = get_loop_exit_condition (loop);
1159 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1160 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1161 return EXPR_LOC (node);
1163 /* If we got here the loop is probably not "well formed",
1164 try to estimate the loop location */
1171 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1173 node = bsi_stmt (si);
1174 if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1175 return EXPR_LOC (node);
1182 /*************************************************************************
1183 Vectorization Debug Information.
1184 *************************************************************************/
1186 /* Function vect_set_verbosity_level.
1188 Called from toplev.c upon detection of the
1189 -ftree-vectorizer-verbose=N option. */
1192 vect_set_verbosity_level (const char *val)
1197 if (vl < MAX_VERBOSITY_LEVEL)
1198 vect_verbosity_level = vl;
1200 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1204 /* Function vect_set_dump_settings.
1206 Fix the verbosity level of the vectorizer if the
1207 requested level was not set explicitly using the flag
1208 -ftree-vectorizer-verbose=N.
1209 Decide where to print the debugging information (dump_file/stderr).
1210 If the user defined the verbosity level, but there is no dump file,
1211 print to stderr, otherwise print to the dump file. */
1214 vect_set_dump_settings (void)
1216 vect_dump = dump_file;
1218 /* Check if the verbosity level was defined by the user: */
1219 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1221 /* If there is no dump file, print to stderr. */
1227 /* User didn't specify verbosity level: */
1228 if (dump_file && (dump_flags & TDF_DETAILS))
1229 vect_verbosity_level = REPORT_DETAILS;
1230 else if (dump_file && (dump_flags & TDF_STATS))
1231 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1233 vect_verbosity_level = REPORT_NONE;
1235 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1239 /* Function debug_loop_details.
1241 For vectorization debug dumps. */
1244 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1246 if (vl > vect_verbosity_level)
1249 if (loc == UNKNOWN_LOC)
1250 fprintf (vect_dump, "\n%s:%d: note: ",
1251 DECL_SOURCE_FILE (current_function_decl),
1252 DECL_SOURCE_LINE (current_function_decl));
1254 fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1262 /* Here the proper Vectorizer starts. */
1264 /*************************************************************************
1265 Vectorization Utilities.
1266 *************************************************************************/
1268 /* Function new_stmt_vec_info.
1270 Create and initialize a new stmt_vec_info struct for STMT. */
1273 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1276 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1278 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1279 STMT_VINFO_STMT (res) = stmt;
1280 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1281 STMT_VINFO_RELEVANT_P (res) = 0;
1282 STMT_VINFO_VECTYPE (res) = NULL;
1283 STMT_VINFO_VEC_STMT (res) = NULL;
1284 STMT_VINFO_DATA_REF (res) = NULL;
1285 STMT_VINFO_MEMTAG (res) = NULL;
1286 STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1287 STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1288 STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1289 STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1290 STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1296 /* Function new_loop_vec_info.
1298 Create and initialize a new loop_vec_info struct for LOOP, as well as
1299 stmt_vec_info structs for all the stmts in LOOP. */
1302 new_loop_vec_info (struct loop *loop)
1306 block_stmt_iterator si;
1309 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1311 bbs = get_loop_body (loop);
1313 /* Create stmt_info for all stmts in the loop. */
1314 for (i = 0; i < loop->num_nodes; i++)
1316 basic_block bb = bbs[i];
1317 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1319 tree stmt = bsi_stmt (si);
1322 get_stmt_operands (stmt);
1323 ann = stmt_ann (stmt);
1324 set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1328 LOOP_VINFO_LOOP (res) = loop;
1329 LOOP_VINFO_BBS (res) = bbs;
1330 LOOP_VINFO_EXIT_COND (res) = NULL;
1331 LOOP_VINFO_NITERS (res) = NULL;
1332 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1333 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1334 LOOP_VINFO_VECT_FACTOR (res) = 0;
1335 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1336 "loop_write_datarefs");
1337 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1338 "loop_read_datarefs");
1339 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1340 LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1346 /* Function destroy_loop_vec_info.
1348 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1349 stmts in the loop. */
1352 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1357 block_stmt_iterator si;
1363 loop = LOOP_VINFO_LOOP (loop_vinfo);
1365 bbs = LOOP_VINFO_BBS (loop_vinfo);
1366 nbbs = loop->num_nodes;
1368 for (j = 0; j < nbbs; j++)
1370 basic_block bb = bbs[j];
1371 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1373 tree stmt = bsi_stmt (si);
1374 stmt_ann_t ann = stmt_ann (stmt);
1375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1377 set_stmt_info (ann, NULL);
1381 free (LOOP_VINFO_BBS (loop_vinfo));
1382 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1383 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1389 /* Function vect_get_ptr_offset
1391 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1394 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1395 tree vectype ATTRIBUTE_UNUSED,
1396 tree *offset ATTRIBUTE_UNUSED)
1398 /* TODO: Use alignment information. */
1403 /* Function vect_strip_conversions
1405 Strip conversions that don't narrow the mode. */
1408 vect_strip_conversion (tree expr)
1410 tree to, ti, oprnd0;
1412 while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1414 to = TREE_TYPE (expr);
1415 oprnd0 = TREE_OPERAND (expr, 0);
1416 ti = TREE_TYPE (oprnd0);
1418 if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1420 if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1429 /* Function vect_analyze_offset_expr
1431 Given an offset expression EXPR received from get_inner_reference, analyze
1432 it and create an expression for INITIAL_OFFSET by substituting the variables
1433 of EXPR with initial_condition of the corresponding access_fn in the loop.
1436 for (j = 3; j < N; j++)
1439 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
1440 substituted, since its access_fn in the inner loop is i. 'j' will be
1441 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
1444 Compute MISALIGN (the misalignment of the data reference initial access from
1445 its base) if possible. Misalignment can be calculated only if all the
1446 variables can be substituted with constants, or if a variable is multiplied
1447 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
1448 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
1449 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
1450 VECTYPE_ALIGNMENT computation in the caller of this function).
1452 STEP is an evolution of the data reference in this loop in bytes.
1453 In the above example, STEP is C_j.
1455 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
1456 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
1457 are NULL_TREEs. Otherwise, return TRUE.
1462 vect_analyze_offset_expr (tree expr,
1464 tree vectype_alignment,
1465 tree *initial_offset,
1471 tree left_offset = ssize_int (0);
1472 tree right_offset = ssize_int (0);
1473 tree left_misalign = ssize_int (0);
1474 tree right_misalign = ssize_int (0);
1475 tree left_step = ssize_int (0);
1476 tree right_step = ssize_int (0);
1477 enum tree_code code;
1478 tree init, evolution;
1481 *misalign = NULL_TREE;
1482 *initial_offset = NULL_TREE;
1484 /* Strip conversions that don't narrow the mode. */
1485 expr = vect_strip_conversion (expr);
1491 if (TREE_CODE (expr) == INTEGER_CST)
1493 *initial_offset = fold_convert (ssizetype, expr);
1494 *misalign = fold_convert (ssizetype, expr);
1495 *step = ssize_int (0);
1499 /* 2. Variable. Try to substitute with initial_condition of the corresponding
1500 access_fn in the current loop. */
1501 if (SSA_VAR_P (expr))
1503 tree access_fn = analyze_scalar_evolution (loop, expr);
1505 if (access_fn == chrec_dont_know)
1509 init = initial_condition_in_loop_num (access_fn, loop->num);
1510 if (init == expr && !expr_invariant_in_loop_p (loop, init))
1511 /* Not enough information: may be not loop invariant.
1512 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
1513 initial_condition is D, but it depends on i - loop's induction
1517 evolution = evolution_part_in_loop_num (access_fn, loop->num);
1518 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
1519 /* Evolution is not constant. */
1522 if (TREE_CODE (init) == INTEGER_CST)
1523 *misalign = fold_convert (ssizetype, init);
1525 /* Not constant, misalignment cannot be calculated. */
1526 *misalign = NULL_TREE;
1528 *initial_offset = fold_convert (ssizetype, init);
1530 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
1534 /* Recursive computation. */
1535 if (!BINARY_CLASS_P (expr))
1537 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
1538 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1540 fprintf (vect_dump, "Not binary expression ");
1541 print_generic_expr (vect_dump, expr, TDF_SLIM);
1545 oprnd0 = TREE_OPERAND (expr, 0);
1546 oprnd1 = TREE_OPERAND (expr, 1);
1548 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
1549 &left_misalign, &left_step)
1550 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
1551 &right_offset, &right_misalign, &right_step))
1554 /* The type of the operation: plus, minus or mult. */
1555 code = TREE_CODE (expr);
1559 if (TREE_CODE (right_offset) != INTEGER_CST)
1560 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
1562 FORNOW: We don't support such cases. */
1565 /* Strip conversions that don't narrow the mode. */
1566 left_offset = vect_strip_conversion (left_offset);
1569 /* Misalignment computation. */
1570 if (SSA_VAR_P (left_offset))
1572 /* If the left side contains variables that can't be substituted with
1573 constants, we check if the right side is a multiple of ALIGNMENT.
1575 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
1576 fold_convert (ssizetype, vectype_alignment))))
1577 *misalign = ssize_int (0);
1579 /* If the remainder is not zero or the right side isn't constant,
1580 we can't compute misalignment. */
1581 *misalign = NULL_TREE;
1585 /* The left operand was successfully substituted with constant. */
1587 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
1589 *misalign = size_binop (code, left_misalign, right_misalign);
1591 *misalign = NULL_TREE;
1594 /* Step calculation. */
1595 /* Multiply the step by the right operand. */
1596 *step = size_binop (MULT_EXPR, left_step, right_offset);
1601 /* Combine the recursive calculations for step and misalignment. */
1602 *step = size_binop (code, left_step, right_step);
1604 if (left_misalign && right_misalign)
1605 *misalign = size_binop (code, left_misalign, right_misalign);
1607 *misalign = NULL_TREE;
1615 /* Compute offset. */
1616 *initial_offset = fold_convert (ssizetype,
1617 fold (build2 (code, TREE_TYPE (left_offset),
1624 /* Function vect_get_base_and_offset
1626 Return the BASE of the data reference EXPR.
1627 If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
1629 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1630 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1631 instantiated with initial_conditions of access_functions of variables,
1632 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1634 Function get_inner_reference is used for the above in case of ARRAY_REF and
1638 EXPR - the memory reference that is being analyzed
1639 DR - the data_reference struct of the _original_ memory reference
1640 (Note: DR_REF (DR) is not necessarily EXPR)
1641 VECTYPE - the type that defines the alignment (i.e, we compute
1642 alignment relative to TYPE_ALIGN(VECTYPE))
1645 BASE (returned value) - the base of the data reference EXPR.
1646 E.g, if EXPR is a.b[k].c[i][j] the returned
1648 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1649 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1650 computation is impossible
1651 STEP - evolution of the DR_REF in the loop
1652 BASE_ALIGNED_P - indicates if BASE is aligned
1654 If something unexpected is encountered (an unsupported form of data-ref),
1655 then NULL_TREE is returned. */
1658 vect_get_base_and_offset (struct data_reference *dr,
1661 loop_vec_info loop_vinfo,
1662 tree *initial_offset,
1665 bool *base_aligned_p)
1667 tree this_offset = ssize_int (0);
1668 tree this_misalign = ssize_int (0);
1669 tree this_step = ssize_int (0);
1670 tree base = NULL_TREE;
1672 tree oprnd0, oprnd1;
1673 enum tree_code code = TREE_CODE (expr);
1674 HOST_WIDE_INT pbitsize;
1675 HOST_WIDE_INT pbitpos;
1677 enum machine_mode pmode;
1678 int punsignedp, pvolatilep;
1679 tree bit_pos_in_bytes;
1680 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1682 *base_aligned_p = false;
1686 /* These cases end the recursion: */
1689 *initial_offset = ssize_int (0);
1690 *step = ssize_int (0);
1691 *misalign = ssize_int (0);
1692 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1693 *base_aligned_p = true;
1694 return build_fold_addr_expr (expr);
1697 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1700 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1702 base = vect_get_ptr_offset (expr, vectype, misalign);
1704 *base_aligned_p = true;
1708 *base_aligned_p = true;
1709 *misalign = ssize_int (0);
1711 *initial_offset = ssize_int (0);
1712 *step = ssize_int (0);
1716 *initial_offset = fold_convert (ssizetype, expr);
1717 *misalign = fold_convert (ssizetype, expr);
1718 *step = ssize_int (0);
1721 /* These cases continue the recursion: */
1723 oprnd0 = TREE_OPERAND (expr, 0);
1728 oprnd0 = TREE_OPERAND (expr, 0);
1734 oprnd0 = TREE_OPERAND (expr, 0);
1735 oprnd1 = TREE_OPERAND (expr, 1);
1737 /* In case we have a PLUS_EXPR of the form
1738 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1739 This is verified in vect_get_memtag_and_dr. */
1740 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1741 &this_offset, &this_misalign,
1742 &this_step, base_aligned_p);
1743 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1744 this_offset = ssize_int (0);
1747 this_misalign = NULL_TREE;
1749 this_misalign = size_binop (TREE_CODE (expr), ssize_int (0),
1755 if (!handled_component_p (expr))
1756 /* Unsupported expression. */
1759 /* Find the base and the offset from it. */
1760 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1761 &pmode, &punsignedp, &pvolatilep, false);
1766 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1767 &this_offset, &this_misalign,
1770 /* Failed to compute offset or step. */
1772 *initial_offset = NULL_TREE;
1773 *misalign = NULL_TREE;
1777 /* Add bit position to OFFSET and MISALIGN. */
1779 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1780 /* Check that there is no remainder in bits. */
1781 if (pbitpos%BITS_PER_UNIT)
1783 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1784 fprintf (vect_dump, "bit offset alignment.");
1787 this_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes,
1788 fold_convert (ssizetype, this_offset));
1790 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1792 /* Continue the recursion to refine the base (get_inner_reference returns
1793 &a for &a[i], and not a). */
1797 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1798 initial_offset, misalign, step,
1802 /* Combine the results. */
1803 if (this_misalign && *misalign)
1804 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1806 *misalign = NULL_TREE;
1808 *step = size_binop (PLUS_EXPR, *step, this_step);
1810 *initial_offset = size_binop (PLUS_EXPR, *initial_offset, this_offset);
1812 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1814 print_generic_expr (vect_dump, expr, TDF_SLIM);
1815 fprintf (vect_dump, "\n --> total offset for ref: ");
1816 print_generic_expr (vect_dump, *initial_offset, TDF_SLIM);
1817 fprintf (vect_dump, "\n --> total misalign for ref: ");
1818 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1819 fprintf (vect_dump, "\n --> total step for ref: ");
1820 print_generic_expr (vect_dump, *step, TDF_SLIM);
1827 /* Function vect_force_dr_alignment_p.
1829 Returns whether the alignment of a DECL can be forced to be aligned
1830 on ALIGNMENT bit boundary. */
1833 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1835 if (TREE_CODE (decl) != VAR_DECL)
1838 if (DECL_EXTERNAL (decl))
1841 if (TREE_ASM_WRITTEN (decl))
1844 if (TREE_STATIC (decl))
1845 return (alignment <= MAX_OFILE_ALIGNMENT);
1847 /* This is not 100% correct. The absolute correct stack alignment
1848 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1849 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1850 However, until someone implements forced stack alignment, SSE
1851 isn't really usable without this. */
1852 return (alignment <= PREFERRED_STACK_BOUNDARY);
1856 /* Function vect_get_new_vect_var.
1858 Returns a name for a new variable. The current naming scheme appends the
1859 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1860 the name of vectorizer generated variables, and appends that to NAME if
1864 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1870 if (var_kind == vect_simple_var)
1875 prefix_len = strlen (prefix);
1878 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1880 new_vect_var = create_tmp_var (type, prefix);
1882 return new_vect_var;
1886 /* Function vect_create_index_for_vector_ref.
1888 Create (and return) an index variable, along with it's update chain in the
1889 loop. This variable will be used to access a memory location in a vector
1893 LOOP: The loop being vectorized.
1894 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1895 function can be added here, or in the loop pre-header.
1898 Return an index that will be used to index a vector array. It is expected
1899 that a pointer to the first vector will be used as the base address for the
1902 FORNOW: we are not trying to be efficient, just creating a new index each
1903 time from scratch. At this time all vector references could use the same
1906 TODO: create only one index to be used by all vector references. Record
1907 the index in the LOOP_VINFO the first time this procedure is called and
1908 return it on subsequent calls. The increment of this index must be placed
1909 just before the conditional expression that ends the single block loop. */
1912 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
1915 block_stmt_iterator incr_bsi;
1917 tree indx_before_incr, indx_after_incr;
1918 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1921 /* It is assumed that the base pointer used for vectorized access contains
1922 the address of the first vector. Therefore the index used for vectorized
1923 access must be initialized to zero and incremented by 1. */
1925 init = integer_zero_node;
1926 step = integer_one_node;
1928 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1929 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
1930 &indx_before_incr, &indx_after_incr);
1931 incr = bsi_stmt (incr_bsi);
1932 get_stmt_operands (incr);
1933 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1935 return indx_before_incr;
1939 /* Function vect_create_addr_base_for_vector_ref.
1941 Create an expression that computes the address of the first memory location
1942 that will be accessed for a data reference.
1945 STMT: The statement containing the data reference.
1946 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1947 OFFSET: Optional. If supplied, it is be added to the initial address.
1950 1. Return an SSA_NAME whose value is the address of the memory location of
1951 the first vector of the data reference.
1952 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1953 these statement(s) which define the returned SSA_NAME.
1955 FORNOW: We are only handling array accesses with step 1. */
1958 vect_create_addr_base_for_vector_ref (tree stmt,
1959 tree *new_stmt_list,
1962 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1963 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1964 tree data_ref_base =
1965 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
1966 tree base_name = build_fold_indirect_ref (data_ref_base);
1967 tree ref = DR_REF (dr);
1968 tree scalar_type = TREE_TYPE (ref);
1969 tree scalar_ptr_type = build_pointer_type (scalar_type);
1972 tree addr_base, addr_expr;
1973 tree dest, new_stmt;
1974 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1976 /* Create base_offset */
1977 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1978 add_referenced_tmp_var (dest);
1979 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1980 append_to_statement_list_force (new_stmt, new_stmt_list);
1984 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1985 add_referenced_tmp_var (tmp);
1986 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1987 STMT_VINFO_VECT_STEP (stmt_info)));
1988 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
1989 base_offset, offset));
1990 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1991 append_to_statement_list_force (new_stmt, new_stmt_list);
1994 /* base + base_offset */
1995 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1998 /* addr_expr = addr_base */
1999 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
2000 get_name (base_name));
2001 add_referenced_tmp_var (addr_expr);
2002 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
2003 new_temp = make_ssa_name (addr_expr, vec_stmt);
2004 TREE_OPERAND (vec_stmt, 0) = new_temp;
2005 append_to_statement_list_force (vec_stmt, new_stmt_list);
2007 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2009 fprintf (vect_dump, "created ");
2010 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2016 /* Function get_vectype_for_scalar_type.
2018 Returns the vector type corresponding to SCALAR_TYPE as supported
2022 get_vectype_for_scalar_type (tree scalar_type)
2024 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
2025 int nbytes = GET_MODE_SIZE (inner_mode);
2032 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
2034 nunits = UNITS_PER_SIMD_WORD / nbytes;
2036 vectype = build_vector_type (scalar_type, nunits);
2037 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2039 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
2040 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2046 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2048 fprintf (vect_dump, "vectype: ");
2049 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2052 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2054 /* TODO: tree-complex.c sometimes can parallelize operations
2055 on generic vectors. We can vectorize the loop in that case,
2056 but then we should re-run the lowering pass. */
2057 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2058 fprintf (vect_dump, "mode not supported by target.");
2066 /* Function vect_align_data_ref.
2068 Handle mislignment of a memory accesses.
2070 FORNOW: Can't handle misaligned accesses.
2071 Make sure that the dataref is aligned. */
2074 vect_align_data_ref (tree stmt)
2076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2077 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2079 /* FORNOW: can't handle misaligned accesses;
2080 all accesses expected to be aligned. */
2081 gcc_assert (aligned_access_p (dr));
2085 /* Function vect_create_data_ref_ptr.
2087 Create a memory reference expression for vector access, to be used in a
2088 vector load/store stmt. The reference is based on a new pointer to vector
2092 1. STMT: a stmt that references memory. Expected to be of the form
2093 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2094 2. BSI: block_stmt_iterator where new stmts can be added.
2095 3. OFFSET (optional): an offset to be added to the initial address accessed
2096 by the data-ref in STMT.
2097 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2098 pointing to the initial address.
2101 1. Declare a new ptr to vector_type, and have it point to the base of the
2102 data reference (initial addressed accessed by the data reference).
2103 For example, for vector of type V8HI, the following code is generated:
2106 vp = (v8hi *)initial_address;
2108 if OFFSET is not supplied:
2109 initial_address = &a[init];
2110 if OFFSET is supplied:
2111 initial_address = &a[init + OFFSET];
2113 Return the initial_address in INITIAL_ADDRESS.
2115 2. Create a data-reference in the loop based on the new vector pointer vp,
2116 and using a new index variable 'idx' as follows:
2120 where if ONLY_INIT is true:
2123 update = idx + vector_type_size
2125 Return the pointer vp'.
2128 FORNOW: handle only aligned and consecutive accesses. */
2131 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2132 tree *initial_address, bool only_init)
2135 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2136 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2137 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2138 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2142 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2143 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2144 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2145 int nvuses, nv_may_defs, nv_must_defs;
2149 tree new_stmt_list = NULL_TREE;
2151 edge pe = loop_preheader_edge (loop);
2157 tree type, tmp, size;
2159 base_name = build_fold_indirect_ref (unshare_expr (
2160 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
2162 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2164 tree data_ref_base = base_name;
2165 fprintf (vect_dump, "create array_ref of type: ");
2166 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2167 if (TREE_CODE (data_ref_base) == VAR_DECL)
2168 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
2169 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2170 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
2171 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2172 fprintf (vect_dump, " vectorizing a record based array ref: ");
2173 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2174 fprintf (vect_dump, " vectorizing a pointer ref: ");
2175 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2178 /** (1) Create the new vector-pointer variable: **/
2180 vect_ptr_type = build_pointer_type (vectype);
2181 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2182 get_name (base_name));
2183 add_referenced_tmp_var (vect_ptr);
2186 /** (2) Handle aliasing information of the new vector-pointer: **/
2188 tag = STMT_VINFO_MEMTAG (stmt_info);
2190 get_var_ann (vect_ptr)->type_mem_tag = tag;
2192 /* Mark for renaming all aliased variables
2193 (i.e, the may-aliases of the type-mem-tag). */
2194 nvuses = NUM_VUSES (vuses);
2195 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2196 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2197 for (i = 0; i < nvuses; i++)
2199 tree use = VUSE_OP (vuses, i);
2200 if (TREE_CODE (use) == SSA_NAME)
2201 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2203 for (i = 0; i < nv_may_defs; i++)
2205 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2206 if (TREE_CODE (def) == SSA_NAME)
2207 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2209 for (i = 0; i < nv_must_defs; i++)
2211 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2212 if (TREE_CODE (def) == SSA_NAME)
2213 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2217 /** (3) Calculate the initial address the vector-pointer, and set
2218 the vector-pointer to point to it before the loop: **/
2220 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2221 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2223 pe = loop_preheader_edge (loop);
2224 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2225 gcc_assert (!new_bb);
2226 *initial_address = new_temp;
2228 /* Create: p = (vectype *) initial_base */
2229 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2230 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2231 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2232 TREE_OPERAND (vec_stmt, 0) = new_temp;
2233 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2234 gcc_assert (!new_bb);
2235 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2238 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2240 if (only_init) /* No update in loop is required. */
2241 return vect_ptr_init;
2243 idx = vect_create_index_for_vector_ref (loop_vinfo);
2245 /* Create: update = idx * vectype_size */
2246 tmp = create_tmp_var (integer_type_node, "update");
2247 add_referenced_tmp_var (tmp);
2248 size = TYPE_SIZE (vect_ptr_type);
2249 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2250 ptr_update = create_tmp_var (type, "update");
2251 add_referenced_tmp_var (ptr_update);
2252 vectype_size = TYPE_SIZE_UNIT (vectype);
2253 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2254 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2255 new_temp = make_ssa_name (tmp, vec_stmt);
2256 TREE_OPERAND (vec_stmt, 0) = new_temp;
2257 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2258 vec_stmt = fold_convert (type, new_temp);
2259 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2260 new_temp = make_ssa_name (ptr_update, vec_stmt);
2261 TREE_OPERAND (vec_stmt, 0) = new_temp;
2262 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2264 /* Create: data_ref_ptr = vect_ptr_init + update */
2265 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2266 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2267 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2268 TREE_OPERAND (vec_stmt, 0) = new_temp;
2269 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2270 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2272 return data_ref_ptr;
2276 /* Function vect_create_destination_var.
2278 Create a new temporary of type VECTYPE. */
2281 vect_create_destination_var (tree scalar_dest, tree vectype)
2284 const char *new_name;
2286 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2288 new_name = get_name (scalar_dest);
2291 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2292 add_referenced_tmp_var (vec_dest);
2298 /* Function vect_init_vector.
2300 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2301 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2302 used in the vectorization of STMT. */
2305 vect_init_vector (tree stmt, tree vector_var)
2307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2309 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2312 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2318 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2319 add_referenced_tmp_var (new_var);
2321 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2322 new_temp = make_ssa_name (new_var, init_stmt);
2323 TREE_OPERAND (init_stmt, 0) = new_temp;
2325 pe = loop_preheader_edge (loop);
2326 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2327 gcc_assert (!new_bb);
2329 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2331 fprintf (vect_dump, "created new init_stmt: ");
2332 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
2335 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2340 /* Function vect_get_vec_def_for_operand.
2342 OP is an operand in STMT. This function returns a (vector) def that will be
2343 used in the vectorized stmt for STMT.
2345 In the case that OP is an SSA_NAME which is defined in the loop, then
2346 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2348 In case OP is an invariant or constant, a new stmt that creates a vector def
2349 needs to be introduced. */
2352 vect_get_vec_def_for_operand (tree op, tree stmt)
2357 stmt_vec_info def_stmt_info = NULL;
2358 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2359 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2360 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2361 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2362 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2369 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2371 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
2372 print_generic_expr (vect_dump, op, TDF_SLIM);
2375 /** ===> Case 1: operand is a constant. **/
2377 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2379 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2383 /* Build a tree with vector elements. */
2384 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2385 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
2387 for (i = nunits - 1; i >= 0; --i)
2389 t = tree_cons (NULL_TREE, op, t);
2391 vec_cst = build_vector (vectype, t);
2392 return vect_init_vector (stmt, vec_cst);
2395 gcc_assert (TREE_CODE (op) == SSA_NAME);
2397 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2399 def_stmt = SSA_NAME_DEF_STMT (op);
2400 def_stmt_info = vinfo_for_stmt (def_stmt);
2402 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2404 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
2405 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2409 /** ==> Case 2.1: operand is defined inside the loop. **/
2413 /* Get the def from the vectorized stmt. */
2415 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2416 gcc_assert (vec_stmt);
2417 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2422 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2423 it is a reduction/induction. **/
2425 bb = bb_for_stmt (def_stmt);
2426 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2428 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2429 fprintf (vect_dump, "reduction/induction - unsupported.");
2430 internal_error ("no support for reduction/induction"); /* FORNOW */
2434 /** ==> Case 2.3: operand is defined outside the loop -
2435 it is a loop invariant. */
2437 switch (TREE_CODE (def_stmt))
2440 def = PHI_RESULT (def_stmt);
2443 def = TREE_OPERAND (def_stmt, 0);
2446 def = TREE_OPERAND (def_stmt, 0);
2447 gcc_assert (IS_EMPTY_STMT (def_stmt));
2451 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2453 fprintf (vect_dump, "unsupported defining stmt: ");
2454 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2456 internal_error ("unsupported defining stmt");
2459 /* Build a tree with vector elements.
2460 Create 'vec_inv = {inv,inv,..,inv}' */
2462 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2463 fprintf (vect_dump, "Create vector_inv.");
2465 for (i = nunits - 1; i >= 0; --i)
2467 t = tree_cons (NULL_TREE, def, t);
2470 vec_inv = build_constructor (vectype, t);
2471 return vect_init_vector (stmt, vec_inv);
2475 /* Function vect_finish_stmt_generation.
2477 Insert a new stmt. */
2480 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2482 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2484 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2486 fprintf (vect_dump, "add new stmt: ");
2487 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2490 #ifdef ENABLE_CHECKING
2491 /* Make sure bsi points to the stmt that is being vectorized. */
2492 gcc_assert (stmt == bsi_stmt (*bsi));
2495 #ifdef USE_MAPPED_LOCATION
2496 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCUS (stmt));
2498 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2503 /* Function vectorizable_assignment.
2505 Check if STMT performs an assignment (copy) that can be vectorized.
2506 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2507 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2508 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2511 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2517 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2518 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2519 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2522 /* Is vectorizable assignment? */
2524 if (TREE_CODE (stmt) != MODIFY_EXPR)
2527 scalar_dest = TREE_OPERAND (stmt, 0);
2528 if (TREE_CODE (scalar_dest) != SSA_NAME)
2531 op = TREE_OPERAND (stmt, 1);
2532 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2534 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2535 fprintf (vect_dump, "use not simple.");
2539 if (!vec_stmt) /* transformation not required. */
2541 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2546 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2547 fprintf (vect_dump, "transform assignment.");
2550 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2553 op = TREE_OPERAND (stmt, 1);
2554 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2556 /* Arguments are ready. create the new vector stmt. */
2557 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2558 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2559 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2560 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2566 /* Function vectorizable_operation.
2568 Check if STMT performs a binary or unary operation that can be vectorized.
2569 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2570 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2571 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2574 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2579 tree op0, op1 = NULL;
2580 tree vec_oprnd0, vec_oprnd1=NULL;
2581 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2582 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2583 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2585 enum tree_code code;
2586 enum machine_mode vec_mode;
2592 /* Is STMT a vectorizable binary/unary operation? */
2593 if (TREE_CODE (stmt) != MODIFY_EXPR)
2596 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2599 operation = TREE_OPERAND (stmt, 1);
2600 code = TREE_CODE (operation);
2601 optab = optab_for_tree_code (code, vectype);
2603 /* Support only unary or binary operations. */
2604 op_type = TREE_CODE_LENGTH (code);
2605 if (op_type != unary_op && op_type != binary_op)
2607 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2608 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
2612 for (i = 0; i < op_type; i++)
2614 op = TREE_OPERAND (operation, i);
2615 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2617 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2618 fprintf (vect_dump, "use not simple.");
2623 /* Supportable by target? */
2626 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2627 fprintf (vect_dump, "no optab.");
2630 vec_mode = TYPE_MODE (vectype);
2631 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2633 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2634 fprintf (vect_dump, "op not supported by target.");
2638 if (!vec_stmt) /* transformation not required. */
2640 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2646 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2647 fprintf (vect_dump, "transform binary/unary operation.");
2650 scalar_dest = TREE_OPERAND (stmt, 0);
2651 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2654 op0 = TREE_OPERAND (operation, 0);
2655 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2657 if (op_type == binary_op)
2659 op1 = TREE_OPERAND (operation, 1);
2660 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2663 /* Arguments are ready. create the new vector stmt. */
2665 if (op_type == binary_op)
2666 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2667 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2669 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2670 build1 (code, vectype, vec_oprnd0));
2671 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2672 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2673 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2679 /* Function vectorizable_store.
2681 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2683 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2684 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2685 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2688 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2694 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2695 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2696 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2698 enum machine_mode vec_mode;
2700 enum dr_alignment_support alignment_support_cheme;
2702 /* Is vectorizable store? */
2704 if (TREE_CODE (stmt) != MODIFY_EXPR)
2707 scalar_dest = TREE_OPERAND (stmt, 0);
2708 if (TREE_CODE (scalar_dest) != ARRAY_REF
2709 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2712 op = TREE_OPERAND (stmt, 1);
2713 if (!vect_is_simple_use (op, loop_vinfo, NULL))
2715 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2716 fprintf (vect_dump, "use not simple.");
2720 vec_mode = TYPE_MODE (vectype);
2721 /* FORNOW. In some cases can vectorize even if data-type not supported
2722 (e.g. - array initialization with 0). */
2723 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2726 if (!STMT_VINFO_DATA_REF (stmt_info))
2730 if (!vec_stmt) /* transformation not required. */
2732 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2738 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2739 fprintf (vect_dump, "transform store");
2741 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2742 gcc_assert (alignment_support_cheme);
2743 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2745 /* Handle use - get the vectorized def from the defining stmt. */
2746 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2749 /* FORNOW: make sure the data reference is aligned. */
2750 vect_align_data_ref (stmt);
2751 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2752 data_ref = build_fold_indirect_ref (data_ref);
2754 /* Arguments are ready. create the new vector stmt. */
2755 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2756 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2762 /* vectorizable_load.
2764 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2766 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2767 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2768 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2771 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2774 tree vec_dest = NULL;
2775 tree data_ref = NULL;
2777 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2778 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2779 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2786 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2787 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2788 edge pe = loop_preheader_edge (loop);
2789 enum dr_alignment_support alignment_support_cheme;
2791 /* Is vectorizable load? */
2793 if (TREE_CODE (stmt) != MODIFY_EXPR)
2796 scalar_dest = TREE_OPERAND (stmt, 0);
2797 if (TREE_CODE (scalar_dest) != SSA_NAME)
2800 op = TREE_OPERAND (stmt, 1);
2801 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2804 if (!STMT_VINFO_DATA_REF (stmt_info))
2807 mode = (int) TYPE_MODE (vectype);
2809 /* FORNOW. In some cases can vectorize even if data-type not supported
2810 (e.g. - data copies). */
2811 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2813 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2814 fprintf (vect_dump, "Aligned load, but unsupported type.");
2818 if (!vec_stmt) /* transformation not required. */
2820 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2826 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2827 fprintf (vect_dump, "transform load.");
2829 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2830 gcc_assert (alignment_support_cheme);
2832 if (alignment_support_cheme == dr_aligned
2833 || alignment_support_cheme == dr_unaligned_supported)
2844 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2845 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2846 if (aligned_access_p (dr))
2847 data_ref = build_fold_indirect_ref (data_ref);
2850 int mis = DR_MISALIGNMENT (dr);
2851 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2852 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2853 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2855 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2856 new_temp = make_ssa_name (vec_dest, new_stmt);
2857 TREE_OPERAND (new_stmt, 0) = new_temp;
2858 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2860 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2864 msq_init = *(floor(p1))
2865 p2 = initial_addr + VS - 1;
2866 magic = have_builtin ? builtin_result : initial_address;
2869 p2' = p2 + indx * vectype_size
2871 vec_dest = realign_load (msq, lsq, magic)
2885 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2886 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2887 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2889 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2890 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2891 new_temp = make_ssa_name (vec_dest, new_stmt);
2892 TREE_OPERAND (new_stmt, 0) = new_temp;
2893 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2894 gcc_assert (!new_bb);
2895 msq_init = TREE_OPERAND (new_stmt, 0);
2898 /* <2> Create lsq = *(floor(p2')) in the loop */
2899 offset = build_int_cst (integer_type_node,
2900 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2901 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2902 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2903 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2904 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2905 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2906 new_temp = make_ssa_name (vec_dest, new_stmt);
2907 TREE_OPERAND (new_stmt, 0) = new_temp;
2908 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2909 lsq = TREE_OPERAND (new_stmt, 0);
2913 if (targetm.vectorize.builtin_mask_for_load)
2915 /* Create permutation mask, if required, in loop preheader. */
2917 params = build_tree_list (NULL_TREE, init_addr);
2918 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2919 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2920 new_stmt = build_function_call_expr (builtin_decl, params);
2921 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2922 new_temp = make_ssa_name (vec_dest, new_stmt);
2923 TREE_OPERAND (new_stmt, 0) = new_temp;
2924 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2925 gcc_assert (!new_bb);
2926 magic = TREE_OPERAND (new_stmt, 0);
2928 /* Since we have just created a CALL_EXPR, we may need to
2929 rename call-clobbered variables. */
2930 mark_call_clobbered_vars_to_rename ();
2934 /* Use current address instead of init_addr for reduced reg pressure.
2936 magic = dataref_ptr;
2940 /* <4> Create msq = phi <msq_init, lsq> in loop */
2941 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2942 msq = make_ssa_name (vec_dest, NULL_TREE);
2943 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2944 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2945 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2946 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2949 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2950 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2951 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2952 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2953 new_temp = make_ssa_name (vec_dest, new_stmt);
2954 TREE_OPERAND (new_stmt, 0) = new_temp;
2955 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2960 *vec_stmt = new_stmt;
2965 /* Function vect_supportable_dr_alignment
2967 Return whether the data reference DR is supported with respect to its
2970 static enum dr_alignment_support
2971 vect_supportable_dr_alignment (struct data_reference *dr)
2973 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2974 enum machine_mode mode = (int) TYPE_MODE (vectype);
2976 if (aligned_access_p (dr))
2979 /* Possibly unaligned access. */
2981 if (DR_IS_READ (dr))
2983 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2984 && (!targetm.vectorize.builtin_mask_for_load
2985 || targetm.vectorize.builtin_mask_for_load ()))
2986 return dr_unaligned_software_pipeline;
2988 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2989 /* Can't software pipeline the loads, but can at least do them. */
2990 return dr_unaligned_supported;
2994 return dr_unaligned_unsupported;
2998 /* Function vect_transform_stmt.
3000 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3003 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
3005 bool is_store = false;
3006 tree vec_stmt = NULL_TREE;
3007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3010 switch (STMT_VINFO_TYPE (stmt_info))
3012 case op_vec_info_type:
3013 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3017 case assignment_vec_info_type:
3018 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3022 case load_vec_info_type:
3023 done = vectorizable_load (stmt, bsi, &vec_stmt);
3027 case store_vec_info_type:
3028 done = vectorizable_store (stmt, bsi, &vec_stmt);
3033 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3034 fprintf (vect_dump, "stmt not supported.");
3038 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3044 /* This function builds ni_name = number of iterations loop executes
3045 on the loop preheader. */
3048 vect_build_loop_niters (loop_vec_info loop_vinfo)
3050 tree ni_name, stmt, var;
3052 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3053 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3055 var = create_tmp_var (TREE_TYPE (ni), "niters");
3056 add_referenced_tmp_var (var);
3057 ni_name = force_gimple_operand (ni, &stmt, false, var);
3059 pe = loop_preheader_edge (loop);
3062 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3063 gcc_assert (!new_bb);
3070 /* This function generates the following statements:
3072 ni_name = number of iterations loop executes
3073 ratio = ni_name / vf
3074 ratio_mult_vf_name = ratio * vf
3076 and places them at the loop preheader edge. */
3079 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3081 tree *ratio_mult_vf_name_ptr,
3082 tree *ratio_name_ptr)
3090 tree ratio_mult_vf_name;
3091 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3092 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3093 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3094 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3096 pe = loop_preheader_edge (loop);
3098 /* Generate temporary variable that contains
3099 number of iterations loop executes. */
3101 ni_name = vect_build_loop_niters (loop_vinfo);
3103 /* Create: ratio = ni >> log2(vf) */
3105 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3106 add_referenced_tmp_var (var);
3107 ratio_name = make_ssa_name (var, NULL_TREE);
3108 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3109 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3110 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3112 pe = loop_preheader_edge (loop);
3113 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3114 gcc_assert (!new_bb);
3116 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3118 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3119 add_referenced_tmp_var (var);
3120 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3121 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3122 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3123 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3125 pe = loop_preheader_edge (loop);
3126 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3127 gcc_assert (!new_bb);
3129 *ni_name_ptr = ni_name;
3130 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3131 *ratio_name_ptr = ratio_name;
3137 /* Function vect_update_ivs_after_vectorizer.
3139 "Advance" the induction variables of LOOP to the value they should take
3140 after the execution of LOOP. This is currently necessary because the
3141 vectorizer does not handle induction variables that are used after the
3142 loop. Such a situation occurs when the last iterations of LOOP are
3144 1. We introduced new uses after LOOP for IVs that were not originally used
3145 after LOOP: the IVs of LOOP are now used by an epilog loop.
3146 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3147 times, whereas the loop IVs should be bumped N times.
3150 - LOOP - a loop that is going to be vectorized. The last few iterations
3151 of LOOP were peeled.
3152 - NITERS - the number of iterations that LOOP executes (before it is
3153 vectorized). i.e, the number of times the ivs should be bumped.
3154 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3155 coming out from LOOP on which there are uses of the LOOP ivs
3156 (this is the path from LOOP->exit to epilog_loop->preheader).
3158 The new definitions of the ivs are placed in LOOP->exit.
3159 The phi args associated with the edge UPDATE_E in the bb
3160 UPDATE_E->dest are updated accordingly.
3162 Assumption 1: Like the rest of the vectorizer, this function assumes
3163 a single loop exit that has a single predecessor.
3165 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3166 organized in the same order.
3168 Assumption 3: The access function of the ivs is simple enough (see
3169 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3171 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3172 coming out of LOOP on which the ivs of LOOP are used (this is the path
3173 that leads to the epilog loop; other paths skip the epilog loop). This
3174 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3175 needs to have its phis updated.
3179 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
3182 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3183 basic_block exit_bb = loop->exit_edges[0]->dest;
3185 basic_block update_bb = update_e->dest;
3187 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
3189 /* Make sure there exists a single-predecessor exit bb: */
3190 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3192 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3194 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3196 tree access_fn = NULL;
3197 tree evolution_part;
3200 tree var, stmt, ni, ni_name;
3201 block_stmt_iterator last_bsi;
3203 /* Skip virtual phi's. */
3204 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3206 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3207 fprintf (vect_dump, "virtual phi. skip.");
3211 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3212 gcc_assert (access_fn);
3214 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3215 gcc_assert (evolution_part != NULL_TREE);
3217 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3218 of degree >= 2 or exponential. */
3219 gcc_assert (!tree_is_chrec (evolution_part));
3221 step_expr = evolution_part;
3222 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3225 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3226 build2 (MULT_EXPR, TREE_TYPE (niters),
3227 niters, step_expr), init_expr);
3229 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3230 add_referenced_tmp_var (var);
3232 ni_name = force_gimple_operand (ni, &stmt, false, var);
3234 /* Insert stmt into exit_bb. */
3235 last_bsi = bsi_last (exit_bb);
3237 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3239 /* Fix phi expressions in the successor bb. */
3240 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3241 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3242 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
3247 /* Function vect_do_peeling_for_loop_bound
3249 Peel the last iterations of the loop represented by LOOP_VINFO.
3250 The peeled iterations form a new epilog loop. Given that the loop now
3251 iterates NITERS times, the new epilog loop iterates
3252 NITERS % VECTORIZATION_FACTOR times.
3254 The original loop will later be made to iterate
3255 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3258 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3259 struct loops *loops)
3262 tree ni_name, ratio_mult_vf_name;
3263 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3264 struct loop *new_loop;
3266 #ifdef ENABLE_CHECKING
3270 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3271 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
3273 /* Generate the following variables on the preheader of original loop:
3275 ni_name = number of iteration the original loop executes
3276 ratio = ni_name / vf
3277 ratio_mult_vf_name = ratio * vf */
3278 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3279 &ratio_mult_vf_name, ratio);
3281 /* Update loop info. */
3282 loop->pre_header = loop_preheader_edge (loop)->src;
3283 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3285 #ifdef ENABLE_CHECKING
3286 loop_num = loop->num;
3288 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3289 ratio_mult_vf_name, ni_name, false);
3290 #ifdef ENABLE_CHECKING
3291 gcc_assert (new_loop);
3292 gcc_assert (loop_num == loop->num);
3293 slpeel_verify_cfg_after_peeling (loop, new_loop);
3296 /* A guard that controls whether the new_loop is to be executed or skipped
3297 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3298 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3299 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3300 is on the path where the LOOP IVs are used and need to be updated. */
3302 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3303 update_e = EDGE_PRED (new_loop->pre_header, 0);
3305 update_e = EDGE_PRED (new_loop->pre_header, 1);
3307 /* Update IVs of original loop as if they were advanced
3308 by ratio_mult_vf_name steps. */
3309 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
3311 /* After peeling we have to reset scalar evolution analyzer. */
3318 /* Function vect_gen_niters_for_prolog_loop
3320 Set the number of iterations for the loop represented by LOOP_VINFO
3321 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3322 and the misalignment of DR - the first data reference recorded in
3323 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3324 this loop, the data reference DR will refer to an aligned location.
3326 The following computation is generated:
3328 compute address misalignment in bytes:
3329 addr_mis = addr & (vectype_size - 1)
3331 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3333 (elem_size = element type size; an element is the scalar element
3334 whose type is the inner type of the vectype) */
3337 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3339 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3340 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3341 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3343 tree iters, iters_name;
3346 tree dr_stmt = DR_STMT (dr);
3347 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3348 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3349 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3352 tree new_stmts = NULL_TREE;
3354 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3355 tree ptr_type = TREE_TYPE (start_addr);
3356 tree size = TYPE_SIZE (ptr_type);
3357 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3358 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3359 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3360 tree niters_type = TREE_TYPE (loop_niters);
3361 tree elem_size_log =
3362 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3363 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3365 pe = loop_preheader_edge (loop);
3366 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3367 gcc_assert (!new_bb);
3369 /* Create: byte_misalign = addr & (vectype_size - 1) */
3370 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3372 /* Create: elem_misalign = byte_misalign / element_size */
3374 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3376 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3377 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3378 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3379 iters = fold_convert (niters_type, iters);
3381 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3382 /* If the loop bound is known at compile time we already verified that it is
3383 greater than vf; since the misalignment ('iters') is at most vf, there's
3384 no need to generate the MIN_EXPR in this case. */
3385 if (TREE_CODE (loop_niters) != INTEGER_CST)
3386 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3388 var = create_tmp_var (niters_type, "prolog_loop_niters");
3389 add_referenced_tmp_var (var);
3390 iters_name = force_gimple_operand (iters, &stmt, false, var);
3392 /* Insert stmt on loop preheader edge. */
3393 pe = loop_preheader_edge (loop);
3396 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3397 gcc_assert (!new_bb);
3404 /* Function vect_update_inits_of_dr
3406 NITERS iterations were peeled from LOOP. DR represents a data reference
3407 in LOOP. This function updates the information recorded in DR to
3408 account for the fact that the first NITERS iterations had already been
3409 executed. Specifically, it updates the OFFSET field of stmt_info. */
3412 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3414 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3415 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3417 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3418 STMT_VINFO_VECT_STEP (stmt_info)));
3419 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3420 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3424 /* Function vect_update_inits_of_drs
3426 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3427 This function updates the information recorded for the data references in
3428 the loop to account for the fact that the first NITERS iterations had
3429 already been executed. Specifically, it updates the initial_condition of the
3430 access_function of all the data_references in the loop. */
3433 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3436 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3437 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3439 if (vect_dump && (dump_flags & TDF_DETAILS))
3440 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
3442 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3444 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3445 vect_update_inits_of_dr (dr, niters);
3448 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3450 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3451 vect_update_inits_of_dr (dr, niters);
3456 /* Function vect_do_peeling_for_alignment
3458 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3459 'niters' is set to the misalignment of one of the data references in the
3460 loop, thereby forcing it to refer to an aligned location at the beginning
3461 of the execution of this loop. The data reference for which we are
3462 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3465 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3467 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3468 tree niters_of_prolog_loop, ni_name;
3470 struct loop *new_loop;
3472 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3473 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
3475 ni_name = vect_build_loop_niters (loop_vinfo);
3476 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3478 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3480 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3481 niters_of_prolog_loop, ni_name, true);
3482 #ifdef ENABLE_CHECKING
3483 gcc_assert (new_loop);
3484 slpeel_verify_cfg_after_peeling (new_loop, loop);
3487 /* Update number of times loop executes. */
3488 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3489 LOOP_VINFO_NITERS (loop_vinfo) =
3490 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3492 /* Update the init conditions of the access functions of all data refs. */
3493 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3495 /* After peeling we have to reset scalar evolution analyzer. */
3502 /* Function vect_transform_loop.
3504 The analysis phase has determined that the loop is vectorizable.
3505 Vectorize the loop - created vectorized stmts to replace the scalar
3506 stmts in the loop, and update the loop exit condition. */
3509 vect_transform_loop (loop_vec_info loop_vinfo,
3510 struct loops *loops ATTRIBUTE_UNUSED)
3512 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3513 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3514 int nbbs = loop->num_nodes;
3515 block_stmt_iterator si;
3518 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3520 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3521 fprintf (vect_dump, "=== vec_transform_loop ===");
3524 /* Peel the loop if there are data refs with unknown alignment.
3525 Only one data ref with unknown store is allowed. */
3527 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3528 vect_do_peeling_for_alignment (loop_vinfo, loops);
3530 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3531 compile time constant), or it is a constant that doesn't divide by the
3532 vectorization factor, then an epilog loop needs to be created.
3533 We therefore duplicate the loop: the original loop will be vectorized,
3534 and will compute the first (n/VF) iterations. The second copy of the loop
3535 will remain scalar and will compute the remaining (n%VF) iterations.
3536 (VF is the vectorization factor). */
3538 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3539 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3540 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3541 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3543 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3544 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3546 /* 1) Make sure the loop header has exactly two entries
3547 2) Make sure we have a preheader basic block. */
3549 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3551 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3554 /* FORNOW: the vectorizer supports only loops which body consist
3555 of one basic block (header + empty latch). When the vectorizer will
3556 support more involved loop forms, the order by which the BBs are
3557 traversed need to be reconsidered. */
3559 for (i = 0; i < nbbs; i++)
3561 basic_block bb = bbs[i];
3563 for (si = bsi_start (bb); !bsi_end_p (si);)
3565 tree stmt = bsi_stmt (si);
3566 stmt_vec_info stmt_info;
3569 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3571 fprintf (vect_dump, "------>vectorizing statement: ");
3572 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3574 stmt_info = vinfo_for_stmt (stmt);
3575 gcc_assert (stmt_info);
3576 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3581 #ifdef ENABLE_CHECKING
3582 /* FORNOW: Verify that all stmts operate on the same number of
3583 units and no inner unrolling is necessary. */
3585 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3586 == vectorization_factor);
3588 /* -------- vectorize statement ------------ */
3589 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3590 fprintf (vect_dump, "transform statement.");
3592 is_store = vect_transform_stmt (stmt, &si);
3595 /* free the attached stmt_vec_info and remove the stmt. */
3596 stmt_ann_t ann = stmt_ann (stmt);
3598 set_stmt_info (ann, NULL);
3607 slpeel_make_loop_iterate_ntimes (loop, ratio);
3609 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
3610 fprintf (vect_dump, "LOOP VECTORIZED.");
3614 /* Function vect_is_simple_use.
3617 LOOP - the loop that is being vectorized.
3618 OPERAND - operand of a stmt in LOOP.
3619 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3621 Returns whether a stmt with OPERAND can be vectorized.
3622 Supportable operands are constants, loop invariants, and operands that are
3623 defined by the current iteration of the loop. Unsupportable operands are
3624 those that are defined by a previous iteration of the loop (as is the case
3625 in reduction/induction computations). */
3628 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
3632 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3637 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3640 if (TREE_CODE (operand) != SSA_NAME)
3643 def_stmt = SSA_NAME_DEF_STMT (operand);
3644 if (def_stmt == NULL_TREE )
3646 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3647 fprintf (vect_dump, "no def_stmt.");
3651 /* empty stmt is expected only in case of a function argument.
3652 (Otherwise - we expect a phi_node or a modify_expr). */
3653 if (IS_EMPTY_STMT (def_stmt))
3655 tree arg = TREE_OPERAND (def_stmt, 0);
3656 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3658 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3660 fprintf (vect_dump, "Unexpected empty stmt: ");
3661 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
3666 /* phi_node inside the loop indicates an induction/reduction pattern.
3667 This is not supported yet. */
3668 bb = bb_for_stmt (def_stmt);
3669 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3671 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3672 fprintf (vect_dump, "reduction/induction - unsupported.");
3673 return false; /* FORNOW: not supported yet. */
3676 /* Expecting a modify_expr or a phi_node. */
3677 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3678 || TREE_CODE (def_stmt) == PHI_NODE)
3689 /* Function vect_analyze_operations.
3691 Scan the loop stmts and make sure they are all vectorizable. */
3694 vect_analyze_operations (loop_vec_info loop_vinfo)
3696 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3697 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3698 int nbbs = loop->num_nodes;
3699 block_stmt_iterator si;
3700 unsigned int vectorization_factor = 0;
3705 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3706 fprintf (vect_dump, "=== vect_analyze_operations ===");
3708 for (i = 0; i < nbbs; i++)
3710 basic_block bb = bbs[i];
3712 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3714 tree stmt = bsi_stmt (si);
3715 unsigned int nunits;
3716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3719 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3721 fprintf (vect_dump, "==> examining statement: ");
3722 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3725 gcc_assert (stmt_info);
3727 /* skip stmts which do not need to be vectorized.
3728 this is expected to include:
3729 - the COND_EXPR which is the loop exit condition
3730 - any LABEL_EXPRs in the loop
3731 - computations that are used only for array indexing or loop
3734 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3736 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3737 fprintf (vect_dump, "irrelevant.");
3741 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3743 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3744 LOOP_LOC (loop_vinfo)))
3746 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
3747 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3752 if (STMT_VINFO_DATA_REF (stmt_info))
3753 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3754 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3755 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3757 scalar_type = TREE_TYPE (stmt);
3759 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3761 fprintf (vect_dump, "get vectype for scalar type: ");
3762 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3765 vectype = get_vectype_for_scalar_type (scalar_type);
3768 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3769 LOOP_LOC (loop_vinfo)))
3772 "not vectorized: unsupported data-type ");
3773 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
3778 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3780 fprintf (vect_dump, "vectype: ");
3781 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3783 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3785 ok = (vectorizable_operation (stmt, NULL, NULL)
3786 || vectorizable_assignment (stmt, NULL, NULL)
3787 || vectorizable_load (stmt, NULL, NULL)
3788 || vectorizable_store (stmt, NULL, NULL));
3792 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3793 LOOP_LOC (loop_vinfo)))
3795 fprintf (vect_dump, "not vectorized: stmt not supported: ");
3796 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3801 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3802 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3803 fprintf (vect_dump, "nunits = %d", nunits);
3805 if (vectorization_factor)
3807 /* FORNOW: don't allow mixed units.
3808 This restriction will be relaxed in the future. */
3809 if (nunits != vectorization_factor)
3811 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3812 LOOP_LOC (loop_vinfo)))
3813 fprintf (vect_dump, "not vectorized: mixed data-types");
3818 vectorization_factor = nunits;
3820 #ifdef ENABLE_CHECKING
3821 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3822 * vectorization_factor == UNITS_PER_SIMD_WORD);
3827 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3829 if (vectorization_factor <= 1)
3831 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3832 LOOP_LOC (loop_vinfo)))
3833 fprintf (vect_dump, "not vectorized: unsupported data-type");
3836 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3838 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3839 && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3841 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3842 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3844 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3845 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3847 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3848 LOOP_LOC (loop_vinfo)))
3849 fprintf (vect_dump, "not vectorized: iteration count too small.");
3853 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3854 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3856 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
3857 fprintf (vect_dump, "epilog loop required.");
3858 if (!vect_can_advance_ivs_p (loop_vinfo))
3860 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3861 LOOP_LOC (loop_vinfo)))
3863 "not vectorized: can't create epilog loop 1.");
3866 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3868 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
3869 LOOP_LOC (loop_vinfo)))
3871 "not vectorized: can't create epilog loop 2.");
3880 /* Function exist_non_indexing_operands_for_use_p
3882 USE is one of the uses attached to STMT. Check if USE is
3883 used in STMT for anything other than indexing an array. */
3886 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3889 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3891 /* USE corresponds to some operand in STMT. If there is no data
3892 reference in STMT, then any operand that corresponds to USE
3893 is not indexing an array. */
3894 if (!STMT_VINFO_DATA_REF (stmt_info))
3897 /* STMT has a data_ref. FORNOW this means that its of one of
3898 the following forms:
3901 (This should have been verified in analyze_data_refs).
3903 'var' in the second case corresponds to a def, not a use,
3904 so USE cannot correspond to any operands that are not used
3907 Therefore, all we need to check is if STMT falls into the
3908 first case, and whether var corresponds to USE. */
3910 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3913 operand = TREE_OPERAND (stmt, 1);
3915 if (TREE_CODE (operand) != SSA_NAME)
3925 /* Function vect_is_simple_iv_evolution.
3927 FORNOW: A simple evolution of an induction variables in the loop is
3928 considered a polynomial evolution with constant step. */
3931 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3932 tree * step, bool strict)
3937 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3939 /* When there is no evolution in this loop, the evolution function
3941 if (evolution_part == NULL_TREE)
3944 /* When the evolution is a polynomial of degree >= 2
3945 the evolution function is not "simple". */
3946 if (tree_is_chrec (evolution_part))
3949 step_expr = evolution_part;
3950 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3953 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3955 fprintf (vect_dump, "step: ");
3956 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
3957 fprintf (vect_dump, ", init: ");
3958 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
3964 if (TREE_CODE (step_expr) != INTEGER_CST)
3966 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3967 fprintf (vect_dump, "step unknown.");
3972 if (!integer_onep (step_expr))
3974 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
3975 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
3983 /* Function vect_analyze_scalar_cycles.
3985 Examine the cross iteration def-use cycles of scalar variables, by
3986 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3987 cycles that they represent do not impede vectorization.
3989 FORNOW: Reduction as in the following loop, is not supported yet:
3993 The cross-iteration cycle corresponding to variable 'sum' will be
3994 considered too complicated and will impede vectorization.
3996 FORNOW: Induction as in the following loop, is not supported yet:
4001 However, the following loop *is* vectorizable:
4006 In both loops there exists a def-use cycle for the variable i:
4007 loop: i_2 = PHI (i_0, i_1)
4012 The evolution of the above cycle is considered simple enough,
4013 however, we also check that the cycle does not need to be
4014 vectorized, i.e - we check that the variable that this cycle
4015 defines is only used for array indexing or in stmts that do not
4016 need to be vectorized. This is not the case in loop2, but it
4017 *is* the case in loop3. */
4020 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
4023 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4024 basic_block bb = loop->header;
4027 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4028 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
4030 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
4032 tree access_fn = NULL;
4034 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4036 fprintf (vect_dump, "Analyze phi: ");
4037 print_generic_expr (vect_dump, phi, TDF_SLIM);
4040 /* Skip virtual phi's. The data dependences that are associated with
4041 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
4043 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
4045 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4046 fprintf (vect_dump, "virtual phi. skip.");
4050 /* Analyze the evolution function. */
4052 /* FORNOW: The only scalar cross-iteration cycles that we allow are
4053 those of loop induction variables; This property is verified here.
4055 Furthermore, if that induction variable is used in an operation
4056 that needs to be vectorized (i.e, is not solely used to index
4057 arrays and check the exit condition) - we do not support its
4058 vectorization yet. This property is verified in vect_is_simple_use,
4059 during vect_analyze_operations. */
4061 access_fn = /* instantiate_parameters
4063 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4067 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4068 LOOP_LOC (loop_vinfo)))
4069 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
4073 if (vect_print_dump_info (REPORT_DETAILS,
4074 LOOP_LOC (loop_vinfo)))
4076 fprintf (vect_dump, "Access function of PHI: ");
4077 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4080 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4083 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4084 LOOP_LOC (loop_vinfo)))
4085 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
4094 /* Function vect_base_addr_differ_p.
4096 This is the simplest data dependence test: determines whether the
4097 data references A and B access the same array/region. Returns
4098 false when the property is not computable at compile time.
4099 Otherwise return true, and DIFFER_P will record the result. This
4100 utility will not be necessary when alias_sets_conflict_p will be
4101 less conservative. */
4105 vect_base_addr_differ_p (struct data_reference *dra,
4106 struct data_reference *drb,
4109 tree stmt_a = DR_STMT (dra);
4110 stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
4111 tree stmt_b = DR_STMT (drb);
4112 stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
4113 tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
4114 tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
4115 tree type_a = TREE_TYPE (addr_a);
4116 tree type_b = TREE_TYPE (addr_b);
4117 HOST_WIDE_INT alias_set_a, alias_set_b;
4119 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
4121 /* Both references are ADDR_EXPR, i.e., we have the objects. */
4122 if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
4123 return array_base_name_differ_p (dra, drb, differ_p);
4125 alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
4126 get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
4127 alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
4128 get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);
4130 if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
4136 /* An instruction writing through a restricted pointer is "independent" of any
4137 instruction reading or writing through a different pointer, in the same
4139 else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
4140 || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
4149 /* Function vect_analyze_data_ref_dependence.
4151 Return TRUE if there (might) exist a dependence between a memory-reference
4152 DRA and a memory-reference DRB. */
4155 vect_analyze_data_ref_dependence (struct data_reference *dra,
4156 struct data_reference *drb,
4157 loop_vec_info loop_vinfo)
4160 struct data_dependence_relation *ddr;
4162 if (!vect_base_addr_differ_p (dra, drb, &differ_p))
4164 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4165 LOOP_LOC (loop_vinfo)))
4168 "not vectorized: can't determine dependence between: ");
4169 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
4170 fprintf (vect_dump, " and ");
4171 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
4179 ddr = initialize_data_dependence_relation (dra, drb);
4180 compute_affine_dependence (ddr);
4182 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4185 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4186 LOOP_LOC (loop_vinfo)))
4189 "not vectorized: possible dependence between data-refs ");
4190 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
4191 fprintf (vect_dump, " and ");
4192 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
4199 /* Function vect_analyze_data_ref_dependences.
4201 Examine all the data references in the loop, and make sure there do not
4202 exist any data dependences between them.
4204 TODO: dependences which distance is greater than the vectorization factor
4208 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4211 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4212 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4214 /* Examine store-store (output) dependences. */
4216 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4217 fprintf (vect_dump, "=== vect_analyze_dependences ===");
4219 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4220 fprintf (vect_dump, "compare all store-store pairs.");
4222 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4224 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4226 struct data_reference *dra =
4227 VARRAY_GENERIC_PTR (loop_write_refs, i);
4228 struct data_reference *drb =
4229 VARRAY_GENERIC_PTR (loop_write_refs, j);
4230 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4235 /* Examine load-store (true/anti) dependences. */
4237 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4238 fprintf (vect_dump, "compare all load-store pairs.");
4240 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4242 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4244 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4245 struct data_reference *drb =
4246 VARRAY_GENERIC_PTR (loop_write_refs, j);
4247 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
4256 /* Function vect_compute_data_ref_alignment
4258 Compute the misalignment of the data reference DR.
4261 1. If during the misalignment computation it is found that the data reference
4262 cannot be vectorized then false is returned.
4263 2. DR_MISALIGNMENT (DR) is defined.
4265 FOR NOW: No analysis is actually performed. Misalignment is calculated
4266 only for trivial cases. TODO. */
4269 vect_compute_data_ref_alignment (struct data_reference *dr)
4271 tree stmt = DR_STMT (dr);
4272 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4273 tree ref = DR_REF (dr);
4275 tree base, alignment;
4276 bool base_aligned_p;
4279 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4280 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
4282 /* Initialize misalignment to unknown. */
4283 DR_MISALIGNMENT (dr) = -1;
4285 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4286 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4287 base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
4288 vectype = STMT_VINFO_VECTYPE (stmt_info);
4292 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4294 fprintf (vect_dump, "Unknown alignment for access: ");
4295 print_generic_expr (vect_dump, base, TDF_SLIM);
4300 if (!base_aligned_p)
4302 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4304 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4306 fprintf (vect_dump, "can't force alignment of ref: ");
4307 print_generic_expr (vect_dump, ref, TDF_SLIM);
4312 /* Force the alignment of the decl.
4313 NOTE: This is the only change to the code we make during
4314 the analysis phase, before deciding to vectorize the loop. */
4315 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4316 fprintf (vect_dump, "force alignment");
4317 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4318 DECL_USER_ALIGN (base) = 1;
4321 /* At this point we assume that the base is aligned. */
4322 gcc_assert (base_aligned_p
4323 || (TREE_CODE (base) == VAR_DECL
4324 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4326 /* Alignment required, in bytes: */
4327 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4329 /* Modulo alignment. */
4330 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4331 if (tree_int_cst_sgn (misalign) < 0)
4333 /* Negative misalignment value. */
4334 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4335 fprintf (vect_dump, "unexpected misalign value");
4339 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4341 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4342 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
4348 /* Function vect_compute_data_refs_alignment
4350 Compute the misalignment of data references in the loop.
4351 This pass may take place at function granularity instead of at loop
4354 FOR NOW: No analysis is actually performed. Misalignment is calculated
4355 only for trivial cases. TODO. */
4358 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4360 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4361 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4364 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4366 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4367 if (!vect_compute_data_ref_alignment (dr))
4371 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4373 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4374 if (!vect_compute_data_ref_alignment (dr))
4382 /* Function vect_enhance_data_refs_alignment
4384 This pass will use loop versioning and loop peeling in order to enhance
4385 the alignment of data references in the loop.
4387 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4388 original loop is to be vectorized; Any other loops that are created by
4389 the transformations performed in this pass - are not supposed to be
4390 vectorized. This restriction will be relaxed. */
4393 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4395 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4396 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4400 This pass will require a cost model to guide it whether to apply peeling
4401 or versioning or a combination of the two. For example, the scheme that
4402 intel uses when given a loop with several memory accesses, is as follows:
4403 choose one memory access ('p') which alignment you want to force by doing
4404 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4405 other accesses are not necessarily aligned, or (2) use loop versioning to
4406 generate one loop in which all accesses are aligned, and another loop in
4407 which only 'p' is necessarily aligned.
4409 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4410 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4411 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4413 Devising a cost model is the most critical aspect of this work. It will
4414 guide us on which access to peel for, whether to use loop versioning, how
4415 many versions to create, etc. The cost model will probably consist of
4416 generic considerations as well as target specific considerations (on
4417 powerpc for example, misaligned stores are more painful than misaligned
4420 Here is the general steps involved in alignment enhancements:
4422 -- original loop, before alignment analysis:
4423 for (i=0; i<N; i++){
4424 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4425 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4428 -- After vect_compute_data_refs_alignment:
4429 for (i=0; i<N; i++){
4430 x = q[i]; # DR_MISALIGNMENT(q) = 3
4431 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4434 -- Possibility 1: we do loop versioning:
4436 for (i=0; i<N; i++){ # loop 1A
4437 x = q[i]; # DR_MISALIGNMENT(q) = 3
4438 p[i] = y; # DR_MISALIGNMENT(p) = 0
4442 for (i=0; i<N; i++){ # loop 1B
4443 x = q[i]; # DR_MISALIGNMENT(q) = 3
4444 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4448 -- Possibility 2: we do loop peeling:
4449 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4453 for (i = 3; i < N; i++){ # loop 2A
4454 x = q[i]; # DR_MISALIGNMENT(q) = 0
4455 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4458 -- Possibility 3: combination of loop peeling and versioning:
4459 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4464 for (i = 3; i<N; i++){ # loop 3A
4465 x = q[i]; # DR_MISALIGNMENT(q) = 0
4466 p[i] = y; # DR_MISALIGNMENT(p) = 0
4470 for (i = 3; i<N; i++){ # loop 3B
4471 x = q[i]; # DR_MISALIGNMENT(q) = 0
4472 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4476 These loops are later passed to loop_transform to be vectorized. The
4477 vectorizer will use the alignment information to guide the transformation
4478 (whether to generate regular loads/stores, or with special handling for
4482 /* (1) Peeling to force alignment. */
4484 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4486 + How many accesses will become aligned due to the peeling
4487 - How many accesses will become unaligned due to the peeling,
4488 and the cost of misaligned accesses.
4489 - The cost of peeling (the extra runtime checks, the increase
4492 The scheme we use FORNOW: peel to force the alignment of the first
4493 misaligned store in the loop.
4494 Rationale: misaligned stores are not yet supported.
4496 TODO: Use a better cost model. */
4498 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4500 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4501 if (!aligned_access_p (dr))
4503 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4504 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4509 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4511 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
4512 fprintf (vect_dump, "Peeling for alignment will not be applied.");
4516 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
4517 fprintf (vect_dump, "Peeling for alignment will be applied.");
4520 /* (1.2) Update the alignment info according to the peeling factor.
4521 If the misalignment of the DR we peel for is M, then the
4522 peeling factor is VF - M, and the misalignment of each access DR_i
4523 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4524 If the misalignment of the DR we peel for is unknown, then the
4525 misalignment of each access DR_i in the loop is also unknown.
4527 FORNOW: set the misalignment of the accesses to unknown even
4528 if the peeling factor is known at compile time.
4530 TODO: - if the peeling factor is known at compile time, use that
4531 when updating the misalignment info of the loop DRs.
4532 - consider accesses that are known to have the same
4533 alignment, even if that alignment is unknown. */
4535 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4537 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4538 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4540 DR_MISALIGNMENT (dr) = 0;
4541 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
4542 fprintf (vect_dump, "Alignment of access forced using peeling.");
4545 DR_MISALIGNMENT (dr) = -1;
4547 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4549 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4550 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4552 DR_MISALIGNMENT (dr) = 0;
4553 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
4554 fprintf (vect_dump, "Alignment of access forced using peeling.");
4557 DR_MISALIGNMENT (dr) = -1;
4562 /* Function vect_analyze_data_refs_alignment
4564 Analyze the alignment of the data-references in the loop.
4565 FOR NOW: Until support for misliagned accesses is in place, only if all
4566 accesses are aligned can the loop be vectorized. This restriction will be
4570 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4572 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4573 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4574 enum dr_alignment_support supportable_dr_alignment;
4577 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4578 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
4581 /* This pass may take place at function granularity instead of at loop
4584 if (!vect_compute_data_refs_alignment (loop_vinfo))
4586 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4587 LOOP_LOC (loop_vinfo)))
4589 "not vectorized: can't calculate alignment for data ref.");
4594 /* This pass will decide on using loop versioning and/or loop peeling in
4595 order to enhance the alignment of data references in the loop. */
4597 vect_enhance_data_refs_alignment (loop_vinfo);
4600 /* Finally, check that all the data references in the loop can be
4601 handled with respect to their alignment. */
4603 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4605 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4606 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4607 if (!supportable_dr_alignment)
4609 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4610 LOOP_LOC (loop_vinfo)))
4611 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
4614 if (supportable_dr_alignment != dr_aligned
4615 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
4616 fprintf (vect_dump, "Vectorizing an unaligned access.");
4618 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4620 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4621 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4622 if (!supportable_dr_alignment)
4624 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4625 LOOP_LOC (loop_vinfo)))
4626 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
4629 if (supportable_dr_alignment != dr_aligned
4630 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
4631 fprintf (vect_dump, "Vectorizing an unaligned access.");
4638 /* Function vect_analyze_data_ref_access.
4640 Analyze the access pattern of the data-reference DR. For now, a data access
4641 has to consecutive to be considered vectorizable. */
4644 vect_analyze_data_ref_access (struct data_reference *dr)
4646 tree stmt = DR_STMT (dr);
4647 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4648 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4649 tree scalar_type = TREE_TYPE (DR_REF (dr));
4651 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4653 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4654 fprintf (vect_dump, "not consecutive access");
4661 /* Function vect_analyze_data_ref_accesses.
4663 Analyze the access pattern of all the data references in the loop.
4665 FORNOW: the only access pattern that is considered vectorizable is a
4666 simple step 1 (consecutive) access.
4668 FORNOW: handle only arrays and pointer accesses. */
4671 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4674 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4675 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4677 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4678 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
4680 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4682 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4683 bool ok = vect_analyze_data_ref_access (dr);
4686 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4687 LOOP_LOC (loop_vinfo)))
4688 fprintf (vect_dump, "not vectorized: complicated access pattern.");
4693 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4695 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4696 bool ok = vect_analyze_data_ref_access (dr);
4699 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4700 LOOP_LOC (loop_vinfo)))
4701 fprintf (vect_dump, "not vectorized: complicated access pattern.");
4710 /* Function vect_analyze_pointer_ref_access.
4713 STMT - a stmt that contains a data-ref
4714 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4716 If the data-ref access is vectorizable, return a data_reference structure
4717 that represents it (DR). Otherwise - return NULL. */
4719 static struct data_reference *
4720 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
4723 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4724 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4725 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4726 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4728 tree reftype, innertype;
4729 tree indx_access_fn;
4730 int loopnum = loop->num;
4731 struct data_reference *dr;
4735 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4736 LOOP_LOC (loop_vinfo)))
4737 fprintf (vect_dump, "not vectorized: complicated pointer access.");
4741 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4743 fprintf (vect_dump, "Access function of ptr: ");
4744 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
4747 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4749 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4750 LOOP_LOC (loop_vinfo)))
4751 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
4757 if (!expr_invariant_in_loop_p (loop, init))
4759 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4760 LOOP_LOC (loop_vinfo)))
4762 "not vectorized: initial condition is not loop invariant.");
4766 if (TREE_CODE (step) != INTEGER_CST)
4768 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4769 LOOP_LOC (loop_vinfo)))
4771 "not vectorized: non constant step for pointer access.");
4775 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4776 if (TREE_CODE (reftype) != POINTER_TYPE)
4778 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4779 LOOP_LOC (loop_vinfo)))
4780 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
4784 reftype = TREE_TYPE (init);
4785 if (TREE_CODE (reftype) != POINTER_TYPE)
4787 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4788 LOOP_LOC (loop_vinfo)))
4789 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
4793 innertype = TREE_TYPE (reftype);
4794 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4796 /* FORNOW: support only consecutive access */
4797 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4798 LOOP_LOC (loop_vinfo)))
4799 fprintf (vect_dump, "not vectorized: non consecutive access.");
4803 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (ssizetype, step);
4804 if (TREE_CODE (init) == PLUS_EXPR
4805 || TREE_CODE (init) == MINUS_EXPR)
4806 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4807 size_binop (TREE_CODE (init), ssize_int (0),
4808 fold_convert (ssizetype, TREE_OPERAND (init, 1)));
4810 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = ssize_int (0);
4813 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4814 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
4816 fprintf (vect_dump, "Access function of ptr indx: ");
4817 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
4819 dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
4825 /* Function vect_get_memtag_and_dr.
4827 The function returns the relevant variable for memory tag (for aliasing
4828 purposes). Also data reference structure DR is created.
4830 This function handles three kinds of MEMREF:
4832 It is called from vect_analyze_data_refs with a MEMREF that is either an
4833 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4834 It builds a DR for them using vect_get_base_and_offset, and calls itself
4835 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4836 MEMREF along the way. During the recursive calls, the function may be called
4837 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4838 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4839 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4840 and SSA_NAME (this is category 3 - "recursion stop condition").
4842 When the MEMREF falls into category 1 there is still no data reference struct
4843 (DR) available. It is created by this function, and then, along the
4844 recursion, MEMREF will fall into category 2 or 3, in which case a DR will
4845 have already been created, but the analysis continues to retrieve the MEMTAG.
4848 MEMREF - data reference in STMT
4849 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4852 DR - data_reference struct for MEMREF
4853 return value - the relevant variable for memory tag (for aliasing purposes).
4858 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4859 loop_vec_info loop_vinfo,
4860 tree vectype, struct data_reference **dr)
4862 tree symbl, oprnd0, oprnd1;
4863 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4864 tree offset, misalign, step;
4865 tree ref_to_be_analyzed, tag, dr_base;
4866 struct data_reference *new_dr;
4867 bool base_aligned_p;
4872 /* Category 3: recursion stop condition. */
4873 /* (1) A DR already exists. We only need to get the relevant memtag for
4874 MEMREF, the rest of the data was already initialized. */
4876 switch (TREE_CODE (memref))
4878 /* (1.1) Stop condition: find the relevant memtag and return. */
4880 symbl = SSA_NAME_VAR (memref);
4881 tag = get_var_ann (symbl)->type_mem_tag;
4884 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4885 if (TREE_CODE (ptr) == SSA_NAME)
4886 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4890 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
4892 fprintf (vect_dump, "not vectorized: no memtag for ref.");
4901 /* Category 2: recursion continues. */
4902 /* (1.2) A recursive call to find the relevant memtag is required. */
4904 symbl = TREE_OPERAND (memref, 0);
4905 break; /* For recursive call. */
4908 /* Could have recorded more accurate information -
4909 i.e, the actual FIELD_DECL that is being referenced -
4910 but later passes expect VAR_DECL as the nmt. */
4914 symbl = build_fold_indirect_ref (
4915 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
4916 break; /* For recursive call. */
4920 /* Although DR exists, we have to call the function recursively to
4921 build MEMTAG for such expression. This is handled below. */
4922 oprnd0 = TREE_OPERAND (memref, 0);
4923 oprnd1 = TREE_OPERAND (memref, 1);
4925 STRIP_NOPS (oprnd1);
4926 /* Supported plus/minus expressions are of the form
4927 {address_base + offset}, such that address_base is of type
4928 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4929 or it's not of type POINTER/ARRAY.
4930 TODO: swap operands if {offset + address_base}. */
4931 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4932 && TREE_CODE (oprnd1) != INTEGER_CST)
4933 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4937 break; /* For recursive call. */
4945 /* Category 1: recursion begins. */
4946 /* (2) A DR does not exist yet and must be built, followed by a
4947 recursive call to get the relevant memtag for MEMREF. */
4949 switch (TREE_CODE (memref))
4952 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
4958 ref_to_be_analyzed = ptr_init;
4962 new_dr = analyze_array (stmt, memref, is_read);
4964 symbl = DR_BASE_NAME (new_dr);
4965 ref_to_be_analyzed = memref;
4969 /* TODO: Support data-refs of form a[i].p for unions and single
4970 field structures. */
4974 offset = ssize_int (0);
4975 misalign = ssize_int (0);
4976 step = ssize_int (0);
4978 /* Analyze data-ref, find its base, initial offset from the base, step,
4980 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4981 vectype, loop_vinfo, &offset,
4982 &misalign, &step, &base_aligned_p);
4986 /* Initialize information according to above analysis. */
4987 /* Since offset and step of a pointer can be also set in
4988 vect_analyze_pointer_ref_access, we combine the values here. */
4989 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4990 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4991 size_binop (PLUS_EXPR, offset,
4992 STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
4994 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4996 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4997 STMT_VINFO_VECT_STEP (stmt_info) =
4998 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
5000 STMT_VINFO_VECT_STEP (stmt_info) = step;
5002 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
5003 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
5004 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = dr_base;
5009 /* Recursive call to retrieve the relevant memtag. */
5010 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
5015 /* Function vect_analyze_data_refs.
5017 Find all the data references in the loop.
5019 The general structure of the analysis of data refs in the vectorizer is as
5021 1- vect_analyze_data_refs(loop):
5022 Find and analyze all data-refs in the loop:
5024 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
5025 1.1- vect_get_memtag_and_dr(ref):
5026 Analyze ref, and build a DR (data_referece struct) for it;
5027 call vect_get_base_and_offset to compute base, initial_offset,
5028 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
5029 ref_stmt.alignment, and ref_stmt.step accordingly.
5030 1.1.1- vect_get_base_and_offset():
5031 Calculate base, initial_offset, step and alignment.
5032 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
5033 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
5034 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
5035 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
5037 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
5038 which base is really an array (not a pointer) and which alignment
5039 can be forced. This restriction will be relaxed. */
5042 vect_analyze_data_refs (loop_vec_info loop_vinfo)
5044 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5045 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5046 int nbbs = loop->num_nodes;
5047 block_stmt_iterator si;
5049 struct data_reference *dr;
5051 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5052 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
5054 for (j = 0; j < nbbs; j++)
5056 basic_block bb = bbs[j];
5057 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5059 bool is_read = false;
5060 tree stmt = bsi_stmt (si);
5061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5062 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5063 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5064 vuse_optype vuses = STMT_VUSE_OPS (stmt);
5065 varray_type *datarefs = NULL;
5066 int nvuses, nv_may_defs, nv_must_defs;
5069 tree scalar_type, vectype;
5071 /* Assumption: there exists a data-ref in stmt, if and only if
5072 it has vuses/vdefs. */
5074 if (!vuses && !v_may_defs && !v_must_defs)
5077 nvuses = NUM_VUSES (vuses);
5078 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
5079 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
5081 if (nvuses && (nv_may_defs || nv_must_defs))
5083 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5085 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
5086 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5091 if (TREE_CODE (stmt) != MODIFY_EXPR)
5093 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5095 fprintf (vect_dump, "unexpected vops in stmt: ");
5096 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5103 memref = TREE_OPERAND (stmt, 1);
5104 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
5109 memref = TREE_OPERAND (stmt, 0);
5110 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
5114 scalar_type = TREE_TYPE (memref);
5115 vectype = get_vectype_for_scalar_type (scalar_type);
5118 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5120 fprintf (vect_dump, "no vectype for stmt: ");
5121 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5122 fprintf (vect_dump, " scalar_type: ");
5123 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
5125 /* It is not possible to vectorize this data reference. */
5128 /* Analyze MEMREF. If it is of a supported form, build data_reference
5129 struct for it (DR) and find memtag for aliasing purposes. */
5131 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
5135 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5136 LOOP_LOC (loop_vinfo)))
5138 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
5139 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5143 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5144 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5145 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5146 STMT_VINFO_DATA_REF (stmt_info) = dr;
5154 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5156 /* Function vect_mark_relevant.
5158 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5161 vect_mark_relevant (varray_type *worklist, tree stmt)
5163 stmt_vec_info stmt_info;
5165 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5166 fprintf (vect_dump, "mark relevant.");
5168 if (TREE_CODE (stmt) == PHI_NODE)
5170 VARRAY_PUSH_TREE (*worklist, stmt);
5174 stmt_info = vinfo_for_stmt (stmt);
5178 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5180 fprintf (vect_dump, "mark relevant: no stmt info!!.");
5181 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5186 if (STMT_VINFO_RELEVANT_P (stmt_info))
5188 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5189 fprintf (vect_dump, "already marked relevant.");
5193 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5194 VARRAY_PUSH_TREE (*worklist, stmt);
5198 /* Function vect_stmt_relevant_p.
5200 Return true if STMT in loop that is represented by LOOP_VINFO is
5201 "relevant for vectorization".
5203 A stmt is considered "relevant for vectorization" if:
5204 - it has uses outside the loop.
5205 - it has vdefs (it alters memory).
5206 - control stmts in the loop (except for the exit condition).
5208 CHECKME: what other side effects would the vectorizer allow? */
5211 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5213 v_may_def_optype v_may_defs;
5214 v_must_def_optype v_must_defs;
5215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5220 /* cond stmt other than loop exit cond. */
5221 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5224 /* changing memory. */
5225 if (TREE_CODE (stmt) != PHI_NODE)
5227 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5228 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5229 if (v_may_defs || v_must_defs)
5231 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5232 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
5237 /* uses outside the loop. */
5238 df = get_immediate_uses (stmt);
5239 num_uses = num_immediate_uses (df);
5240 for (i = 0; i < num_uses; i++)
5242 tree use = immediate_use (df, i);
5243 basic_block bb = bb_for_stmt (use);
5244 if (!flow_bb_inside_loop_p (loop, bb))
5246 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5247 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
5256 /* Function vect_mark_stmts_to_be_vectorized.
5258 Not all stmts in the loop need to be vectorized. For example:
5267 Stmt 1 and 3 do not need to be vectorized, because loop control and
5268 addressing of vectorized data-refs are handled differently.
5270 This pass detects such stmts. */
5273 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5275 varray_type worklist;
5276 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5277 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5278 unsigned int nbbs = loop->num_nodes;
5279 block_stmt_iterator si;
5285 stmt_vec_info stmt_info;
5289 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5290 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
5293 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5295 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5297 fprintf (vect_dump, "init: phi relevant? ");
5298 print_generic_expr (vect_dump, phi, TDF_SLIM);
5301 if (vect_stmt_relevant_p (phi, loop_vinfo))
5303 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5304 LOOP_LOC (loop_vinfo)))
5305 fprintf (vect_dump, "unsupported reduction/induction.");
5310 VARRAY_TREE_INIT (worklist, 64, "work list");
5312 /* 1. Init worklist. */
5314 for (i = 0; i < nbbs; i++)
5317 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5319 stmt = bsi_stmt (si);
5321 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5323 fprintf (vect_dump, "init: stmt relevant? ");
5324 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5327 stmt_info = vinfo_for_stmt (stmt);
5328 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5330 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5331 vect_mark_relevant (&worklist, stmt);
5336 /* 2. Process_worklist */
5338 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5340 stmt = VARRAY_TOP_TREE (worklist);
5341 VARRAY_POP (worklist);
5343 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5345 fprintf (vect_dump, "worklist: examine stmt: ");
5346 print_generic_expr (vect_dump, stmt, TDF_SLIM);
5349 /* Examine the USES in this statement. Mark all the statements which
5350 feed this statement's uses as "relevant", unless the USE is used as
5353 if (TREE_CODE (stmt) == PHI_NODE)
5355 /* follow the def-use chain inside the loop. */
5356 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5358 tree arg = PHI_ARG_DEF (stmt, j);
5359 tree def_stmt = NULL_TREE;
5361 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
5363 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5364 LOOP_LOC (loop_vinfo)))
5365 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
5366 varray_clear (worklist);
5372 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5374 fprintf (vect_dump, "worklist: def_stmt: ");
5375 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
5378 bb = bb_for_stmt (def_stmt);
5379 if (flow_bb_inside_loop_p (loop, bb))
5380 vect_mark_relevant (&worklist, def_stmt);
5384 ann = stmt_ann (stmt);
5385 use_ops = USE_OPS (ann);
5387 for (i = 0; i < NUM_USES (use_ops); i++)
5389 tree use = USE_OP (use_ops, i);
5391 /* We are only interested in uses that need to be vectorized. Uses
5392 that are used for address computation are not considered relevant.
5394 if (exist_non_indexing_operands_for_use_p (use, stmt))
5396 tree def_stmt = NULL_TREE;
5398 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
5400 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
5401 LOOP_LOC (loop_vinfo)))
5402 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
5403 varray_clear (worklist);
5410 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5412 fprintf (vect_dump, "worklist: examine use %d: ", i);
5413 print_generic_expr (vect_dump, use, TDF_SLIM);
5416 bb = bb_for_stmt (def_stmt);
5417 if (flow_bb_inside_loop_p (loop, bb))
5418 vect_mark_relevant (&worklist, def_stmt);
5421 } /* while worklist */
5423 varray_clear (worklist);
5428 /* Function vect_can_advance_ivs_p
5430 In case the number of iterations that LOOP iterates in unknown at compile
5431 time, an epilog loop will be generated, and the loop induction variables
5432 (IVs) will be "advanced" to the value they are supposed to take just before
5433 the epilog loop. Here we check that the access function of the loop IVs
5434 and the expression that represents the loop bound are simple enough.
5435 These restrictions will be relaxed in the future. */
5438 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
5440 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5441 basic_block bb = loop->header;
5444 /* Analyze phi functions of the loop header. */
5446 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5448 tree access_fn = NULL;
5449 tree evolution_part;
5451 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5453 fprintf (vect_dump, "Analyze phi: ");
5454 print_generic_expr (vect_dump, phi, TDF_SLIM);
5457 /* Skip virtual phi's. The data dependences that are associated with
5458 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5460 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5462 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5463 fprintf (vect_dump, "virtual phi. skip.");
5467 /* Analyze the evolution function. */
5469 access_fn = instantiate_parameters
5470 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5474 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5475 fprintf (vect_dump, "No Access function.");
5479 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5481 fprintf (vect_dump, "Access function of PHI: ");
5482 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
5485 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5487 if (evolution_part == NULL_TREE)
5490 /* FORNOW: We do not transform initial conditions of IVs
5491 which evolution functions are a polynomial of degree >= 2. */
5493 if (tree_is_chrec (evolution_part))
5501 /* Function vect_get_loop_niters.
5503 Determine how many iterations the loop is executed.
5504 If an expression that represents the number of iterations
5505 can be constructed, place it in NUMBER_OF_ITERATIONS.
5506 Return the loop exit condition. */
5509 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5513 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5514 fprintf (vect_dump, "=== get_loop_niters ===");
5516 niters = number_of_iterations_in_loop (loop);
5518 if (niters != NULL_TREE
5519 && niters != chrec_dont_know)
5521 *number_of_iterations = niters;
5523 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5525 fprintf (vect_dump, "==> get_loop_niters:" );
5526 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
5530 return get_loop_exit_condition (loop);
5534 /* Function vect_analyze_loop_form.
5536 Verify the following restrictions (some may be relaxed in the future):
5537 - it's an inner-most loop
5538 - number of BBs = 2 (which are the loop header and the latch)
5539 - the loop has a pre-header
5540 - the loop has a single entry and exit
5541 - the loop exit condition is simple enough, and the number of iterations
5542 can be analyzed (a countable loop). */
5544 static loop_vec_info
5545 vect_analyze_loop_form (struct loop *loop)
5547 loop_vec_info loop_vinfo;
5549 tree number_of_iterations = NULL;
5550 bool rescan = false;
5553 loop_loc = find_loop_location (loop);
5555 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
5556 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
5560 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
5561 fprintf (vect_dump, "not vectorized: nested loop.");
5565 if (!loop->single_exit
5566 || loop->num_nodes != 2
5567 || EDGE_COUNT (loop->header->preds) != 2
5568 || loop->num_entries != 1)
5570 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5572 if (!loop->single_exit)
5573 fprintf (vect_dump, "not vectorized: multiple exits.");
5574 else if (loop->num_nodes != 2)
5575 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
5576 else if (EDGE_COUNT (loop->header->preds) != 2)
5577 fprintf (vect_dump, "not vectorized: too many incoming edges.");
5578 else if (loop->num_entries != 1)
5579 fprintf (vect_dump, "not vectorized: too many entries.");
5585 /* We assume that the loop exit condition is at the end of the loop. i.e,
5586 that the loop is represented as a do-while (with a proper if-guard
5587 before the loop if needed), where the loop header contains all the
5588 executable statements, and the latch is empty. */
5589 if (!empty_block_p (loop->latch))
5591 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5592 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
5596 /* Make sure we have a preheader basic block. */
5597 if (!loop->pre_header)
5600 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5603 /* Make sure there exists a single-predecessor exit bb: */
5604 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5607 loop_split_edge_with (loop->exit_edges[0], NULL);
5612 flow_loop_scan (loop, LOOP_ALL);
5613 /* Flow loop scan does not update loop->single_exit field. */
5614 loop->single_exit = loop->exit_edges[0];
5617 if (empty_block_p (loop->header))
5619 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5620 fprintf (vect_dump, "not vectorized: empty loop.");
5624 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5627 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5628 fprintf (vect_dump, "not vectorized: complicated exit condition.");
5632 if (!number_of_iterations)
5634 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5636 "not vectorized: number of iterations cannot be computed.");
5640 if (chrec_contains_undetermined (number_of_iterations))
5642 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
5643 fprintf (vect_dump, "Infinite number of iterations.");
5647 loop_vinfo = new_loop_vec_info (loop);
5648 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5650 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5652 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
5654 fprintf (vect_dump, "Symbolic number of iterations is ");
5655 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
5659 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5661 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
5662 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
5666 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5667 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
5673 /* Function vect_analyze_loop.
5675 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5676 for it. The different analyses will record information in the
5677 loop_vec_info struct. */
5679 static loop_vec_info
5680 vect_analyze_loop (struct loop *loop)
5683 loop_vec_info loop_vinfo;
5685 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5686 fprintf (vect_dump, "===== analyze_loop_nest =====");
5688 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5690 loop_vinfo = vect_analyze_loop_form (loop);
5693 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5694 fprintf (vect_dump, "bad loop form.");
5698 /* Find all data references in the loop (which correspond to vdefs/vuses)
5699 and analyze their evolution in the loop.
5701 FORNOW: Handle only simple, array references, which
5702 alignment can be forced, and aligned pointer-references. */
5704 ok = vect_analyze_data_refs (loop_vinfo);
5707 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5708 fprintf (vect_dump, "bad data references.");
5709 destroy_loop_vec_info (loop_vinfo);
5713 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5715 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5718 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5719 fprintf (vect_dump, "unexpected pattern.");
5720 destroy_loop_vec_info (loop_vinfo);
5724 /* Check that all cross-iteration scalar data-flow cycles are OK.
5725 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5727 ok = vect_analyze_scalar_cycles (loop_vinfo);
5730 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5731 fprintf (vect_dump, "bad scalar cycle.");
5732 destroy_loop_vec_info (loop_vinfo);
5736 /* Analyze data dependences between the data-refs in the loop.
5737 FORNOW: fail at the first data dependence that we encounter. */
5739 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5742 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5743 fprintf (vect_dump, "bad data dependence.");
5744 destroy_loop_vec_info (loop_vinfo);
5748 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5749 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5751 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5754 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5755 fprintf (vect_dump, "bad data access.");
5756 destroy_loop_vec_info (loop_vinfo);
5760 /* Analyze the alignment of the data-refs in the loop.
5761 FORNOW: Only aligned accesses are handled. */
5763 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5766 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5767 fprintf (vect_dump, "bad data alignment.");
5768 destroy_loop_vec_info (loop_vinfo);
5772 /* Scan all the operations in the loop and make sure they are
5775 ok = vect_analyze_operations (loop_vinfo);
5778 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
5779 fprintf (vect_dump, "bad operation or unsupported loop bound.");
5780 destroy_loop_vec_info (loop_vinfo);
5784 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5790 /* Function need_imm_uses_for.
5792 Return whether we ought to include information for 'var'
5793 when calculating immediate uses. For this pass we only want use
5794 information for non-virtual variables. */
5797 need_imm_uses_for (tree var)
5799 return is_gimple_reg (var);
5803 /* Function vectorize_loops.
5805 Entry Point to loop vectorization phase. */
5808 vectorize_loops (struct loops *loops)
5810 unsigned int i, loops_num;
5811 unsigned int num_vectorized_loops = 0;
5813 /* Fix the verbosity level if not defined explicitly by the user. */
5814 vect_set_dump_settings ();
5816 /* Does the target support SIMD? */
5817 /* FORNOW: until more sophisticated machine modelling is in place. */
5818 if (!UNITS_PER_SIMD_WORD)
5820 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
5821 fprintf (vect_dump, "vectorizer: target vector size is not defined.");
5825 #ifdef ENABLE_CHECKING
5826 verify_loop_closed_ssa ();
5829 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5831 /* ----------- Analyze loops. ----------- */
5833 /* If some loop was duplicated, it gets bigger number
5834 than all previously defined loops. This fact allows us to run
5835 only over initial loops skipping newly generated ones. */
5836 loops_num = loops->num;
5837 for (i = 1; i < loops_num; i++)
5839 loop_vec_info loop_vinfo;
5840 struct loop *loop = loops->parray[i];
5845 loop_vinfo = vect_analyze_loop (loop);
5846 loop->aux = loop_vinfo;
5848 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5851 vect_transform_loop (loop_vinfo, loops);
5852 num_vectorized_loops++;
5855 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
5856 fprintf (vect_dump, "vectorized %u loops in function.\n",
5857 num_vectorized_loops);
5859 /* ----------- Finalize. ----------- */
5862 for (i = 1; i < loops_num; i++)
5864 struct loop *loop = loops->parray[i];
5865 loop_vec_info loop_vinfo;
5869 loop_vinfo = loop->aux;
5870 destroy_loop_vec_info (loop_vinfo);
5874 rewrite_into_ssa (false);
5875 rewrite_into_loop_closed_ssa (); /* FORNOW */
5876 bitmap_clear (vars_to_rename);