2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
150 /*************************************************************************
151 Simple Loop Peeling Utilities
152 *************************************************************************/
154 /* Entry point for peeling of simple loops.
155 Peel the first/last iterations of a loop.
156 It can be used outside of the vectorizer for loops that are simple enough
157 (see function documentation). In the vectorizer it is used to peel the
158 last few iterations when the loop bound is unknown or does not evenly
159 divide by the vectorization factor, and to peel the first few iterations
160 to force the alignment of data references in the loop. */
161 struct loop *slpeel_tree_peel_loop_to_edge
162 (struct loop *, struct loops *, edge, tree, tree, bool);
163 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
164 (struct loop *, struct loops *, edge);
165 static void slpeel_update_phis_for_duplicate_loop
166 (struct loop *, struct loop *, bool after);
167 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *);
168 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
169 static edge slpeel_add_loop_guard (basic_block, tree, basic_block);
170 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
171 static void allocate_new_names (bitmap);
172 static void rename_use_op (use_operand_p);
173 static void rename_def_op (def_operand_p, tree);
174 static void rename_variables_in_bb (basic_block);
175 static void free_new_names (bitmap);
176 static void rename_variables_in_loop (struct loop *);
179 /*************************************************************************
180 Vectorization Utilities.
181 *************************************************************************/
183 /* Main analysis functions. */
184 static loop_vec_info vect_analyze_loop (struct loop *);
185 static loop_vec_info vect_analyze_loop_form (struct loop *);
186 static bool vect_analyze_data_refs (loop_vec_info);
187 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
188 static bool vect_analyze_scalar_cycles (loop_vec_info);
189 static bool vect_analyze_data_ref_accesses (loop_vec_info);
190 static bool vect_analyze_data_refs_alignment (loop_vec_info);
191 static bool vect_compute_data_refs_alignment (loop_vec_info);
192 static bool vect_analyze_operations (loop_vec_info);
194 /* Main code transformation functions. */
195 static void vect_transform_loop (loop_vec_info, struct loops *);
196 static void vect_transform_loop_bound (loop_vec_info, tree niters);
197 static bool vect_transform_stmt (tree, block_stmt_iterator *);
198 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
199 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
200 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
201 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
202 static enum dr_alignment_support vect_supportable_dr_alignment
203 (struct data_reference *);
204 static void vect_align_data_ref (tree);
205 static void vect_enhance_data_refs_alignment (loop_vec_info);
207 /* Utility functions for the analyses. */
208 static bool vect_is_simple_use (tree , struct loop *, tree *);
209 static bool exist_non_indexing_operands_for_use_p (tree, tree);
210 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
211 static void vect_mark_relevant (varray_type, tree);
212 static bool vect_stmt_relevant_p (tree, loop_vec_info);
213 static tree vect_get_loop_niters (struct loop *, tree *);
214 static bool vect_compute_data_ref_alignment
215 (struct data_reference *, loop_vec_info);
216 static bool vect_analyze_data_ref_access (struct data_reference *);
217 static bool vect_get_first_index (tree, tree *);
218 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
219 static struct data_reference * vect_analyze_pointer_ref_access
221 static bool vect_can_advance_ivs_p (struct loop *);
222 static tree vect_get_base_and_bit_offset
223 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
224 static struct data_reference * vect_analyze_pointer_ref_access
226 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
227 static tree vect_compute_array_ref_alignment
228 (struct data_reference *, loop_vec_info, tree, tree *);
229 static tree vect_get_ptr_offset (tree, tree, tree *);
230 static tree vect_get_symbl_and_dr
231 (tree, tree, bool, loop_vec_info, struct data_reference **);
233 /* Utility functions for the code transformation. */
234 static tree vect_create_destination_var (tree, tree);
235 static tree vect_create_data_ref_ptr
236 (tree, block_stmt_iterator *, tree, tree *, bool);
237 static tree vect_create_index_for_vector_ref
238 (struct loop *, block_stmt_iterator *);
239 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
240 static tree get_vectype_for_scalar_type (tree);
241 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
242 static tree vect_get_vec_def_for_operand (tree, tree);
243 static tree vect_init_vector (tree, tree);
244 static tree vect_build_symbol_bound (tree, int, struct loop *);
245 static void vect_finish_stmt_generation
246 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
248 /* Utility function dealing with loop peeling (not peeling itself). */
249 static void vect_generate_tmps_on_preheader
250 (loop_vec_info, tree *, tree *, tree *);
251 static tree vect_build_loop_niters (loop_vec_info);
252 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
253 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
254 static void vect_update_niters_after_peeling (loop_vec_info, tree);
255 static void vect_update_inits_of_dr
256 (struct data_reference *, struct loop *, tree niters);
257 static void vect_update_inits_of_drs (loop_vec_info, tree);
258 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
259 static void vect_transform_for_unknown_loop_bound
260 (loop_vec_info, tree *, struct loops *);
262 /* Utilities for creation and deletion of vec_info structs. */
263 loop_vec_info new_loop_vec_info (struct loop *loop);
264 void destroy_loop_vec_info (loop_vec_info);
265 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
267 static bool vect_debug_stats (struct loop *loop);
268 static bool vect_debug_details (struct loop *loop);
271 /*************************************************************************
272 Simple Loop Peeling Utilities
274 Utilities to support loop peeling for vectorization purposes.
275 *************************************************************************/
278 /* For each definition in DEFINITIONS this function allocates
282 allocate_new_names (bitmap definitions)
287 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
289 tree def = ssa_name (ver);
290 tree *new_name_ptr = xmalloc (sizeof (tree));
292 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
294 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
295 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
297 SSA_NAME_AUX (def) = new_name_ptr;
302 /* Renames the use *OP_P. */
305 rename_use_op (use_operand_p op_p)
309 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
312 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
314 /* Something defined outside of the loop. */
318 /* An ordinary ssa name defined in the loop. */
320 SET_USE (op_p, *new_name_ptr);
324 /* Renames the def *OP_P in statement STMT. */
327 rename_def_op (def_operand_p op_p, tree stmt)
331 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
334 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
336 /* Something defined outside of the loop. */
340 /* An ordinary ssa name defined in the loop. */
342 SET_DEF (op_p, *new_name_ptr);
343 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
347 /* Renames the variables in basic block BB. */
350 rename_variables_in_bb (basic_block bb)
353 block_stmt_iterator bsi;
359 v_may_def_optype v_may_defs;
360 v_must_def_optype v_must_defs;
365 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
366 rename_def_op (PHI_RESULT_PTR (phi), phi);
368 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
370 stmt = bsi_stmt (bsi);
371 get_stmt_operands (stmt);
372 ann = stmt_ann (stmt);
374 uses = USE_OPS (ann);
375 for (i = 0; i < NUM_USES (uses); i++)
376 rename_use_op (USE_OP_PTR (uses, i));
378 defs = DEF_OPS (ann);
379 for (i = 0; i < NUM_DEFS (defs); i++)
380 rename_def_op (DEF_OP_PTR (defs, i), stmt);
382 vuses = VUSE_OPS (ann);
383 for (i = 0; i < NUM_VUSES (vuses); i++)
384 rename_use_op (VUSE_OP_PTR (vuses, i));
386 v_may_defs = V_MAY_DEF_OPS (ann);
387 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
389 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
390 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
393 v_must_defs = V_MUST_DEF_OPS (ann);
394 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
396 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
397 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
401 FOR_EACH_EDGE (e, ei, bb->succs)
402 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
403 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
407 /* Releases the structures holding the new ssa names. */
410 free_new_names (bitmap definitions)
415 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
417 tree def = ssa_name (ver);
419 if (SSA_NAME_AUX (def))
421 free (SSA_NAME_AUX (def));
422 SSA_NAME_AUX (def) = NULL;
428 /* Renames variables in new generated LOOP. */
431 rename_variables_in_loop (struct loop *loop)
436 bbs = get_loop_body (loop);
438 for (i = 0; i < loop->num_nodes; i++)
439 rename_variables_in_bb (bbs[i]);
445 /* This function copies phis from LOOP header to
446 NEW_LOOP header. AFTER is as
447 in update_phis_for_duplicate_loop function. */
450 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
453 tree phi, new_phi, def;
455 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
457 /* Second add arguments to newly created phi nodes. */
458 for (phi = phi_nodes (loop->header),
459 new_phi = phi_nodes (new_loop->header);
461 phi = PHI_CHAIN (phi),
462 new_phi = PHI_CHAIN (new_phi))
464 new_e = loop_preheader_edge (new_loop);
465 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
466 add_phi_arg (&new_phi, def, new_e);
471 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
472 executes after LOOP, and false if it executes before it. */
475 slpeel_update_phis_for_duplicate_loop (struct loop *loop,
476 struct loop *new_loop, bool after)
479 tree *new_name_ptr, new_ssa_name;
480 tree phi_new, phi_old, def;
481 edge orig_entry_e = loop_preheader_edge (loop);
483 /* Copy phis from loop->header to new_loop->header. */
484 copy_phi_nodes (loop, new_loop, after);
486 old_latch = loop_latch_edge (loop);
488 /* Update PHI args for the new loop latch edge, and
489 the old loop preheader edge, we know that the PHI nodes
490 are ordered appropriately in copy_phi_nodes. */
491 for (phi_new = phi_nodes (new_loop->header),
492 phi_old = phi_nodes (loop->header);
494 phi_new = PHI_CHAIN (phi_new), phi_old = PHI_CHAIN (phi_old))
496 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
498 if (TREE_CODE (def) != SSA_NAME)
501 new_name_ptr = SSA_NAME_AUX (def);
503 /* Something defined outside of the loop. */
507 /* An ordinary ssa name defined in the loop. */
508 new_ssa_name = *new_name_ptr;
510 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
512 /* Update PHI args for the original loop pre-header edge. */
514 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
520 /* Update PHI nodes for a guard of the LOOP.
522 LOOP is supposed to have a preheader bb at which a guard condition is
523 located. The true edge of this condition skips the LOOP and ends
524 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
525 to be an empty bb (created by this transformation) with one successor.
527 This function creates phi nodes at the LOOP exit bb. These phis need to be
528 created as a result of adding true edge coming from guard.
530 FORNOW: Only phis which have corresponding phi nodes at the header of the
531 LOOP are created. Here we use the assumption that after the LOOP there
532 are no uses of defs generated in LOOP.
534 After the phis creation, the function updates the values of phi nodes at
535 the LOOP exit successor bb:
542 if (exit_cond) goto bb3 else goto bb2
548 After guard creation (the loop before this function):
551 if (guard_condition) goto bb4 else goto bb1
553 if (exit_cond) goto bb4 else goto bb2
561 This function updates the phi nodes in bb4 and in bb3, to account for the
562 new edge from bb0 to bb4. */
565 slpeel_update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
568 basic_block bb = loop->exit_edges[0]->dest;
570 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
575 /* Generate new phi node. */
576 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb);
578 /* Add argument coming from guard true edge. */
579 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
580 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
582 /* Add argument coming from loop exit edge. */
583 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
584 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
586 /* Update all phi nodes at the loop exit successor. */
587 for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest);
589 phi1 = PHI_CHAIN (phi1))
591 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0));
592 if (old_arg == phi_arg)
594 edge e = EDGE_SUCC (bb, 0);
596 SET_PHI_ARG_DEF (phi1,
597 phi_arg_from_edge (phi1, e),
598 PHI_RESULT (new_phi));
603 set_phi_nodes (bb, phi_reverse (phi_nodes (bb)));
607 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
608 that starts at zero, increases by one and its limit is NITERS. */
611 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters,
612 tree begin_label, tree exit_label)
614 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
616 edge exit_edge = loop->exit_edges[0];
617 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
619 /* Flow loop scan does not update loop->single_exit field. */
620 loop->single_exit = loop->exit_edges[0];
621 orig_cond = get_loop_exit_condition (loop);
622 gcc_assert (orig_cond);
623 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
624 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
626 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
627 back to the exit condition statement. */
628 bsi_next (&loop_exit_bsi);
629 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
632 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
633 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
634 else /* 'then' edge loops back. */
635 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
637 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
638 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
639 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
640 begin_label, exit_label);
641 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
643 /* Remove old loop exit test: */
644 bsi_remove (&loop_exit_bsi);
646 if (vect_debug_stats (loop) || vect_debug_details (loop))
647 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
649 loop->nb_iterations = niters;
653 /* Given LOOP this function generates a new copy of it and puts it
654 on E which is either the entry or exit of LOOP. */
657 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
660 struct loop *new_loop;
661 basic_block *new_bbs, *bbs;
664 basic_block exit_dest;
667 at_exit = (e == loop->exit_edges[0]);
668 if (!at_exit && e != loop_preheader_edge (loop))
670 if (dump_file && (dump_flags & TDF_DETAILS))
672 "Edge is not an entry nor an exit edge.\n");
676 bbs = get_loop_body (loop);
678 /* Check whether duplication is possible. */
679 if (!can_copy_bbs_p (bbs, loop->num_nodes))
681 if (vect_debug_stats (loop) || vect_debug_details (loop))
683 "Cannot copy basic blocks.\n");
688 /* Generate new loop structure. */
689 new_loop = duplicate_loop (loops, loop, loop->outer);
692 if (vect_debug_stats (loop) || vect_debug_details (loop))
694 "The duplicate_loop returns NULL.\n");
699 exit_dest = loop->exit_edges[0]->dest;
700 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
701 exit_dest) == loop->header ?
704 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
706 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
708 /* Duplicating phi args at exit bbs as coming
709 also from exit of duplicated loop. */
710 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
712 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
715 edge new_loop_exit_edge;
717 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
718 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
720 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
722 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
726 if (at_exit) /* Add the loop copy at exit. */
728 redirect_edge_and_branch_force (e, new_loop->header);
729 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
731 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
733 else /* Add the copy at entry. */
736 edge entry_e = loop_preheader_edge (loop);
737 basic_block preheader = entry_e->src;
739 if (!flow_bb_inside_loop_p (new_loop,
740 EDGE_SUCC (new_loop->header, 0)->dest))
741 new_exit_e = EDGE_SUCC (new_loop->header, 0);
743 new_exit_e = EDGE_SUCC (new_loop->header, 1);
745 redirect_edge_and_branch_force (new_exit_e, loop->header);
746 set_immediate_dominator (CDI_DOMINATORS, loop->header,
749 /* We have to add phi args to the loop->header here as coming
750 from new_exit_e edge. */
751 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
753 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
755 add_phi_arg (&phi, phi_arg, new_exit_e);
758 redirect_edge_and_branch_force (entry_e, new_loop->header);
759 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
762 flow_loop_scan (new_loop, LOOP_ALL);
763 flow_loop_scan (loop, LOOP_ALL);
771 /* Given the condition statement COND, put it as the last statement
772 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
773 Assumes that this is the single exit of the guarded loop.
774 Returns the skip edge. */
777 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
779 block_stmt_iterator bsi;
781 tree cond_stmt, then_label, else_label;
783 enter_e = EDGE_SUCC (guard_bb, 0);
784 enter_e->flags &= ~EDGE_FALLTHRU;
785 enter_e->flags |= EDGE_FALSE_VALUE;
786 bsi = bsi_last (guard_bb);
788 then_label = build1 (GOTO_EXPR, void_type_node,
789 tree_block_label (exit_bb));
790 else_label = build1 (GOTO_EXPR, void_type_node,
791 tree_block_label (enter_e->dest));
792 cond_stmt = build (COND_EXPR, void_type_node, cond,
793 then_label, else_label);
794 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
795 /* Add new edge to connect entry block to the second loop. */
796 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
797 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
802 /* This function verifies that the following restrictions apply to LOOP:
804 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
805 (3) it is single entry, single exit
806 (4) its exit condition is the last stmt in the header
807 (5) E is the entry/exit edge of LOOP.
811 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
813 edge exit_e = loop->exit_edges [0];
814 edge entry_e = loop_preheader_edge (loop);
815 tree orig_cond = get_loop_exit_condition (loop);
816 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
818 if (any_marked_for_rewrite_p ())
822 /* All loops have an outer scope; the only case loop->outer is NULL is for
823 the function itself. */
825 || loop->num_nodes != 2
826 || !empty_block_p (loop->latch)
827 || loop->num_exits != 1
828 || loop->num_entries != 1
829 /* Verify that new loop exit condition can be trivially modified. */
830 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
831 || (e != exit_e && e != entry_e))
838 /* Given LOOP this function duplicates it to the edge E.
840 This transformation takes place before the loop is vectorized.
841 For now, there are two main cases when it's used
842 by the vectorizer: to support loops with unknown loop bounds
843 (or loop bounds indivisible by vectorization factor) and to force the
844 alignment of data references in the loop. In the first case, LOOP is
845 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
846 is duplicated to the preheader edge thus generating prolog loop. In both
847 cases, the original loop will be vectorized after the transformation.
849 The edge E is supposed to be either preheader edge of the LOOP or
850 its exit edge. If preheader edge is specified, the LOOP copy
851 will precede the original one. Otherwise the copy will be located
852 at the exit of the LOOP.
854 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
855 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
856 loop will be iterated FIRST_NITERS times by introducing additional
857 induction variable and replacing loop exit condition. If
858 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
859 the caller to tree_duplicate_loop_to_edge is responsible for updating
860 the first loop count.
862 NITERS (also SSA_NAME) parameter defines the number of iteration the
863 original loop iterated. The function generates two if-then guards:
864 one prior to the first loop and the other prior to the second loop.
865 The first guard will be:
867 if (FIRST_NITERS == 0) then skip the first loop
869 The second guard will be:
871 if (FIRST_NITERS == NITERS) then skip the second loop
873 Thus the equivalence to the original code is guaranteed by correct values
874 of NITERS and FIRST_NITERS and generation of if-then loop guards.
876 For now this function supports only loop forms that are candidate for
877 vectorization. Such types are the following:
879 (1) only innermost loops
880 (2) loops built from 2 basic blocks
881 (3) loops with one entry and one exit
882 (4) loops without function calls
883 (5) loops without defs that are used after the loop
885 (1), (3) are checked in this function; (2) - in function
886 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
887 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
888 when excluding induction/reduction support.
890 The function returns NULL in case one of these checks or
891 transformations failed. */
894 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
895 edge e, tree first_niters,
896 tree niters, bool update_first_loop_count)
898 struct loop *new_loop = NULL, *first_loop, *second_loop;
902 basic_block first_exit_bb, second_exit_bb;
903 basic_block pre_header_bb;
904 edge exit_e = loop->exit_edges [0];
906 if (!slpeel_can_duplicate_loop_p (loop, e))
909 /* We have to initialize cfg_hooks. Then, when calling
910 cfg_hooks->split_edge, the function tree_split_edge
911 is actually called and, when calling cfg_hooks->duplicate_block,
912 the function tree_duplicate_bb is called. */
913 tree_register_cfg_hooks ();
915 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
916 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
918 if (vect_debug_stats (loop) || vect_debug_details (loop))
920 "The tree_duplicate_loop_to_edge_cfg failed.\n");
924 definitions = marked_ssa_names ();
925 allocate_new_names (definitions);
926 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
927 /* Here, using assumption (5), we do not propagate new names further
928 than on phis of the exit from the second loop. */
929 rename_variables_in_loop (new_loop);
930 free_new_names (definitions);
935 second_loop = new_loop;
939 first_loop = new_loop;
943 /* 2. Generate bb between the loops. */
944 first_exit_bb = split_edge (first_loop->exit_edges[0]);
945 add_bb_to_loop (first_exit_bb, first_loop->outer);
947 /* We need to update here first loop exit edge
948 and second loop preheader edge. */
949 flow_loop_scan (first_loop, LOOP_ALL);
950 flow_loop_scan (second_loop, LOOP_ALL);
952 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
953 if (!update_first_loop_count)
955 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
956 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
958 slpeel_make_loop_iterate_ntimes (first_loop, first_niters,
959 first_loop_latch_lbl,
960 first_loop_exit_lbl);
963 /* 4. Add the guard before first loop:
970 /* 4a. Generate bb before first loop. */
971 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
972 add_bb_to_loop (pre_header_bb, first_loop->outer);
974 /* First loop preheader edge is changed. */
975 flow_loop_scan (first_loop, LOOP_ALL);
977 /* 4b. Generate guard condition. */
978 pre_condition = build (LE_EXPR, boolean_type_node,
979 first_niters, integer_zero_node);
981 /* 4c. Add condition at the end of preheader bb. */
982 skip_e = slpeel_add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
984 /* 4d. Update phis at first loop exit and propagate changes
985 to the phis of second loop. */
986 slpeel_update_phi_nodes_for_guard (skip_e, first_loop);
988 /* 5. Add the guard before second loop:
990 if FIRST_NITERS == NITERS SKIP
995 /* 5a. Generate empty bb at the exit from the second loop. */
996 second_exit_bb = split_edge (second_loop->exit_edges[0]);
997 add_bb_to_loop (second_exit_bb, second_loop->outer);
999 /* Second loop preheader edge is changed. */
1000 flow_loop_scan (second_loop, LOOP_ALL);
1002 /* 5b. Generate guard condition. */
1003 pre_condition = build (EQ_EXPR, boolean_type_node,
1004 first_niters, niters);
1006 /* 5c. Add condition at the end of preheader bb. */
1007 skip_e = slpeel_add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1008 slpeel_update_phi_nodes_for_guard (skip_e, second_loop);
1010 BITMAP_XFREE (definitions);
1011 unmark_all_for_rewrite ();
1018 /* Here the proper Vectorizer starts. */
1020 /*************************************************************************
1021 Vectorization Utilities.
1022 *************************************************************************/
1024 /* Function new_stmt_vec_info.
1026 Create and initialize a new stmt_vec_info struct for STMT. */
1029 new_stmt_vec_info (tree stmt, struct loop *loop)
1032 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1034 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1035 STMT_VINFO_STMT (res) = stmt;
1036 STMT_VINFO_LOOP (res) = loop;
1037 STMT_VINFO_RELEVANT_P (res) = 0;
1038 STMT_VINFO_VECTYPE (res) = NULL;
1039 STMT_VINFO_VEC_STMT (res) = NULL;
1040 STMT_VINFO_DATA_REF (res) = NULL;
1041 STMT_VINFO_MEMTAG (res) = NULL;
1042 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1048 /* Function new_loop_vec_info.
1050 Create and initialize a new loop_vec_info struct for LOOP, as well as
1051 stmt_vec_info structs for all the stmts in LOOP. */
1054 new_loop_vec_info (struct loop *loop)
1058 block_stmt_iterator si;
1061 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1063 bbs = get_loop_body (loop);
1065 /* Create stmt_info for all stmts in the loop. */
1066 for (i = 0; i < loop->num_nodes; i++)
1068 basic_block bb = bbs[i];
1069 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1071 tree stmt = bsi_stmt (si);
1074 get_stmt_operands (stmt);
1075 ann = stmt_ann (stmt);
1076 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1080 LOOP_VINFO_LOOP (res) = loop;
1081 LOOP_VINFO_BBS (res) = bbs;
1082 LOOP_VINFO_EXIT_COND (res) = NULL;
1083 LOOP_VINFO_NITERS (res) = NULL;
1084 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1085 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1086 LOOP_VINFO_VECT_FACTOR (res) = 0;
1087 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1088 "loop_write_datarefs");
1089 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1090 "loop_read_datarefs");
1091 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1097 /* Function destroy_loop_vec_info.
1099 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1100 stmts in the loop. */
1103 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1108 block_stmt_iterator si;
1114 loop = LOOP_VINFO_LOOP (loop_vinfo);
1116 bbs = LOOP_VINFO_BBS (loop_vinfo);
1117 nbbs = loop->num_nodes;
1119 for (j = 0; j < nbbs; j++)
1121 basic_block bb = bbs[j];
1122 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1124 tree stmt = bsi_stmt (si);
1125 stmt_ann_t ann = stmt_ann (stmt);
1126 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1128 set_stmt_info (ann, NULL);
1132 free (LOOP_VINFO_BBS (loop_vinfo));
1133 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1134 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1140 /* Function debug_loop_stats.
1142 For vectorization statistics dumps. */
1145 vect_debug_stats (struct loop *loop)
1148 block_stmt_iterator si;
1149 tree node = NULL_TREE;
1151 if (!dump_file || !(dump_flags & TDF_STATS))
1156 fprintf (dump_file, "\n");
1165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1167 node = bsi_stmt (si);
1168 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1172 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1173 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1175 fprintf (dump_file, "\nloop at %s:%d: ",
1176 EXPR_FILENAME (node), EXPR_LINENO (node));
1184 /* Function debug_loop_details.
1186 For vectorization debug dumps. */
1189 vect_debug_details (struct loop *loop)
1192 block_stmt_iterator si;
1193 tree node = NULL_TREE;
1195 if (!dump_file || !(dump_flags & TDF_DETAILS))
1200 fprintf (dump_file, "\n");
1209 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1211 node = bsi_stmt (si);
1212 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1216 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1217 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1219 fprintf (dump_file, "\nloop at %s:%d: ",
1220 EXPR_FILENAME (node), EXPR_LINENO (node));
1228 /* Function vect_get_ptr_offset
1230 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1233 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1234 tree vectype ATTRIBUTE_UNUSED,
1235 tree *offset ATTRIBUTE_UNUSED)
1237 /* TODO: Use alignment information. */
1242 /* Function vect_get_base_and_bit_offset
1244 Return the BASE of the data reference EXPR.
1245 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1246 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1247 bits of 'a.b[i] + 4B' from a.
1250 EXPR - the memory reference that is being analyzed
1251 DR - the data_reference struct of the _original_ memory reference
1252 (Note: DR_REF (DR) is not necessarily EXPR)
1253 VECTYPE - the type that defines the alignment (i.e, we compute
1254 alignment relative to TYPE_ALIGN(VECTYPE))
1257 BASE (returned value) - the base of the data reference EXPR.
1258 E.g, if EXPR is a.b[k].c[i][j] the returned
1260 OFFSET - offset of EXPR from BASE in bits
1261 BASE_ALIGNED_P - indicates if BASE is aligned
1263 If something unexpected is encountered (an unsupported form of data-ref),
1264 or if VECTYPE is given but OFFSET cannot be determined:
1265 then NULL_TREE is returned. */
1268 vect_get_base_and_bit_offset (struct data_reference *dr,
1271 loop_vec_info loop_vinfo,
1273 bool *base_aligned_p)
1275 tree this_offset = size_zero_node;
1276 tree base = NULL_TREE;
1278 tree oprnd0, oprnd1;
1279 struct data_reference *array_dr;
1280 enum tree_code code = TREE_CODE (expr);
1282 *base_aligned_p = false;
1286 /* These cases end the recursion: */
1288 *offset = size_zero_node;
1289 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1290 *base_aligned_p = true;
1297 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1300 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1302 base = vect_get_ptr_offset (expr, vectype, offset);
1304 *base_aligned_p = true;
1308 *base_aligned_p = true;
1309 *offset = size_zero_node;
1315 *offset = int_const_binop (MULT_EXPR, expr,
1316 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1319 /* These cases continue the recursion: */
1321 oprnd0 = TREE_OPERAND (expr, 0);
1322 oprnd1 = TREE_OPERAND (expr, 1);
1324 this_offset = bit_position (oprnd1);
1325 if (vectype && !host_integerp (this_offset, 1))
1331 oprnd0 = TREE_OPERAND (expr, 0);
1336 oprnd0 = TREE_OPERAND (expr, 0);
1341 if (DR_REF (dr) != expr)
1342 /* Build array data_reference struct if the existing DR_REF
1343 doesn't match EXPR. This happens, for example, when the
1344 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1345 contains information on the access of T, not of arr. In order
1346 to continue the analysis, we create a new DR struct that
1347 describes the access of arr.
1349 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1353 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1354 vectype, &this_offset);
1359 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1361 *offset = this_offset;
1362 *base_aligned_p = true;
1369 /* In case we have a PLUS_EXPR of the form
1370 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1371 This is verified in vect_get_symbl_and_dr. */
1372 oprnd0 = TREE_OPERAND (expr, 0);
1373 oprnd1 = TREE_OPERAND (expr, 1);
1375 base = vect_get_base_and_bit_offset
1376 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1377 if (vectype && !base)
1387 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1388 loop_vinfo, offset, base_aligned_p);
1390 if (vectype && base)
1392 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1393 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1396 if (vect_debug_details (NULL))
1398 print_generic_expr (dump_file, expr, TDF_SLIM);
1399 fprintf (dump_file, " --> total offset for ref: ");
1400 print_generic_expr (dump_file, *offset, TDF_SLIM);
1407 /* Function vect_force_dr_alignment_p.
1409 Returns whether the alignment of a DECL can be forced to be aligned
1410 on ALIGNMENT bit boundary. */
1413 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1415 if (TREE_CODE (decl) != VAR_DECL)
1418 if (DECL_EXTERNAL (decl))
1421 if (TREE_STATIC (decl))
1422 return (alignment <= MAX_OFILE_ALIGNMENT);
1424 /* This is not 100% correct. The absolute correct stack alignment
1425 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1426 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1427 However, until someone implements forced stack alignment, SSE
1428 isn't really usable without this. */
1429 return (alignment <= PREFERRED_STACK_BOUNDARY);
1433 /* Function vect_get_new_vect_var.
1435 Returns a name for a new variable. The current naming scheme appends the
1436 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1437 the name of vectorizer generated variables, and appends that to NAME if
1441 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1447 if (var_kind == vect_simple_var)
1452 prefix_len = strlen (prefix);
1455 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1457 new_vect_var = create_tmp_var (type, prefix);
1459 return new_vect_var;
1463 /* Function vect_create_index_for_vector_ref.
1465 Create (and return) an index variable, along with it's update chain in the
1466 loop. This variable will be used to access a memory location in a vector
1470 LOOP: The loop being vectorized.
1471 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1472 function can be added here, or in the loop pre-header.
1475 Return an index that will be used to index a vector array. It is expected
1476 that a pointer to the first vector will be used as the base address for the
1479 FORNOW: we are not trying to be efficient, just creating a new index each
1480 time from scratch. At this time all vector references could use the same
1483 TODO: create only one index to be used by all vector references. Record
1484 the index in the LOOP_VINFO the first time this procedure is called and
1485 return it on subsequent calls. The increment of this index must be placed
1486 just before the conditional expression that ends the single block loop. */
1489 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1492 tree indx_before_incr, indx_after_incr;
1494 /* It is assumed that the base pointer used for vectorized access contains
1495 the address of the first vector. Therefore the index used for vectorized
1496 access must be initialized to zero and incremented by 1. */
1498 init = integer_zero_node;
1499 step = integer_one_node;
1501 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1502 create_iv (init, step, NULL_TREE, loop, bsi, false,
1503 &indx_before_incr, &indx_after_incr);
1505 return indx_before_incr;
1509 /* Function vect_create_addr_base_for_vector_ref.
1511 Create an expression that computes the address of the first memory location
1512 that will be accessed for a data reference.
1515 STMT: The statement containing the data reference.
1516 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1517 OFFSET: Optional. If supplied, it is be added to the initial address.
1520 1. Return an SSA_NAME whose value is the address of the memory location of
1521 the first vector of the data reference.
1522 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1523 these statement(s) which define the returned SSA_NAME.
1525 FORNOW: We are only handling array accesses with step 1. */
1528 vect_create_addr_base_for_vector_ref (tree stmt,
1529 tree *new_stmt_list,
1532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1533 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1534 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1535 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1536 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1537 tree ref = DR_REF (dr);
1538 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1539 tree scalar_type = TREE_TYPE (ref);
1540 tree scalar_ptr_type = build_pointer_type (scalar_type);
1542 tree init_val, step, init_oval;
1544 bool is_ptr_ref, is_array_ref, is_addr_expr;
1549 tree addr_base, addr_expr;
1550 tree dest, new_stmt;
1552 /* Only the access function of the last index is relevant (i_n in
1553 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1554 access_fn = DR_ACCESS_FN (dr, 0);
1555 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1558 init_oval = integer_zero_node;
1560 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1561 && TREE_CODE (data_ref_base) == SSA_NAME;
1562 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1563 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1564 || TREE_CODE (data_ref_base) == PLUS_EXPR
1565 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1566 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1568 /** Create: &(base[init_val])
1570 if data_ref_base is an ARRAY_TYPE:
1571 base = data_ref_base
1573 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1574 base = *((scalar_array *) data_ref_base)
1578 array_base = data_ref_base;
1579 else /* is_ptr_ref or is_addr_expr */
1581 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1582 tree scalar_array_type = build_array_type (scalar_type, 0);
1583 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1584 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1585 add_referenced_tmp_var (array_ptr);
1587 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1588 add_referenced_tmp_var (dest);
1590 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1591 append_to_statement_list_force (new_stmt, new_stmt_list);
1593 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1594 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1595 new_temp = make_ssa_name (array_ptr, vec_stmt);
1596 TREE_OPERAND (vec_stmt, 0) = new_temp;
1597 append_to_statement_list_force (vec_stmt, new_stmt_list);
1600 array_base = build_fold_indirect_ref (new_temp);
1603 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1604 add_referenced_tmp_var (dest);
1605 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1606 append_to_statement_list_force (new_stmt, new_stmt_list);
1610 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1611 add_referenced_tmp_var (tmp);
1612 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1613 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1614 init_val = make_ssa_name (tmp, vec_stmt);
1615 TREE_OPERAND (vec_stmt, 0) = init_val;
1616 append_to_statement_list_force (vec_stmt, new_stmt_list);
1619 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1620 NULL_TREE, NULL_TREE);
1621 addr_base = build_fold_addr_expr (array_ref);
1623 /* addr_expr = addr_base */
1624 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1625 get_name (base_name));
1626 add_referenced_tmp_var (addr_expr);
1627 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1628 new_temp = make_ssa_name (addr_expr, vec_stmt);
1629 TREE_OPERAND (vec_stmt, 0) = new_temp;
1630 append_to_statement_list_force (vec_stmt, new_stmt_list);
1636 /* Function get_vectype_for_scalar_type.
1638 Returns the vector type corresponding to SCALAR_TYPE as supported
1642 get_vectype_for_scalar_type (tree scalar_type)
1644 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1645 int nbytes = GET_MODE_SIZE (inner_mode);
1652 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1654 nunits = UNITS_PER_SIMD_WORD / nbytes;
1656 vectype = build_vector_type (scalar_type, nunits);
1657 if (vect_debug_details (NULL))
1659 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1660 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1666 if (vect_debug_details (NULL))
1668 fprintf (dump_file, "vectype: ");
1669 print_generic_expr (dump_file, vectype, TDF_SLIM);
1672 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1674 /* TODO: tree-complex.c sometimes can parallelize operations
1675 on generic vectors. We can vectorize the loop in that case,
1676 but then we should re-run the lowering pass. */
1677 if (vect_debug_details (NULL))
1678 fprintf (dump_file, "mode not supported by target.");
1686 /* Function vect_align_data_ref.
1688 Handle mislignment of a memory accesses.
1690 FORNOW: Can't handle misaligned accesses.
1691 Make sure that the dataref is aligned. */
1694 vect_align_data_ref (tree stmt)
1696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1697 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1699 /* FORNOW: can't handle misaligned accesses;
1700 all accesses expected to be aligned. */
1701 gcc_assert (aligned_access_p (dr));
1705 /* Function vect_create_data_ref_ptr.
1707 Create a memory reference expression for vector access, to be used in a
1708 vector load/store stmt. The reference is based on a new pointer to vector
1712 1. STMT: a stmt that references memory. Expected to be of the form
1713 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1714 2. BSI: block_stmt_iterator where new stmts can be added.
1715 3. OFFSET (optional): an offset to be added to the initial address accessed
1716 by the data-ref in STMT.
1717 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1718 pointing to the initial address.
1721 1. Declare a new ptr to vector_type, and have it point to the base of the
1722 data reference (initial addressed accessed by the data reference).
1723 For example, for vector of type V8HI, the following code is generated:
1726 vp = (v8hi *)initial_address;
1728 if OFFSET is not supplied:
1729 initial_address = &a[init];
1730 if OFFSET is supplied:
1731 initial_address = &a[init + OFFSET];
1733 Return the initial_address in INITIAL_ADDRESS.
1735 2. Create a data-reference in the loop based on the new vector pointer vp,
1736 and using a new index variable 'idx' as follows:
1740 where if ONLY_INIT is true:
1743 update = idx + vector_type_size
1745 Return the pointer vp'.
1748 FORNOW: handle only aligned and consecutive accesses. */
1751 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1752 tree *initial_address, bool only_init)
1755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1756 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1757 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1758 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1762 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1763 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1764 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1765 int nvuses, nv_may_defs, nv_must_defs;
1769 tree new_stmt_list = NULL_TREE;
1771 edge pe = loop_preheader_edge (loop);
1778 base_name = unshare_expr (DR_BASE_NAME (dr));
1779 if (vect_debug_details (NULL))
1781 tree data_ref_base = base_name;
1782 fprintf (dump_file, "create array_ref of type: ");
1783 print_generic_expr (dump_file, vectype, TDF_SLIM);
1784 if (TREE_CODE (data_ref_base) == VAR_DECL)
1785 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1786 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1787 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1788 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1789 fprintf (dump_file, "vectorizing a record based array ref: ");
1790 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1791 fprintf (dump_file, "vectorizing a pointer ref: ");
1792 print_generic_expr (dump_file, base_name, TDF_SLIM);
1795 /** (1) Create the new vector-pointer variable: **/
1797 vect_ptr_type = build_pointer_type (vectype);
1798 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1799 get_name (base_name));
1800 add_referenced_tmp_var (vect_ptr);
1803 /** (2) Handle aliasing information of the new vector-pointer: **/
1805 tag = STMT_VINFO_MEMTAG (stmt_info);
1807 get_var_ann (vect_ptr)->type_mem_tag = tag;
1809 /* Mark for renaming all aliased variables
1810 (i.e, the may-aliases of the type-mem-tag). */
1811 nvuses = NUM_VUSES (vuses);
1812 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1813 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1814 for (i = 0; i < nvuses; i++)
1816 tree use = VUSE_OP (vuses, i);
1817 if (TREE_CODE (use) == SSA_NAME)
1818 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1820 for (i = 0; i < nv_may_defs; i++)
1822 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1823 if (TREE_CODE (def) == SSA_NAME)
1824 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1826 for (i = 0; i < nv_must_defs; i++)
1828 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1829 if (TREE_CODE (def) == SSA_NAME)
1830 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1834 /** (3) Calculate the initial address the vector-pointer, and set
1835 the vector-pointer to point to it before the loop: **/
1837 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1838 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1840 pe = loop_preheader_edge (loop);
1841 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1842 gcc_assert (!new_bb);
1843 *initial_address = new_temp;
1845 /* Create: p = (vectype *) initial_base */
1846 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1847 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1848 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1849 TREE_OPERAND (vec_stmt, 0) = new_temp;
1850 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1851 gcc_assert (!new_bb);
1852 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1855 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1857 if (only_init) /* No update in loop is required. */
1858 return vect_ptr_init;
1860 idx = vect_create_index_for_vector_ref (loop, bsi);
1862 /* Create: update = idx * vectype_size */
1863 ptr_update = create_tmp_var (integer_type_node, "update");
1864 add_referenced_tmp_var (ptr_update);
1865 vectype_size = build_int_cst (integer_type_node,
1866 GET_MODE_SIZE (TYPE_MODE (vectype)));
1867 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1868 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1869 new_temp = make_ssa_name (ptr_update, vec_stmt);
1870 TREE_OPERAND (vec_stmt, 0) = new_temp;
1871 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1873 /* Create: data_ref_ptr = vect_ptr_init + update */
1874 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1875 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1876 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1877 TREE_OPERAND (vec_stmt, 0) = new_temp;
1878 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1879 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1881 return data_ref_ptr;
1885 /* Function vect_create_destination_var.
1887 Create a new temporary of type VECTYPE. */
1890 vect_create_destination_var (tree scalar_dest, tree vectype)
1893 const char *new_name;
1895 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1897 new_name = get_name (scalar_dest);
1900 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1901 add_referenced_tmp_var (vec_dest);
1907 /* Function vect_init_vector.
1909 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1910 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1911 used in the vectorization of STMT. */
1914 vect_init_vector (tree stmt, tree vector_var)
1916 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1917 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1920 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1926 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1927 add_referenced_tmp_var (new_var);
1929 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1930 new_temp = make_ssa_name (new_var, init_stmt);
1931 TREE_OPERAND (init_stmt, 0) = new_temp;
1933 pe = loop_preheader_edge (loop);
1934 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1935 gcc_assert (!new_bb);
1937 if (vect_debug_details (NULL))
1939 fprintf (dump_file, "created new init_stmt: ");
1940 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1943 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1948 /* Function vect_get_vec_def_for_operand.
1950 OP is an operand in STMT. This function returns a (vector) def that will be
1951 used in the vectorized stmt for STMT.
1953 In the case that OP is an SSA_NAME which is defined in the loop, then
1954 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1956 In case OP is an invariant or constant, a new stmt that creates a vector def
1957 needs to be introduced. */
1960 vect_get_vec_def_for_operand (tree op, tree stmt)
1965 stmt_vec_info def_stmt_info = NULL;
1966 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1967 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1968 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1969 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1976 if (vect_debug_details (NULL))
1978 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1979 print_generic_expr (dump_file, op, TDF_SLIM);
1982 /** ===> Case 1: operand is a constant. **/
1984 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1986 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1990 /* Build a tree with vector elements. */
1991 if (vect_debug_details (NULL))
1992 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
1994 for (i = nunits - 1; i >= 0; --i)
1996 t = tree_cons (NULL_TREE, op, t);
1998 vec_cst = build_vector (vectype, t);
1999 return vect_init_vector (stmt, vec_cst);
2002 gcc_assert (TREE_CODE (op) == SSA_NAME);
2004 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2006 def_stmt = SSA_NAME_DEF_STMT (op);
2007 def_stmt_info = vinfo_for_stmt (def_stmt);
2009 if (vect_debug_details (NULL))
2011 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2012 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2016 /** ==> Case 2.1: operand is defined inside the loop. **/
2020 /* Get the def from the vectorized stmt. */
2022 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2023 gcc_assert (vec_stmt);
2024 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2029 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2030 it is a reduction/induction. **/
2032 bb = bb_for_stmt (def_stmt);
2033 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2035 if (vect_debug_details (NULL))
2036 fprintf (dump_file, "reduction/induction - unsupported.");
2037 internal_error ("no support for reduction/induction"); /* FORNOW */
2041 /** ==> Case 2.3: operand is defined outside the loop -
2042 it is a loop invariant. */
2044 switch (TREE_CODE (def_stmt))
2047 def = PHI_RESULT (def_stmt);
2050 def = TREE_OPERAND (def_stmt, 0);
2053 def = TREE_OPERAND (def_stmt, 0);
2054 gcc_assert (IS_EMPTY_STMT (def_stmt));
2058 if (vect_debug_details (NULL))
2060 fprintf (dump_file, "unsupported defining stmt: ");
2061 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2063 internal_error ("unsupported defining stmt");
2066 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2068 if (vect_debug_details (NULL))
2069 fprintf (dump_file, "Create vector_inv.");
2071 for (i = nunits - 1; i >= 0; --i)
2073 t = tree_cons (NULL_TREE, def, t);
2076 vec_inv = build_constructor (vectype, t);
2077 return vect_init_vector (stmt, vec_inv);
2081 /* Function vect_finish_stmt_generation.
2083 Insert a new stmt. */
2086 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2088 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2090 if (vect_debug_details (NULL))
2092 fprintf (dump_file, "add new stmt: ");
2093 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2096 /* Make sure bsi points to the stmt that is being vectorized. */
2098 /* Assumption: any stmts created for the vectorization of stmt S were
2099 inserted before S. BSI is expected to point to S or some new stmt before S. */
2101 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2103 gcc_assert (stmt == bsi_stmt (*bsi));
2107 /* Function vectorizable_assignment.
2109 Check if STMT performs an assignment (copy) that can be vectorized.
2110 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2111 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2112 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2115 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2122 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2123 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2126 /* Is vectorizable assignment? */
2128 if (TREE_CODE (stmt) != MODIFY_EXPR)
2131 scalar_dest = TREE_OPERAND (stmt, 0);
2132 if (TREE_CODE (scalar_dest) != SSA_NAME)
2135 op = TREE_OPERAND (stmt, 1);
2136 if (!vect_is_simple_use (op, loop, NULL))
2138 if (vect_debug_details (NULL))
2139 fprintf (dump_file, "use not simple.");
2143 if (!vec_stmt) /* transformation not required. */
2145 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2150 if (vect_debug_details (NULL))
2151 fprintf (dump_file, "transform assignment.");
2154 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2157 op = TREE_OPERAND (stmt, 1);
2158 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2160 /* Arguments are ready. create the new vector stmt. */
2161 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2162 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2163 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2164 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2170 /* Function vectorizable_operation.
2172 Check if STMT performs a binary or unary operation that can be vectorized.
2173 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2174 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2175 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2178 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2183 tree op0, op1 = NULL;
2184 tree vec_oprnd0, vec_oprnd1=NULL;
2185 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2186 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2187 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2189 enum tree_code code;
2190 enum machine_mode vec_mode;
2196 /* Is STMT a vectorizable binary/unary operation? */
2197 if (TREE_CODE (stmt) != MODIFY_EXPR)
2200 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2203 operation = TREE_OPERAND (stmt, 1);
2204 code = TREE_CODE (operation);
2205 optab = optab_for_tree_code (code, vectype);
2207 /* Support only unary or binary operations. */
2208 op_type = TREE_CODE_LENGTH (code);
2209 if (op_type != unary_op && op_type != binary_op)
2211 if (vect_debug_details (NULL))
2212 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2216 for (i = 0; i < op_type; i++)
2218 op = TREE_OPERAND (operation, i);
2219 if (!vect_is_simple_use (op, loop, NULL))
2221 if (vect_debug_details (NULL))
2222 fprintf (dump_file, "use not simple.");
2227 /* Supportable by target? */
2230 if (vect_debug_details (NULL))
2231 fprintf (dump_file, "no optab.");
2234 vec_mode = TYPE_MODE (vectype);
2235 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2237 if (vect_debug_details (NULL))
2238 fprintf (dump_file, "op not supported by target.");
2242 if (!vec_stmt) /* transformation not required. */
2244 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2250 if (vect_debug_details (NULL))
2251 fprintf (dump_file, "transform binary/unary operation.");
2254 scalar_dest = TREE_OPERAND (stmt, 0);
2255 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2258 op0 = TREE_OPERAND (operation, 0);
2259 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2261 if (op_type == binary_op)
2263 op1 = TREE_OPERAND (operation, 1);
2264 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2267 /* Arguments are ready. create the new vector stmt. */
2269 if (op_type == binary_op)
2270 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2271 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2273 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2274 build1 (code, vectype, vec_oprnd0));
2275 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2276 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2277 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2283 /* Function vectorizable_store.
2285 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2287 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2288 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2289 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2292 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2298 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2299 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2300 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2301 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2302 enum machine_mode vec_mode;
2304 enum dr_alignment_support alignment_support_cheme;
2306 /* Is vectorizable store? */
2308 if (TREE_CODE (stmt) != MODIFY_EXPR)
2311 scalar_dest = TREE_OPERAND (stmt, 0);
2312 if (TREE_CODE (scalar_dest) != ARRAY_REF
2313 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2316 op = TREE_OPERAND (stmt, 1);
2317 if (!vect_is_simple_use (op, loop, NULL))
2319 if (vect_debug_details (NULL))
2320 fprintf (dump_file, "use not simple.");
2324 vec_mode = TYPE_MODE (vectype);
2325 /* FORNOW. In some cases can vectorize even if data-type not supported
2326 (e.g. - array initialization with 0). */
2327 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2330 if (!STMT_VINFO_DATA_REF (stmt_info))
2334 if (!vec_stmt) /* transformation not required. */
2336 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2342 if (vect_debug_details (NULL))
2343 fprintf (dump_file, "transform store");
2345 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2346 gcc_assert (alignment_support_cheme);
2347 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2349 /* Handle use - get the vectorized def from the defining stmt. */
2350 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2353 /* FORNOW: make sure the data reference is aligned. */
2354 vect_align_data_ref (stmt);
2355 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2356 data_ref = build_fold_indirect_ref (data_ref);
2358 /* Arguments are ready. create the new vector stmt. */
2359 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2360 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2366 /* vectorizable_load.
2368 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2370 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2371 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2372 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2375 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2378 tree vec_dest = NULL;
2379 tree data_ref = NULL;
2381 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2382 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2383 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2390 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2391 edge pe = loop_preheader_edge (loop);
2392 enum dr_alignment_support alignment_support_cheme;
2394 /* Is vectorizable load? */
2396 if (TREE_CODE (stmt) != MODIFY_EXPR)
2399 scalar_dest = TREE_OPERAND (stmt, 0);
2400 if (TREE_CODE (scalar_dest) != SSA_NAME)
2403 op = TREE_OPERAND (stmt, 1);
2404 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2407 if (!STMT_VINFO_DATA_REF (stmt_info))
2410 mode = (int) TYPE_MODE (vectype);
2412 /* FORNOW. In some cases can vectorize even if data-type not supported
2413 (e.g. - data copies). */
2414 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2416 if (vect_debug_details (loop))
2417 fprintf (dump_file, "Aligned load, but unsupported type.");
2421 if (!vec_stmt) /* transformation not required. */
2423 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2429 if (vect_debug_details (NULL))
2430 fprintf (dump_file, "transform load.");
2432 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2433 gcc_assert (alignment_support_cheme);
2435 if (alignment_support_cheme == dr_aligned
2436 || alignment_support_cheme == dr_unaligned_supported)
2447 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2448 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2449 if (aligned_access_p (dr))
2450 data_ref = build_fold_indirect_ref (data_ref);
2453 int mis = DR_MISALIGNMENT (dr);
2454 tree tmis = (mis == -1 ?
2456 build_int_cst (integer_type_node, mis));
2457 tmis = int_const_binop (MULT_EXPR, tmis,
2458 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2459 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2461 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2462 new_temp = make_ssa_name (vec_dest, new_stmt);
2463 TREE_OPERAND (new_stmt, 0) = new_temp;
2464 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2466 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2470 msq_init = *(floor(p1))
2471 p2 = initial_addr + VS - 1;
2472 magic = have_builtin ? builtin_result : initial_address;
2475 p2' = p2 + indx * vectype_size
2477 vec_dest = realign_load (msq, lsq, magic)
2491 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2492 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2493 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2495 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2496 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2497 new_temp = make_ssa_name (vec_dest, new_stmt);
2498 TREE_OPERAND (new_stmt, 0) = new_temp;
2499 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2500 gcc_assert (!new_bb);
2501 msq_init = TREE_OPERAND (new_stmt, 0);
2504 /* <2> Create lsq = *(floor(p2')) in the loop */
2505 offset = build_int_cst (integer_type_node,
2506 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2507 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2508 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2509 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2510 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2511 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2512 new_temp = make_ssa_name (vec_dest, new_stmt);
2513 TREE_OPERAND (new_stmt, 0) = new_temp;
2514 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2515 lsq = TREE_OPERAND (new_stmt, 0);
2519 if (targetm.vectorize.builtin_mask_for_load)
2521 /* Create permutation mask, if required, in loop preheader. */
2523 params = build_tree_list (NULL_TREE, init_addr);
2524 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2525 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2526 new_stmt = build_function_call_expr (builtin_decl, params);
2527 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2528 new_temp = make_ssa_name (vec_dest, new_stmt);
2529 TREE_OPERAND (new_stmt, 0) = new_temp;
2530 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2531 gcc_assert (!new_bb);
2532 magic = TREE_OPERAND (new_stmt, 0);
2536 /* Use current address instead of init_addr for reduced reg pressure.
2538 magic = dataref_ptr;
2542 /* <4> Create msq = phi <msq_init, lsq> in loop */
2543 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2544 msq = make_ssa_name (vec_dest, NULL_TREE);
2545 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2546 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2547 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2548 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2551 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2552 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2553 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2554 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2555 new_temp = make_ssa_name (vec_dest, new_stmt);
2556 TREE_OPERAND (new_stmt, 0) = new_temp;
2557 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2562 *vec_stmt = new_stmt;
2567 /* Function vect_supportable_dr_alignment
2569 Return whether the data reference DR is supported with respect to its
2572 static enum dr_alignment_support
2573 vect_supportable_dr_alignment (struct data_reference *dr)
2575 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2576 enum machine_mode mode = (int) TYPE_MODE (vectype);
2578 if (aligned_access_p (dr))
2581 /* Possibly unaligned access. */
2583 if (DR_IS_READ (dr))
2585 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2586 && (!targetm.vectorize.builtin_mask_for_load
2587 || targetm.vectorize.builtin_mask_for_load ()))
2588 return dr_unaligned_software_pipeline;
2590 if (targetm.vectorize.misaligned_mem_ok (mode))
2591 /* Can't software pipeline the loads. */
2592 return dr_unaligned_supported;
2596 return dr_unaligned_unsupported;
2600 /* Function vect_transform_stmt.
2602 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2605 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2607 bool is_store = false;
2608 tree vec_stmt = NULL_TREE;
2609 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2612 switch (STMT_VINFO_TYPE (stmt_info))
2614 case op_vec_info_type:
2615 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2619 case assignment_vec_info_type:
2620 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2624 case load_vec_info_type:
2625 done = vectorizable_load (stmt, bsi, &vec_stmt);
2629 case store_vec_info_type:
2630 done = vectorizable_store (stmt, bsi, &vec_stmt);
2635 if (vect_debug_details (NULL))
2636 fprintf (dump_file, "stmt not supported.");
2640 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2646 /* This function builds ni_name = number of iterations loop executes
2647 on the loop preheader. */
2650 vect_build_loop_niters (loop_vec_info loop_vinfo)
2652 tree ni_name, stmt, var;
2654 basic_block new_bb = NULL;
2655 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2656 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2658 var = create_tmp_var (TREE_TYPE (ni), "niters");
2659 add_referenced_tmp_var (var);
2660 if (TREE_CODE (ni) == INTEGER_CST)
2662 /* This case is generated when treating a known loop bound
2663 indivisible by VF. Here we cannot use force_gimple_operand. */
2664 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2665 ni_name = make_ssa_name (var, stmt);
2666 TREE_OPERAND (stmt, 0) = ni_name;
2669 ni_name = force_gimple_operand (ni, &stmt, false, var);
2671 pe = loop_preheader_edge (loop);
2673 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2675 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2681 /* This function generates the following statements:
2683 ni_name = number of iterations loop executes
2684 ratio = ni_name / vf
2685 ratio_mult_vf_name = ratio * vf
2687 and places them at the loop preheader edge. */
2690 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2691 tree *ratio_mult_vf_name_p, tree *ratio_p)
2698 tree ratio_mult_vf_name, ratio_mult_vf;
2699 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2700 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2704 /* Generate temporary variable that contains
2705 number of iterations loop executes. */
2707 ni_name = vect_build_loop_niters (loop_vinfo);
2710 vf is power of 2; then if ratio = = n >> log2 (vf). */
2711 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2712 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2714 /* Update initial conditions of loop copy. */
2716 /* ratio_mult_vf = ratio * vf;
2717 then if ratio_mult_vf = ratio << log2 (vf). */
2719 i = exact_log2 (vf);
2720 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2721 add_referenced_tmp_var (ratio_mult_vf);
2723 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2725 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2726 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2727 ratio, build_int_cst (unsigned_type_node,
2730 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2732 pe = loop_preheader_edge (loop);
2733 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2735 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2737 *ni_name_p = ni_name;
2738 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2745 /* This function generates stmt
2749 and attaches it to preheader of LOOP. */
2752 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2754 tree var, stmt, var_name;
2759 /* create temporary variable */
2760 var = create_tmp_var (TREE_TYPE (n), "bnd");
2761 add_referenced_tmp_var (var);
2763 var_name = make_ssa_name (var, NULL_TREE);
2765 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2767 i = exact_log2 (vf);
2768 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2769 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2770 n, build_int_cst (unsigned_type_node,i)));
2772 SSA_NAME_DEF_STMT (var_name) = stmt;
2774 pe = loop_preheader_edge (loop);
2775 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2777 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2779 if (vect_debug_details (NULL))
2780 fprintf (dump_file, "New bb on preheader edge was not generated.");
2786 /* Function vect_transform_loop_bound.
2788 Create a new exit condition for the loop. */
2791 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2793 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2794 edge exit_edge = loop->single_exit;
2795 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
2796 tree indx_before_incr, indx_after_incr;
2797 tree orig_cond_expr;
2798 HOST_WIDE_INT old_N = 0;
2801 tree new_loop_bound;
2806 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2809 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2811 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2813 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2814 #ifdef ENABLE_CHECKING
2815 gcc_assert (orig_cond_expr);
2817 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
2819 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
2820 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
2822 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
2823 to point to the exit condition. */
2824 bsi_next (&loop_exit_bsi);
2825 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
2827 /* new loop exit test: */
2828 lb_type = TREE_TYPE (TREE_OPERAND (COND_EXPR_COND (orig_cond_expr), 1));
2830 new_loop_bound = fold_convert (lb_type,
2831 build_int_cst (unsigned_type_node,
2834 new_loop_bound = niters;
2836 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
2837 cond = build2 (GE_EXPR, boolean_type_node,
2838 indx_after_incr, new_loop_bound);
2839 else /* 'then' edge loops back. */
2840 cond = build2 (LT_EXPR, boolean_type_node,
2841 indx_after_incr, new_loop_bound);
2843 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
2844 COND_EXPR_THEN (orig_cond_expr),
2845 COND_EXPR_ELSE (orig_cond_expr));
2847 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
2849 /* remove old loop exit test: */
2850 bsi_remove (&loop_exit_bsi);
2852 if (vect_debug_details (NULL))
2853 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
2855 loop->nb_iterations = new_loop_bound;
2859 /* Function vect_update_ivs_after_vectorizer.
2861 "Advance" the induction variables of LOOP to the value they should take
2862 after the execution of LOOP. This is currently necessary because the
2863 vectorizer does not handle induction variables that are used after the
2864 loop. Such a situation occurs when the last iterations of LOOP are
2866 1. We introduced new uses after LOOP for IVs that were not originally used
2867 after LOOP: the IVs of LOOP are now used by an epilog loop.
2868 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2869 times, whereas the loop IVs should be bumped N times.
2872 - LOOP - a loop that is going to be vectorized. The last few iterations
2873 of LOOP were peeled.
2874 - NITERS - the number of iterations that LOOP executes (before it is
2875 vectorized). i.e, the number of times the ivs should be bumped.
2880 if (guard-cond) GOTO bb_before_epilog_loop
2887 bb_before_epilog_loop:
2889 bb_before_epilog_loop has edges coming in form the loop exit and
2890 from bb_before_loop. New definitions for ivs will be placed on the edge
2891 from loop->exit to bb_before_epilog_loop. This also requires that we update
2892 the phis in bb_before_epilog_loop. (In the code this bb is denoted
2895 Assumption 1: Like the rest of the vectorizer, this function assumes
2896 a single loop exit that has a single predecessor.
2898 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2899 organized in the same order.
2901 Assumption 3: The access function of the ivs is simple enough (see
2902 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2906 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2908 edge exit = loop->exit_edges[0];
2910 basic_block update_bb = exit->dest;
2913 /* Generate basic block at the exit from the loop. */
2914 basic_block new_bb = split_edge (exit);
2916 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2917 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2918 update_e = EDGE_SUCC (new_bb, 0);
2920 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2922 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2924 tree access_fn = NULL;
2925 tree evolution_part;
2928 tree var, stmt, ni, ni_name;
2929 block_stmt_iterator last_bsi;
2931 /* Skip virtual phi's. The data dependences that are associated with
2932 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2934 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2936 if (vect_debug_details (NULL))
2937 fprintf (dump_file, "virtual phi. skip.");
2941 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2942 gcc_assert (access_fn);
2944 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2946 /* FORNOW: We do not transform initial conditions of IVs
2947 which evolution functions are a polynomial of degree >= 2 or
2949 gcc_assert (!tree_is_chrec (evolution_part));
2951 step_expr = evolution_part;
2952 init_expr = unshare_expr (initial_condition (access_fn));
2954 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2955 build2 (MULT_EXPR, TREE_TYPE (niters),
2956 niters, step_expr), init_expr);
2958 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2959 add_referenced_tmp_var (var);
2961 ni_name = force_gimple_operand (ni, &stmt, false, var);
2963 /* Insert stmt into new_bb. */
2964 last_bsi = bsi_last (new_bb);
2966 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2968 /* Fix phi expressions in duplicated loop. */
2969 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2970 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2971 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2976 /* This function is the main driver of transformation
2977 to be done for loop before vectorizing it in case of
2978 unknown loop bound. */
2981 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
2982 struct loops *loops)
2985 tree ni_name, ratio_mult_vf_name;
2986 #ifdef ENABLE_CHECKING
2989 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2990 struct loop *new_loop;
2992 if (vect_debug_details (NULL))
2993 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2995 /* Generate the following variables on the preheader of original loop:
2997 ni_name = number of iteration the original loop executes
2998 ratio = ni_name / vf
2999 ratio_mult_vf_name = ratio * vf */
3000 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3001 &ratio_mult_vf_name, ratio);
3003 /* Update loop info. */
3004 loop->pre_header = loop_preheader_edge (loop)->src;
3005 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3007 #ifdef ENABLE_CHECKING
3008 loop_num = loop->num;
3010 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3011 ratio_mult_vf_name, ni_name, true);
3012 #ifdef ENABLE_CHECKING
3013 gcc_assert (new_loop);
3014 gcc_assert (loop_num == loop->num);
3017 /* Update IVs of original loop as if they were advanced
3018 by ratio_mult_vf_name steps. */
3020 #ifdef ENABLE_CHECKING
3021 /* Check existence of intermediate bb. */
3022 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3024 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3031 /* Function vect_gen_niters_for_prolog_loop
3033 Set the number of iterations for the loop represented by LOOP_VINFO
3034 to the minimum between NITERS (the original iteration count of the loop)
3035 and the misalignment of DR - the first data reference recorded in
3036 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3037 this loop, the data reference DR will refer to an aligned location. */
3040 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3042 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3043 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3044 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3046 tree iters, iters_name;
3049 tree dr_stmt = DR_STMT (dr);
3050 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3051 tree start_addr, byte_miss_align, elem_miss_align;
3052 int vec_type_align =
3053 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3056 tree new_stmt_list = NULL_TREE;
3058 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3059 &new_stmt_list, NULL_TREE);
3061 pe = loop_preheader_edge (loop);
3062 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3064 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3067 build (BIT_AND_EXPR, integer_type_node, start_addr,
3068 build (MINUS_EXPR, integer_type_node,
3069 build_int_cst (unsigned_type_node,
3070 vec_type_align), integer_one_node));
3071 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3072 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3073 byte_miss_align, tmp1);
3076 build (BIT_AND_EXPR, integer_type_node,
3077 build (MINUS_EXPR, integer_type_node,
3078 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3079 build (MINUS_EXPR, integer_type_node,
3080 build_int_cst (unsigned_type_node, vf), integer_one_node));
3082 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3083 var = create_tmp_var (TREE_TYPE (iters), "iters");
3084 add_referenced_tmp_var (var);
3085 iters_name = force_gimple_operand (iters, &stmt, false, var);
3087 /* Insert stmt on loop preheader edge. */
3088 pe = loop_preheader_edge (loop);
3090 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3092 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3098 /* Function vect_update_niters_after_peeling
3100 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3101 The new number of iterations is therefore original_niters - NITERS.
3102 Record the new number of iterations in LOOP_VINFO. */
3105 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3107 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3108 LOOP_VINFO_NITERS (loop_vinfo) =
3109 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3113 /* Function vect_update_inits_of_dr
3115 NITERS iterations were peeled from LOOP. DR represents a data reference
3116 in LOOP. This function updates the information recorded in DR to
3117 account for the fact that the first NITERS iterations had already been
3118 executed. Specifically, it updates the initial_condition of the
3119 access_function of DR. */
3122 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3125 tree access_fn = DR_ACCESS_FN (dr, 0);
3126 tree init, init_new, step;
3128 step = evolution_part_in_loop_num (access_fn, loop->num);
3129 init = initial_condition (access_fn);
3131 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3132 build (MULT_EXPR, TREE_TYPE (niters),
3133 niters, step), init);
3134 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3140 /* Function vect_update_inits_of_drs
3142 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3143 This function updates the information recorded for the data references in
3144 the loop to account for the fact that the first NITERS iterations had
3145 already been executed. Specifically, it updates the initial_condition of the
3146 access_function of all the data_references in the loop. */
3149 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3152 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3153 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3156 if (dump_file && (dump_flags & TDF_DETAILS))
3157 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3159 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3161 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3162 vect_update_inits_of_dr (dr, loop, niters);
3165 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3167 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3168 vect_update_inits_of_dr (dr, loop, niters);
3173 /* Function vect_do_peeling_for_alignment
3175 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3176 'niters' is set to the misalignment of one of the data references in the
3177 loop, thereby forcing it to refer to an aligned location at the beginning
3178 of the execution of this loop. The data reference for which we are
3179 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3182 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3185 tree niters_of_prolog_loop, ni_name;
3187 if (vect_debug_details (NULL))
3188 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3190 ni_name = vect_build_loop_niters (loop_vinfo);
3191 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3194 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3195 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3196 niters_of_prolog_loop, ni_name, false);
3198 /* Update number of times loop executes. */
3199 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3201 /* Update all inits of access functions of all data refs. */
3202 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3204 /* After peeling we have to reset scalar evolution analyzer. */
3211 /* Function vect_transform_loop.
3213 The analysis phase has determined that the loop is vectorizable.
3214 Vectorize the loop - created vectorized stmts to replace the scalar
3215 stmts in the loop, and update the loop exit condition. */
3218 vect_transform_loop (loop_vec_info loop_vinfo,
3219 struct loops *loops ATTRIBUTE_UNUSED)
3221 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3222 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3223 int nbbs = loop->num_nodes;
3224 block_stmt_iterator si;
3227 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3229 if (vect_debug_details (NULL))
3230 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3233 /* Peel the loop if there are data refs with unknown alignment.
3234 Only one data ref with unknown store is allowed. */
3237 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3238 vect_do_peeling_for_alignment (loop_vinfo, loops);
3240 /* If the loop has a symbolic number of iterations 'n'
3241 (i.e. it's not a compile time constant),
3242 then an epilog loop needs to be created. We therefore duplicate
3243 the initial loop. The original loop will be vectorized, and will compute
3244 the first (n/VF) iterations. The second copy of the loop will remain
3245 serial and will compute the remaining (n%VF) iterations.
3246 (VF is the vectorization factor). */
3248 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3249 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3251 /* FORNOW: we'll treat the case where niters is constant and
3255 in the way similar to one with symbolic niters.
3256 For this we'll generate variable which value is equal to niters. */
3258 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3259 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3260 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3263 /* 1) Make sure the loop header has exactly two entries
3264 2) Make sure we have a preheader basic block. */
3266 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3268 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3271 /* FORNOW: the vectorizer supports only loops which body consist
3272 of one basic block (header + empty latch). When the vectorizer will
3273 support more involved loop forms, the order by which the BBs are
3274 traversed need to be reconsidered. */
3276 for (i = 0; i < nbbs; i++)
3278 basic_block bb = bbs[i];
3280 for (si = bsi_start (bb); !bsi_end_p (si);)
3282 tree stmt = bsi_stmt (si);
3283 stmt_vec_info stmt_info;
3286 if (vect_debug_details (NULL))
3288 fprintf (dump_file, "------>vectorizing statement: ");
3289 print_generic_expr (dump_file, stmt, TDF_SLIM);
3291 stmt_info = vinfo_for_stmt (stmt);
3292 gcc_assert (stmt_info);
3293 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3298 #ifdef ENABLE_CHECKING
3299 /* FORNOW: Verify that all stmts operate on the same number of
3300 units and no inner unrolling is necessary. */
3302 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3303 == vectorization_factor);
3305 /* -------- vectorize statement ------------ */
3306 if (vect_debug_details (NULL))
3307 fprintf (dump_file, "transform statement.");
3309 is_store = vect_transform_stmt (stmt, &si);
3312 /* free the attached stmt_vec_info and remove the stmt. */
3313 stmt_ann_t ann = stmt_ann (stmt);
3315 set_stmt_info (ann, NULL);
3324 vect_transform_loop_bound (loop_vinfo, ratio);
3326 if (vect_debug_details (loop))
3327 fprintf (dump_file,"Success! loop vectorized.");
3328 if (vect_debug_stats (loop))
3329 fprintf (dump_file, "LOOP VECTORIZED.");
3333 /* Function vect_is_simple_use.
3336 LOOP - the loop that is being vectorized.
3337 OPERAND - operand of a stmt in LOOP.
3338 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3340 Returns whether a stmt with OPERAND can be vectorized.
3341 Supportable operands are constants, loop invariants, and operands that are
3342 defined by the current iteration of the loop. Unsupportable operands are
3343 those that are defined by a previous iteration of the loop (as is the case
3344 in reduction/induction computations). */
3347 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3355 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3358 if (TREE_CODE (operand) != SSA_NAME)
3361 def_stmt = SSA_NAME_DEF_STMT (operand);
3362 if (def_stmt == NULL_TREE )
3364 if (vect_debug_details (NULL))
3365 fprintf (dump_file, "no def_stmt.");
3369 /* empty stmt is expected only in case of a function argument.
3370 (Otherwise - we expect a phi_node or a modify_expr). */
3371 if (IS_EMPTY_STMT (def_stmt))
3373 tree arg = TREE_OPERAND (def_stmt, 0);
3374 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3376 if (vect_debug_details (NULL))
3378 fprintf (dump_file, "Unexpected empty stmt: ");
3379 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3384 /* phi_node inside the loop indicates an induction/reduction pattern.
3385 This is not supported yet. */
3386 bb = bb_for_stmt (def_stmt);
3387 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3389 if (vect_debug_details (NULL))
3390 fprintf (dump_file, "reduction/induction - unsupported.");
3391 return false; /* FORNOW: not supported yet. */
3394 /* Expecting a modify_expr or a phi_node. */
3395 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3396 || TREE_CODE (def_stmt) == PHI_NODE)
3407 /* Function vect_analyze_operations.
3409 Scan the loop stmts and make sure they are all vectorizable. */
3412 vect_analyze_operations (loop_vec_info loop_vinfo)
3414 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3415 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3416 int nbbs = loop->num_nodes;
3417 block_stmt_iterator si;
3418 int vectorization_factor = 0;
3423 if (vect_debug_details (NULL))
3424 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3426 for (i = 0; i < nbbs; i++)
3428 basic_block bb = bbs[i];
3430 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3432 tree stmt = bsi_stmt (si);
3434 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3437 if (vect_debug_details (NULL))
3439 fprintf (dump_file, "==> examining statement: ");
3440 print_generic_expr (dump_file, stmt, TDF_SLIM);
3443 gcc_assert (stmt_info);
3445 /* skip stmts which do not need to be vectorized.
3446 this is expected to include:
3447 - the COND_EXPR which is the loop exit condition
3448 - any LABEL_EXPRs in the loop
3449 - computations that are used only for array indexing or loop
3452 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3454 if (vect_debug_details (NULL))
3455 fprintf (dump_file, "irrelevant.");
3459 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3461 if (vect_debug_stats (loop) || vect_debug_details (loop))
3463 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3464 print_generic_expr (dump_file, stmt, TDF_SLIM);
3469 if (STMT_VINFO_DATA_REF (stmt_info))
3470 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3471 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3472 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3474 scalar_type = TREE_TYPE (stmt);
3476 if (vect_debug_details (NULL))
3478 fprintf (dump_file, "get vectype for scalar type: ");
3479 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3482 vectype = get_vectype_for_scalar_type (scalar_type);
3485 if (vect_debug_stats (loop) || vect_debug_details (loop))
3487 fprintf (dump_file, "not vectorized: unsupported data-type ");
3488 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3493 if (vect_debug_details (NULL))
3495 fprintf (dump_file, "vectype: ");
3496 print_generic_expr (dump_file, vectype, TDF_SLIM);
3498 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3500 ok = (vectorizable_operation (stmt, NULL, NULL)
3501 || vectorizable_assignment (stmt, NULL, NULL)
3502 || vectorizable_load (stmt, NULL, NULL)
3503 || vectorizable_store (stmt, NULL, NULL));
3507 if (vect_debug_stats (loop) || vect_debug_details (loop))
3509 fprintf (dump_file, "not vectorized: stmt not supported: ");
3510 print_generic_expr (dump_file, stmt, TDF_SLIM);
3515 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3516 if (vect_debug_details (NULL))
3517 fprintf (dump_file, "nunits = %d", nunits);
3519 if (vectorization_factor)
3521 /* FORNOW: don't allow mixed units.
3522 This restriction will be relaxed in the future. */
3523 if (nunits != vectorization_factor)
3525 if (vect_debug_stats (loop) || vect_debug_details (loop))
3526 fprintf (dump_file, "not vectorized: mixed data-types");
3531 vectorization_factor = nunits;
3533 #ifdef ENABLE_CHECKING
3534 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3535 * vectorization_factor == UNITS_PER_SIMD_WORD);
3540 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3542 if (vectorization_factor <= 1)
3544 if (vect_debug_stats (loop) || vect_debug_details (loop))
3545 fprintf (dump_file, "not vectorized: unsupported data-type");
3548 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3550 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3552 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3553 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3555 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3556 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3558 if (vect_debug_stats (loop) || vect_debug_details (loop))
3559 fprintf (dump_file, "epilog loop required.");
3560 if (!vect_can_advance_ivs_p (loop))
3562 if (vect_debug_stats (loop) || vect_debug_details (loop))
3563 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3566 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3568 if (vect_debug_stats (loop) || vect_debug_details (loop))
3569 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3578 /* Function exist_non_indexing_operands_for_use_p
3580 USE is one of the uses attached to STMT. Check if USE is
3581 used in STMT for anything other than indexing an array. */
3584 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3589 /* USE corresponds to some operand in STMT. If there is no data
3590 reference in STMT, then any operand that corresponds to USE
3591 is not indexing an array. */
3592 if (!STMT_VINFO_DATA_REF (stmt_info))
3595 /* STMT has a data_ref. FORNOW this means that its of one of
3596 the following forms:
3599 (This should have been verified in analyze_data_refs).
3601 'var' in the second case corresponds to a def, not a use,
3602 so USE cannot correspond to any operands that are not used
3605 Therefore, all we need to check is if STMT falls into the
3606 first case, and whether var corresponds to USE. */
3608 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3611 operand = TREE_OPERAND (stmt, 1);
3613 if (TREE_CODE (operand) != SSA_NAME)
3623 /* Function vect_is_simple_iv_evolution.
3625 FORNOW: A simple evolution of an induction variables in the loop is
3626 considered a polynomial evolution with constant step. */
3629 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3630 tree * step, bool strict)
3635 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3637 /* When there is no evolution in this loop, the evolution function
3639 if (evolution_part == NULL_TREE)
3642 /* When the evolution is a polynomial of degree >= 2
3643 the evolution function is not "simple". */
3644 if (tree_is_chrec (evolution_part))
3647 step_expr = evolution_part;
3648 init_expr = unshare_expr (initial_condition (access_fn));
3650 if (vect_debug_details (NULL))
3652 fprintf (dump_file, "step: ");
3653 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3654 fprintf (dump_file, ", init: ");
3655 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3661 if (TREE_CODE (step_expr) != INTEGER_CST)
3663 if (vect_debug_details (NULL))
3664 fprintf (dump_file, "step unknown.");
3669 if (!integer_onep (step_expr))
3671 if (vect_debug_details (NULL))
3672 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3680 /* Function vect_analyze_scalar_cycles.
3682 Examine the cross iteration def-use cycles of scalar variables, by
3683 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3684 cycles that they represent do not impede vectorization.
3686 FORNOW: Reduction as in the following loop, is not supported yet:
3690 The cross-iteration cycle corresponding to variable 'sum' will be
3691 considered too complicated and will impede vectorization.
3693 FORNOW: Induction as in the following loop, is not supported yet:
3698 However, the following loop *is* vectorizable:
3703 In both loops there exists a def-use cycle for the variable i:
3704 loop: i_2 = PHI (i_0, i_1)
3709 The evolution of the above cycle is considered simple enough,
3710 however, we also check that the cycle does not need to be
3711 vectorized, i.e - we check that the variable that this cycle
3712 defines is only used for array indexing or in stmts that do not
3713 need to be vectorized. This is not the case in loop2, but it
3714 *is* the case in loop3. */
3717 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3720 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3721 basic_block bb = loop->header;
3724 if (vect_debug_details (NULL))
3725 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3727 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3729 tree access_fn = NULL;
3731 if (vect_debug_details (NULL))
3733 fprintf (dump_file, "Analyze phi: ");
3734 print_generic_expr (dump_file, phi, TDF_SLIM);
3737 /* Skip virtual phi's. The data dependences that are associated with
3738 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3740 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3742 if (vect_debug_details (NULL))
3743 fprintf (dump_file, "virtual phi. skip.");
3747 /* Analyze the evolution function. */
3749 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3750 those of loop induction variables; This property is verified here.
3752 Furthermore, if that induction variable is used in an operation
3753 that needs to be vectorized (i.e, is not solely used to index
3754 arrays and check the exit condition) - we do not support its
3755 vectorization yet. This property is verified in vect_is_simple_use,
3756 during vect_analyze_operations. */
3758 access_fn = /* instantiate_parameters
3760 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3764 if (vect_debug_stats (loop) || vect_debug_details (loop))
3765 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3769 if (vect_debug_details (NULL))
3771 fprintf (dump_file, "Access function of PHI: ");
3772 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3775 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3778 if (vect_debug_stats (loop) || vect_debug_details (loop))
3779 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3788 /* Function vect_analyze_data_ref_dependence.
3790 Return TRUE if there (might) exist a dependence between a memory-reference
3791 DRA and a memory-reference DRB. */
3794 vect_analyze_data_ref_dependence (struct data_reference *dra,
3795 struct data_reference *drb,
3799 struct data_dependence_relation *ddr;
3801 if (!array_base_name_differ_p (dra, drb, &differ_p))
3803 if (vect_debug_stats (loop) || vect_debug_details (loop))
3806 "not vectorized: can't determine dependence between: ");
3807 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3808 fprintf (dump_file, " and ");
3809 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3817 ddr = initialize_data_dependence_relation (dra, drb);
3818 compute_affine_dependence (ddr);
3820 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3823 if (vect_debug_stats (loop) || vect_debug_details (loop))
3826 "not vectorized: possible dependence between data-refs ");
3827 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3828 fprintf (dump_file, " and ");
3829 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3836 /* Function vect_analyze_data_ref_dependences.
3838 Examine all the data references in the loop, and make sure there do not
3839 exist any data dependences between them.
3841 TODO: dependences which distance is greater than the vectorization factor
3845 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3848 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3849 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3850 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3852 /* Examine store-store (output) dependences. */
3854 if (vect_debug_details (NULL))
3855 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3857 if (vect_debug_details (NULL))
3858 fprintf (dump_file, "compare all store-store pairs.");
3860 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3862 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3864 struct data_reference *dra =
3865 VARRAY_GENERIC_PTR (loop_write_refs, i);
3866 struct data_reference *drb =
3867 VARRAY_GENERIC_PTR (loop_write_refs, j);
3868 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3873 /* Examine load-store (true/anti) dependences. */
3875 if (vect_debug_details (NULL))
3876 fprintf (dump_file, "compare all load-store pairs.");
3878 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3880 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3882 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3883 struct data_reference *drb =
3884 VARRAY_GENERIC_PTR (loop_write_refs, j);
3885 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3894 /* Function vect_get_first_index.
3896 REF is a data reference.
3897 If it is an ARRAY_REF: if its lower bound is simple enough,
3898 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3899 If it is not an ARRAY_REF: REF has no "first index";
3900 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3903 vect_get_first_index (tree ref, tree *array_first_index)
3907 if (TREE_CODE (ref) != ARRAY_REF)
3908 *array_first_index = size_zero_node;
3911 array_start = array_ref_low_bound (ref);
3912 if (!host_integerp (array_start,0))
3914 if (vect_debug_details (NULL))
3916 fprintf (dump_file, "array min val not simple integer cst.");
3917 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3921 *array_first_index = array_start;
3928 /* Function vect_compute_array_base_alignment.
3929 A utility function of vect_compute_array_ref_alignment.
3931 Compute the misalignment of ARRAY in bits.
3934 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3935 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3936 if NULL: don't compute misalignment, just return the base of ARRAY.
3937 PREV_DIMENSIONS - initialized to one.
3938 MISALIGNMENT - the computed misalignment in bits.
3941 If VECTYPE is not NULL:
3942 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3943 the base of the array, and put the computed misalignment in MISALIGNMENT.
3945 Return the base of the array.
3947 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3948 a[idx_N]...[idx_2][idx_1] is
3949 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3950 ... + idx_N * dim_0 * ... * dim_N-1}.
3951 (The misalignment of &a is not checked here).
3952 Note, that every term contains dim_0, therefore, if dim_0 is a
3953 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3954 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3955 NUINTS, we can say that the misalignment of the sum is equal to
3956 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3957 we can't determine this array misalignment, and we return
3959 We proceed recursively in this manner, accumulating total misalignment
3960 and the multiplication of previous dimensions for correct misalignment
3964 vect_compute_array_base_alignment (tree array,
3966 tree *prev_dimensions,
3971 tree dimension_size;
3973 tree bits_per_vectype;
3974 tree bits_per_vectype_unit;
3976 /* The 'stop condition' of the recursion. */
3977 if (TREE_CODE (array) != ARRAY_REF)
3981 /* Just get the base decl. */
3982 return vect_compute_array_base_alignment
3983 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3985 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3986 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3989 domain = TYPE_DOMAIN (TREE_TYPE (array));
3991 int_const_binop (PLUS_EXPR,
3992 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3993 TYPE_MIN_VALUE (domain), 1),
3996 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
3997 is a multiple of NUNITS:
3999 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4001 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4002 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4003 if (integer_zerop (mis))
4004 /* This array is aligned. Continue just in order to get the base decl. */
4005 return vect_compute_array_base_alignment
4006 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4008 index = TREE_OPERAND (array, 1);
4009 if (!host_integerp (index, 1))
4010 /* The current index is not constant. */
4013 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4015 bits_per_vectype = fold_convert (unsigned_type_node,
4016 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4017 GET_MODE_SIZE (TYPE_MODE (vectype))));
4018 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4019 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4020 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4022 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4026 (*misalignment + index_val * dimension_size * *prev_dimensions)
4030 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4031 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4032 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4033 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4034 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4037 *prev_dimensions = int_const_binop (MULT_EXPR,
4038 *prev_dimensions, dimension_size, 1);
4040 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4046 /* Function vect_compute_data_ref_alignment
4048 Compute the misalignment of the data reference DR.
4051 1. If during the misalignment computation it is found that the data reference
4052 cannot be vectorized then false is returned.
4053 2. DR_MISALIGNMENT (DR) is defined.
4055 FOR NOW: No analysis is actually performed. Misalignment is calculated
4056 only for trivial cases. TODO. */
4059 vect_compute_data_ref_alignment (struct data_reference *dr,
4060 loop_vec_info loop_vinfo)
4062 tree stmt = DR_STMT (dr);
4063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4064 tree ref = DR_REF (dr);
4067 tree offset = size_zero_node;
4068 tree base, bit_offset, alignment;
4069 tree unit_bits = fold_convert (unsigned_type_node,
4070 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4072 bool base_aligned_p;
4074 if (vect_debug_details (NULL))
4075 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4077 /* Initialize misalignment to unknown. */
4078 DR_MISALIGNMENT (dr) = -1;
4080 scalar_type = TREE_TYPE (ref);
4081 vectype = get_vectype_for_scalar_type (scalar_type);
4084 if (vect_debug_details (NULL))
4086 fprintf (dump_file, "no vectype for stmt: ");
4087 print_generic_expr (dump_file, stmt, TDF_SLIM);
4088 fprintf (dump_file, " scalar_type: ");
4089 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4091 /* It is not possible to vectorize this data reference. */
4094 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4095 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4097 if (TREE_CODE (ref) == ARRAY_REF)
4100 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4102 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4103 loop_vinfo, &bit_offset, &base_aligned_p);
4106 if (vect_debug_details (NULL))
4108 fprintf (dump_file, "Unknown alignment for access: ");
4109 print_generic_expr (dump_file,
4110 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4115 if (!base_aligned_p)
4117 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4119 if (vect_debug_details (NULL))
4121 fprintf (dump_file, "can't force alignment of ref: ");
4122 print_generic_expr (dump_file, ref, TDF_SLIM);
4127 /* Force the alignment of the decl.
4128 NOTE: This is the only change to the code we make during
4129 the analysis phase, before deciding to vectorize the loop. */
4130 if (vect_debug_details (NULL))
4131 fprintf (dump_file, "force alignment");
4132 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4133 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4136 /* At this point we assume that the base is aligned, and the offset from it
4137 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4138 gcc_assert (base_aligned_p
4139 || (TREE_CODE (base) == VAR_DECL
4140 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4142 /* Convert into bytes. */
4143 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4144 /* Check that there is no remainder in bits. */
4145 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4146 if (!integer_zerop (bit_offset))
4148 if (vect_debug_details (NULL))
4150 fprintf (dump_file, "bit offset alignment: ");
4151 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4156 /* Alignment required, in bytes: */
4157 alignment = fold_convert (unsigned_type_node,
4158 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4160 /* Modulo alignment. */
4161 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4162 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4164 if (vect_debug_details (NULL))
4165 fprintf (dump_file, "unexpected misalign value");
4169 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4171 if (vect_debug_details (NULL))
4172 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4178 /* Function vect_compute_array_ref_alignment
4180 Compute the alignment of an array-ref.
4181 The alignment we compute here is relative to
4182 TYPE_ALIGN(VECTYPE) boundary.
4185 OFFSET - the alignment in bits
4186 Return value - the base of the array-ref. E.g,
4187 if the array-ref is a.b[k].c[i][j] the returned
4192 vect_compute_array_ref_alignment (struct data_reference *dr,
4193 loop_vec_info loop_vinfo,
4197 tree array_first_index = size_zero_node;
4199 tree ref = DR_REF (dr);
4200 tree scalar_type = TREE_TYPE (ref);
4201 tree oprnd0 = TREE_OPERAND (ref, 0);
4202 tree dims = size_one_node;
4203 tree misalign = size_zero_node;
4204 tree next_ref, this_offset = size_zero_node;
4208 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4209 /* The reference is an array without its last index. */
4210 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4213 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4216 /* Alignment is not requested. Just return the base. */
4219 /* Compute alignment. */
4220 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4222 this_offset = misalign;
4224 /* Check the first index accessed. */
4225 if (!vect_get_first_index (ref, &array_first_index))
4227 if (vect_debug_details (NULL))
4228 fprintf (dump_file, "no first_index for array.");
4232 /* Check the index of the array_ref. */
4233 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4234 LOOP_VINFO_LOOP (loop_vinfo)->num);
4236 /* FORNOW: In order to simplify the handling of alignment, we make sure
4237 that the first location at which the array is accessed ('init') is on an
4238 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4239 This is too conservative, since we require that
4240 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4241 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4242 This should be relaxed in the future. */
4244 if (!init || !host_integerp (init, 0))
4246 if (vect_debug_details (NULL))
4247 fprintf (dump_file, "non constant init. ");
4251 /* bytes per scalar element: */
4252 nunits = fold_convert (unsigned_type_node,
4253 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4254 nbits = int_const_binop (MULT_EXPR, nunits,
4255 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4257 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4258 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4259 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4260 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4262 /* TODO: allow negative misalign values. */
4263 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4265 if (vect_debug_details (NULL))
4266 fprintf (dump_file, "unexpected misalign value");
4274 /* Function vect_compute_data_refs_alignment
4276 Compute the misalignment of data references in the loop.
4277 This pass may take place at function granularity instead of at loop
4280 FOR NOW: No analysis is actually performed. Misalignment is calculated
4281 only for trivial cases. TODO. */
4284 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4286 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4287 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4290 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4292 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4293 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4297 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4299 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4300 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4308 /* Function vect_enhance_data_refs_alignment
4310 This pass will use loop versioning and loop peeling in order to enhance
4311 the alignment of data references in the loop.
4313 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4314 original loop is to be vectorized; Any other loops that are created by
4315 the transformations performed in this pass - are not supposed to be
4316 vectorized. This restriction will be relaxed. */
4319 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4321 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4322 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4323 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4327 This pass will require a cost model to guide it whether to apply peeling
4328 or versioning or a combination of the two. For example, the scheme that
4329 intel uses when given a loop with several memory accesses, is as follows:
4330 choose one memory access ('p') which alignment you want to force by doing
4331 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4332 other accesses are not necessarily aligned, or (2) use loop versioning to
4333 generate one loop in which all accesses are aligned, and another loop in
4334 which only 'p' is necessarily aligned.
4336 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4337 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4338 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4340 Devising a cost model is the most critical aspect of this work. It will
4341 guide us on which access to peel for, whether to use loop versioning, how
4342 many versions to create, etc. The cost model will probably consist of
4343 generic considerations as well as target specific considerations (on
4344 powerpc for example, misaligned stores are more painful than misaligned
4347 Here is the general steps involved in alignment enhancements:
4349 -- original loop, before alignment analysis:
4350 for (i=0; i<N; i++){
4351 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4352 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4355 -- After vect_compute_data_refs_alignment:
4356 for (i=0; i<N; i++){
4357 x = q[i]; # DR_MISALIGNMENT(q) = 3
4358 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4361 -- Possibility 1: we do loop versioning:
4363 for (i=0; i<N; i++){ # loop 1A
4364 x = q[i]; # DR_MISALIGNMENT(q) = 3
4365 p[i] = y; # DR_MISALIGNMENT(p) = 0
4369 for (i=0; i<N; i++){ # loop 1B
4370 x = q[i]; # DR_MISALIGNMENT(q) = 3
4371 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4375 -- Possibility 2: we do loop peeling:
4376 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4380 for (i = 3; i < N; i++){ # loop 2A
4381 x = q[i]; # DR_MISALIGNMENT(q) = 0
4382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4385 -- Possibility 3: combination of loop peeling and versioning:
4386 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4391 for (i = 3; i<N; i++){ # loop 3A
4392 x = q[i]; # DR_MISALIGNMENT(q) = 0
4393 p[i] = y; # DR_MISALIGNMENT(p) = 0
4397 for (i = 3; i<N; i++){ # loop 3B
4398 x = q[i]; # DR_MISALIGNMENT(q) = 0
4399 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4403 These loops are later passed to loop_transform to be vectorized. The
4404 vectorizer will use the alignment information to guide the transformation
4405 (whether to generate regular loads/stores, or with special handling for
4409 /* (1) Peeling to force alignment. */
4411 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4413 + How many accesses will become aligned due to the peeling
4414 - How many accesses will become unaligned due to the peeling,
4415 and the cost of misaligned accesses.
4416 - The cost of peeling (the extra runtime checks, the increase
4419 The scheme we use FORNOW: peel to force the alignment of the first
4420 misaligned store in the loop.
4421 Rationale: misaligned stores are not yet supported.
4423 TODO: Use a better cost model. */
4425 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4427 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4428 if (!aligned_access_p (dr))
4430 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4431 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4436 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4438 if (vect_debug_details (loop))
4439 fprintf (dump_file, "Peeling for alignment will not be applied.");
4443 if (vect_debug_details (loop))
4444 fprintf (dump_file, "Peeling for alignment will be applied.");
4447 /* (1.2) Update the alignment info according to the peeling factor.
4448 If the misalignment of the DR we peel for is M, then the
4449 peeling factor is VF - M, and the misalignment of each access DR_i
4450 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4451 If the misalignment of the DR we peel for is unknown, then the
4452 misalignment of each access DR_i in the loop is also unknown.
4454 FORNOW: set the misalignment of the accesses to unknown even
4455 if the peeling factor is known at compile time.
4457 TODO: - if the peeling factor is known at compile time, use that
4458 when updating the misalignment info of the loop DRs.
4459 - consider accesses that are known to have the same
4460 alignment, even if that alignment is unknown. */
4462 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4464 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4465 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4466 DR_MISALIGNMENT (dr) = 0;
4468 DR_MISALIGNMENT (dr) = -1;
4470 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4472 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4473 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4474 DR_MISALIGNMENT (dr) = 0;
4476 DR_MISALIGNMENT (dr) = -1;
4481 /* Function vect_analyze_data_refs_alignment
4483 Analyze the alignment of the data-references in the loop.
4484 FOR NOW: Until support for misliagned accesses is in place, only if all
4485 accesses are aligned can the loop be vectorized. This restriction will be
4489 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4491 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4492 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4493 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4494 enum dr_alignment_support supportable_dr_alignment;
4497 if (vect_debug_details (NULL))
4498 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4501 /* This pass may take place at function granularity instead of at loop
4504 if (!vect_compute_data_refs_alignment (loop_vinfo))
4506 if (vect_debug_details (loop) || vect_debug_stats (loop))
4508 "not vectorized: can't calculate alignment for data ref.");
4513 /* This pass will decide on using loop versioning and/or loop peeling in
4514 order to enhance the alignment of data references in the loop. */
4516 vect_enhance_data_refs_alignment (loop_vinfo);
4519 /* Finally, check that all the data references in the loop can be
4520 handled with respect to their alignment. */
4522 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4524 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4525 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4526 if (!supportable_dr_alignment)
4528 if (vect_debug_details (loop) || vect_debug_stats (loop))
4529 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4533 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4535 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4536 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4537 if (!supportable_dr_alignment)
4539 if (vect_debug_details (loop) || vect_debug_stats (loop))
4540 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4549 /* Function vect_analyze_data_ref_access.
4551 Analyze the access pattern of the data-reference DR. For now, a data access
4552 has to consecutive and aligned to be considered vectorizable. */
4555 vect_analyze_data_ref_access (struct data_reference *dr)
4557 varray_type access_fns = DR_ACCESS_FNS (dr);
4560 unsigned int dimensions, i;
4562 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4563 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4564 access is contiguous). */
4565 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4567 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4569 access_fn = DR_ACCESS_FN (dr, i);
4571 if (evolution_part_in_loop_num (access_fn,
4572 loop_containing_stmt (DR_STMT (dr))->num))
4574 /* Evolution part is not NULL in this loop (it is neither constant
4576 if (vect_debug_details (NULL))
4579 "not vectorized: complicated multidim. array access.");
4580 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4586 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4587 if (!evolution_function_is_constant_p (access_fn)
4588 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4589 access_fn, &init, &step, true))
4591 if (vect_debug_details (NULL))
4593 fprintf (dump_file, "not vectorized: complicated access function.");
4594 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4603 /* Function vect_analyze_data_ref_accesses.
4605 Analyze the access pattern of all the data references in the loop.
4607 FORNOW: the only access pattern that is considered vectorizable is a
4608 simple step 1 (consecutive) access.
4610 FORNOW: handle only arrays and pointer accesses. */
4613 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4616 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4617 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4619 if (vect_debug_details (NULL))
4620 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4622 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4624 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4625 bool ok = vect_analyze_data_ref_access (dr);
4628 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4629 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4630 fprintf (dump_file, "not vectorized: complicated access pattern.");
4635 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4637 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4638 bool ok = vect_analyze_data_ref_access (dr);
4641 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4642 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4643 fprintf (dump_file, "not vectorized: complicated access pattern.");
4652 /* Function vect_analyze_pointer_ref_access.
4655 STMT - a stmt that contains a data-ref
4656 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4658 If the data-ref access is vectorizable, return a data_reference structure
4659 that represents it (DR). Otherwise - return NULL. */
4661 static struct data_reference *
4662 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4665 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4666 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4669 tree reftype, innertype;
4670 enum machine_mode innermode;
4671 tree indx_access_fn;
4672 int loopnum = loop->num;
4673 struct data_reference *dr;
4677 if (vect_debug_stats (loop) || vect_debug_details (loop))
4678 fprintf (dump_file, "not vectorized: complicated pointer access.");
4682 if (vect_debug_details (NULL))
4684 fprintf (dump_file, "Access function of ptr: ");
4685 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4688 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4690 if (vect_debug_stats (loop) || vect_debug_details (loop))
4691 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4697 if (!host_integerp (step,0))
4699 if (vect_debug_stats (loop) || vect_debug_details (loop))
4701 "not vectorized: non constant step for pointer access.");
4705 step_val = TREE_INT_CST_LOW (step);
4707 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4708 if (TREE_CODE (reftype) != POINTER_TYPE)
4710 if (vect_debug_stats (loop) || vect_debug_details (loop))
4711 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4715 reftype = TREE_TYPE (init);
4716 if (TREE_CODE (reftype) != POINTER_TYPE)
4718 if (vect_debug_stats (loop) || vect_debug_details (loop))
4719 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4723 innertype = TREE_TYPE (reftype);
4724 innermode = TYPE_MODE (innertype);
4725 if (GET_MODE_SIZE (innermode) != step_val)
4727 /* FORNOW: support only consecutive access */
4728 if (vect_debug_stats (loop) || vect_debug_details (loop))
4729 fprintf (dump_file, "not vectorized: non consecutive access.");
4734 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4735 if (vect_debug_details (NULL))
4737 fprintf (dump_file, "Access function of ptr indx: ");
4738 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4740 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4745 /* Function vect_get_symbl_and_dr.
4747 The function returns SYMBL - the relevant variable for
4748 memory tag (for aliasing purposes).
4749 Also data reference structure DR is created.
4752 MEMREF - data reference in STMT
4753 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4756 DR - data_reference struct for MEMREF
4757 return value - the relevant variable for memory tag (for aliasing purposes).
4762 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4763 loop_vec_info loop_vinfo, struct data_reference **dr)
4765 tree symbl, oprnd0, oprnd1;
4766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4768 tree array_base, base;
4769 struct data_reference *new_dr;
4770 bool base_aligned_p;
4773 switch (TREE_CODE (memref))
4776 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4780 symbl = DR_BASE_NAME (new_dr);
4781 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4783 switch (TREE_CODE (symbl))
4787 oprnd0 = TREE_OPERAND (symbl, 0);
4788 oprnd1 = TREE_OPERAND (symbl, 1);
4791 /* Only {address_base + offset} expressions are supported,
4792 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4793 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4794 TODO: swap operands if {offset + address_base}. */
4795 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4796 && TREE_CODE (oprnd1) != INTEGER_CST)
4797 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4800 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4803 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4804 loop_vinfo, &new_dr);
4808 /* symbl remains unchanged. */
4812 if (vect_debug_details (NULL))
4814 fprintf (dump_file, "unhandled data ref: ");
4815 print_generic_expr (dump_file, memref, TDF_SLIM);
4816 fprintf (dump_file, " (symbl ");
4817 print_generic_expr (dump_file, symbl, TDF_SLIM);
4818 fprintf (dump_file, ") in stmt ");
4819 print_generic_expr (dump_file, stmt, TDF_SLIM);
4826 offset = size_zero_node;
4828 /* Store the array base in the stmt info.
4829 For one dimensional array ref a[i], the base is a,
4830 for multidimensional a[i1][i2]..[iN], the base is
4831 a[i1][i2]..[iN-1]. */
4832 array_base = TREE_OPERAND (memref, 0);
4833 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4835 new_dr = analyze_array (stmt, memref, is_read);
4838 /* Find the relevant symbol for aliasing purposes. */
4839 base = DR_BASE_NAME (new_dr);
4840 switch (TREE_CODE (base))
4847 symbl = TREE_OPERAND (base, 0);
4851 /* Could have recorded more accurate information -
4852 i.e, the actual FIELD_DECL that is being referenced -
4853 but later passes expect VAR_DECL as the nmt. */
4854 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4855 loop_vinfo, &offset, &base_aligned_p);
4860 if (vect_debug_details (NULL))
4862 fprintf (dump_file, "unhandled struct/class field access ");
4863 print_generic_expr (dump_file, stmt, TDF_SLIM);
4870 if (vect_debug_details (NULL))
4872 fprintf (dump_file, "unhandled data ref: ");
4873 print_generic_expr (dump_file, memref, TDF_SLIM);
4874 fprintf (dump_file, " in stmt ");
4875 print_generic_expr (dump_file, stmt, TDF_SLIM);
4883 /* Function vect_analyze_data_refs.
4885 Find all the data references in the loop.
4887 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4888 which base is really an array (not a pointer) and which alignment
4889 can be forced. This restriction will be relaxed. */
4892 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4894 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4895 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4896 int nbbs = loop->num_nodes;
4897 block_stmt_iterator si;
4899 struct data_reference *dr;
4902 bool base_aligned_p;
4905 if (vect_debug_details (NULL))
4906 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4908 for (j = 0; j < nbbs; j++)
4910 basic_block bb = bbs[j];
4911 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4913 bool is_read = false;
4914 tree stmt = bsi_stmt (si);
4915 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4916 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4917 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4918 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4919 varray_type *datarefs = NULL;
4920 int nvuses, nv_may_defs, nv_must_defs;
4924 /* Assumption: there exists a data-ref in stmt, if and only if
4925 it has vuses/vdefs. */
4927 if (!vuses && !v_may_defs && !v_must_defs)
4930 nvuses = NUM_VUSES (vuses);
4931 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4932 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4934 if (nvuses && (nv_may_defs || nv_must_defs))
4936 if (vect_debug_details (NULL))
4938 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4939 print_generic_expr (dump_file, stmt, TDF_SLIM);
4944 if (TREE_CODE (stmt) != MODIFY_EXPR)
4946 if (vect_debug_details (NULL))
4948 fprintf (dump_file, "unexpected vops in stmt: ");
4949 print_generic_expr (dump_file, stmt, TDF_SLIM);
4956 memref = TREE_OPERAND (stmt, 1);
4957 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4962 memref = TREE_OPERAND (stmt, 0);
4963 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4967 /* Analyze MEMREF. If it is of a supported form, build data_reference
4968 struct for it (DR) and find the relevant symbol for aliasing
4970 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4974 if (vect_debug_stats (loop) || vect_debug_details (loop))
4976 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4977 print_generic_expr (dump_file, stmt, TDF_SLIM);
4982 /* Find and record the memtag assigned to this data-ref. */
4983 switch (TREE_CODE (symbl))
4986 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4990 symbl = SSA_NAME_VAR (symbl);
4991 tag = get_var_ann (symbl)->type_mem_tag;
4994 tree ptr = TREE_OPERAND (memref, 0);
4995 if (TREE_CODE (ptr) == SSA_NAME)
4996 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5000 if (vect_debug_stats (loop) || vect_debug_details (loop))
5001 fprintf (dump_file, "not vectorized: no memtag for ref.");
5004 STMT_VINFO_MEMTAG (stmt_info) = tag;
5008 address_base = TREE_OPERAND (symbl, 0);
5010 switch (TREE_CODE (address_base))
5013 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5015 STMT_VINFO_MEMTAG (stmt_info) =
5016 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5017 loop_vinfo, &offset,
5022 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5026 if (vect_debug_stats (loop) || vect_debug_details (loop))
5029 "not vectorized: unhandled address expr: ");
5030 print_generic_expr (dump_file, stmt, TDF_SLIM);
5037 if (vect_debug_stats (loop) || vect_debug_details (loop))
5039 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5040 print_generic_expr (dump_file, memref, TDF_SLIM);
5045 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5046 STMT_VINFO_DATA_REF (stmt_info) = dr;
5054 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5056 /* Function vect_mark_relevant.
5058 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5061 vect_mark_relevant (varray_type worklist, tree stmt)
5063 stmt_vec_info stmt_info;
5065 if (vect_debug_details (NULL))
5066 fprintf (dump_file, "mark relevant.");
5068 if (TREE_CODE (stmt) == PHI_NODE)
5070 VARRAY_PUSH_TREE (worklist, stmt);
5074 stmt_info = vinfo_for_stmt (stmt);
5078 if (vect_debug_details (NULL))
5080 fprintf (dump_file, "mark relevant: no stmt info!!.");
5081 print_generic_expr (dump_file, stmt, TDF_SLIM);
5086 if (STMT_VINFO_RELEVANT_P (stmt_info))
5088 if (vect_debug_details (NULL))
5089 fprintf (dump_file, "already marked relevant.");
5093 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5094 VARRAY_PUSH_TREE (worklist, stmt);
5098 /* Function vect_stmt_relevant_p.
5100 Return true if STMT in loop that is represented by LOOP_VINFO is
5101 "relevant for vectorization".
5103 A stmt is considered "relevant for vectorization" if:
5104 - it has uses outside the loop.
5105 - it has vdefs (it alters memory).
5106 - control stmts in the loop (except for the exit condition).
5108 CHECKME: what other side effects would the vectorizer allow? */
5111 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5113 v_may_def_optype v_may_defs;
5114 v_must_def_optype v_must_defs;
5115 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5120 /* cond stmt other than loop exit cond. */
5121 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5124 /* changing memory. */
5125 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5126 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5127 if (v_may_defs || v_must_defs)
5129 if (vect_debug_details (NULL))
5130 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5134 /* uses outside the loop. */
5135 df = get_immediate_uses (stmt);
5136 num_uses = num_immediate_uses (df);
5137 for (i = 0; i < num_uses; i++)
5139 tree use = immediate_use (df, i);
5140 basic_block bb = bb_for_stmt (use);
5141 if (!flow_bb_inside_loop_p (loop, bb))
5143 if (vect_debug_details (NULL))
5144 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5153 /* Function vect_mark_stmts_to_be_vectorized.
5155 Not all stmts in the loop need to be vectorized. For example:
5164 Stmt 1 and 3 do not need to be vectorized, because loop control and
5165 addressing of vectorized data-refs are handled differently.
5167 This pass detects such stmts. */
5170 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5172 varray_type worklist;
5173 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5174 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5175 unsigned int nbbs = loop->num_nodes;
5176 block_stmt_iterator si;
5182 stmt_vec_info stmt_info;
5184 if (vect_debug_details (NULL))
5185 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5187 VARRAY_TREE_INIT (worklist, 64, "work list");
5189 /* 1. Init worklist. */
5191 for (i = 0; i < nbbs; i++)
5193 basic_block bb = bbs[i];
5194 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5196 stmt = bsi_stmt (si);
5198 if (vect_debug_details (NULL))
5200 fprintf (dump_file, "init: stmt relevant? ");
5201 print_generic_expr (dump_file, stmt, TDF_SLIM);
5204 stmt_info = vinfo_for_stmt (stmt);
5205 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5207 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5208 vect_mark_relevant (worklist, stmt);
5213 /* 2. Process_worklist */
5215 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5217 stmt = VARRAY_TOP_TREE (worklist);
5218 VARRAY_POP (worklist);
5220 if (vect_debug_details (NULL))
5222 fprintf (dump_file, "worklist: examine stmt: ");
5223 print_generic_expr (dump_file, stmt, TDF_SLIM);
5226 /* Examine the USES in this statement. Mark all the statements which
5227 feed this statement's uses as "relevant", unless the USE is used as
5230 if (TREE_CODE (stmt) == PHI_NODE)
5232 /* follow the def-use chain inside the loop. */
5233 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5235 tree arg = PHI_ARG_DEF (stmt, j);
5236 tree def_stmt = NULL_TREE;
5238 if (!vect_is_simple_use (arg, loop, &def_stmt))
5240 if (vect_debug_details (NULL))
5241 fprintf (dump_file, "worklist: unsupported use.");
5242 varray_clear (worklist);
5248 if (vect_debug_details (NULL))
5250 fprintf (dump_file, "worklist: def_stmt: ");
5251 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5254 bb = bb_for_stmt (def_stmt);
5255 if (flow_bb_inside_loop_p (loop, bb))
5256 vect_mark_relevant (worklist, def_stmt);
5260 ann = stmt_ann (stmt);
5261 use_ops = USE_OPS (ann);
5263 for (i = 0; i < NUM_USES (use_ops); i++)
5265 tree use = USE_OP (use_ops, i);
5267 /* We are only interested in uses that need to be vectorized. Uses
5268 that are used for address computation are not considered relevant.
5270 if (exist_non_indexing_operands_for_use_p (use, stmt))
5272 tree def_stmt = NULL_TREE;
5274 if (!vect_is_simple_use (use, loop, &def_stmt))
5276 if (vect_debug_details (NULL))
5277 fprintf (dump_file, "worklist: unsupported use.");
5278 varray_clear (worklist);
5285 if (vect_debug_details (NULL))
5287 fprintf (dump_file, "worklist: examine use %d: ", i);
5288 print_generic_expr (dump_file, use, TDF_SLIM);
5291 bb = bb_for_stmt (def_stmt);
5292 if (flow_bb_inside_loop_p (loop, bb))
5293 vect_mark_relevant (worklist, def_stmt);
5296 } /* while worklist */
5298 varray_clear (worklist);
5303 /* Function vect_can_advance_ivs_p
5305 In case the number of iterations that LOOP iterates in unknown at compile
5306 time, an epilog loop will be generated, and the loop induction variables
5307 (IVs) will be "advanced" to the value they are supposed to take just before
5308 the epilog loop. Here we check that the access function of the loop IVs
5309 and the expression that represents the loop bound are simple enough.
5310 These restrictions will be relaxed in the future. */
5313 vect_can_advance_ivs_p (struct loop *loop)
5315 basic_block bb = loop->header;
5318 /* Analyze phi functions of the loop header. */
5320 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5322 tree access_fn = NULL;
5323 tree evolution_part;
5325 if (vect_debug_details (NULL))
5327 fprintf (dump_file, "Analyze phi: ");
5328 print_generic_expr (dump_file, phi, TDF_SLIM);
5331 /* Skip virtual phi's. The data dependences that are associated with
5332 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5334 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5336 if (vect_debug_details (NULL))
5337 fprintf (dump_file, "virtual phi. skip.");
5341 /* Analyze the evolution function. */
5343 access_fn = instantiate_parameters
5344 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5348 if (vect_debug_details (NULL))
5349 fprintf (dump_file, "No Access function.");
5353 if (vect_debug_details (NULL))
5355 fprintf (dump_file, "Access function of PHI: ");
5356 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5359 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5361 if (evolution_part == NULL_TREE)
5364 /* FORNOW: We do not transform initial conditions of IVs
5365 which evolution functions are a polynomial of degree >= 2. */
5367 if (tree_is_chrec (evolution_part))
5375 /* Function vect_get_loop_niters.
5377 Determine how many iterations the loop is executed.
5378 If an expression that represents the number of iterations
5379 can be constructed, place it in NUMBER_OF_ITERATIONS.
5380 Return the loop exit condition. */
5383 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5387 if (vect_debug_details (NULL))
5388 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5390 niters = number_of_iterations_in_loop (loop);
5392 if (niters != NULL_TREE
5393 && niters != chrec_dont_know)
5395 *number_of_iterations = niters;
5397 if (vect_debug_details (NULL))
5399 fprintf (dump_file, "==> get_loop_niters:" );
5400 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5404 return get_loop_exit_condition (loop);
5408 /* Function vect_analyze_loop_form.
5410 Verify the following restrictions (some may be relaxed in the future):
5411 - it's an inner-most loop
5412 - number of BBs = 2 (which are the loop header and the latch)
5413 - the loop has a pre-header
5414 - the loop has a single entry and exit
5415 - the loop exit condition is simple enough, and the number of iterations
5416 can be analyzed (a countable loop). */
5418 static loop_vec_info
5419 vect_analyze_loop_form (struct loop *loop)
5421 loop_vec_info loop_vinfo;
5423 tree number_of_iterations = NULL;
5424 bool rescan = false;
5426 if (vect_debug_details (loop))
5427 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5430 || !loop->single_exit
5431 || loop->num_nodes != 2
5432 || EDGE_COUNT (loop->header->preds) != 2
5433 || loop->num_entries != 1)
5435 if (vect_debug_stats (loop) || vect_debug_details (loop))
5437 fprintf (dump_file, "not vectorized: bad loop form. ");
5439 fprintf (dump_file, "nested loop.");
5440 else if (!loop->single_exit)
5441 fprintf (dump_file, "multiple exits.");
5442 else if (loop->num_nodes != 2)
5443 fprintf (dump_file, "too many BBs in loop.");
5444 else if (EDGE_COUNT (loop->header->preds) != 2)
5445 fprintf (dump_file, "too many incoming edges.");
5446 else if (loop->num_entries != 1)
5447 fprintf (dump_file, "too many entries.");
5453 /* We assume that the loop exit condition is at the end of the loop. i.e,
5454 that the loop is represented as a do-while (with a proper if-guard
5455 before the loop if needed), where the loop header contains all the
5456 executable statements, and the latch is empty. */
5457 if (!empty_block_p (loop->latch))
5459 if (vect_debug_stats (loop) || vect_debug_details (loop))
5460 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5464 /* Make sure we have a preheader basic block. */
5465 if (!loop->pre_header)
5468 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5471 /* Make sure there exists a single-predecessor exit bb: */
5472 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5475 loop_split_edge_with (loop->exit_edges[0], NULL);
5480 flow_loop_scan (loop, LOOP_ALL);
5481 /* Flow loop scan does not update loop->single_exit field. */
5482 loop->single_exit = loop->exit_edges[0];
5485 if (empty_block_p (loop->header))
5487 if (vect_debug_stats (loop) || vect_debug_details (loop))
5488 fprintf (dump_file, "not vectorized: empty loop.");
5492 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5495 if (vect_debug_stats (loop) || vect_debug_details (loop))
5496 fprintf (dump_file, "not vectorized: complicated exit condition.");
5500 if (!number_of_iterations)
5502 if (vect_debug_stats (loop) || vect_debug_details (loop))
5504 "not vectorized: number of iterations cannot be computed.");
5508 if (chrec_contains_undetermined (number_of_iterations))
5510 if (vect_debug_details (NULL))
5511 fprintf (dump_file, "Infinite number of iterations.");
5515 loop_vinfo = new_loop_vec_info (loop);
5516 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5518 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5520 if (vect_debug_details (loop))
5522 fprintf (dump_file, "loop bound unknown.\n");
5523 fprintf (dump_file, "Symbolic number of iterations is ");
5524 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5528 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5530 if (vect_debug_stats (loop) || vect_debug_details (loop))
5531 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5535 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5541 /* Function vect_analyze_loop.
5543 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5544 for it. The different analyses will record information in the
5545 loop_vec_info struct. */
5547 static loop_vec_info
5548 vect_analyze_loop (struct loop *loop)
5551 loop_vec_info loop_vinfo;
5553 if (vect_debug_details (NULL))
5554 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5556 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5558 loop_vinfo = vect_analyze_loop_form (loop);
5561 if (vect_debug_details (loop))
5562 fprintf (dump_file, "bad loop form.");
5566 /* Find all data references in the loop (which correspond to vdefs/vuses)
5567 and analyze their evolution in the loop.
5569 FORNOW: Handle only simple, array references, which
5570 alignment can be forced, and aligned pointer-references. */
5572 ok = vect_analyze_data_refs (loop_vinfo);
5575 if (vect_debug_details (loop))
5576 fprintf (dump_file, "bad data references.");
5577 destroy_loop_vec_info (loop_vinfo);
5581 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5583 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5586 if (vect_debug_details (loop))
5587 fprintf (dump_file, "unexpected pattern.");
5588 if (vect_debug_details (loop))
5589 fprintf (dump_file, "not vectorized: unexpected pattern.");
5590 destroy_loop_vec_info (loop_vinfo);
5594 /* Check that all cross-iteration scalar data-flow cycles are OK.
5595 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5597 ok = vect_analyze_scalar_cycles (loop_vinfo);
5600 if (vect_debug_details (loop))
5601 fprintf (dump_file, "bad scalar cycle.");
5602 destroy_loop_vec_info (loop_vinfo);
5606 /* Analyze data dependences between the data-refs in the loop.
5607 FORNOW: fail at the first data dependence that we encounter. */
5609 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5612 if (vect_debug_details (loop))
5613 fprintf (dump_file, "bad data dependence.");
5614 destroy_loop_vec_info (loop_vinfo);
5618 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5619 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5621 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5624 if (vect_debug_details (loop))
5625 fprintf (dump_file, "bad data access.");
5626 destroy_loop_vec_info (loop_vinfo);
5630 /* Analyze the alignment of the data-refs in the loop.
5631 FORNOW: Only aligned accesses are handled. */
5633 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5636 if (vect_debug_details (loop))
5637 fprintf (dump_file, "bad data alignment.");
5638 destroy_loop_vec_info (loop_vinfo);
5642 /* Scan all the operations in the loop and make sure they are
5645 ok = vect_analyze_operations (loop_vinfo);
5648 if (vect_debug_details (loop))
5649 fprintf (dump_file, "bad operation or unsupported loop bound.");
5650 destroy_loop_vec_info (loop_vinfo);
5654 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5660 /* Function need_imm_uses_for.
5662 Return whether we ought to include information for 'var'
5663 when calculating immediate uses. For this pass we only want use
5664 information for non-virtual variables. */
5667 need_imm_uses_for (tree var)
5669 return is_gimple_reg (var);
5673 /* Function vectorize_loops.
5675 Entry Point to loop vectorization phase. */
5678 vectorize_loops (struct loops *loops)
5680 unsigned int i, loops_num;
5681 unsigned int num_vectorized_loops = 0;
5683 /* Does the target support SIMD? */
5684 /* FORNOW: until more sophisticated machine modelling is in place. */
5685 if (!UNITS_PER_SIMD_WORD)
5687 if (vect_debug_details (NULL))
5688 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5692 #ifdef ENABLE_CHECKING
5693 verify_loop_closed_ssa ();
5696 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5698 /* ----------- Analyze loops. ----------- */
5700 /* If some loop was duplicated, it gets bigger number
5701 than all previously defined loops. This fact allows us to run
5702 only over initial loops skipping newly generated ones. */
5703 loops_num = loops->num;
5704 for (i = 1; i < loops_num; i++)
5706 loop_vec_info loop_vinfo;
5707 struct loop *loop = loops->parray[i];
5712 loop_vinfo = vect_analyze_loop (loop);
5713 loop->aux = loop_vinfo;
5715 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5718 vect_transform_loop (loop_vinfo, loops);
5719 num_vectorized_loops++;
5722 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5723 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5724 num_vectorized_loops);
5726 /* ----------- Finalize. ----------- */
5729 for (i = 1; i < loops_num; i++)
5731 struct loop *loop = loops->parray[i];
5732 loop_vec_info loop_vinfo;
5736 loop_vinfo = loop->aux;
5737 destroy_loop_vec_info (loop_vinfo);
5741 rewrite_into_ssa (false);
5742 rewrite_into_loop_closed_ssa (); /* FORNOW */
5743 bitmap_clear (vars_to_rename);