2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
150 /*************************************************************************
151 Simple Loop Peeling Utilities
152 *************************************************************************/
/* NOTE(review): forward declarations for the loop-peeling machinery below.
   This extraction has gaps in the original line numbering, so some
   declarations may be missing from this view. */
154 /* Entry point for peeling of simple loops.
155 Peel the first/last iterations of a loop.
156 It can be used outside of the vectorizer for loops that are simple enough
157 (see function documentation). In the vectorizer it is used to peel the
158 last few iterations when the loop bound is unknown or does not evenly
159 divide by the vectorization factor, and to peel the first few iterations
160 to force the alignment of data references in the loop. */
161 struct loop *slpeel_tree_peel_loop_to_edge
162 (struct loop *, struct loops *, edge, tree, tree, bool);
163 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
164 (struct loop *, struct loops *, edge);
165 static void slpeel_update_phis_for_duplicate_loop
166 (struct loop *, struct loop *, bool after);
167 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *);
168 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
169 static edge slpeel_add_loop_guard (basic_block, tree, basic_block);
170 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
/* SSA-name renaming helpers used while duplicating a loop body.  */
171 static void allocate_new_names (bitmap);
172 static void rename_use_op (use_operand_p);
173 static void rename_def_op (def_operand_p, tree);
174 static void rename_variables_in_bb (basic_block);
175 static void free_new_names (bitmap);
176 static void rename_variables_in_loop (struct loop *);
179 /*************************************************************************
180 Vectorization Utilities.
181 *************************************************************************/
/* NOTE(review): forward declarations for the vectorizer proper.  The
   original line numbering has gaps here, so some lines are missing from
   this extraction. */
183 /* Main analysis functions. */
184 static loop_vec_info vect_analyze_loop (struct loop *);
185 static loop_vec_info vect_analyze_loop_form (struct loop *);
186 static bool vect_analyze_data_refs (loop_vec_info);
187 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
188 static bool vect_analyze_scalar_cycles (loop_vec_info);
189 static bool vect_analyze_data_ref_accesses (loop_vec_info);
190 static bool vect_analyze_data_refs_alignment (loop_vec_info);
191 static bool vect_compute_data_refs_alignment (loop_vec_info);
192 static bool vect_analyze_operations (loop_vec_info);
194 /* Main code transformation functions. */
195 static void vect_transform_loop (loop_vec_info, struct loops *);
196 static void vect_transform_loop_bound (loop_vec_info, tree niters);
197 static bool vect_transform_stmt (tree, block_stmt_iterator *);
198 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
199 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
200 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
201 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
202 static enum dr_alignment_support vect_supportable_dr_alignment
203 (struct data_reference *);
204 static void vect_align_data_ref (tree);
205 static void vect_enhance_data_refs_alignment (loop_vec_info);
207 /* Utility functions for the analyses. */
208 static bool vect_is_simple_use (tree , struct loop *, tree *);
209 static bool exist_non_indexing_operands_for_use_p (tree, tree);
210 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
211 static void vect_mark_relevant (varray_type, tree);
212 static bool vect_stmt_relevant_p (tree, loop_vec_info);
213 static tree vect_get_loop_niters (struct loop *, tree *);
214 static bool vect_compute_data_ref_alignment
215 (struct data_reference *, loop_vec_info);
216 static bool vect_analyze_data_ref_access (struct data_reference *);
217 static bool vect_get_first_index (tree, tree *);
218 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
/* NOTE(review): vect_analyze_pointer_ref_access appears to be declared
   twice (here and a few lines below), and both declarations are missing
   their parameter-list line in this extraction — the duplicate should be
   removed once the full file is available; verify against the original
   source before changing. */
219 static struct data_reference * vect_analyze_pointer_ref_access
221 static bool vect_can_advance_ivs_p (struct loop *);
222 static tree vect_get_base_and_bit_offset
223 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
224 static struct data_reference * vect_analyze_pointer_ref_access
226 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
227 static tree vect_compute_array_ref_alignment
228 (struct data_reference *, loop_vec_info, tree, tree *);
229 static tree vect_get_ptr_offset (tree, tree, tree *);
230 static tree vect_get_symbl_and_dr
231 (tree, tree, bool, loop_vec_info, struct data_reference **);
233 /* Utility functions for the code transformation. */
234 static tree vect_create_destination_var (tree, tree);
235 static tree vect_create_data_ref_ptr
236 (tree, block_stmt_iterator *, tree, tree *, bool);
237 static tree vect_create_index_for_vector_ref
238 (struct loop *, block_stmt_iterator *);
239 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
240 static tree get_vectype_for_scalar_type (tree);
241 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
242 static tree vect_get_vec_def_for_operand (tree, tree);
243 static tree vect_init_vector (tree, tree);
244 static tree vect_build_symbol_bound (tree, int, struct loop *);
245 static void vect_finish_stmt_generation
246 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
248 /* Utility function dealing with loop peeling (not peeling itself). */
249 static void vect_generate_tmps_on_preheader
250 (loop_vec_info, tree *, tree *, tree *);
251 static tree vect_build_loop_niters (loop_vec_info);
252 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
253 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
254 static void vect_update_inits_of_dr
255 (struct data_reference *, struct loop *, tree niters);
256 static void vect_update_inits_of_drs (loop_vec_info, tree);
257 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
258 static void vect_do_peeling_for_loop_bound
259 (loop_vec_info, tree *, struct loops *);
261 /* Utilities for creation and deletion of vec_info structs. */
262 loop_vec_info new_loop_vec_info (struct loop *loop);
263 void destroy_loop_vec_info (loop_vec_info);
264 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
266 static bool vect_debug_stats (struct loop *loop);
267 static bool vect_debug_details (struct loop *loop);
270 /*************************************************************************
271 Simple Loop Peeling Utilities
273 Utilities to support loop peeling for vectorization purposes.
274 *************************************************************************/
277 /* For each definition in DEFINITIONS this function allocates
/* NOTE(review): fragment — the return type, braces and local declarations
   (ver, bi) are missing from this extraction.  For each SSA version set in
   DEFINITIONS, a duplicate SSA name is created and stashed behind
   SSA_NAME_AUX for later renaming; the xmalloc'd slot is released by
   free_new_names.  */
281 allocate_new_names (bitmap definitions)
286 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
288 tree def = ssa_name (ver);
289 tree *new_name_ptr = xmalloc (sizeof (tree));
291 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
293 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
/* Preserve the abnormal-PHI property on the duplicated name.  */
294 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
296 SSA_NAME_AUX (def) = new_name_ptr;
301 /* Renames the use *OP_P. */
/* NOTE(review): fragment — return type, braces and the early-return paths
   are missing here.  Replaces a use of an SSA name with the duplicate
   recorded in SSA_NAME_AUX by allocate_new_names; non-SSA uses and names
   with no AUX entry (defined outside the loop) are left untouched.  */
304 rename_use_op (use_operand_p op_p)
308 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
311 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
313 /* Something defined outside of the loop. */
317 /* An ordinary ssa name defined in the loop. */
319 SET_USE (op_p, *new_name_ptr);
323 /* Renames the def *OP_P in statement STMT. */
/* NOTE(review): fragment — mirrors rename_use_op for definitions: swaps in
   the duplicated SSA name from SSA_NAME_AUX and repoints its defining
   statement to STMT.  Early-return lines are missing from this view.  */
326 rename_def_op (def_operand_p op_p, tree stmt)
330 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
333 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
335 /* Something defined outside of the loop. */
339 /* An ordinary ssa name defined in the loop. */
341 SET_DEF (op_p, *new_name_ptr);
/* The duplicate is now defined by STMT in the copied loop body.  */
342 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
346 /* Renames the variables in basic block BB. */
/* NOTE(review): fragment — several local declarations (phi, stmt, ann,
   uses, defs, vuses, e, ei, i) and braces are missing from this
   extraction.  Walks every operand class of every statement in BB (PHI
   results, uses, defs, VUSEs, V_MAY_DEFs, V_MUST_DEFs) and renames each
   via rename_use_op/rename_def_op, then fixes up PHI arguments in
   successor blocks for the edges leaving BB.  */
349 rename_variables_in_bb (basic_block bb)
352 block_stmt_iterator bsi;
358 v_may_def_optype v_may_defs;
359 v_must_def_optype v_must_defs;
364 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
365 rename_def_op (PHI_RESULT_PTR (phi), phi);
367 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
369 stmt = bsi_stmt (bsi);
/* Ensure operand caches are up to date before walking them.  */
370 get_stmt_operands (stmt);
371 ann = stmt_ann (stmt);
373 uses = USE_OPS (ann);
374 for (i = 0; i < NUM_USES (uses); i++)
375 rename_use_op (USE_OP_PTR (uses, i));
377 defs = DEF_OPS (ann);
378 for (i = 0; i < NUM_DEFS (defs); i++)
379 rename_def_op (DEF_OP_PTR (defs, i), stmt);
381 vuses = VUSE_OPS (ann);
382 for (i = 0; i < NUM_VUSES (vuses); i++)
383 rename_use_op (VUSE_OP_PTR (vuses, i));
/* V_MAY_DEFs carry both a use and a result operand.  */
385 v_may_defs = V_MAY_DEF_OPS (ann);
386 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
388 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
389 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
392 v_must_defs = V_MUST_DEF_OPS (ann);
393 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
395 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
396 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
/* Rename PHI arguments flowing from BB into its successors.  */
400 FOR_EACH_EDGE (e, ei, bb->succs)
401 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
402 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
406 /* Releases the structures holding the new ssa names. */
/* NOTE(review): fragment — local declarations (ver, bi) and braces are
   missing.  Frees the xmalloc'd SSA_NAME_AUX slots installed by
   allocate_new_names and clears the AUX pointers.  */
409 free_new_names (bitmap definitions)
414 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
416 tree def = ssa_name (ver);
418 if (SSA_NAME_AUX (def))
420 free (SSA_NAME_AUX (def));
/* Clear the pointer so a later pass does not see a dangling AUX.  */
421 SSA_NAME_AUX (def) = NULL;
427 /* Renames variables in new generated LOOP. */
/* NOTE(review): fragment — declarations of bbs/i and the free of bbs are
   missing here.  Applies rename_variables_in_bb to every block of LOOP.  */
430 rename_variables_in_loop (struct loop *loop)
435 bbs = get_loop_body (loop);
437 for (i = 0; i < loop->num_nodes; i++)
438 rename_variables_in_bb (bbs[i]);
444 /* This function copies phis from LOOP header to
445 NEW_LOOP header. AFTER is as
446 in update_phis_for_duplicate_loop function. */
/* NOTE(review): fragment — the loop condition line and the declaration of
   new_e are missing from this extraction.  For each header PHI pair, the
   argument coming from LOOP's latch (AFTER) or preheader (!AFTER) edge is
   added to the corresponding PHI in NEW_LOOP via its preheader edge.  */
449 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
452 tree phi, new_phi, def;
/* Source edge in the original loop depends on whether the copy runs
   after (latch value) or before (preheader value) the original.  */
454 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
456 /* Second add arguments to newly created phi nodes. */
457 for (phi = phi_nodes (loop->header),
458 new_phi = phi_nodes (new_loop->header);
460 phi = PHI_CHAIN (phi),
461 new_phi = PHI_CHAIN (new_phi))
463 new_e = loop_preheader_edge (new_loop);
464 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
465 add_phi_arg (&new_phi, def, new_e);
470 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
471 executes after LOOP, and false if it executes before it. */
/* NOTE(review): fragment — the declaration of old_latch, several
   continue/guard lines and closing braces are missing from this
   extraction.  After copy_phi_nodes, this walks the paired header PHIs and
   (a) adds the renamed latch value to NEW_LOOP's latch edge, and (b)
   redirects the original loop's preheader PHI argument — the visible SET_USE
   at the end presumably installs new_ssa_name there; confirm against the
   full source.  */
474 slpeel_update_phis_for_duplicate_loop (struct loop *loop,
475 struct loop *new_loop, bool after)
478 tree *new_name_ptr, new_ssa_name;
479 tree phi_new, phi_old, def;
480 edge orig_entry_e = loop_preheader_edge (loop);
482 /* Copy phis from loop->header to new_loop->header. */
483 copy_phi_nodes (loop, new_loop, after);
485 old_latch = loop_latch_edge (loop);
487 /* Update PHI args for the new loop latch edge, and
488 the old loop preheader edge, we know that the PHI nodes
489 are ordered appropriately in copy_phi_nodes. */
490 for (phi_new = phi_nodes (new_loop->header),
491 phi_old = phi_nodes (loop->header);
493 phi_new = PHI_CHAIN (phi_new), phi_old = PHI_CHAIN (phi_old))
495 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
497 if (TREE_CODE (def) != SSA_NAME)
500 new_name_ptr = SSA_NAME_AUX (def);
502 /* Something defined outside of the loop. */
506 /* An ordinary ssa name defined in the loop. */
507 new_ssa_name = *new_name_ptr;
509 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
511 /* Update PHI args for the original loop pre-header edge. */
513 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
519 /* Update PHI nodes for a guard of the LOOP.
521 LOOP is supposed to have a preheader bb at which a guard condition is
522 located. The true edge of this condition skips the LOOP and ends
523 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
524 to be an empty bb (created by this transformation) with one successor.
526 This function creates phi nodes at the LOOP exit bb. These phis need to be
527 created as a result of adding true edge coming from guard.
529 FORNOW: Only phis which have corresponding phi nodes at the header of the
530 LOOP are created. Here we use the assumption that after the LOOP there
531 are no uses of defs generated in LOOP.
533 After the phis creation, the function updates the values of phi nodes at
534 the LOOP exit successor bb:
541 if (exit_cond) goto bb3 else goto bb2
547 After guard creation (the loop before this function):
550 if (guard_condition) goto bb4 else goto bb1
552 if (exit_cond) goto bb4 else goto bb2
560 This function updates the phi nodes in bb4 and in bb3, to account for the
561 new edge from bb0 to bb4. */
/* NOTE(review): fragment — declarations of phi, phi1, new_phi, phi_arg and
   several braces are missing from this extraction.  */
564 slpeel_update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
567 basic_block bb = loop->exit_edges[0]->dest;
569 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
574 /* Generate new phi node. */
575 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb);
577 /* Add argument coming from guard true edge. */
578 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
579 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
581 /* Add argument coming from loop exit edge. */
582 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
583 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
585 /* Update all phi nodes at the loop exit successor. */
586 for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest);
588 phi1 = PHI_CHAIN (phi1))
590 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0));
591 if (old_arg == phi_arg)
593 edge e = EDGE_SUCC (bb, 0);
/* Reroute the downstream PHI to use the merge PHI just created.  */
595 SET_PHI_ARG_DEF (phi1,
596 phi_arg_from_edge (phi1, e),
597 PHI_RESULT (new_phi));
/* create_phi_node prepends; reverse to restore the original order.  */
602 set_phi_nodes (bb, phi_reverse (phi_nodes (bb)));
606 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
607 that starts at zero, increases by one and its limit is NITERS.
609 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
/* NOTE(review): fragment — the declaration of orig_cond and some braces
   are missing from this extraction.  Builds a fresh counting IV, rewrites
   the exit condition to compare it against NITERS, and removes the old
   exit test.  */
612 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
614 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
616 edge exit_edge = loop->exit_edges[0];
617 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
618 tree begin_label = tree_block_label (loop->latch);
619 tree exit_label = tree_block_label (loop->single_exit->dest);
621 /* Flow loop scan does not update loop->single_exit field. */
622 loop->single_exit = loop->exit_edges[0];
623 orig_cond = get_loop_exit_condition (loop);
624 gcc_assert (orig_cond);
625 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
626 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
628 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
629 back to the exit condition statement. */
630 bsi_next (&loop_exit_bsi);
631 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
/* Choose the comparison direction so the new condition exits on the
   same edge polarity as the original exit test.  */
634 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
635 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
636 else /* 'then' edge loops back. */
637 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
639 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
640 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
641 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
642 begin_label, exit_label);
643 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
645 /* Remove old loop exit test: */
646 bsi_remove (&loop_exit_bsi);
648 if (vect_debug_stats (loop) || vect_debug_details (loop))
649 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
/* Record the symbolic iteration count on the loop structure.  */
651 loop->nb_iterations = niters;
655 /* Given LOOP this function generates a new copy of it and puts it
656 on E which is either the entry or exit of LOOP. */
/* NOTE(review): fragment — the static return-type line, declarations of
   at_exit, was_imm_dom, phi, phi_arg, new_exit_e, the fprintf bodies,
   several return statements, the free of bbs/new_bbs and the final return
   are missing from this extraction.  Duplicates LOOP's CFG and splices the
   copy in either before (entry) or after (exit) the original, fixing up
   dominators and exit-block PHIs.  */
659 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
662 struct loop *new_loop;
663 basic_block *new_bbs, *bbs;
666 basic_block exit_dest;
669 at_exit = (e == loop->exit_edges[0]);
670 if (!at_exit && e != loop_preheader_edge (loop))
672 if (dump_file && (dump_flags & TDF_DETAILS))
674 "Edge is not an entry nor an exit edge.\n");
678 bbs = get_loop_body (loop);
680 /* Check whether duplication is possible. */
681 if (!can_copy_bbs_p (bbs, loop->num_nodes))
683 if (vect_debug_stats (loop) || vect_debug_details (loop))
685 "Cannot copy basic blocks.\n");
690 /* Generate new loop structure. */
691 new_loop = duplicate_loop (loops, loop, loop->outer);
694 if (vect_debug_stats (loop) || vect_debug_details (loop))
696 "The duplicate_loop returns NULL.\n");
701 exit_dest = loop->exit_edges[0]->dest;
702 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
703 exit_dest) == loop->header ?
706 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
708 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
710 /* Duplicating phi args at exit bbs as coming
711 also from exit of duplicated loop. */
712 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
714 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
717 edge new_loop_exit_edge;
/* The header's successor that is not the latch is the exit edge of
   the duplicated loop.  */
719 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
720 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
722 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
724 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
728 if (at_exit) /* Add the loop copy at exit. */
730 redirect_edge_and_branch_force (e, new_loop->header);
731 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
733 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
735 else /* Add the copy at entry. */
738 edge entry_e = loop_preheader_edge (loop);
739 basic_block preheader = entry_e->src;
741 if (!flow_bb_inside_loop_p (new_loop,
742 EDGE_SUCC (new_loop->header, 0)->dest))
743 new_exit_e = EDGE_SUCC (new_loop->header, 0);
745 new_exit_e = EDGE_SUCC (new_loop->header, 1);
747 redirect_edge_and_branch_force (new_exit_e, loop->header);
748 set_immediate_dominator (CDI_DOMINATORS, loop->header,
751 /* We have to add phi args to the loop->header here as coming
752 from new_exit_e edge. */
753 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
755 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
757 add_phi_arg (&phi, phi_arg, new_exit_e);
760 redirect_edge_and_branch_force (entry_e, new_loop->header);
761 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
/* Refresh cached loop data (entry/exit edges etc.) for both loops.  */
764 flow_loop_scan (new_loop, LOOP_ALL);
765 flow_loop_scan (loop, LOOP_ALL);
773 /* Given the condition statement COND, put it as the last statement
774 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
775 Assumes that this is the single exit of the guarded loop.
776 Returns the skip edge. */
/* NOTE(review): fragment — the static return-type line, declarations of
   new_e/enter_e and the final return of new_e are missing from this
   extraction.  Converts GUARD_BB's fallthrough edge into the false edge of
   a new COND_EXPR and adds a true edge jumping straight to EXIT_BB.  */
779 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
781 block_stmt_iterator bsi;
783 tree cond_stmt, then_label, else_label;
785 enter_e = EDGE_SUCC (guard_bb, 0);
/* The old fallthrough edge becomes the guard's "enter the loop" edge.  */
786 enter_e->flags &= ~EDGE_FALLTHRU;
787 enter_e->flags |= EDGE_FALSE_VALUE;
788 bsi = bsi_last (guard_bb);
790 then_label = build1 (GOTO_EXPR, void_type_node,
791 tree_block_label (exit_bb));
792 else_label = build1 (GOTO_EXPR, void_type_node,
793 tree_block_label (enter_e->dest));
794 cond_stmt = build (COND_EXPR, void_type_node, cond,
795 then_label, else_label);
796 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
797 /* Add new edge to connect entry block to the second loop. */
798 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
799 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
804 /* This function verifies that the following restrictions apply to LOOP:
806 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
807 (3) it is single entry, single exit
808 (4) its exit condition is the last stmt in the header
809 (5) E is the entry/exit edge of LOOP.
/* NOTE(review): restriction (1) and the static return-type line are
   missing from this extraction, as are the early "return false" and the
   condition's leading "if (!loop->outer" test and final returns.  */
813 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
815 edge exit_e = loop->exit_edges [0];
816 edge entry_e = loop_preheader_edge (loop);
817 tree orig_cond = get_loop_exit_condition (loop);
818 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
/* Bail out if any SSA names are already queued for rewriting.  */
820 if (any_marked_for_rewrite_p ())
824 /* All loops have an outer scope; the only case loop->outer is NULL is for
825 the function itself. */
827 || loop->num_nodes != 2
828 || !empty_block_p (loop->latch)
829 || loop->num_exits != 1
830 || loop->num_entries != 1
831 /* Verify that new loop exit condition can be trivially modified. */
832 || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
833 || (e != exit_e && e != entry_e))
840 /* Given LOOP this function duplicates it to the edge E.
842 This transformation takes place before the loop is vectorized.
843 For now, there are two main cases when it's used
844 by the vectorizer: to support loops with unknown loop bounds
845 (or loop bounds indivisible by vectorization factor) and to force the
846 alignment of data references in the loop. In the first case, LOOP is
847 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
848 is duplicated to the preheader edge thus generating prolog loop. In both
849 cases, the original loop will be vectorized after the transformation.
851 The edge E is supposed to be either preheader edge of the LOOP or
852 its exit edge. If preheader edge is specified, the LOOP copy
853 will precede the original one. Otherwise the copy will be located
854 at the exit of the LOOP.
856 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
857 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
858 loop will be iterated FIRST_NITERS times by introducing additional
859 induction variable and replacing loop exit condition. If
860 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
861 the caller to tree_duplicate_loop_to_edge is responsible for updating
862 the first loop count.
864 NITERS (also SSA_NAME) parameter defines the number of iteration the
865 original loop iterated. The function generates two if-then guards:
866 one prior to the first loop and the other prior to the second loop.
867 The first guard will be:
869 if (FIRST_NITERS == 0) then skip the first loop
871 The second guard will be:
873 if (FIRST_NITERS == NITERS) then skip the second loop
875 Thus the equivalence to the original code is guaranteed by correct values
876 of NITERS and FIRST_NITERS and generation of if-then loop guards.
878 For now this function supports only loop forms that are candidate for
879 vectorization. Such types are the following:
881 (1) only innermost loops
882 (2) loops built from 2 basic blocks
883 (3) loops with one entry and one exit
884 (4) loops without function calls
885 (5) loops without defs that are used after the loop
887 (1), (3) are checked in this function; (2) - in function
888 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
889 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
890 when excluding induction/reduction support.
892 The function returns NULL in case one of these checks or
893 transformations failed. */
/* NOTE(review): fragment — the return-type line, declarations of
   definitions, skip_e, pre_condition, several return statements, the
   first/second-loop assignment branch (the if deciding which copy is
   "first") and the final return of new_loop are missing from this
   extraction.  */
896 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
897 edge e, tree first_niters,
898 tree niters, bool update_first_loop_count)
900 struct loop *new_loop = NULL, *first_loop, *second_loop;
904 basic_block first_exit_bb, second_exit_bb;
905 basic_block pre_header_bb;
906 edge exit_e = loop->exit_edges [0];
908 if (!slpeel_can_duplicate_loop_p (loop, e))
911 /* We have to initialize cfg_hooks. Then, when calling
912 cfg_hooks->split_edge, the function tree_split_edge
913 is actually called and, when calling cfg_hooks->duplicate_block,
914 the function tree_duplicate_bb is called. */
915 tree_register_cfg_hooks ();
917 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
918 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
920 if (vect_debug_stats (loop) || vect_debug_details (loop))
922 "The tree_duplicate_loop_to_edge_cfg failed.\n");
/* Duplicate SSA names for everything defined in the loop, fix the copy's
   PHIs, rename the copy's body, then drop the temporary name map.  */
926 definitions = marked_ssa_names ();
927 allocate_new_names (definitions);
928 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
929 /* Here, using assumption (5), we do not propagate new names further
930 than on phis of the exit from the second loop. */
931 rename_variables_in_loop (new_loop);
932 free_new_names (definitions);
/* Depending on E, the copy is either the epilog (second) or the prolog
   (first) loop; the selecting if/else is missing from this extraction.  */
937 second_loop = new_loop;
941 first_loop = new_loop;
945 /* 2. Generate bb between the loops. */
946 first_exit_bb = split_edge (first_loop->exit_edges[0]);
947 add_bb_to_loop (first_exit_bb, first_loop->outer);
949 /* We need to update here first loop exit edge
950 and second loop preheader edge. */
951 flow_loop_scan (first_loop, LOOP_ALL);
952 flow_loop_scan (second_loop, LOOP_ALL);
953 /* Flow loop scan does not update loop->single_exit field. */
954 first_loop->single_exit = first_loop->exit_edges[0];
955 second_loop->single_exit = second_loop->exit_edges[0];
957 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
958 if (!update_first_loop_count)
959 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
961 /* 4. Add the guard before first loop:
968 /* 4a. Generate bb before first loop. */
969 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
970 add_bb_to_loop (pre_header_bb, first_loop->outer);
972 /* First loop preheader edge is changed. */
973 flow_loop_scan (first_loop, LOOP_ALL);
975 /* 4b. Generate guard condition. */
976 pre_condition = build (LE_EXPR, boolean_type_node,
977 first_niters, integer_zero_node);
979 /* 4c. Add condition at the end of preheader bb. */
980 skip_e = slpeel_add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
982 /* 4d. Update phis at first loop exit and propagate changes
983 to the phis of second loop. */
984 slpeel_update_phi_nodes_for_guard (skip_e, first_loop);
986 /* 5. Add the guard before second loop:
988 if FIRST_NITERS == NITERS SKIP
993 /* 5a. Generate empty bb at the exit from the second loop. */
994 second_exit_bb = split_edge (second_loop->exit_edges[0]);
995 add_bb_to_loop (second_exit_bb, second_loop->outer);
997 /* Second loop preheader edge is changed. */
998 flow_loop_scan (second_loop, LOOP_ALL);
1000 /* 5b. Generate guard condition. */
1001 pre_condition = build (EQ_EXPR, boolean_type_node,
1002 first_niters, niters);
1004 /* 5c. Add condition at the end of preheader bb. */
1005 skip_e = slpeel_add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1006 slpeel_update_phi_nodes_for_guard (skip_e, second_loop);
1008 BITMAP_XFREE (definitions);
1009 unmark_all_for_rewrite ();
1016 /* Here the proper Vectorizer starts. */
1018 /*************************************************************************
1019 Vectorization Utilities.
1020 *************************************************************************/
1022 /* Function new_stmt_vec_info.
1024 Create and initialize a new stmt_vec_info struct for STMT. */
/* NOTE(review): fragment — the return-type line, the declaration of res
   and the final "return res;" are missing from this extraction.  Allocates
   a zeroed stmt_vec_info and explicitly initializes every field.  */
1027 new_stmt_vec_info (tree stmt, struct loop *loop)
1030 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1032 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1033 STMT_VINFO_STMT (res) = stmt;
1034 STMT_VINFO_LOOP (res) = loop;
1035 STMT_VINFO_RELEVANT_P (res) = 0;
1036 STMT_VINFO_VECTYPE (res) = NULL;
1037 STMT_VINFO_VEC_STMT (res) = NULL;
1038 STMT_VINFO_DATA_REF (res) = NULL;
1039 STMT_VINFO_MEMTAG (res) = NULL;
1040 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1046 /* Function new_loop_vec_info.
1048 Create and initialize a new loop_vec_info struct for LOOP, as well as
1049 stmt_vec_info structs for all the stmts in LOOP. */
1052 new_loop_vec_info (struct loop *loop)
 1056 block_stmt_iterator si;
/* Allocate the per-loop record zeroed, then fill it in field by field.  */
 1059 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
/* get_loop_body returns a malloc'ed array of the loop's basic blocks;
   ownership passes to the loop_vec_info (freed in destroy_loop_vec_info).  */
 1061 bbs = get_loop_body (loop);
 1063 /* Create stmt_info for all stmts in the loop. */
 1064 for (i = 0; i < loop->num_nodes; i++)
 1066 basic_block bb = bbs[i];
 1067 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
 1069 tree stmt = bsi_stmt (si);
/* Compute the stmt's operand caches before hanging vectorizer info off
   its annotation (presumably required by later operand queries — the
   dropped lines here don't show it; confirm against full source).  */
 1072 get_stmt_operands (stmt);
 1073 ann = stmt_ann (stmt);
 1074 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
/* Initialize all loop-level vectorization fields to "not yet known".  */
 1078 LOOP_VINFO_LOOP (res) = loop;
 1079 LOOP_VINFO_BBS (res) = bbs;
 1080 LOOP_VINFO_EXIT_COND (res) = NULL;
 1081 LOOP_VINFO_NITERS (res) = NULL;
 1082 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
 1083 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
 1084 LOOP_VINFO_VECT_FACTOR (res) = 0;
/* Growable arrays for the loop's write/read data references; 20 is just
   the initial capacity hint.  */
 1085 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
 1086 "loop_write_datarefs");
 1087 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
 1088 "loop_read_datarefs");
 1089 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1095 /* Function destroy_loop_vec_info.
1097 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1098 stmts in the loop. */
1101 destroy_loop_vec_info (loop_vec_info loop_vinfo)
 1106 block_stmt_iterator si;
 1112 loop = LOOP_VINFO_LOOP (loop_vinfo);
 1114 bbs = LOOP_VINFO_BBS (loop_vinfo);
 1115 nbbs = loop->num_nodes;
/* Walk every stmt in the loop and detach its vectorizer annotation
   (mirrors the attach loop in new_loop_vec_info).  */
 1117 for (j = 0; j < nbbs; j++)
 1119 basic_block bb = bbs[j];
 1120 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
 1122 tree stmt = bsi_stmt (si);
 1123 stmt_ann_t ann = stmt_ann (stmt);
/* stmt_info is looked up here; the line that frees it is not visible in
   this excerpt — presumably freed between these stmts (confirm against
   full source before assuming a leak).  */
 1124 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 1126 set_stmt_info (ann, NULL);
/* Release the loop-level resources owned by LOOP_VINFO.  */
 1130 free (LOOP_VINFO_BBS (loop_vinfo));
 1131 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
 1132 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1138 /* Function debug_loop_stats.
1140 For vectorization statistics dumps. */
1143 vect_debug_stats (struct loop *loop)
 1146 block_stmt_iterator si;
 1147 tree node = NULL_TREE;
/* Gate on the statistics dump stream: nothing is printed (and callers'
   `if (vect_debug_stats (...))' guards fail) unless -fdump with TDF_STATS
   is active.  */
 1149 if (!dump_file || !(dump_flags & TDF_STATS))
 1154 fprintf (dump_file, "\n");
/* Scan the loop's stmts for one carrying source-location info so the
   dump line can name the loop's file:line.  */
 1163 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
 1165 node = bsi_stmt (si);
 1166 if (node && EXPR_P (node) && EXPR_LOCUS (node))
 1170 if (node && EXPR_P (node) && EXPR_LOCUS (node)
 1171 && EXPR_FILENAME (node) && EXPR_LINENO (node))
 1173 fprintf (dump_file, "\nloop at %s:%d: ",
 1174 EXPR_FILENAME (node), EXPR_LINENO (node));
1182 /* Function debug_loop_details.
1184 For vectorization debug dumps. */
1187 vect_debug_details (struct loop *loop)
 1190 block_stmt_iterator si;
 1191 tree node = NULL_TREE;
/* Same structure as vect_debug_stats, but keyed on the TDF_DETAILS dump
   flag; used throughout this file as `if (vect_debug_details (NULL))' to
   guard verbose tracing.  */
 1193 if (!dump_file || !(dump_flags & TDF_DETAILS))
 1198 fprintf (dump_file, "\n");
/* Find a stmt with location info to label the dump with the loop's
   source position.  */
 1207 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
 1209 node = bsi_stmt (si);
 1210 if (node && EXPR_P (node) && EXPR_LOCUS (node))
 1214 if (node && EXPR_P (node) && EXPR_LOCUS (node)
 1215 && EXPR_FILENAME (node) && EXPR_LINENO (node))
 1217 fprintf (dump_file, "\nloop at %s:%d: ",
 1218 EXPR_FILENAME (node), EXPR_LINENO (node));
1226 /* Function vect_get_ptr_offset
1228 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1231 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
 1232 tree vectype ATTRIBUTE_UNUSED,
 1233 tree *offset ATTRIBUTE_UNUSED)
/* Placeholder: misalignment of pointer accesses is not analyzed yet, so
   every parameter is deliberately unused.  */
 1235 /* TODO: Use alignment information. */
1240 /* Function vect_get_base_and_bit_offset
1242 Return the BASE of the data reference EXPR.
1243 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1244 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1245 bits of 'a.b[i] + 4B' from a.
1248 EXPR - the memory reference that is being analyzed
1249 DR - the data_reference struct of the _original_ memory reference
1250 (Note: DR_REF (DR) is not necessarily EXPR)
1251 VECTYPE - the type that defines the alignment (i.e, we compute
1252 alignment relative to TYPE_ALIGN(VECTYPE))
1255 BASE (returned value) - the base of the data reference EXPR.
1256 E.g, if EXPR is a.b[k].c[i][j] the returned
1258 OFFSET - offset of EXPR from BASE in bits
1259 BASE_ALIGNED_P - indicates if BASE is aligned
1261 If something unexpected is encountered (an unsupported form of data-ref),
1262 or if VECTYPE is given but OFFSET cannot be determined:
1263 then NULL_TREE is returned. */
1266 vect_get_base_and_bit_offset (struct data_reference *dr,
 1269 loop_vec_info loop_vinfo,
 1271 bool *base_aligned_p)
 1273 tree this_offset = size_zero_node;
 1274 tree base = NULL_TREE;
 1276 tree oprnd0, oprnd1;
 1277 struct data_reference *array_dr;
/* Dispatch on the tree code of EXPR; recursion peels one level of the
   reference per call, accumulating the bit offset.  */
 1278 enum tree_code code = TREE_CODE (expr);
/* Pessimistic default; only proven-aligned leaves set it to true.  */
 1280 *base_aligned_p = false;
 1284 /* These cases end the recursion: */
/* Leaf: a DECL.  It is the base; aligned iff its declared alignment
   covers the vector type's alignment.  */
 1286 *offset = size_zero_node;
 1287 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
 1288 *base_aligned_p = true;
/* Leaf: an SSA_NAME base — must be a pointer; compare the pointed-to
   type's alignment against the vector type's.  */
 1295 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
 1298 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
 1300 base = vect_get_ptr_offset (expr, vectype, offset);
 1302 *base_aligned_p = true;
 1306 *base_aligned_p = true;
 1307 *offset = size_zero_node;
/* Leaf: an integer constant — convert its byte value to bits.  */
 1313 *offset = int_const_binop (MULT_EXPR, expr,
 1314 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
 1317 /* These cases continue the recursion: */
/* COMPONENT_REF: field offset comes from bit_position of the FIELD_DECL;
   bail out (NULL) if it is not a compile-time constant.  */
 1319 oprnd0 = TREE_OPERAND (expr, 0);
 1320 oprnd1 = TREE_OPERAND (expr, 1);
 1322 this_offset = bit_position (oprnd1);
 1323 if (vectype && !host_integerp (this_offset, 1))
 1329 oprnd0 = TREE_OPERAND (expr, 0);
 1334 oprnd0 = TREE_OPERAND (expr, 0);
 1339 if (DR_REF (dr) != expr)
 1340 /* Build array data_reference struct if the existing DR_REF
 1341 doesn't match EXPR. This happens, for example, when the
 1342 EXPR is *T and T is initialized to &arr[indx]. The DR struct
 1343 contains information on the access of T, not of arr. In order
 1344 to continue the analysis, we create a new DR struct that
 1345 describes the access of arr.
 1347 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
 1351 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
 1352 vectype, &this_offset);
 1357 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
 1359 *offset = this_offset;
 1360 *base_aligned_p = true;
 1367 /* In case we have a PLUS_EXPR of the form
 1368 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
 1369 This is verified in vect_get_symbl_and_dr. */
 1370 oprnd0 = TREE_OPERAND (expr, 0);
 1371 oprnd1 = TREE_OPERAND (expr, 1);
/* The non-base operand contributes only an offset; its own recursion
   fills this_offset.  */
 1373 base = vect_get_base_and_bit_offset
 1374 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
 1375 if (vectype && !base)
/* Recurse into the peeled sub-reference, then fold this level's offset
   into the running total; overflow or a non-constant total aborts the
   analysis.  */
 1385 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
 1386 loop_vinfo, offset, base_aligned_p);
 1388 if (vectype && base)
 1390 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
 1391 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
 1394 if (vect_debug_details (NULL))
 1396 print_generic_expr (dump_file, expr, TDF_SLIM);
 1397 fprintf (dump_file, " --> total offset for ref: ");
 1398 print_generic_expr (dump_file, *offset, TDF_SLIM);
1405 /* Function vect_force_dr_alignment_p.
1407 Returns whether the alignment of a DECL can be forced to be aligned
1408 on ALIGNMENT bit boundary. */
1411 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
/* Only plain variables can have their alignment raised; anything else
   (parameters, fields, etc.) cannot.  */
 1413 if (TREE_CODE (decl) != VAR_DECL)
/* An external definition lives in another TU — we cannot change its
   alignment here.  */
 1416 if (DECL_EXTERNAL (decl))
/* Statically allocated: bounded by what the object file format allows.  */
 1419 if (TREE_STATIC (decl))
 1420 return (alignment <= MAX_OFILE_ALIGNMENT);
 1422 /* This is not 100% correct. The absolute correct stack alignment
 1423 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
 1424 PREFERRED_STACK_BOUNDARY is honored by all translation units.
 1425 However, until someone implements forced stack alignment, SSE
 1426 isn't really usable without this. */
 1427 return (alignment <= PREFERRED_STACK_BOUNDARY);
1431 /* Function vect_get_new_vect_var.
1433 Returns a name for a new variable. The current naming scheme appends the
1434 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1435 the name of vectorizer generated variables, and appends that to NAME if
1439 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
/* Pick the prefix by kind ("vect_" for simple vars, otherwise the
   pointer-var prefix) and create a new temporary of TYPE; NAME, when
   given, is appended to the prefix for readable dumps.  */
 1445 if (var_kind == vect_simple_var)
 1450 prefix_len = strlen (prefix);
/* With a caller-supplied NAME: prefix+name.  Without one (the branch
   below): the bare prefix is used.  */
 1453 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
 1455 new_vect_var = create_tmp_var (type, prefix);
 1457 return new_vect_var;
1461 /* Function vect_create_index_for_vector_ref.
1463 Create (and return) an index variable, along with it's update chain in the
1464 loop. This variable will be used to access a memory location in a vector
1468 LOOP: The loop being vectorized.
1469 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1470 function can be added here, or in the loop pre-header.
1473 Return an index that will be used to index a vector array. It is expected
1474 that a pointer to the first vector will be used as the base address for the
1477 FORNOW: we are not trying to be efficient, just creating a new index each
1478 time from scratch. At this time all vector references could use the same
1481 TODO: create only one index to be used by all vector references. Record
1482 the index in the LOOP_VINFO the first time this procedure is called and
1483 return it on subsequent calls. The increment of this index must be placed
1484 just before the conditional expression that ends the single block loop. */
1487 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
 1490 tree indx_before_incr, indx_after_incr;
 1492 /* It is assumed that the base pointer used for vectorized access contains
 1493 the address of the first vector. Therefore the index used for vectorized
 1494 access must be initialized to zero and incremented by 1. */
 1496 init = integer_zero_node;
 1497 step = integer_one_node;
 1499 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
/* create_iv builds the induction variable and its update stmt inside
   LOOP; we hand back the pre-increment value, i.e. the index valid at
   the top of each iteration.  */
 1500 create_iv (init, step, NULL_TREE, loop, bsi, false,
 1501 &indx_before_incr, &indx_after_incr);
 1503 return indx_before_incr;
1507 /* Function vect_create_addr_base_for_vector_ref.
1509 Create an expression that computes the address of the first memory location
1510 that will be accessed for a data reference.
1513 STMT: The statement containing the data reference.
1514 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1515 OFFSET: Optional. If supplied, it is be added to the initial address.
1518 1. Return an SSA_NAME whose value is the address of the memory location of
1519 the first vector of the data reference.
1520 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1521 these statement(s) which define the returned SSA_NAME.
1523 FORNOW: We are only handling array accesses with step 1. */
1526 vect_create_addr_base_for_vector_ref (tree stmt,
 1527 tree *new_stmt_list,
 1530 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 1531 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
 1532 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
/* unshare_expr: the base trees are reused elsewhere; we must not mutate
   shared tree nodes when gimplifying below.  */
 1533 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
 1534 tree base_name = unshare_expr (DR_BASE_NAME (dr));
 1535 tree ref = DR_REF (dr);
 1536 tree data_ref_base_type = TREE_TYPE (data_ref_base);
 1537 tree scalar_type = TREE_TYPE (ref);
 1538 tree scalar_ptr_type = build_pointer_type (scalar_type);
 1540 tree init_val, step, init_oval;
 1542 bool is_ptr_ref, is_array_ref, is_addr_expr;
 1547 tree addr_base, addr_expr;
 1548 tree dest, new_stmt;
 1550 /* Only the access function of the last index is relevant (i_n in
 1551 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
 1552 access_fn = DR_ACCESS_FN (dr, 0);
/* Extract the IV's initial value; fall back to zero when the evolution
   cannot be analyzed (the branch between these lines is not visible in
   this excerpt).  */
 1553 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
 1556 init_oval = integer_zero_node;
/* Classify the base: SSA pointer, array decl, or address arithmetic.
   Exactly one must hold — asserted below.  */
 1558 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
 1559 && TREE_CODE (data_ref_base) == SSA_NAME;
 1560 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
 1561 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
 1562 || TREE_CODE (data_ref_base) == PLUS_EXPR
 1563 || TREE_CODE (data_ref_base) == MINUS_EXPR;
 1564 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
 1566 /** Create: &(base[init_val])
 1568 if data_ref_base is an ARRAY_TYPE:
 1569 base = data_ref_base
 1571 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
 1572 base = *((scalar_array *) data_ref_base)
 1576 array_base = data_ref_base;
 1577 else /* is_ptr_ref or is_addr_expr */
 1579 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
 1580 tree scalar_array_type = build_array_type (scalar_type, 0);
 1581 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
 1582 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
 1583 add_referenced_tmp_var (array_ptr);
/* Gimplify the base expression into DEST, queueing any helper stmts on
   NEW_STMT_LIST for the caller to insert (per this function's contract).  */
 1585 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
 1586 add_referenced_tmp_var (dest);
 1588 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
 1589 append_to_statement_list_force (new_stmt, new_stmt_list);
/* array_ptr = (scalar_array *) base;  then treat *array_ptr as the
   array object to index.  */
 1591 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
 1592 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
 1593 new_temp = make_ssa_name (array_ptr, vec_stmt);
 1594 TREE_OPERAND (vec_stmt, 0) = new_temp;
 1595 append_to_statement_list_force (vec_stmt, new_stmt_list);
 1598 array_base = build_fold_indirect_ref (new_temp);
/* Gimplify the initial index value.  */
 1601 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
 1602 add_referenced_tmp_var (dest);
 1603 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
 1604 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Optional extra OFFSET parameter: init_val += offset.  */
 1608 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
 1609 add_referenced_tmp_var (tmp);
 1610 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
 1611 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
 1612 init_val = make_ssa_name (tmp, vec_stmt);
 1613 TREE_OPERAND (vec_stmt, 0) = init_val;
 1614 append_to_statement_list_force (vec_stmt, new_stmt_list);
/* Build &array_base[init_val] and copy it into a named SSA pointer that
   is returned to the caller.  */
 1617 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
 1618 NULL_TREE, NULL_TREE);
 1619 addr_base = build_fold_addr_expr (array_ref);
 1621 /* addr_expr = addr_base */
 1622 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
 1623 get_name (base_name));
 1624 add_referenced_tmp_var (addr_expr);
 1625 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
 1626 new_temp = make_ssa_name (addr_expr, vec_stmt);
 1627 TREE_OPERAND (vec_stmt, 0) = new_temp;
 1628 append_to_statement_list_force (vec_stmt, new_stmt_list);
1634 /* Function get_vectype_for_scalar_type.
1636 Returns the vector type corresponding to SCALAR_TYPE as supported
1640 get_vectype_for_scalar_type (tree scalar_type)
 1642 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
 1643 int nbytes = GET_MODE_SIZE (inner_mode);
 1650 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
/* Number of scalar elements per vector; a zero-size guard presumably
   sits in the lines elided just above (confirm against full source,
   otherwise this division could trap on nbytes == 0).  */
 1652 nunits = UNITS_PER_SIMD_WORD / nbytes;
 1654 vectype = build_vector_type (scalar_type, nunits);
 1655 if (vect_debug_details (NULL))
 1657 fprintf (dump_file, "get vectype with %d units of type ", nunits);
 1658 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
 1664 if (vect_debug_details (NULL))
 1666 fprintf (dump_file, "vectype: ");
 1667 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject the type if the target has no real vector mode for it —
   operating on a generic (BLKmode) vector would need re-lowering.  */
 1670 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
 1672 /* TODO: tree-complex.c sometimes can parallelize operations
 1673 on generic vectors. We can vectorize the loop in that case,
 1674 but then we should re-run the lowering pass. */
 1675 if (vect_debug_details (NULL))
 1676 fprintf (dump_file, "mode not supported by target.");
1684 /* Function vect_align_data_ref.
1686 Handle mislignment of a memory accesses.
1688 FORNOW: Can't handle misaligned accesses.
1689 Make sure that the dataref is aligned. */
1692 vect_align_data_ref (tree stmt)
 1694 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 1695 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
 1697 /* FORNOW: can't handle misaligned accesses;
 1698 all accesses expected to be aligned. */
/* Hard assertion rather than a fallback: the analysis phase is supposed
   to have rejected any loop containing a misaligned data-ref.  */
 1699 gcc_assert (aligned_access_p (dr));
1703 /* Function vect_create_data_ref_ptr.
1705 Create a memory reference expression for vector access, to be used in a
1706 vector load/store stmt. The reference is based on a new pointer to vector
1710 1. STMT: a stmt that references memory. Expected to be of the form
1711 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1712 2. BSI: block_stmt_iterator where new stmts can be added.
1713 3. OFFSET (optional): an offset to be added to the initial address accessed
1714 by the data-ref in STMT.
1715 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1716 pointing to the initial address.
1719 1. Declare a new ptr to vector_type, and have it point to the base of the
1720 data reference (initial addressed accessed by the data reference).
1721 For example, for vector of type V8HI, the following code is generated:
1724 vp = (v8hi *)initial_address;
1726 if OFFSET is not supplied:
1727 initial_address = &a[init];
1728 if OFFSET is supplied:
1729 initial_address = &a[init + OFFSET];
1731 Return the initial_address in INITIAL_ADDRESS.
1733 2. Create a data-reference in the loop based on the new vector pointer vp,
1734 and using a new index variable 'idx' as follows:
1738 where if ONLY_INIT is true:
1741 update = idx + vector_type_size
1743 Return the pointer vp'.
1746 FORNOW: handle only aligned and consecutive accesses. */
1749 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
 1750 tree *initial_address, bool only_init)
 1753 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 1754 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
 1755 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
 1756 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 1760 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
 1761 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
 1762 vuse_optype vuses = STMT_VUSE_OPS (stmt);
 1763 int nvuses, nv_may_defs, nv_must_defs;
 1767 tree new_stmt_list = NULL_TREE;
 1769 edge pe = loop_preheader_edge (loop);
 1776 base_name = unshare_expr (DR_BASE_NAME (dr));
/* Dump-only block: describe what kind of base (array decl, multi-dim
   ref, record field, pointer) is being vectorized.  */
 1777 if (vect_debug_details (NULL))
 1779 tree data_ref_base = base_name;
 1780 fprintf (dump_file, "create array_ref of type: ");
 1781 print_generic_expr (dump_file, vectype, TDF_SLIM);
 1782 if (TREE_CODE (data_ref_base) == VAR_DECL)
 1783 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
 1784 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
 1785 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
 1786 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
 1787 fprintf (dump_file, "vectorizing a record based array ref: ");
 1788 else if (TREE_CODE (data_ref_base) == SSA_NAME)
 1789 fprintf (dump_file, "vectorizing a pointer ref: ");
 1790 print_generic_expr (dump_file, base_name, TDF_SLIM);
 1793 /** (1) Create the new vector-pointer variable: **/
 1795 vect_ptr_type = build_pointer_type (vectype);
 1796 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
 1797 get_name (base_name));
 1798 add_referenced_tmp_var (vect_ptr);
 1801 /** (2) Handle aliasing information of the new vector-pointer: **/
/* The new pointer inherits the original access's type memory tag so the
   alias machinery treats accesses through it like the scalar ref.  */
 1803 tag = STMT_VINFO_MEMTAG (stmt_info);
 1805 get_var_ann (vect_ptr)->type_mem_tag = tag;
 1807 /* Mark for renaming all aliased variables
 1808 (i.e, the may-aliases of the type-mem-tag). */
 1809 nvuses = NUM_VUSES (vuses);
 1810 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
 1811 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
 1812 for (i = 0; i < nvuses; i++)
 1814 tree use = VUSE_OP (vuses, i);
 1815 if (TREE_CODE (use) == SSA_NAME)
 1816 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
 1818 for (i = 0; i < nv_may_defs; i++)
 1820 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
 1821 if (TREE_CODE (def) == SSA_NAME)
 1822 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
 1824 for (i = 0; i < nv_must_defs; i++)
 1826 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
 1827 if (TREE_CODE (def) == SSA_NAME)
 1828 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
 1832 /** (3) Calculate the initial address the vector-pointer, and set
 1833 the vector-pointer to point to it before the loop: **/
 1835 /* Create: (&(base[init_val+offset]) in the loop preheader. */
 1836 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
/* Insert the address computation on the preheader edge; the assert
   checks the insertion did not have to split the edge into a new bb.  */
 1838 pe = loop_preheader_edge (loop);
 1839 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
 1840 gcc_assert (!new_bb);
 1841 *initial_address = new_temp;
 1843 /* Create: p = (vectype *) initial_base */
 1844 vec_stmt = fold_convert (vect_ptr_type, new_temp);
 1845 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
 1846 new_temp = make_ssa_name (vect_ptr, vec_stmt);
 1847 TREE_OPERAND (vec_stmt, 0) = new_temp;
 1848 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
 1849 gcc_assert (!new_bb);
 1850 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
 1853 /** (4) Handle the updating of the vector-pointer inside the loop: **/
 1855 if (only_init) /* No update in loop is required. */
 1856 return vect_ptr_init;
/* Per-iteration pointer: vect_ptr_init + idx * sizeof (vectype).  */
 1858 idx = vect_create_index_for_vector_ref (loop, bsi);
 1860 /* Create: update = idx * vectype_size */
 1861 ptr_update = create_tmp_var (integer_type_node, "update");
 1862 add_referenced_tmp_var (ptr_update);
 1863 vectype_size = build_int_cst (integer_type_node,
 1864 GET_MODE_SIZE (TYPE_MODE (vectype)));
 1865 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
 1866 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
 1867 new_temp = make_ssa_name (ptr_update, vec_stmt);
 1868 TREE_OPERAND (vec_stmt, 0) = new_temp;
 1869 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
 1871 /* Create: data_ref_ptr = vect_ptr_init + update */
 1872 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
 1873 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
 1874 new_temp = make_ssa_name (vect_ptr, vec_stmt);
 1875 TREE_OPERAND (vec_stmt, 0) = new_temp;
 1876 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
 1877 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
 1879 return data_ref_ptr;
1883 /* Function vect_create_destination_var.
1885 Create a new temporary of type VECTYPE. */
1888 vect_create_destination_var (tree scalar_dest, tree vectype)
 1891 const char *new_name;
/* SCALAR_DEST must be an SSA name — its name seeds the new vector
   temporary's name for readable dumps.  */
 1893 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
 1895 new_name = get_name (scalar_dest);
 1898 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
 1899 add_referenced_tmp_var (vec_dest);
1905 /* Function vect_init_vector.
1907 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1908 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1909 used in the vectorization of STMT. */
1912 vect_init_vector (tree stmt, tree vector_var)
 1914 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
 1915 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
 1918 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Build  cst_N = VECTOR_VAR  as a new SSA definition.  */
 1924 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
 1925 add_referenced_tmp_var (new_var);
 1927 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
 1928 new_temp = make_ssa_name (new_var, init_stmt);
 1929 TREE_OPERAND (init_stmt, 0) = new_temp;
/* The initializer is loop-invariant, so it is hoisted onto the
   preheader edge; assert that no new bb had to be created for it.  */
 1931 pe = loop_preheader_edge (loop);
 1932 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
 1933 gcc_assert (!new_bb);
 1935 if (vect_debug_details (NULL))
 1937 fprintf (dump_file, "created new init_stmt: ");
 1938 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
 1941 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1946 /* Function vect_get_vec_def_for_operand.
1948 OP is an operand in STMT. This function returns a (vector) def that will be
1949 used in the vectorized stmt for STMT.
1951 In the case that OP is an SSA_NAME which is defined in the loop, then
1952 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1954 In case OP is an invariant or constant, a new stmt that creates a vector def
1955 needs to be introduced. */
1958 vect_get_vec_def_for_operand (tree op, tree stmt)
 1963 stmt_vec_info def_stmt_info = NULL;
 1964 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
 1965 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
 1966 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
 1967 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
 1974 if (vect_debug_details (NULL))
 1976 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
 1977 print_generic_expr (dump_file, op, TDF_SLIM);
 1980 /** ===> Case 1: operand is a constant. **/
 1982 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
 1984 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
 1988 /* Build a tree with vector elements. */
 1989 if (vect_debug_details (NULL))
 1990 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
/* Splat the scalar constant NUNITS times into a VECTOR_CST and emit its
   initializer in the preheader via vect_init_vector.  */
 1992 for (i = nunits - 1; i >= 0; --i)
 1994 t = tree_cons (NULL_TREE, op, t);
 1996 vec_cst = build_vector (vectype, t);
 1997 return vect_init_vector (stmt, vec_cst);
 2000 gcc_assert (TREE_CODE (op) == SSA_NAME);
 2002 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
 2004 def_stmt = SSA_NAME_DEF_STMT (op);
 2005 def_stmt_info = vinfo_for_stmt (def_stmt);
 2007 if (vect_debug_details (NULL))
 2009 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
 2010 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
 2014 /** ==> Case 2.1: operand is defined inside the loop. **/
 2018 /* Get the def from the vectorized stmt. */
/* The defining stmt has already been vectorized (stmts are processed in
   order), so its vectorized counterpart's LHS is the def we need.  */
 2020 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
 2021 gcc_assert (vec_stmt);
 2022 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
 2027 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
 2028 it is a reduction/induction. **/
 2030 bb = bb_for_stmt (def_stmt);
 2031 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
 2033 if (vect_debug_details (NULL))
 2034 fprintf (dump_file, "reduction/induction - unsupported.");
 2035 internal_error ("no support for reduction/induction"); /* FORNOW */
 2039 /** ==> Case 2.3: operand is defined outside the loop -
 2040 it is a loop invariant. */
/* Pull out the invariant value according to how it is defined: a phi
   outside the loop, a regular assignment, or a NOP (empty) stmt for
   e.g. function parameters.  */
 2042 switch (TREE_CODE (def_stmt))
 2045 def = PHI_RESULT (def_stmt);
 2048 def = TREE_OPERAND (def_stmt, 0);
 2051 def = TREE_OPERAND (def_stmt, 0);
 2052 gcc_assert (IS_EMPTY_STMT (def_stmt));
 2056 if (vect_debug_details (NULL))
 2058 fprintf (dump_file, "unsupported defining stmt: ");
 2059 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
 2061 internal_error ("unsupported defining stmt");
 2064 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
 2066 if (vect_debug_details (NULL))
 2067 fprintf (dump_file, "Create vector_inv.");
/* Splat the invariant NUNITS times; a CONSTRUCTOR (not VECTOR_CST) is
   needed because the value is not a compile-time constant.  */
 2069 for (i = nunits - 1; i >= 0; --i)
 2071 t = tree_cons (NULL_TREE, def, t);
 2074 vec_inv = build_constructor (vectype, t);
 2075 return vect_init_vector (stmt, vec_inv);
2079 /* Function vect_finish_stmt_generation.
2081 Insert a new stmt. */
2084 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
 2086 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
 2088 if (vect_debug_details (NULL))
 2090 fprintf (dump_file, "add new stmt: ");
 2091 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
 2094 /* Make sure bsi points to the stmt that is being vectorized. */
 2096 /* Assumption: any stmts created for the vectorization of stmt S were
 2097 inserted before S. BSI is expected to point to S or some new stmt before S. */
/* Advance BSI until it lands on STMT again (the loop's advance step is
   not visible in this excerpt — presumably bsi_next; confirm against
   full source), then assert it was found.  */
 2099 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
 2101 gcc_assert (stmt == bsi_stmt (*bsi));
2105 /* Function vectorizable_assignment.
2107 Check if STMT performs an assignment (copy) that can be vectorized.
2108 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2109 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2110 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2113 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 2119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 2120 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 2121 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
 2124 /* Is vectorizable assignment? */
/* Recognize  ssa_name = <simple use>  — anything else is not handled
   by this routine (returns false; the return lines are elided here).  */
 2126 if (TREE_CODE (stmt) != MODIFY_EXPR)
 2129 scalar_dest = TREE_OPERAND (stmt, 0);
 2130 if (TREE_CODE (scalar_dest) != SSA_NAME)
 2133 op = TREE_OPERAND (stmt, 1);
 2134 if (!vect_is_simple_use (op, loop, NULL))
 2136 if (vect_debug_details (NULL))
 2137 fprintf (dump_file, "use not simple.");
/* Analysis-only mode: record the stmt kind and stop before emitting
   any code.  */
 2141 if (!vec_stmt) /* transformation not required. */
 2143 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
 2148 if (vect_debug_details (NULL))
 2149 fprintf (dump_file, "transform assignment.")
 2152 vec_dest = vect_create_destination_var (scalar_dest, vectype);
 2155 op = TREE_OPERAND (stmt, 1);
 2156 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
 2158 /* Arguments are ready. create the new vector stmt. */
 2159 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
 2160 new_temp = make_ssa_name (vec_dest, *vec_stmt);
 2161 TREE_OPERAND (*vec_stmt, 0) = new_temp;
 2162 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2168 /* Function vectorizable_operation.
2170 Check if STMT performs a binary or unary operation that can be vectorized.
2171 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2172 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2173 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2176 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 2181 tree op0, op1 = NULL;
 2182 tree vec_oprnd0, vec_oprnd1=NULL;
 2183 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 2184 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 2185 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
 2187 enum tree_code code;
 2188 enum machine_mode vec_mode;
 2194 /* Is STMT a vectorizable binary/unary operation? */
 2195 if (TREE_CODE (stmt) != MODIFY_EXPR)
 2198 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the scalar tree code to the corresponding vector optab so we can
   ask whether the target implements it for this vector mode.  */
 2201 operation = TREE_OPERAND (stmt, 1);
 2202 code = TREE_CODE (operation);
 2203 optab = optab_for_tree_code (code, vectype);
 2205 /* Support only unary or binary operations. */
 2206 op_type = TREE_CODE_LENGTH (code);
 2207 if (op_type != unary_op && op_type != binary_op)
 2209 if (vect_debug_details (NULL))
 2210 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* Every operand must itself be vectorizable (constant, invariant, or
   defined by an already-vectorizable stmt).  */
 2214 for (i = 0; i < op_type; i++)
 2216 op = TREE_OPERAND (operation, i);
 2217 if (!vect_is_simple_use (op, loop, NULL))
 2219 if (vect_debug_details (NULL))
 2220 fprintf (dump_file, "use not simple.");
 2225 /* Supportable by target? */
 2228 if (vect_debug_details (NULL))
 2229 fprintf (dump_file, "no optab.");
 2232 vec_mode = TYPE_MODE (vectype);
 2233 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
 2235 if (vect_debug_details (NULL))
 2236 fprintf (dump_file, "op not supported by target.");
/* Analysis-only mode: record the stmt kind and stop.  */
 2240 if (!vec_stmt) /* transformation not required. */
 2242 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
 2248 if (vect_debug_details (NULL))
 2249 fprintf (dump_file, "transform binary/unary operation.");
 2252 scalar_dest = TREE_OPERAND (stmt, 0);
 2253 vec_dest = vect_create_destination_var (scalar_dest, vectype);
 2256 op0 = TREE_OPERAND (operation, 0);
 2257 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
 2259 if (op_type == binary_op)
 2261 op1 = TREE_OPERAND (operation, 1);
 2262 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
 2265 /* Arguments are ready. create the new vector stmt. */
/* Same tree CODE as the scalar op, applied to the vector operands.  */
 2267 if (op_type == binary_op)
 2268 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
 2269 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
 2271 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
 2272 build1 (code, vectype, vec_oprnd0));
 2273 new_temp = make_ssa_name (vec_dest, *vec_stmt);
 2274 TREE_OPERAND (*vec_stmt, 0) = new_temp;
 2275 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2281 /* Function vectorizable_store.
2283 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2285 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2286 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2287 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2290 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2296 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2297 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2298 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2299 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2300 enum machine_mode vec_mode;
2302 enum dr_alignment_support alignment_support_cheme;
2304 /* Is vectorizable store? */
2306 if (TREE_CODE (stmt) != MODIFY_EXPR)
2309 scalar_dest = TREE_OPERAND (stmt, 0);
2310 if (TREE_CODE (scalar_dest) != ARRAY_REF
2311 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2314 op = TREE_OPERAND (stmt, 1);
2315 if (!vect_is_simple_use (op, loop, NULL))
2317 if (vect_debug_details (NULL))
2318 fprintf (dump_file, "use not simple.");
2322 vec_mode = TYPE_MODE (vectype);
2323 /* FORNOW. In some cases can vectorize even if data-type not supported
2324 (e.g. - array initialization with 0). */
2325 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2328 if (!STMT_VINFO_DATA_REF (stmt_info))
2332 if (!vec_stmt) /* transformation not required. */
2334 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2340 if (vect_debug_details (NULL))
2341 fprintf (dump_file, "transform store");
2343 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2344 gcc_assert (alignment_support_cheme);
/* Use '==', not '='.  The original assignment made this assert trivially
   true and silently overwrote the alignment scheme computed above.  */
2345 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2347 /* Handle use - get the vectorized def from the defining stmt. */
2348 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2351 /* FORNOW: make sure the data reference is aligned. */
2352 vect_align_data_ref (stmt);
2353 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2354 data_ref = build_fold_indirect_ref (data_ref);
2356 /* Arguments are ready. create the new vector stmt. */
2357 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2358 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2364 /* vectorizable_load.
2366 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2368 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2369 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2370 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2373 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2376 tree vec_dest = NULL;
2377 tree data_ref = NULL;
2379 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2380 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2381 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2388 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2389 edge pe = loop_preheader_edge (loop);
2390 enum dr_alignment_support alignment_support_cheme;
2392 /* Is vectorizable load?  A load must be a MODIFY_EXPR whose LHS is an
   SSA_NAME and whose RHS is an array or indirect reference with an
   associated data-ref record.  */
2394 if (TREE_CODE (stmt) != MODIFY_EXPR)
2397 scalar_dest = TREE_OPERAND (stmt, 0);
2398 if (TREE_CODE (scalar_dest) != SSA_NAME)
2401 op = TREE_OPERAND (stmt, 1);
2402 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2405 if (!STMT_VINFO_DATA_REF (stmt_info))
2408 mode = (int) TYPE_MODE (vectype);
2410 /* FORNOW. In some cases can vectorize even if data-type not supported
2411 (e.g. - data copies). */
2412 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2414 if (vect_debug_details (loop))
2415 fprintf (dump_file, "Aligned load, but unsupported type.")
2419 if (!vec_stmt) /* transformation not required. */
2421 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2427 if (vect_debug_details (NULL))
2428 fprintf (dump_file, "transform load.");
/* Ask the target which load scheme this data-ref supports; the analysis
   phase must have guaranteed one exists.  */
2430 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2431 gcc_assert (alignment_support_cheme);
2433 if (alignment_support_cheme == dr_aligned
2434 || alignment_support_cheme == dr_unaligned_supported)
/* Simple case: emit a single (possibly misaligned) vector load.  */
2445 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2446 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2447 if (aligned_access_p (dr))
2448 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned but target-supported: record the known byte misalignment
   (scaled to bits) in a MISALIGNED_INDIRECT_REF.  mis == -1 appears to
   mean "unknown" -- TODO confirm against DR_MISALIGNMENT's contract.  */
2451 int mis = DR_MISALIGNMENT (dr);
2452 tree tmis = (mis == -1 ?
2454 build_int_cst (integer_type_node, mis));
2455 tmis = int_const_binop (MULT_EXPR, tmis,
2456 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2457 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2459 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2460 new_temp = make_ssa_name (vec_dest, new_stmt);
2461 TREE_OPERAND (new_stmt, 0) = new_temp;
2462 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2464 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
/* Software-pipelined realignment scheme (see steps <1>-<5> below):
   load the two aligned vectors that straddle the unaligned address,
   then combine them with a realign_load.  */
2468 msq_init = *(floor(p1))
2469 p2 = initial_addr + VS - 1;
2470 magic = have_builtin ? builtin_result : initial_address;
2473 p2' = p2 + indx * vectype_size
2475 vec_dest = realign_load (msq, lsq, magic)
2489 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2490 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2491 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2493 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2494 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2495 new_temp = make_ssa_name (vec_dest, new_stmt);
2496 TREE_OPERAND (new_stmt, 0) = new_temp;
2497 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2498 gcc_assert (!new_bb);
2499 msq_init = TREE_OPERAND (new_stmt, 0);
2502 /* <2> Create lsq = *(floor(p2')) in the loop */
2503 offset = build_int_cst (integer_type_node,
2504 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2505 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2506 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2507 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2508 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2509 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2510 new_temp = make_ssa_name (vec_dest, new_stmt);
2511 TREE_OPERAND (new_stmt, 0) = new_temp;
2512 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2513 lsq = TREE_OPERAND (new_stmt, 0);
/* <3> Compute the "magic" operand of the realign_load: either the
   target's mask-for-load builtin result (emitted in the preheader),
   or, lacking the builtin, the data-ref address itself.  */
2517 if (targetm.vectorize.builtin_mask_for_load)
2519 /* Create permutation mask, if required, in loop preheader. */
2521 params = build_tree_list (NULL_TREE, init_addr);
2522 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2523 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2524 new_stmt = build_function_call_expr (builtin_decl, params);
2525 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2526 new_temp = make_ssa_name (vec_dest, new_stmt);
2527 TREE_OPERAND (new_stmt, 0) = new_temp;
2528 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2529 gcc_assert (!new_bb);
2530 magic = TREE_OPERAND (new_stmt, 0);
2534 /* Use current address instead of init_addr for reduced reg pressure.
2536 magic = dataref_ptr;
2540 /* <4> Create msq = phi <msq_init, lsq> in loop */
2541 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2542 msq = make_ssa_name (vec_dest, NULL_TREE);
2543 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2544 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2545 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2546 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2549 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2550 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2551 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2552 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
2553 new_temp = make_ssa_name (vec_dest, new_stmt);
2554 TREE_OPERAND (new_stmt, 0) = new_temp;
2555 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2560 *vec_stmt = new_stmt;
2565 /* Function vect_supportable_dr_alignment
2567 Return whether the data reference DR is supported with respect to its
2570 static enum dr_alignment_support
2571 vect_supportable_dr_alignment (struct data_reference *dr)
2573 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2574 enum machine_mode mode = (int) TYPE_MODE (vectype);
2576 if (aligned_access_p (dr))
2579 /* Possibly unaligned access. */
/* Reads can use the software-pipelined realign-load scheme when the
   target has a realign-load insn for this mode and either has no
   mask-for-load builtin or actually provides a decl for it.  */
2581 if (DR_IS_READ (dr))
2583 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2584 && (!targetm.vectorize.builtin_mask_for_load
2585 || targetm.vectorize.builtin_mask_for_load ()))
2586 return dr_unaligned_software_pipeline;
/* Otherwise fall back to plain misaligned accesses if the target
   says they are OK for this mode.  */
2588 if (targetm.vectorize.misaligned_mem_ok (mode))
2589 /* Can't software pipeline the loads. */
2590 return dr_unaligned_supported;
2594 return dr_unaligned_unsupported;
2598 /* Function vect_transform_stmt.
2600 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2603 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2605 bool is_store = false;
2606 tree vec_stmt = NULL_TREE;
2607 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the stmt kind recorded by the analysis phase; each
   vectorizable_* routine emits the replacement stmt into VEC_STMT.  */
2610 switch (STMT_VINFO_TYPE (stmt_info))
2612 case op_vec_info_type:
2613 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2617 case assignment_vec_info_type:
2618 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2622 case load_vec_info_type:
2623 done = vectorizable_load (stmt, bsi, &vec_stmt);
2627 case store_vec_info_type:
2628 done = vectorizable_store (stmt, bsi, &vec_stmt);
2633 if (vect_debug_details (NULL))
2634 fprintf (dump_file, "stmt not supported.");
/* Remember the vectorized stmt so later uses of this stmt's def can
   find their vectorized operand.  */
2638 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2644 /* This function builds ni_name = number of iterations loop executes
2645 on the loop preheader. */
2648 vect_build_loop_niters (loop_vec_info loop_vinfo)
2650 tree ni_name, stmt, var;
2652 basic_block new_bb = NULL;
2653 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2654 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2656 var = create_tmp_var (TREE_TYPE (ni), "niters");
2657 add_referenced_tmp_var (var);
2658 if (TREE_CODE (ni) == INTEGER_CST)
2660 /* This case is generated when treating a known loop bound
2661 indivisible by VF. Here we cannot use force_gimple_operand. */
2662 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2663 ni_name = make_ssa_name (var, stmt);
2664 TREE_OPERAND (stmt, 0) = ni_name;
/* Symbolic bound: gimplify the niters expression into STMT.  */
2667 ni_name = force_gimple_operand (ni, &stmt, false, var);
/* Insert the computation on the preheader edge; if that split the
   edge, attach the fresh block to the enclosing loop.  */
2669 pe = loop_preheader_edge (loop);
2671 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2673 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2679 /* This function generates the following statements:
2681 ni_name = number of iterations loop executes
2682 ratio = ni_name / vf
2683 ratio_mult_vf_name = ratio * vf
2685 and places them at the loop preheader edge. */
2688 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2689 tree *ratio_mult_vf_name_p, tree *ratio_p)
2696 tree ratio_mult_vf_name, ratio_mult_vf;
2697 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2698 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2702 /* Generate temporary variable that contains
2703 number of iterations loop executes. */
2705 ni_name = vect_build_loop_niters (loop_vinfo);
/* ratio = ni / vf.  The division is strength-reduced to a shift since
   vf is a power of 2 (see vect_build_symbol_bound).  */
2708 vf is power of 2; then if ratio = = n >> log2 (vf). */
2709 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2710 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2712 /* Update initial conditions of loop copy. */
2714 /* ratio_mult_vf = ratio * vf;
2715 then if ratio_mult_vf = ratio << log2 (vf). */
2717 i = exact_log2 (vf);
2718 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2719 add_referenced_tmp_var (ratio_mult_vf);
2721 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2723 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2724 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2725 ratio, build_int_cst (unsigned_type_node,
2728 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
/* Place the multiplication on the preheader edge, keeping any block
   created by the insertion attached to the loop structure.  */
2730 pe = loop_preheader_edge (loop);
2731 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2733 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2735 *ni_name_p = ni_name;
2736 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2743 /* This function generates stmt
2747 and attaches it to preheader of LOOP. */
2750 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2752 tree var, stmt, var_name;
2757 /* create temporary variable */
2758 var = create_tmp_var (TREE_TYPE (n), "bnd");
2759 add_referenced_tmp_var (var);
2761 var_name = make_ssa_name (var, NULL_TREE);
2763 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2765 i = exact_log2 (vf);
2766 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2767 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2768 n, build_int_cst (unsigned_type_node,i)));
2770 SSA_NAME_DEF_STMT (var_name) = stmt;
/* Emit the shift on the preheader edge; register any newly created
   block with the enclosing loop.  */
2772 pe = loop_preheader_edge (loop);
2773 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2775 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2777 if (vect_debug_details (NULL))
2778 fprintf (dump_file, "New bb on preheader edge was not generated.");
2784 /* Function vect_transform_loop_bound.
2786 Create a new exit condition for the loop. */
2789 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2791 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2792 tree orig_cond_expr;
2793 HOST_WIDE_INT old_N = 0;
2795 tree new_loop_bound;
/* A symbolic iteration count means the bound NITERS was computed at
   runtime (ratio from the peeling driver); otherwise old_N/vf is a
   compile-time constant.  */
2799 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2802 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2804 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2806 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2807 #ifdef ENABLE_CHECKING
2808 gcc_assert (orig_cond_expr);
2811 /* new loop exit test: */
/* Match the type of the original exit-condition operand so the new
   bound compares cleanly.  */
2812 lb_type = TREE_TYPE (TREE_OPERAND (COND_EXPR_COND (orig_cond_expr), 1));
2815 fold_convert (lb_type, build_int_cst (unsigned_type_node, old_N/vf));
2817 new_loop_bound = niters;
2819 slpeel_make_loop_iterate_ntimes (loop, new_loop_bound);
2823 /* Function vect_update_ivs_after_vectorizer.
2825 "Advance" the induction variables of LOOP to the value they should take
2826 after the execution of LOOP. This is currently necessary because the
2827 vectorizer does not handle induction variables that are used after the
2828 loop. Such a situation occurs when the last iterations of LOOP are
2830 1. We introduced new uses after LOOP for IVs that were not originally used
2831 after LOOP: the IVs of LOOP are now used by an epilog loop.
2832 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2833 times, whereas the loop IVs should be bumped N times.
2836 - LOOP - a loop that is going to be vectorized. The last few iterations
2837 of LOOP were peeled.
2838 - NITERS - the number of iterations that LOOP executes (before it is
2839 vectorized). i.e, the number of times the ivs should be bumped.
2844 if (guard-cond) GOTO bb_before_epilog_loop
2851 bb_before_epilog_loop:
2853 bb_before_epilog_loop has edges coming in form the loop exit and
2854 from bb_before_loop. New definitions for ivs will be placed on the edge
2855 from loop->exit to bb_before_epilog_loop. This also requires that we update
2856 the phis in bb_before_epilog_loop. (In the code this bb is denoted
2859 Assumption 1: Like the rest of the vectorizer, this function assumes
2860 a single loop exit that has a single predecessor.
2862 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2863 organized in the same order.
2865 Assumption 3: The access function of the ivs is simple enough (see
2866 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2870 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2872 edge exit = loop->exit_edges[0];
2874 basic_block update_bb = exit->dest;
2877 /* Generate basic block at the exit from the loop. */
2878 basic_block new_bb = split_edge (exit);
2880 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2881 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2882 update_e = EDGE_SUCC (new_bb, 0);
/* Walk the header phis and the update_bb phis in lockstep
   (Assumption 2 above guarantees they correspond by position).  */
2884 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2886 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2888 tree access_fn = NULL;
2889 tree evolution_part;
2892 tree var, stmt, ni, ni_name;
2893 block_stmt_iterator last_bsi;
2895 /* Skip virtual phi's. The data dependences that are associated with
2896 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2898 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2900 if (vect_debug_details (NULL))
2901 fprintf (dump_file, "virtual phi. skip.");
2905 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2906 gcc_assert (access_fn);
2908 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2910 /* FORNOW: We do not transform initial conditions of IVs
2911 which evolution functions are a polynomial of degree >= 2 or
2913 gcc_assert (!tree_is_chrec (evolution_part));
2915 step_expr = evolution_part;
2916 init_expr = unshare_expr (initial_condition (access_fn));
/* Advanced IV value: init + niters * step.  */
2918 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2919 build2 (MULT_EXPR, TREE_TYPE (niters),
2920 niters, step_expr), init_expr);
2922 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2923 add_referenced_tmp_var (var);
2925 ni_name = force_gimple_operand (ni, &stmt, false, var);
2927 /* Insert stmt into new_bb. */
2928 last_bsi = bsi_last (new_bb);
2930 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2932 /* Fix phi expressions in duplicated loop. */
2933 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
2934 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
2935 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
2940 /* This function is the main driver of transformation
2941 to be done for loop before vectorizing it in case of
2942 unknown loop bound. */
2945 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
2946 struct loops *loops)
2949 tree ni_name, ratio_mult_vf_name;
2950 #ifdef ENABLE_CHECKING
2953 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2954 struct loop *new_loop;
2956 if (vect_debug_details (NULL))
/* Dump marker fixed to this function's actual name (was the misspelled
   "vect_transtorm_for_unknown_loop_bound", which made dumps ungreppable).  */
2957 fprintf (dump_file, "\n<<vect_do_peeling_for_loop_bound>>\n");
2959 /* Generate the following variables on the preheader of original loop:
2961 ni_name = number of iterations the original loop executes
2962 ratio = ni_name / vf
2963 ratio_mult_vf_name = ratio * vf */
2964 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2965 &ratio_mult_vf_name, ratio);
2967 /* Update loop info. */
2968 loop->pre_header = loop_preheader_edge (loop)->src;
2969 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2971 #ifdef ENABLE_CHECKING
2972 loop_num = loop->num;
2974 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
2975 ratio_mult_vf_name, ni_name, true);
2976 #ifdef ENABLE_CHECKING
2977 gcc_assert (new_loop);
2978 gcc_assert (loop_num == loop->num);
2981 /* Update IVs of original loop as if they were advanced
2982 by ratio_mult_vf_name steps. */
2984 #ifdef ENABLE_CHECKING
2985 /* Check existence of intermediate bb. */
2986 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
2988 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
2995 /* Function vect_gen_niters_for_prolog_loop
2997 Set the number of iterations for the loop represented by LOOP_VINFO
2998 to the minimum between NITERS (the original iteration count of the loop)
2999 and the misalignment of DR - the first data reference recorded in
3000 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3001 this loop, the data reference DR will refer to an aligned location. */
3004 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3006 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3007 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3008 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3010 tree iters, iters_name;
3013 tree dr_stmt = DR_STMT (dr);
3014 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3015 tree start_addr, byte_miss_align, elem_miss_align;
3016 int vec_type_align =
3017 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3020 tree new_stmt_list = NULL_TREE;
/* Materialize the start address of DR on the preheader edge so the
   misalignment can be computed from it at runtime.  */
3022 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3023 &new_stmt_list, NULL_TREE);
3025 pe = loop_preheader_edge (loop);
3026 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3028 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
/* byte_miss_align = start_addr & (vec_type_align - 1);
   elem_miss_align converts that to vector elements.  */
3031 build (BIT_AND_EXPR, integer_type_node, start_addr,
3032 build (MINUS_EXPR, integer_type_node,
3033 build_int_cst (unsigned_type_node,
3034 vec_type_align), integer_one_node));
3035 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3036 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3037 byte_miss_align, tmp1);
/* Prolog trip count: (vf - elem_miss_align) & (vf - 1), i.e. the
   number of scalar iterations needed to reach an aligned address.  */
3040 build (BIT_AND_EXPR, integer_type_node,
3041 build (MINUS_EXPR, integer_type_node,
3042 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3043 build (MINUS_EXPR, integer_type_node,
3044 build_int_cst (unsigned_type_node, vf), integer_one_node));
/* Never peel more iterations than the loop actually runs.  */
3046 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3047 var = create_tmp_var (TREE_TYPE (iters), "iters");
3048 add_referenced_tmp_var (var);
3049 iters_name = force_gimple_operand (iters, &stmt, false, var);
3051 /* Insert stmt on loop preheader edge. */
3052 pe = loop_preheader_edge (loop);
3054 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3056 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3062 /* Function vect_update_inits_of_dr
3064 NITERS iterations were peeled from LOOP. DR represents a data reference
3065 in LOOP. This function updates the information recorded in DR to
3066 account for the fact that the first NITERS iterations had already been
3067 executed. Specifically, it updates the initial_condition of the
3068 access_function of DR. */
3071 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3074 tree access_fn = DR_ACCESS_FN (dr, 0);
3075 tree init, init_new, step;
/* New initial condition: init + niters * step, where step is DR's
   evolution in LOOP.  */
3077 step = evolution_part_in_loop_num (access_fn, loop->num);
3078 init = initial_condition (access_fn);
3080 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3081 build (MULT_EXPR, TREE_TYPE (niters),
3082 niters, step), init);
3083 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3089 /* Function vect_update_inits_of_drs
3091 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3092 This function updates the information recorded for the data references in
3093 the loop to account for the fact that the first NITERS iterations had
3094 already been executed. Specifically, it updates the initial_condition of the
3095 access_function of all the data_references in the loop. */
3098 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3101 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3102 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3103 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3105 if (dump_file && (dump_flags & TDF_DETAILS))
/* Dump marker fixed to this function's name; it previously printed the
   singular "vect_update_inits_of_dr", shadowing the per-DR helper.  */
3106 fprintf (dump_file, "\n<<vect_update_inits_of_drs>>\n");
/* Advance the initial condition of every write and read data-ref by
   the NITERS peeled iterations.  */
3108 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3110 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3111 vect_update_inits_of_dr (dr, loop, niters);
3114 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3116 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3117 vect_update_inits_of_dr (dr, loop, niters);
3122 /* Function vect_do_peeling_for_alignment
3124 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3125 'niters' is set to the misalignment of one of the data references in the
3126 loop, thereby forcing it to refer to an aligned location at the beginning
3127 of the execution of this loop. The data reference for which we are
3128 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3131 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3133 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3134 tree niters_of_prolog_loop, ni_name;
3137 if (vect_debug_details (NULL))
3138 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
/* Compute the total trip count and, from it, the prolog trip count
   that brings the unaligned data-ref to an aligned address.  */
3140 ni_name = vect_build_loop_niters (loop_vinfo);
3141 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3144 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3145 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3146 niters_of_prolog_loop, ni_name, false);
3148 /* Update number of times loop executes. */
3149 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3150 LOOP_VINFO_NITERS (loop_vinfo) =
3151 build (MINUS_EXPR, integer_type_node, n_iters, niters_of_prolog_loop);
3153 /* Update all inits of access functions of all data refs. */
3154 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3156 /* After peeling we have to reset scalar evolution analyzer. */
3163 /* Function vect_transform_loop.
3165 The analysis phase has determined that the loop is vectorizable.
3166 Vectorize the loop - created vectorized stmts to replace the scalar
3167 stmts in the loop, and update the loop exit condition. */
3170 vect_transform_loop (loop_vec_info loop_vinfo,
3171 struct loops *loops ATTRIBUTE_UNUSED)
3173 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3174 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3175 int nbbs = loop->num_nodes;
3176 block_stmt_iterator si;
3179 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3181 if (vect_debug_details (NULL))
3182 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3185 /* Peel the loop if there are data refs with unknown alignment.
3186 Only one data ref with unknown store is allowed. */
3188 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3189 vect_do_peeling_for_alignment (loop_vinfo, loops);
3191 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3192 compile time constant), or it is a constant that doesn't divide by the
3193 vectorization factor, then an epilog loop needs to be created.
3194 We therefore duplicate the loop: the original loop will be vectorized,
3195 and will compute the first (n/VF) iterations. The second copy of the loop
3196 will remain scalar and will compute the remaining (n%VF) iterations.
3197 (VF is the vectorization factor). */
3199 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3200 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3201 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3202 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3204 /* 1) Make sure the loop header has exactly two entries
3205 2) Make sure we have a preheader basic block. */
3207 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3209 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3212 /* FORNOW: the vectorizer supports only loops which body consist
3213 of one basic block (header + empty latch). When the vectorizer will
3214 support more involved loop forms, the order by which the BBs are
3215 traversed need to be reconsidered. */
3217 for (i = 0; i < nbbs; i++)
3219 basic_block bb = bbs[i];
3221 for (si = bsi_start (bb); !bsi_end_p (si);)
3223 tree stmt = bsi_stmt (si);
3224 stmt_vec_info stmt_info;
3227 if (vect_debug_details (NULL))
3229 fprintf (dump_file, "------>vectorizing statement: ");
3230 print_generic_expr (dump_file, stmt, TDF_SLIM);
3232 stmt_info = vinfo_for_stmt (stmt);
3233 gcc_assert (stmt_info);
3234 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3239 #ifdef ENABLE_CHECKING
3240 /* FORNOW: Verify that all stmts operate on the same number of
3241 units and no inner unrolling is necessary. */
3243 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3244 == vectorization_factor);
3246 /* -------- vectorize statement ------------ */
3247 if (vect_debug_details (NULL))
3248 fprintf (dump_file, "transform statement.");
3250 is_store = vect_transform_stmt (stmt, &si);
3253 /* free the attached stmt_vec_info and remove the stmt. */
3254 stmt_ann_t ann = stmt_ann (stmt);
3256 set_stmt_info (ann, NULL);
/* Rewrite the exit test to iterate RATIO (= n/VF) times.  */
3265 vect_transform_loop_bound (loop_vinfo, ratio);
3267 if (vect_debug_details (loop))
3268 fprintf (dump_file,"Success! loop vectorized.");
3269 if (vect_debug_stats (loop))
3270 fprintf (dump_file, "LOOP VECTORIZED.");
3274 /* Function vect_is_simple_use.
3277 LOOP - the loop that is being vectorized.
3278 OPERAND - operand of a stmt in LOOP.
3279 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3281 Returns whether a stmt with OPERAND can be vectorized.
3282 Supportable operands are constants, loop invariants, and operands that are
3283 defined by the current iteration of the loop. Unsupportable operands are
3284 those that are defined by a previous iteration of the loop (as is the case
3285 in reduction/induction computations). */
3288 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
/* Integer/real constants are trivially fine; anything else must be an
   SSA_NAME whose def we can classify.  */
3296 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3299 if (TREE_CODE (operand) != SSA_NAME)
3302 def_stmt = SSA_NAME_DEF_STMT (operand);
3303 if (def_stmt == NULL_TREE )
3305 if (vect_debug_details (NULL))
3306 fprintf (dump_file, "no def_stmt.");
3310 /* empty stmt is expected only in case of a function argument.
3311 (Otherwise - we expect a phi_node or a modify_expr). */
3312 if (IS_EMPTY_STMT (def_stmt))
3314 tree arg = TREE_OPERAND (def_stmt, 0);
3315 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3317 if (vect_debug_details (NULL))
3319 fprintf (dump_file, "Unexpected empty stmt: ");
3320 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3325 /* phi_node inside the loop indicates an induction/reduction pattern.
3326 This is not supported yet. */
3327 bb = bb_for_stmt (def_stmt);
3328 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3330 if (vect_debug_details (NULL))
3331 fprintf (dump_file, "reduction/induction - unsupported.");
3332 return false; /* FORNOW: not supported yet. */
3335 /* Expecting a modify_expr or a phi_node. */
3336 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3337 || TREE_CODE (def_stmt) == PHI_NODE)
3348 /* Function vect_analyze_operations.
3350 Scan the loop stmts and make sure they are all vectorizable. */
3353 vect_analyze_operations (loop_vec_info loop_vinfo)
3355 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3356 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3357 int nbbs = loop->num_nodes;
3358 block_stmt_iterator si;
3359 int vectorization_factor = 0;
3364 if (vect_debug_details (NULL))
3365 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
/* Examine every stmt in every basic block of the loop body.  */
3367 for (i = 0; i < nbbs; i++)
3369 basic_block bb = bbs[i];
3371 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3373 tree stmt = bsi_stmt (si);
3375 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3378 if (vect_debug_details (NULL))
3380 fprintf (dump_file, "==> examining statement: ");
3381 print_generic_expr (dump_file, stmt, TDF_SLIM);
3384 gcc_assert (stmt_info);
3386 /* skip stmts which do not need to be vectorized.
3387 this is expected to include:
3388 - the COND_EXPR which is the loop exit condition
3389 - any LABEL_EXPRs in the loop
3390 - computations that are used only for array indexing or loop
3393 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3395 if (vect_debug_details (NULL))
3396 fprintf (dump_file, "irrelevant.");
/* A stmt that already produces a vector value in the scalar loop
   cannot be vectorized (again) - fail the analysis.  */
3400 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3402 if (vect_debug_stats (loop) || vect_debug_details (loop))
3404 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3405 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Determine the scalar type this stmt operates on: the data-ref's
   element type if the stmt accesses memory, else the type of the
   lhs of the MODIFY_EXPR, else the type of the stmt itself.  */
3410 if (STMT_VINFO_DATA_REF (stmt_info))
3411 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3412 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3413 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3415 scalar_type = TREE_TYPE (stmt);
3417 if (vect_debug_details (NULL))
3419 fprintf (dump_file, "get vectype for scalar type: ");
3420 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
/* Map the scalar type to a target-supported vector type; failure
   means this data-type cannot be vectorized on this target.  */
3423 vectype = get_vectype_for_scalar_type (scalar_type);
3426 if (vect_debug_stats (loop) || vect_debug_details (loop))
3428 fprintf (dump_file, "not vectorized: unsupported data-type ");
3429 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3434 if (vect_debug_details (NULL))
3436 fprintf (dump_file, "vectype: ");
3437 print_generic_expr (dump_file, vectype, TDF_SLIM);
3439 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* The stmt must match one of the supported vectorizable idioms.  */
3441 ok = (vectorizable_operation (stmt, NULL, NULL)
3442 || vectorizable_assignment (stmt, NULL, NULL)
3443 || vectorizable_load (stmt, NULL, NULL)
3444 || vectorizable_store (stmt, NULL, NULL));
3448 if (vect_debug_stats (loop) || vect_debug_details (loop))
3450 fprintf (dump_file, "not vectorized: stmt not supported: ");
3451 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Number of scalar elements per vector for this stmt's vectype.  */
3456 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3457 if (vect_debug_details (NULL))
3458 fprintf (dump_file, "nunits = %d", nunits);
3460 if (vectorization_factor)
3462 /* FORNOW: don't allow mixed units.
3463 This restriction will be relaxed in the future. */
3464 if (nunits != vectorization_factor)
3466 if (vect_debug_stats (loop) || vect_debug_details (loop))
3467 fprintf (dump_file, "not vectorized: mixed data-types");
/* First stmt seen fixes the loop-wide vectorization factor.  */
3472 vectorization_factor = nunits;
3474 #ifdef ENABLE_CHECKING
3475 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3476 * vectorization_factor == UNITS_PER_SIMD_WORD);
3481 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3483 if (vectorization_factor <= 1)
3485 if (vect_debug_stats (loop) || vect_debug_details (loop))
3486 fprintf (dump_file, "not vectorized: unsupported data-type")
3489 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3491 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3493 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3494 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* If the iteration count is unknown, or known but not a multiple of
   the vectorization factor, an epilog (scalar) loop is required;
   verify that one can actually be generated.  */
3496 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3497 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3499 if (vect_debug_stats (loop) || vect_debug_details (loop))
3500 fprintf (dump_file, "epilog loop required.")
3501 if (!vect_can_advance_ivs_p (loop))
3503 if (vect_debug_stats (loop) || vect_debug_details (loop))
3504 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3507 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3509 if (vect_debug_stats (loop) || vect_debug_details (loop))
3510 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3519 /* Function exist_non_indexing_operands_for_use_p
3521 USE is one of the uses attached to STMT. Check if USE is
3522 used in STMT for anything other than indexing an array. */
3525 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3528 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3530 /* USE corresponds to some operand in STMT. If there is no data
3531 reference in STMT, then any operand that corresponds to USE
3532 is not indexing an array. */
3533 if (!STMT_VINFO_DATA_REF (stmt_info))
3536 /* STMT has a data_ref. FORNOW this means that its of one of
3537 the following forms:
3540 (This should have been verified in analyze_data_refs).
3542 'var' in the second case corresponds to a def, not a use,
3543 so USE cannot correspond to any operands that are not used
3546 Therefore, all we need to check is if STMT falls into the
3547 first case, and whether var corresponds to USE. */
/* lhs is an SSA_NAME: STMT has the form 'var = array_ref', so the
   rhs is the only operand left to compare against USE.  */
3549 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3552 operand = TREE_OPERAND (stmt, 1);
/* A non-SSA_NAME rhs cannot match the SSA_NAME use being queried.  */
3554 if (TREE_CODE (operand) != SSA_NAME)
3564 /* Function vect_is_simple_iv_evolution.
3566 FORNOW: A simple evolution of an induction variables in the loop is
3567 considered a polynomial evolution with constant step. */
3570 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3571 tree * step, bool strict)
3576 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3578 /* When there is no evolution in this loop, the evolution function
3580 if (evolution_part == NULL_TREE)
3583 /* When the evolution is a polynomial of degree >= 2
3584 the evolution function is not "simple". */
3585 if (tree_is_chrec (evolution_part))
/* Degree-1 chrec: the step is the evolution part itself and the
   initial value comes from the access function's initial condition.  */
3588 step_expr = evolution_part;
3589 init_expr = unshare_expr (initial_condition (access_fn));
3591 if (vect_debug_details (NULL))
3593 fprintf (dump_file, "step: ");
3594 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3595 fprintf (dump_file, ", init: ");
3596 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* The step must be a compile-time integer constant.  */
3602 if (TREE_CODE (step_expr) != INTEGER_CST)
3604 if (vect_debug_details (NULL))
3605 fprintf (dump_file, "step unknown.");
/* NOTE(review): presumably this stricter check (step must be exactly
   1) is gated on the STRICT parameter - confirm against full source.  */
3610 if (!integer_onep (step_expr))
3612 if (vect_debug_details (NULL))
3613 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3621 /* Function vect_analyze_scalar_cycles.
3623 Examine the cross iteration def-use cycles of scalar variables, by
3624 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3625 cycles that they represent do not impede vectorization.
3627 FORNOW: Reduction as in the following loop, is not supported yet:
3631 The cross-iteration cycle corresponding to variable 'sum' will be
3632 considered too complicated and will impede vectorization.
3634 FORNOW: Induction as in the following loop, is not supported yet:
3639 However, the following loop *is* vectorizable:
3644 In both loops there exists a def-use cycle for the variable i:
3645 loop: i_2 = PHI (i_0, i_1)
3650 The evolution of the above cycle is considered simple enough,
3651 however, we also check that the cycle does not need to be
3652 vectorized, i.e - we check that the variable that this cycle
3653 defines is only used for array indexing or in stmts that do not
3654 need to be vectorized. This is not the case in loop2, but it
3655 *is* the case in loop3. */
3658 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3661 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3662 basic_block bb = loop->header;
3665 if (vect_debug_details (NULL))
3666 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Every cross-iteration scalar cycle has a PHI in the loop header;
   walk all of them.  */
3668 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3670 tree access_fn = NULL;
3672 if (vect_debug_details (NULL))
3674 fprintf (dump_file, "Analyze phi: ");
3675 print_generic_expr (dump_file, phi, TDF_SLIM);
3678 /* Skip virtual phi's. The data dependences that are associated with
3679 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3681 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3683 if (vect_debug_details (NULL))
3684 fprintf (dump_file, "virtual phi. skip.");
3688 /* Analyze the evolution function. */
3690 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3691 those of loop induction variables; This property is verified here.
3693 Furthermore, if that induction variable is used in an operation
3694 that needs to be vectorized (i.e, is not solely used to index
3695 arrays and check the exit condition) - we do not support its
3696 vectorization yet. This property is verified in vect_is_simple_use,
3697 during vect_analyze_operations. */
3699 access_fn = /* instantiate_parameters
3701 analyze_scalar_evolution (loop, PHI_RESULT (phi));
/* No access function could be computed for this PHI: fail.  */
3705 if (vect_debug_stats (loop) || vect_debug_details (loop))
3706 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3710 if (vect_debug_details (NULL))
3712 fprintf (dump_file, "Access function of PHI: ");
3713 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The cycle must be a simple induction (constant-step evolution).  */
3716 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3719 if (vect_debug_stats (loop) || vect_debug_details (loop))
3720 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3729 /* Function vect_analyze_data_ref_dependence.
3731 Return TRUE if there (might) exist a dependence between a memory-reference
3732 DRA and a memory-reference DRB. */
3735 vect_analyze_data_ref_dependence (struct data_reference *dra,
3736 struct data_reference *drb,
3740 struct data_dependence_relation *ddr;
/* If we cannot even prove the two base names differ, we must
   conservatively assume a dependence.  */
3742 if (!array_base_name_differ_p (dra, drb, &differ_p))
3744 if (vect_debug_stats (loop) || vect_debug_details (loop))
3747 "not vectorized: can't determine dependence between: ");
3748 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM)
3749 fprintf (dump_file, " and ");
3750 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Run the affine dependence test on the pair.  */
3758 ddr = initialize_data_dependence_relation (dra, drb);
3759 compute_affine_dependence (ddr);
/* NOTE(review): chrec_known here appears to mark the case where the
   relation is fully analyzed (no dependence); the early exit for it
   seems to be elided in this extract - confirm against full source.
   Otherwise a possible dependence is reported below.  */
3761 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3764 if (vect_debug_stats (loop) || vect_debug_details (loop))
3767 "not vectorized: possible dependence between data-refs ");
3768 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3769 fprintf (dump_file, " and ");
3770 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3777 /* Function vect_analyze_data_ref_dependences.
3779 Examine all the data references in the loop, and make sure there do not
3780 exist any data dependences between them.
3782 TODO: dependences which distance is greater than the vectorization factor
3786 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3789 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3790 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3791 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3793 /* Examine store-store (output) dependences. */
3795 if (vect_debug_details (NULL))
3796 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3798 if (vect_debug_details (NULL))
3799 fprintf (dump_file, "compare all store-store pairs.");
/* All unordered pairs of writes (j starts at i+1 to avoid checking a
   pair twice or a ref against itself).  */
3801 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3803 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3805 struct data_reference *dra =
3806 VARRAY_GENERIC_PTR (loop_write_refs, i);
3807 struct data_reference *drb =
3808 VARRAY_GENERIC_PTR (loop_write_refs, j);
3809 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3814 /* Examine load-store (true/anti) dependences. */
3816 if (vect_debug_details (NULL))
3817 fprintf (dump_file, "compare all load-store pairs.");
/* Every read against every write (full cross product).  */
3819 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3821 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3823 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3824 struct data_reference *drb =
3825 VARRAY_GENERIC_PTR (loop_write_refs, j);
3826 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3835 /* Function vect_get_first_index.
3837 REF is a data reference.
3838 If it is an ARRAY_REF: if its lower bound is simple enough,
3839 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3840 If it is not an ARRAY_REF: REF has no "first index";
3841 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3844 vect_get_first_index (tree ref, tree *array_first_index)
/* Non-array references have no first index; report zero.  */
3848 if (TREE_CODE (ref) != ARRAY_REF)
3849 *array_first_index = size_zero_node;
3852 array_start = array_ref_low_bound (ref);
/* The lower bound must be a simple host integer constant.  */
3853 if (!host_integerp (array_start,0))
3855 if (vect_debug_details (NULL))
3857 fprintf (dump_file, "array min val not simple integer cst.");
3858 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3862 *array_first_index = array_start;
3869 /* Function vect_compute_array_base_alignment.
3870 A utility function of vect_compute_array_ref_alignment.
3872 Compute the misalignment of ARRAY in bits.
3875 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3876 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3877 if NULL: don't compute misalignment, just return the base of ARRAY.
3878 PREV_DIMENSIONS - initialized to one.
3879 MISALIGNMENT - the computed misalignment in bits.
3882 If VECTYPE is not NULL:
3883 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3884 the base of the array, and put the computed misalignment in MISALIGNMENT.
3886 Return the base of the array.
3888 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3889 a[idx_N]...[idx_2][idx_1] is
3890 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3891 ... + idx_N * dim_0 * ... * dim_N-1}.
3892 (The misalignment of &a is not checked here).
3893 Note, that every term contains dim_0, therefore, if dim_0 is a
3894 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3895 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3896 NUINTS, we can say that the misalignment of the sum is equal to
3897 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3898 we can't determine this array misalignment, and we return
3900 We proceed recursively in this manner, accumulating total misalignment
3901 and the multiplication of previous dimensions for correct misalignment
3905 vect_compute_array_base_alignment (tree array,
3907 tree *prev_dimensions,
3912 tree dimension_size;
3914 tree bits_per_vectype;
3915 tree bits_per_vectype_unit;
3917 /* The 'stop condition' of the recursion. */
3918 if (TREE_CODE (array) != ARRAY_REF)
3922 /* Just get the base decl. */
3923 return vect_compute_array_base_alignment
3924 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* Give up if the accumulated values have already overflowed or are
   not simple host integers.  */
3926 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3927 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
/* dimension_size = max - min + 1 for this dimension's index domain.  */
3930 domain = TYPE_DOMAIN (TREE_TYPE (array));
3932 int_const_binop (PLUS_EXPR,
3933 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3934 TYPE_MIN_VALUE (domain), 1),
3937 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
3938 is a multiple of NUNITS:
3940 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
3942 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
3943 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
3944 if (integer_zerop (mis))
3945 /* This array is aligned. Continue just in order to get the base decl. */
3946 return vect_compute_array_base_alignment
3947 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3949 index = TREE_OPERAND (array, 1);
3950 if (!host_integerp (index, 1))
3951 /* The current index is not constant. */
/* Normalize the index relative to the dimension's lower bound.  */
3954 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
3956 bits_per_vectype = fold_convert (unsigned_type_node,
3957 build_int_cst (NULL_TREE, BITS_PER_UNIT *
3958 GET_MODE_SIZE (TYPE_MODE (vectype))));
3959 bits_per_vectype_unit = fold_convert (unsigned_type_node,
3960 build_int_cst (NULL_TREE, BITS_PER_UNIT *
3961 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
3963 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
3967 (*misalignment + index_val * dimension_size * *prev_dimensions)
3971 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
3972 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
3973 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
3974 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
3975 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
/* Accumulate this dimension's size for the next recursion level.  */
3978 *prev_dimensions = int_const_binop (MULT_EXPR,
3979 *prev_dimensions, dimension_size, 1);
/* Recurse towards the array base, one dimension at a time.  */
3981 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
3987 /* Function vect_compute_data_ref_alignment
3989 Compute the misalignment of the data reference DR.
3992 1. If during the misalignment computation it is found that the data reference
3993 cannot be vectorized then false is returned.
3994 2. DR_MISALIGNMENT (DR) is defined.
3996 FOR NOW: No analysis is actually performed. Misalignment is calculated
3997 only for trivial cases. TODO. */
4000 vect_compute_data_ref_alignment (struct data_reference *dr,
4001 loop_vec_info loop_vinfo)
4003 tree stmt = DR_STMT (dr);
4004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4005 tree ref = DR_REF (dr);
4008 tree offset = size_zero_node;
4009 tree base, bit_offset, alignment;
4010 tree unit_bits = fold_convert (unsigned_type_node,
4011 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4013 bool base_aligned_p;
4015 if (vect_debug_details (NULL))
4016 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4018 /* Initialize misalignment to unknown. */
4019 DR_MISALIGNMENT (dr) = -1;
4021 scalar_type = TREE_TYPE (ref);
4022 vectype = get_vectype_for_scalar_type (scalar_type);
4025 if (vect_debug_details (NULL))
4027 fprintf (dump_file, "no vectype for stmt: ");
4028 print_generic_expr (dump_file, stmt, TDF_SLIM);
4029 fprintf (dump_file, " scalar_type: ");
4030 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4032 /* It is not possible to vectorize this data reference. */
4035 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* FORNOW only ARRAY_REF and INDIRECT_REF data-refs are handled.  */
4036 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4038 if (TREE_CODE (ref) == ARRAY_REF)
4041 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
/* Resolve the base decl and the bit offset of the access from it.  */
4043 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4044 loop_vinfo, &bit_offset, &base_aligned_p);
4047 if (vect_debug_details (NULL))
4049 fprintf (dump_file, "Unknown alignment for access: ");
4050 print_generic_expr (dump_file,
4051 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4056 if (!base_aligned_p)
4058 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4060 if (vect_debug_details (NULL))
4062 fprintf (dump_file, "can't force alignment of ref: ");
4063 print_generic_expr (dump_file, ref, TDF_SLIM);
4068 /* Force the alignment of the decl.
4069 NOTE: This is the only change to the code we make during
4070 the analysis phase, before deciding to vectorize the loop. */
4071 if (vect_debug_details (NULL))
4072 fprintf (dump_file, "force alignment");
4073 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4074 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4077 /* At this point we assume that the base is aligned, and the offset from it
4078 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4079 gcc_assert (base_aligned_p
4080 || (TREE_CODE (base) == VAR_DECL
4081 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4083 /* Convert into bytes. */
4084 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4085 /* Check that there is no remainder in bits. */
4086 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4087 if (!integer_zerop (bit_offset))
4089 if (vect_debug_details (NULL))
4091 fprintf (dump_file, "bit offset alignment: ");
4092 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4097 /* Alignment required, in bytes: */
4098 alignment = fold_convert (unsigned_type_node,
4099 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4101 /* Modulo alignment. */
4102 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4103 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4105 if (vect_debug_details (NULL))
4106 fprintf (dump_file, "unexpected misalign value");
/* Record the computed byte misalignment on the data-ref.  */
4110 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4112 if (vect_debug_details (NULL))
4113 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4119 /* Function vect_compute_array_ref_alignment
4121 Compute the alignment of an array-ref.
4122 The alignment we compute here is relative to
4123 TYPE_ALIGN(VECTYPE) boundary.
4126 OFFSET - the alignment in bits
4127 Return value - the base of the array-ref. E.g,
4128 if the array-ref is a.b[k].c[i][j] the returned
4133 vect_compute_array_ref_alignment (struct data_reference *dr,
4134 loop_vec_info loop_vinfo,
4138 tree array_first_index = size_zero_node;
4140 tree ref = DR_REF (dr);
4141 tree scalar_type = TREE_TYPE (ref);
4142 tree oprnd0 = TREE_OPERAND (ref, 0);
4143 tree dims = size_one_node;
4144 tree misalign = size_zero_node;
4145 tree next_ref, this_offset = size_zero_node;
4149 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4150 /* The reference is an array without its last index. */
4151 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4154 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4157 /* Alignment is not requested. Just return the base. */
4160 /* Compute alignment. */
4161 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4163 this_offset = misalign;
4165 /* Check the first index accessed. */
4166 if (!vect_get_first_index (ref, &array_first_index))
4168 if (vect_debug_details (NULL))
4169 fprintf (dump_file, "no first_index for array.");
4173 /* Check the index of the array_ref. */
4174 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4175 LOOP_VINFO_LOOP (loop_vinfo)->num);
4177 /* FORNOW: In order to simplify the handling of alignment, we make sure
4178 that the first location at which the array is accessed ('init') is on an
4179 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4180 This is too conservative, since we require that
4181 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4182 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4183 This should be relaxed in the future. */
4185 if (!init || !host_integerp (init, 0))
4187 if (vect_debug_details (NULL))
4188 fprintf (dump_file, "non constant init. ");
4192 /* bytes per scalar element: */
4193 nunits = fold_convert (unsigned_type_node,
4194 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4195 nbits = int_const_binop (MULT_EXPR, nunits,
4196 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4198 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4199 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4200 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4201 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4203 /* TODO: allow negative misalign values. */
4204 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4206 if (vect_debug_details (NULL))
4207 fprintf (dump_file, "unexpected misalign value");
4215 /* Function vect_compute_data_refs_alignment
4217 Compute the misalignment of data references in the loop.
4218 This pass may take place at function granularity instead of at loop
4221 FOR NOW: No analysis is actually performed. Misalignment is calculated
4222 only for trivial cases. TODO. */
4225 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4227 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4228 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* Compute the misalignment of every write data-ref; fail the whole
   analysis if any single one cannot be computed.  */
4231 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4233 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4234 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
/* Likewise for every read data-ref.  */
4238 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4240 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4241 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4249 /* Function vect_enhance_data_refs_alignment
4251 This pass will use loop versioning and loop peeling in order to enhance
4252 the alignment of data references in the loop.
4254 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4255 original loop is to be vectorized; Any other loops that are created by
4256 the transformations performed in this pass - are not supposed to be
4257 vectorized. This restriction will be relaxed. */
4260 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4262 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4263 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4264 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4268 This pass will require a cost model to guide it whether to apply peeling
4269 or versioning or a combination of the two. For example, the scheme that
4270 intel uses when given a loop with several memory accesses, is as follows:
4271 choose one memory access ('p') which alignment you want to force by doing
4272 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4273 other accesses are not necessarily aligned, or (2) use loop versioning to
4274 generate one loop in which all accesses are aligned, and another loop in
4275 which only 'p' is necessarily aligned.
4277 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4278 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4279 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4281 Devising a cost model is the most critical aspect of this work. It will
4282 guide us on which access to peel for, whether to use loop versioning, how
4283 many versions to create, etc. The cost model will probably consist of
4284 generic considerations as well as target specific considerations (on
4285 powerpc for example, misaligned stores are more painful than misaligned
4288 Here is the general steps involved in alignment enhancements:
4290 -- original loop, before alignment analysis:
4291 for (i=0; i<N; i++){
4292 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4293 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4296 -- After vect_compute_data_refs_alignment:
4297 for (i=0; i<N; i++){
4298 x = q[i]; # DR_MISALIGNMENT(q) = 3
4299 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4302 -- Possibility 1: we do loop versioning:
4304 for (i=0; i<N; i++){ # loop 1A
4305 x = q[i]; # DR_MISALIGNMENT(q) = 3
4306 p[i] = y; # DR_MISALIGNMENT(p) = 0
4310 for (i=0; i<N; i++){ # loop 1B
4311 x = q[i]; # DR_MISALIGNMENT(q) = 3
4312 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4316 -- Possibility 2: we do loop peeling:
4317 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4321 for (i = 3; i < N; i++){ # loop 2A
4322 x = q[i]; # DR_MISALIGNMENT(q) = 0
4323 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4326 -- Possibility 3: combination of loop peeling and versioning:
4327 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4332 for (i = 3; i<N; i++){ # loop 3A
4333 x = q[i]; # DR_MISALIGNMENT(q) = 0
4334 p[i] = y; # DR_MISALIGNMENT(p) = 0
4338 for (i = 3; i<N; i++){ # loop 3B
4339 x = q[i]; # DR_MISALIGNMENT(q) = 0
4340 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4344 These loops are later passed to loop_transform to be vectorized. The
4345 vectorizer will use the alignment information to guide the transformation
4346 (whether to generate regular loads/stores, or with special handling for
4350 /* (1) Peeling to force alignment. */
4352 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4354 + How many accesses will become aligned due to the peeling
4355 - How many accesses will become unaligned due to the peeling,
4356 and the cost of misaligned accesses.
4357 - The cost of peeling (the extra runtime checks, the increase
4360 The scheme we use FORNOW: peel to force the alignment of the first
4361 misaligned store in the loop.
4362 Rationale: misaligned stores are not yet supported.
4364 TODO: Use a better cost model. */
/* Pick the first misaligned write as the peeling target.  */
4366 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4368 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4369 if (!aligned_access_p (dr))
4371 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4372 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
/* No misaligned store was found: nothing to peel for.  */
4377 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4379 if (vect_debug_details (loop))
4380 fprintf (dump_file, "Peeling for alignment will not be applied.");
4384 if (vect_debug_details (loop))
4385 fprintf (dump_file, "Peeling for alignment will be applied.");
4388 /* (1.2) Update the alignment info according to the peeling factor.
4389 If the misalignment of the DR we peel for is M, then the
4390 peeling factor is VF - M, and the misalignment of each access DR_i
4391 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4392 If the misalignment of the DR we peel for is unknown, then the
4393 misalignment of each access DR_i in the loop is also unknown.
4395 FORNOW: set the misalignment of the accesses to unknown even
4396 if the peeling factor is known at compile time.
4398 TODO: - if the peeling factor is known at compile time, use that
4399 when updating the misalignment info of the loop DRs.
4400 - consider accesses that are known to have the same
4401 alignment, even if that alignment is unknown. */
/* After peeling, the peeled-for DR becomes aligned (misalignment 0);
   every other write's misalignment is reset to unknown (-1).  */
4403 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4405 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4406 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4407 DR_MISALIGNMENT (dr) = 0;
4409 DR_MISALIGNMENT (dr) = -1;
/* Likewise for the reads.  */
4411 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4413 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4414 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4415 DR_MISALIGNMENT (dr) = 0;
4417 DR_MISALIGNMENT (dr) = -1;
4422 /* Function vect_analyze_data_refs_alignment
4424 Analyze the alignment of the data-references in the loop.
4425 FOR NOW: Until support for misliagned accesses is in place, only if all
4426 accesses are aligned can the loop be vectorized. This restriction will be
4430 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4432 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4433 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4434 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4435 enum dr_alignment_support supportable_dr_alignment;
4438 if (vect_debug_details (NULL))
4439 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4442 /* This pass may take place at function granularity instead of at loop
/* Step 1: compute the misalignment of every data-ref.  */
4445 if (!vect_compute_data_refs_alignment (loop_vinfo))
4447 if (vect_debug_details (loop) || vect_debug_stats (loop))
4449 "not vectorized: can't calculate alignment for data ref.");
4454 /* This pass will decide on using loop versioning and/or loop peeling in
4455 order to enhance the alignment of data references in the loop. */
4457 vect_enhance_data_refs_alignment (loop_vinfo);
4460 /* Finally, check that all the data references in the loop can be
4461 handled with respect to their alignment. */
4463 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4465 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4466 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4467 if (!supportable_dr_alignment)
4469 if (vect_debug_details (loop) || vect_debug_stats (loop))
4470 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
/* Same check for the writes.  */
4474 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4476 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4477 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4478 if (!supportable_dr_alignment)
4480 if (vect_debug_details (loop) || vect_debug_stats (loop))
4481 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4490 /* Function vect_analyze_data_ref_access.
4492 Analyze the access pattern of the data-reference DR. For now, a data access
4493 has to consecutive and aligned to be considered vectorizable. */
4496 vect_analyze_data_ref_access (struct data_reference *dr)
4498 varray_type access_fns = DR_ACCESS_FNS (dr);
4501 unsigned int dimensions, i;
4503 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4504 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4505 access is contiguous). */
4506 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4508 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4510 access_fn = DR_ACCESS_FN (dr, i);
/* A non-null evolution part means this outer index varies in the
   loop, so the access is not contiguous.  */
4512 if (evolution_part_in_loop_num (access_fn,
4513 loop_containing_stmt (DR_STMT (dr))->num))
4515 /* Evolution part is not NULL in this loop (it is neither constant
4517 if (vect_debug_details (NULL))
4520 "not vectorized: complicated multidim. array access.");
4521 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The innermost index must be loop-invariant or a simple induction
   (FORNOW with the strict flag: step 1, i.e. consecutive access).  */
4527 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4528 if (!evolution_function_is_constant_p (access_fn)
4529 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4530 access_fn, &init, &step, true))
4532 if (vect_debug_details (NULL))
4534 fprintf (dump_file, "not vectorized: complicated access function.");
4535 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4544 /* Function vect_analyze_data_ref_accesses.
4546 Analyze the access pattern of all the data references in the loop.
4548 FORNOW: the only access pattern that is considered vectorizable is a
4549 simple step 1 (consecutive) access.
4551 FORNOW: handle only arrays and pointer accesses. */
4554 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4557 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4558 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4560 if (vect_debug_details (NULL))
4561 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Check the access pattern of all the write data-refs in the loop.  */
4563 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4565 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4566 bool ok = vect_analyze_data_ref_access (dr);
4569 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4570 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4571 fprintf (dump_file, "not vectorized: complicated access pattern.");
/* Same check for all the read data-refs.  */
4576 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4578 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4579 bool ok = vect_analyze_data_ref_access (dr);
4582 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4583 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4584 fprintf (dump_file, "not vectorized: complicated access pattern.");
4593 /* Function vect_analyze_pointer_ref_access.
4596 STMT - a stmt that contains a data-ref
4597 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4599 If the data-ref access is vectorizable, return a data_reference structure
4600 that represents it (DR). Otherwise - return NULL. */
4602 static struct data_reference *
4603 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4605 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4606 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
/* Evolution of the pointer (operand 0 of the INDIRECT_REF) across
   iterations of the containing loop.  */
4607 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4610 tree reftype, innertype;
4611 enum machine_mode innermode;
4612 tree indx_access_fn;
4613 int loopnum = loop->num;
4614 struct data_reference *dr;
4618 if (vect_debug_stats (loop) || vect_debug_details (loop))
4619 fprintf (dump_file, "not vectorized: complicated pointer access.");
4623 if (vect_debug_details (NULL))
4625 fprintf (dump_file, "Access function of ptr: ");
4626 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The pointer must evolve as a simple linear induction {init, +, step}.  */
4629 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4631 if (vect_debug_stats (loop) || vect_debug_details (loop))
4632 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* The step must be a compile-time constant that fits in a host integer.  */
4638 if (!host_integerp (step,0))
4640 if (vect_debug_stats (loop) || vect_debug_details (loop))
4642 "not vectorized: non constant step for pointer access.");
4646 step_val = TREE_INT_CST_LOW (step);
4648 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4649 if (TREE_CODE (reftype) != POINTER_TYPE)
4651 if (vect_debug_stats (loop) || vect_debug_details (loop))
4652 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4656 reftype = TREE_TYPE (init);
4657 if (TREE_CODE (reftype) != POINTER_TYPE)
4659 if (vect_debug_stats (loop) || vect_debug_details (loop))
4660 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* Consecutive access: the step must equal the size of the pointed-to
   type, i.e. the pointer advances by exactly one element per iteration.  */
4664 innertype = TREE_TYPE (reftype);
4665 innermode = TYPE_MODE (innertype);
4666 if (GET_MODE_SIZE (innermode) != step_val)
4668 /* FORNOW: support only consecutive access */
4669 if (vect_debug_stats (loop) || vect_debug_details (loop))
4670 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Model the access as a unit-stride index {0, +, 1} in this loop and
   build the data_reference struct for it.  */
4675 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4676 if (vect_debug_details (NULL))
4678 fprintf (dump_file, "Access function of ptr indx: ");
4679 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4681 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4686 /* Function vect_get_symbl_and_dr.
4688 The function returns SYMBL - the relevant variable for
4689 memory tag (for aliasing purposes).
4690 Also data reference structure DR is created.
4693 MEMREF - data reference in STMT
4694 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4697 DR - data_reference struct for MEMREF
4698 return value - the relevant variable for memory tag (for aliasing purposes).
4703 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4704 loop_vec_info loop_vinfo, struct data_reference **dr)
4706 tree symbl, oprnd0, oprnd1;
4707 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4709 tree array_base, base;
4710 struct data_reference *new_dr;
4711 bool base_aligned_p;
/* Dispatch on the form of the memory reference.  Visible cases below:
   pointer dereference (handled via vect_analyze_pointer_ref_access) and
   array reference (handled via analyze_array).  */
4714 switch (TREE_CODE (memref))
4717 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4721 symbl = DR_BASE_NAME (new_dr);
4722 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
/* Peel apart the base expression to find the underlying variable
   that should be used as the memory tag for aliasing.  */
4724 switch (TREE_CODE (symbl))
4728 oprnd0 = TREE_OPERAND (symbl, 0);
4729 oprnd1 = TREE_OPERAND (symbl, 1);
4732 /* Only {address_base + offset} expressions are supported,
4733 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4734 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4735 TODO: swap operands if {offset + address_base}. */
4736 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4737 && TREE_CODE (oprnd1) != INTEGER_CST)
4738 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4741 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
/* Recurse on the pointer operand to find the tag variable; note the
   recursive call discards its new_dr - only the symbol is wanted here.  */
4744 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4745 loop_vinfo, &new_dr);
4749 /* symbl remains unchanged. */
4753 if (vect_debug_details (NULL))
4755 fprintf (dump_file, "unhandled data ref: ");
4756 print_generic_expr (dump_file, memref, TDF_SLIM);
4757 fprintf (dump_file, " (symbl ");
4758 print_generic_expr (dump_file, symbl, TDF_SLIM);
4759 fprintf (dump_file, ") in stmt ");
4760 print_generic_expr (dump_file, stmt, TDF_SLIM);
4767 offset = size_zero_node;
4769 /* Store the array base in the stmt info.
4770 For one dimensional array ref a[i], the base is a,
4771 for multidimensional a[i1][i2]..[iN], the base is
4772 a[i1][i2]..[iN-1]. */
4773 array_base = TREE_OPERAND (memref, 0);
4774 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4776 new_dr = analyze_array (stmt, memref, is_read);
4779 /* Find the relevant symbol for aliasing purposes. */
4780 base = DR_BASE_NAME (new_dr);
4781 switch (TREE_CODE (base))
4788 symbl = TREE_OPERAND (base, 0);
4792 /* Could have recorded more accurate information -
4793 i.e, the actual FIELD_DECL that is being referenced -
4794 but later passes expect VAR_DECL as the nmt. */
4795 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4796 loop_vinfo, &offset, &base_aligned_p);
4801 if (vect_debug_details (NULL))
4803 fprintf (dump_file, "unhandled struct/class field access ")
4804 print_generic_expr (dump_file, stmt, TDF_SLIM);
4811 if (vect_debug_details (NULL))
4813 fprintf (dump_file, "unhandled data ref: ");
4814 print_generic_expr (dump_file, memref, TDF_SLIM);
4815 fprintf (dump_file, " in stmt ");
4816 print_generic_expr (dump_file, stmt, TDF_SLIM);
4824 /* Function vect_analyze_data_refs.
4826 Find all the data references in the loop.
4828 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4829 whose base is really an array (not a pointer) and whose alignment
4830 can be forced. This restriction will be relaxed. */
4833 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4835 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4836 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4837 int nbbs = loop->num_nodes;
4838 block_stmt_iterator si;
4840 struct data_reference *dr;
4843 bool base_aligned_p;
4846 if (vect_debug_details (NULL))
4847 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
/* Walk every statement in every basic block of the loop, looking for
   memory references (detected through their virtual operands).  */
4849 for (j = 0; j < nbbs; j++)
4851 basic_block bb = bbs[j];
4852 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4854 bool is_read = false;
4855 tree stmt = bsi_stmt (si);
4856 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4857 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4858 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4859 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4860 varray_type *datarefs = NULL;
4861 int nvuses, nv_may_defs, nv_must_defs;
4865 /* Assumption: there exists a data-ref in stmt, if and only if
4866 it has vuses/vdefs. */
4868 if (!vuses && !v_may_defs && !v_must_defs)
4871 nvuses = NUM_VUSES (vuses);
4872 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4873 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt with both vuses and vdefs (reads and writes memory) is not
   handled - the code below expects a pure read or a pure write.  */
4875 if (nvuses && (nv_may_defs || nv_must_defs))
4877 if (vect_debug_details (NULL))
4879 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4880 print_generic_expr (dump_file, stmt, TDF_SLIM);
4885 if (TREE_CODE (stmt) != MODIFY_EXPR)
4887 if (vect_debug_details (NULL))
4889 fprintf (dump_file, "unexpected vops in stmt: ");
4890 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* A read: the data-ref is the RHS of the MODIFY_EXPR; a write: the
   LHS.  Record it in the corresponding list of the loop_vinfo.  */
4897 memref = TREE_OPERAND (stmt, 1);
4898 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4903 memref = TREE_OPERAND (stmt, 0);
4904 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4908 /* Analyze MEMREF. If it is of a supported form, build data_reference
4909 struct for it (DR) and find the relevant symbol for aliasing
4911 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4915 if (vect_debug_stats (loop) || vect_debug_details (loop))
4917 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4918 print_generic_expr (dump_file, stmt, TDF_SLIM);
4923 /* Find and record the memtag assigned to this data-ref. */
4924 switch (TREE_CODE (symbl))
4927 STMT_VINFO_MEMTAG (stmt_info) = symbl;
/* SSA_NAME case: the memtag is the type memory tag of the underlying
   pointer variable (or of the dereferenced pointer in MEMREF).  */
4931 symbl = SSA_NAME_VAR (symbl);
4932 tag = get_var_ann (symbl)->type_mem_tag;
4935 tree ptr = TREE_OPERAND (memref, 0);
4936 if (TREE_CODE (ptr) == SSA_NAME)
4937 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4941 if (vect_debug_stats (loop) || vect_debug_details (loop))
4942 fprintf (dump_file, "not vectorized: no memtag for ref.");
4945 STMT_VINFO_MEMTAG (stmt_info) = tag;
/* ADDR_EXPR case: look through the address to the underlying object.  */
4949 address_base = TREE_OPERAND (symbl, 0);
4951 switch (TREE_CODE (address_base))
4954 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
4956 STMT_VINFO_MEMTAG (stmt_info) =
4957 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
4958 loop_vinfo, &offset,
4963 STMT_VINFO_MEMTAG (stmt_info) = address_base;
4967 if (vect_debug_stats (loop) || vect_debug_details (loop))
4970 "not vectorized: unhandled address expr: ");
4971 print_generic_expr (dump_file, stmt, TDF_SLIM);
4978 if (vect_debug_stats (loop) || vect_debug_details (loop))
4980 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
4981 print_generic_expr (dump_file, memref, TDF_SLIM);
/* Record the data-ref both in the loop-wide list and in the stmt info.  */
4986 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4987 STMT_VINFO_DATA_REF (stmt_info) = dr;
4995 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
4997 /* Function vect_mark_relevant.
4999 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5002 vect_mark_relevant (varray_type worklist, tree stmt)
5004 stmt_vec_info stmt_info;
5006 if (vect_debug_details (NULL))
5007 fprintf (dump_file, "mark relevant.");
/* PHI nodes carry no stmt_vec_info here; just queue them.  */
5009 if (TREE_CODE (stmt) == PHI_NODE)
5011 VARRAY_PUSH_TREE (worklist, stmt);
5015 stmt_info = vinfo_for_stmt (stmt);
5019 if (vect_debug_details (NULL))
5021 fprintf (dump_file, "mark relevant: no stmt info!!.");
5022 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Avoid pushing the same stmt twice: skip if already marked.  */
5027 if (STMT_VINFO_RELEVANT_P (stmt_info))
5029 if (vect_debug_details (NULL))
5030 fprintf (dump_file, "already marked relevant.");
5034 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5035 VARRAY_PUSH_TREE (worklist, stmt);
5039 /* Function vect_stmt_relevant_p.
5041 Return true if STMT in loop that is represented by LOOP_VINFO is
5042 "relevant for vectorization".
5044 A stmt is considered "relevant for vectorization" if:
5045 - it has uses outside the loop.
5046 - it has vdefs (it alters memory).
5047 - control stmts in the loop (except for the exit condition).
5049 CHECKME: what other side effects would the vectorizer allow? */
5052 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5054 v_may_def_optype v_may_defs;
5055 v_must_def_optype v_must_defs;
5056 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5061 /* cond stmt other than loop exit cond. */
5062 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5065 /* changing memory. */
5066 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5067 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5068 if (v_may_defs || v_must_defs)
5070 if (vect_debug_details (NULL))
5071 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5075 /* uses outside the loop. */
/* Walk the immediate uses of STMT's result; a use in a basic block
   outside LOOP makes the stmt relevant (its value escapes the loop).  */
5076 df = get_immediate_uses (stmt);
5077 num_uses = num_immediate_uses (df);
5078 for (i = 0; i < num_uses; i++)
5080 tree use = immediate_use (df, i);
5081 basic_block bb = bb_for_stmt (use);
5082 if (!flow_bb_inside_loop_p (loop, bb))
5084 if (vect_debug_details (NULL))
5085 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5094 /* Function vect_mark_stmts_to_be_vectorized.
5096 Not all stmts in the loop need to be vectorized. For example:
5105 Stmt 1 and 3 do not need to be vectorized, because loop control and
5106 addressing of vectorized data-refs are handled differently.
5108 This pass detects such stmts. */
5111 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5113 varray_type worklist;
5114 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5115 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5116 unsigned int nbbs = loop->num_nodes;
5117 block_stmt_iterator si;
5123 stmt_vec_info stmt_info;
5125 if (vect_debug_details (NULL))
5126 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5128 VARRAY_TREE_INIT (worklist, 64, "work list");
5130 /* 1. Init worklist. */
/* Seed the worklist with the stmts that are directly relevant
   (per vect_stmt_relevant_p): vdefs, out-of-loop uses, etc.  */
5132 for (i = 0; i < nbbs; i++)
5134 basic_block bb = bbs[i];
5135 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5137 stmt = bsi_stmt (si);
5139 if (vect_debug_details (NULL))
5141 fprintf (dump_file, "init: stmt relevant? ");
5142 print_generic_expr (dump_file, stmt, TDF_SLIM);
5145 stmt_info = vinfo_for_stmt (stmt);
5146 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5148 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5149 vect_mark_relevant (worklist, stmt);
5154 /* 2. Process_worklist */
/* Fixed-point propagation: pop a relevant stmt and mark the in-loop
   defining stmts of its uses as relevant too.  */
5156 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5158 stmt = VARRAY_TOP_TREE (worklist);
5159 VARRAY_POP (worklist);
5161 if (vect_debug_details (NULL))
5163 fprintf (dump_file, "worklist: examine stmt: ");
5164 print_generic_expr (dump_file, stmt, TDF_SLIM);
5167 /* Examine the USES in this statement. Mark all the statements which
5168 feed this statement's uses as "relevant", unless the USE is used as
5171 if (TREE_CODE (stmt) == PHI_NODE)
5173 /* follow the def-use chain inside the loop. */
5174 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5176 tree arg = PHI_ARG_DEF (stmt, j);
5177 tree def_stmt = NULL_TREE;
/* An unsupported use aborts the whole analysis (returns failure).  */
5179 if (!vect_is_simple_use (arg, loop, &def_stmt))
5181 if (vect_debug_details (NULL))
5182 fprintf (dump_file, "worklist: unsupported use.");
5183 varray_clear (worklist);
5189 if (vect_debug_details (NULL))
5191 fprintf (dump_file, "worklist: def_stmt: ");
5192 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5195 bb = bb_for_stmt (def_stmt);
5196 if (flow_bb_inside_loop_p (loop, bb))
5197 vect_mark_relevant (worklist, def_stmt);
5201 ann = stmt_ann (stmt);
5202 use_ops = USE_OPS (ann);
5204 for (i = 0; i < NUM_USES (use_ops); i++)
5206 tree use = USE_OP (use_ops, i);
5208 /* We are only interested in uses that need to be vectorized. Uses
5209 that are used for address computation are not considered relevant.
5211 if (exist_non_indexing_operands_for_use_p (use, stmt))
5213 tree def_stmt = NULL_TREE;
5215 if (!vect_is_simple_use (use, loop, &def_stmt))
5217 if (vect_debug_details (NULL))
5218 fprintf (dump_file, "worklist: unsupported use.");
5219 varray_clear (worklist);
5226 if (vect_debug_details (NULL))
5228 fprintf (dump_file, "worklist: examine use %d: ", i);
5229 print_generic_expr (dump_file, use, TDF_SLIM);
5232 bb = bb_for_stmt (def_stmt);
5233 if (flow_bb_inside_loop_p (loop, bb))
5234 vect_mark_relevant (worklist, def_stmt);
5237 } /* while worklist */
5239 varray_clear (worklist);
5244 /* Function vect_can_advance_ivs_p
5246 In case the number of iterations that LOOP iterates is unknown at compile
5247 time, an epilog loop will be generated, and the loop induction variables
5248 (IVs) will be "advanced" to the value they are supposed to take just before
5249 the epilog loop. Here we check that the access function of the loop IVs
5250 and the expression that represents the loop bound are simple enough.
5251 These restrictions will be relaxed in the future. */
5254 vect_can_advance_ivs_p (struct loop *loop)
5256 basic_block bb = loop->header;
5259 /* Analyze phi functions of the loop header. */
5261 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5263 tree access_fn = NULL;
5264 tree evolution_part;
5266 if (vect_debug_details (NULL))
5268 fprintf (dump_file, "Analyze phi: ");
5269 print_generic_expr (dump_file, phi, TDF_SLIM);
5272 /* Skip virtual phi's. The data dependences that are associated with
5273 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5275 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5277 if (vect_debug_details (NULL))
5278 fprintf (dump_file, "virtual phi. skip.");
5282 /* Analyze the evolution function. */
5284 access_fn = instantiate_parameters
5285 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5289 if (vect_debug_details (NULL))
5290 fprintf (dump_file, "No Access function.");
5294 if (vect_debug_details (NULL))
5296 fprintf (dump_file, "Access function of PHI: ");
5297 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Extract the IV's evolution in this loop; it must exist and must not
   itself be a chrec (degree >= 2 polynomials are rejected below).  */
5300 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5302 if (evolution_part == NULL_TREE)
5305 /* FORNOW: We do not transform initial conditions of IVs
5306 which evolution functions are a polynomial of degree >= 2. */
5308 if (tree_is_chrec (evolution_part))
5316 /* Function vect_get_loop_niters.
5318 Determine how many iterations the loop is executed.
5319 If an expression that represents the number of iterations
5320 can be constructed, place it in NUMBER_OF_ITERATIONS.
5321 Return the loop exit condition. */
5324 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5328 if (vect_debug_details (NULL))
5329 fprintf (dump_file, "\n<<get_loop_niters>>\n");
/* Ask the scalar-evolution machinery for the iteration count; it may
   fail and return NULL_TREE or chrec_dont_know.  */
5331 niters = number_of_iterations_in_loop (loop);
5333 if (niters != NULL_TREE
5334 && niters != chrec_dont_know)
5336 *number_of_iterations = niters;
5338 if (vect_debug_details (NULL))
5340 fprintf (dump_file, "==> get_loop_niters:" );
5341 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5345 return get_loop_exit_condition (loop);
5349 /* Function vect_analyze_loop_form.
5351 Verify the following restrictions (some may be relaxed in the future):
5352 - it's an inner-most loop
5353 - number of BBs = 2 (which are the loop header and the latch)
5354 - the loop has a pre-header
5355 - the loop has a single entry and exit
5356 - the loop exit condition is simple enough, and the number of iterations
5357 can be analyzed (a countable loop). */
5359 static loop_vec_info
5360 vect_analyze_loop_form (struct loop *loop)
5362 loop_vec_info loop_vinfo;
5364 tree number_of_iterations = NULL;
5365 bool rescan = false;
5367 if (vect_debug_details (loop))
5368 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n")
/* Reject loops that are not inner-most, have multiple exits or entries,
   or have more than the two expected blocks (header + latch).  */
5371 || !loop->single_exit
5372 || loop->num_nodes != 2
5373 || EDGE_COUNT (loop->header->preds) != 2
5374 || loop->num_entries != 1)
5376 if (vect_debug_stats (loop) || vect_debug_details (loop))
5378 fprintf (dump_file, "not vectorized: bad loop form. ");
5380 fprintf (dump_file, "nested loop.");
5381 else if (!loop->single_exit)
5382 fprintf (dump_file, "multiple exits.");
5383 else if (loop->num_nodes != 2)
5384 fprintf (dump_file, "too many BBs in loop.");
5385 else if (EDGE_COUNT (loop->header->preds) != 2)
5386 fprintf (dump_file, "too many incoming edges.");
5387 else if (loop->num_entries != 1)
5388 fprintf (dump_file, "too many entries.");
5394 /* We assume that the loop exit condition is at the end of the loop. i.e,
5395 that the loop is represented as a do-while (with a proper if-guard
5396 before the loop if needed), where the loop header contains all the
5397 executable statements, and the latch is empty. */
5398 if (!empty_block_p (loop->latch))
5400 if (vect_debug_stats (loop) || vect_debug_details (loop))
5401 fprintf (dump_file, "not vectorized: unexpected loop form.");
5405 /* Make sure we have a preheader basic block. */
5406 if (!loop->pre_header)
5409 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5412 /* Make sure there exists a single-predecessor exit bb: */
5413 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5416 loop_split_edge_with (loop->exit_edges[0], NULL);
/* CFG was modified above; rescan the loop structure to refresh the
   cached edge/entry information.  */
5421 flow_loop_scan (loop, LOOP_ALL);
5422 /* Flow loop scan does not update loop->single_exit field. */
5423 loop->single_exit = loop->exit_edges[0];
5426 if (empty_block_p (loop->header))
5428 if (vect_debug_stats (loop) || vect_debug_details (loop))
5429 fprintf (dump_file, "not vectorized: empty loop.");
/* Determine the iteration count; reject loops whose bound cannot be
   analyzed (non-countable loops).  */
5433 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5436 if (vect_debug_stats (loop) || vect_debug_details (loop))
5437 fprintf (dump_file, "not vectorized: complicated exit condition.");
5441 if (!number_of_iterations)
5443 if (vect_debug_stats (loop) || vect_debug_details (loop))
5445 "not vectorized: number of iterations cannot be computed.");
5449 if (chrec_contains_undetermined (number_of_iterations))
5451 if (vect_debug_details (NULL))
5452 fprintf (dump_file, "Infinite number of iterations.");
/* All form checks passed: create the loop_vec_info and record the
   iteration count and the exit condition in it.  */
5456 loop_vinfo = new_loop_vec_info (loop);
5457 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5459 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5461 if (vect_debug_details (loop))
5463 fprintf (dump_file, "loop bound unknown.\n");
5464 fprintf (dump_file, "Symbolic number of iterations is ");
5465 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5469 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5471 if (vect_debug_stats (loop) || vect_debug_details (loop))
5472 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5476 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5482 /* Function vect_analyze_loop.
5484 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5485 for it. The different analyses will record information in the
5486 loop_vec_info struct. */
5488 static loop_vec_info
5489 vect_analyze_loop (struct loop *loop)
5492 loop_vec_info loop_vinfo;
5494 if (vect_debug_details (NULL))
5495 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5497 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5499 loop_vinfo = vect_analyze_loop_form (loop);
5502 if (vect_debug_details (loop))
5503 fprintf (dump_file, "bad loop form.");
5507 /* Find all data references in the loop (which correspond to vdefs/vuses)
5508 and analyze their evolution in the loop.
5510 FORNOW: Handle only simple, array references, which
5511 alignment can be forced, and aligned pointer-references. */
5513 ok = vect_analyze_data_refs (loop_vinfo);
5516 if (vect_debug_details (loop))
5517 fprintf (dump_file, "bad data references.");
5518 destroy_loop_vec_info (loop_vinfo);
5522 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5524 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
/* Single debug report on failure (previously this printed two redundant
   messages under the same condition).  */
5529 if (vect_debug_details (loop))
5530 fprintf (dump_file, "not vectorized: unexpected pattern.");
5531 destroy_loop_vec_info (loop_vinfo);
5535 /* Check that all cross-iteration scalar data-flow cycles are OK.
5536 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5538 ok = vect_analyze_scalar_cycles (loop_vinfo);
5541 if (vect_debug_details (loop))
5542 fprintf (dump_file, "bad scalar cycle.");
5543 destroy_loop_vec_info (loop_vinfo);
5547 /* Analyze data dependences between the data-refs in the loop.
5548 FORNOW: fail at the first data dependence that we encounter. */
5550 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5553 if (vect_debug_details (loop))
5554 fprintf (dump_file, "bad data dependence.");
5555 destroy_loop_vec_info (loop_vinfo);
5559 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5560 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5562 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5565 if (vect_debug_details (loop))
5566 fprintf (dump_file, "bad data access.");
5567 destroy_loop_vec_info (loop_vinfo);
5571 /* Analyze the alignment of the data-refs in the loop.
5572 FORNOW: Only aligned accesses are handled. */
5574 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5577 if (vect_debug_details (loop))
5578 fprintf (dump_file, "bad data alignment.");
5579 destroy_loop_vec_info (loop_vinfo);
5583 /* Scan all the operations in the loop and make sure they are
5586 ok = vect_analyze_operations (loop_vinfo);
5589 if (vect_debug_details (loop))
5590 fprintf (dump_file, "bad operation or unsupported loop bound.");
5591 destroy_loop_vec_info (loop_vinfo);
/* Every analysis phase succeeded - mark the loop vectorizable.  */
5595 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5601 /* Function need_imm_uses_for.
5603 Return whether we ought to include information for 'var'
5604 when calculating immediate uses. For this pass we only want use
5605 information for non-virtual variables. */
5608 need_imm_uses_for (tree var)
/* Immediate-use info is only needed for real (non-virtual) variables.  */
5610 return is_gimple_reg (var);
5614 /* Function vectorize_loops.
5616 Entry Point to loop vectorization phase. */
5619 vectorize_loops (struct loops *loops)
5621 unsigned int i, loops_num;
5622 unsigned int num_vectorized_loops = 0;
5624 /* Does the target support SIMD? */
5625 /* FORNOW: until more sophisticated machine modelling is in place. */
5626 if (!UNITS_PER_SIMD_WORD)
5628 if (vect_debug_details (NULL))
5629 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5633 #ifdef ENABLE_CHECKING
5634 verify_loop_closed_ssa ();
/* Compute def-use chains for non-virtual SSA names only; the analysis
   (see vect_stmt_relevant_p) uses them to find out-of-loop uses.  */
5637 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5639 /* ----------- Analyze loops. ----------- */
5641 /* If some loop was duplicated, it gets bigger number
5642 than all previously defined loops. This fact allows us to run
5643 only over initial loops skipping newly generated ones. */
5644 loops_num = loops->num;
5645 for (i = 1; i < loops_num; i++)
5647 loop_vec_info loop_vinfo;
5648 struct loop *loop = loops->parray[i];
/* Analyze each loop; only transform loops that passed all phases.  */
5653 loop_vinfo = vect_analyze_loop (loop);
5654 loop->aux = loop_vinfo;
5656 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5659 vect_transform_loop (loop_vinfo, loops);
5660 num_vectorized_loops++;
5663 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5664 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5665 num_vectorized_loops);
5667 /* ----------- Finalize. ----------- */
/* Free the loop_vec_info structs stashed in loop->aux above.  */
5670 for (i = 1; i < loops_num; i++)
5672 struct loop *loop = loops->parray[i];
5673 loop_vec_info loop_vinfo;
5677 loop_vinfo = loop->aux;
5678 destroy_loop_vec_info (loop_vinfo);
/* The transformation introduced new code; rebuild SSA form.  */
5682 rewrite_into_ssa (false);
5683 rewrite_into_loop_closed_ssa (); /* FORNOW */
5684 bitmap_clear (vars_to_rename);