2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static bool vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info, tree niters);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static enum dr_alignment_support vect_supportable_dr_alignment
169 (struct data_reference *);
170 static void vect_align_data_ref (tree);
171 static void vect_enhance_data_refs_alignment (loop_vec_info);
173 /* Utility functions for the analyses. */
174 static bool vect_is_simple_use (tree , struct loop *, tree *);
175 static bool exist_non_indexing_operands_for_use_p (tree, tree);
176 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
177 static void vect_mark_relevant (varray_type, tree);
178 static bool vect_stmt_relevant_p (tree, loop_vec_info);
179 static tree vect_get_loop_niters (struct loop *, tree *);
180 static bool vect_compute_data_ref_alignment
181 (struct data_reference *, loop_vec_info);
182 static bool vect_analyze_data_ref_access (struct data_reference *);
183 static bool vect_get_first_index (tree, tree *);
184 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
185 static struct data_reference * vect_analyze_pointer_ref_access
187 static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
189 static tree vect_get_base_and_bit_offset
190 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
191 static struct data_reference * vect_analyze_pointer_ref_access
193 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
194 static tree vect_compute_array_ref_alignment
195 (struct data_reference *, loop_vec_info, tree, tree *);
196 static tree vect_get_ptr_offset (tree, tree, tree *);
197 static tree vect_get_symbl_and_dr
198 (tree, tree, bool, loop_vec_info, struct data_reference **);
200 /* Utility functions for the code transformation. */
201 static tree vect_create_destination_var (tree, tree);
202 static tree vect_create_data_ref_ptr
203 (tree, block_stmt_iterator *, tree, tree *, bool);
204 static tree vect_create_index_for_vector_ref
205 (struct loop *, block_stmt_iterator *);
206 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
207 static tree get_vectype_for_scalar_type (tree);
208 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
209 static tree vect_get_vec_def_for_operand (tree, tree);
210 static tree vect_init_vector (tree, tree);
211 static tree vect_build_symbol_bound (tree, int, struct loop *);
212 static void vect_finish_stmt_generation
213 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
215 static void vect_generate_tmps_on_preheader (loop_vec_info,
218 static tree vect_build_loop_niters (loop_vec_info);
219 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
221 /* Loop transformations prior to vectorization. */
223 /* Loop transformations entry point function.
224 It can be used outside of the vectorizer
225 in case the loop to be manipulated answers conditions specified
226 in function documentation. */
227 struct loop *tree_duplicate_loop_to_edge (struct loop *,
228 struct loops *, edge,
231 static void allocate_new_names (bitmap);
232 static void rename_use_op (use_operand_p);
233 static void rename_def_op (def_operand_p, tree);
234 static void rename_variables_in_bb (basic_block);
235 static void free_new_names (bitmap);
236 static void rename_variables_in_loop (struct loop *);
237 static void copy_phi_nodes (struct loop *, struct loop *, bool);
238 static void update_phis_for_duplicate_loop (struct loop *,
241 static void update_phi_nodes_for_guard (edge, struct loop *);
242 static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
243 static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
246 static edge add_loop_guard (basic_block, tree, basic_block);
247 static bool verify_loop_for_duplication (struct loop *, bool, edge);
249 /* Utilities dealing with loop peeling (not peeling itself). */
250 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
251 static void vect_update_niters_after_peeling (loop_vec_info, tree);
252 static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
254 static void vect_update_inits_of_drs (loop_vec_info, tree);
255 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
257 /* Utilities for creation and deletion of vec_info structs. */
258 loop_vec_info new_loop_vec_info (struct loop *loop);
259 void destroy_loop_vec_info (loop_vec_info);
260 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
262 static bool vect_debug_stats (struct loop *loop);
263 static bool vect_debug_details (struct loop *loop);
266 /* Utilities to support loop peeling for vectorization purposes. */
269 /* For each definition in DEFINITIONS this function allocates
273 allocate_new_names (bitmap definitions)
278 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
280 tree def = ssa_name (ver);
281 tree *new_name_ptr = xmalloc (sizeof (tree));
283 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
285 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
286 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
288 SSA_NAME_AUX (def) = new_name_ptr;
293 /* Renames the use *OP_P. */
296 rename_use_op (use_operand_p op_p)
300 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
303 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
305 /* Something defined outside of the loop. */
309 /* An ordinary ssa name defined in the loop. */
311 SET_USE (op_p, *new_name_ptr);
315 /* Renames the def *OP_P in statement STMT. */
318 rename_def_op (def_operand_p op_p, tree stmt)
322 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
325 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
327 /* Something defined outside of the loop. */
331 /* An ordinary ssa name defined in the loop. */
333 SET_DEF (op_p, *new_name_ptr);
334 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
338 /* Renames the variables in basic block BB. */
341 rename_variables_in_bb (basic_block bb)
344 block_stmt_iterator bsi;
350 v_may_def_optype v_may_defs;
351 v_must_def_optype v_must_defs;
356 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
357 rename_def_op (PHI_RESULT_PTR (phi), phi);
359 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
361 stmt = bsi_stmt (bsi);
362 get_stmt_operands (stmt);
363 ann = stmt_ann (stmt);
365 uses = USE_OPS (ann);
366 for (i = 0; i < NUM_USES (uses); i++)
367 rename_use_op (USE_OP_PTR (uses, i));
369 defs = DEF_OPS (ann);
370 for (i = 0; i < NUM_DEFS (defs); i++)
371 rename_def_op (DEF_OP_PTR (defs, i), stmt);
373 vuses = VUSE_OPS (ann);
374 for (i = 0; i < NUM_VUSES (vuses); i++)
375 rename_use_op (VUSE_OP_PTR (vuses, i));
377 v_may_defs = V_MAY_DEF_OPS (ann);
378 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
380 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
381 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
384 v_must_defs = V_MUST_DEF_OPS (ann);
385 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
387 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
388 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
392 FOR_EACH_EDGE (e, ei, bb->succs)
393 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
394 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
398 /* Releases the structures holding the new ssa names. */
401 free_new_names (bitmap definitions)
406 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
408 tree def = ssa_name (ver);
410 if (SSA_NAME_AUX (def))
412 free (SSA_NAME_AUX (def));
413 SSA_NAME_AUX (def) = NULL;
419 /* Renames variables in new generated LOOP. */
422 rename_variables_in_loop (struct loop *loop)
427 bbs = get_loop_body (loop);
429 for (i = 0; i < loop->num_nodes; i++)
430 rename_variables_in_bb (bbs[i]);
436 /* This function copies phis from LOOP header to
437 NEW_LOOP header. AFTER is as
438 in update_phis_for_duplicate_loop function. */
441 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
444 tree phi, new_phi, def;
446 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
448 /* Second add arguments to newly created phi nodes. */
449 for (phi = phi_nodes (loop->header),
450 new_phi = phi_nodes (new_loop->header);
452 phi = PHI_CHAIN (phi),
453 new_phi = PHI_CHAIN (new_phi))
455 new_e = loop_preheader_edge (new_loop);
456 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
457 add_phi_arg (&new_phi, def, new_e);
462 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
463 executes after LOOP, and false if it executes before it. */
466 update_phis_for_duplicate_loop (struct loop *loop,
467 struct loop *new_loop, bool after)
470 tree *new_name_ptr, new_ssa_name;
471 tree phi_new, phi_old, def;
472 edge orig_entry_e = loop_preheader_edge (loop);
474 /* Copy phis from loop->header to new_loop->header. */
475 copy_phi_nodes (loop, new_loop, after);
477 old_latch = loop_latch_edge (loop);
479 /* Update PHI args for the new loop latch edge, and
480 the old loop preheader edge, we know that the PHI nodes
481 are ordered appropriately in copy_phi_nodes. */
482 for (phi_new = phi_nodes (new_loop->header),
483 phi_old = phi_nodes (loop->header);
485 phi_new = PHI_CHAIN (phi_new), phi_old = PHI_CHAIN (phi_old))
487 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
489 if (TREE_CODE (def) != SSA_NAME)
492 new_name_ptr = SSA_NAME_AUX (def);
494 /* Something defined outside of the loop. */
498 /* An ordinary ssa name defined in the loop. */
499 new_ssa_name = *new_name_ptr;
501 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
503 /* Update PHI args for the original loop pre-header edge. */
505 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
511 /* Update PHI nodes for a guard of the LOOP.
513 LOOP is supposed to have a preheader bb at which a guard condition is
514 located. The true edge of this condition skips the LOOP and ends
515 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
516 to be an empty bb (created by this transformation) with one successor.
518 This function creates phi nodes at the LOOP exit bb. These phis need to be
519 created as a result of adding true edge coming from guard.
521 FORNOW: Only phis which have corresponding phi nodes at the header of the
522 LOOP are created. Here we use the assumption that after the LOOP there
523 are no uses of defs generated in LOOP.
525 After the phis creation, the function updates the values of phi nodes at
526 the LOOP exit successor bb:
533 if (exit_cond) goto bb3 else goto bb2
539 After guard creation (the loop before this function):
542 if (guard_condition) goto bb4 else goto bb1
544 if (exit_cond) goto bb4 else goto bb2
552 This function updates the phi nodes in bb4 and in bb3, to account for the
553 new edge from bb0 to bb4. */
556 update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
559 basic_block bb = loop->exit_edges[0]->dest;
561 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
566 /* Generate new phi node. */
567 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb);
569 /* Add argument coming from guard true edge. */
570 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
571 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
573 /* Add argument coming from loop exit edge. */
574 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
575 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
577 /* Update all phi nodes at the loop exit successor. */
578 for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest);
580 phi1 = PHI_CHAIN (phi1))
582 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0));
583 if (old_arg == phi_arg)
585 edge e = EDGE_SUCC (bb, 0);
587 SET_PHI_ARG_DEF (phi1,
588 phi_arg_from_edge (phi1, e),
589 PHI_RESULT (new_phi));
594 set_phi_nodes (bb, phi_reverse (phi_nodes (bb)));
598 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
599 that starts at zero, increases by one and its limit is NITERS. */
602 make_loop_iterate_ntimes (struct loop *loop, tree niters,
603 tree begin_label, tree exit_label)
605 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
607 edge exit_edge = loop->exit_edges[0];
608 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
610 /* Flow loop scan does not update loop->single_exit field. */
611 loop->single_exit = loop->exit_edges[0];
612 orig_cond = get_loop_exit_condition (loop);
613 gcc_assert (orig_cond);
614 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
615 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
617 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
618 back to the exit condition statement. */
619 bsi_next (&loop_exit_bsi);
620 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
623 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
624 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
625 else /* 'then' edge loops back. */
626 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
628 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
629 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
630 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
631 begin_label, exit_label);
632 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
634 /* Remove old loop exit test: */
635 bsi_remove (&loop_exit_bsi);
637 if (vect_debug_stats (loop) || vect_debug_details (loop))
638 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
642 /* Given LOOP this function generates a new copy of it and puts it
643 on E which is either the entry or exit of LOOP. */
646 tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
649 struct loop *new_loop;
650 basic_block *new_bbs, *bbs;
653 basic_block exit_dest;
656 at_exit = (e == loop->exit_edges[0]);
657 if (!at_exit && e != loop_preheader_edge (loop))
659 if (dump_file && (dump_flags & TDF_DETAILS))
661 "Edge is not an entry nor an exit edge.\n");
665 bbs = get_loop_body (loop);
667 /* Check whether duplication is possible. */
668 if (!can_copy_bbs_p (bbs, loop->num_nodes))
670 if (vect_debug_stats (loop) || vect_debug_details (loop))
672 "Cannot copy basic blocks.\n");
677 /* Generate new loop structure. */
678 new_loop = duplicate_loop (loops, loop, loop->outer);
681 if (vect_debug_stats (loop) || vect_debug_details (loop))
683 "The duplicate_loop returns NULL.\n");
688 exit_dest = loop->exit_edges[0]->dest;
689 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
690 exit_dest) == loop->header ?
693 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
695 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
697 /* Duplicating phi args at exit bbs as coming
698 also from exit of duplicated loop. */
699 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
701 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
704 edge new_loop_exit_edge;
706 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
707 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
709 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
711 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
715 if (at_exit) /* Add the loop copy at exit. */
717 redirect_edge_and_branch_force (e, new_loop->header);
718 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
720 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
722 else /* Add the copy at entry. */
725 edge entry_e = loop_preheader_edge (loop);
726 basic_block preheader = entry_e->src;
728 if (!flow_bb_inside_loop_p (new_loop,
729 EDGE_SUCC (new_loop->header, 0)->dest))
730 new_exit_e = EDGE_SUCC (new_loop->header, 0);
732 new_exit_e = EDGE_SUCC (new_loop->header, 1);
734 redirect_edge_and_branch_force (new_exit_e, loop->header);
735 set_immediate_dominator (CDI_DOMINATORS, loop->header,
738 /* We have to add phi args to the loop->header here as coming
739 from new_exit_e edge. */
740 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
742 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
744 add_phi_arg (&phi, phi_arg, new_exit_e);
747 redirect_edge_and_branch_force (entry_e, new_loop->header);
748 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
751 flow_loop_scan (new_loop, LOOP_ALL);
752 flow_loop_scan (loop, LOOP_ALL);
760 /* Given the condition statement COND, put it as the last statement
761 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
762 Assumes that this is the single exit of the guarded loop.
763 Returns the skip edge. */
766 add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
768 block_stmt_iterator bsi;
770 tree cond_stmt, then_label, else_label;
772 enter_e = EDGE_SUCC (guard_bb, 0);
773 enter_e->flags &= ~EDGE_FALLTHRU;
774 enter_e->flags |= EDGE_FALSE_VALUE;
775 bsi = bsi_last (guard_bb);
777 then_label = build1 (GOTO_EXPR, void_type_node,
778 tree_block_label (exit_bb));
779 else_label = build1 (GOTO_EXPR, void_type_node,
780 tree_block_label (enter_e->dest));
781 cond_stmt = build (COND_EXPR, void_type_node, cond,
782 then_label, else_label);
783 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
784 /* Add new edge to connect entry block to the second loop. */
785 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
786 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
791 /* This function verifies that certain restrictions apply to LOOP. */
794 verify_loop_for_duplication (struct loop *loop,
795 bool update_first_loop_count, edge e)
797 edge exit_e = loop->exit_edges [0];
798 edge entry_e = loop_preheader_edge (loop);
800 /* We duplicate only innermost loops. */
803 if (vect_debug_stats (loop) || vect_debug_details (loop))
805 "Loop duplication failed. Loop is not innermost.\n");
809 /* Only loops with 1 exit. */
810 if (loop->num_exits != 1)
812 if (vect_debug_stats (loop) || vect_debug_details (loop))
814 "More than one exit from loop.\n");
818 /* Only loops with 1 entry. */
819 if (loop->num_entries != 1)
821 if (vect_debug_stats (loop) || vect_debug_details (loop))
823 "More than one exit from loop.\n");
827 /* All loops has outers, the only case loop->outer is NULL is for
828 the function itself. */
831 if (vect_debug_stats (loop) || vect_debug_details (loop))
833 "Loop is outer-most loop.\n");
837 /* Verify that new IV can be created and loop condition
838 can be changed to make first loop iterate first_niters times. */
839 if (!update_first_loop_count)
841 tree orig_cond = get_loop_exit_condition (loop);
842 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
846 if (vect_debug_stats (loop) || vect_debug_details (loop))
848 "Loop has no exit condition.\n");
851 if (orig_cond != bsi_stmt (loop_exit_bsi))
853 if (vect_debug_stats (loop) || vect_debug_details (loop))
855 "Loop exit condition is not loop header last stmt.\n");
860 /* Make sure E is either an entry or an exit edge. */
861 if (e != exit_e && e != entry_e)
863 if (vect_debug_stats (loop) || vect_debug_details (loop))
865 "E is not loop entry or exit edge.\n");
873 /* Given LOOP this function duplicates it to the edge E.
875 This transformation takes place before the loop is vectorized.
876 For now, there are two main cases when it's used
877 by the vectorizer: to support loops with unknown loop bounds
878 (or loop bounds indivisible by vectorization factor) and to force the
879 alignment of data references in the loop. In the first case, LOOP is
880 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
881 is duplicated to the preheader edge thus generating prolog loop. In both
882 cases, the original loop will be vectorized after the transformation.
884 The edge E is supposed to be either preheader edge of the LOOP or
885 its exit edge. If preheader edge is specified, the LOOP copy
886 will precede the original one. Otherwise the copy will be located
887 at the exit of the LOOP.
889 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
890 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
891 loop will be iterated FIRST_NITERS times by introducing additional
892 induction variable and replacing loop exit condition. If
893 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
894 the caller to tree_duplicate_loop_to_edge is responsible for updating
895 the first loop count.
897 NITERS (also SSA_NAME) parameter defines the number of iteration the
898 original loop iterated. The function generates two if-then guards:
899 one prior to the first loop and the other prior to the second loop.
900 The first guard will be:
902 if (FIRST_NITERS == 0) then skip the first loop
904 The second guard will be:
906 if (FIRST_NITERS == NITERS) then skip the second loop
908 Thus the equivalence to the original code is guaranteed by correct values
909 of NITERS and FIRST_NITERS and generation of if-then loop guards.
911 For now this function supports only loop forms that are candidate for
912 vectorization. Such types are the following:
914 (1) only innermost loops
915 (2) loops built from 2 basic blocks
916 (3) loops with one entry and one exit
917 (4) loops without function calls
918 (5) loops without defs that are used after the loop
920 (1), (3) are checked in this function; (2) - in function
921 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
922 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
923 when excluding induction/reduction support.
925 The function returns NULL in case one of these checks or
926 transformations failed. */
929 tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
930 edge e, tree first_niters,
931 tree niters, bool update_first_loop_count)
933 struct loop *new_loop = NULL, *first_loop, *second_loop;
937 basic_block first_exit_bb, second_exit_bb;
938 basic_block pre_header_bb;
939 edge exit_e = loop->exit_edges [0];
941 gcc_assert (!any_marked_for_rewrite_p ());
943 if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
946 /* We have to initialize cfg_hooks. Then, when calling
947 cfg_hooks->split_edge, the function tree_split_edge
948 is actually called and, when calling cfg_hooks->duplicate_block,
949 the function tree_duplicate_bb is called. */
950 tree_register_cfg_hooks ();
952 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
953 if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
955 if (vect_debug_stats (loop) || vect_debug_details (loop))
957 "The tree_duplicate_loop_to_edge_cfg failed.\n");
961 definitions = marked_ssa_names ();
962 allocate_new_names (definitions);
963 update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
964 /* Here, using assumption (5), we do not propagate new names further
965 than on phis of the exit from the second loop. */
966 rename_variables_in_loop (new_loop);
967 free_new_names (definitions);
972 second_loop = new_loop;
976 first_loop = new_loop;
980 /* 2. Generate bb between the loops. */
981 first_exit_bb = split_edge (first_loop->exit_edges[0]);
982 add_bb_to_loop (first_exit_bb, first_loop->outer);
984 /* We need to update here first loop exit edge
985 and second loop preheader edge. */
986 flow_loop_scan (first_loop, LOOP_ALL);
987 flow_loop_scan (second_loop, LOOP_ALL);
989 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
990 if (!update_first_loop_count)
992 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
993 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
995 make_loop_iterate_ntimes (first_loop, first_niters,
996 first_loop_latch_lbl,
997 first_loop_exit_lbl);
1000 /* 4. Add the guard before first loop:
1002 if FIRST_NITERS == 0
1007 /* 4a. Generate bb before first loop. */
1008 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
1009 add_bb_to_loop (pre_header_bb, first_loop->outer);
1011 /* First loop preheader edge is changed. */
1012 flow_loop_scan (first_loop, LOOP_ALL);
1014 /* 4b. Generate guard condition. */
1015 pre_condition = build (LE_EXPR, boolean_type_node,
1016 first_niters, integer_zero_node);
1018 /* 4c. Add condition at the end of preheader bb. */
1019 skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
1021 /* 4d. Update phis at first loop exit and propagate changes
1022 to the phis of second loop. */
1023 update_phi_nodes_for_guard (skip_e, first_loop);
1025 /* 5. Add the guard before second loop:
1027 if FIRST_NITERS == NITERS SKIP
1030 enter second loop */
1032 /* 5a. Generate empty bb at the exit from the second loop. */
1033 second_exit_bb = split_edge (second_loop->exit_edges[0]);
1034 add_bb_to_loop (second_exit_bb, second_loop->outer);
1036 /* Second loop preheader edge is changed. */
1037 flow_loop_scan (second_loop, LOOP_ALL);
1039 /* 5b. Generate guard condition. */
1040 pre_condition = build (EQ_EXPR, boolean_type_node,
1041 first_niters, niters);
1043 /* 5c. Add condition at the end of preheader bb. */
1044 skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1045 update_phi_nodes_for_guard (skip_e, second_loop);
1047 BITMAP_XFREE (definitions);
1048 unmark_all_for_rewrite ();
1055 /* Here the proper Vectorizer starts. */
1057 /* Function new_stmt_vec_info.
1059 Create and initialize a new stmt_vec_info struct for STMT. */
1062 new_stmt_vec_info (tree stmt, struct loop *loop)
1065 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1067 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1068 STMT_VINFO_STMT (res) = stmt;
1069 STMT_VINFO_LOOP (res) = loop;
1070 STMT_VINFO_RELEVANT_P (res) = 0;
1071 STMT_VINFO_VECTYPE (res) = NULL;
1072 STMT_VINFO_VEC_STMT (res) = NULL;
1073 STMT_VINFO_DATA_REF (res) = NULL;
1074 STMT_VINFO_MEMTAG (res) = NULL;
1075 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1081 /* Function new_loop_vec_info.
1083 Create and initialize a new loop_vec_info struct for LOOP, as well as
1084 stmt_vec_info structs for all the stmts in LOOP. */
1087 new_loop_vec_info (struct loop *loop)
1091 block_stmt_iterator si;
1094 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1096 bbs = get_loop_body (loop);
1098 /* Create stmt_info for all stmts in the loop. */
1099 for (i = 0; i < loop->num_nodes; i++)
1101 basic_block bb = bbs[i];
1102 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1104 tree stmt = bsi_stmt (si);
1107 get_stmt_operands (stmt);
1108 ann = stmt_ann (stmt);
1109 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1113 LOOP_VINFO_LOOP (res) = loop;
1114 LOOP_VINFO_BBS (res) = bbs;
1115 LOOP_VINFO_EXIT_COND (res) = NULL;
1116 LOOP_VINFO_NITERS (res) = NULL;
1117 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1118 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1119 LOOP_VINFO_VECT_FACTOR (res) = 0;
1120 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1121 "loop_write_datarefs");
1122 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1123 "loop_read_datarefs");
1124 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1130 /* Function destroy_loop_vec_info.
1132 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1133 stmts in the loop. */
1136 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1141 block_stmt_iterator si;
1147 loop = LOOP_VINFO_LOOP (loop_vinfo);
1149 bbs = LOOP_VINFO_BBS (loop_vinfo);
1150 nbbs = loop->num_nodes;
1152 for (j = 0; j < nbbs; j++)
1154 basic_block bb = bbs[j];
1155 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1157 tree stmt = bsi_stmt (si);
1158 stmt_ann_t ann = stmt_ann (stmt);
1159 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1161 set_stmt_info (ann, NULL);
1165 free (LOOP_VINFO_BBS (loop_vinfo));
1166 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1167 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1173 /* Function debug_loop_stats.
1175 For vectorization statistics dumps. */
1178 vect_debug_stats (struct loop *loop)
1181 block_stmt_iterator si;
1182 tree node = NULL_TREE;
1184 if (!dump_file || !(dump_flags & TDF_STATS))
1189 fprintf (dump_file, "\n");
1198 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1200 node = bsi_stmt (si);
1201 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1205 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1206 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1208 fprintf (dump_file, "\nloop at %s:%d: ",
1209 EXPR_FILENAME (node), EXPR_LINENO (node));
1217 /* Function debug_loop_details.
1219 For vectorization debug dumps. */
1222 vect_debug_details (struct loop *loop)
1225 block_stmt_iterator si;
1226 tree node = NULL_TREE;
1228 if (!dump_file || !(dump_flags & TDF_DETAILS))
1233 fprintf (dump_file, "\n");
1242 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1244 node = bsi_stmt (si);
1245 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1249 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1250 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1252 fprintf (dump_file, "\nloop at %s:%d: ",
1253 EXPR_FILENAME (node), EXPR_LINENO (node));
1261 /* Function vect_get_ptr_offset
1263 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1266 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1267 tree vectype ATTRIBUTE_UNUSED,
1268 tree *offset ATTRIBUTE_UNUSED)
1270 /* TODO: Use alignment information. */
1275 /* Function vect_get_base_and_bit_offset
1277 Return the BASE of the data reference EXPR.
1278 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1279 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1280 bits of 'a.b[i] + 4B' from a.
1283 EXPR - the memory reference that is being analyzed
1284 DR - the data_reference struct of the _original_ memory reference
1285 (Note: DR_REF (DR) is not necessarily EXPR)
1286 VECTYPE - the type that defines the alignment (i.e, we compute
1287 alignment relative to TYPE_ALIGN(VECTYPE))
1290 BASE (returned value) - the base of the data reference EXPR.
1291 E.g, if EXPR is a.b[k].c[i][j] the returned
1293 OFFSET - offset of EXPR from BASE in bits
1294 BASE_ALIGNED_P - indicates if BASE is aligned
1296 If something unexpected is encountered (an unsupported form of data-ref),
1297 or if VECTYPE is given but OFFSET cannot be determined:
1298 then NULL_TREE is returned. */
1301 vect_get_base_and_bit_offset (struct data_reference *dr,
1304 loop_vec_info loop_vinfo,
1306 bool *base_aligned_p)
1308 tree this_offset = size_zero_node;
1309 tree base = NULL_TREE;
1311 tree oprnd0, oprnd1;
1312 struct data_reference *array_dr;
1313 enum tree_code code = TREE_CODE (expr);
1315 *base_aligned_p = false;
1319 /* These cases end the recursion: */
1321 *offset = size_zero_node;
1322 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1323 *base_aligned_p = true;
1330 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1333 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1335 base = vect_get_ptr_offset (expr, vectype, offset);
1337 *base_aligned_p = true;
1341 *base_aligned_p = true;
1342 *offset = size_zero_node;
1348 *offset = int_const_binop (MULT_EXPR, expr,
1349 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1352 /* These cases continue the recursion: */
1354 oprnd0 = TREE_OPERAND (expr, 0);
1355 oprnd1 = TREE_OPERAND (expr, 1);
1357 this_offset = bit_position (oprnd1);
1358 if (vectype && !host_integerp (this_offset, 1))
1364 oprnd0 = TREE_OPERAND (expr, 0);
1369 oprnd0 = TREE_OPERAND (expr, 0);
1374 if (DR_REF (dr) != expr)
1375 /* Build array data_reference struct if the existing DR_REF
1376 doesn't match EXPR. This happens, for example, when the
1377 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1378 contains information on the access of T, not of arr. In order
1379 to continue the analysis, we create a new DR struct that
1380 describes the access of arr.
1382 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1386 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1387 vectype, &this_offset);
1392 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1394 *offset = this_offset;
1395 *base_aligned_p = true;
1402 /* In case we have a PLUS_EXPR of the form
1403 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1404 This is verified in vect_get_symbl_and_dr. */
1405 oprnd0 = TREE_OPERAND (expr, 0);
1406 oprnd1 = TREE_OPERAND (expr, 1);
1408 base = vect_get_base_and_bit_offset
1409 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1410 if (vectype && !base)
1420 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1421 loop_vinfo, offset, base_aligned_p);
1423 if (vectype && base)
1425 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1426 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1429 if (vect_debug_details (NULL))
1431 print_generic_expr (dump_file, expr, TDF_SLIM);
1432 fprintf (dump_file, " --> total offset for ref: ");
1433 print_generic_expr (dump_file, *offset, TDF_SLIM);
1440 /* Function vect_force_dr_alignment_p.
1442 Returns whether the alignment of a DECL can be forced to be aligned
1443 on ALIGNMENT bit boundary. */
1446 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1448 if (TREE_CODE (decl) != VAR_DECL)
1451 if (DECL_EXTERNAL (decl))
1454 if (TREE_STATIC (decl))
1455 return (alignment <= MAX_OFILE_ALIGNMENT);
1457 /* This is not 100% correct. The absolute correct stack alignment
1458 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1459 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1460 However, until someone implements forced stack alignment, SSE
1461 isn't really usable without this. */
1462 return (alignment <= PREFERRED_STACK_BOUNDARY);
1466 /* Function vect_get_new_vect_var.
1468 Returns a name for a new variable. The current naming scheme appends the
1469 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1470 the name of vectorizer generated variables, and appends that to NAME if
1474 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1480 if (var_kind == vect_simple_var)
1485 prefix_len = strlen (prefix);
1488 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1490 new_vect_var = create_tmp_var (type, prefix);
1492 return new_vect_var;
1496 /* Function vect_create_index_for_vector_ref.
1498 Create (and return) an index variable, along with it's update chain in the
1499 loop. This variable will be used to access a memory location in a vector
1503 LOOP: The loop being vectorized.
1504 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1505 function can be added here, or in the loop pre-header.
1508 Return an index that will be used to index a vector array. It is expected
1509 that a pointer to the first vector will be used as the base address for the
1512 FORNOW: we are not trying to be efficient, just creating a new index each
1513 time from scratch. At this time all vector references could use the same
1516 TODO: create only one index to be used by all vector references. Record
1517 the index in the LOOP_VINFO the first time this procedure is called and
1518 return it on subsequent calls. The increment of this index must be placed
1519 just before the conditional expression that ends the single block loop. */
1522 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1525 tree indx_before_incr, indx_after_incr;
1527 /* It is assumed that the base pointer used for vectorized access contains
1528 the address of the first vector. Therefore the index used for vectorized
1529 access must be initialized to zero and incremented by 1. */
1531 init = integer_zero_node;
1532 step = integer_one_node;
1534 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1535 create_iv (init, step, NULL_TREE, loop, bsi, false,
1536 &indx_before_incr, &indx_after_incr);
1538 return indx_before_incr;
1542 /* Function vect_create_addr_base_for_vector_ref.
1544 Create an expression that computes the address of the first memory location
1545 that will be accessed for a data reference.
1548 STMT: The statement containing the data reference.
1549 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1550 OFFSET: Optional. If supplied, it is be added to the initial address.
1553 1. Return an SSA_NAME whose value is the address of the memory location of
1554 the first vector of the data reference.
1555 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1556 these statement(s) which define the returned SSA_NAME.
1558 FORNOW: We are only handling array accesses with step 1. */
1561 vect_create_addr_base_for_vector_ref (tree stmt,
1562 tree *new_stmt_list,
1565 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1566 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1567 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1568 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1569 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1570 tree ref = DR_REF (dr);
1571 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1572 tree scalar_type = TREE_TYPE (ref);
1573 tree scalar_ptr_type = build_pointer_type (scalar_type);
1575 tree init_val, step, init_oval;
1577 bool is_ptr_ref, is_array_ref, is_addr_expr;
1582 tree addr_base, addr_expr;
1583 tree dest, new_stmt;
1585 /* Only the access function of the last index is relevant (i_n in
1586 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1587 access_fn = DR_ACCESS_FN (dr, 0);
1588 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1591 init_oval = integer_zero_node;
1593 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1594 && TREE_CODE (data_ref_base) == SSA_NAME;
1595 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1596 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1597 || TREE_CODE (data_ref_base) == PLUS_EXPR
1598 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1599 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1601 /** Create: &(base[init_val])
1603 if data_ref_base is an ARRAY_TYPE:
1604 base = data_ref_base
1606 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1607 base = *((scalar_array *) data_ref_base)
1611 array_base = data_ref_base;
1612 else /* is_ptr_ref or is_addr_expr */
1614 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1615 tree scalar_array_type = build_array_type (scalar_type, 0);
1616 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1617 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1618 add_referenced_tmp_var (array_ptr);
1620 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1621 add_referenced_tmp_var (dest);
1623 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1624 append_to_statement_list_force (new_stmt, new_stmt_list);
1626 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1627 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1628 new_temp = make_ssa_name (array_ptr, vec_stmt);
1629 TREE_OPERAND (vec_stmt, 0) = new_temp;
1630 append_to_statement_list_force (vec_stmt, new_stmt_list);
1633 array_base = build_fold_indirect_ref (new_temp);
1636 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1637 add_referenced_tmp_var (dest);
1638 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1639 append_to_statement_list_force (new_stmt, new_stmt_list);
1643 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1644 add_referenced_tmp_var (tmp);
1645 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1646 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1647 init_val = make_ssa_name (tmp, vec_stmt);
1648 TREE_OPERAND (vec_stmt, 0) = init_val;
1649 append_to_statement_list_force (vec_stmt, new_stmt_list);
1652 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1653 NULL_TREE, NULL_TREE);
1654 addr_base = build_fold_addr_expr (array_ref);
1656 /* addr_expr = addr_base */
1657 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1658 get_name (base_name));
1659 add_referenced_tmp_var (addr_expr);
1660 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1661 new_temp = make_ssa_name (addr_expr, vec_stmt);
1662 TREE_OPERAND (vec_stmt, 0) = new_temp;
1663 append_to_statement_list_force (vec_stmt, new_stmt_list);
1669 /* Function get_vectype_for_scalar_type.
1671 Returns the vector type corresponding to SCALAR_TYPE as supported
1675 get_vectype_for_scalar_type (tree scalar_type)
1677 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1678 int nbytes = GET_MODE_SIZE (inner_mode);
1685 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1687 nunits = UNITS_PER_SIMD_WORD / nbytes;
1689 vectype = build_vector_type (scalar_type, nunits);
1690 if (vect_debug_details (NULL))
1692 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1693 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1699 if (vect_debug_details (NULL))
1701 fprintf (dump_file, "vectype: ");
1702 print_generic_expr (dump_file, vectype, TDF_SLIM);
1705 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1707 /* TODO: tree-complex.c sometimes can parallelize operations
1708 on generic vectors. We can vectorize the loop in that case,
1709 but then we should re-run the lowering pass. */
1710 if (vect_debug_details (NULL))
1711 fprintf (dump_file, "mode not supported by target.");
1719 /* Function vect_align_data_ref.
1721 Handle mislignment of a memory accesses.
1723 FORNOW: Can't handle misaligned accesses.
1724 Make sure that the dataref is aligned. */
1727 vect_align_data_ref (tree stmt)
1729 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1730 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1732 /* FORNOW: can't handle misaligned accesses;
1733 all accesses expected to be aligned. */
1734 gcc_assert (aligned_access_p (dr));
1738 /* Function vect_create_data_ref_ptr.
1740 Create a memory reference expression for vector access, to be used in a
1741 vector load/store stmt. The reference is based on a new pointer to vector
1745 1. STMT: a stmt that references memory. Expected to be of the form
1746 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1747 2. BSI: block_stmt_iterator where new stmts can be added.
1748 3. OFFSET (optional): an offset to be added to the initial address accessed
1749 by the data-ref in STMT.
1750 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1751 pointing to the initial address.
1754 1. Declare a new ptr to vector_type, and have it point to the base of the
1755 data reference (initial addressed accessed by the data reference).
1756 For example, for vector of type V8HI, the following code is generated:
1759 vp = (v8hi *)initial_address;
1761 if OFFSET is not supplied:
1762 initial_address = &a[init];
1763 if OFFSET is supplied:
1764 initial_address = &a[init + OFFSET];
1766 Return the initial_address in INITIAL_ADDRESS.
1768 2. Create a data-reference in the loop based on the new vector pointer vp,
1769 and using a new index variable 'idx' as follows:
1773 where if ONLY_INIT is true:
1776 update = idx + vector_type_size
1778 Return the pointer vp'.
1781 FORNOW: handle only aligned and consecutive accesses. */
1784 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1785 tree *initial_address, bool only_init)
1788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1789 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1790 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1791 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1795 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1796 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1797 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1798 int nvuses, nv_may_defs, nv_must_defs;
1802 tree new_stmt_list = NULL_TREE;
1804 edge pe = loop_preheader_edge (loop);
1811 base_name = unshare_expr (DR_BASE_NAME (dr));
1812 if (vect_debug_details (NULL))
1814 tree data_ref_base = base_name;
1815 fprintf (dump_file, "create array_ref of type: ");
1816 print_generic_expr (dump_file, vectype, TDF_SLIM);
1817 if (TREE_CODE (data_ref_base) == VAR_DECL)
1818 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1819 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1820 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1821 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1822 fprintf (dump_file, "vectorizing a record based array ref: ");
1823 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1824 fprintf (dump_file, "vectorizing a pointer ref: ");
1825 print_generic_expr (dump_file, base_name, TDF_SLIM);
1828 /** (1) Create the new vector-pointer variable: **/
1830 vect_ptr_type = build_pointer_type (vectype);
1831 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1832 get_name (base_name));
1833 add_referenced_tmp_var (vect_ptr);
1836 /** (2) Handle aliasing information of the new vector-pointer: **/
1838 tag = STMT_VINFO_MEMTAG (stmt_info);
1840 get_var_ann (vect_ptr)->type_mem_tag = tag;
1842 /* Mark for renaming all aliased variables
1843 (i.e, the may-aliases of the type-mem-tag). */
1844 nvuses = NUM_VUSES (vuses);
1845 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1846 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1847 for (i = 0; i < nvuses; i++)
1849 tree use = VUSE_OP (vuses, i);
1850 if (TREE_CODE (use) == SSA_NAME)
1851 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1853 for (i = 0; i < nv_may_defs; i++)
1855 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1856 if (TREE_CODE (def) == SSA_NAME)
1857 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1859 for (i = 0; i < nv_must_defs; i++)
1861 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1862 if (TREE_CODE (def) == SSA_NAME)
1863 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1867 /** (3) Calculate the initial address the vector-pointer, and set
1868 the vector-pointer to point to it before the loop: **/
1870 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1871 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1873 pe = loop_preheader_edge (loop);
1874 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1875 gcc_assert (!new_bb);
1876 *initial_address = new_temp;
1878 /* Create: p = (vectype *) initial_base */
1879 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1880 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1881 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1882 TREE_OPERAND (vec_stmt, 0) = new_temp;
1883 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1884 gcc_assert (!new_bb);
1885 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1888 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1890 if (only_init) /* No update in loop is required. */
1891 return vect_ptr_init;
1893 idx = vect_create_index_for_vector_ref (loop, bsi);
1895 /* Create: update = idx * vectype_size */
1896 ptr_update = create_tmp_var (integer_type_node, "update");
1897 add_referenced_tmp_var (ptr_update);
1898 vectype_size = build_int_cst (integer_type_node,
1899 GET_MODE_SIZE (TYPE_MODE (vectype)));
1900 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1901 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1902 new_temp = make_ssa_name (ptr_update, vec_stmt);
1903 TREE_OPERAND (vec_stmt, 0) = new_temp;
1904 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1906 /* Create: data_ref_ptr = vect_ptr_init + update */
1907 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1908 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1909 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1910 TREE_OPERAND (vec_stmt, 0) = new_temp;
1911 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1912 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1914 return data_ref_ptr;
1918 /* Function vect_create_destination_var.
1920 Create a new temporary of type VECTYPE. */
1923 vect_create_destination_var (tree scalar_dest, tree vectype)
1926 const char *new_name;
1928 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1930 new_name = get_name (scalar_dest);
1933 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1934 add_referenced_tmp_var (vec_dest);
1940 /* Function vect_init_vector.
1942 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1943 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1944 used in the vectorization of STMT. */
1947 vect_init_vector (tree stmt, tree vector_var)
1949 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1950 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1953 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1959 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1960 add_referenced_tmp_var (new_var);
1962 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1963 new_temp = make_ssa_name (new_var, init_stmt);
1964 TREE_OPERAND (init_stmt, 0) = new_temp;
1966 pe = loop_preheader_edge (loop);
1967 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1968 gcc_assert (!new_bb);
1970 if (vect_debug_details (NULL))
1972 fprintf (dump_file, "created new init_stmt: ");
1973 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1976 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1981 /* Function vect_get_vec_def_for_operand.
1983 OP is an operand in STMT. This function returns a (vector) def that will be
1984 used in the vectorized stmt for STMT.
1986 In the case that OP is an SSA_NAME which is defined in the loop, then
1987 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1989 In case OP is an invariant or constant, a new stmt that creates a vector def
1990 needs to be introduced. */
1993 vect_get_vec_def_for_operand (tree op, tree stmt)
1998 stmt_vec_info def_stmt_info = NULL;
1999 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2000 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2001 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2002 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2009 if (vect_debug_details (NULL))
2011 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2012 print_generic_expr (dump_file, op, TDF_SLIM);
2015 /** ===> Case 1: operand is a constant. **/
2017 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2019 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2023 /* Build a tree with vector elements. */
2024 if (vect_debug_details (NULL))
2025 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2027 for (i = nunits - 1; i >= 0; --i)
2029 t = tree_cons (NULL_TREE, op, t);
2031 vec_cst = build_vector (vectype, t);
2032 return vect_init_vector (stmt, vec_cst);
2035 gcc_assert (TREE_CODE (op) == SSA_NAME);
2037 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2039 def_stmt = SSA_NAME_DEF_STMT (op);
2040 def_stmt_info = vinfo_for_stmt (def_stmt);
2042 if (vect_debug_details (NULL))
2044 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2045 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2049 /** ==> Case 2.1: operand is defined inside the loop. **/
2053 /* Get the def from the vectorized stmt. */
2055 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2056 gcc_assert (vec_stmt);
2057 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2062 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2063 it is a reduction/induction. **/
2065 bb = bb_for_stmt (def_stmt);
2066 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2068 if (vect_debug_details (NULL))
2069 fprintf (dump_file, "reduction/induction - unsupported.");
2070 internal_error ("no support for reduction/induction"); /* FORNOW */
2074 /** ==> Case 2.3: operand is defined outside the loop -
2075 it is a loop invariant. */
2077 switch (TREE_CODE (def_stmt))
2080 def = PHI_RESULT (def_stmt);
2083 def = TREE_OPERAND (def_stmt, 0);
2086 def = TREE_OPERAND (def_stmt, 0);
2087 gcc_assert (IS_EMPTY_STMT (def_stmt));
2091 if (vect_debug_details (NULL))
2093 fprintf (dump_file, "unsupported defining stmt: ");
2094 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2096 internal_error ("unsupported defining stmt");
2099 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2101 if (vect_debug_details (NULL))
2102 fprintf (dump_file, "Create vector_inv.");
2104 for (i = nunits - 1; i >= 0; --i)
2106 t = tree_cons (NULL_TREE, def, t);
2109 vec_inv = build_constructor (vectype, t);
2110 return vect_init_vector (stmt, vec_inv);
2114 /* Function vect_finish_stmt_generation.
2116 Insert a new stmt. */
2119 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2121 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2123 if (vect_debug_details (NULL))
2125 fprintf (dump_file, "add new stmt: ");
2126 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2129 /* Make sure bsi points to the stmt that is being vectorized. */
2131 /* Assumption: any stmts created for the vectorization of stmt S were
2132 inserted before S. BSI is expected to point to S or some new stmt before S. */
2134 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2136 gcc_assert (stmt == bsi_stmt (*bsi));
2140 /* Function vectorizable_assignment.
2142 Check if STMT performs an assignment (copy) that can be vectorized.
2143 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2144 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2145 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2148 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2155 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2156 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2159 /* Is vectorizable assignment? */
2161 if (TREE_CODE (stmt) != MODIFY_EXPR)
2164 scalar_dest = TREE_OPERAND (stmt, 0);
2165 if (TREE_CODE (scalar_dest) != SSA_NAME)
2168 op = TREE_OPERAND (stmt, 1);
2169 if (!vect_is_simple_use (op, loop, NULL))
2171 if (vect_debug_details (NULL))
2172 fprintf (dump_file, "use not simple.");
2176 if (!vec_stmt) /* transformation not required. */
2178 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2183 if (vect_debug_details (NULL))
2184 fprintf (dump_file, "transform assignment.");
2187 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2190 op = TREE_OPERAND (stmt, 1);
2191 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2193 /* Arguments are ready. create the new vector stmt. */
2194 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2195 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2196 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2197 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2203 /* Function vectorizable_operation.
2205 Check if STMT performs a binary or unary operation that can be vectorized.
2206 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2207 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2208 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2211 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2216 tree op0, op1 = NULL;
2217 tree vec_oprnd0, vec_oprnd1=NULL;
2218 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2219 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2220 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2222 enum tree_code code;
2223 enum machine_mode vec_mode;
2229 /* Is STMT a vectorizable binary/unary operation? */
2230 if (TREE_CODE (stmt) != MODIFY_EXPR)
2233 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2236 operation = TREE_OPERAND (stmt, 1);
2237 code = TREE_CODE (operation);
2238 optab = optab_for_tree_code (code, vectype);
2240 /* Support only unary or binary operations. */
2241 op_type = TREE_CODE_LENGTH (code);
2242 if (op_type != unary_op && op_type != binary_op)
2244 if (vect_debug_details (NULL))
2245 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2249 for (i = 0; i < op_type; i++)
2251 op = TREE_OPERAND (operation, i);
2252 if (!vect_is_simple_use (op, loop, NULL))
2254 if (vect_debug_details (NULL))
2255 fprintf (dump_file, "use not simple.");
2260 /* Supportable by target? */
2263 if (vect_debug_details (NULL))
2264 fprintf (dump_file, "no optab.");
2267 vec_mode = TYPE_MODE (vectype);
2268 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2270 if (vect_debug_details (NULL))
2271 fprintf (dump_file, "op not supported by target.");
2275 if (!vec_stmt) /* transformation not required. */
2277 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2283 if (vect_debug_details (NULL))
2284 fprintf (dump_file, "transform binary/unary operation.");
2287 scalar_dest = TREE_OPERAND (stmt, 0);
2288 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2291 op0 = TREE_OPERAND (operation, 0);
2292 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2294 if (op_type == binary_op)
2296 op1 = TREE_OPERAND (operation, 1);
2297 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2300 /* Arguments are ready. create the new vector stmt. */
2302 if (op_type == binary_op)
2303 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2304 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2306 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2307 build1 (code, vectype, vec_oprnd0));
2308 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2309 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2310 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2316 /* Function vectorizable_store.
2318 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2320 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2321 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2322 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2325 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2332 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2333 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2334 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2335 enum machine_mode vec_mode;
2337 enum dr_alignment_support alignment_support_cheme;
2339 /* Is vectorizable store? */
2341 if (TREE_CODE (stmt) != MODIFY_EXPR)
2344 scalar_dest = TREE_OPERAND (stmt, 0);
2345 if (TREE_CODE (scalar_dest) != ARRAY_REF
2346 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2349 op = TREE_OPERAND (stmt, 1);
2350 if (!vect_is_simple_use (op, loop, NULL))
2352 if (vect_debug_details (NULL))
2353 fprintf (dump_file, "use not simple.");
2357 vec_mode = TYPE_MODE (vectype);
2358 /* FORNOW. In some cases can vectorize even if data-type not supported
2359 (e.g. - array initialization with 0). */
2360 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2363 if (!STMT_VINFO_DATA_REF (stmt_info))
2367 if (!vec_stmt) /* transformation not required. */
2369 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2375 if (vect_debug_details (NULL))
2376 fprintf (dump_file, "transform store");
2378 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2379 gcc_assert (alignment_support_cheme);
2380 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2382 /* Handle use - get the vectorized def from the defining stmt. */
2383 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2386 /* FORNOW: make sure the data reference is aligned. */
2387 vect_align_data_ref (stmt);
2388 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2389 data_ref = build_fold_indirect_ref (data_ref);
2391 /* Arguments are ready. create the new vector stmt. */
2392 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2393 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2399 /* vectorizable_load.
2401 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2403 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2404 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2405 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2408 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2411 tree vec_dest = NULL;
2412 tree data_ref = NULL;
2414 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2415 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2416 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2423 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2424 edge pe = loop_preheader_edge (loop);
2425 enum dr_alignment_support alignment_support_cheme;
2427 /* Is vectorizable load? */
2429 if (TREE_CODE (stmt) != MODIFY_EXPR)
2432 scalar_dest = TREE_OPERAND (stmt, 0);
2433 if (TREE_CODE (scalar_dest) != SSA_NAME)
2436 op = TREE_OPERAND (stmt, 1);
2437 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2440 if (!STMT_VINFO_DATA_REF (stmt_info))
2443 mode = (int) TYPE_MODE (vectype);
2445 /* FORNOW. In some cases can vectorize even if data-type not supported
2446 (e.g. - data copies). */
2447 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2449 if (vect_debug_details (loop))
2450 fprintf (dump_file, "Aligned load, but unsupported type.");
2454 if (!vec_stmt) /* transformation not required. */
2456 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2462 if (vect_debug_details (NULL))
2463 fprintf (dump_file, "transform load.");
2465 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2466 gcc_assert (alignment_support_cheme);
2468 if (alignment_support_cheme == dr_aligned
2469 || alignment_support_cheme == dr_unaligned_supported)
2480 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2481 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2482 if (aligned_access_p (dr))
2483 data_ref = build_fold_indirect_ref (data_ref);
2486 int mis = DR_MISALIGNMENT (dr);
2487 tree tmis = (mis == -1 ?
2489 build_int_cst (integer_type_node, mis));
2490 tmis = int_const_binop (MULT_EXPR, tmis,
2491 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2492 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2494 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2495 new_temp = make_ssa_name (vec_dest, new_stmt);
2496 TREE_OPERAND (new_stmt, 0) = new_temp;
2497 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2499 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2503 msq_init = *(floor(p1))
2504 p2 = initial_addr + VS - 1;
2505 magic = have_builtin ? builtin_result : initial_address;
2508 p2' = p2 + indx * vectype_size
2510 vec_dest = realign_load (msq, lsq, magic)
2524 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2525 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2526 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2528 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2529 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2530 new_temp = make_ssa_name (vec_dest, new_stmt);
2531 TREE_OPERAND (new_stmt, 0) = new_temp;
2532 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2533 gcc_assert (!new_bb);
2534 msq_init = TREE_OPERAND (new_stmt, 0);
2537 /* <2> Create lsq = *(floor(p2')) in the loop */
2538 offset = build_int_cst (integer_type_node,
2539 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2540 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2541 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2542 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2543 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2544 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2545 new_temp = make_ssa_name (vec_dest, new_stmt);
2546 TREE_OPERAND (new_stmt, 0) = new_temp;
2547 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2548 lsq = TREE_OPERAND (new_stmt, 0);
2552 if (targetm.vectorize.builtin_mask_for_load)
2554 /* Create permutation mask, if required, in loop preheader. */
2556 params = build_tree_list (NULL_TREE, init_addr);
2557 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2558 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2559 new_stmt = build_function_call_expr (builtin_decl, params);
2560 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2561 new_temp = make_ssa_name (vec_dest, new_stmt);
2562 TREE_OPERAND (new_stmt, 0) = new_temp;
2563 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2564 gcc_assert (!new_bb);
2565 magic = TREE_OPERAND (new_stmt, 0);
2569 /* Use current address instead of init_addr for reduced reg pressure.
2571 magic = dataref_ptr;
2575 /* <4> Create msq = phi <msq_init, lsq> in loop */
2576 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2577 msq = make_ssa_name (vec_dest, NULL_TREE);
2578 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2579 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2580 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2581 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2584 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2585 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2586 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2587 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2588 new_temp = make_ssa_name (vec_dest, new_stmt);
2589 TREE_OPERAND (new_stmt, 0) = new_temp;
2590 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2595 *vec_stmt = new_stmt;
2600 /* Function vect_supportable_dr_alignment
2602 Return whether the data reference DR is supported with respect to its
2605 static enum dr_alignment_support
2606 vect_supportable_dr_alignment (struct data_reference *dr)
2608 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2609 enum machine_mode mode = (int) TYPE_MODE (vectype);
2611 if (aligned_access_p (dr))
2614 /* Possibly unaligned access. */
2616 if (DR_IS_READ (dr))
2618 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2619 && (!targetm.vectorize.builtin_mask_for_load
2620 || targetm.vectorize.builtin_mask_for_load ()))
2621 return dr_unaligned_software_pipeline;
2623 if (targetm.vectorize.misaligned_mem_ok (mode))
2624 /* Can't software pipeline the loads. */
2625 return dr_unaligned_supported;
2629 return dr_unaligned_unsupported;
2633 /* Function vect_transform_stmt.
2635 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2638 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2640 bool is_store = false;
2641 tree vec_stmt = NULL_TREE;
2642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2645 switch (STMT_VINFO_TYPE (stmt_info))
2647 case op_vec_info_type:
2648 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2652 case assignment_vec_info_type:
2653 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2657 case load_vec_info_type:
2658 done = vectorizable_load (stmt, bsi, &vec_stmt);
2662 case store_vec_info_type:
2663 done = vectorizable_store (stmt, bsi, &vec_stmt);
2668 if (vect_debug_details (NULL))
2669 fprintf (dump_file, "stmt not supported.");
2673 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2679 /* This function builds ni_name = number of iterations loop executes
2680 on the loop preheader. */
2683 vect_build_loop_niters (loop_vec_info loop_vinfo)
2685 tree ni_name, stmt, var;
2687 basic_block new_bb = NULL;
2688 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2689 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2691 var = create_tmp_var (TREE_TYPE (ni), "niters");
2692 add_referenced_tmp_var (var);
2693 if (TREE_CODE (ni) == INTEGER_CST)
2695 /* This case is generated when treating a known loop bound
2696 indivisible by VF. Here we cannot use force_gimple_operand. */
2697 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2698 ni_name = make_ssa_name (var, stmt);
2699 TREE_OPERAND (stmt, 0) = ni_name;
2702 ni_name = force_gimple_operand (ni, &stmt, false, var);
2704 pe = loop_preheader_edge (loop);
2706 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2708 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2714 /* This function generates the following statements:
2716 ni_name = number of iterations loop executes
2717 ratio = ni_name / vf
2718 ratio_mult_vf_name = ratio * vf
2720 and places them at the loop preheader edge. */
2723 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2724 tree *ratio_mult_vf_name_p, tree *ratio_p)
2731 tree ratio_mult_vf_name, ratio_mult_vf;
2732 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2733 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2737 /* Generate temporary variable that contains
2738 number of iterations loop executes. */
2740 ni_name = vect_build_loop_niters (loop_vinfo);
2743 vf is power of 2; then if ratio = = n >> log2 (vf). */
2744 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2745 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2747 /* Update initial conditions of loop copy. */
2749 /* ratio_mult_vf = ratio * vf;
2750 then if ratio_mult_vf = ratio << log2 (vf). */
2752 i = exact_log2 (vf);
2753 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2754 add_referenced_tmp_var (ratio_mult_vf);
2756 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2758 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2759 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2760 ratio, build_int_cst (unsigned_type_node,
2763 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2765 pe = loop_preheader_edge (loop);
2766 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2768 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2770 *ni_name_p = ni_name;
2771 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2778 /* This function generates stmt
2782 and attaches it to preheader of LOOP. */
2785 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2787 tree var, stmt, var_name;
2792 /* create temporary variable */
2793 var = create_tmp_var (TREE_TYPE (n), "bnd");
2794 add_referenced_tmp_var (var);
2796 var_name = make_ssa_name (var, NULL_TREE);
2798 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2800 i = exact_log2 (vf);
2801 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2802 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2803 n, build_int_cst (unsigned_type_node,i)));
2805 SSA_NAME_DEF_STMT (var_name) = stmt;
2807 pe = loop_preheader_edge (loop);
2808 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2810 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2812 if (vect_debug_details (NULL))
2813 fprintf (dump_file, "New bb on preheader edge was not generated.");
2819 /* Function vect_transform_loop_bound.
2821 Create a new exit condition for the loop. */
2824 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2826 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2827 edge exit_edge = loop->single_exit;
2828 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
2829 tree indx_before_incr, indx_after_incr;
2830 tree orig_cond_expr;
2831 HOST_WIDE_INT old_N = 0;
2834 tree new_loop_bound;
2839 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2842 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2844 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2846 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2847 #ifdef ENABLE_CHECKING
2848 gcc_assert (orig_cond_expr);
2850 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
2852 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
2853 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
2855 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
2856 to point to the exit condition. */
2857 bsi_next (&loop_exit_bsi);
2858 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
2860 /* new loop exit test: */
2861 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
2863 new_loop_bound = fold_convert (lb_type,
2864 build_int_cst (unsigned_type_node,
2867 new_loop_bound = niters;
2869 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
2870 cond = build2 (GE_EXPR, boolean_type_node,
2871 indx_after_incr, new_loop_bound);
2872 else /* 'then' edge loops back. */
2873 cond = build2 (LT_EXPR, boolean_type_node,
2874 indx_after_incr, new_loop_bound);
2876 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
2877 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
2879 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
2881 /* remove old loop exit test: */
2882 bsi_remove (&loop_exit_bsi);
2884 if (vect_debug_details (NULL))
2885 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
2889 /* Function vect_update_ivs_after_vectorizer.
2891 "Advance" the induction variables of LOOP to the value they should take
2892 after the execution of LOOP. This is currently necessary because the
2893 vectorizer does not handle induction variables that are used after the
2894 loop. Such a situation occurs when the last iterations of LOOP are
2896 1. We introduced new uses after LOOP for IVs that were not originally used
2897 after LOOP: the IVs of LOOP are now used by an epilog loop.
2898 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2899 times, whereas the loop IVs should be bumped N times.
2902 - LOOP - a loop that is going to be vectorized. The last few iterations
2903 of LOOP were peeled.
2904 - NITERS - the number of iterations that LOOP executes (before it is
2905 vectorized). i.e, the number of times the ivs should be bumped.
2910 if (guard-cond) GOTO bb_before_epilog_loop
2917 bb_before_epilog_loop:
2919 bb_before_epilog_loop has edges coming in form the loop exit and
2920 from bb_before_loop. New definitions for ivs will be placed on the edge
2921 from loop->exit to bb_before_epilog_loop. This also requires that we update
2922 the phis in bb_before_epilog_loop. (In the code this bb is denoted
2925 Assumption 1: Like the rest of the vectorizer, this function assumes
2926 a single loop exit that has a single predecessor.
2928 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2929 organized in the same order.
2931 Assumption 3: The access function of the ivs is simple enough (see
2932 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2936 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2938 edge exit = loop->exit_edges[0];
2940 basic_block update_bb = exit->dest;
2943 /* Generate basic block at the exit from the loop. */
2944 basic_block new_bb = split_edge (exit);
2946 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2947 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2948 update_e = EDGE_SUCC (new_bb, 0);
2950 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2952 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2954 tree access_fn = NULL;
2955 tree evolution_part;
2958 tree var, stmt, ni, ni_name;
2959 block_stmt_iterator last_bsi;
2961 /* Skip virtual phi's. The data dependences that are associated with
2962 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2964 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2966 if (vect_debug_details (NULL))
2967 fprintf (dump_file, "virtual phi. skip.");
2971 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2972 gcc_assert (access_fn);
2974 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2976 /* FORNOW: We do not transform initial conditions of IVs
2977 which evolution functions are a polynomial of degree >= 2 or
2979 gcc_assert (!tree_is_chrec (evolution_part));
2981 step_expr = evolution_part;
2982 init_expr = unshare_expr (initial_condition (access_fn));
2984 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2985 build2 (MULT_EXPR, TREE_TYPE (niters),
2986 niters, step_expr), init_expr);
2988 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2989 add_referenced_tmp_var (var);
2991 ni_name = force_gimple_operand (ni, &stmt, false, var);
2993 /* Insert stmt into new_bb. */
2994 last_bsi = bsi_last (new_bb);
2996 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2998 /* Fix phi expressions in duplicated loop. */
2999 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3000 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3001 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3006 /* This function is the main driver of transformation
3007 to be done for loop before vectorizing it in case of
3008 unknown loop bound. */
3011 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
3012 struct loops *loops)
3015 tree ni_name, ratio_mult_vf_name;
3016 #ifdef ENABLE_CHECKING
3019 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3020 struct loop *new_loop;
3022 if (vect_debug_details (NULL))
3023 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3025 /* Generate the following variables on the preheader of original loop:
3027 ni_name = number of iteration the original loop executes
3028 ratio = ni_name / vf
3029 ratio_mult_vf_name = ratio * vf */
3030 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3031 &ratio_mult_vf_name, ratio);
3033 /* Update loop info. */
3034 loop->pre_header = loop_preheader_edge (loop)->src;
3035 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3037 #ifdef ENABLE_CHECKING
3038 loop_num = loop->num;
3040 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
3041 ratio_mult_vf_name, ni_name, true);
3042 #ifdef ENABLE_CHECKING
3043 gcc_assert (new_loop);
3044 gcc_assert (loop_num == loop->num);
3047 /* Update IVs of original loop as if they were advanced
3048 by ratio_mult_vf_name steps. */
3050 #ifdef ENABLE_CHECKING
3051 /* Check existence of intermediate bb. */
3052 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3054 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3061 /* Function vect_gen_niters_for_prolog_loop
3063 Set the number of iterations for the loop represented by LOOP_VINFO
3064 to the minimum between NITERS (the original iteration count of the loop)
3065 and the misalignment of DR - the first data reference recorded in
3066 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3067 this loop, the data reference DR will refer to an aligned location. */
3070 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3072 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3073 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3074 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3076 tree iters, iters_name;
3079 tree dr_stmt = DR_STMT (dr);
3080 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3081 tree start_addr, byte_miss_align, elem_miss_align;
3082 int vec_type_align =
3083 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3086 tree new_stmt_list = NULL_TREE;
3088 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3089 &new_stmt_list, NULL_TREE);
3091 pe = loop_preheader_edge (loop);
3092 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3094 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3097 build (BIT_AND_EXPR, integer_type_node, start_addr,
3098 build (MINUS_EXPR, integer_type_node,
3099 build_int_cst (unsigned_type_node,
3100 vec_type_align), integer_one_node));
3101 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3102 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3103 byte_miss_align, tmp1);
3106 build (BIT_AND_EXPR, integer_type_node,
3107 build (MINUS_EXPR, integer_type_node,
3108 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3109 build (MINUS_EXPR, integer_type_node,
3110 build_int_cst (unsigned_type_node, vf), integer_one_node));
3112 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3113 var = create_tmp_var (TREE_TYPE (iters), "iters");
3114 add_referenced_tmp_var (var);
3115 iters_name = force_gimple_operand (iters, &stmt, false, var);
3117 /* Insert stmt on loop preheader edge. */
3118 pe = loop_preheader_edge (loop);
3120 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3122 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3128 /* Function vect_update_niters_after_peeling
3130 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3131 The new number of iterations is therefore original_niters - NITERS.
3132 Record the new number of iterations in LOOP_VINFO. */
3135 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3137 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3138 LOOP_VINFO_NITERS (loop_vinfo) =
3139 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3143 /* Function vect_update_inits_of_dr
3145 NITERS iterations were peeled from LOOP. DR represents a data reference
3146 in LOOP. This function updates the information recorded in DR to
3147 account for the fact that the first NITERS iterations had already been
3148 executed. Specifically, it updates the initial_condition of the
3149 access_function of DR. */
3152 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3155 tree access_fn = DR_ACCESS_FN (dr, 0);
3156 tree init, init_new, step;
3158 step = evolution_part_in_loop_num (access_fn, loop->num);
3159 init = initial_condition (access_fn);
3161 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3162 build (MULT_EXPR, TREE_TYPE (niters),
3163 niters, step), init);
3164 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3170 /* Function vect_update_inits_of_drs
3172 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3173 This function updates the information recorded for the data references in
3174 the loop to account for the fact that the first NITERS iterations had
3175 already been executed. Specifically, it updates the initial_condition of the
3176 access_function of all the data_references in the loop. */
3179 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3182 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3183 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3186 if (dump_file && (dump_flags & TDF_DETAILS))
3187 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3189 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3191 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3192 vect_update_inits_of_dr (dr, loop, niters);
3195 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3197 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3198 vect_update_inits_of_dr (dr, loop, niters);
3203 /* Function vect_do_peeling_for_alignment
3205 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3206 'niters' is set to the misalignment of one of the data references in the
3207 loop, thereby forcing it to refer to an aligned location at the beginning
3208 of the execution of this loop. The data reference for which we are
3209 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3212 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3214 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3215 tree niters_of_prolog_loop, ni_name;
3217 if (vect_debug_details (NULL))
3218 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3220 ni_name = vect_build_loop_niters (loop_vinfo);
3221 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3224 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3225 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3226 niters_of_prolog_loop, ni_name, false);
3228 /* Update number of times loop executes. */
3229 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3231 /* Update all inits of access functions of all data refs. */
3232 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3234 /* After peeling we have to reset scalar evolution analyzer. */
3241 /* Function vect_transform_loop.
3243 The analysis phase has determined that the loop is vectorizable.
3244 Vectorize the loop - created vectorized stmts to replace the scalar
3245 stmts in the loop, and update the loop exit condition. */
3248 vect_transform_loop (loop_vec_info loop_vinfo,
3249 struct loops *loops ATTRIBUTE_UNUSED)
3251 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3252 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3253 int nbbs = loop->num_nodes;
3254 block_stmt_iterator si;
3257 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3259 if (vect_debug_details (NULL))
3260 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3263 /* Peel the loop if there are data refs with unknown alignment.
3264 Only one data ref with unknown store is allowed. */
3267 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3268 vect_do_peeling_for_alignment (loop_vinfo, loops);
3270 /* If the loop has a symbolic number of iterations 'n'
3271 (i.e. it's not a compile time constant),
3272 then an epilog loop needs to be created. We therefore duplicate
3273 the initial loop. The original loop will be vectorized, and will compute
3274 the first (n/VF) iterations. The second copy of the loop will remain
3275 serial and will compute the remaining (n%VF) iterations.
3276 (VF is the vectorization factor). */
3278 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3279 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3281 /* FORNOW: we'll treat the case where niters is constant and
3285 in the way similar to one with symbolic niters.
3286 For this we'll generate variable which value is equal to niters. */
3288 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3289 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3290 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3293 /* 1) Make sure the loop header has exactly two entries
3294 2) Make sure we have a preheader basic block. */
3296 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3298 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3301 /* FORNOW: the vectorizer supports only loops which body consist
3302 of one basic block (header + empty latch). When the vectorizer will
3303 support more involved loop forms, the order by which the BBs are
3304 traversed need to be reconsidered. */
3306 for (i = 0; i < nbbs; i++)
3308 basic_block bb = bbs[i];
3310 for (si = bsi_start (bb); !bsi_end_p (si);)
3312 tree stmt = bsi_stmt (si);
3313 stmt_vec_info stmt_info;
3316 if (vect_debug_details (NULL))
3318 fprintf (dump_file, "------>vectorizing statement: ");
3319 print_generic_expr (dump_file, stmt, TDF_SLIM);
3321 stmt_info = vinfo_for_stmt (stmt);
3322 gcc_assert (stmt_info);
3323 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3328 #ifdef ENABLE_CHECKING
3329 /* FORNOW: Verify that all stmts operate on the same number of
3330 units and no inner unrolling is necessary. */
3332 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3333 == vectorization_factor);
3335 /* -------- vectorize statement ------------ */
3336 if (vect_debug_details (NULL))
3337 fprintf (dump_file, "transform statement.");
3339 is_store = vect_transform_stmt (stmt, &si);
3342 /* free the attached stmt_vec_info and remove the stmt. */
3343 stmt_ann_t ann = stmt_ann (stmt);
3345 set_stmt_info (ann, NULL);
3354 vect_transform_loop_bound (loop_vinfo, ratio);
3356 if (vect_debug_details (loop))
3357 fprintf (dump_file,"Success! loop vectorized.");
3358 if (vect_debug_stats (loop))
3359 fprintf (dump_file, "LOOP VECTORIZED.");
3363 /* Function vect_is_simple_use.
3366 LOOP - the loop that is being vectorized.
3367 OPERAND - operand of a stmt in LOOP.
3368 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3370 Returns whether a stmt with OPERAND can be vectorized.
3371 Supportable operands are constants, loop invariants, and operands that are
3372 defined by the current iteration of the loop. Unsupportable operands are
3373 those that are defined by a previous iteration of the loop (as is the case
3374 in reduction/induction computations). */
3377 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3385 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3388 if (TREE_CODE (operand) != SSA_NAME)
3391 def_stmt = SSA_NAME_DEF_STMT (operand);
3392 if (def_stmt == NULL_TREE )
3394 if (vect_debug_details (NULL))
3395 fprintf (dump_file, "no def_stmt.");
3399 /* empty stmt is expected only in case of a function argument.
3400 (Otherwise - we expect a phi_node or a modify_expr). */
3401 if (IS_EMPTY_STMT (def_stmt))
3403 tree arg = TREE_OPERAND (def_stmt, 0);
3404 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3406 if (vect_debug_details (NULL))
3408 fprintf (dump_file, "Unexpected empty stmt: ");
3409 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3414 /* phi_node inside the loop indicates an induction/reduction pattern.
3415 This is not supported yet. */
3416 bb = bb_for_stmt (def_stmt);
3417 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3419 if (vect_debug_details (NULL))
3420 fprintf (dump_file, "reduction/induction - unsupported.");
3421 return false; /* FORNOW: not supported yet. */
3424 /* Expecting a modify_expr or a phi_node. */
3425 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3426 || TREE_CODE (def_stmt) == PHI_NODE)
3437 /* Function vect_analyze_operations.
3439 Scan the loop stmts and make sure they are all vectorizable. */
3442 vect_analyze_operations (loop_vec_info loop_vinfo)
3444 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3445 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3446 int nbbs = loop->num_nodes;
3447 block_stmt_iterator si;
3448 int vectorization_factor = 0;
3453 if (vect_debug_details (NULL))
3454 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3456 for (i = 0; i < nbbs; i++)
3458 basic_block bb = bbs[i];
3460 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3462 tree stmt = bsi_stmt (si);
3464 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3467 if (vect_debug_details (NULL))
3469 fprintf (dump_file, "==> examining statement: ");
3470 print_generic_expr (dump_file, stmt, TDF_SLIM);
3473 gcc_assert (stmt_info);
3475 /* skip stmts which do not need to be vectorized.
3476 this is expected to include:
3477 - the COND_EXPR which is the loop exit condition
3478 - any LABEL_EXPRs in the loop
3479 - computations that are used only for array indexing or loop
3482 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3484 if (vect_debug_details (NULL))
3485 fprintf (dump_file, "irrelevant.");
3489 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3491 if (vect_debug_stats (loop) || vect_debug_details (loop))
3493 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3494 print_generic_expr (dump_file, stmt, TDF_SLIM);
3499 if (STMT_VINFO_DATA_REF (stmt_info))
3500 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3501 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3502 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3504 scalar_type = TREE_TYPE (stmt);
3506 if (vect_debug_details (NULL))
3508 fprintf (dump_file, "get vectype for scalar type: ");
3509 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3512 vectype = get_vectype_for_scalar_type (scalar_type);
3515 if (vect_debug_stats (loop) || vect_debug_details (loop))
3517 fprintf (dump_file, "not vectorized: unsupported data-type ");
3518 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3523 if (vect_debug_details (NULL))
3525 fprintf (dump_file, "vectype: ");
3526 print_generic_expr (dump_file, vectype, TDF_SLIM);
3528 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3530 ok = (vectorizable_operation (stmt, NULL, NULL)
3531 || vectorizable_assignment (stmt, NULL, NULL)
3532 || vectorizable_load (stmt, NULL, NULL)
3533 || vectorizable_store (stmt, NULL, NULL));
3537 if (vect_debug_stats (loop) || vect_debug_details (loop))
3539 fprintf (dump_file, "not vectorized: stmt not supported: ");
3540 print_generic_expr (dump_file, stmt, TDF_SLIM);
3545 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3546 if (vect_debug_details (NULL))
3547 fprintf (dump_file, "nunits = %d", nunits);
3549 if (vectorization_factor)
3551 /* FORNOW: don't allow mixed units.
3552 This restriction will be relaxed in the future. */
3553 if (nunits != vectorization_factor)
3555 if (vect_debug_stats (loop) || vect_debug_details (loop))
3556 fprintf (dump_file, "not vectorized: mixed data-types");
3561 vectorization_factor = nunits;
3563 #ifdef ENABLE_CHECKING
3564 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3565 * vectorization_factor == UNITS_PER_SIMD_WORD);
3570 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3572 if (vectorization_factor <= 1)
3574 if (vect_debug_stats (loop) || vect_debug_details (loop))
3575 fprintf (dump_file, "not vectorized: unsupported data-type");
3578 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3581 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3582 && vect_debug_details (NULL))
3584 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3585 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3587 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3588 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3590 /* In this case we have to generate epilog loop, that
3591 can be done only for loops with one entry edge. */
3592 if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
3593 || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
3595 if (vect_debug_stats (loop) || vect_debug_details (loop))
3596 fprintf (dump_file, "not vectorized: more than one entry.");
3605 /* Function exist_non_indexing_operands_for_use_p
3607 USE is one of the uses attached to STMT. Check if USE is
3608 used in STMT for anything other than indexing an array. */
3611 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3614 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3616 /* USE corresponds to some operand in STMT. If there is no data
3617 reference in STMT, then any operand that corresponds to USE
3618 is not indexing an array. */
3619 if (!STMT_VINFO_DATA_REF (stmt_info))
3622 /* STMT has a data_ref. FORNOW this means that its of one of
3623 the following forms:
3626 (This should have been verified in analyze_data_refs).
3628 'var' in the second case corresponds to a def, not a use,
3629 so USE cannot correspond to any operands that are not used
3632 Therefore, all we need to check is if STMT falls into the
3633 first case, and whether var corresponds to USE. */
3635 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3638 operand = TREE_OPERAND (stmt, 1);
3640 if (TREE_CODE (operand) != SSA_NAME)
3650 /* Function vect_is_simple_iv_evolution.
3652 FORNOW: A simple evolution of an induction variables in the loop is
3653 considered a polynomial evolution with constant step. */
3656 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3657 tree * step, bool strict)
3662 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3664 /* When there is no evolution in this loop, the evolution function
3666 if (evolution_part == NULL_TREE)
3669 /* When the evolution is a polynomial of degree >= 2
3670 the evolution function is not "simple". */
3671 if (tree_is_chrec (evolution_part))
3674 step_expr = evolution_part;
3675 init_expr = unshare_expr (initial_condition (access_fn));
3677 if (vect_debug_details (NULL))
3679 fprintf (dump_file, "step: ");
3680 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3681 fprintf (dump_file, ", init: ");
3682 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3688 if (TREE_CODE (step_expr) != INTEGER_CST)
3690 if (vect_debug_details (NULL))
3691 fprintf (dump_file, "step unknown.");
3696 if (!integer_onep (step_expr))
3698 if (vect_debug_details (NULL))
3699 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3707 /* Function vect_analyze_scalar_cycles.
3709 Examine the cross iteration def-use cycles of scalar variables, by
3710 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3711 cycles that they represent do not impede vectorization.
3713 FORNOW: Reduction as in the following loop, is not supported yet:
3717 The cross-iteration cycle corresponding to variable 'sum' will be
3718 considered too complicated and will impede vectorization.
3720 FORNOW: Induction as in the following loop, is not supported yet:
3725 However, the following loop *is* vectorizable:
3730 In both loops there exists a def-use cycle for the variable i:
3731 loop: i_2 = PHI (i_0, i_1)
3736 The evolution of the above cycle is considered simple enough,
3737 however, we also check that the cycle does not need to be
3738 vectorized, i.e - we check that the variable that this cycle
3739 defines is only used for array indexing or in stmts that do not
3740 need to be vectorized. This is not the case in loop2, but it
3741 *is* the case in loop3. */
3744 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3747 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3748 basic_block bb = loop->header;
3751 if (vect_debug_details (NULL))
3752 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3754 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3756 tree access_fn = NULL;
3758 if (vect_debug_details (NULL))
3760 fprintf (dump_file, "Analyze phi: ");
3761 print_generic_expr (dump_file, phi, TDF_SLIM);
3764 /* Skip virtual phi's. The data dependences that are associated with
3765 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3767 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3769 if (vect_debug_details (NULL))
3770 fprintf (dump_file, "virtual phi. skip.");
3774 /* Analyze the evolution function. */
3776 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3777 those of loop induction variables; This property is verified here.
3779 Furthermore, if that induction variable is used in an operation
3780 that needs to be vectorized (i.e, is not solely used to index
3781 arrays and check the exit condition) - we do not support its
3782 vectorization yet. This property is verified in vect_is_simple_use,
3783 during vect_analyze_operations. */
3785 access_fn = /* instantiate_parameters
3787 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3791 if (vect_debug_stats (loop) || vect_debug_details (loop))
3792 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3796 if (vect_debug_details (NULL))
3798 fprintf (dump_file, "Access function of PHI: ");
3799 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3802 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3805 if (vect_debug_stats (loop) || vect_debug_details (loop))
3806 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3815 /* Function vect_analyze_data_ref_dependence.
3817 Return TRUE if there (might) exist a dependence between a memory-reference
3818 DRA and a memory-reference DRB. */
3821 vect_analyze_data_ref_dependence (struct data_reference *dra,
3822 struct data_reference *drb,
3826 struct data_dependence_relation *ddr;
3828 if (!array_base_name_differ_p (dra, drb, &differ_p))
3830 if (vect_debug_stats (loop) || vect_debug_details (loop))
3833 "not vectorized: can't determine dependence between: ");
3834 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3835 fprintf (dump_file, " and ");
3836 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3844 ddr = initialize_data_dependence_relation (dra, drb);
3845 compute_affine_dependence (ddr);
3847 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3850 if (vect_debug_stats (loop) || vect_debug_details (loop))
3853 "not vectorized: possible dependence between data-refs ");
3854 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3855 fprintf (dump_file, " and ");
3856 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3863 /* Function vect_analyze_data_ref_dependences.
3865 Examine all the data references in the loop, and make sure there do not
3866 exist any data dependences between them.
3868 TODO: dependences which distance is greater than the vectorization factor
3872 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3875 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3876 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3877 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3879 /* Examine store-store (output) dependences. */
3881 if (vect_debug_details (NULL))
3882 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3884 if (vect_debug_details (NULL))
3885 fprintf (dump_file, "compare all store-store pairs.");
3887 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3889 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3891 struct data_reference *dra =
3892 VARRAY_GENERIC_PTR (loop_write_refs, i);
3893 struct data_reference *drb =
3894 VARRAY_GENERIC_PTR (loop_write_refs, j);
3895 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3900 /* Examine load-store (true/anti) dependences. */
3902 if (vect_debug_details (NULL))
3903 fprintf (dump_file, "compare all load-store pairs.");
3905 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3907 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3909 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3910 struct data_reference *drb =
3911 VARRAY_GENERIC_PTR (loop_write_refs, j);
3912 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3921 /* Function vect_get_first_index.
3923 REF is a data reference.
3924 If it is an ARRAY_REF: if its lower bound is simple enough,
3925 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3926 If it is not an ARRAY_REF: REF has no "first index";
3927 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3930 vect_get_first_index (tree ref, tree *array_first_index)
3934 if (TREE_CODE (ref) != ARRAY_REF)
3935 *array_first_index = size_zero_node;
3938 array_start = array_ref_low_bound (ref);
3939 if (!host_integerp (array_start,0))
3941 if (vect_debug_details (NULL))
3943 fprintf (dump_file, "array min val not simple integer cst.");
3944 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3948 *array_first_index = array_start;
3955 /* Function vect_compute_array_base_alignment.
3956 A utility function of vect_compute_array_ref_alignment.
3958 Compute the misalignment of ARRAY in bits.
3961 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3962 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3963 if NULL: don't compute misalignment, just return the base of ARRAY.
3964 PREV_DIMENSIONS - initialized to one.
3965 MISALIGNMENT - the computed misalignment in bits.
3968 If VECTYPE is not NULL:
3969 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3970 the base of the array, and put the computed misalignment in MISALIGNMENT.
3972 Return the base of the array.
3974 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3975 a[idx_N]...[idx_2][idx_1] is
3976 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3977 ... + idx_N * dim_0 * ... * dim_N-1}.
3978 (The misalignment of &a is not checked here).
3979 Note, that every term contains dim_0, therefore, if dim_0 is a
3980 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3981 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3982 NUINTS, we can say that the misalignment of the sum is equal to
3983 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3984 we can't determine this array misalignment, and we return
3986 We proceed recursively in this manner, accumulating total misalignment
3987 and the multiplication of previous dimensions for correct misalignment
3991 vect_compute_array_base_alignment (tree array,
3993 tree *prev_dimensions,
3998 tree dimension_size;
4000 tree bits_per_vectype;
4001 tree bits_per_vectype_unit;
4003 /* The 'stop condition' of the recursion. */
4004 if (TREE_CODE (array) != ARRAY_REF)
4008 /* Just get the base decl. */
4009 return vect_compute_array_base_alignment
4010 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4012 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4013 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4016 domain = TYPE_DOMAIN (TREE_TYPE (array));
4018 int_const_binop (PLUS_EXPR,
4019 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4020 TYPE_MIN_VALUE (domain), 1),
4023 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4024 is a multiple of NUNITS:
4026 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4028 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4029 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4030 if (integer_zerop (mis))
4031 /* This array is aligned. Continue just in order to get the base decl. */
4032 return vect_compute_array_base_alignment
4033 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4035 index = TREE_OPERAND (array, 1);
4036 if (!host_integerp (index, 1))
4037 /* The current index is not constant. */
4040 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4042 bits_per_vectype = fold_convert (unsigned_type_node,
4043 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4044 GET_MODE_SIZE (TYPE_MODE (vectype))));
4045 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4046 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4047 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4049 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4053 (*misalignment + index_val * dimension_size * *prev_dimensions)
4057 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4058 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4059 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4060 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4061 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4064 *prev_dimensions = int_const_binop (MULT_EXPR,
4065 *prev_dimensions, dimension_size, 1);
4067 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4073 /* Function vect_compute_data_ref_alignment
4075 Compute the misalignment of the data reference DR.
4078 1. If during the misalignment computation it is found that the data reference
4079 cannot be vectorized then false is returned.
4080 2. DR_MISALIGNMENT (DR) is defined.
4082 FOR NOW: No analysis is actually performed. Misalignment is calculated
4083 only for trivial cases. TODO. */
4086 vect_compute_data_ref_alignment (struct data_reference *dr,
4087 loop_vec_info loop_vinfo)
4089 tree stmt = DR_STMT (dr);
4090 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4091 tree ref = DR_REF (dr);
4094 tree offset = size_zero_node;
4095 tree base, bit_offset, alignment;
4096 tree unit_bits = fold_convert (unsigned_type_node,
4097 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4099 bool base_aligned_p;
4101 if (vect_debug_details (NULL))
4102 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4104 /* Initialize misalignment to unknown. */
4105 DR_MISALIGNMENT (dr) = -1;
4107 scalar_type = TREE_TYPE (ref);
4108 vectype = get_vectype_for_scalar_type (scalar_type);
4111 if (vect_debug_details (NULL))
4113 fprintf (dump_file, "no vectype for stmt: ");
4114 print_generic_expr (dump_file, stmt, TDF_SLIM);
4115 fprintf (dump_file, " scalar_type: ");
4116 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4118 /* It is not possible to vectorize this data reference. */
4121 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4122 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4124 if (TREE_CODE (ref) == ARRAY_REF)
4127 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4129 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4130 loop_vinfo, &bit_offset, &base_aligned_p);
4133 if (vect_debug_details (NULL))
4135 fprintf (dump_file, "Unknown alignment for access: ");
4136 print_generic_expr (dump_file,
4137 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4142 if (!base_aligned_p)
4144 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4146 if (vect_debug_details (NULL))
4148 fprintf (dump_file, "can't force alignment of ref: ");
4149 print_generic_expr (dump_file, ref, TDF_SLIM);
4154 /* Force the alignment of the decl.
4155 NOTE: This is the only change to the code we make during
4156 the analysis phase, before deciding to vectorize the loop. */
4157 if (vect_debug_details (NULL))
4158 fprintf (dump_file, "force alignment");
4159 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4160 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4163 /* At this point we assume that the base is aligned, and the offset from it
4164 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4165 gcc_assert (base_aligned_p
4166 || (TREE_CODE (base) == VAR_DECL
4167 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4169 /* Convert into bytes. */
4170 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4171 /* Check that there is no remainder in bits. */
4172 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4173 if (!integer_zerop (bit_offset))
4175 if (vect_debug_details (NULL))
4177 fprintf (dump_file, "bit offset alignment: ");
4178 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4183 /* Alignment required, in bytes: */
4184 alignment = fold_convert (unsigned_type_node,
4185 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4187 /* Modulo alignment. */
4188 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4189 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4191 if (vect_debug_details (NULL))
4192 fprintf (dump_file, "unexpected misalign value");
4196 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4198 if (vect_debug_details (NULL))
4199 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4205 /* Function vect_compute_array_ref_alignment
4207 Compute the alignment of an array-ref.
4208 The alignment we compute here is relative to
4209 TYPE_ALIGN(VECTYPE) boundary.
4212 OFFSET - the alignment in bits
4213 Return value - the base of the array-ref. E.g,
4214 if the array-ref is a.b[k].c[i][j] the returned
4219 vect_compute_array_ref_alignment (struct data_reference *dr,
4220 loop_vec_info loop_vinfo,
4224 tree array_first_index = size_zero_node;
4226 tree ref = DR_REF (dr);
4227 tree scalar_type = TREE_TYPE (ref);
4228 tree oprnd0 = TREE_OPERAND (ref, 0);
4229 tree dims = size_one_node;
4230 tree misalign = size_zero_node;
4231 tree next_ref, this_offset = size_zero_node;
4235 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4236 /* The reference is an array without its last index. */
4237 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4240 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4243 /* Alignment is not requested. Just return the base. */
4246 /* Compute alignment. */
4247 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4249 this_offset = misalign;
4251 /* Check the first index accessed. */
4252 if (!vect_get_first_index (ref, &array_first_index))
4254 if (vect_debug_details (NULL))
4255 fprintf (dump_file, "no first_index for array.");
4259 /* Check the index of the array_ref. */
4260 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4261 LOOP_VINFO_LOOP (loop_vinfo)->num);
4263 /* FORNOW: In order to simplify the handling of alignment, we make sure
4264 that the first location at which the array is accessed ('init') is on an
4265 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4266 This is too conservative, since we require that
4267 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4268 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4269 This should be relaxed in the future. */
4271 if (!init || !host_integerp (init, 0))
4273 if (vect_debug_details (NULL))
4274 fprintf (dump_file, "non constant init. ");
4278 /* bytes per scalar element: */
4279 nunits = fold_convert (unsigned_type_node,
4280 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4281 nbits = int_const_binop (MULT_EXPR, nunits,
4282 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4284 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4285 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4286 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4287 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4289 /* TODO: allow negative misalign values. */
4290 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4292 if (vect_debug_details (NULL))
4293 fprintf (dump_file, "unexpected misalign value");
4301 /* Function vect_compute_data_refs_alignment
4303 Compute the misalignment of data references in the loop.
4304 This pass may take place at function granularity instead of at loop
4307 FOR NOW: No analysis is actually performed. Misalignment is calculated
4308 only for trivial cases. TODO. */
4311 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4313 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4314 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4317 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4319 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4320 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4324 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4326 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4327 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4335 /* Function vect_enhance_data_refs_alignment
4337 This pass will use loop versioning and loop peeling in order to enhance
4338 the alignment of data references in the loop.
4340 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4341 original loop is to be vectorized; Any other loops that are created by
4342 the transformations performed in this pass - are not supposed to be
4343 vectorized. This restriction will be relaxed.
4345 FOR NOW: No transformation is actually performed. TODO. */
4348 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4350 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4351 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4352 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4356 This pass will require a cost model to guide it whether to apply peeling
4357 or versioning or a combination of the two. For example, the scheme that
4358 intel uses when given a loop with several memory accesses, is as follows:
4359 choose one memory access ('p') which alignment you want to force by doing
4360 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4361 other accesses are not necessarily aligned, or (2) use loop versioning to
4362 generate one loop in which all accesses are aligned, and another loop in
4363 which only 'p' is necessarily aligned.
4365 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4366 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4367 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4369 Devising a cost model is the most critical aspect of this work. It will
4370 guide us on which access to peel for, whether to use loop versioning, how
4371 many versions to create, etc. The cost model will probably consist of
4372 generic considerations as well as target specific considerations (on
4373 powerpc for example, misaligned stores are more painful than misaligned
4376 Here is the general steps involved in alignment enhancements:
4378 -- original loop, before alignment analysis:
4379 for (i=0; i<N; i++){
4380 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4381 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4384 -- After vect_compute_data_refs_alignment:
4385 for (i=0; i<N; i++){
4386 x = q[i]; # DR_MISALIGNMENT(q) = 3
4387 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4390 -- Possibility 1: we do loop versioning:
4392 for (i=0; i<N; i++){ # loop 1A
4393 x = q[i]; # DR_MISALIGNMENT(q) = 3
4394 p[i] = y; # DR_MISALIGNMENT(p) = 0
4398 for (i=0; i<N; i++){ # loop 1B
4399 x = q[i]; # DR_MISALIGNMENT(q) = 3
4400 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4404 -- Possibility 2: we do loop peeling:
4405 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4409 for (i = 3; i < N; i++){ # loop 2A
4410 x = q[i]; # DR_MISALIGNMENT(q) = 0
4411 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4414 -- Possibility 3: combination of loop peeling and versioning:
4415 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4420 for (i = 3; i<N; i++){ # loop 3A
4421 x = q[i]; # DR_MISALIGNMENT(q) = 0
4422 p[i] = y; # DR_MISALIGNMENT(p) = 0
4426 for (i = 3; i<N; i++){ # loop 3B
4427 x = q[i]; # DR_MISALIGNMENT(q) = 0
4428 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4432 These loops are later passed to loop_transform to be vectorized. The
4433 vectorizer will use the alignment information to guide the transformation
4434 (whether to generate regular loads/stores, or with special handling for
4438 /* (1) Peeling to force alignment. */
4440 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4442 + How many accesses will become aligned due to the peeling
4443 - How many accesses will become unaligned due to the peeling,
4444 and the cost of misaligned accesses.
4445 - The cost of peeling (the extra runtime checks, the increase
4448 The scheme we use FORNOW: peel to force the alignment of the first
4449 misaligned store in the loop.
4450 Rationale: misaligned stores are not yet supported.
4452 TODO: Use a better cost model. */
4454 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4456 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4457 if (!aligned_access_p (dr))
4459 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4460 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4465 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4467 if (vect_debug_details (loop))
4468 fprintf (dump_file, "Peeling for alignment will not be applied.");
4472 if (vect_debug_details (loop))
4473 fprintf (dump_file, "Peeling for alignment will be applied.");
4476 /* (1.2) Update the alignment info according to the peeling factor.
4477 If the misalignment of the DR we peel for is M, then the
4478 peeling factor is VF - M, and the misalignment of each access DR_i
4479 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4480 If the misalignment of the DR we peel for is unknown, then the
4481 misalignment of each access DR_i in the loop is also unknown.
4483 FORNOW: set the misalignment of the accesses to unknown even
4484 if the peeling factor is known at compile time.
4486 TODO: - if the peeling factor is known at compile time, use that
4487 when updating the misalignment info of the loop DRs.
4488 - consider accesses that are known to have the same
4489 alignment, even if that alignment is unknown. */
4491 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4493 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4494 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4495 DR_MISALIGNMENT (dr) = 0;
4497 DR_MISALIGNMENT (dr) = -1;
4499 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4501 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4502 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4503 DR_MISALIGNMENT (dr) = 0;
4505 DR_MISALIGNMENT (dr) = -1;
4510 /* Function vect_analyze_data_refs_alignment
4512 Analyze the alignment of the data-references in the loop.
4513 FOR NOW: Until support for misliagned accesses is in place, only if all
4514 accesses are aligned can the loop be vectorized. This restriction will be
4518 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4520 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4521 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4522 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4523 enum dr_alignment_support supportable_dr_alignment;
4526 if (vect_debug_details (NULL))
4527 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4530 /* This pass may take place at function granularity instead of at loop
4533 if (!vect_compute_data_refs_alignment (loop_vinfo))
4535 if (vect_debug_details (loop) || vect_debug_stats (loop))
4537 "not vectorized: can't calculate alignment for data ref.");
4542 /* This pass will decide on using loop versioning and/or loop peeling in
4543 order to enhance the alignment of data references in the loop. */
4545 vect_enhance_data_refs_alignment (loop_vinfo);
4548 /* Finally, check that all the data references in the loop can be
4549 handled with respect to their alignment. */
4551 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4553 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4554 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4555 if (!supportable_dr_alignment)
4557 if (vect_debug_details (loop) || vect_debug_stats (loop))
4558 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4562 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4564 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4565 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4566 if (!supportable_dr_alignment)
4568 if (vect_debug_details (loop) || vect_debug_stats (loop))
4569 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4578 /* Function vect_analyze_data_ref_access.
4580 Analyze the access pattern of the data-reference DR. For now, a data access
4581 has to consecutive and aligned to be considered vectorizable. */
4584 vect_analyze_data_ref_access (struct data_reference *dr)
4586 varray_type access_fns = DR_ACCESS_FNS (dr);
4589 unsigned int dimensions, i;
4591 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4592 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4593 access is contiguous). */
4594 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4596 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4598 access_fn = DR_ACCESS_FN (dr, i);
4600 if (evolution_part_in_loop_num (access_fn,
4601 loop_containing_stmt (DR_STMT (dr))->num))
4603 /* Evolution part is not NULL in this loop (it is neither constant
4605 if (vect_debug_details (NULL))
4608 "not vectorized: complicated multidim. array access.");
4609 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4615 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4616 if (!evolution_function_is_constant_p (access_fn)
4617 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4618 access_fn, &init, &step, true))
4620 if (vect_debug_details (NULL))
4622 fprintf (dump_file, "not vectorized: complicated access function.");
4623 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4632 /* Function vect_analyze_data_ref_accesses.
4634 Analyze the access pattern of all the data references in the loop.
4636 FORNOW: the only access pattern that is considered vectorizable is a
4637 simple step 1 (consecutive) access.
4639 FORNOW: handle only arrays and pointer accesses. */
4642 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4645 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4646 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4648 if (vect_debug_details (NULL))
4649 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4651 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4653 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4654 bool ok = vect_analyze_data_ref_access (dr);
4657 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4658 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4659 fprintf (dump_file, "not vectorized: complicated access pattern.");
4664 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4666 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4667 bool ok = vect_analyze_data_ref_access (dr);
4670 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4671 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4672 fprintf (dump_file, "not vectorized: complicated access pattern.");
4681 /* Function vect_analyze_pointer_ref_access.
4684 STMT - a stmt that contains a data-ref
4685 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4687 If the data-ref access is vectorizable, return a data_reference structure
4688 that represents it (DR). Otherwise - return NULL. */
4690 static struct data_reference *
4691 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4693 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4694 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4695 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4698 tree reftype, innertype;
4699 enum machine_mode innermode;
4700 tree indx_access_fn;
4701 int loopnum = loop->num;
4702 struct data_reference *dr;
4706 if (vect_debug_stats (loop) || vect_debug_details (loop))
4707 fprintf (dump_file, "not vectorized: complicated pointer access.");
4711 if (vect_debug_details (NULL))
4713 fprintf (dump_file, "Access function of ptr: ");
4714 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4717 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4719 if (vect_debug_stats (loop) || vect_debug_details (loop))
4720 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4726 if (!host_integerp (step,0))
4728 if (vect_debug_stats (loop) || vect_debug_details (loop))
4730 "not vectorized: non constant step for pointer access.");
4734 step_val = TREE_INT_CST_LOW (step);
4736 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4737 if (TREE_CODE (reftype) != POINTER_TYPE)
4739 if (vect_debug_stats (loop) || vect_debug_details (loop))
4740 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4744 reftype = TREE_TYPE (init);
4745 if (TREE_CODE (reftype) != POINTER_TYPE)
4747 if (vect_debug_stats (loop) || vect_debug_details (loop))
4748 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4752 innertype = TREE_TYPE (reftype);
4753 innermode = TYPE_MODE (innertype);
4754 if (GET_MODE_SIZE (innermode) != step_val)
4756 /* FORNOW: support only consecutive access */
4757 if (vect_debug_stats (loop) || vect_debug_details (loop))
4758 fprintf (dump_file, "not vectorized: non consecutive access.");
4763 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4764 if (vect_debug_details (NULL))
4766 fprintf (dump_file, "Access function of ptr indx: ");
4767 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4769 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4774 /* Function vect_get_symbl_and_dr.
4776 The function returns SYMBL - the relevant variable for
4777 memory tag (for aliasing purposes).
4778 Also data reference structure DR is created.
4781 MEMREF - data reference in STMT
4782 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4785 DR - data_reference struct for MEMREF
4786 return value - the relevant variable for memory tag (for aliasing purposes).
4791 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4792 loop_vec_info loop_vinfo, struct data_reference **dr)
4794 tree symbl, oprnd0, oprnd1;
4795 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4797 tree array_base, base;
4798 struct data_reference *new_dr;
4799 bool base_aligned_p;
4802 switch (TREE_CODE (memref))
4805 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4809 symbl = DR_BASE_NAME (new_dr);
4810 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4812 switch (TREE_CODE (symbl))
4816 oprnd0 = TREE_OPERAND (symbl, 0);
4817 oprnd1 = TREE_OPERAND (symbl, 1);
4820 /* Only {address_base + offset} expressions are supported,
4821 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4822 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4823 TODO: swap operands if {offset + address_base}. */
4824 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4825 && TREE_CODE (oprnd1) != INTEGER_CST)
4826 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4829 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4832 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4833 loop_vinfo, &new_dr);
4837 /* symbl remains unchanged. */
4841 if (vect_debug_details (NULL))
4843 fprintf (dump_file, "unhandled data ref: ");
4844 print_generic_expr (dump_file, memref, TDF_SLIM);
4845 fprintf (dump_file, " (symbl ");
4846 print_generic_expr (dump_file, symbl, TDF_SLIM);
4847 fprintf (dump_file, ") in stmt ");
4848 print_generic_expr (dump_file, stmt, TDF_SLIM);
4855 offset = size_zero_node;
4857 /* Store the array base in the stmt info.
4858 For one dimensional array ref a[i], the base is a,
4859 for multidimensional a[i1][i2]..[iN], the base is
4860 a[i1][i2]..[iN-1]. */
4861 array_base = TREE_OPERAND (memref, 0);
4862 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4864 new_dr = analyze_array (stmt, memref, is_read);
4867 /* Find the relevant symbol for aliasing purposes. */
4868 base = DR_BASE_NAME (new_dr);
4869 switch (TREE_CODE (base))
4876 symbl = TREE_OPERAND (base, 0);
4880 /* Could have recorded more accurate information -
4881 i.e, the actual FIELD_DECL that is being referenced -
4882 but later passes expect VAR_DECL as the nmt. */
4883 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4884 loop_vinfo, &offset, &base_aligned_p);
4889 if (vect_debug_details (NULL))
4891 fprintf (dump_file, "unhandled struct/class field access ");
4892 print_generic_expr (dump_file, stmt, TDF_SLIM);
4899 if (vect_debug_details (NULL))
4901 fprintf (dump_file, "unhandled data ref: ");
4902 print_generic_expr (dump_file, memref, TDF_SLIM);
4903 fprintf (dump_file, " in stmt ");
4904 print_generic_expr (dump_file, stmt, TDF_SLIM);
4912 /* Function vect_analyze_data_refs.
4914 Find all the data references in the loop.
4916 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4917 which base is really an array (not a pointer) and which alignment
4918 can be forced. This restriction will be relaxed. */
4921 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4923 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4924 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4925 int nbbs = loop->num_nodes;
4926 block_stmt_iterator si;
4928 struct data_reference *dr;
4931 bool base_aligned_p;
4934 if (vect_debug_details (NULL))
4935 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4937 for (j = 0; j < nbbs; j++)
4939 basic_block bb = bbs[j];
4940 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4942 bool is_read = false;
4943 tree stmt = bsi_stmt (si);
4944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4945 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4946 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4947 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4948 varray_type *datarefs = NULL;
4949 int nvuses, nv_may_defs, nv_must_defs;
4953 /* Assumption: there exists a data-ref in stmt, if and only if
4954 it has vuses/vdefs. */
4956 if (!vuses && !v_may_defs && !v_must_defs)
4959 nvuses = NUM_VUSES (vuses);
4960 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4961 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4963 if (nvuses && (nv_may_defs || nv_must_defs))
4965 if (vect_debug_details (NULL))
4967 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4968 print_generic_expr (dump_file, stmt, TDF_SLIM);
4973 if (TREE_CODE (stmt) != MODIFY_EXPR)
4975 if (vect_debug_details (NULL))
4977 fprintf (dump_file, "unexpected vops in stmt: ");
4978 print_generic_expr (dump_file, stmt, TDF_SLIM);
4985 memref = TREE_OPERAND (stmt, 1);
4986 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4991 memref = TREE_OPERAND (stmt, 0);
4992 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4996 /* Analyze MEMREF. If it is of a supported form, build data_reference
4997 struct for it (DR) and find the relevant symbol for aliasing
4999 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
5003 if (vect_debug_stats (loop) || vect_debug_details (loop))
5005 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5006 print_generic_expr (dump_file, stmt, TDF_SLIM);
5011 /* Find and record the memtag assigned to this data-ref. */
5012 switch (TREE_CODE (symbl))
5015 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5019 symbl = SSA_NAME_VAR (symbl);
5020 tag = get_var_ann (symbl)->type_mem_tag;
5023 tree ptr = TREE_OPERAND (memref, 0);
5024 if (TREE_CODE (ptr) == SSA_NAME)
5025 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5029 if (vect_debug_stats (loop) || vect_debug_details (loop))
5030 fprintf (dump_file, "not vectorized: no memtag for ref.");
5033 STMT_VINFO_MEMTAG (stmt_info) = tag;
5037 address_base = TREE_OPERAND (symbl, 0);
5039 switch (TREE_CODE (address_base))
5042 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5044 STMT_VINFO_MEMTAG (stmt_info) =
5045 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5046 loop_vinfo, &offset,
5051 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5055 if (vect_debug_stats (loop) || vect_debug_details (loop))
5058 "not vectorized: unhandled address expr: ");
5059 print_generic_expr (dump_file, stmt, TDF_SLIM);
5066 if (vect_debug_stats (loop) || vect_debug_details (loop))
5068 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5069 print_generic_expr (dump_file, memref, TDF_SLIM);
5074 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5075 STMT_VINFO_DATA_REF (stmt_info) = dr;
5083 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5085 /* Function vect_mark_relevant.
5087 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5090 vect_mark_relevant (varray_type worklist, tree stmt)
5092 stmt_vec_info stmt_info;
5094 if (vect_debug_details (NULL))
5095 fprintf (dump_file, "mark relevant.");
5097 if (TREE_CODE (stmt) == PHI_NODE)
5099 VARRAY_PUSH_TREE (worklist, stmt);
5103 stmt_info = vinfo_for_stmt (stmt);
5107 if (vect_debug_details (NULL))
5109 fprintf (dump_file, "mark relevant: no stmt info!!.");
5110 print_generic_expr (dump_file, stmt, TDF_SLIM);
5115 if (STMT_VINFO_RELEVANT_P (stmt_info))
5117 if (vect_debug_details (NULL))
5118 fprintf (dump_file, "already marked relevant.");
5122 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5123 VARRAY_PUSH_TREE (worklist, stmt);
5127 /* Function vect_stmt_relevant_p.
5129 Return true if STMT in loop that is represented by LOOP_VINFO is
5130 "relevant for vectorization".
5132 A stmt is considered "relevant for vectorization" if:
5133 - it has uses outside the loop.
5134 - it has vdefs (it alters memory).
5135 - control stmts in the loop (except for the exit condition).
5137 CHECKME: what other side effects would the vectorizer allow? */
5140 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5142 v_may_def_optype v_may_defs;
5143 v_must_def_optype v_must_defs;
5144 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5149 /* cond stmt other than loop exit cond. */
5150 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5153 /* changing memory. */
5154 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5155 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5156 if (v_may_defs || v_must_defs)
5158 if (vect_debug_details (NULL))
5159 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5163 /* uses outside the loop. */
5164 df = get_immediate_uses (stmt);
5165 num_uses = num_immediate_uses (df);
5166 for (i = 0; i < num_uses; i++)
5168 tree use = immediate_use (df, i);
5169 basic_block bb = bb_for_stmt (use);
5170 if (!flow_bb_inside_loop_p (loop, bb))
5172 if (vect_debug_details (NULL))
5173 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5182 /* Function vect_mark_stmts_to_be_vectorized.
5184 Not all stmts in the loop need to be vectorized. For example:
5193 Stmt 1 and 3 do not need to be vectorized, because loop control and
5194 addressing of vectorized data-refs are handled differently.
5196 This pass detects such stmts. */
5199 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5201 varray_type worklist;
5202 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5203 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5204 unsigned int nbbs = loop->num_nodes;
5205 block_stmt_iterator si;
5211 stmt_vec_info stmt_info;
5213 if (vect_debug_details (NULL))
5214 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5216 VARRAY_TREE_INIT (worklist, 64, "work list");
5218 /* 1. Init worklist. */
5220 for (i = 0; i < nbbs; i++)
5222 basic_block bb = bbs[i];
5223 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5225 stmt = bsi_stmt (si);
5227 if (vect_debug_details (NULL))
5229 fprintf (dump_file, "init: stmt relevant? ");
5230 print_generic_expr (dump_file, stmt, TDF_SLIM);
5233 stmt_info = vinfo_for_stmt (stmt);
5234 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5236 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5237 vect_mark_relevant (worklist, stmt);
5242 /* 2. Process_worklist */
5244 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5246 stmt = VARRAY_TOP_TREE (worklist);
5247 VARRAY_POP (worklist);
5249 if (vect_debug_details (NULL))
5251 fprintf (dump_file, "worklist: examine stmt: ");
5252 print_generic_expr (dump_file, stmt, TDF_SLIM);
5255 /* Examine the USES in this statement. Mark all the statements which
5256 feed this statement's uses as "relevant", unless the USE is used as
5259 if (TREE_CODE (stmt) == PHI_NODE)
5261 /* follow the def-use chain inside the loop. */
5262 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5264 tree arg = PHI_ARG_DEF (stmt, j);
5265 tree def_stmt = NULL_TREE;
5267 if (!vect_is_simple_use (arg, loop, &def_stmt))
5269 if (vect_debug_details (NULL))
5270 fprintf (dump_file, "worklist: unsupported use.");
5271 varray_clear (worklist);
5277 if (vect_debug_details (NULL))
5279 fprintf (dump_file, "worklist: def_stmt: ");
5280 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5283 bb = bb_for_stmt (def_stmt);
5284 if (flow_bb_inside_loop_p (loop, bb))
5285 vect_mark_relevant (worklist, def_stmt);
5289 ann = stmt_ann (stmt);
5290 use_ops = USE_OPS (ann);
5292 for (i = 0; i < NUM_USES (use_ops); i++)
5294 tree use = USE_OP (use_ops, i);
5296 /* We are only interested in uses that need to be vectorized. Uses
5297 that are used for address computation are not considered relevant.
5299 if (exist_non_indexing_operands_for_use_p (use, stmt))
5301 tree def_stmt = NULL_TREE;
5303 if (!vect_is_simple_use (use, loop, &def_stmt))
5305 if (vect_debug_details (NULL))
5306 fprintf (dump_file, "worklist: unsupported use.");
5307 varray_clear (worklist);
5314 if (vect_debug_details (NULL))
5316 fprintf (dump_file, "worklist: examine use %d: ", i);
5317 print_generic_expr (dump_file, use, TDF_SLIM);
5320 bb = bb_for_stmt (def_stmt);
5321 if (flow_bb_inside_loop_p (loop, bb))
5322 vect_mark_relevant (worklist, def_stmt);
5325 } /* while worklist */
5327 varray_clear (worklist);
5332 /* Function vect_analyze_loop_with_symbolic_num_of_iters.
5334 In case the number of iterations that LOOP iterates in unknown at compile
5335 time, an epilog loop will be generated, and the loop induction variables
5336 (IVs) will be "advanced" to the value they are supposed to take just before
5337 the epilog loop. Here we check that the access function of the loop IVs
5338 and the expression that represents the loop bound are simple enough.
5339 These restrictions will be relaxed in the future. */
5342 vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5345 basic_block bb = loop->header;
5348 if (vect_debug_details (NULL))
5350 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5352 if (chrec_contains_undetermined (niters))
5354 if (vect_debug_details (NULL))
5355 fprintf (dump_file, "Infinite number of iterations.");
5361 if (vect_debug_details (NULL))
5362 fprintf (dump_file, "niters is NULL pointer.");
5366 if (vect_debug_details (NULL))
5368 fprintf (dump_file, "Symbolic number of iterations is ");
5369 print_generic_expr (dump_file, niters, TDF_DETAILS);
5372 /* Analyze phi functions of the loop header. */
5374 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5376 tree access_fn = NULL;
5377 tree evolution_part;
5379 if (vect_debug_details (NULL))
5381 fprintf (dump_file, "Analyze phi: ");
5382 print_generic_expr (dump_file, phi, TDF_SLIM);
5385 /* Skip virtual phi's. The data dependences that are associated with
5386 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5388 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5390 if (vect_debug_details (NULL))
5391 fprintf (dump_file, "virtual phi. skip.");
5395 /* Analyze the evolution function. */
5397 access_fn = instantiate_parameters
5398 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5402 if (vect_debug_details (NULL))
5403 fprintf (dump_file, "No Access function.");
5407 if (vect_debug_details (NULL))
5409 fprintf (dump_file, "Access function of PHI: ");
5410 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5413 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5415 if (evolution_part == NULL_TREE)
5418 /* FORNOW: We do not transform initial conditions of IVs
5419 which evolution functions are a polynomial of degree >= 2. */
5421 if (tree_is_chrec (evolution_part))
5429 /* Function vect_get_loop_niters.
5431 Determine how many iterations the loop is executed. */
5434 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5438 if (vect_debug_details (NULL))
5439 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5441 niters = number_of_iterations_in_loop (loop);
5443 if (niters != NULL_TREE
5444 && niters != chrec_dont_know)
5446 *number_of_iterations = niters;
5448 if (vect_debug_details (NULL))
5450 fprintf (dump_file, "==> get_loop_niters:" );
5451 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5455 return get_loop_exit_condition (loop);
5459 /* Function vect_analyze_loop_form.
5461 Verify the following restrictions (some may be relaxed in the future):
5462 - it's an inner-most loop
5463 - number of BBs = 2 (which are the loop header and the latch)
5464 - the loop has a pre-header
5465 - the loop has a single entry and exit
5466 - the loop exit condition is simple enough, and the number of iterations
5467 can be analyzed (a countable loop). */
5469 static loop_vec_info
5470 vect_analyze_loop_form (struct loop *loop)
5472 loop_vec_info loop_vinfo;
5474 tree number_of_iterations = NULL;
5476 if (vect_debug_details (loop))
5477 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5480 || !loop->single_exit
5481 || loop->num_nodes != 2)
5483 if (vect_debug_stats (loop) || vect_debug_details (loop))
5485 fprintf (dump_file, "not vectorized: bad loop form. ");
5487 fprintf (dump_file, "nested loop.");
5488 else if (!loop->single_exit)
5489 fprintf (dump_file, "multiple exits.");
5490 else if (loop->num_nodes != 2)
5491 fprintf (dump_file, "too many BBs in loop.");
5497 /* We assume that the loop exit condition is at the end of the loop. i.e,
5498 that the loop is represented as a do-while (with a proper if-guard
5499 before the loop if needed), where the loop header contains all the
5500 executable statements, and the latch is empty. */
5501 if (!empty_block_p (loop->latch))
5503 if (vect_debug_stats (loop) || vect_debug_details (loop))
5504 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5508 if (empty_block_p (loop->header))
5510 if (vect_debug_stats (loop) || vect_debug_details (loop))
5511 fprintf (dump_file, "not vectorized: empty loop.");
5515 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5518 if (vect_debug_stats (loop) || vect_debug_details (loop))
5519 fprintf (dump_file, "not vectorized: complicated exit condition.");
5523 if (!number_of_iterations)
5525 if (vect_debug_stats (loop) || vect_debug_details (loop))
5527 "not vectorized: number of iterations cannot be computed.");
5531 loop_vinfo = new_loop_vec_info (loop);
5532 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5533 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5535 if (vect_debug_stats (loop) || vect_debug_details (loop))
5536 fprintf (dump_file, "loop bound unknown.");
5538 /* Unknown loop bound. */
5539 if (!vect_analyze_loop_with_symbolic_num_of_iters
5540 (number_of_iterations, loop))
5542 if (vect_debug_stats (loop) || vect_debug_details (loop))
5544 "not vectorized: can't determine loop bound.");
5549 /* We need only one loop entry for unknown loop bound support. */
5550 if (loop->num_entries != 1 || !loop->pre_header)
5552 if (vect_debug_stats (loop) || vect_debug_details (loop))
5554 "not vectorized: more than one loop entry.");
5560 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5562 if (vect_debug_stats (loop) || vect_debug_details (loop))
5563 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5567 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5573 /* Function vect_analyze_loop.
5575 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5576 for it. The different analyses will record information in the
5577 loop_vec_info struct. */
5579 static loop_vec_info
5580 vect_analyze_loop (struct loop *loop)
5583 loop_vec_info loop_vinfo;
5585 if (vect_debug_details (NULL))
5586 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5588 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5590 loop_vinfo = vect_analyze_loop_form (loop);
5593 if (vect_debug_details (loop))
5594 fprintf (dump_file, "bad loop form.");
5598 /* Find all data references in the loop (which correspond to vdefs/vuses)
5599 and analyze their evolution in the loop.
5601 FORNOW: Handle only simple, array references, which
5602 alignment can be forced, and aligned pointer-references. */
5604 ok = vect_analyze_data_refs (loop_vinfo);
5607 if (vect_debug_details (loop))
5608 fprintf (dump_file, "bad data references.");
5609 destroy_loop_vec_info (loop_vinfo);
5613 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5615 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5618 if (vect_debug_details (loop))
5619 fprintf (dump_file, "unexpected pattern.");
5620 if (vect_debug_details (loop))
5621 fprintf (dump_file, "not vectorized: unexpected pattern.");
5622 destroy_loop_vec_info (loop_vinfo);
5626 /* Check that all cross-iteration scalar data-flow cycles are OK.
5627 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5629 ok = vect_analyze_scalar_cycles (loop_vinfo);
5632 if (vect_debug_details (loop))
5633 fprintf (dump_file, "bad scalar cycle.");
5634 destroy_loop_vec_info (loop_vinfo);
5638 /* Analyze data dependences between the data-refs in the loop.
5639 FORNOW: fail at the first data dependence that we encounter. */
5641 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5644 if (vect_debug_details (loop))
5645 fprintf (dump_file, "bad data dependence.");
5646 destroy_loop_vec_info (loop_vinfo);
5650 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5651 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5653 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5656 if (vect_debug_details (loop))
5657 fprintf (dump_file, "bad data access.");
5658 destroy_loop_vec_info (loop_vinfo);
5662 /* Analyze the alignment of the data-refs in the loop.
5663 FORNOW: Only aligned accesses are handled. */
5665 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5668 if (vect_debug_details (loop))
5669 fprintf (dump_file, "bad data alignment.");
5670 destroy_loop_vec_info (loop_vinfo);
5674 /* Scan all the operations in the loop and make sure they are
5677 ok = vect_analyze_operations (loop_vinfo);
5680 if (vect_debug_details (loop))
5681 fprintf (dump_file, "bad operation or unsupported loop bound.");
5682 destroy_loop_vec_info (loop_vinfo);
5686 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5692 /* Function need_imm_uses_for.
5694 Return whether we ought to include information for 'var'
5695 when calculating immediate uses. For this pass we only want use
5696 information for non-virtual variables. */
5699 need_imm_uses_for (tree var)
5701 return is_gimple_reg (var);
5705 /* Function vectorize_loops.
5707 Entry Point to loop vectorization phase. */
5710 vectorize_loops (struct loops *loops)
5712 unsigned int i, loops_num;
5713 unsigned int num_vectorized_loops = 0;
5715 /* Does the target support SIMD? */
5716 /* FORNOW: until more sophisticated machine modelling is in place. */
5717 if (!UNITS_PER_SIMD_WORD)
5719 if (vect_debug_details (NULL))
5720 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5724 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5726 /* ----------- Analyze loops. ----------- */
5728 /* If some loop was duplicated, it gets bigger number
5729 than all previously defined loops. This fact allows us to run
5730 only over initial loops skipping newly generated ones. */
5731 loops_num = loops->num;
5732 for (i = 1; i < loops_num; i++)
5734 loop_vec_info loop_vinfo;
5735 struct loop *loop = loops->parray[i];
5740 loop_vinfo = vect_analyze_loop (loop);
5741 loop->aux = loop_vinfo;
5743 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5746 vect_transform_loop (loop_vinfo, loops);
5747 num_vectorized_loops++;
5750 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5751 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5752 num_vectorized_loops);
5754 /* ----------- Finalize. ----------- */
5757 for (i = 1; i < loops_num; i++)
5759 struct loop *loop = loops->parray[i];
5760 loop_vec_info loop_vinfo;
5764 loop_vinfo = loop->aux;
5765 destroy_loop_vec_info (loop_vinfo);
5769 rewrite_into_ssa (false);
5770 if (!bitmap_empty_p (vars_to_rename))
5772 /* The rewrite of ssa names may cause violation of loop closed ssa
5773 form invariants. TODO -- avoid these rewrites completely.
5774 Information in virtual phi nodes is sufficient for it. */
5775 rewrite_into_loop_closed_ssa ();
5777 rewrite_into_loop_closed_ssa ();
5778 bitmap_clear (vars_to_rename);