2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static bool vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info, tree niters);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static enum dr_alignment_support vect_supportable_dr_alignment
169 (struct data_reference *);
170 static void vect_align_data_ref (tree);
171 static void vect_enhance_data_refs_alignment (loop_vec_info);
173 /* Utility functions for the analyses. */
174 static bool vect_is_simple_use (tree , struct loop *, tree *);
175 static bool exist_non_indexing_operands_for_use_p (tree, tree);
176 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
177 static void vect_mark_relevant (varray_type, tree);
178 static bool vect_stmt_relevant_p (tree, loop_vec_info);
179 static tree vect_get_loop_niters (struct loop *, tree *);
180 static bool vect_compute_data_ref_alignment
181 (struct data_reference *, loop_vec_info);
182 static bool vect_analyze_data_ref_access (struct data_reference *);
183 static bool vect_get_first_index (tree, tree *);
184 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
185 static struct data_reference * vect_analyze_pointer_ref_access
187 static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
189 static tree vect_get_base_and_bit_offset
190 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
191 static struct data_reference * vect_analyze_pointer_ref_access
193 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
194 static tree vect_compute_array_ref_alignment
195 (struct data_reference *, loop_vec_info, tree, tree *);
196 static tree vect_get_ptr_offset (tree, tree, tree *);
197 static tree vect_get_symbl_and_dr
198 (tree, tree, bool, loop_vec_info, struct data_reference **);
200 /* Utility functions for the code transformation. */
201 static tree vect_create_destination_var (tree, tree);
202 static tree vect_create_data_ref_ptr
203 (tree, block_stmt_iterator *, tree, tree *, bool);
204 static tree vect_create_index_for_vector_ref
205 (struct loop *, block_stmt_iterator *);
206 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
207 static tree get_vectype_for_scalar_type (tree);
208 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
209 static tree vect_get_vec_def_for_operand (tree, tree);
210 static tree vect_init_vector (tree, tree);
211 static tree vect_build_symbol_bound (tree, int, struct loop *);
212 static void vect_finish_stmt_generation
213 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
215 static void vect_generate_tmps_on_preheader (loop_vec_info,
218 static tree vect_build_loop_niters (loop_vec_info);
219 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
221 /* Loop transformations prior to vectorization. */
223 /* Loop transformations entry point function.
224 It can be used outside of the vectorizer
225 in case the loop to be manipulated answers conditions specified
226 in function documentation. */
227 struct loop *tree_duplicate_loop_to_edge (struct loop *,
228 struct loops *, edge,
231 static void allocate_new_names (bitmap);
232 static void rename_use_op (use_operand_p);
233 static void rename_def_op (def_operand_p, tree);
234 static void rename_variables_in_bb (basic_block);
235 static void free_new_names (bitmap);
236 static void rename_variables_in_loop (struct loop *);
237 static void copy_phi_nodes (struct loop *, struct loop *, bool);
238 static void update_phis_for_duplicate_loop (struct loop *,
241 static void update_phi_nodes_for_guard (edge, struct loop *);
242 static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
243 static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
246 static edge add_loop_guard (basic_block, tree, basic_block);
247 static bool verify_loop_for_duplication (struct loop *, bool, edge);
249 /* Utilities dealing with loop peeling (not peeling itself). */
250 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
251 static void vect_update_niters_after_peeling (loop_vec_info, tree);
252 static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
254 static void vect_update_inits_of_drs (loop_vec_info, tree);
255 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
257 /* Utilities for creation and deletion of vec_info structs. */
258 loop_vec_info new_loop_vec_info (struct loop *loop);
259 void destroy_loop_vec_info (loop_vec_info);
260 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
262 static bool vect_debug_stats (struct loop *loop);
263 static bool vect_debug_details (struct loop *loop);
266 /* Utilities to support loop peeling for vectorization purposes. */
269 /* For each definition in DEFINITIONS this function allocates
273 allocate_new_names (bitmap definitions)
278 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
280 tree def = ssa_name (ver);
281 tree *new_name_ptr = xmalloc (sizeof (tree));
283 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
285 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
286 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
288 SSA_NAME_AUX (def) = new_name_ptr;
293 /* Renames the use *OP_P. */
296 rename_use_op (use_operand_p op_p)
300 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
303 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
305 /* Something defined outside of the loop. */
309 /* An ordinary ssa name defined in the loop. */
311 SET_USE (op_p, *new_name_ptr);
315 /* Renames the def *OP_P in statement STMT. */
318 rename_def_op (def_operand_p op_p, tree stmt)
322 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
325 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
327 /* Something defined outside of the loop. */
331 /* An ordinary ssa name defined in the loop. */
333 SET_DEF (op_p, *new_name_ptr);
334 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
338 /* Renames the variables in basic block BB. */
341 rename_variables_in_bb (basic_block bb)
344 block_stmt_iterator bsi;
350 v_may_def_optype v_may_defs;
351 v_must_def_optype v_must_defs;
356 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
357 rename_def_op (PHI_RESULT_PTR (phi), phi);
359 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
361 stmt = bsi_stmt (bsi);
362 get_stmt_operands (stmt);
363 ann = stmt_ann (stmt);
365 uses = USE_OPS (ann);
366 for (i = 0; i < NUM_USES (uses); i++)
367 rename_use_op (USE_OP_PTR (uses, i));
369 defs = DEF_OPS (ann);
370 for (i = 0; i < NUM_DEFS (defs); i++)
371 rename_def_op (DEF_OP_PTR (defs, i), stmt);
373 vuses = VUSE_OPS (ann);
374 for (i = 0; i < NUM_VUSES (vuses); i++)
375 rename_use_op (VUSE_OP_PTR (vuses, i));
377 v_may_defs = V_MAY_DEF_OPS (ann);
378 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
380 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
381 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
384 v_must_defs = V_MUST_DEF_OPS (ann);
385 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
387 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
388 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
392 FOR_EACH_EDGE (e, ei, bb->succs)
393 for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
394 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
398 /* Releases the structures holding the new ssa names. */
401 free_new_names (bitmap definitions)
406 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
408 tree def = ssa_name (ver);
410 if (SSA_NAME_AUX (def))
412 free (SSA_NAME_AUX (def));
413 SSA_NAME_AUX (def) = NULL;
419 /* Renames variables in new generated LOOP. */
422 rename_variables_in_loop (struct loop *loop)
427 bbs = get_loop_body (loop);
429 for (i = 0; i < loop->num_nodes; i++)
430 rename_variables_in_bb (bbs[i]);
436 /* This function copies phis from LOOP header to
437 NEW_LOOP header. AFTER is as
438 in update_phis_for_duplicate_loop function. */
441 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
444 tree phi, new_phi, def;
446 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
448 /* Second add arguments to newly created phi nodes. */
449 for (phi = phi_nodes (loop->header),
450 new_phi = phi_nodes (new_loop->header);
452 phi = TREE_CHAIN (phi),
453 new_phi = TREE_CHAIN (new_phi))
455 new_e = loop_preheader_edge (new_loop);
456 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
457 add_phi_arg (&new_phi, def, new_e);
462 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
463 executes after LOOP, and false if it executes before it. */
466 update_phis_for_duplicate_loop (struct loop *loop,
467 struct loop *new_loop, bool after)
470 tree *new_name_ptr, new_ssa_name;
471 tree phi_new, phi_old, def;
472 edge orig_entry_e = loop_preheader_edge (loop);
474 /* Copy phis from loop->header to new_loop->header. */
475 copy_phi_nodes (loop, new_loop, after);
477 old_latch = loop_latch_edge (loop);
479 /* Update PHI args for the new loop latch edge, and
480 the old loop preheader edge, we know that the PHI nodes
481 are ordered appropriately in copy_phi_nodes. */
482 for (phi_new = phi_nodes (new_loop->header),
483 phi_old = phi_nodes (loop->header);
485 phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
487 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
489 if (TREE_CODE (def) != SSA_NAME)
492 new_name_ptr = SSA_NAME_AUX (def);
494 /* Something defined outside of the loop. */
498 /* An ordinary ssa name defined in the loop. */
499 new_ssa_name = *new_name_ptr;
501 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
503 /* Update PHI args for the original loop pre-header edge. */
505 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
511 /* Update PHI nodes for a guard of the LOOP.
513 LOOP is supposed to have a preheader bb at which a guard condition is
514 located. The true edge of this condition skips the LOOP and ends
515 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
516 to be an empty bb (created by this transformation) with one successor.
518 This function creates phi nodes at the LOOP exit bb. These phis need to be
519 created as a result of adding true edge coming from guard.
521 FORNOW: Only phis which have corresponding phi nodes at the header of the
522 LOOP are created. Here we use the assumption that after the LOOP there
523 are no uses of defs generated in LOOP.
525 After the phis creation, the function updates the values of phi nodes at
526 the LOOP exit successor bb:
533 if (exit_cond) goto bb3 else goto bb2
539 After guard creation (the loop before this function):
542 if (guard_condition) goto bb4 else goto bb1
544 if (exit_cond) goto bb4 else goto bb2
552 This function updates the phi nodes in bb4 and in bb3, to account for the
553 new edge from bb0 to bb4. */
556 update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
560 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
565 /* Generate new phi node. */
566 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
567 loop->exit_edges[0]->dest);
569 /* Add argument coming from guard true edge. */
570 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
571 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
573 /* Add argument coming from loop exit edge. */
574 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
575 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
577 /* Update all phi nodes at the loop exit successor. */
578 for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest);
580 phi1 = TREE_CHAIN (phi1))
582 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1,
583 EDGE_SUCC (loop->exit_edges[0]->dest, 0));
584 if (old_arg == phi_arg)
586 edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);
588 SET_PHI_ARG_DEF (phi1,
589 phi_arg_from_edge (phi1, e),
590 PHI_RESULT (new_phi));
597 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
598 that starts at zero, increases by one and its limit is NITERS. */
601 make_loop_iterate_ntimes (struct loop *loop, tree niters,
602 tree begin_label, tree exit_label)
604 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
606 edge exit_edge = loop->exit_edges[0];
607 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
609 /* Flow loop scan does not update loop->single_exit field. */
610 loop->single_exit = loop->exit_edges[0];
611 orig_cond = get_loop_exit_condition (loop);
612 gcc_assert (orig_cond);
613 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
614 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
616 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
617 back to the exit condition statement. */
618 bsi_next (&loop_exit_bsi);
619 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
622 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
623 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
624 else /* 'then' edge loops back. */
625 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
627 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
628 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
629 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
630 begin_label, exit_label);
631 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
633 /* Remove old loop exit test: */
634 bsi_remove (&loop_exit_bsi);
636 if (vect_debug_stats (loop) || vect_debug_details (loop))
637 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
641 /* Given LOOP this function generates a new copy of it and puts it
642 on E which is either the entry or exit of LOOP. */
645 tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
648 struct loop *new_loop;
649 basic_block *new_bbs, *bbs;
652 basic_block exit_dest;
655 at_exit = (e == loop->exit_edges[0]);
656 if (!at_exit && e != loop_preheader_edge (loop))
658 if (dump_file && (dump_flags & TDF_DETAILS))
660 "Edge is not an entry nor an exit edge.\n");
664 bbs = get_loop_body (loop);
666 /* Check whether duplication is possible. */
667 if (!can_copy_bbs_p (bbs, loop->num_nodes))
669 if (vect_debug_stats (loop) || vect_debug_details (loop))
671 "Cannot copy basic blocks.\n");
676 /* Generate new loop structure. */
677 new_loop = duplicate_loop (loops, loop, loop->outer);
680 if (vect_debug_stats (loop) || vect_debug_details (loop))
682 "The duplicate_loop returns NULL.\n");
687 exit_dest = loop->exit_edges[0]->dest;
688 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
689 exit_dest) == loop->header ?
692 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
694 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
696 /* Duplicating phi args at exit bbs as coming
697 also from exit of duplicated loop. */
698 for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi))
700 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
703 edge new_loop_exit_edge;
705 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
706 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
708 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
710 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
714 if (at_exit) /* Add the loop copy at exit. */
716 redirect_edge_and_branch_force (e, new_loop->header);
717 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
719 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
721 else /* Add the copy at entry. */
724 edge entry_e = loop_preheader_edge (loop);
725 basic_block preheader = entry_e->src;
727 if (!flow_bb_inside_loop_p (new_loop,
728 EDGE_SUCC (new_loop->header, 0)->dest))
729 new_exit_e = EDGE_SUCC (new_loop->header, 0);
731 new_exit_e = EDGE_SUCC (new_loop->header, 1);
733 redirect_edge_and_branch_force (new_exit_e, loop->header);
734 set_immediate_dominator (CDI_DOMINATORS, loop->header,
737 /* We have to add phi args to the loop->header here as coming
738 from new_exit_e edge. */
739 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
741 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
743 add_phi_arg (&phi, phi_arg, new_exit_e);
746 redirect_edge_and_branch_force (entry_e, new_loop->header);
747 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
750 flow_loop_scan (new_loop, LOOP_ALL);
751 flow_loop_scan (loop, LOOP_ALL);
759 /* Given the condition statement COND, put it as the last statement
760 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
761 Assumes that this is the single exit of the guarded loop.
762 Returns the skip edge. */
765 add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
767 block_stmt_iterator bsi;
769 tree cond_stmt, then_label, else_label;
771 enter_e = EDGE_SUCC (guard_bb, 0);
772 enter_e->flags &= ~EDGE_FALLTHRU;
773 enter_e->flags |= EDGE_FALSE_VALUE;
774 bsi = bsi_last (guard_bb);
776 then_label = build1 (GOTO_EXPR, void_type_node,
777 tree_block_label (exit_bb));
778 else_label = build1 (GOTO_EXPR, void_type_node,
779 tree_block_label (enter_e->dest));
780 cond_stmt = build (COND_EXPR, void_type_node, cond,
781 then_label, else_label);
782 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
783 /* Add new edge to connect entry block to the second loop. */
784 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
785 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
790 /* This function verifies that certain restrictions apply to LOOP. */
793 verify_loop_for_duplication (struct loop *loop,
794 bool update_first_loop_count, edge e)
796 edge exit_e = loop->exit_edges [0];
797 edge entry_e = loop_preheader_edge (loop);
799 /* We duplicate only innermost loops. */
802 if (vect_debug_stats (loop) || vect_debug_details (loop))
804 "Loop duplication failed. Loop is not innermost.\n");
808 /* Only loops with 1 exit. */
809 if (loop->num_exits != 1)
811 if (vect_debug_stats (loop) || vect_debug_details (loop))
813 "More than one exit from loop.\n");
817 /* Only loops with 1 entry. */
818 if (loop->num_entries != 1)
820 if (vect_debug_stats (loop) || vect_debug_details (loop))
822 "More than one exit from loop.\n");
826 /* All loops has outers, the only case loop->outer is NULL is for
827 the function itself. */
830 if (vect_debug_stats (loop) || vect_debug_details (loop))
832 "Loop is outer-most loop.\n");
836 /* Verify that new IV can be created and loop condition
837 can be changed to make first loop iterate first_niters times. */
838 if (!update_first_loop_count)
840 tree orig_cond = get_loop_exit_condition (loop);
841 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
845 if (vect_debug_stats (loop) || vect_debug_details (loop))
847 "Loop has no exit condition.\n");
850 if (orig_cond != bsi_stmt (loop_exit_bsi))
852 if (vect_debug_stats (loop) || vect_debug_details (loop))
854 "Loop exit condition is not loop header last stmt.\n");
859 /* Make sure E is either an entry or an exit edge. */
860 if (e != exit_e && e != entry_e)
862 if (vect_debug_stats (loop) || vect_debug_details (loop))
864 "E is not loop entry or exit edge.\n");
872 /* Given LOOP this function duplicates it to the edge E.
874 This transformation takes place before the loop is vectorized.
875 For now, there are two main cases when it's used
876 by the vectorizer: to support loops with unknown loop bounds
877 (or loop bounds indivisible by vectorization factor) and to force the
878 alignment of data references in the loop. In the first case, LOOP is
879 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
880 is duplicated to the preheader edge thus generating prolog loop. In both
881 cases, the original loop will be vectorized after the transformation.
883 The edge E is supposed to be either preheader edge of the LOOP or
884 its exit edge. If preheader edge is specified, the LOOP copy
885 will precede the original one. Otherwise the copy will be located
886 at the exit of the LOOP.
888 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
889 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
890 loop will be iterated FIRST_NITERS times by introducing additional
891 induction variable and replacing loop exit condition. If
892 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
893 the caller to tree_duplicate_loop_to_edge is responsible for updating
894 the first loop count.
896 NITERS (also SSA_NAME) parameter defines the number of iteration the
897 original loop iterated. The function generates two if-then guards:
898 one prior to the first loop and the other prior to the second loop.
899 The first guard will be:
901 if (FIRST_NITERS == 0) then skip the first loop
903 The second guard will be:
905 if (FIRST_NITERS == NITERS) then skip the second loop
907 Thus the equivalence to the original code is guaranteed by correct values
908 of NITERS and FIRST_NITERS and generation of if-then loop guards.
910 For now this function supports only loop forms that are candidate for
911 vectorization. Such types are the following:
913 (1) only innermost loops
914 (2) loops built from 2 basic blocks
915 (3) loops with one entry and one exit
916 (4) loops without function calls
917 (5) loops without defs that are used after the loop
919 (1), (3) are checked in this function; (2) - in function
920 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
921 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
922 when excluding induction/reduction support.
924 The function returns NULL in case one of these checks or
925 transformations failed. */
928 tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
929 edge e, tree first_niters,
930 tree niters, bool update_first_loop_count)
932 struct loop *new_loop = NULL, *first_loop, *second_loop;
936 basic_block first_exit_bb, second_exit_bb;
937 basic_block pre_header_bb;
938 edge exit_e = loop->exit_edges [0];
940 gcc_assert (!any_marked_for_rewrite_p ());
942 if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
945 /* We have to initialize cfg_hooks. Then, when calling
946 cfg_hooks->split_edge, the function tree_split_edge
947 is actually called and, when calling cfg_hooks->duplicate_block,
948 the function tree_duplicate_bb is called. */
949 tree_register_cfg_hooks ();
951 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
952 if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
954 if (vect_debug_stats (loop) || vect_debug_details (loop))
956 "The tree_duplicate_loop_to_edge_cfg failed.\n");
960 definitions = marked_ssa_names ();
961 allocate_new_names (definitions);
962 update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
963 /* Here, using assumption (5), we do not propagate new names further
964 than on phis of the exit from the second loop. */
965 rename_variables_in_loop (new_loop);
966 free_new_names (definitions);
971 second_loop = new_loop;
975 first_loop = new_loop;
979 /* 2. Generate bb between the loops. */
980 first_exit_bb = split_edge (first_loop->exit_edges[0]);
981 add_bb_to_loop (first_exit_bb, first_loop->outer);
983 /* We need to update here first loop exit edge
984 and second loop preheader edge. */
985 flow_loop_scan (first_loop, LOOP_ALL);
986 flow_loop_scan (second_loop, LOOP_ALL);
988 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
989 if (!update_first_loop_count)
991 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
992 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
994 make_loop_iterate_ntimes (first_loop, first_niters,
995 first_loop_latch_lbl,
996 first_loop_exit_lbl);
999 /* 4. Add the guard before first loop:
1001 if FIRST_NITERS == 0
1006 /* 4a. Generate bb before first loop. */
1007 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
1008 add_bb_to_loop (pre_header_bb, first_loop->outer);
1010 /* First loop preheader edge is changed. */
1011 flow_loop_scan (first_loop, LOOP_ALL);
1013 /* 4b. Generate guard condition. */
1014 pre_condition = build (LE_EXPR, boolean_type_node,
1015 first_niters, integer_zero_node);
1017 /* 4c. Add condition at the end of preheader bb. */
1018 skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
1020 /* 4d. Update phis at first loop exit and propagate changes
1021 to the phis of second loop. */
1022 update_phi_nodes_for_guard (skip_e, first_loop);
1024 /* 5. Add the guard before second loop:
1026 if FIRST_NITERS == NITERS SKIP
1029 enter second loop */
1031 /* 5a. Generate empty bb at the exit from the second loop. */
1032 second_exit_bb = split_edge (second_loop->exit_edges[0]);
1033 add_bb_to_loop (second_exit_bb, second_loop->outer);
1035 /* Second loop preheader edge is changed. */
1036 flow_loop_scan (second_loop, LOOP_ALL);
1038 /* 5b. Generate guard condition. */
1039 pre_condition = build (EQ_EXPR, boolean_type_node,
1040 first_niters, niters);
1042 /* 5c. Add condition at the end of preheader bb. */
1043 skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1044 update_phi_nodes_for_guard (skip_e, second_loop);
1046 BITMAP_XFREE (definitions);
1047 unmark_all_for_rewrite ();
1054 /* Here the proper Vectorizer starts. */
1056 /* Function new_stmt_vec_info.
1058 Create and initialize a new stmt_vec_info struct for STMT. */
1061 new_stmt_vec_info (tree stmt, struct loop *loop)
1064 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1066 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1067 STMT_VINFO_STMT (res) = stmt;
1068 STMT_VINFO_LOOP (res) = loop;
1069 STMT_VINFO_RELEVANT_P (res) = 0;
1070 STMT_VINFO_VECTYPE (res) = NULL;
1071 STMT_VINFO_VEC_STMT (res) = NULL;
1072 STMT_VINFO_DATA_REF (res) = NULL;
1073 STMT_VINFO_MEMTAG (res) = NULL;
1074 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1080 /* Function new_loop_vec_info.
1082 Create and initialize a new loop_vec_info struct for LOOP, as well as
1083 stmt_vec_info structs for all the stmts in LOOP. */
1086 new_loop_vec_info (struct loop *loop)
1090 block_stmt_iterator si;
1093 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1095 bbs = get_loop_body (loop);
1097 /* Create stmt_info for all stmts in the loop. */
1098 for (i = 0; i < loop->num_nodes; i++)
1100 basic_block bb = bbs[i];
1101 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1103 tree stmt = bsi_stmt (si);
1106 get_stmt_operands (stmt);
1107 ann = stmt_ann (stmt);
1108 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1112 LOOP_VINFO_LOOP (res) = loop;
1113 LOOP_VINFO_BBS (res) = bbs;
1114 LOOP_VINFO_EXIT_COND (res) = NULL;
1115 LOOP_VINFO_NITERS (res) = NULL;
1116 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1117 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1118 LOOP_VINFO_VECT_FACTOR (res) = 0;
1119 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1120 "loop_write_datarefs");
1121 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1122 "loop_read_datarefs");
1123 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1129 /* Function destroy_loop_vec_info.
1131 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1132 stmts in the loop. */
1135 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1140 block_stmt_iterator si;
1146 loop = LOOP_VINFO_LOOP (loop_vinfo);
1148 bbs = LOOP_VINFO_BBS (loop_vinfo);
1149 nbbs = loop->num_nodes;
1151 for (j = 0; j < nbbs; j++)
1153 basic_block bb = bbs[j];
1154 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1156 tree stmt = bsi_stmt (si);
1157 stmt_ann_t ann = stmt_ann (stmt);
1158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1160 set_stmt_info (ann, NULL);
1164 free (LOOP_VINFO_BBS (loop_vinfo));
1165 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1166 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1172 /* Function debug_loop_stats.
1174 For vectorization statistics dumps. */
1177 vect_debug_stats (struct loop *loop)
1180 block_stmt_iterator si;
1181 tree node = NULL_TREE;
1183 if (!dump_file || !(dump_flags & TDF_STATS))
1188 fprintf (dump_file, "\n");
1197 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1199 node = bsi_stmt (si);
1200 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1204 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1205 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1207 fprintf (dump_file, "\nloop at %s:%d: ",
1208 EXPR_FILENAME (node), EXPR_LINENO (node));
1216 /* Function debug_loop_details.
1218 For vectorization debug dumps. */
1221 vect_debug_details (struct loop *loop)
1224 block_stmt_iterator si;
1225 tree node = NULL_TREE;
1227 if (!dump_file || !(dump_flags & TDF_DETAILS))
1232 fprintf (dump_file, "\n");
1241 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1243 node = bsi_stmt (si);
1244 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1248 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1249 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1251 fprintf (dump_file, "\nloop at %s:%d: ",
1252 EXPR_FILENAME (node), EXPR_LINENO (node));
1260 /* Function vect_get_ptr_offset
1262 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1265 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1266 tree vectype ATTRIBUTE_UNUSED,
1267 tree *offset ATTRIBUTE_UNUSED)
1269 /* TODO: Use alignment information. */
1274 /* Function vect_get_base_and_bit_offset
1276 Return the BASE of the data reference EXPR.
1277 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1278 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1279 bits of 'a.b[i] + 4B' from a.
1282 EXPR - the memory reference that is being analyzed
1283 DR - the data_reference struct of the _original_ memory reference
1284 (Note: DR_REF (DR) is not necessarily EXPR)
1285 VECTYPE - the type that defines the alignment (i.e, we compute
1286 alignment relative to TYPE_ALIGN(VECTYPE))
1289 BASE (returned value) - the base of the data reference EXPR.
1290 E.g, if EXPR is a.b[k].c[i][j] the returned
1292 OFFSET - offset of EXPR from BASE in bits
1293 BASE_ALIGNED_P - indicates if BASE is aligned
1295 If something unexpected is encountered (an unsupported form of data-ref),
1296 or if VECTYPE is given but OFFSET cannot be determined:
1297 then NULL_TREE is returned. */
1300 vect_get_base_and_bit_offset (struct data_reference *dr,
1303 loop_vec_info loop_vinfo,
1305 bool *base_aligned_p)
1307 tree this_offset = size_zero_node;
1308 tree base = NULL_TREE;
1310 tree oprnd0, oprnd1;
1311 struct data_reference *array_dr;
1312 enum tree_code code = TREE_CODE (expr);
1314 *base_aligned_p = false;
1318 /* These cases end the recursion: */
1320 *offset = size_zero_node;
1321 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1322 *base_aligned_p = true;
1329 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1332 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1334 base = vect_get_ptr_offset (expr, vectype, offset);
1336 *base_aligned_p = true;
1340 *base_aligned_p = true;
1341 *offset = size_zero_node;
1347 *offset = int_const_binop (MULT_EXPR, expr,
1348 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1351 /* These cases continue the recursion: */
1353 oprnd0 = TREE_OPERAND (expr, 0);
1354 oprnd1 = TREE_OPERAND (expr, 1);
1356 this_offset = bit_position (oprnd1);
1357 if (vectype && !host_integerp (this_offset, 1))
1363 oprnd0 = TREE_OPERAND (expr, 0);
1368 oprnd0 = TREE_OPERAND (expr, 0);
1373 if (DR_REF (dr) != expr)
1374 /* Build array data_reference struct if the existing DR_REF
1375 doesn't match EXPR. This happens, for example, when the
1376 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1377 contains information on the access of T, not of arr. In order
1378 to continue the analysis, we create a new DR struct that
1379 describes the access of arr.
1381 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1385 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1386 vectype, &this_offset);
1391 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1393 *offset = this_offset;
1394 *base_aligned_p = true;
1401 /* In case we have a PLUS_EXPR of the form
1402 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1403 This is verified in vect_get_symbl_and_dr. */
1404 oprnd0 = TREE_OPERAND (expr, 0);
1405 oprnd1 = TREE_OPERAND (expr, 1);
1407 base = vect_get_base_and_bit_offset
1408 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1409 if (vectype && !base)
1419 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1420 loop_vinfo, offset, base_aligned_p);
1422 if (vectype && base)
1424 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1425 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1428 if (vect_debug_details (NULL))
1430 print_generic_expr (dump_file, expr, TDF_SLIM);
1431 fprintf (dump_file, " --> total offset for ref: ");
1432 print_generic_expr (dump_file, *offset, TDF_SLIM);
1439 /* Function vect_force_dr_alignment_p.
1441 Returns whether the alignment of a DECL can be forced to be aligned
1442 on ALIGNMENT bit boundary. */
1445 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1447 if (TREE_CODE (decl) != VAR_DECL)
1450 if (DECL_EXTERNAL (decl))
1453 if (TREE_STATIC (decl))
1454 return (alignment <= MAX_OFILE_ALIGNMENT);
1456 /* This is not 100% correct. The absolute correct stack alignment
1457 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1458 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1459 However, until someone implements forced stack alignment, SSE
1460 isn't really usable without this. */
1461 return (alignment <= PREFERRED_STACK_BOUNDARY);
1465 /* Function vect_get_new_vect_var.
1467 Returns a name for a new variable. The current naming scheme appends the
1468 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1469 the name of vectorizer generated variables, and appends that to NAME if
1473 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1479 if (var_kind == vect_simple_var)
1484 prefix_len = strlen (prefix);
1487 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1489 new_vect_var = create_tmp_var (type, prefix);
1491 return new_vect_var;
1495 /* Function vect_create_index_for_vector_ref.
1497 Create (and return) an index variable, along with it's update chain in the
1498 loop. This variable will be used to access a memory location in a vector
1502 LOOP: The loop being vectorized.
1503 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1504 function can be added here, or in the loop pre-header.
1507 Return an index that will be used to index a vector array. It is expected
1508 that a pointer to the first vector will be used as the base address for the
1511 FORNOW: we are not trying to be efficient, just creating a new index each
1512 time from scratch. At this time all vector references could use the same
1515 TODO: create only one index to be used by all vector references. Record
1516 the index in the LOOP_VINFO the first time this procedure is called and
1517 return it on subsequent calls. The increment of this index must be placed
1518 just before the conditional expression that ends the single block loop. */
1521 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1524 tree indx_before_incr, indx_after_incr;
1526 /* It is assumed that the base pointer used for vectorized access contains
1527 the address of the first vector. Therefore the index used for vectorized
1528 access must be initialized to zero and incremented by 1. */
1530 init = integer_zero_node;
1531 step = integer_one_node;
1533 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1534 create_iv (init, step, NULL_TREE, loop, bsi, false,
1535 &indx_before_incr, &indx_after_incr);
1537 return indx_before_incr;
1541 /* Function vect_create_addr_base_for_vector_ref.
1543 Create an expression that computes the address of the first memory location
1544 that will be accessed for a data reference.
1547 STMT: The statement containing the data reference.
1548 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1549 OFFSET: Optional. If supplied, it is be added to the initial address.
1552 1. Return an SSA_NAME whose value is the address of the memory location of
1553 the first vector of the data reference.
1554 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1555 these statement(s) which define the returned SSA_NAME.
1557 FORNOW: We are only handling array accesses with step 1. */
1560 vect_create_addr_base_for_vector_ref (tree stmt,
1561 tree *new_stmt_list,
1564 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1565 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1566 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1567 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1568 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1569 tree ref = DR_REF (dr);
1570 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1571 tree scalar_type = TREE_TYPE (ref);
1572 tree scalar_ptr_type = build_pointer_type (scalar_type);
1574 tree init_val, step, init_oval;
1576 bool is_ptr_ref, is_array_ref, is_addr_expr;
1581 tree addr_base, addr_expr;
1582 tree dest, new_stmt;
1584 /* Only the access function of the last index is relevant (i_n in
1585 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1586 access_fn = DR_ACCESS_FN (dr, 0);
1587 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1590 init_oval = integer_zero_node;
1592 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1593 && TREE_CODE (data_ref_base) == SSA_NAME;
1594 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1595 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1596 || TREE_CODE (data_ref_base) == PLUS_EXPR
1597 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1598 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1600 /** Create: &(base[init_val])
1602 if data_ref_base is an ARRAY_TYPE:
1603 base = data_ref_base
1605 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1606 base = *((scalar_array *) data_ref_base)
1610 array_base = data_ref_base;
1611 else /* is_ptr_ref or is_addr_expr */
1613 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1614 tree scalar_array_type = build_array_type (scalar_type, 0);
1615 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1616 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1617 add_referenced_tmp_var (array_ptr);
1619 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1620 add_referenced_tmp_var (dest);
1622 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1623 append_to_statement_list_force (new_stmt, new_stmt_list);
1625 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1626 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1627 new_temp = make_ssa_name (array_ptr, vec_stmt);
1628 TREE_OPERAND (vec_stmt, 0) = new_temp;
1629 append_to_statement_list_force (vec_stmt, new_stmt_list);
1632 array_base = build_fold_indirect_ref (new_temp);
1635 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1636 add_referenced_tmp_var (dest);
1637 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1638 append_to_statement_list_force (new_stmt, new_stmt_list);
1642 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1643 add_referenced_tmp_var (tmp);
1644 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1645 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1646 init_val = make_ssa_name (tmp, vec_stmt);
1647 TREE_OPERAND (vec_stmt, 0) = init_val;
1648 append_to_statement_list_force (vec_stmt, new_stmt_list);
1651 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1652 NULL_TREE, NULL_TREE);
1653 addr_base = build_fold_addr_expr (array_ref);
1655 /* addr_expr = addr_base */
1656 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1657 get_name (base_name));
1658 add_referenced_tmp_var (addr_expr);
1659 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1660 new_temp = make_ssa_name (addr_expr, vec_stmt);
1661 TREE_OPERAND (vec_stmt, 0) = new_temp;
1662 append_to_statement_list_force (vec_stmt, new_stmt_list);
1668 /* Function get_vectype_for_scalar_type.
1670 Returns the vector type corresponding to SCALAR_TYPE as supported
1674 get_vectype_for_scalar_type (tree scalar_type)
1676 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1677 int nbytes = GET_MODE_SIZE (inner_mode);
1684 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1686 nunits = UNITS_PER_SIMD_WORD / nbytes;
1688 vectype = build_vector_type (scalar_type, nunits);
1689 if (vect_debug_details (NULL))
1691 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1692 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1698 if (vect_debug_details (NULL))
1700 fprintf (dump_file, "vectype: ");
1701 print_generic_expr (dump_file, vectype, TDF_SLIM);
1704 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1706 /* TODO: tree-complex.c sometimes can parallelize operations
1707 on generic vectors. We can vectorize the loop in that case,
1708 but then we should re-run the lowering pass. */
1709 if (vect_debug_details (NULL))
1710 fprintf (dump_file, "mode not supported by target.");
1718 /* Function vect_align_data_ref.
1720 Handle mislignment of a memory accesses.
1722 FORNOW: Can't handle misaligned accesses.
1723 Make sure that the dataref is aligned. */
1726 vect_align_data_ref (tree stmt)
1728 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1729 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1731 /* FORNOW: can't handle misaligned accesses;
1732 all accesses expected to be aligned. */
1733 gcc_assert (aligned_access_p (dr));
1737 /* Function vect_create_data_ref_ptr.
1739 Create a memory reference expression for vector access, to be used in a
1740 vector load/store stmt. The reference is based on a new pointer to vector
1744 1. STMT: a stmt that references memory. Expected to be of the form
1745 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1746 2. BSI: block_stmt_iterator where new stmts can be added.
1747 3. OFFSET (optional): an offset to be added to the initial address accessed
1748 by the data-ref in STMT.
1749 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1750 pointing to the initial address.
1753 1. Declare a new ptr to vector_type, and have it point to the base of the
1754 data reference (initial addressed accessed by the data reference).
1755 For example, for vector of type V8HI, the following code is generated:
1758 vp = (v8hi *)initial_address;
1760 if OFFSET is not supplied:
1761 initial_address = &a[init];
1762 if OFFSET is supplied:
1763 initial_address = &a[init + OFFSET];
1765 Return the initial_address in INITIAL_ADDRESS.
1767 2. Create a data-reference in the loop based on the new vector pointer vp,
1768 and using a new index variable 'idx' as follows:
1772 where if ONLY_INIT is true:
1775 update = idx + vector_type_size
1777 Return the pointer vp'.
1780 FORNOW: handle only aligned and consecutive accesses. */
1783 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1784 tree *initial_address, bool only_init)
1787 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1788 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1789 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1790 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1794 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1795 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1796 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1797 int nvuses, nv_may_defs, nv_must_defs;
1801 tree new_stmt_list = NULL_TREE;
1803 edge pe = loop_preheader_edge (loop);
1810 base_name = unshare_expr (DR_BASE_NAME (dr));
1811 if (vect_debug_details (NULL))
1813 tree data_ref_base = base_name;
1814 fprintf (dump_file, "create array_ref of type: ");
1815 print_generic_expr (dump_file, vectype, TDF_SLIM);
1816 if (TREE_CODE (data_ref_base) == VAR_DECL)
1817 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1818 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1819 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1820 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1821 fprintf (dump_file, "vectorizing a record based array ref: ");
1822 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1823 fprintf (dump_file, "vectorizing a pointer ref: ");
1824 print_generic_expr (dump_file, base_name, TDF_SLIM);
1827 /** (1) Create the new vector-pointer variable: **/
1829 vect_ptr_type = build_pointer_type (vectype);
1830 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1831 get_name (base_name));
1832 add_referenced_tmp_var (vect_ptr);
1835 /** (2) Handle aliasing information of the new vector-pointer: **/
1837 tag = STMT_VINFO_MEMTAG (stmt_info);
1839 get_var_ann (vect_ptr)->type_mem_tag = tag;
1841 /* Mark for renaming all aliased variables
1842 (i.e, the may-aliases of the type-mem-tag). */
1843 nvuses = NUM_VUSES (vuses);
1844 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1845 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1846 for (i = 0; i < nvuses; i++)
1848 tree use = VUSE_OP (vuses, i);
1849 if (TREE_CODE (use) == SSA_NAME)
1850 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1852 for (i = 0; i < nv_may_defs; i++)
1854 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1855 if (TREE_CODE (def) == SSA_NAME)
1856 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1858 for (i = 0; i < nv_must_defs; i++)
1860 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1861 if (TREE_CODE (def) == SSA_NAME)
1862 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1866 /** (3) Calculate the initial address the vector-pointer, and set
1867 the vector-pointer to point to it before the loop: **/
1869 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1870 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1872 pe = loop_preheader_edge (loop);
1873 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1874 gcc_assert (!new_bb);
1875 *initial_address = new_temp;
1877 /* Create: p = (vectype *) initial_base */
1878 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1879 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1880 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1881 TREE_OPERAND (vec_stmt, 0) = new_temp;
1882 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1883 gcc_assert (!new_bb);
1884 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1887 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1889 if (only_init) /* No update in loop is required. */
1890 return vect_ptr_init;
1892 idx = vect_create_index_for_vector_ref (loop, bsi);
1894 /* Create: update = idx * vectype_size */
1895 ptr_update = create_tmp_var (integer_type_node, "update");
1896 add_referenced_tmp_var (ptr_update);
1897 vectype_size = build_int_cst (integer_type_node,
1898 GET_MODE_SIZE (TYPE_MODE (vectype)));
1899 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1900 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1901 new_temp = make_ssa_name (ptr_update, vec_stmt);
1902 TREE_OPERAND (vec_stmt, 0) = new_temp;
1903 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1905 /* Create: data_ref_ptr = vect_ptr_init + update */
1906 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1907 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1908 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1909 TREE_OPERAND (vec_stmt, 0) = new_temp;
1910 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1911 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1913 return data_ref_ptr;
1917 /* Function vect_create_destination_var.
1919 Create a new temporary of type VECTYPE. */
1922 vect_create_destination_var (tree scalar_dest, tree vectype)
1925 const char *new_name;
1927 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1929 new_name = get_name (scalar_dest);
1932 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1933 add_referenced_tmp_var (vec_dest);
1939 /* Function vect_init_vector.
1941 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1942 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1943 used in the vectorization of STMT. */
1946 vect_init_vector (tree stmt, tree vector_var)
1948 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1949 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1952 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1958 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1959 add_referenced_tmp_var (new_var);
1961 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1962 new_temp = make_ssa_name (new_var, init_stmt);
1963 TREE_OPERAND (init_stmt, 0) = new_temp;
1965 pe = loop_preheader_edge (loop);
1966 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1967 gcc_assert (!new_bb);
1969 if (vect_debug_details (NULL))
1971 fprintf (dump_file, "created new init_stmt: ");
1972 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1975 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1980 /* Function vect_get_vec_def_for_operand.
1982 OP is an operand in STMT. This function returns a (vector) def that will be
1983 used in the vectorized stmt for STMT.
1985 In the case that OP is an SSA_NAME which is defined in the loop, then
1986 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1988 In case OP is an invariant or constant, a new stmt that creates a vector def
1989 needs to be introduced. */
1992 vect_get_vec_def_for_operand (tree op, tree stmt)
1997 stmt_vec_info def_stmt_info = NULL;
1998 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1999 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2000 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2001 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2008 if (vect_debug_details (NULL))
2010 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2011 print_generic_expr (dump_file, op, TDF_SLIM);
2014 /** ===> Case 1: operand is a constant. **/
2016 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2018 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2022 /* Build a tree with vector elements. */
2023 if (vect_debug_details (NULL))
2024 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2026 for (i = nunits - 1; i >= 0; --i)
2028 t = tree_cons (NULL_TREE, op, t);
2030 vec_cst = build_vector (vectype, t);
2031 return vect_init_vector (stmt, vec_cst);
2034 gcc_assert (TREE_CODE (op) == SSA_NAME);
2036 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2038 def_stmt = SSA_NAME_DEF_STMT (op);
2039 def_stmt_info = vinfo_for_stmt (def_stmt);
2041 if (vect_debug_details (NULL))
2043 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2044 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2048 /** ==> Case 2.1: operand is defined inside the loop. **/
2052 /* Get the def from the vectorized stmt. */
2054 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2055 gcc_assert (vec_stmt);
2056 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2061 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2062 it is a reduction/induction. **/
2064 bb = bb_for_stmt (def_stmt);
2065 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2067 if (vect_debug_details (NULL))
2068 fprintf (dump_file, "reduction/induction - unsupported.");
2069 internal_error ("no support for reduction/induction"); /* FORNOW */
2073 /** ==> Case 2.3: operand is defined outside the loop -
2074 it is a loop invariant. */
2076 switch (TREE_CODE (def_stmt))
2079 def = PHI_RESULT (def_stmt);
2082 def = TREE_OPERAND (def_stmt, 0);
2085 def = TREE_OPERAND (def_stmt, 0);
2086 gcc_assert (IS_EMPTY_STMT (def_stmt));
2090 if (vect_debug_details (NULL))
2092 fprintf (dump_file, "unsupported defining stmt: ");
2093 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2095 internal_error ("unsupported defining stmt");
2098 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2100 if (vect_debug_details (NULL))
2101 fprintf (dump_file, "Create vector_inv.");
2103 for (i = nunits - 1; i >= 0; --i)
2105 t = tree_cons (NULL_TREE, def, t);
2108 vec_inv = build_constructor (vectype, t);
2109 return vect_init_vector (stmt, vec_inv);
2113 /* Function vect_finish_stmt_generation.
2115 Insert a new stmt. */
2118 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2120 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2122 if (vect_debug_details (NULL))
2124 fprintf (dump_file, "add new stmt: ");
2125 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2128 /* Make sure bsi points to the stmt that is being vectorized. */
2130 /* Assumption: any stmts created for the vectorization of stmt S were
2131 inserted before S. BSI is expected to point to S or some new stmt before S. */
2133 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2135 gcc_assert (stmt == bsi_stmt (*bsi));
2139 /* Function vectorizable_assignment.
2141 Check if STMT performs an assignment (copy) that can be vectorized.
2142 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2143 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2144 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2147 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2154 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2155 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2158 /* Is vectorizable assignment? */
2160 if (TREE_CODE (stmt) != MODIFY_EXPR)
2163 scalar_dest = TREE_OPERAND (stmt, 0);
2164 if (TREE_CODE (scalar_dest) != SSA_NAME)
2167 op = TREE_OPERAND (stmt, 1);
2168 if (!vect_is_simple_use (op, loop, NULL))
2170 if (vect_debug_details (NULL))
2171 fprintf (dump_file, "use not simple.");
2175 if (!vec_stmt) /* transformation not required. */
2177 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2182 if (vect_debug_details (NULL))
2183 fprintf (dump_file, "transform assignment.");
2186 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2189 op = TREE_OPERAND (stmt, 1);
2190 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2192 /* Arguments are ready. create the new vector stmt. */
2193 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2194 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2195 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2196 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2202 /* Function vectorizable_operation.
2204 Check if STMT performs a binary or unary operation that can be vectorized.
2205 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2206 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2207 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2210 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2215 tree op0, op1 = NULL;
2216 tree vec_oprnd0, vec_oprnd1=NULL;
2217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2218 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2219 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2221 enum tree_code code;
2222 enum machine_mode vec_mode;
2228 /* Is STMT a vectorizable binary/unary operation? */
2229 if (TREE_CODE (stmt) != MODIFY_EXPR)
2232 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2235 operation = TREE_OPERAND (stmt, 1);
2236 code = TREE_CODE (operation);
2237 optab = optab_for_tree_code (code, vectype);
2239 /* Support only unary or binary operations. */
2240 op_type = TREE_CODE_LENGTH (code);
2241 if (op_type != unary_op && op_type != binary_op)
2243 if (vect_debug_details (NULL))
2244 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2248 for (i = 0; i < op_type; i++)
2250 op = TREE_OPERAND (operation, i);
2251 if (!vect_is_simple_use (op, loop, NULL))
2253 if (vect_debug_details (NULL))
2254 fprintf (dump_file, "use not simple.");
2259 /* Supportable by target? */
2262 if (vect_debug_details (NULL))
2263 fprintf (dump_file, "no optab.");
2266 vec_mode = TYPE_MODE (vectype);
2267 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2269 if (vect_debug_details (NULL))
2270 fprintf (dump_file, "op not supported by target.");
2274 if (!vec_stmt) /* transformation not required. */
2276 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2282 if (vect_debug_details (NULL))
2283 fprintf (dump_file, "transform binary/unary operation.");
2286 scalar_dest = TREE_OPERAND (stmt, 0);
2287 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2290 op0 = TREE_OPERAND (operation, 0);
2291 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2293 if (op_type == binary_op)
2295 op1 = TREE_OPERAND (operation, 1);
2296 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2299 /* Arguments are ready. create the new vector stmt. */
2301 if (op_type == binary_op)
2302 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2303 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2305 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2306 build1 (code, vectype, vec_oprnd0));
2307 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2308 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2309 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2315 /* Function vectorizable_store.
2317 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2319 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2320 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2321 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2324 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2330 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2331 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2332 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2333 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2334 enum machine_mode vec_mode;
2336 enum dr_alignment_support alignment_support_cheme;
2338 /* Is vectorizable store? */
2340 if (TREE_CODE (stmt) != MODIFY_EXPR)
2343 scalar_dest = TREE_OPERAND (stmt, 0);
2344 if (TREE_CODE (scalar_dest) != ARRAY_REF
2345 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2348 op = TREE_OPERAND (stmt, 1);
2349 if (!vect_is_simple_use (op, loop, NULL))
2351 if (vect_debug_details (NULL))
2352 fprintf (dump_file, "use not simple.");
2356 vec_mode = TYPE_MODE (vectype);
2357 /* FORNOW. In some cases can vectorize even if data-type not supported
2358 (e.g. - array initialization with 0). */
2359 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2362 if (!STMT_VINFO_DATA_REF (stmt_info))
2366 if (!vec_stmt) /* transformation not required. */
2368 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2374 if (vect_debug_details (NULL))
2375 fprintf (dump_file, "transform store");
2377 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2378 gcc_assert (alignment_support_cheme);
2379 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2381 /* Handle use - get the vectorized def from the defining stmt. */
2382 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2385 /* FORNOW: make sure the data reference is aligned. */
2386 vect_align_data_ref (stmt);
2387 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2388 data_ref = build_fold_indirect_ref (data_ref);
2390 /* Arguments are ready. create the new vector stmt. */
2391 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2392 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2398 /* vectorizable_load.
2400 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2402 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2403 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2404 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2407 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2410 tree vec_dest = NULL;
2411 tree data_ref = NULL;
2413 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2414 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2415 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2422 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2423 edge pe = loop_preheader_edge (loop);
2424 enum dr_alignment_support alignment_support_cheme;
2426 /* Is vectorizable load? */
2428 if (TREE_CODE (stmt) != MODIFY_EXPR)
2431 scalar_dest = TREE_OPERAND (stmt, 0);
2432 if (TREE_CODE (scalar_dest) != SSA_NAME)
2435 op = TREE_OPERAND (stmt, 1);
2436 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2439 if (!STMT_VINFO_DATA_REF (stmt_info))
2442 mode = (int) TYPE_MODE (vectype);
2444 /* FORNOW. In some cases can vectorize even if data-type not supported
2445 (e.g. - data copies). */
2446 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2448 if (vect_debug_details (loop))
2449 fprintf (dump_file, "Aligned load, but unsupported type.");
2453 if (!vec_stmt) /* transformation not required. */
2455 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2461 if (vect_debug_details (NULL))
2462 fprintf (dump_file, "transform load.");
2464 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2465 gcc_assert (alignment_support_cheme);
2467 if (alignment_support_cheme == dr_aligned
2468 || alignment_support_cheme == dr_unaligned_supported)
2479 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2480 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2481 if (aligned_access_p (dr))
2482 data_ref = build_fold_indirect_ref (data_ref);
2485 int mis = DR_MISALIGNMENT (dr);
2486 tree tmis = (mis == -1 ?
2488 build_int_cst (integer_type_node, mis));
2489 tmis = int_const_binop (MULT_EXPR, tmis,
2490 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2491 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2493 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2494 new_temp = make_ssa_name (vec_dest, new_stmt);
2495 TREE_OPERAND (new_stmt, 0) = new_temp;
2496 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2498 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
2502 msq_init = *(floor(p1))
2503 p2 = initial_addr + VS - 1;
2504 magic = have_builtin ? builtin_result : initial_address;
2507 p2' = p2 + indx * vectype_size
2509 vec_dest = realign_load (msq, lsq, magic)
2523 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2524 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2525 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2527 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2528 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2529 new_temp = make_ssa_name (vec_dest, new_stmt);
2530 TREE_OPERAND (new_stmt, 0) = new_temp;
2531 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2532 gcc_assert (!new_bb);
2533 msq_init = TREE_OPERAND (new_stmt, 0);
2536 /* <2> Create lsq = *(floor(p2')) in the loop */
2537 offset = build_int_cst (integer_type_node,
2538 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2539 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2540 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2541 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2542 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2543 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2544 new_temp = make_ssa_name (vec_dest, new_stmt);
2545 TREE_OPERAND (new_stmt, 0) = new_temp;
2546 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2547 lsq = TREE_OPERAND (new_stmt, 0);
2551 if (targetm.vectorize.builtin_mask_for_load)
2553 /* Create permutation mask, if required, in loop preheader. */
2555 params = build_tree_list (NULL_TREE, init_addr);
2556 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2557 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2558 new_stmt = build_function_call_expr (builtin_decl, params);
2559 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2560 new_temp = make_ssa_name (vec_dest, new_stmt);
2561 TREE_OPERAND (new_stmt, 0) = new_temp;
2562 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2563 gcc_assert (!new_bb);
2564 magic = TREE_OPERAND (new_stmt, 0);
2568 /* Use current address instead of init_addr for reduced reg pressure.
2570 magic = dataref_ptr;
2574 /* <4> Create msq = phi <msq_init, lsq> in loop */
2575 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2576 msq = make_ssa_name (vec_dest, NULL_TREE);
2577 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2578 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2579 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2580 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2583 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2584 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2585 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2586 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2587 new_temp = make_ssa_name (vec_dest, new_stmt);
2588 TREE_OPERAND (new_stmt, 0) = new_temp;
2589 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2594 *vec_stmt = new_stmt;
2599 /* Function vect_supportable_dr_alignment
2601 Return whether the data reference DR is supported with respect to its
2604 static enum dr_alignment_support
2605 vect_supportable_dr_alignment (struct data_reference *dr)
2607 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2608 enum machine_mode mode = (int) TYPE_MODE (vectype);
2610 if (aligned_access_p (dr))
2613 /* Possibly unaligned access. */
2615 if (DR_IS_READ (dr))
2617 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2618 && (!targetm.vectorize.builtin_mask_for_load
2619 || targetm.vectorize.builtin_mask_for_load ()))
2620 return dr_unaligned_software_pipeline;
2622 if (targetm.vectorize.misaligned_mem_ok (mode))
2623 /* Can't software pipeline the loads. */
2624 return dr_unaligned_supported;
2628 return dr_unaligned_unsupported;
2632 /* Function vect_transform_stmt.
2634 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2637 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2639 bool is_store = false;
2640 tree vec_stmt = NULL_TREE;
2641 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2644 switch (STMT_VINFO_TYPE (stmt_info))
2646 case op_vec_info_type:
2647 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2651 case assignment_vec_info_type:
2652 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2656 case load_vec_info_type:
2657 done = vectorizable_load (stmt, bsi, &vec_stmt);
2661 case store_vec_info_type:
2662 done = vectorizable_store (stmt, bsi, &vec_stmt);
2667 if (vect_debug_details (NULL))
2668 fprintf (dump_file, "stmt not supported.");
2672 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2678 /* This function builds ni_name = number of iterations loop executes
2679 on the loop preheader. */
2682 vect_build_loop_niters (loop_vec_info loop_vinfo)
2684 tree ni_name, stmt, var;
2687 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2688 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2690 var = create_tmp_var (TREE_TYPE (ni), "niters");
2691 add_referenced_tmp_var (var);
2692 if (TREE_CODE (ni) == INTEGER_CST)
2694 /* This case is generated when treating a known loop bound
2695 indivisible by VF. Here we cannot use force_gimple_operand. */
2696 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2697 ni_name = make_ssa_name (var, stmt);
2698 TREE_OPERAND (stmt, 0) = ni_name;
2701 ni_name = force_gimple_operand (ni, &stmt, false, var);
2703 pe = loop_preheader_edge (loop);
2704 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2706 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2712 /* This function generates the following statements:
2714 ni_name = number of iterations loop executes
2715 ratio = ni_name / vf
2716 ratio_mult_vf_name = ratio * vf
2718 and places them at the loop preheader edge. */
2721 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2722 tree *ratio_mult_vf_name_p, tree *ratio_p)
2729 tree ratio_mult_vf_name, ratio_mult_vf;
2730 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2731 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2735 /* Generate temporary variable that contains
2736 number of iterations loop executes. */
2738 ni_name = vect_build_loop_niters (loop_vinfo);
2741 vf is power of 2; then if ratio = = n >> log2 (vf). */
2742 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2743 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2745 /* Update initial conditions of loop copy. */
2747 /* ratio_mult_vf = ratio * vf;
2748 then if ratio_mult_vf = ratio << log2 (vf). */
2750 i = exact_log2 (vf);
2751 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2752 add_referenced_tmp_var (ratio_mult_vf);
2754 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2756 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2757 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2758 ratio, build_int_cst (unsigned_type_node,
2761 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2763 pe = loop_preheader_edge (loop);
2764 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2766 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2768 *ni_name_p = ni_name;
2769 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2776 /* This function generates stmt
2780 and attaches it to preheader of LOOP. */
2783 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2785 tree var, stmt, var_name;
2790 /* create temporary variable */
2791 var = create_tmp_var (TREE_TYPE (n), "bnd");
2792 add_referenced_tmp_var (var);
2794 var_name = make_ssa_name (var, NULL_TREE);
2796 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2798 i = exact_log2 (vf);
2799 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2800 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2801 n, build_int_cst (unsigned_type_node,i)));
2803 SSA_NAME_DEF_STMT (var_name) = stmt;
2805 pe = loop_preheader_edge (loop);
2806 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2808 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2810 if (vect_debug_details (NULL))
2811 fprintf (dump_file, "New bb on preheader edge was not generated.");
2817 /* Function vect_transform_loop_bound.
2819 Create a new exit condition for the loop. */
2822 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2824 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2825 edge exit_edge = loop->single_exit;
2826 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
2827 tree indx_before_incr, indx_after_incr;
2828 tree orig_cond_expr;
2829 HOST_WIDE_INT old_N = 0;
2832 tree new_loop_bound;
2837 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2840 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2842 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2844 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2845 #ifdef ENABLE_CHECKING
2846 gcc_assert (orig_cond_expr);
2848 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
2850 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
2851 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
2853 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
2854 to point to the exit condition. */
2855 bsi_next (&loop_exit_bsi);
2856 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
2858 /* new loop exit test: */
2859 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
2861 new_loop_bound = fold_convert (lb_type,
2862 build_int_cst (unsigned_type_node,
2865 new_loop_bound = niters;
2867 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
2868 cond = build2 (GE_EXPR, boolean_type_node,
2869 indx_after_incr, new_loop_bound);
2870 else /* 'then' edge loops back. */
2871 cond = build2 (LT_EXPR, boolean_type_node,
2872 indx_after_incr, new_loop_bound);
2874 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
2875 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
2877 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
2879 /* remove old loop exit test: */
2880 bsi_remove (&loop_exit_bsi);
2882 if (vect_debug_details (NULL))
2883 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
2887 /* Advance IVs of the loop (to be vectorized later) to correct position.
2889 When loop is vectorized, its IVs are not always advanced
2890 correctly since vectorization changes the loop count. It's ok
2891 in case epilog loop was not produced after original one before
2892 vectorization process (the vectorizer checks that there is no uses
2893 of IVs after the loop). However, in case the epilog loop was peeled,
2894 IVs from original loop are used in epilog loop and should be
2897 Here we use access functions of IVs and number of
2898 iteration loop executes in order to bring IVs to correct position.
2900 Function also update phis of basic block at the exit
2904 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2906 edge exit = loop->exit_edges[0];
2908 edge latch = loop_latch_edge (loop);
2910 /* Generate basic block at the exit from the loop. */
2911 basic_block new_bb = split_edge (exit);
2912 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2914 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2916 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
2918 tree access_fn = NULL;
2919 tree evolution_part;
2922 tree var, stmt, ni, ni_name;
2923 int i, j, num_elem1, num_elem2;
2925 block_stmt_iterator last_bsi;
2927 /* Skip virtual phi's. The data dependences that are associated with
2928 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2930 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2932 if (vect_debug_details (NULL))
2933 fprintf (dump_file, "virtual phi. skip.");
2937 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2939 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2941 /* FORNOW: We do not transform initial conditions of IVs
2942 which evolution functions are a polynomial of degree >= 2 or
2945 step_expr = evolution_part;
2946 init_expr = initial_condition (access_fn);
2948 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2949 build2 (MULT_EXPR, TREE_TYPE (niters),
2950 niters, step_expr), init_expr);
2952 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2953 add_referenced_tmp_var (var);
2955 ni_name = force_gimple_operand (ni, &stmt, false, var);
2957 /* Insert stmt into new_bb. */
2958 last_bsi = bsi_last (new_bb);
2959 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2961 /* Fix phi expressions in duplicated loop. */
2962 num_elem1 = PHI_NUM_ARGS (phi);
2963 for (i = 0; i < num_elem1; i++)
2964 if (PHI_ARG_EDGE (phi, i) == latch)
2966 tree def = PHI_ARG_DEF (phi, i);
2968 for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1;
2969 phi1 = TREE_CHAIN (phi1))
2971 num_elem2 = PHI_NUM_ARGS (phi1);
2972 for (j = 0; j < num_elem2; j++)
2973 if (PHI_ARG_DEF (phi1, j) == def)
2975 SET_PHI_ARG_DEF (phi1, j, ni_name);
2976 PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);
2987 /* This function is the main driver of transformation
2988 to be done for loop before vectorizing it in case of
2989 unknown loop bound. */
2992 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
2993 struct loops *loops)
2996 tree ni_name, ratio_mult_vf_name;
2997 #ifdef ENABLE_CHECKING
3000 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3001 struct loop *new_loop;
3003 if (vect_debug_details (NULL))
3004 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3006 /* Generate the following variables on the preheader of original loop:
3008 ni_name = number of iteration the original loop executes
3009 ratio = ni_name / vf
3010 ratio_mult_vf_name = ratio * vf */
3011 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3012 &ratio_mult_vf_name, ratio);
3014 /* Update loop info. */
3015 loop->pre_header = loop_preheader_edge (loop)->src;
3016 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3018 #ifdef ENABLE_CHECKING
3019 loop_num = loop->num;
3021 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
3022 ratio_mult_vf_name, ni_name, true);
3023 #ifdef ENABLE_CHECKING
3024 gcc_assert (new_loop);
3025 gcc_assert (loop_num == loop->num);
3028 /* Update IVs of original loop as if they were advanced
3029 by ratio_mult_vf_name steps. */
3031 #ifdef ENABLE_CHECKING
3032 /* Check existence of intermediate bb. */
3033 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3035 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3042 /* Function vect_gen_niters_for_prolog_loop
3044 Set the number of iterations for the loop represented by LOOP_VINFO
3045 to the minimum between NITERS (the original iteration count of the loop)
3046 and the misalignment of DR - the first data reference recorded in
3047 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3048 this loop, the data reference DR will refer to an aligned location. */
3051 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3053 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3054 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3055 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3057 tree iters, iters_name;
3060 tree dr_stmt = DR_STMT (dr);
3061 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3062 tree start_addr, byte_miss_align, elem_miss_align;
3063 int vec_type_align =
3064 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3067 tree new_stmt_list = NULL_TREE;
3069 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3070 &new_stmt_list, NULL_TREE);
3072 pe = loop_preheader_edge (loop);
3073 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3075 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3078 build (BIT_AND_EXPR, integer_type_node, start_addr,
3079 build (MINUS_EXPR, integer_type_node,
3080 build_int_cst (unsigned_type_node,
3081 vec_type_align), integer_one_node));
3082 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3083 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3084 byte_miss_align, tmp1);
3087 build (BIT_AND_EXPR, integer_type_node,
3088 build (MINUS_EXPR, integer_type_node,
3089 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3090 build (MINUS_EXPR, integer_type_node,
3091 build_int_cst (unsigned_type_node, vf), integer_one_node));
3093 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3094 var = create_tmp_var (TREE_TYPE (iters), "iters");
3095 add_referenced_tmp_var (var);
3096 iters_name = force_gimple_operand (iters, &stmt, false, var);
3098 /* Insert stmt on loop preheader edge. */
3099 pe = loop_preheader_edge (loop);
3100 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3102 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3108 /* Function vect_update_niters_after_peeling
3110 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3111 The new number of iterations is therefore original_niters - NITERS.
3112 Record the new number of iterations in LOOP_VINFO. */
3115 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3117 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3118 LOOP_VINFO_NITERS (loop_vinfo) =
3119 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3123 /* Function vect_update_inits_of_dr
3125 NITERS iterations were peeled from LOOP. DR represents a data reference
3126 in LOOP. This function updates the information recorded in DR to
3127 account for the fact that the first NITERS iterations had already been
3128 executed. Specifically, it updates the initial_condition of the
3129 access_function of DR. */
3132 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3135 tree access_fn = DR_ACCESS_FN (dr, 0);
3136 tree init, init_new, step;
3138 step = evolution_part_in_loop_num (access_fn, loop->num);
3139 init = initial_condition (access_fn);
3141 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3142 build (MULT_EXPR, TREE_TYPE (niters),
3143 niters, step), init);
3144 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3150 /* Function vect_update_inits_of_drs
3152 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3153 This function updates the information recorded for the data references in
3154 the loop to account for the fact that the first NITERS iterations had
3155 already been executed. Specifically, it updates the initial_condition of the
3156 access_function of all the data_references in the loop. */
3159 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3162 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3163 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3164 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3166 if (dump_file && (dump_flags & TDF_DETAILS))
3167 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3169 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3171 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3172 vect_update_inits_of_dr (dr, loop, niters);
3175 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3177 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3178 vect_update_inits_of_dr (dr, loop, niters);
3183 /* Function vect_do_peeling_for_alignment
3185 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3186 'niters' is set to the misalignment of one of the data references in the
3187 loop, thereby forcing it to refer to an aligned location at the beginning
3188 of the execution of this loop. The data reference for which we are
3189 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3192 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3194 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3195 tree niters_of_prolog_loop, ni_name;
3197 if (vect_debug_details (NULL))
3198 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3200 ni_name = vect_build_loop_niters (loop_vinfo);
3201 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3204 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3205 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3206 niters_of_prolog_loop, ni_name, false);
3208 /* Update number of times loop executes. */
3209 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3211 /* Update all inits of access functions of all data refs. */
3212 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3214 /* After peeling we have to reset scalar evolution analyzer. */
3221 /* Function vect_transform_loop.
3223 The analysis phase has determined that the loop is vectorizable.
3224 Vectorize the loop - created vectorized stmts to replace the scalar
3225 stmts in the loop, and update the loop exit condition. */
3228 vect_transform_loop (loop_vec_info loop_vinfo,
3229 struct loops *loops ATTRIBUTE_UNUSED)
3231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3232 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3233 int nbbs = loop->num_nodes;
3234 block_stmt_iterator si;
3237 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3239 if (vect_debug_details (NULL))
3240 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3243 /* Peel the loop if there are data refs with unknown alignment.
3244 Only one data ref with unknown store is allowed. */
3247 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3248 vect_do_peeling_for_alignment (loop_vinfo, loops);
3250 /* If the loop has a symbolic number of iterations 'n'
3251 (i.e. it's not a compile time constant),
3252 then an epilog loop needs to be created. We therefore duplicate
3253 the initial loop. The original loop will be vectorized, and will compute
3254 the first (n/VF) iterations. The second copy of the loop will remain
3255 serial and will compute the remaining (n%VF) iterations.
3256 (VF is the vectorization factor). */
3258 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3259 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3261 /* FORNOW: we'll treat the case where niters is constant and
3265 in the way similar to one with symbolic niters.
3266 For this we'll generate variable which value is equal to niters. */
3268 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3269 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3270 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3273 /* 1) Make sure the loop header has exactly two entries
3274 2) Make sure we have a preheader basic block. */
3276 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3278 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3281 /* FORNOW: the vectorizer supports only loops which body consist
3282 of one basic block (header + empty latch). When the vectorizer will
3283 support more involved loop forms, the order by which the BBs are
3284 traversed need to be reconsidered. */
3286 for (i = 0; i < nbbs; i++)
3288 basic_block bb = bbs[i];
3290 for (si = bsi_start (bb); !bsi_end_p (si);)
3292 tree stmt = bsi_stmt (si);
3293 stmt_vec_info stmt_info;
3296 if (vect_debug_details (NULL))
3298 fprintf (dump_file, "------>vectorizing statement: ");
3299 print_generic_expr (dump_file, stmt, TDF_SLIM);
3301 stmt_info = vinfo_for_stmt (stmt);
3302 gcc_assert (stmt_info);
3303 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3308 #ifdef ENABLE_CHECKING
3309 /* FORNOW: Verify that all stmts operate on the same number of
3310 units and no inner unrolling is necessary. */
3312 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3313 == vectorization_factor);
3315 /* -------- vectorize statement ------------ */
3316 if (vect_debug_details (NULL))
3317 fprintf (dump_file, "transform statement.");
3319 is_store = vect_transform_stmt (stmt, &si);
3322 /* free the attached stmt_vec_info and remove the stmt. */
3323 stmt_ann_t ann = stmt_ann (stmt);
3325 set_stmt_info (ann, NULL);
3334 vect_transform_loop_bound (loop_vinfo, ratio);
3336 if (vect_debug_details (loop))
3337 fprintf (dump_file,"Success! loop vectorized.");
3338 if (vect_debug_stats (loop))
3339 fprintf (dump_file, "LOOP VECTORIZED.");
3343 /* Function vect_is_simple_use.
3346 LOOP - the loop that is being vectorized.
3347 OPERAND - operand of a stmt in LOOP.
3348 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3350 Returns whether a stmt with OPERAND can be vectorized.
3351 Supportable operands are constants, loop invariants, and operands that are
3352 defined by the current iteration of the loop. Unsupportable operands are
3353 those that are defined by a previous iteration of the loop (as is the case
3354 in reduction/induction computations). */
3357 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3365 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3368 if (TREE_CODE (operand) != SSA_NAME)
3371 def_stmt = SSA_NAME_DEF_STMT (operand);
3372 if (def_stmt == NULL_TREE )
3374 if (vect_debug_details (NULL))
3375 fprintf (dump_file, "no def_stmt.");
3379 /* empty stmt is expected only in case of a function argument.
3380 (Otherwise - we expect a phi_node or a modify_expr). */
3381 if (IS_EMPTY_STMT (def_stmt))
3383 tree arg = TREE_OPERAND (def_stmt, 0);
3384 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3386 if (vect_debug_details (NULL))
3388 fprintf (dump_file, "Unexpected empty stmt: ");
3389 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3394 /* phi_node inside the loop indicates an induction/reduction pattern.
3395 This is not supported yet. */
3396 bb = bb_for_stmt (def_stmt);
3397 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3399 if (vect_debug_details (NULL))
3400 fprintf (dump_file, "reduction/induction - unsupported.");
3401 return false; /* FORNOW: not supported yet. */
3404 /* Expecting a modify_expr or a phi_node. */
3405 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3406 || TREE_CODE (def_stmt) == PHI_NODE)
3417 /* Function vect_analyze_operations.
3419 Scan the loop stmts and make sure they are all vectorizable. */
3422 vect_analyze_operations (loop_vec_info loop_vinfo)
3424 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3425 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3426 int nbbs = loop->num_nodes;
3427 block_stmt_iterator si;
3428 int vectorization_factor = 0;
3433 if (vect_debug_details (NULL))
3434 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3436 for (i = 0; i < nbbs; i++)
3438 basic_block bb = bbs[i];
3440 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3442 tree stmt = bsi_stmt (si);
3444 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3447 if (vect_debug_details (NULL))
3449 fprintf (dump_file, "==> examining statement: ");
3450 print_generic_expr (dump_file, stmt, TDF_SLIM);
3453 gcc_assert (stmt_info);
3455 /* skip stmts which do not need to be vectorized.
3456 this is expected to include:
3457 - the COND_EXPR which is the loop exit condition
3458 - any LABEL_EXPRs in the loop
3459 - computations that are used only for array indexing or loop
3462 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3464 if (vect_debug_details (NULL))
3465 fprintf (dump_file, "irrelevant.");
3469 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3471 if (vect_debug_stats (loop) || vect_debug_details (loop))
3473 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3474 print_generic_expr (dump_file, stmt, TDF_SLIM);
3479 if (STMT_VINFO_DATA_REF (stmt_info))
3480 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3481 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3482 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3484 scalar_type = TREE_TYPE (stmt);
3486 if (vect_debug_details (NULL))
3488 fprintf (dump_file, "get vectype for scalar type: ");
3489 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3492 vectype = get_vectype_for_scalar_type (scalar_type);
3495 if (vect_debug_stats (loop) || vect_debug_details (loop))
3497 fprintf (dump_file, "not vectorized: unsupported data-type ");
3498 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3503 if (vect_debug_details (NULL))
3505 fprintf (dump_file, "vectype: ");
3506 print_generic_expr (dump_file, vectype, TDF_SLIM);
3508 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3510 ok = (vectorizable_operation (stmt, NULL, NULL)
3511 || vectorizable_assignment (stmt, NULL, NULL)
3512 || vectorizable_load (stmt, NULL, NULL)
3513 || vectorizable_store (stmt, NULL, NULL));
3517 if (vect_debug_stats (loop) || vect_debug_details (loop))
3519 fprintf (dump_file, "not vectorized: stmt not supported: ");
3520 print_generic_expr (dump_file, stmt, TDF_SLIM);
3525 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3526 if (vect_debug_details (NULL))
3527 fprintf (dump_file, "nunits = %d", nunits);
3529 if (vectorization_factor)
3531 /* FORNOW: don't allow mixed units.
3532 This restriction will be relaxed in the future. */
3533 if (nunits != vectorization_factor)
3535 if (vect_debug_stats (loop) || vect_debug_details (loop))
3536 fprintf (dump_file, "not vectorized: mixed data-types");
3541 vectorization_factor = nunits;
3543 #ifdef ENABLE_CHECKING
3544 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3545 * vectorization_factor == UNITS_PER_SIMD_WORD);
3550 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3552 if (vectorization_factor <= 1)
3554 if (vect_debug_stats (loop) || vect_debug_details (loop))
3555 fprintf (dump_file, "not vectorized: unsupported data-type");
3558 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3561 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3562 && vect_debug_details (NULL))
3564 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3565 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3567 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3568 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3570 /* In this case we have to generate epilog loop, that
3571 can be done only for loops with one entry edge. */
3572 if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
3573 || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
3575 if (vect_debug_stats (loop) || vect_debug_details (loop))
3576 fprintf (dump_file, "not vectorized: more than one entry.");
3585 /* Function exist_non_indexing_operands_for_use_p
3587 USE is one of the uses attached to STMT. Check if USE is
3588 used in STMT for anything other than indexing an array. */
3591 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3594 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3596 /* USE corresponds to some operand in STMT. If there is no data
3597 reference in STMT, then any operand that corresponds to USE
3598 is not indexing an array. */
3599 if (!STMT_VINFO_DATA_REF (stmt_info))
3602 /* STMT has a data_ref. FORNOW this means that its of one of
3603 the following forms:
3606 (This should have been verified in analyze_data_refs).
3608 'var' in the second case corresponds to a def, not a use,
3609 so USE cannot correspond to any operands that are not used
3612 Therefore, all we need to check is if STMT falls into the
3613 first case, and whether var corresponds to USE. */
3615 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3618 operand = TREE_OPERAND (stmt, 1);
3620 if (TREE_CODE (operand) != SSA_NAME)
3630 /* Function vect_is_simple_iv_evolution.
3632 FORNOW: A simple evolution of an induction variables in the loop is
3633 considered a polynomial evolution with constant step. */
3636 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3637 tree * step, bool strict)
3642 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3644 /* When there is no evolution in this loop, the evolution function
3646 if (evolution_part == NULL_TREE)
3649 /* When the evolution is a polynomial of degree >= 2
3650 the evolution function is not "simple". */
3651 if (tree_is_chrec (evolution_part))
3654 step_expr = evolution_part;
3655 init_expr = unshare_expr (initial_condition (access_fn));
3657 if (vect_debug_details (NULL))
3659 fprintf (dump_file, "step: ");
3660 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3661 fprintf (dump_file, ", init: ");
3662 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3668 if (TREE_CODE (step_expr) != INTEGER_CST)
3670 if (vect_debug_details (NULL))
3671 fprintf (dump_file, "step unknown.");
3676 if (!integer_onep (step_expr))
3678 if (vect_debug_details (NULL))
3679 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3687 /* Function vect_analyze_scalar_cycles.
3689 Examine the cross iteration def-use cycles of scalar variables, by
3690 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3691 cycles that they represent do not impede vectorization.
3693 FORNOW: Reduction as in the following loop, is not supported yet:
3697 The cross-iteration cycle corresponding to variable 'sum' will be
3698 considered too complicated and will impede vectorization.
3700 FORNOW: Induction as in the following loop, is not supported yet:
3705 However, the following loop *is* vectorizable:
3710 In both loops there exists a def-use cycle for the variable i:
3711 loop: i_2 = PHI (i_0, i_1)
3716 The evolution of the above cycle is considered simple enough,
3717 however, we also check that the cycle does not need to be
3718 vectorized, i.e - we check that the variable that this cycle
3719 defines is only used for array indexing or in stmts that do not
3720 need to be vectorized. This is not the case in loop2, but it
3721 *is* the case in loop3. */
3724 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3727 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3728 basic_block bb = loop->header;
3731 if (vect_debug_details (NULL))
3732 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3734 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
3736 tree access_fn = NULL;
3738 if (vect_debug_details (NULL))
3740 fprintf (dump_file, "Analyze phi: ");
3741 print_generic_expr (dump_file, phi, TDF_SLIM);
3744 /* Skip virtual phi's. The data dependences that are associated with
3745 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3747 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3749 if (vect_debug_details (NULL))
3750 fprintf (dump_file, "virtual phi. skip.");
3754 /* Analyze the evolution function. */
3756 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3757 those of loop induction variables; This property is verified here.
3759 Furthermore, if that induction variable is used in an operation
3760 that needs to be vectorized (i.e, is not solely used to index
3761 arrays and check the exit condition) - we do not support its
3762 vectorization yet. This property is verified in vect_is_simple_use,
3763 during vect_analyze_operations. */
3765 access_fn = /* instantiate_parameters
3767 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3771 if (vect_debug_stats (loop) || vect_debug_details (loop))
3772 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3776 if (vect_debug_details (NULL))
3778 fprintf (dump_file, "Access function of PHI: ");
3779 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3782 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3785 if (vect_debug_stats (loop) || vect_debug_details (loop))
3786 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3795 /* Function vect_analyze_data_ref_dependence.
3797 Return TRUE if there (might) exist a dependence between a memory-reference
3798 DRA and a memory-reference DRB. */
3801 vect_analyze_data_ref_dependence (struct data_reference *dra,
3802 struct data_reference *drb,
3806 struct data_dependence_relation *ddr;
3808 if (!array_base_name_differ_p (dra, drb, &differ_p))
3810 if (vect_debug_stats (loop) || vect_debug_details (loop))
3813 "not vectorized: can't determine dependence between: ");
3814 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3815 fprintf (dump_file, " and ");
3816 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3824 ddr = initialize_data_dependence_relation (dra, drb);
3825 compute_affine_dependence (ddr);
3827 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3830 if (vect_debug_stats (loop) || vect_debug_details (loop))
3833 "not vectorized: possible dependence between data-refs ");
3834 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3835 fprintf (dump_file, " and ");
3836 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3843 /* Function vect_analyze_data_ref_dependences.
3845 Examine all the data references in the loop, and make sure there do not
3846 exist any data dependences between them.
3848 TODO: dependences which distance is greater than the vectorization factor
3852 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3855 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3856 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3857 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3859 /* Examine store-store (output) dependences. */
3861 if (vect_debug_details (NULL))
3862 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3864 if (vect_debug_details (NULL))
3865 fprintf (dump_file, "compare all store-store pairs.");
3867 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3869 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3871 struct data_reference *dra =
3872 VARRAY_GENERIC_PTR (loop_write_refs, i);
3873 struct data_reference *drb =
3874 VARRAY_GENERIC_PTR (loop_write_refs, j);
3875 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3880 /* Examine load-store (true/anti) dependences. */
3882 if (vect_debug_details (NULL))
3883 fprintf (dump_file, "compare all load-store pairs.");
3885 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3887 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3889 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3890 struct data_reference *drb =
3891 VARRAY_GENERIC_PTR (loop_write_refs, j);
3892 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3901 /* Function vect_get_first_index.
3903 REF is a data reference.
3904 If it is an ARRAY_REF: if its lower bound is simple enough,
3905 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3906 If it is not an ARRAY_REF: REF has no "first index";
3907 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3910 vect_get_first_index (tree ref, tree *array_first_index)
3914 if (TREE_CODE (ref) != ARRAY_REF)
3915 *array_first_index = size_zero_node;
3918 array_start = array_ref_low_bound (ref);
3919 if (!host_integerp (array_start,0))
3921 if (vect_debug_details (NULL))
3923 fprintf (dump_file, "array min val not simple integer cst.");
3924 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3928 *array_first_index = array_start;
3935 /* Function vect_compute_array_base_alignment.
3936 A utility function of vect_compute_array_ref_alignment.
3938 Compute the misalignment of ARRAY in bits.
3941 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3942 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3943 if NULL: don't compute misalignment, just return the base of ARRAY.
3944 PREV_DIMENSIONS - initialized to one.
3945 MISALIGNMENT - the computed misalignment in bits.
3948 If VECTYPE is not NULL:
3949 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3950 the base of the array, and put the computed misalignment in MISALIGNMENT.
3952 Return the base of the array.
3954 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3955 a[idx_N]...[idx_2][idx_1] is
3956 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3957 ... + idx_N * dim_0 * ... * dim_N-1}.
3958 (The misalignment of &a is not checked here).
3959 Note, that every term contains dim_0, therefore, if dim_0 is a
3960 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3961 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3962 NUINTS, we can say that the misalignment of the sum is equal to
3963 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3964 we can't determine this array misalignment, and we return
3966 We proceed recursively in this manner, accumulating total misalignment
3967 and the multiplication of previous dimensions for correct misalignment
3971 vect_compute_array_base_alignment (tree array,
3973 tree *prev_dimensions,
3978 tree dimension_size;
3980 tree bits_per_vectype;
3981 tree bits_per_vectype_unit;
3983 /* The 'stop condition' of the recursion. */
3984 if (TREE_CODE (array) != ARRAY_REF)
3988 /* Just get the base decl. */
3989 return vect_compute_array_base_alignment
3990 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3992 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3993 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3996 domain = TYPE_DOMAIN (TREE_TYPE (array));
3998 int_const_binop (PLUS_EXPR,
3999 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4000 TYPE_MIN_VALUE (domain), 1),
4003 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4004 is a multiple of NUNITS:
4006 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4008 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4009 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4010 if (integer_zerop (mis))
4011 /* This array is aligned. Continue just in order to get the base decl. */
4012 return vect_compute_array_base_alignment
4013 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4015 index = TREE_OPERAND (array, 1);
4016 if (!host_integerp (index, 1))
4017 /* The current index is not constant. */
4020 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4022 bits_per_vectype = fold_convert (unsigned_type_node,
4023 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4024 GET_MODE_SIZE (TYPE_MODE (vectype))));
4025 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4026 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4027 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4029 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4033 (*misalignment + index_val * dimension_size * *prev_dimensions)
4037 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4038 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4039 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4040 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4041 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4044 *prev_dimensions = int_const_binop (MULT_EXPR,
4045 *prev_dimensions, dimension_size, 1);
4047 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4053 /* Function vect_compute_data_ref_alignment
4055 Compute the misalignment of the data reference DR.
4058 1. If during the misalignment computation it is found that the data reference
4059 cannot be vectorized then false is returned.
4060 2. DR_MISALIGNMENT (DR) is defined.
4062 FOR NOW: No analysis is actually performed. Misalignment is calculated
4063 only for trivial cases. TODO. */
4066 vect_compute_data_ref_alignment (struct data_reference *dr,
4067 loop_vec_info loop_vinfo)
4069 tree stmt = DR_STMT (dr);
4070 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4071 tree ref = DR_REF (dr);
4074 tree offset = size_zero_node;
4075 tree base, bit_offset, alignment;
4076 tree unit_bits = fold_convert (unsigned_type_node,
4077 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4079 bool base_aligned_p;
4081 if (vect_debug_details (NULL))
4082 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4084 /* Initialize misalignment to unknown. */
4085 DR_MISALIGNMENT (dr) = -1;
4087 scalar_type = TREE_TYPE (ref);
4088 vectype = get_vectype_for_scalar_type (scalar_type);
4091 if (vect_debug_details (NULL))
4093 fprintf (dump_file, "no vectype for stmt: ");
4094 print_generic_expr (dump_file, stmt, TDF_SLIM);
4095 fprintf (dump_file, " scalar_type: ");
4096 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4098 /* It is not possible to vectorize this data reference. */
4101 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4102 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4104 if (TREE_CODE (ref) == ARRAY_REF)
4107 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4109 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4110 loop_vinfo, &bit_offset, &base_aligned_p);
4113 if (vect_debug_details (NULL))
4115 fprintf (dump_file, "Unknown alignment for access: ");
4116 print_generic_expr (dump_file,
4117 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4122 if (!base_aligned_p)
4124 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4126 if (vect_debug_details (NULL))
4128 fprintf (dump_file, "can't force alignment of ref: ");
4129 print_generic_expr (dump_file, ref, TDF_SLIM);
4134 /* Force the alignment of the decl.
4135 NOTE: This is the only change to the code we make during
4136 the analysis phase, before deciding to vectorize the loop. */
4137 if (vect_debug_details (NULL))
4138 fprintf (dump_file, "force alignment");
4139 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4140 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4143 /* At this point we assume that the base is aligned, and the offset from it
4144 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4145 gcc_assert (base_aligned_p
4146 || (TREE_CODE (base) == VAR_DECL
4147 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4149 /* Convert into bytes. */
4150 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4151 /* Check that there is no remainder in bits. */
4152 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4153 if (!integer_zerop (bit_offset))
4155 if (vect_debug_details (NULL))
4157 fprintf (dump_file, "bit offset alignment: ");
4158 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4163 /* Alignment required, in bytes: */
4164 alignment = fold_convert (unsigned_type_node,
4165 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4167 /* Modulo alignment. */
4168 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4169 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4171 if (vect_debug_details (NULL))
4172 fprintf (dump_file, "unexpected misalign value");
4176 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4178 if (vect_debug_details (NULL))
4179 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4185 /* Function vect_compute_array_ref_alignment
4187 Compute the alignment of an array-ref.
4188 The alignment we compute here is relative to
4189 TYPE_ALIGN(VECTYPE) boundary.
4192 OFFSET - the alignment in bits
4193 Return value - the base of the array-ref. E.g,
4194 if the array-ref is a.b[k].c[i][j] the returned
4199 vect_compute_array_ref_alignment (struct data_reference *dr,
4200 loop_vec_info loop_vinfo,
4204 tree array_first_index = size_zero_node;
4206 tree ref = DR_REF (dr);
4207 tree scalar_type = TREE_TYPE (ref);
4208 tree oprnd0 = TREE_OPERAND (ref, 0);
4209 tree dims = size_one_node;
4210 tree misalign = size_zero_node;
4211 tree next_ref, this_offset = size_zero_node;
4215 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4216 /* The reference is an array without its last index. */
4217 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4220 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4223 /* Alignment is not requested. Just return the base. */
4226 /* Compute alignment. */
4227 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4229 this_offset = misalign;
4231 /* Check the first index accessed. */
4232 if (!vect_get_first_index (ref, &array_first_index))
4234 if (vect_debug_details (NULL))
4235 fprintf (dump_file, "no first_index for array.");
4239 /* Check the index of the array_ref. */
4240 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4241 LOOP_VINFO_LOOP (loop_vinfo)->num);
4243 /* FORNOW: In order to simplify the handling of alignment, we make sure
4244 that the first location at which the array is accessed ('init') is on an
4245 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4246 This is too conservative, since we require that
4247 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4248 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4249 This should be relaxed in the future. */
4251 if (!init || !host_integerp (init, 0))
4253 if (vect_debug_details (NULL))
4254 fprintf (dump_file, "non constant init. ");
4258 /* bytes per scalar element: */
4259 nunits = fold_convert (unsigned_type_node,
4260 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4261 nbits = int_const_binop (MULT_EXPR, nunits,
4262 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4264 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4265 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4266 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4267 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4269 /* TODO: allow negative misalign values. */
4270 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4272 if (vect_debug_details (NULL))
4273 fprintf (dump_file, "unexpected misalign value");
4281 /* Function vect_compute_data_refs_alignment
4283 Compute the misalignment of data references in the loop.
4284 This pass may take place at function granularity instead of at loop
4287 FOR NOW: No analysis is actually performed. Misalignment is calculated
4288 only for trivial cases. TODO. */
4291 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4293 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4294 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4297 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4299 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4300 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4304 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4306 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4307 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4315 /* Function vect_enhance_data_refs_alignment
4317 This pass will use loop versioning and loop peeling in order to enhance
4318 the alignment of data references in the loop.
4320 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4321 original loop is to be vectorized; Any other loops that are created by
4322 the transformations performed in this pass - are not supposed to be
4323 vectorized. This restriction will be relaxed.
4325 FOR NOW: No transformation is actually performed. TODO. */
4328 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4330 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4331 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4332 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4336 This pass will require a cost model to guide it whether to apply peeling
4337 or versioning or a combination of the two. For example, the scheme that
4338 intel uses when given a loop with several memory accesses, is as follows:
4339 choose one memory access ('p') which alignment you want to force by doing
4340 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4341 other accesses are not necessarily aligned, or (2) use loop versioning to
4342 generate one loop in which all accesses are aligned, and another loop in
4343 which only 'p' is necessarily aligned.
4345 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4346 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4347 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4349 Devising a cost model is the most critical aspect of this work. It will
4350 guide us on which access to peel for, whether to use loop versioning, how
4351 many versions to create, etc. The cost model will probably consist of
4352 generic considerations as well as target specific considerations (on
4353 powerpc for example, misaligned stores are more painful than misaligned
4356 Here is the general steps involved in alignment enhancements:
4358 -- original loop, before alignment analysis:
4359 for (i=0; i<N; i++){
4360 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4361 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4364 -- After vect_compute_data_refs_alignment:
4365 for (i=0; i<N; i++){
4366 x = q[i]; # DR_MISALIGNMENT(q) = 3
4367 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4370 -- Possibility 1: we do loop versioning:
4372 for (i=0; i<N; i++){ # loop 1A
4373 x = q[i]; # DR_MISALIGNMENT(q) = 3
4374 p[i] = y; # DR_MISALIGNMENT(p) = 0
4378 for (i=0; i<N; i++){ # loop 1B
4379 x = q[i]; # DR_MISALIGNMENT(q) = 3
4380 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4384 -- Possibility 2: we do loop peeling:
4385 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4389 for (i = 3; i < N; i++){ # loop 2A
4390 x = q[i]; # DR_MISALIGNMENT(q) = 0
4391 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4394 -- Possibility 3: combination of loop peeling and versioning:
4395 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4400 for (i = 3; i<N; i++){ # loop 3A
4401 x = q[i]; # DR_MISALIGNMENT(q) = 0
4402 p[i] = y; # DR_MISALIGNMENT(p) = 0
4406 for (i = 3; i<N; i++){ # loop 3B
4407 x = q[i]; # DR_MISALIGNMENT(q) = 0
4408 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4412 These loops are later passed to loop_transform to be vectorized. The
4413 vectorizer will use the alignment information to guide the transformation
4414 (whether to generate regular loads/stores, or with special handling for
4418 /* (1) Peeling to force alignment. */
4420 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4422 + How many accesses will become aligned due to the peeling
4423 - How many accesses will become unaligned due to the peeling,
4424 and the cost of misaligned accesses.
4425 - The cost of peeling (the extra runtime checks, the increase
4428 The scheme we use FORNOW: peel to force the alignment of the first
4429 misaliged store in the loop.
4430 Rationale: misaligned store are not yet supported.
4432 TODO: Use a better cost model. */
4434 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4436 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4437 if (!aligned_access_p (dr))
4439 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4440 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4445 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4447 if (vect_debug_details (loop))
4448 fprintf (dump_file, "Peeling for alignment will not be applied.");
4452 if (vect_debug_details (loop))
4453 fprintf (dump_file, "Peeling for alignment will be applied.");
4456 /* (1.2) Update the alignment info according to the peeling factor.
4457 If the misalignment of the DR we peel for is M, then the
4458 peeling factor is VF - M, and the misalignment of each access DR_i
4459 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4460 If the misalignment of the DR we peel for is unknown, then the
4461 misalignment of each access DR_i in the loop is also unknown.
4463 FORNOW: set the misalignment of the accesses to unknown even
4464 if the peeling factor is known at compile time.
4466 TODO: - if the peeling factor is known at compile time, use that
4467 when updating the misalignment info of the loop DRs.
4468 - consider accesses that are known to have the same
4469 alignment, even if that alignment is unknown. */
4471 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4473 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4474 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4475 DR_MISALIGNMENT (dr) = 0;
4477 DR_MISALIGNMENT (dr) = -1;
4479 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4481 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4482 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4483 DR_MISALIGNMENT (dr) = 0;
4485 DR_MISALIGNMENT (dr) = -1;
4490 /* Function vect_analyze_data_refs_alignment
4492 Analyze the alignment of the data-references in the loop.
4493 FOR NOW: Until support for misliagned accesses is in place, only if all
4494 accesses are aligned can the loop be vectorized. This restriction will be
4498 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4500 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4501 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4502 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4503 enum dr_alignment_support supportable_dr_alignment;
4506 if (vect_debug_details (NULL))
4507 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4510 /* This pass may take place at function granularity instead of at loop
4513 if (!vect_compute_data_refs_alignment (loop_vinfo))
4515 if (vect_debug_details (loop) || vect_debug_stats (loop))
4517 "not vectorized: can't calculate alignment for data ref.");
4522 /* This pass will decide on using loop versioning and/or loop peeling in
4523 order to enhance the alignment of data references in the loop. */
4525 vect_enhance_data_refs_alignment (loop_vinfo);
4528 /* Finally, check that all the data references in the loop can be
4529 handled with respect to their alignment. */
4531 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4533 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4534 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4535 if (!supportable_dr_alignment)
4537 if (vect_debug_details (loop) || vect_debug_stats (loop))
4538 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4542 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4544 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4545 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4546 if (!supportable_dr_alignment)
4548 if (vect_debug_details (loop) || vect_debug_stats (loop))
4549 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4558 /* Function vect_analyze_data_ref_access.
4560 Analyze the access pattern of the data-reference DR. For now, a data access
4561 has to consecutive and aligned to be considered vectorizable. */
4564 vect_analyze_data_ref_access (struct data_reference *dr)
4566 varray_type access_fns = DR_ACCESS_FNS (dr);
4569 unsigned int dimensions, i;
4571 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4572 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4573 access is contiguous). */
4574 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4576 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4578 access_fn = DR_ACCESS_FN (dr, i);
4580 if (evolution_part_in_loop_num (access_fn,
4581 loop_containing_stmt (DR_STMT (dr))->num))
4583 /* Evolution part is not NULL in this loop (it is neither constant
4585 if (vect_debug_details (NULL))
4588 "not vectorized: complicated multidim. array access.");
4589 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4595 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4596 if (!evolution_function_is_constant_p (access_fn)
4597 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4598 access_fn, &init, &step, true))
4600 if (vect_debug_details (NULL))
4602 fprintf (dump_file, "not vectorized: complicated access function.");
4603 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4612 /* Function vect_analyze_data_ref_accesses.
4614 Analyze the access pattern of all the data references in the loop.
4616 FORNOW: the only access pattern that is considered vectorizable is a
4617 simple step 1 (consecutive) access.
4619 FORNOW: handle only arrays and pointer accesses. */
4622 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4625 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4626 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4628 if (vect_debug_details (NULL))
4629 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4631 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4633 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4634 bool ok = vect_analyze_data_ref_access (dr);
4637 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4638 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4639 fprintf (dump_file, "not vectorized: complicated access pattern.");
4644 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4646 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4647 bool ok = vect_analyze_data_ref_access (dr);
4650 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4651 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4652 fprintf (dump_file, "not vectorized: complicated access pattern.");
4661 /* Function vect_analyze_pointer_ref_access.
4664 STMT - a stmt that contains a data-ref
4665 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4667 If the data-ref access is vectorizable, return a data_reference structure
4668 that represents it (DR). Otherwise - return NULL. */
4670 static struct data_reference *
4671 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4673 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4674 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4675 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4678 tree reftype, innertype;
4679 enum machine_mode innermode;
4680 tree indx_access_fn;
4681 int loopnum = loop->num;
4682 struct data_reference *dr;
4686 if (vect_debug_stats (loop) || vect_debug_details (loop))
4687 fprintf (dump_file, "not vectorized: complicated pointer access.");
4691 if (vect_debug_details (NULL))
4693 fprintf (dump_file, "Access function of ptr: ");
4694 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4697 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4699 if (vect_debug_stats (loop) || vect_debug_details (loop))
4700 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4706 if (!host_integerp (step,0))
4708 if (vect_debug_stats (loop) || vect_debug_details (loop))
4710 "not vectorized: non constant step for pointer access.");
4714 step_val = TREE_INT_CST_LOW (step);
4716 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4717 if (TREE_CODE (reftype) != POINTER_TYPE)
4719 if (vect_debug_stats (loop) || vect_debug_details (loop))
4720 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4724 reftype = TREE_TYPE (init);
4725 if (TREE_CODE (reftype) != POINTER_TYPE)
4727 if (vect_debug_stats (loop) || vect_debug_details (loop))
4728 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4732 innertype = TREE_TYPE (reftype);
4733 innermode = TYPE_MODE (innertype);
4734 if (GET_MODE_SIZE (innermode) != step_val)
4736 /* FORNOW: support only consecutive access */
4737 if (vect_debug_stats (loop) || vect_debug_details (loop))
4738 fprintf (dump_file, "not vectorized: non consecutive access.");
4743 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4744 if (vect_debug_details (NULL))
4746 fprintf (dump_file, "Access function of ptr indx: ");
4747 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4749 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4754 /* Function vect_get_symbl_and_dr.
4756 The function returns SYMBL - the relevant variable for
4757 memory tag (for aliasing purposes).
4758 Also data reference structure DR is created.
4761 MEMREF - data reference in STMT
4762 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4765 DR - data_reference struct for MEMREF
4766 return value - the relevant variable for memory tag (for aliasing purposes).
4771 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4772 loop_vec_info loop_vinfo, struct data_reference **dr)
4774 tree symbl, oprnd0, oprnd1;
4775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4777 tree array_base, base;
4778 struct data_reference *new_dr;
4779 bool base_aligned_p;
4782 switch (TREE_CODE (memref))
4785 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4789 symbl = DR_BASE_NAME (new_dr);
4790 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4792 switch (TREE_CODE (symbl))
4796 oprnd0 = TREE_OPERAND (symbl, 0);
4797 oprnd1 = TREE_OPERAND (symbl, 1);
4800 /* Only {address_base + offset} expressions are supported,
4801 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4802 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4803 TODO: swap operands if {offset + address_base}. */
4804 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4805 && TREE_CODE (oprnd1) != INTEGER_CST)
4806 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4809 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4812 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4813 loop_vinfo, &new_dr);
4817 /* symbl remains unchanged. */
4821 if (vect_debug_details (NULL))
4823 fprintf (dump_file, "unhandled data ref: ");
4824 print_generic_expr (dump_file, memref, TDF_SLIM);
4825 fprintf (dump_file, " (symbl ");
4826 print_generic_expr (dump_file, symbl, TDF_SLIM);
4827 fprintf (dump_file, ") in stmt ");
4828 print_generic_expr (dump_file, stmt, TDF_SLIM);
4835 offset = size_zero_node;
4837 /* Store the array base in the stmt info.
4838 For one dimensional array ref a[i], the base is a,
4839 for multidimensional a[i1][i2]..[iN], the base is
4840 a[i1][i2]..[iN-1]. */
4841 array_base = TREE_OPERAND (memref, 0);
4842 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4844 new_dr = analyze_array (stmt, memref, is_read);
4847 /* Find the relevant symbol for aliasing purposes. */
4848 base = DR_BASE_NAME (new_dr);
4849 switch (TREE_CODE (base))
4856 symbl = TREE_OPERAND (base, 0);
4860 /* Could have recorded more accurate information -
4861 i.e, the actual FIELD_DECL that is being referenced -
4862 but later passes expect VAR_DECL as the nmt. */
4863 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4864 loop_vinfo, &offset, &base_aligned_p);
4869 if (vect_debug_details (NULL))
4871 fprintf (dump_file, "unhandled struct/class field access ");
4872 print_generic_expr (dump_file, stmt, TDF_SLIM);
4879 if (vect_debug_details (NULL))
4881 fprintf (dump_file, "unhandled data ref: ");
4882 print_generic_expr (dump_file, memref, TDF_SLIM);
4883 fprintf (dump_file, " in stmt ");
4884 print_generic_expr (dump_file, stmt, TDF_SLIM);
4892 /* Function vect_analyze_data_refs.
4894 Find all the data references in the loop.
4896 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4897 which base is really an array (not a pointer) and which alignment
4898 can be forced. This restriction will be relaxed. */
4901 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4903 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4904 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4905 int nbbs = loop->num_nodes;
4906 block_stmt_iterator si;
4908 struct data_reference *dr;
4911 bool base_aligned_p;
4914 if (vect_debug_details (NULL))
4915 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4917 for (j = 0; j < nbbs; j++)
4919 basic_block bb = bbs[j];
4920 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4922 bool is_read = false;
4923 tree stmt = bsi_stmt (si);
4924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4925 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4926 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4927 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4928 varray_type *datarefs = NULL;
4929 int nvuses, nv_may_defs, nv_must_defs;
4933 /* Assumption: there exists a data-ref in stmt, if and only if
4934 it has vuses/vdefs. */
4936 if (!vuses && !v_may_defs && !v_must_defs)
4939 nvuses = NUM_VUSES (vuses);
4940 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4941 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4943 if (nvuses && (nv_may_defs || nv_must_defs))
4945 if (vect_debug_details (NULL))
4947 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4948 print_generic_expr (dump_file, stmt, TDF_SLIM);
4953 if (TREE_CODE (stmt) != MODIFY_EXPR)
4955 if (vect_debug_details (NULL))
4957 fprintf (dump_file, "unexpected vops in stmt: ");
4958 print_generic_expr (dump_file, stmt, TDF_SLIM);
4965 memref = TREE_OPERAND (stmt, 1);
4966 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4971 memref = TREE_OPERAND (stmt, 0);
4972 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4976 /* Analyze MEMREF. If it is of a supported form, build data_reference
4977 struct for it (DR) and find the relevant symbol for aliasing
4979 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4983 if (vect_debug_stats (loop) || vect_debug_details (loop))
4985 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4986 print_generic_expr (dump_file, stmt, TDF_SLIM);
4991 /* Find and record the memtag assigned to this data-ref. */
4992 switch (TREE_CODE (symbl))
4995 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4999 symbl = SSA_NAME_VAR (symbl);
5000 tag = get_var_ann (symbl)->type_mem_tag;
5003 tree ptr = TREE_OPERAND (memref, 0);
5004 if (TREE_CODE (ptr) == SSA_NAME)
5005 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5009 if (vect_debug_stats (loop) || vect_debug_details (loop))
5010 fprintf (dump_file, "not vectorized: no memtag for ref.");
5013 STMT_VINFO_MEMTAG (stmt_info) = tag;
5017 address_base = TREE_OPERAND (symbl, 0);
5019 switch (TREE_CODE (address_base))
5022 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5024 STMT_VINFO_MEMTAG (stmt_info) =
5025 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5026 loop_vinfo, &offset,
5031 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5035 if (vect_debug_stats (loop) || vect_debug_details (loop))
5038 "not vectorized: unhandled address expr: ");
5039 print_generic_expr (dump_file, stmt, TDF_SLIM);
5046 if (vect_debug_stats (loop) || vect_debug_details (loop))
5048 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5049 print_generic_expr (dump_file, memref, TDF_SLIM);
5054 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5055 STMT_VINFO_DATA_REF (stmt_info) = dr;
5063 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5065 /* Function vect_mark_relevant.
5067 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5070 vect_mark_relevant (varray_type worklist, tree stmt)
5072 stmt_vec_info stmt_info;
5074 if (vect_debug_details (NULL))
5075 fprintf (dump_file, "mark relevant.");
5077 if (TREE_CODE (stmt) == PHI_NODE)
5079 VARRAY_PUSH_TREE (worklist, stmt);
5083 stmt_info = vinfo_for_stmt (stmt);
5087 if (vect_debug_details (NULL))
5089 fprintf (dump_file, "mark relevant: no stmt info!!.");
5090 print_generic_expr (dump_file, stmt, TDF_SLIM);
5095 if (STMT_VINFO_RELEVANT_P (stmt_info))
5097 if (vect_debug_details (NULL))
5098 fprintf (dump_file, "already marked relevant.");
5102 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5103 VARRAY_PUSH_TREE (worklist, stmt);
5107 /* Function vect_stmt_relevant_p.
5109 Return true if STMT in loop that is represented by LOOP_VINFO is
5110 "relevant for vectorization".
5112 A stmt is considered "relevant for vectorization" if:
5113 - it has uses outside the loop.
5114 - it has vdefs (it alters memory).
5115 - control stmts in the loop (except for the exit condition).
5117 CHECKME: what other side effects would the vectorizer allow? */
5120 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5122 v_may_def_optype v_may_defs;
5123 v_must_def_optype v_must_defs;
5124 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5129 /* cond stmt other than loop exit cond. */
5130 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5133 /* changing memory. */
5134 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5135 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5136 if (v_may_defs || v_must_defs)
5138 if (vect_debug_details (NULL))
5139 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5143 /* uses outside the loop. */
5144 df = get_immediate_uses (stmt);
5145 num_uses = num_immediate_uses (df);
5146 for (i = 0; i < num_uses; i++)
5148 tree use = immediate_use (df, i);
5149 basic_block bb = bb_for_stmt (use);
5150 if (!flow_bb_inside_loop_p (loop, bb))
5152 if (vect_debug_details (NULL))
5153 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5162 /* Function vect_mark_stmts_to_be_vectorized.
5164 Not all stmts in the loop need to be vectorized. For example:
5173 Stmt 1 and 3 do not need to be vectorized, because loop control and
5174 addressing of vectorized data-refs are handled differently.
5176 This pass detects such stmts. */
5179 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5181 varray_type worklist;
5182 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5183 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5184 unsigned int nbbs = loop->num_nodes;
5185 block_stmt_iterator si;
5191 stmt_vec_info stmt_info;
5193 if (vect_debug_details (NULL))
5194 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5196 VARRAY_TREE_INIT (worklist, 64, "work list");
5198 /* 1. Init worklist. */
5200 for (i = 0; i < nbbs; i++)
5202 basic_block bb = bbs[i];
5203 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5205 stmt = bsi_stmt (si);
5207 if (vect_debug_details (NULL))
5209 fprintf (dump_file, "init: stmt relevant? ");
5210 print_generic_expr (dump_file, stmt, TDF_SLIM);
5213 stmt_info = vinfo_for_stmt (stmt);
5214 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5216 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5217 vect_mark_relevant (worklist, stmt);
5222 /* 2. Process_worklist */
5224 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5226 stmt = VARRAY_TOP_TREE (worklist);
5227 VARRAY_POP (worklist);
5229 if (vect_debug_details (NULL))
5231 fprintf (dump_file, "worklist: examine stmt: ");
5232 print_generic_expr (dump_file, stmt, TDF_SLIM);
5235 /* Examine the USES in this statement. Mark all the statements which
5236 feed this statement's uses as "relevant", unless the USE is used as
5239 if (TREE_CODE (stmt) == PHI_NODE)
5241 /* follow the def-use chain inside the loop. */
5242 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5244 tree arg = PHI_ARG_DEF (stmt, j);
5245 tree def_stmt = NULL_TREE;
5247 if (!vect_is_simple_use (arg, loop, &def_stmt))
5249 if (vect_debug_details (NULL))
5250 fprintf (dump_file, "worklist: unsupported use.");
5251 varray_clear (worklist);
5257 if (vect_debug_details (NULL))
5259 fprintf (dump_file, "worklist: def_stmt: ");
5260 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5263 bb = bb_for_stmt (def_stmt);
5264 if (flow_bb_inside_loop_p (loop, bb))
5265 vect_mark_relevant (worklist, def_stmt);
5269 ann = stmt_ann (stmt);
5270 use_ops = USE_OPS (ann);
5272 for (i = 0; i < NUM_USES (use_ops); i++)
5274 tree use = USE_OP (use_ops, i);
5276 /* We are only interested in uses that need to be vectorized. Uses
5277 that are used for address computation are not considered relevant.
5279 if (exist_non_indexing_operands_for_use_p (use, stmt))
5281 tree def_stmt = NULL_TREE;
5283 if (!vect_is_simple_use (use, loop, &def_stmt))
5285 if (vect_debug_details (NULL))
5286 fprintf (dump_file, "worklist: unsupported use.");
5287 varray_clear (worklist);
5294 if (vect_debug_details (NULL))
5296 fprintf (dump_file, "worklist: examine use %d: ", i);
5297 print_generic_expr (dump_file, use, TDF_SLIM);
5300 bb = bb_for_stmt (def_stmt);
5301 if (flow_bb_inside_loop_p (loop, bb))
5302 vect_mark_relevant (worklist, def_stmt);
5305 } /* while worklist */
5307 varray_clear (worklist);
5312 /* Function vect_analyze_loop_with_symbolic_num_of_iters.
5314 In case the number of iterations that LOOP iterates in unknown at compile
5315 time, an epilog loop will be generated, and the loop induction variables
5316 (IVs) will be "advanced" to the value they are supposed to take just before
5317 the epilog loop. Here we check that the access function of the loop IVs
5318 and the expression that represents the loop bound are simple enough.
5319 These restrictions will be relaxed in the future. */
5322 vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5325 basic_block bb = loop->header;
5328 if (vect_debug_details (NULL))
5330 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5332 if (chrec_contains_undetermined (niters))
5334 if (vect_debug_details (NULL))
5335 fprintf (dump_file, "Infinite number of iterations.");
5341 if (vect_debug_details (NULL))
5342 fprintf (dump_file, "niters is NULL pointer.");
5346 if (vect_debug_details (NULL))
5348 fprintf (dump_file, "Symbolic number of iterations is ");
5349 print_generic_expr (dump_file, niters, TDF_DETAILS);
5352 /* Analyze phi functions of the loop header. */
5354 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
5356 tree access_fn = NULL;
5357 tree evolution_part;
5359 if (vect_debug_details (NULL))
5361 fprintf (dump_file, "Analyze phi: ");
5362 print_generic_expr (dump_file, phi, TDF_SLIM);
5365 /* Skip virtual phi's. The data dependences that are associated with
5366 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5368 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5370 if (vect_debug_details (NULL))
5371 fprintf (dump_file, "virtual phi. skip.");
5375 /* Analyze the evolution function. */
5377 access_fn = instantiate_parameters
5378 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5382 if (vect_debug_details (NULL))
5383 fprintf (dump_file, "No Access function.");
5387 if (vect_debug_details (NULL))
5389 fprintf (dump_file, "Access function of PHI: ");
5390 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5393 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5395 if (evolution_part == NULL_TREE)
5398 /* FORNOW: We do not transform initial conditions of IVs
5399 which evolution functions are a polynomial of degree >= 2. */
5401 if (tree_is_chrec (evolution_part))
5409 /* Function vect_get_loop_niters.
5411 Determine how many iterations the loop is executed. */
5414 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5418 if (vect_debug_details (NULL))
5419 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5421 niters = number_of_iterations_in_loop (loop);
5423 if (niters != NULL_TREE
5424 && niters != chrec_dont_know)
5426 *number_of_iterations = niters;
5428 if (vect_debug_details (NULL))
5430 fprintf (dump_file, "==> get_loop_niters:" );
5431 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5435 return get_loop_exit_condition (loop);
5439 /* Function vect_analyze_loop_form.
5441 Verify the following restrictions (some may be relaxed in the future):
5442 - it's an inner-most loop
5443 - number of BBs = 2 (which are the loop header and the latch)
5444 - the loop has a pre-header
5445 - the loop has a single entry and exit
5446 - the loop exit condition is simple enough, and the number of iterations
5447 can be analyzed (a countable loop). */
5449 static loop_vec_info
5450 vect_analyze_loop_form (struct loop *loop)
5452 loop_vec_info loop_vinfo;
5454 tree number_of_iterations = NULL;
5456 if (vect_debug_details (loop))
5457 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5460 || !loop->single_exit
5461 || loop->num_nodes != 2)
5463 if (vect_debug_stats (loop) || vect_debug_details (loop))
5465 fprintf (dump_file, "not vectorized: bad loop form. ");
5467 fprintf (dump_file, "nested loop.");
5468 else if (!loop->single_exit)
5469 fprintf (dump_file, "multiple exits.");
5470 else if (loop->num_nodes != 2)
5471 fprintf (dump_file, "too many BBs in loop.");
5477 /* We assume that the loop exit condition is at the end of the loop. i.e,
5478 that the loop is represented as a do-while (with a proper if-guard
5479 before the loop if needed), where the loop header contains all the
5480 executable statements, and the latch is empty. */
5481 if (!empty_block_p (loop->latch))
5483 if (vect_debug_stats (loop) || vect_debug_details (loop))
5484 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5488 if (empty_block_p (loop->header))
5490 if (vect_debug_stats (loop) || vect_debug_details (loop))
5491 fprintf (dump_file, "not vectorized: empty loop.");
5495 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5498 if (vect_debug_stats (loop) || vect_debug_details (loop))
5499 fprintf (dump_file, "not vectorized: complicated exit condition.");
5503 if (!number_of_iterations)
5505 if (vect_debug_stats (loop) || vect_debug_details (loop))
5507 "not vectorized: number of iterations cannot be computed.");
5511 loop_vinfo = new_loop_vec_info (loop);
5512 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5513 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5515 if (vect_debug_stats (loop) || vect_debug_details (loop))
5516 fprintf (dump_file, "loop bound unknown.");
5518 /* Unknown loop bound. */
5519 if (!vect_analyze_loop_with_symbolic_num_of_iters
5520 (number_of_iterations, loop))
5522 if (vect_debug_stats (loop) || vect_debug_details (loop))
5524 "not vectorized: can't determine loop bound.");
5529 /* We need only one loop entry for unknown loop bound support. */
5530 if (loop->num_entries != 1 || !loop->pre_header)
5532 if (vect_debug_stats (loop) || vect_debug_details (loop))
5534 "not vectorized: more than one loop entry.");
5540 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5542 if (vect_debug_stats (loop) || vect_debug_details (loop))
5543 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5547 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5553 /* Function vect_analyze_loop.
5555 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5556 for it. The different analyses will record information in the
5557 loop_vec_info struct. */
5559 static loop_vec_info
5560 vect_analyze_loop (struct loop *loop)
5563 loop_vec_info loop_vinfo;
5565 if (vect_debug_details (NULL))
5566 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5568 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5570 loop_vinfo = vect_analyze_loop_form (loop);
5573 if (vect_debug_details (loop))
5574 fprintf (dump_file, "bad loop form.");
5578 /* Find all data references in the loop (which correspond to vdefs/vuses)
5579 and analyze their evolution in the loop.
5581 FORNOW: Handle only simple, array references, which
5582 alignment can be forced, and aligned pointer-references. */
5584 ok = vect_analyze_data_refs (loop_vinfo);
5587 if (vect_debug_details (loop))
5588 fprintf (dump_file, "bad data references.");
5589 destroy_loop_vec_info (loop_vinfo);
5593 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5595 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5598 if (vect_debug_details (loop))
5599 fprintf (dump_file, "unexpected pattern.");
5600 if (vect_debug_details (loop))
5601 fprintf (dump_file, "not vectorized: unexpected pattern.");
5602 destroy_loop_vec_info (loop_vinfo);
5606 /* Check that all cross-iteration scalar data-flow cycles are OK.
5607 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5609 ok = vect_analyze_scalar_cycles (loop_vinfo);
5612 if (vect_debug_details (loop))
5613 fprintf (dump_file, "bad scalar cycle.");
5614 destroy_loop_vec_info (loop_vinfo);
5618 /* Analyze data dependences between the data-refs in the loop.
5619 FORNOW: fail at the first data dependence that we encounter. */
5621 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5624 if (vect_debug_details (loop))
5625 fprintf (dump_file, "bad data dependence.");
5626 destroy_loop_vec_info (loop_vinfo);
5630 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5631 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5633 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5636 if (vect_debug_details (loop))
5637 fprintf (dump_file, "bad data access.");
5638 destroy_loop_vec_info (loop_vinfo);
5642 /* Analyze the alignment of the data-refs in the loop.
5643 FORNOW: Only aligned accesses are handled. */
5645 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5648 if (vect_debug_details (loop))
5649 fprintf (dump_file, "bad data alignment.");
5650 destroy_loop_vec_info (loop_vinfo);
5654 /* Scan all the operations in the loop and make sure they are
5657 ok = vect_analyze_operations (loop_vinfo);
5660 if (vect_debug_details (loop))
5661 fprintf (dump_file, "bad operation or unsupported loop bound.");
5662 destroy_loop_vec_info (loop_vinfo);
5666 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5672 /* Function need_imm_uses_for.
5674 Return whether we ought to include information for 'var'
5675 when calculating immediate uses. For this pass we only want use
5676 information for non-virtual variables. */
5679 need_imm_uses_for (tree var)
5681 return is_gimple_reg (var);
5685 /* Function vectorize_loops.
5687 Entry Point to loop vectorization phase. */
5690 vectorize_loops (struct loops *loops)
5692 unsigned int i, loops_num;
5693 unsigned int num_vectorized_loops = 0;
5695 /* Does the target support SIMD? */
5696 /* FORNOW: until more sophisticated machine modelling is in place. */
5697 if (!UNITS_PER_SIMD_WORD)
5699 if (vect_debug_details (NULL))
5700 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5704 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5706 /* ----------- Analyze loops. ----------- */
5708 /* If some loop was duplicated, it gets bigger number
5709 than all previously defined loops. This fact allows us to run
5710 only over initial loops skipping newly generated ones. */
5711 loops_num = loops->num;
5712 for (i = 1; i < loops_num; i++)
5714 loop_vec_info loop_vinfo;
5715 struct loop *loop = loops->parray[i];
5720 loop_vinfo = vect_analyze_loop (loop);
5721 loop->aux = loop_vinfo;
5723 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5726 vect_transform_loop (loop_vinfo, loops);
5727 num_vectorized_loops++;
5730 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5731 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5732 num_vectorized_loops);
5734 /* ----------- Finalize. ----------- */
5737 for (i = 1; i < loops_num; i++)
5739 struct loop *loop = loops->parray[i];
5740 loop_vec_info loop_vinfo;
5744 loop_vinfo = loop->aux;
5745 destroy_loop_vec_info (loop_vinfo);
5749 rewrite_into_ssa (false);
5750 if (!bitmap_empty_p (vars_to_rename))
5752 /* The rewrite of ssa names may cause violation of loop closed ssa
5753 form invariants. TODO -- avoid these rewrites completely.
5754 Information in virtual phi nodes is sufficient for it. */
5755 rewrite_into_loop_closed_ssa ();
5757 rewrite_into_loop_closed_ssa ();
5758 bitmap_clear (vars_to_rename);