/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
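/* An illustrative example (added for exposition; not from the original
   sources): in a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address of a[i] hides the multiplication i * sizeof (*a).  Strength
   reduction may replace it by a pointer iv p that starts at a and is
   incremented by sizeof (*a) each iteration, and iv elimination may then
   rewrite the exit test in terms of p, making i dead:

     for (p = a; p < a + n; p++)
       sum += *p;

   Whether this rewrite actually pays off is decided by the cost model
   described in point 3) above.  */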
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling
   instead of this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object that the induction variable points to.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* Cost of a computation.  */

typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */

struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, we always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
                struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc* desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
                                     exit, desc, true)
          && integer_zerop (desc->may_be_zero)
          && !contains_abnormal_ssa_name_p (desc->niter))
        niter = desc->niter;
      else
        niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            true))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
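/* Illustrative examples (added for exposition; not from the original
   sources): with TOP = (n + 1) * 4 and BOT = n + 1, the MULT_EXPR case
   recurses on equal operands and *MUL is set to 4.  With TOP = 13 and
   BOT = 4, the INTEGER_CST case performs a FLOOR_DIV that leaves
   remainder 1, so false is returned.  */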
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
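/* Illustrative examples (added for exposition; not from the original
   sources): for EXPR = p + 24, strip_offset returns p and sets *OFFSET
   to 24; for EXPR = &a[4] with 4-byte array elements, it strips the
   ARRAY_REF completely, returning the address of the array with *OFFSET
   set to 16.  */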
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
2566 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2567 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2568 we allocate a simple list for every use. */
2571 alloc_use_cost_map (struct ivopts_data *data)
2573 unsigned i, size, s, j;
2575 for (i = 0; i < n_iv_uses (data); i++)
2577 struct iv_use *use = iv_use (data, i);
2580 if (data->consider_all_candidates)
2581 size = n_iv_cands (data);
2585 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2590 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2591 for (size = 1; size < s; size <<= 1)
2595 use->n_map_members = size;
2596 use->cost_map = XCNEWVEC (struct cost_pair, size);
2600 /* Returns a description of the computation cost of an expression whose
2601 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2604 new_cost (unsigned runtime, unsigned complexity)
2608 cost.cost = runtime;
2609 cost.complexity = complexity;
2614 /* Adds costs COST1 and COST2. */
2617 add_costs (comp_cost cost1, comp_cost cost2)
2619 cost1.cost += cost2.cost;
2620 cost1.complexity += cost2.complexity;
2624 /* Subtracts cost COST2 from cost COST1. */
2627 sub_costs (comp_cost cost1, comp_cost cost2)
2629 cost1.cost -= cost2.cost;
2630 cost1.complexity -= cost2.complexity;
2635 /* Returns a negative number if COST1 < COST2, a positive number if
2636 COST1 > COST2, and 0 if COST1 = COST2. */
2639 compare_costs (comp_cost cost1, comp_cost cost2)
2641 if (cost1.cost == cost2.cost)
2642 return cost1.complexity - cost2.complexity;
2644 return cost1.cost - cost2.cost;
2647 /* Returns true if COST is infinite. */
2650 infinite_cost_p (comp_cost cost)
2652 return cost.cost == INFTY;
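/* Illustrative sketch (hypothetical helper, not part of the pass): the
   comparison above orders costs by the runtime COST field first and uses
   COMPLEXITY only to break ties, so {cost 4, complexity 9} is still
   considered cheaper than {cost 5, complexity 0}.  */

static int ATTRIBUTE_UNUSED
comp_cost_ordering_example (void)
{
  comp_cost a = new_cost (4, 9);
  comp_cost b = new_cost (5, 0);

  /* Negative: A is cheaper despite its much higher complexity.  */
  return compare_costs (a, b);
}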
2655 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2656 on invariants DEPENDS_ON and that the value used in expressing it
2657 is VALUE. */
2660 set_use_iv_cost (struct ivopts_data *data,
2661 struct iv_use *use, struct iv_cand *cand,
2662 comp_cost cost, bitmap depends_on, tree value,
2667 if (infinite_cost_p (cost))
2669 BITMAP_FREE (depends_on);
2673 if (data->consider_all_candidates)
2675 use->cost_map[cand->id].cand = cand;
2676 use->cost_map[cand->id].cost = cost;
2677 use->cost_map[cand->id].depends_on = depends_on;
2678 use->cost_map[cand->id].value = value;
2679 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2683 /* n_map_members is a power of two, so this computes modulo. */
2684 s = cand->id & (use->n_map_members - 1);
2685 for (i = s; i < use->n_map_members; i++)
2686 if (!use->cost_map[i].cand)
2688 for (i = 0; i < s; i++)
2689 if (!use->cost_map[i].cand)
2695 use->cost_map[i].cand = cand;
2696 use->cost_map[i].cost = cost;
2697 use->cost_map[i].depends_on = depends_on;
2698 use->cost_map[i].value = value;
2699 use->cost_map[i].inv_expr_id = inv_expr_id;
2702 /* Gets cost of (USE, CANDIDATE) pair. */
2704 static struct cost_pair *
2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2706 struct iv_cand *cand)
2709 struct cost_pair *ret;
2714 if (data->consider_all_candidates)
2716 ret = use->cost_map + cand->id;
2723 /* n_map_members is a power of two, so this computes modulo. */
2724 s = cand->id & (use->n_map_members - 1);
2725 for (i = s; i < use->n_map_members; i++)
2726 if (use->cost_map[i].cand == cand)
2727 return use->cost_map + i;
2729 for (i = 0; i < s; i++)
2730 if (use->cost_map[i].cand == cand)
2731 return use->cost_map + i;
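/* Minimal sketch of the lookup above (hypothetical helper, not part of
   the pass): because n_map_members was rounded up to a power of two in
   alloc_use_cost_map, ID & (SIZE - 1) is ID mod SIZE, and the probe
   walks from that home slot to the end, then wraps to the beginning.  */

static int ATTRIBUTE_UNUSED
open_addressing_probe_example (const int *slot_ids, unsigned size, int id)
{
  unsigned s = id & (size - 1), i;

  for (i = s; i < size; i++)
    if (slot_ids[i] == id)
      return (int) i;
  for (i = 0; i < s; i++)
    if (slot_ids[i] == id)
      return (int) i;

  return -1;	/* Not present in the table.  */
}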
2736 /* Returns an estimate of the cost of computing SEQ. */
2739 seq_cost (rtx seq, bool speed)
2744 for (; seq; seq = NEXT_INSN (seq))
2746 set = single_set (seq);
2748 cost += rtx_cost (SET_SRC (set), SET, speed);
2756 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2758 produce_memory_decl_rtl (tree obj, int *regno)
2760 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2761 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2765 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2767 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2768 x = gen_rtx_SYMBOL_REF (address_mode, name);
2769 SET_SYMBOL_REF_DECL (x, obj);
2770 x = gen_rtx_MEM (DECL_MODE (obj), x);
2771 set_mem_addr_space (x, as);
2772 targetm.encode_section_info (obj, x, true);
2776 x = gen_raw_REG (address_mode, (*regno)++);
2777 x = gen_rtx_MEM (DECL_MODE (obj), x);
2778 set_mem_addr_space (x, as);
2784 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2785 walk_tree. DATA contains the actual fake register number. */
2788 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2790 tree obj = NULL_TREE;
2792 int *regno = (int *) data;
2794 switch (TREE_CODE (*expr_p))
2797 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2798 handled_component_p (*expr_p);
2799 expr_p = &TREE_OPERAND (*expr_p, 0))
2802 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2803 x = produce_memory_decl_rtl (obj, regno);
2808 obj = SSA_NAME_VAR (*expr_p);
2809 if (!DECL_RTL_SET_P (obj))
2810 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2819 if (DECL_RTL_SET_P (obj))
2822 if (DECL_MODE (obj) == BLKmode)
2823 x = produce_memory_decl_rtl (obj, regno);
2825 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2835 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2836 SET_DECL_RTL (obj, x);
2842 /* Determines cost of the computation of EXPR. */
2845 computation_cost (tree expr, bool speed)
2848 tree type = TREE_TYPE (expr);
2850 /* Avoid using hard regs in ways which may be unsupported. */
2851 int regno = LAST_VIRTUAL_REGISTER + 1;
2852 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2853 enum node_frequency real_frequency = node->frequency;
2855 node->frequency = NODE_FREQUENCY_NORMAL;
2856 crtl->maybe_hot_insn_p = speed;
2857 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2859 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2862 default_rtl_profile ();
2863 node->frequency = real_frequency;
2865 cost = seq_cost (seq, speed);
2867 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2868 TYPE_ADDR_SPACE (type), speed);
2869 else if (!REG_P (rslt))
2870 cost += rtx_cost (rslt, SET, speed);
2875 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2878 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2880 if (stmt_after_increment (loop, cand, stmt))
2881 return cand->var_after;
2883 return cand->var_before;
2886 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2887 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2890 tree_int_cst_sign_bit (const_tree t)
2892 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2893 unsigned HOST_WIDE_INT w;
2895 if (bitno < HOST_BITS_PER_WIDE_INT)
2896 w = TREE_INT_CST_LOW (t);
2899 w = TREE_INT_CST_HIGH (t);
2900 bitno -= HOST_BITS_PER_WIDE_INT;
2903 return (w >> bitno) & 1;
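/* Worked example of the word selection above (hypothetical helper,
   mirroring tree_int_cst_sign_bit on plain host integers): for a 16-bit
   precision the sign bit is bit 15 of the low word, so the value 0x8000
   is negative in that precision even though the host word is wider.  */

static unsigned ATTRIBUTE_UNUSED
sign_bit_example (unsigned HOST_WIDE_INT low, unsigned HOST_WIDE_INT high,
		  unsigned precision)
{
  unsigned bitno = precision - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = low;			/* The bit lives in the low word.  */
  else
    {
      w = high;			/* The bit lives in the high word.  */
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;	/* sign_bit_example (0x8000, 0, 16) == 1.  */
}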
2906 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2907 same precision that is at least as wide as the precision of TYPE, stores
2908 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2909 type of A. */
2912 determine_common_wider_type (tree *a, tree *b)
2914 tree wider_type = NULL;
2916 tree atype = TREE_TYPE (*a);
2918 if (CONVERT_EXPR_P (*a))
2920 suba = TREE_OPERAND (*a, 0);
2921 wider_type = TREE_TYPE (suba);
2922 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2928 if (CONVERT_EXPR_P (*b))
2930 subb = TREE_OPERAND (*b, 0);
2931 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2942 /* Determines the expression by which USE is expressed from induction variable
2943 CAND at statement AT in LOOP. The expression is stored in a decomposed
2944 form into AFF. Returns false if USE cannot be expressed using CAND. */
2947 get_computation_aff (struct loop *loop,
2948 struct iv_use *use, struct iv_cand *cand, gimple at,
2949 struct affine_tree_combination *aff)
2951 tree ubase = use->iv->base;
2952 tree ustep = use->iv->step;
2953 tree cbase = cand->iv->base;
2954 tree cstep = cand->iv->step, cstep_common;
2955 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2956 tree common_type, var;
2958 aff_tree cbase_aff, var_aff;
2961 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2963 /* We do not have a precision to express the values of use. */
2967 var = var_at_stmt (loop, cand, at);
2968 uutype = unsigned_type_for (utype);
2970 /* If the conversion is not a noop, perform it. */
2971 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2973 cstep = fold_convert (uutype, cstep);
2974 cbase = fold_convert (uutype, cbase);
2975 var = fold_convert (uutype, var);
2978 if (!constant_multiple_of (ustep, cstep, &rat))
2981 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2982 type, we achieve better folding by computing their difference in this
2983 wider type, and casting the result to UUTYPE. We do not need to worry about
2984 overflows, as all the arithmetic will in the end be performed in UUTYPE
2985 anyway. */
2986 common_type = determine_common_wider_type (&ubase, &cbase);
2988 /* use = ubase - ratio * cbase + ratio * var. */
2989 tree_to_aff_combination (ubase, common_type, aff);
2990 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2991 tree_to_aff_combination (var, uutype, &var_aff);
2993 /* We need to shift the value if we are after the increment. */
2994 if (stmt_after_increment (loop, cand, at))
2998 if (common_type != uutype)
2999 cstep_common = fold_convert (common_type, cstep);
3001 cstep_common = cstep;
3003 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3004 aff_combination_add (&cbase_aff, &cstep_aff);
3007 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3008 aff_combination_add (aff, &cbase_aff);
3009 if (common_type != uutype)
3010 aff_combination_convert (aff, uutype);
3012 aff_combination_scale (&var_aff, rat);
3013 aff_combination_add (aff, &var_aff);
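/* Concrete instance of the rewrite above (hypothetical numbers): with a
   use iv {ubase = 16, ustep = 8} and a candidate iv {cbase = 4, cstep = 2},
   constant_multiple_of yields ratio = 4, and the use value in iteration i,
   16 + 8 * i, is recovered from the candidate value var = 4 + 2 * i as
   ubase - ratio * cbase + ratio * var.  */

static long ATTRIBUTE_UNUSED
computation_rewrite_example (long var)
{
  const long ubase = 16, cbase = 4, ratio = 4;

  return ubase - ratio * cbase + ratio * var;	/* == 16 + 8 * i.  */
}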
3018 /* Determines the expression by which USE is expressed from induction variable
3019 CAND at statement AT in LOOP. The computation is unshared. */
3022 get_computation_at (struct loop *loop,
3023 struct iv_use *use, struct iv_cand *cand, gimple at)
3026 tree type = TREE_TYPE (use->iv->base);
3028 if (!get_computation_aff (loop, use, cand, at, &aff))
3030 unshare_aff_combination (&aff);
3031 return fold_convert (type, aff_combination_to_tree (&aff));
3034 /* Determines the expression by which USE is expressed from induction variable
3035 CAND in LOOP. The computation is unshared. */
3038 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3040 return get_computation_at (loop, use, cand, use->stmt);
3043 /* Adjust the cost COST for being in loop setup rather than loop body.
3044 If we're optimizing for space, the loop setup overhead is constant;
3045 if we're optimizing for speed, amortize it over the per-iteration cost. */
3047 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3051 else if (optimize_loop_for_speed_p (data->current_loop))
3052 return cost / avg_loop_niter (data->current_loop);
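/* Sketch of the amortization above (hypothetical numbers): a setup cost
   of 20 with an average trip count of 5 is charged as 20 / 5 = 4 when
   optimizing for speed, but keeps its full weight when optimizing for
   size, where the setup code is emitted exactly once either way.  */

static unsigned ATTRIBUTE_UNUSED
setup_cost_example (unsigned cost, unsigned avg_niter, bool speed_p)
{
  return speed_p ? cost / avg_niter : cost;
}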
3057 /* Returns cost of addition in MODE. */
3060 add_cost (enum machine_mode mode, bool speed)
3062 static unsigned costs[NUM_MACHINE_MODES];
3070 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3071 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3072 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3077 cost = seq_cost (seq, speed);
3083 if (dump_file && (dump_flags & TDF_DETAILS))
3084 fprintf (dump_file, "Addition in %s costs %d\n",
3085 GET_MODE_NAME (mode), cost);
3089 /* Entry in a hashtable of already known costs for multiplication. */
3092 HOST_WIDE_INT cst; /* The constant to multiply by. */
3093 enum machine_mode mode; /* In mode. */
3094 unsigned cost; /* The cost. */
3097 /* Computes the hash value for ENTRY. */
3100 mbc_entry_hash (const void *entry)
3102 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3104 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3107 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3110 mbc_entry_eq (const void *entry1, const void *entry2)
3112 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3113 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3115 return (e1->mode == e2->mode
3116 && e1->cst == e2->cst);
3119 /* Returns cost of multiplication by constant CST in MODE. */
3122 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3124 static htab_t costs;
3125 struct mbc_entry **cached, act;
3130 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3134 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3136 return (*cached)->cost;
3138 *cached = XNEW (struct mbc_entry);
3139 (*cached)->mode = mode;
3140 (*cached)->cst = cst;
3143 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3144 gen_int_mode (cst, mode), NULL_RTX, 0);
3148 cost = seq_cost (seq, speed);
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3151 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3152 (int) cst, GET_MODE_NAME (mode), cost);
3154 (*cached)->cost = cost;
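/* Minimal sketch of the memoization pattern above (hypothetical toy
   cache with a stand-in computation, not the real hash table): query
   the cache first, and only on a miss do the expensive work, storing
   the result for subsequent calls.  */

static unsigned ATTRIBUTE_UNUSED
memoized_cost_example (HOST_WIDE_INT cst)
{
  static HOST_WIDE_INT cached_cst;
  static unsigned cached_cost;
  static bool cache_valid = false;

  if (cache_valid && cached_cst == cst)
    return cached_cost;		/* Hit: skip the recomputation.  */

  cached_cst = cst;
  cached_cost = (unsigned) ((cst % 877 + 877) % 877);	/* Stand-in work.  */
  cache_valid = true;
  return cached_cost;
}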
3159 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3160 validity for a memory reference accessing memory of mode MODE in
3161 address space AS. */
3163 DEF_VEC_P (sbitmap);
3164 DEF_VEC_ALLOC_P (sbitmap, heap);
3167 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3170 #define MAX_RATIO 128
3171 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3172 static VEC (sbitmap, heap) *valid_mult_list;
3175 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3176 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3178 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3181 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3182 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3186 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3187 sbitmap_zero (valid_mult);
3188 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3189 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3191 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3192 if (memory_address_addr_space_p (mode, addr, as))
3193 SET_BIT (valid_mult, i + MAX_RATIO);
3196 if (dump_file && (dump_flags & TDF_DETAILS))
3198 fprintf (dump_file, " allowed multipliers:");
3199 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3200 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3201 fprintf (dump_file, " %d", (int) i);
3202 fprintf (dump_file, "\n");
3203 fprintf (dump_file, "\n");
3206 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3209 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3212 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
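/* Sketch of the caching scheme above (hypothetical helper): ratios in
   [-MAX_RATIO, MAX_RATIO] map to bit positions 0 .. 2 * MAX_RATIO in
   the per-(mode, address space) sbitmap, so a query is one bit test.  */

static int ATTRIBUTE_UNUSED
ratio_bit_position_example (HOST_WIDE_INT ratio)
{
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return -1;				/* Outside the cached window.  */

  return (int) (ratio + MAX_RATIO);	/* Bit index in the sbitmap.  */
}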
3215 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3216 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3217 variable is omitted. Compute the cost for a memory reference that accesses
3218 a memory location of mode MEM_MODE in address space AS.
3220 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3221 size of MEM_MODE / RATIO) is available. To make this determination, we
3222 look at the size of the increment to be made, which is given in CSTEP.
3223 CSTEP may be zero if the step is unknown.
3224 STMT_AFTER_INC is true iff the statement we're looking at is after the
3225 increment of the original biv.
3227 TODO -- there must be some better way. This all is quite crude. */
3231 HOST_WIDE_INT min_offset, max_offset;
3232 unsigned costs[2][2][2][2];
3233 } *address_cost_data;
3235 DEF_VEC_P (address_cost_data);
3236 DEF_VEC_ALLOC_P (address_cost_data, heap);
3239 get_address_cost (bool symbol_present, bool var_present,
3240 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3241 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3242 addr_space_t as, bool speed,
3243 bool stmt_after_inc, bool *may_autoinc)
3245 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3246 static VEC(address_cost_data, heap) *address_cost_data_list;
3247 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3248 address_cost_data data;
3249 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3250 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3251 unsigned cost, acost, complexity;
3252 bool offset_p, ratio_p, autoinc;
3253 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3254 unsigned HOST_WIDE_INT mask;
3257 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3258 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3261 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3265 HOST_WIDE_INT rat, off = 0;
3266 int old_cse_not_expected, width;
3267 unsigned sym_p, var_p, off_p, rat_p, add_c;
3268 rtx seq, addr, base;
3271 data = (address_cost_data) xcalloc (1, sizeof (*data));
3273 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3275 width = GET_MODE_BITSIZE (address_mode) - 1;
3276 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3277 width = HOST_BITS_PER_WIDE_INT - 1;
3278 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3280 for (i = width; i >= 0; i--)
3282 off = -((HOST_WIDE_INT) 1 << i);
3283 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3284 if (memory_address_addr_space_p (mem_mode, addr, as))
3287 data->min_offset = (i == -1? 0 : off);
3289 for (i = width; i >= 0; i--)
3291 off = ((HOST_WIDE_INT) 1 << i) - 1;
3292 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3293 if (memory_address_addr_space_p (mem_mode, addr, as))
3298 data->max_offset = off;
3300 if (dump_file && (dump_flags & TDF_DETAILS))
3302 fprintf (dump_file, "get_address_cost:\n");
3303 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3304 GET_MODE_NAME (mem_mode),
3306 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3307 GET_MODE_NAME (mem_mode),
3312 for (i = 2; i <= MAX_RATIO; i++)
3313 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3319 /* Compute the cost of various addressing modes. */
3321 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3322 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3324 if (HAVE_PRE_DECREMENT)
3326 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3327 has_predec[mem_mode]
3328 = memory_address_addr_space_p (mem_mode, addr, as);
3330 if (HAVE_POST_DECREMENT)
3332 addr = gen_rtx_POST_DEC (address_mode, reg0);
3333 has_postdec[mem_mode]
3334 = memory_address_addr_space_p (mem_mode, addr, as);
3336 if (HAVE_PRE_INCREMENT)
3338 addr = gen_rtx_PRE_INC (address_mode, reg0);
3339 has_preinc[mem_mode]
3340 = memory_address_addr_space_p (mem_mode, addr, as);
3342 if (HAVE_POST_INCREMENT)
3344 addr = gen_rtx_POST_INC (address_mode, reg0);
3345 has_postinc[mem_mode]
3346 = memory_address_addr_space_p (mem_mode, addr, as);
3348 for (i = 0; i < 16; i++)
3351 var_p = (i >> 1) & 1;
3352 off_p = (i >> 2) & 1;
3353 rat_p = (i >> 3) & 1;
3357 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3358 gen_int_mode (rat, address_mode));
3361 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3365 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3366 /* ??? We can run into trouble with some backends by presenting
3367 it with symbols which haven't been properly passed through
3368 targetm.encode_section_info. By setting the local bit, we
3369 enhance the probability of things working. */
3370 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3373 base = gen_rtx_fmt_e (CONST, address_mode,
3375 (PLUS, address_mode, base,
3376 gen_int_mode (off, address_mode)));
3379 base = gen_int_mode (off, address_mode);
3384 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3387 /* To avoid splitting addressing modes, pretend that no cse will
3388 follow. */
3389 old_cse_not_expected = cse_not_expected;
3390 cse_not_expected = true;
3391 addr = memory_address_addr_space (mem_mode, addr, as);
3392 cse_not_expected = old_cse_not_expected;
3396 acost = seq_cost (seq, speed);
3397 acost += address_cost (addr, mem_mode, as, speed);
3401 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3404 /* On some targets, it is quite expensive to load a symbol into a register,
3405 which makes addresses that contain symbols look much more expensive.
3406 However, the symbol will have to be loaded in any case before the
3407 loop (and quite likely we have it in a register already), so it does not
3408 make much sense to penalize them too heavily. So make some final
3409 tweaks for the SYMBOL_PRESENT modes:
3411 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3412 var is cheaper, use this mode with a small penalty.
3413 If VAR_PRESENT is true, try whether the mode with
3414 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3415 if this is the case, use it. */
3416 add_c = add_cost (address_mode, speed);
3417 for (i = 0; i < 8; i++)
3420 off_p = (i >> 1) & 1;
3421 rat_p = (i >> 2) & 1;
3423 acost = data->costs[0][1][off_p][rat_p] + 1;
3427 if (acost < data->costs[1][var_p][off_p][rat_p])
3428 data->costs[1][var_p][off_p][rat_p] = acost;
3431 if (dump_file && (dump_flags & TDF_DETAILS))
3433 fprintf (dump_file, "Address costs:\n");
3435 for (i = 0; i < 16; i++)
3438 var_p = (i >> 1) & 1;
3439 off_p = (i >> 2) & 1;
3440 rat_p = (i >> 3) & 1;
3442 fprintf (dump_file, " ");
3444 fprintf (dump_file, "sym + ");
3446 fprintf (dump_file, "var + ");
3448 fprintf (dump_file, "cst + ");
3450 fprintf (dump_file, "rat * ");
3452 acost = data->costs[sym_p][var_p][off_p][rat_p];
3453 fprintf (dump_file, "index costs %d\n", acost);
3455 if (has_predec[mem_mode] || has_postdec[mem_mode]
3456 || has_preinc[mem_mode] || has_postinc[mem_mode])
3457 fprintf (dump_file, " May include autoinc/dec\n");
3458 fprintf (dump_file, "\n");
3461 VEC_replace (address_cost_data, address_cost_data_list,
3465 bits = GET_MODE_BITSIZE (address_mode);
3466 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3468 if ((offset >> (bits - 1) & 1))
3473 msize = GET_MODE_SIZE (mem_mode);
3474 autoinc_offset = offset;
3476 autoinc_offset += ratio * cstep;
3477 if (symbol_present || var_present || ratio != 1)
3479 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3481 || (has_postdec[mem_mode] && autoinc_offset == 0
3483 || (has_preinc[mem_mode] && autoinc_offset == msize
3485 || (has_predec[mem_mode] && autoinc_offset == -msize
3486 && msize == -cstep))
3490 offset_p = (s_offset != 0
3491 && data->min_offset <= s_offset
3492 && s_offset <= data->max_offset);
3493 ratio_p = (ratio != 1
3494 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3496 if (ratio != 1 && !ratio_p)
3497 cost += multiply_by_cost (ratio, address_mode, speed);
3499 if (s_offset && !offset_p && !symbol_present)
3500 cost += add_cost (address_mode, speed);
3503 *may_autoinc = autoinc;
3504 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3505 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3506 return new_cost (cost + acost, complexity);
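/* Worked example of the offset canonicalization above (hypothetical
   helper; assumes BITS <= HOST_BITS_PER_WIDE_INT): with a 32-bit address
   mode, MASK is 0xffffffff, and an offset such as 0xfffffff0, whose top
   bit is set, sign-extends to -16 instead of a huge unsigned constant.  */

static HOST_WIDE_INT ATTRIBUTE_UNUSED
sign_extend_offset_example (unsigned HOST_WIDE_INT offset, unsigned bits)
{
  unsigned HOST_WIDE_INT mask
    = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);

  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;			/* Propagate the sign bit.  */

  return (HOST_WIDE_INT) offset;
}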
3509 /* Estimates cost of forcing expression EXPR into a variable. */
3512 force_expr_to_var_cost (tree expr, bool speed)
3514 static bool costs_initialized = false;
3515 static unsigned integer_cost [2];
3516 static unsigned symbol_cost [2];
3517 static unsigned address_cost [2];
3519 comp_cost cost0, cost1, cost;
3520 enum machine_mode mode;
3522 if (!costs_initialized)
3524 tree type = build_pointer_type (integer_type_node);
3529 var = create_tmp_var_raw (integer_type_node, "test_var");
3530 TREE_STATIC (var) = 1;
3531 x = produce_memory_decl_rtl (var, NULL);
3532 SET_DECL_RTL (var, x);
3534 addr = build1 (ADDR_EXPR, type, var);
3537 for (i = 0; i < 2; i++)
3539 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3542 symbol_cost[i] = computation_cost (addr, i) + 1;
3545 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3547 build_int_cst (sizetype, 2000)), i) + 1;
3548 if (dump_file && (dump_flags & TDF_DETAILS))
3550 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3551 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3552 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3553 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3554 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3555 fprintf (dump_file, "\n");
3559 costs_initialized = true;
3564 if (SSA_VAR_P (expr))
3567 if (is_gimple_min_invariant (expr))
3569 if (TREE_CODE (expr) == INTEGER_CST)
3570 return new_cost (integer_cost [speed], 0);
3572 if (TREE_CODE (expr) == ADDR_EXPR)
3574 tree obj = TREE_OPERAND (expr, 0);
3576 if (TREE_CODE (obj) == VAR_DECL
3577 || TREE_CODE (obj) == PARM_DECL
3578 || TREE_CODE (obj) == RESULT_DECL)
3579 return new_cost (symbol_cost [speed], 0);
3582 return new_cost (address_cost [speed], 0);
3585 switch (TREE_CODE (expr))
3587 case POINTER_PLUS_EXPR:
3591 op0 = TREE_OPERAND (expr, 0);
3592 op1 = TREE_OPERAND (expr, 1);
3596 if (is_gimple_val (op0))
3599 cost0 = force_expr_to_var_cost (op0, speed);
3601 if (is_gimple_val (op1))
3604 cost1 = force_expr_to_var_cost (op1, speed);
3609 op0 = TREE_OPERAND (expr, 0);
3613 if (is_gimple_val (op0))
3616 cost0 = force_expr_to_var_cost (op0, speed);
3622 /* Just an arbitrary value, FIXME. */
3623 return new_cost (target_spill_cost[speed], 0);
3626 mode = TYPE_MODE (TREE_TYPE (expr));
3627 switch (TREE_CODE (expr))
3629 case POINTER_PLUS_EXPR:
3633 cost = new_cost (add_cost (mode, speed), 0);
3637 if (cst_and_fits_in_hwi (op0))
3638 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3639 else if (cst_and_fits_in_hwi (op1))
3640 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3642 return new_cost (target_spill_cost [speed], 0);
3649 cost = add_costs (cost, cost0);
3650 cost = add_costs (cost, cost1);
3652 /* Bound the cost by target_spill_cost. The parts of complicated
3653 computations are often either loop invariant or at least can
3654 be shared between several iv uses, so letting this grow without
3655 limit would not give reasonable results. */
3656 if (cost.cost > (int) target_spill_cost [speed])
3657 cost.cost = target_spill_cost [speed];
3662 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3663 invariants the computation depends on. */
3666 force_var_cost (struct ivopts_data *data,
3667 tree expr, bitmap *depends_on)
3671 fd_ivopts_data = data;
3672 walk_tree (&expr, find_depends, depends_on, NULL);
3675 return force_expr_to_var_cost (expr, data->speed);
3678 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3679 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3680 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3681 invariants the computation depends on. */
3684 split_address_cost (struct ivopts_data *data,
3685 tree addr, bool *symbol_present, bool *var_present,
3686 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3689 HOST_WIDE_INT bitsize;
3690 HOST_WIDE_INT bitpos;
3692 enum machine_mode mode;
3693 int unsignedp, volatilep;
3695 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3696 &unsignedp, &volatilep, false);
3699 || bitpos % BITS_PER_UNIT != 0
3700 || TREE_CODE (core) != VAR_DECL)
3702 *symbol_present = false;
3703 *var_present = true;
3704 fd_ivopts_data = data;
3705 walk_tree (&addr, find_depends, depends_on, NULL);
3706 return new_cost (target_spill_cost[data->speed], 0);
3709 *offset += bitpos / BITS_PER_UNIT;
3710 if (TREE_STATIC (core)
3711 || DECL_EXTERNAL (core))
3713 *symbol_present = true;
3714 *var_present = false;
3718 *symbol_present = false;
3719 *var_present = true;
3723 /* Estimates cost of expressing difference of addresses E1 - E2 as
3724 var + symbol + offset. The value of offset is added to OFFSET,
3725 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3726 part is missing. DEPENDS_ON is a set of the invariants the computation
3727 depends on. */
3730 ptr_difference_cost (struct ivopts_data *data,
3731 tree e1, tree e2, bool *symbol_present, bool *var_present,
3732 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3734 HOST_WIDE_INT diff = 0;
3735 aff_tree aff_e1, aff_e2;
3738 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3740 if (ptr_difference_const (e1, e2, &diff))
3743 *symbol_present = false;
3744 *var_present = false;
3748 if (integer_zerop (e2))
3749 return split_address_cost (data, TREE_OPERAND (e1, 0),
3750 symbol_present, var_present, offset, depends_on);
3752 *symbol_present = false;
3753 *var_present = true;
3755 type = signed_type_for (TREE_TYPE (e1));
3756 tree_to_aff_combination (e1, type, &aff_e1);
3757 tree_to_aff_combination (e2, type, &aff_e2);
3758 aff_combination_scale (&aff_e2, double_int_minus_one);
3759 aff_combination_add (&aff_e1, &aff_e2);
3761 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3764 /* Estimates cost of expressing difference E1 - E2 as
3765 var + symbol + offset. The value of offset is added to OFFSET,
3766 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3767 part is missing. DEPENDS_ON is a set of the invariants the computation
3768 depends on. */
3771 difference_cost (struct ivopts_data *data,
3772 tree e1, tree e2, bool *symbol_present, bool *var_present,
3773 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3776 unsigned HOST_WIDE_INT off1, off2;
3777 aff_tree aff_e1, aff_e2;
3780 e1 = strip_offset (e1, &off1);
3781 e2 = strip_offset (e2, &off2);
3782 *offset += off1 - off2;
3787 if (TREE_CODE (e1) == ADDR_EXPR)
3788 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3789 offset, depends_on);
3790 *symbol_present = false;
3792 if (operand_equal_p (e1, e2, 0))
3794 *var_present = false;
3798 *var_present = true;
3800 if (integer_zerop (e2))
3801 return force_var_cost (data, e1, depends_on);
3803 if (integer_zerop (e1))
3805 comp_cost cost = force_var_cost (data, e2, depends_on);
3806 cost.cost += multiply_by_cost (-1, mode, data->speed);
3810 type = signed_type_for (TREE_TYPE (e1));
3811 tree_to_aff_combination (e1, type, &aff_e1);
3812 tree_to_aff_combination (e2, type, &aff_e2);
3813 aff_combination_scale (&aff_e2, double_int_minus_one);
3814 aff_combination_add (&aff_e1, &aff_e2);
3816 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3819 /* Returns true if AFF1 and AFF2 are identical. */
3822 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3826 if (aff1->n != aff2->n)
3829 for (i = 0; i < aff1->n; i++)
3831 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3834 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3840 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3841 requires a new compiler-generated temporary. Returns -1 otherwise.
3842 ADDRESS_P is a flag indicating if the expression is for address
3843 computation. */
3846 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3847 tree cbase, HOST_WIDE_INT ratio,
3850 aff_tree ubase_aff, cbase_aff;
3852 struct iv_inv_expr_ent ent;
3853 struct iv_inv_expr_ent **slot;
3860 if ((TREE_CODE (ubase) == INTEGER_CST)
3861 && (TREE_CODE (cbase) == INTEGER_CST))
3864 /* Strips the constant part. */
3865 if (TREE_CODE (ubase) == PLUS_EXPR
3866 || TREE_CODE (ubase) == MINUS_EXPR
3867 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3869 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3870 ubase = TREE_OPERAND (ubase, 0);
3873 /* Strips the constant part. */
3874 if (TREE_CODE (cbase) == PLUS_EXPR
3875 || TREE_CODE (cbase) == MINUS_EXPR
3876 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3878 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3879 cbase = TREE_OPERAND (cbase, 0);
3884 if (((TREE_CODE (ubase) == SSA_NAME)
3885 || (TREE_CODE (ubase) == ADDR_EXPR
3886 && is_gimple_min_invariant (ubase)))
3887 && (TREE_CODE (cbase) == INTEGER_CST))
3890 if (((TREE_CODE (cbase) == SSA_NAME)
3891 || (TREE_CODE (cbase) == ADDR_EXPR
3892 && is_gimple_min_invariant (cbase)))
3893 && (TREE_CODE (ubase) == INTEGER_CST))
3899 if (operand_equal_p (ubase, cbase, 0))
3902 if (TREE_CODE (ubase) == ADDR_EXPR
3903 && TREE_CODE (cbase) == ADDR_EXPR)
3907 usym = TREE_OPERAND (ubase, 0);
3908 csym = TREE_OPERAND (cbase, 0);
3909 if (TREE_CODE (usym) == ARRAY_REF)
3911 tree ind = TREE_OPERAND (usym, 1);
3912 if (TREE_CODE (ind) == INTEGER_CST
3913 && host_integerp (ind, 0)
3914 && TREE_INT_CST_LOW (ind) == 0)
3915 usym = TREE_OPERAND (usym, 0);
3917 if (TREE_CODE (csym) == ARRAY_REF)
3919 tree ind = TREE_OPERAND (csym, 1);
3920 if (TREE_CODE (ind) == INTEGER_CST
3921 && host_integerp (ind, 0)
3922 && TREE_INT_CST_LOW (ind) == 0)
3923 csym = TREE_OPERAND (csym, 0);
3925 if (operand_equal_p (usym, csym, 0))
3928 /* Now do a more complex comparison. */
3929 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3930 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3931 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3935 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3936 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3938 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3939 aff_combination_add (&ubase_aff, &cbase_aff);
3940 expr = aff_combination_to_tree (&ubase_aff);
3942 ent.hash = iterative_hash_expr (expr, 0);
3943 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3948 *slot = XNEW (struct iv_inv_expr_ent);
3949 (*slot)->expr = expr;
3950 (*slot)->hash = ent.hash;
3951 (*slot)->id = data->inv_expr_id++;
3957 /* Determines the cost of the computation by which USE is expressed
3958 from induction variable CAND. If ADDRESS_P is true, we just need
3959 to create an address from it, otherwise we want to get it into a
3960 register. A set of invariants we depend on is stored in
3961 DEPENDS_ON. AT is the statement at which the value is computed.
3962 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3963 addressing is likely. */
3966 get_computation_cost_at (struct ivopts_data *data,
3967 struct iv_use *use, struct iv_cand *cand,
3968 bool address_p, bitmap *depends_on, gimple at,
3972 tree ubase = use->iv->base, ustep = use->iv->step;
3974 tree utype = TREE_TYPE (ubase), ctype;
3975 unsigned HOST_WIDE_INT cstepi, offset = 0;
3976 HOST_WIDE_INT ratio, aratio;
3977 bool var_present, symbol_present, stmt_is_after_inc;
3980 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3984 /* Only consider real candidates. */
3986 return infinite_cost;
3988 cbase = cand->iv->base;
3989 cstep = cand->iv->step;
3990 ctype = TREE_TYPE (cbase);
3992 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3994 /* We do not have a precision to express the values of use. */
3995 return infinite_cost;
4000 /* Do not try to express address of an object with computation based
4001 on address of a different object. This may cause problems in rtl
4002 level alias analysis (that does not expect this to be happening,
4003 as this is illegal in C), and would be unlikely to be useful
4004 anyway. */
4005 if (use->iv->base_object
4006 && cand->iv->base_object
4007 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4008 return infinite_cost;
4011 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4013 /* TODO -- add direct handling of this case. */
4017 /* CSTEPI is removed from the offset in case the statement is after the
4018 increment. If the step is not constant, we use zero instead.
4019 This is a bit imprecise (there is the extra addition), but
4020 redundancy elimination is likely to transform the code so that
4021 it uses the value of the variable before the increment anyway,
4022 so this is not all that unrealistic. */
4023 if (cst_and_fits_in_hwi (cstep))
4024 cstepi = int_cst_value (cstep);
4028 if (!constant_multiple_of (ustep, cstep, &rat))
4029 return infinite_cost;
4031 if (double_int_fits_in_shwi_p (rat))
4032 ratio = double_int_to_shwi (rat);
4034 return infinite_cost;
4037 ctype = TREE_TYPE (cbase);
4039 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4041 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4042 or ratio == 1, it is better to handle this like
4044 ubase - ratio * cbase + ratio * var
4046 (this also holds in the case ratio == -1, TODO). */
4048 if (cst_and_fits_in_hwi (cbase))
4050 offset = - ratio * int_cst_value (cbase);
4051 cost = difference_cost (data,
4052 ubase, build_int_cst (utype, 0),
4053 &symbol_present, &var_present, &offset,
4055 cost.cost /= avg_loop_niter (data->current_loop);
4057 else if (ratio == 1)
4059 tree real_cbase = cbase;
4061 /* Check to see if any adjustment is needed. */
4062 if (cstepi == 0 && stmt_is_after_inc)
4064 aff_tree real_cbase_aff;
4067 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4069 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4071 aff_combination_add (&real_cbase_aff, &cstep_aff);
4072 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4075 cost = difference_cost (data,
4077 &symbol_present, &var_present, &offset,
4079 cost.cost /= avg_loop_niter (data->current_loop);
4082 && !POINTER_TYPE_P (ctype)
4083 && multiplier_allowed_in_address_p
4084 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4085 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4088 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4089 cost = difference_cost (data,
4091 &symbol_present, &var_present, &offset,
4093 cost.cost /= avg_loop_niter (data->current_loop);
4097 cost = force_var_cost (data, cbase, depends_on);
4098 cost = add_costs (cost,
4099 difference_cost (data,
4100 ubase, build_int_cst (utype, 0),
4101 &symbol_present, &var_present,
4102 &offset, depends_on));
4103 cost.cost /= avg_loop_niter (data->current_loop);
4104 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4110 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4111 /* Clear depends on. */
4112 if (*inv_expr_id != -1 && depends_on && *depends_on)
4113 bitmap_clear (*depends_on);
4116 /* If we are after the increment, the value of the candidate is higher by
4117 one iteration. */
4118 if (stmt_is_after_inc)
4119 offset -= ratio * cstepi;
4121 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4122 (symbol/var1/const parts may be omitted). If we are looking for an
4123 address, find the cost of addressing this. */
4125 return add_costs (cost,
4126 get_address_cost (symbol_present, var_present,
4127 offset, ratio, cstepi,
4128 TYPE_MODE (TREE_TYPE (utype)),
4129 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4130 speed, stmt_is_after_inc,
4133 /* Otherwise estimate the costs for computing the expression. */
4134 if (!symbol_present && !var_present && !offset)
4137 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4141 /* Symbol + offset should be compile-time computable so consider that they
4142 are added once to the variable, if present. */
4143 if (var_present && (symbol_present || offset))
4144 cost.cost += adjust_setup_cost (data,
4145 add_cost (TYPE_MODE (ctype), speed));
4147 /* Having an offset does not affect the runtime cost if it is added to
4148 a symbol, but it increases complexity. */
4152 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4154 aratio = ratio > 0 ? ratio : -ratio;
4156 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4161 *can_autoinc = false;
4164 /* Just get the expression, expand it and measure the cost. */
4165 tree comp = get_computation_at (data->current_loop, use, cand, at);
4168 return infinite_cost;
4171 comp = build_simple_mem_ref (comp);
4173 return new_cost (computation_cost (comp, speed), 0);
4177 /* Determines the cost of the computation by which USE is expressed
4178 from induction variable CAND. If ADDRESS_P is true, we just need
4179 to create an address from it, otherwise we want to get it into a
4180 register. A set of invariants we depend on is stored in
4181 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4182 autoinc addressing is likely. */
4185 get_computation_cost (struct ivopts_data *data,
4186 struct iv_use *use, struct iv_cand *cand,
4187 bool address_p, bitmap *depends_on,
4188 bool *can_autoinc, int *inv_expr_id)
4190 return get_computation_cost_at (data,
4191 use, cand, address_p, depends_on, use->stmt,
4192 can_autoinc, inv_expr_id);
4195 /* Determines cost of basing replacement of USE on CAND in a generic
4196 expression. */
4199 determine_use_iv_cost_generic (struct ivopts_data *data,
4200 struct iv_use *use, struct iv_cand *cand)
4204 int inv_expr_id = -1;
4206 /* The simple case first -- if we need to express the value of the preserved
4207 original biv, the cost is 0. This also prevents us from counting the
4208 cost of the increment twice -- once at this use and once in the cost of
4209 the candidate. */
4210 if (cand->pos == IP_ORIGINAL
4211 && cand->incremented_at == use->stmt)
4213 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4217 cost = get_computation_cost (data, use, cand, false, &depends_on,
4218 NULL, &inv_expr_id);
4220 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4223 return !infinite_cost_p (cost);
4226 /* Determines cost of basing replacement of USE on CAND in an address. */
4229 determine_use_iv_cost_address (struct ivopts_data *data,
4230 struct iv_use *use, struct iv_cand *cand)
4234 int inv_expr_id = -1;
4235 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4236 &can_autoinc, &inv_expr_id);
4238 if (cand->ainc_use == use)
4241 cost.cost -= cand->cost_step;
4242 /* If we generated the candidate solely for exploiting autoincrement
4243 opportunities, and it turns out it can't be used, set the cost to
4244 infinity to make sure we ignore it. */
4245 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4246 cost = infinite_cost;
4248 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4251 return !infinite_cost_p (cost);
4254 /* Computes value of candidate CAND at position AT in iteration NITER, and
4255 stores it to VAL. */
4258 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4261 aff_tree step, delta, nit;
4262 struct iv *iv = cand->iv;
4263 tree type = TREE_TYPE (iv->base);
4264 tree steptype = type;
4265 if (POINTER_TYPE_P (type))
4266 steptype = sizetype;
4268 tree_to_aff_combination (iv->step, steptype, &step);
4269 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4270 aff_combination_convert (&nit, steptype);
4271 aff_combination_mult (&nit, &step, &delta);
4272 if (stmt_after_increment (loop, cand, at))
4273 aff_combination_add (&delta, &step);
4275 tree_to_aff_combination (iv->base, type, val);
4276 aff_combination_add (val, &delta);
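/* Concrete instance of the computation above (hypothetical numbers): a
   candidate with base 10 and step 3 evaluates to 10 + 3 * niter before
   its increment in iteration NITER, and to one more step after it --
   which is exactly why stmt_after_increment adds an extra STEP.  */

static long ATTRIBUTE_UNUSED
cand_value_example (long base, long step, long niter, bool after_inc_p)
{
  long delta = step * niter;

  if (after_inc_p)
    delta += step;

  return base + delta;
}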
4279 /* Returns the period of induction variable IV. */
4282 iv_period (struct iv *iv)
4284 tree step = iv->step, period, type;
4287 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4289 type = unsigned_type_for (TREE_TYPE (step));
4290 /* Period of the iv is lcm (step, type_range) / step - 1,
4291 i.e., N * type_range / step - 1. Since the type range is a power
4292 of two, N == step >> num_of_ending_zeros_binary (step),
4293 so the final result is
4295 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4298 pow2div = num_ending_zeros (step);
4300 period = build_low_bits_mask (type,
4301 (TYPE_PRECISION (type)
4302 - tree_low_cst (pow2div, 1)));
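/* Worked example of the formula above (hypothetical helper; assumes
   STEP != 0 and PRECISION minus the trailing zero count is below the
   host word width): an 8-bit iv with step 4 has num_ending_zeros == 2,
   so its period is (256 >> 2) - 1 == 63 increments before the values
   start repeating modulo 2^8.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
iv_period_example (unsigned HOST_WIDE_INT step, unsigned precision)
{
  unsigned pow2div = 0;

  while (!(step & 1))		/* num_ending_zeros (STEP).  */
    {
      step >>= 1;
      pow2div++;
    }

  /* build_low_bits_mask (type, precision - pow2div).  */
  return (((unsigned HOST_WIDE_INT) 1) << (precision - pow2div)) - 1;
}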
4307 /* Returns the comparison operator used when eliminating the iv USE. */
4309 static enum tree_code
4310 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4312 struct loop *loop = data->current_loop;
4316 ex_bb = gimple_bb (use->stmt);
4317 exit = EDGE_SUCC (ex_bb, 0);
4318 if (flow_bb_inside_loop_p (loop, exit->dest))
4319 exit = EDGE_SUCC (ex_bb, 1);
4321 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4324 /* Check whether it is possible to express the condition in USE by comparison
4325 of candidate CAND. If so, store the value compared with to BOUND. */
4328 may_eliminate_iv (struct ivopts_data *data,
4329 struct iv_use *use, struct iv_cand *cand, tree *bound)
4334 struct loop *loop = data->current_loop;
4336 struct tree_niter_desc *desc = NULL;
4338 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4341 /* For now this works only for exits that dominate the loop latch.
4342 TODO: extend to other conditions inside loop body. */
4343 ex_bb = gimple_bb (use->stmt);
4344 if (use->stmt != last_stmt (ex_bb)
4345 || gimple_code (use->stmt) != GIMPLE_COND
4346 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4349 exit = EDGE_SUCC (ex_bb, 0);
4350 if (flow_bb_inside_loop_p (loop, exit->dest))
4351 exit = EDGE_SUCC (ex_bb, 1);
4352 if (flow_bb_inside_loop_p (loop, exit->dest))
4355 nit = niter_for_exit (data, exit, &desc);
4359 /* Determine whether we can use the variable to test the exit condition.
4360 This is the case iff the period of the induction variable is greater
4361 than the number of iterations for which the exit condition is true. */
4362 period = iv_period (cand->iv);
4364 /* If the number of iterations is constant, compare against it directly. */
4365 if (TREE_CODE (nit) == INTEGER_CST)
4367 /* See cand_value_at. */
4368 if (stmt_after_increment (loop, cand, use->stmt))
4370 if (!tree_int_cst_lt (nit, period))
4375 if (tree_int_cst_lt (period, nit))
4380 /* If not, and if this is the only possible exit of the loop, see whether
4381 we can get a conservative estimate on the number of iterations of the
4382 entire loop and compare against that instead. */
4385 double_int period_value, max_niter;
4387 max_niter = desc->max;
4388 if (stmt_after_increment (loop, cand, use->stmt))
4389 max_niter = double_int_add (max_niter, double_int_one);
4390 period_value = tree_to_double_int (period);
4391 if (double_int_ucmp (max_niter, period_value) > 0)
4393 /* See if we can take advantage of inferred loop bound information. */
4394 if (loop_only_exit_p (loop, exit))
4396 if (!estimated_loop_iterations (loop, true, &max_niter))
4398 /* The loop bound is already adjusted by adding 1. */
4399 if (double_int_ucmp (max_niter, period_value) > 0)
4407 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4409 *bound = aff_combination_to_tree (&bnd);
4410 /* It is unlikely that computing the number of iterations using division
4411 would be more profitable than keeping the original induction variable. */
4412 if (expression_expensive_p (*bound))
4418 /* Determines cost of basing replacement of USE on CAND in a condition. */
4421 determine_use_iv_cost_condition (struct ivopts_data *data,
4422 struct iv_use *use, struct iv_cand *cand)
4424 tree bound = NULL_TREE;
4426 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4427 comp_cost elim_cost, express_cost, cost;
4429 int inv_expr_id = -1;
4430 tree *control_var, *bound_cst;
4432 /* Only consider real candidates. */
4435 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4439 /* Try iv elimination. */
4440 if (may_eliminate_iv (data, use, cand, &bound))
4442 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4443 /* The bound is a loop invariant, so it will be computed only
4444 once. */
4445 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4448 elim_cost = infinite_cost;
4450 /* Try expressing the original giv. If it is compared with an invariant,
4451 note that we cannot get rid of it. */
4452 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4456 /* When the condition is a comparison of the candidate IV against
4457 zero, prefer this IV.
4459 TODO: The constant that we're subtracting from the cost should
4460 be target-dependent. This information should be added to the
4461 target costs for each backend. */
4462 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4463 && integer_zerop (*bound_cst)
4464 && (operand_equal_p (*control_var, cand->var_after, 0)
4465 || operand_equal_p (*control_var, cand->var_before, 0)))
4466 elim_cost.cost -= 1;
4468 express_cost = get_computation_cost (data, use, cand, false,
4469 &depends_on_express, NULL,
4471 fd_ivopts_data = data;
4472 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4474 /* Choose the better approach, preferring the eliminated IV. */
4475 if (compare_costs (elim_cost, express_cost) <= 0)
4478 depends_on = depends_on_elim;
4479 depends_on_elim = NULL;
4483 cost = express_cost;
4484 depends_on = depends_on_express;
4485 depends_on_express = NULL;
4489 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4491 if (depends_on_elim)
4492 BITMAP_FREE (depends_on_elim);
4493 if (depends_on_express)
4494 BITMAP_FREE (depends_on_express);
4496 return !infinite_cost_p (cost);
4499 /* Determines cost of basing replacement of USE on CAND. Returns false
4500 if USE cannot be based on CAND. */
4503 determine_use_iv_cost (struct ivopts_data *data,
4504 struct iv_use *use, struct iv_cand *cand)
4508 case USE_NONLINEAR_EXPR:
4509 return determine_use_iv_cost_generic (data, use, cand);
4512 return determine_use_iv_cost_address (data, use, cand);
4515 return determine_use_iv_cost_condition (data, use, cand);
4522 /* Return true if get_computation_cost indicates that autoincrement is
4523 a possibility for the pair of USE and CAND, false otherwise. */
4526 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4527 struct iv_cand *cand)
4533 if (use->type != USE_ADDRESS)
4536 cost = get_computation_cost (data, use, cand, true, &depends_on,
4537 &can_autoinc, NULL);
4539 BITMAP_FREE (depends_on);
4541 return !infinite_cost_p (cost) && can_autoinc;
4544 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4545 use that allows autoincrement, and set their AINC_USE if possible. */
4548 set_autoinc_for_original_candidates (struct ivopts_data *data)
4552 for (i = 0; i < n_iv_cands (data); i++)
4554 struct iv_cand *cand = iv_cand (data, i);
4555 struct iv_use *closest = NULL;
4556 if (cand->pos != IP_ORIGINAL)
4558 for (j = 0; j < n_iv_uses (data); j++)
4560 struct iv_use *use = iv_use (data, j);
4561 unsigned uid = gimple_uid (use->stmt);
4562 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4563 || uid > gimple_uid (cand->incremented_at))
4565 if (closest == NULL || uid > gimple_uid (closest->stmt))
4568 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4570 cand->ainc_use = closest;
4574 /* Finds the candidates for the induction variables. */
4577 find_iv_candidates (struct ivopts_data *data)
4579 /* Add commonly used ivs. */
4580 add_standard_iv_candidates (data);
4582 /* Add old induction variables. */
4583 add_old_ivs_candidates (data);
4585 /* Add induction variables derived from uses. */
4586 add_derived_ivs_candidates (data);
4588 set_autoinc_for_original_candidates (data);
4590 /* Record the important candidates. */
4591 record_important_candidates (data);
4594 /* Determines costs of basing the use of the iv on an iv candidate. */
4597 determine_use_iv_costs (struct ivopts_data *data)
4601 struct iv_cand *cand;
4602 bitmap to_clear = BITMAP_ALLOC (NULL);
4604 alloc_use_cost_map (data);
4606 for (i = 0; i < n_iv_uses (data); i++)
4608 use = iv_use (data, i);
4610 if (data->consider_all_candidates)
4612 for (j = 0; j < n_iv_cands (data); j++)
4614 cand = iv_cand (data, j);
4615 determine_use_iv_cost (data, use, cand);
4622 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4624 cand = iv_cand (data, j);
4625 if (!determine_use_iv_cost (data, use, cand))
4626 bitmap_set_bit (to_clear, j);
4629 /* Remove the candidates for which the cost is infinite from
4630 the list of related candidates. */
4631 bitmap_and_compl_into (use->related_cands, to_clear);
4632 bitmap_clear (to_clear);
4636 BITMAP_FREE (to_clear);
4638 if (dump_file && (dump_flags & TDF_DETAILS))
4640 fprintf (dump_file, "Use-candidate costs:\n");
4642 for (i = 0; i < n_iv_uses (data); i++)
4644 use = iv_use (data, i);
4646 fprintf (dump_file, "Use %d:\n", i);
4647 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4648 for (j = 0; j < use->n_map_members; j++)
4650 if (!use->cost_map[j].cand
4651 || infinite_cost_p (use->cost_map[j].cost))
4654 fprintf (dump_file, " %d\t%d\t%d\t",
4655 use->cost_map[j].cand->id,
4656 use->cost_map[j].cost.cost,
4657 use->cost_map[j].cost.complexity);
4658 if (use->cost_map[j].depends_on)
4659 bitmap_print (dump_file,
4660 use->cost_map[j].depends_on, "","");
4661 if (use->cost_map[j].inv_expr_id != -1)
4662 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4663 fprintf (dump_file, "\n");
4666 fprintf (dump_file, "\n");
4668 fprintf (dump_file, "\n");
4672 /* Determines cost of the candidate CAND. */
4675 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4677 comp_cost cost_base;
4678 unsigned cost, cost_step;
4687 /* There are two costs associated with the candidate -- its increment
4688 and its initialization. The second is almost negligible for any loop
4689 that rolls enough, so we weight it only lightly. */
4691 base = cand->iv->base;
4692 cost_base = force_var_cost (data, base, NULL);
4693 /* It will be exceptional that the iv register happens to be initialized with
4694 the proper value at no cost. In general, there will at least be a regcopy
4695 or a save. */
4696 if (cost_base.cost == 0)
4697 cost_base.cost = COSTS_N_INSNS (1);
4698 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4700 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4702 /* Prefer the original ivs unless we may gain something by replacing them.
4703 The reason is to make debugging simpler; so this is not relevant for
4704 artificial ivs created by other optimization passes. */
4705 if (cand->pos != IP_ORIGINAL
4706 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4709 /* Prefer not to insert statements into the latch unless there are some
4710 already (so that we do not create unnecessary jumps). */
4711 if (cand->pos == IP_END
4712 && empty_block_p (ip_end_pos (data->current_loop)))
4716 cand->cost_step = cost_step;
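/* Illustrative numbers for the cost above (hypothetical, simplified):
   with a per-iteration increment cost of 1, a base that costs 4 to force
   into a register, and an average trip count of 5, the candidate is
   charged 1 + 4 / 5 == 1 when optimizing for speed -- the setup is
   amortized over the loop, the increment is not.  */

static unsigned ATTRIBUTE_UNUSED
iv_cost_example (unsigned cost_base, unsigned cost_step, unsigned avg_niter)
{
  if (cost_base == 0)
    cost_base = 1;		/* Stand-in for COSTS_N_INSNS (1).  */

  return cost_step + cost_base / avg_niter;
}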
4719 /* Determines costs of computation of the candidates. */
4722 determine_iv_costs (struct ivopts_data *data)
4726 if (dump_file && (dump_flags & TDF_DETAILS))
4728 fprintf (dump_file, "Candidate costs:\n");
4729 fprintf (dump_file, " cand\tcost\n");
4732 for (i = 0; i < n_iv_cands (data); i++)
4734 struct iv_cand *cand = iv_cand (data, i);
4736 determine_iv_cost (data, cand);
4738 if (dump_file && (dump_flags & TDF_DETAILS))
4739 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4742 if (dump_file && (dump_flags & TDF_DETAILS))
4743 fprintf (dump_file, "\n");
4746 /* Calculates cost for having SIZE induction variables. */
4749 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4751 /* We add size to the cost, so that we prefer eliminating ivs
4752 if possible. */
4753 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4754 data->body_includes_call);
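/* Sketch of the size penalty above (hypothetical toy pressure model,
   much simpler than estimate_reg_pressure_cost): adding SIZE itself to
   the cost biases the search toward smaller iv sets, and an extra
   penalty only appears once the ivs plus other live values exceed the
   available registers.  */

static unsigned ATTRIBUTE_UNUSED
global_cost_example (unsigned size, unsigned regs_used, unsigned avail_regs)
{
  unsigned pressure = 0;

  if (size + regs_used > avail_regs)
    pressure = size + regs_used - avail_regs;	/* Pretend each spill costs 1.  */

  return size + pressure;
}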
4757 /* For each size of the induction variable set determine the penalty. */
4760 determine_set_costs (struct ivopts_data *data)
4764 gimple_stmt_iterator psi;
4766 struct loop *loop = data->current_loop;
4769 if (dump_file && (dump_flags & TDF_DETAILS))
4771 fprintf (dump_file, "Global costs:\n");
4772 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4773 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4774 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4775 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4779 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4781 phi = gsi_stmt (psi);
4782 op = PHI_RESULT (phi);
4784 if (!is_gimple_reg (op))
4787 if (get_iv (data, op))
4793 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4795 struct version_info *info = ver_info (data, j);
4797 if (info->inv_id && info->has_nonlin_use)
4801 data->regs_used = n;
4802 if (dump_file && (dump_flags & TDF_DETAILS))
4803 fprintf (dump_file, " regs_used %d\n", n);
4805 if (dump_file && (dump_flags & TDF_DETAILS))
4807 fprintf (dump_file, " cost for size:\n");
4808 fprintf (dump_file, " ivs\tcost\n");
4809 for (j = 0; j <= 2 * target_avail_regs; j++)
4810 fprintf (dump_file, " %d\t%d\n", j,
4811 ivopts_global_cost_for_size (data, j));
4812 fprintf (dump_file, "\n");
4816 /* Returns true if A is a cheaper cost pair than B. */
4819 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4829 cmp = compare_costs (a->cost, b->cost);
4836 /* In case the costs are the same, prefer the cheaper candidate. */
4837 if (a->cand->cost < b->cand->cost)
4844 /* Returns the candidate by which USE is expressed in IVS. */
4846 static struct cost_pair *
4847 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4849 return ivs->cand_for_use[use->id];
4852 /* Computes the cost field of IVS structure. */
4854 static void
4855 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4856 {
4857 comp_cost cost = ivs->cand_use_cost;
4859 cost.cost += ivs->cand_cost;
4861 cost.cost += ivopts_global_cost_for_size (data,
4862 ivs->n_regs + ivs->num_used_inv_expr);
4864 ivs->cost = cost;
4865 }
4867 /* Removes the invariants in set INVS from set IVS. */
4869 static void
4870 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4871 {
4872 bitmap_iterator bi;
4873 unsigned iid;
4875 if (!invs)
4876 return;
4878 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4880 ivs->n_invariant_uses[iid]--;
4881 if (ivs->n_invariant_uses[iid] == 0)
4882 ivs->n_regs--;
4883 }
4884 }
4886 /* Set USE not to be expressed by any candidate in IVS. */
4888 static void
4889 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4890 struct iv_use *use)
4891 {
4892 unsigned uid = use->id, cid;
4893 struct cost_pair *cp;
4895 cp = ivs->cand_for_use[uid];
4896 if (!cp)
4897 return;
4899 cid = cp->cand->id;
4900 ivs->bad_uses++;
4901 ivs->cand_for_use[uid] = NULL;
4902 ivs->n_cand_uses[cid]--;
4904 if (ivs->n_cand_uses[cid] == 0)
4906 bitmap_clear_bit (ivs->cands, cid);
4907 /* Do not count the pseudocandidates. */
4908 if (cp->cand->iv)
4909 ivs->n_regs--;
4910 ivs->n_cands--;
4911 ivs->cand_cost -= cp->cand->cost;
4913 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4914 }
4916 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4918 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4920 if (cp->inv_expr_id != -1)
4922 ivs->used_inv_expr[cp->inv_expr_id]--;
4923 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4924 ivs->num_used_inv_expr--;
4925 }
4926 iv_ca_recount_cost (data, ivs);
4927 }
4929 /* Adds the invariants in set INVS to set IVS. */
4931 static void
4932 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4933 {
4934 bitmap_iterator bi;
4935 unsigned iid;
4937 if (!invs)
4938 return;
4940 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4942 ivs->n_invariant_uses[iid]++;
4943 if (ivs->n_invariant_uses[iid] == 1)
4944 ivs->n_regs++;
4945 }
4946 }
4948 /* Set cost pair for USE in set IVS to CP. */
4950 static void
4951 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4952 struct iv_use *use, struct cost_pair *cp)
4953 {
4954 unsigned uid = use->id, cid;
4956 if (ivs->cand_for_use[uid] == cp)
4957 return;
4959 if (ivs->cand_for_use[uid])
4960 iv_ca_set_no_cp (data, ivs, use);
4962 if (cp)
4963 {
4964 cid = cp->cand->id;
4966 ivs->bad_uses--;
4967 ivs->cand_for_use[uid] = cp;
4968 ivs->n_cand_uses[cid]++;
4969 if (ivs->n_cand_uses[cid] == 1)
4971 bitmap_set_bit (ivs->cands, cid);
4972 /* Do not count the pseudocandidates. */
4973 if (cp->cand->iv)
4974 ivs->n_regs++;
4975 ivs->n_cands++;
4976 ivs->cand_cost += cp->cand->cost;
4978 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4979 }
4981 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4982 iv_ca_set_add_invariants (ivs, cp->depends_on);
4984 if (cp->inv_expr_id != -1)
4986 ivs->used_inv_expr[cp->inv_expr_id]++;
4987 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4988 ivs->num_used_inv_expr++;
4990 iv_ca_recount_cost (data, ivs);
4994 /* Extend set IVS by expressing USE by some of the candidates in it
4995 if possible. All important candidates will be considered
4996 if IMPORTANT_CANDIDATES is true. */
4998 static void
4999 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5000 struct iv_use *use, bool important_candidates)
5001 {
5002 struct cost_pair *best_cp = NULL, *cp;
5003 bitmap_iterator bi;
5004 unsigned i;
5005 bitmap cands;
5007 gcc_assert (ivs->upto >= use->id);
5009 if (ivs->upto == use->id)
5010 {
5011 ivs->upto++;
5012 ivs->bad_uses++;
5013 }
5015 cands = (important_candidates ? data->important_candidates : ivs->cands);
5016 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5018 struct iv_cand *cand = iv_cand (data, i);
5020 cp = get_use_iv_cost (data, use, cand);
5022 if (cheaper_cost_pair (cp, best_cp))
5023 best_cp = cp;
5024 }
5026 iv_ca_set_cp (data, ivs, use, best_cp);
5027 }
5029 /* Get cost for assignment IVS. */
5031 static comp_cost
5032 iv_ca_cost (struct iv_ca *ivs)
5033 {
5034 /* This was a conditional expression but it triggered a bug in
5035 Sun C 5.5. */
5036 if (ivs->bad_uses)
5037 return infinite_cost;
5038 else
5039 return ivs->cost;
5040 }
5042 /* Returns true if all dependences of CP are among invariants in IVS. */
5045 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5050 if (!cp->depends_on)
5051 return true;
5053 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5055 if (ivs->n_invariant_uses[i] == 0)
5056 return false;
5057 }
5059 return true;
5060 }
5062 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5063 chains it before NEXT_CHANGE. */
5065 static struct iv_ca_delta *
5066 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5067 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5069 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5071 change->use = use;
5072 change->old_cp = old_cp;
5073 change->new_cp = new_cp;
5074 change->next_change = next_change;
5076 return change;
5077 }
5079 /* Joins two lists of changes L1 and L2. Destructive -- the old lists
5080 are not usable afterwards. */
5082 static struct iv_ca_delta *
5083 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5085 struct iv_ca_delta *last;
5087 if (!l2)
5088 return l1;
5090 if (!l1)
5091 return l2;
5093 for (last = l1; last->next_change; last = last->next_change)
5094 continue;
5095 last->next_change = l2;
5097 return l1;
5098 }
5100 /* Reverses the list of changes DELTA, forming its inverse. */
5102 static struct iv_ca_delta *
5103 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5105 struct iv_ca_delta *act, *next, *prev = NULL;
5106 struct cost_pair *tmp;
5108 for (act = delta; act; act = next)
5110 next = act->next_change;
5111 act->next_change = prev;
5112 prev = act;
5114 tmp = act->old_cp;
5115 act->old_cp = act->new_cp;
5116 act->new_cp = tmp;
5117 }
5119 return prev;
5120 }
5122 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5123 reverted instead. */
5125 static void
5126 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5127 struct iv_ca_delta *delta, bool forward)
5129 struct cost_pair *from, *to;
5130 struct iv_ca_delta *act;
5132 if (!forward)
5133 delta = iv_ca_delta_reverse (delta);
5135 for (act = delta; act; act = act->next_change)
5136 {
5137 from = act->old_cp;
5138 to = act->new_cp;
5139 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5140 iv_ca_set_cp (data, ivs, act->use, to);
5141 }
5143 if (!forward)
5144 iv_ca_delta_reverse (delta);
5145 }
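/* A delta list therefore supports the try-measure-undo pattern used by
   iv_ca_extend and iv_ca_narrow below:

     *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
     iv_ca_delta_commit (data, ivs, *delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, *delta, false);

   which measures the cost of the modified assignment while leaving IVS
   unchanged unless the delta is eventually accepted.  */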
5147 /* Returns true if CAND is used in IVS. */
5149 static bool
5150 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5151 {
5152 return ivs->n_cand_uses[cand->id] > 0;
5153 }
5155 /* Returns number of induction variable candidates in the set IVS. */
5157 static unsigned
5158 iv_ca_n_cands (struct iv_ca *ivs)
5159 {
5160 return ivs->n_cands;
5161 }
5163 /* Free the list of changes DELTA. */
5165 static void
5166 iv_ca_delta_free (struct iv_ca_delta **delta)
5167 {
5168 struct iv_ca_delta *act, *next;
5170 for (act = *delta; act; act = next)
5172 next = act->next_change;
5173 free (act);
5174 }
5176 *delta = NULL;
5177 }
5179 /* Allocates new iv candidates assignment. */
5181 static struct iv_ca *
5182 iv_ca_new (struct ivopts_data *data)
5184 struct iv_ca *nw = XNEW (struct iv_ca);
5186 nw->upto = 0;
5187 nw->bad_uses = 0;
5188 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5189 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5190 nw->cands = BITMAP_ALLOC (NULL);
5191 nw->n_cands = 0;
5192 nw->n_regs = 0;
5193 nw->cand_use_cost = zero_cost;
5194 nw->cand_cost = 0;
5195 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5196 nw->cost = zero_cost;
5197 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5198 nw->num_used_inv_expr = 0;
5200 return nw;
5201 }
5203 /* Free memory occupied by the set IVS. */
5205 static void
5206 iv_ca_free (struct iv_ca **ivs)
5207 {
5208 free ((*ivs)->cand_for_use);
5209 free ((*ivs)->n_cand_uses);
5210 BITMAP_FREE ((*ivs)->cands);
5211 free ((*ivs)->n_invariant_uses);
5212 free ((*ivs)->used_inv_expr);
5213 free (*ivs);
5214 *ivs = NULL;
5215 }
5217 /* Dumps IVS to FILE. */
5219 static void
5220 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5221 {
5222 const char *pref = " invariants ";
5223 unsigned i;
5224 comp_cost cost = iv_ca_cost (ivs);
5226 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5227 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5228 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5229 bitmap_print (file, ivs->cands, " candidates: ","\n");
5231 for (i = 0; i < ivs->upto; i++)
5233 struct iv_use *use = iv_use (data, i);
5234 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5235 if (cp)
5236 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5237 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5238 else
5239 fprintf (file, " use:%d --> ??\n", use->id);
5240 }
5242 for (i = 1; i <= data->max_inv_id; i++)
5243 if (ivs->n_invariant_uses[i])
5245 fprintf (file, "%s%d", pref, i);
5246 pref = ", ";
5247 }
5248 fprintf (file, "\n\n");
5249 }
5251 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5252 new set, and store differences in DELTA. The number of induction variables
5253 in the new set is stored to *N_IVS. When MIN_NCAND is true, the function
5254 tries to find a solution with a minimal number of iv candidates. */
5256 static comp_cost
5257 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5258 struct iv_cand *cand, struct iv_ca_delta **delta,
5259 unsigned *n_ivs, bool min_ncand)
5261 unsigned i;
5262 comp_cost cost;
5263 struct iv_use *use;
5264 struct cost_pair *old_cp, *new_cp;
5266 *delta = NULL;
5267 for (i = 0; i < ivs->upto; i++)
5269 use = iv_use (data, i);
5270 old_cp = iv_ca_cand_for_use (ivs, use);
5272 if (old_cp
5273 && old_cp->cand == cand)
5274 continue;
5276 new_cp = get_use_iv_cost (data, use, cand);
5277 if (!new_cp)
5278 continue;
5280 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5281 continue;
5283 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5284 continue;
5286 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5287 }
5289 iv_ca_delta_commit (data, ivs, *delta, true);
5290 cost = iv_ca_cost (ivs);
5291 if (n_ivs)
5292 *n_ivs = iv_ca_n_cands (ivs);
5293 iv_ca_delta_commit (data, ivs, *delta, false);
5295 return cost;
5296 }
5298 /* Try narrowing set IVS by removing CAND. Return the cost of
5299 the new set and store the differences in DELTA. */
5301 static comp_cost
5302 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5303 struct iv_cand *cand, struct iv_ca_delta **delta)
5305 unsigned i, ci;
5306 struct iv_use *use;
5307 struct cost_pair *old_cp, *new_cp, *cp;
5308 bitmap_iterator bi;
5309 struct iv_cand *cnd;
5310 comp_cost cost;
5312 *delta = NULL;
5313 for (i = 0; i < n_iv_uses (data); i++)
5315 use = iv_use (data, i);
5317 old_cp = iv_ca_cand_for_use (ivs, use);
5318 if (old_cp->cand != cand)
5319 continue;
5321 new_cp = NULL;
5323 if (data->consider_all_candidates)
5325 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5326 {
5327 if (ci == cand->id)
5328 continue;
5330 cnd = iv_cand (data, ci);
5332 cp = get_use_iv_cost (data, use, cnd);
5333 if (!cp)
5334 continue;
5336 if (!iv_ca_has_deps (ivs, cp))
5337 continue;
5339 if (!cheaper_cost_pair (cp, new_cp))
5340 continue;
5342 new_cp = cp;
5343 }
5344 }
5345 else
5346 {
5347 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5348 {
5349 if (ci == cand->id)
5350 continue;
5352 cnd = iv_cand (data, ci);
5354 cp = get_use_iv_cost (data, use, cnd);
5355 if (!cp)
5356 continue;
5357 if (!iv_ca_has_deps (ivs, cp))
5358 continue;
5360 if (!cheaper_cost_pair (cp, new_cp))
5361 continue;
5363 new_cp = cp;
5364 }
5365 }
5367 if (!new_cp)
5368 {
5369 iv_ca_delta_free (delta);
5370 return infinite_cost;
5371 }
5373 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5376 iv_ca_delta_commit (data, ivs, *delta, true);
5377 cost = iv_ca_cost (ivs);
5378 iv_ca_delta_commit (data, ivs, *delta, false);
5380 return cost;
5381 }
5383 /* Try optimizing the set of candidates IVS by removing candidates other
5384 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5385 differences in DELTA. */
5387 static comp_cost
5388 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5389 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5390 {
5391 bitmap_iterator bi;
5392 struct iv_ca_delta *act_delta, *best_delta;
5393 unsigned i;
5394 comp_cost best_cost, acost;
5395 struct iv_cand *cand;
5397 best_delta = NULL;
5398 best_cost = iv_ca_cost (ivs);
5400 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5402 cand = iv_cand (data, i);
5404 if (cand == except_cand)
5405 continue;
5407 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5409 if (compare_costs (acost, best_cost) < 0)
5410 {
5411 best_cost = acost;
5412 iv_ca_delta_free (&best_delta);
5413 best_delta = act_delta;
5414 }
5415 else
5416 iv_ca_delta_free (&act_delta);
5417 }
5419 if (!best_delta)
5420 {
5421 *delta = NULL;
5422 return best_cost;
5423 }
5425 /* Recurse to possibly remove other unnecessary ivs. */
5426 iv_ca_delta_commit (data, ivs, best_delta, true);
5427 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5428 iv_ca_delta_commit (data, ivs, best_delta, false);
5429 *delta = iv_ca_delta_join (best_delta, *delta);
5431 return best_cost;
5432 }
5433 /* Tries to extend the set IVS in the best possible way in order
5434 to express the USE. If ORIGINALP is true, prefer candidates from
5435 the original set of IVs, otherwise favor important candidates not
5436 based on any memory object. */
5438 static bool
5439 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5440 struct iv_use *use, bool originalp)
5441 {
5442 comp_cost best_cost, act_cost;
5443 unsigned i;
5444 bitmap_iterator bi;
5445 struct iv_cand *cand;
5446 struct iv_ca_delta *best_delta = NULL, *act_delta;
5447 struct cost_pair *cp;
5449 iv_ca_add_use (data, ivs, use, false);
5450 best_cost = iv_ca_cost (ivs);
5452 cp = iv_ca_cand_for_use (ivs, use);
5453 if (!cp)
5454 {
5455 ivs->upto--;
5456 ivs->bad_uses--;
5457 iv_ca_add_use (data, ivs, use, true);
5458 best_cost = iv_ca_cost (ivs);
5459 cp = iv_ca_cand_for_use (ivs, use);
5460 }
5461 if (cp)
5462 {
5463 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5464 iv_ca_set_no_cp (data, ivs, use);
5465 }
5467 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5468 first try important candidates not based on any memory object. Only if
5469 this fails, try the specific ones. Rationale -- in loops with many
5470 variables the best choice often is to use just one generic biv. If we
5471 added here many ivs specific to the uses, the optimization algorithm later
5472 would be likely to get stuck in a local minimum, thus causing us to create
5473 too many ivs. The approach from few ivs to more seems more likely to be
5474 successful -- starting from few ivs, replacing an expensive use by a
5475 specific iv should always be a win. */
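/* For instance, in a loop that walks a[i], b[i] and c[i], a single generic
   biv with the three addresses expressed as BASE_x + STEP * biv is usually
   cheaper than three address-specific ivs; seeding the solution with the
   specific candidates would make that assignment hard to reach.  */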
5476 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5478 cand = iv_cand (data, i);
5480 if (originalp && cand->pos != IP_ORIGINAL)
5481 continue;
5483 if (!originalp && cand->iv->base_object != NULL_TREE)
5484 continue;
5486 if (iv_ca_cand_used_p (ivs, cand))
5487 continue;
5489 cp = get_use_iv_cost (data, use, cand);
5490 if (!cp)
5491 continue;
5493 iv_ca_set_cp (data, ivs, use, cp);
5494 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5495 true);
5496 iv_ca_set_no_cp (data, ivs, use);
5497 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5499 if (compare_costs (act_cost, best_cost) < 0)
5501 best_cost = act_cost;
5503 iv_ca_delta_free (&best_delta);
5504 best_delta = act_delta;
5507 iv_ca_delta_free (&act_delta);
5510 if (infinite_cost_p (best_cost))
5512 for (i = 0; i < use->n_map_members; i++)
5514 cp = use->cost_map + i;
5515 cand = cp->cand;
5516 if (!cand)
5517 continue;
5519 /* Already tried this. */
5520 if (cand->important)
5521 {
5522 if (originalp && cand->pos == IP_ORIGINAL)
5523 continue;
5524 if (!originalp && cand->iv->base_object == NULL_TREE)
5525 continue;
5526 }
5528 if (iv_ca_cand_used_p (ivs, cand))
5529 continue;
5531 act_delta = NULL;
5532 iv_ca_set_cp (data, ivs, use, cp);
5533 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5534 iv_ca_set_no_cp (data, ivs, use);
5535 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5536 cp, act_delta);
5538 if (compare_costs (act_cost, best_cost) < 0)
5540 best_cost = act_cost;
5543 iv_ca_delta_free (&best_delta);
5544 best_delta = act_delta;
5547 iv_ca_delta_free (&act_delta);
5551 iv_ca_delta_commit (data, ivs, best_delta, true);
5552 iv_ca_delta_free (&best_delta);
5554 return !infinite_cost_p (best_cost);
5557 /* Finds an initial assignment of candidates to uses. */
5559 static struct iv_ca *
5560 get_initial_solution (struct ivopts_data *data, bool originalp)
5562 struct iv_ca *ivs = iv_ca_new (data);
5563 unsigned i;
5565 for (i = 0; i < n_iv_uses (data); i++)
5566 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5567 {
5568 iv_ca_free (&ivs);
5569 return NULL;
5570 }
5572 return ivs;
5573 }
5575 /* Tries to improve set of induction variables IVS. */
5577 static bool
5578 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5579 {
5580 unsigned i, n_ivs;
5581 comp_cost acost, best_cost = iv_ca_cost (ivs);
5582 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5583 struct iv_cand *cand;
5585 /* Try extending the set of induction variables by one. */
5586 for (i = 0; i < n_iv_cands (data); i++)
5588 cand = iv_cand (data, i);
5590 if (iv_ca_cand_used_p (ivs, cand))
5591 continue;
5593 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5594 if (!act_delta)
5595 continue;
5597 /* If we successfully added the candidate and the set is small enough,
5598 try optimizing it by removing other candidates. */
5599 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5600 {
5601 iv_ca_delta_commit (data, ivs, act_delta, true);
5602 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5603 iv_ca_delta_commit (data, ivs, act_delta, false);
5604 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5605 }
5607 if (compare_costs (acost, best_cost) < 0)
5608 {
5609 best_cost = acost;
5610 iv_ca_delta_free (&best_delta);
5611 best_delta = act_delta;
5612 }
5613 else
5614 iv_ca_delta_free (&act_delta);
5615 }
5617 if (!best_delta)
5618 {
5619 /* Try removing the candidates from the set instead. */
5620 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5622 /* Nothing more we can do. */
5623 if (!best_delta)
5624 return false;
5625 }
5627 iv_ca_delta_commit (data, ivs, best_delta, true);
5628 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5629 iv_ca_delta_free (&best_delta);
5630 return true;
5631 }
5633 /* Attempts to find the optimal set of induction variables. We use a simple
5634 greedy heuristic: we try to replace at most one candidate in the selected
5635 solution and remove the unused ivs while this improves the cost. */
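/* In outline, find_optimal_iv_set_1 below performs

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set))
       continue;
     return set;

   i.e. a local search that stops as soon as no single extension or
   pruning step lowers the cost.  */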
5637 static struct iv_ca *
5638 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5639 {
5640 struct iv_ca *set;
5642 /* Get the initial solution. */
5643 set = get_initial_solution (data, originalp);
5644 if (!set)
5645 {
5646 if (dump_file && (dump_flags & TDF_DETAILS))
5647 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5648 return NULL;
5649 }
5651 if (dump_file && (dump_flags & TDF_DETAILS))
5653 fprintf (dump_file, "Initial set of candidates:\n");
5654 iv_ca_dump (data, dump_file, set);
5657 while (try_improve_iv_set (data, set))
5659 if (dump_file && (dump_flags & TDF_DETAILS))
5661 fprintf (dump_file, "Improved to:\n");
5662 iv_ca_dump (data, dump_file, set);
5663 }
5664 }
5666 return set;
5667 }
5669 static struct iv_ca *
5670 find_optimal_iv_set (struct ivopts_data *data)
5671 {
5672 unsigned i;
5673 struct iv_ca *set, *origset;
5674 struct iv_use *use;
5675 comp_cost cost, origcost;
5677 /* Determine the cost based on a strategy that starts with original IVs,
5678 and try again using a strategy that prefers candidates not based
5680 origset = find_optimal_iv_set_1 (data, true);
5681 set = find_optimal_iv_set_1 (data, false);
5683 if (!origset && !set)
5684 return NULL;
5686 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5687 cost = set ? iv_ca_cost (set) : infinite_cost;
5689 if (dump_file && (dump_flags & TDF_DETAILS))
5691 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5692 origcost.cost, origcost.complexity);
5693 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5694 cost.cost, cost.complexity);
5697 /* Choose the one with the best cost. */
5698 if (compare_costs (origcost, cost) <= 0)
5699 {
5700 if (set)
5701 iv_ca_free (&set);
5702 set = origset;
5703 }
5704 else if (origset)
5705 iv_ca_free (&origset);
5707 for (i = 0; i < n_iv_uses (data); i++)
5709 use = iv_use (data, i);
5710 use->selected = iv_ca_cand_for_use (set, use)->cand;
5711 }
5713 return set;
5714 }
5716 /* Creates a new induction variable corresponding to CAND. */
5718 static void
5719 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5720 {
5721 gimple_stmt_iterator incr_pos;
5722 tree base;
5723 bool after = false;
5725 if (!cand->iv)
5726 return;
5728 switch (cand->pos)
5729 {
5730 case IP_NORMAL:
5731 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5732 break;
5734 case IP_END:
5735 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5736 after = true;
5737 break;
5739 case IP_AFTER_USE:
5740 after = true;
5741 /* fall through */
5742 case IP_BEFORE_USE:
5743 incr_pos = gsi_for_stmt (cand->incremented_at);
5744 break;
5746 case IP_ORIGINAL:
5747 /* Mark that the iv is preserved. */
5748 name_info (data, cand->var_before)->preserve_biv = true;
5749 name_info (data, cand->var_after)->preserve_biv = true;
5751 /* Rewrite the increment so that it uses var_before directly. */
5752 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5753 return;
5754 }
5756 gimple_add_tmp_var (cand->var_before);
5757 add_referenced_var (cand->var_before);
5759 base = unshare_expr (cand->iv->base);
5761 create_iv (base, unshare_expr (cand->iv->step),
5762 cand->var_before, data->current_loop,
5763 &incr_pos, after, &cand->var_before, &cand->var_after);
5764 }
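/* For a candidate with base B and step S placed at IP_NORMAL, create_iv
   emits the equivalent of

     var_before = PHI <B (preheader), var_after (latch)>;
     var_after = var_before + S;

   with the increment inserted just before the exit test of the loop.  */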
5766 /* Creates new induction variables described in SET. */
5768 static void
5769 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5770 {
5771 unsigned i;
5772 struct iv_cand *cand;
5773 bitmap_iterator bi;
5775 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5777 cand = iv_cand (data, i);
5778 create_new_iv (data, cand);
5779 }
5781 if (dump_file && (dump_flags & TDF_DETAILS))
5783 fprintf (dump_file, "\nSelected IV set: \n");
5784 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5786 cand = iv_cand (data, i);
5787 dump_cand (dump_file, cand);
5789 fprintf (dump_file, "\n");
5793 /* Rewrites USE (definition of iv used in a nonlinear expression)
5794 using candidate CAND. */
5796 static void
5797 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5798 struct iv_use *use, struct iv_cand *cand)
5799 {
5800 tree comp;
5801 tree op, tgt;
5802 gimple ass;
5803 gimple_stmt_iterator bsi;
5805 /* An important special case -- if we are asked to express value of
5806 the original iv by itself, just exit; there is no need to
5807 introduce a new computation (that might also need casting the
5808 variable to unsigned and back). */
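/* I.e., if USE->stmt is the increment i_2 = i_1 + 1 of the original biv
   and CAND is that original iv, the statement is already the cheapest
   possible expression of the use.  */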
5809 if (cand->pos == IP_ORIGINAL
5810 && cand->incremented_at == use->stmt)
5812 tree step, ctype, utype;
5813 enum tree_code incr_code = PLUS_EXPR, old_code;
5815 gcc_assert (is_gimple_assign (use->stmt));
5816 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5818 step = cand->iv->step;
5819 ctype = TREE_TYPE (step);
5820 utype = TREE_TYPE (cand->var_after);
5821 if (TREE_CODE (step) == NEGATE_EXPR)
5823 incr_code = MINUS_EXPR;
5824 step = TREE_OPERAND (step, 0);
5827 /* Check whether we may leave the computation unchanged.
5828 This is the case only if it does not rely on other
5829 computations in the loop -- otherwise, the computation
5830 we rely upon may be removed in remove_unused_ivs,
5831 thus leading to ICE. */
5832 old_code = gimple_assign_rhs_code (use->stmt);
5833 if (old_code == PLUS_EXPR
5834 || old_code == MINUS_EXPR
5835 || old_code == POINTER_PLUS_EXPR)
5837 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5838 op = gimple_assign_rhs2 (use->stmt);
5839 else if (old_code != MINUS_EXPR
5840 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5841 op = gimple_assign_rhs1 (use->stmt);
5842 else
5843 op = NULL_TREE;
5844 }
5845 else
5846 op = NULL_TREE;
5848 if (op
5849 && (TREE_CODE (op) == INTEGER_CST
5850 || operand_equal_p (op, step, 0)))
5851 return;
5853 /* Otherwise, add the necessary computations to express
5854 the iv. */
5855 op = fold_convert (ctype, cand->var_before);
5856 comp = fold_convert (utype,
5857 build2 (incr_code, ctype, op,
5858 unshare_expr (step)));
5859 }
5860 else
5861 {
5862 comp = get_computation (data->current_loop, use, cand);
5863 gcc_assert (comp != NULL_TREE);
5864 }
5866 switch (gimple_code (use->stmt))
5867 {
5868 case GIMPLE_PHI:
5869 tgt = PHI_RESULT (use->stmt);
5871 /* If we should keep the biv, do not replace it. */
5872 if (name_info (data, tgt)->preserve_biv)
5873 return;
5875 bsi = gsi_after_labels (gimple_bb (use->stmt));
5876 break;
5878 case GIMPLE_ASSIGN:
5879 tgt = gimple_assign_lhs (use->stmt);
5880 bsi = gsi_for_stmt (use->stmt);
5881 break;
5883 default:
5884 gcc_unreachable ();
5885 }
5887 if (!valid_gimple_rhs_p (comp)
5888 || (gimple_code (use->stmt) != GIMPLE_PHI
5889 /* We can't allow re-allocating the stmt as it might be pointed
5890 to still. */
5891 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5892 >= gimple_num_ops (gsi_stmt (bsi)))))
5894 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5895 true, GSI_SAME_STMT);
5896 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5898 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5899 /* As this isn't a plain copy we have to reset alignment
5900 information. */
5901 if (SSA_NAME_PTR_INFO (comp))
5903 SSA_NAME_PTR_INFO (comp)->align = 1;
5904 SSA_NAME_PTR_INFO (comp)->misalign = 0;
5905 }
5906 }
5907 }
5909 if (gimple_code (use->stmt) == GIMPLE_PHI)
5911 ass = gimple_build_assign (tgt, comp);
5912 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5914 bsi = gsi_for_stmt (use->stmt);
5915 remove_phi_node (&bsi, false);
5916 }
5917 else
5918 {
5919 gimple_assign_set_rhs_from_tree (&bsi, comp);
5920 use->stmt = gsi_stmt (bsi);
5921 }
5922 }
5924 /* Copies the reference information from OLD_REF to NEW_REF. */
5926 static void
5927 copy_ref_info (tree new_ref, tree old_ref)
5928 {
5929 tree new_ptr_base = NULL_TREE;
5931 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5932 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5934 new_ptr_base = TREE_OPERAND (new_ref, 0);
5936 /* We can transfer points-to information from an old pointer
5937 or decl base to the new one. */
5938 if (new_ptr_base
5939 && TREE_CODE (new_ptr_base) == SSA_NAME
5940 && !SSA_NAME_PTR_INFO (new_ptr_base))
5942 tree base = get_base_address (old_ref);
5943 if (!base)
5944 ;
5945 else if ((TREE_CODE (base) == MEM_REF
5946 || TREE_CODE (base) == TARGET_MEM_REF)
5947 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5948 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5950 struct ptr_info_def *new_pi;
5951 duplicate_ssa_name_ptr_info
5952 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5953 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5954 /* We have to be careful about transferring alignment information. */
5955 if (TREE_CODE (old_ref) == MEM_REF
5956 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5957 && (TMR_INDEX2 (new_ref)
5958 || (TMR_STEP (new_ref)
5959 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5960 < new_pi->align)))))
5962 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5963 mem_ref_offset (new_ref)).low;
5964 new_pi->misalign &= (new_pi->align - 1);
5965 }
5966 else
5967 {
5968 new_pi->align = 1;
5969 new_pi->misalign = 0;
5970 }
5971 }
5972 else if (TREE_CODE (base) == VAR_DECL
5973 || TREE_CODE (base) == PARM_DECL
5974 || TREE_CODE (base) == RESULT_DECL)
5976 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5977 pt_solution_set_var (&pi->pt, base);
5978 }
5979 }
5980 }
5982 /* Performs a peephole optimization to reorder the iv update statement with
5983 a mem ref to enable instruction combining in later phases. The mem ref uses
5984 the iv value before the update, so the reordering transformation requires
5985 adjustment of the offset. CAND is the selected IV_CAND.
5987 Example:
5989 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5990 iv2 = iv1 + 1;
5992 if (t < val) (1)
5993 goto L;
5994 goto Head;
5997 Directly propagating t over to (1) will introduce an overlapping live
5998 range and thus increase register pressure. This peephole transforms it into:
6001 iv2 = iv1 + 1;
6002 t = MEM_REF (base, iv2, 8, 8);
6003 if (t < val)
6004 goto L;
6005 goto Head;
6006 */
6008 static void
6009 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6010 {
6011 tree var_after;
6012 gimple iv_update, stmt;
6013 basic_block bb;
6014 gimple_stmt_iterator gsi, gsi_iv;
6016 if (cand->pos != IP_NORMAL)
6017 return;
6019 var_after = cand->var_after;
6020 iv_update = SSA_NAME_DEF_STMT (var_after);
6022 bb = gimple_bb (iv_update);
6023 gsi = gsi_last_nondebug_bb (bb);
6024 stmt = gsi_stmt (gsi);
6026 /* Only handle conditional statements for now. */
6027 if (gimple_code (stmt) != GIMPLE_COND)
6028 return;
6030 gsi_prev_nondebug (&gsi);
6031 stmt = gsi_stmt (gsi);
6032 if (stmt != iv_update)
6033 return;
6035 gsi_prev_nondebug (&gsi);
6036 if (gsi_end_p (gsi))
6037 return;
6039 stmt = gsi_stmt (gsi);
6040 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6041 return;
6043 if (stmt != use->stmt)
6044 return;
6046 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6047 return;
6049 if (dump_file && (dump_flags & TDF_DETAILS))
6051 fprintf (dump_file, "Reordering \n");
6052 print_gimple_stmt (dump_file, iv_update, 0, 0);
6053 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6054 fprintf (dump_file, "\n");
6057 gsi = gsi_for_stmt (use->stmt);
6058 gsi_iv = gsi_for_stmt (iv_update);
6059 gsi_move_before (&gsi_iv, &gsi);
6061 cand->pos = IP_BEFORE_USE;
6062 cand->incremented_at = use->stmt;
6063 }
6065 /* Rewrites USE (address that is an iv) using candidate CAND. */
6067 static void
6068 rewrite_use_address (struct ivopts_data *data,
6069 struct iv_use *use, struct iv_cand *cand)
6070 {
6071 aff_tree aff;
6072 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6073 tree base_hint = NULL_TREE;
6074 tree ref, iv;
6075 bool ok;
6077 adjust_iv_update_pos (cand, use);
6078 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6079 gcc_assert (ok);
6080 unshare_aff_combination (&aff);
6082 /* To avoid undefined overflow problems, all IV candidates use unsigned
6083 integer types. The drawback is that this makes it impossible for
6084 create_mem_ref to distinguish an IV that is based on a memory object
6085 from one that represents simply an offset.
6087 To work around this problem, we pass a hint to create_mem_ref that
6088 indicates which variable (if any) in aff is an IV based on a memory
6089 object. Note that we only consider the candidate. If this is not
6090 based on an object, the base of the reference is in some subexpression
6091 of the use -- but these will use pointer types, so they are recognized
6092 by the create_mem_ref heuristics anyway. */
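/* E.g., for a use *p_1 with p_1 = b_2 + 4 * i_3 and a candidate based on
   the memory object behind b_2, the hint lets create_mem_ref build
   MEM[base: b_2, index: i_3, step: 4] instead of folding b_2 into a
   symbolic offset.  */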
6093 if (cand->iv->base_object)
6094 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6096 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6097 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6098 reference_alias_ptr_type (*use->op_p),
6099 iv, base_hint, data->speed);
6100 copy_ref_info (ref, *use->op_p);
6101 *use->op_p = ref;
6102 }
6104 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6105 candidate CAND. */
6107 static void
6108 rewrite_use_compare (struct ivopts_data *data,
6109 struct iv_use *use, struct iv_cand *cand)
6111 tree comp, *var_p, op, bound;
6112 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6113 enum tree_code compare;
6114 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6115 bool ok;
6117 bound = cp->value;
6118 if (bound)
6119 {
6120 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6121 tree var_type = TREE_TYPE (var);
6122 gimple_seq stmts;
6124 if (dump_file && (dump_flags & TDF_DETAILS))
6126 fprintf (dump_file, "Replacing exit test: ");
6127 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6128 }
6129 compare = iv_elimination_compare (data, use);
6130 bound = unshare_expr (fold_convert (var_type, bound));
6131 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6132 if (stmts)
6133 gsi_insert_seq_on_edge_immediate (
6134 loop_preheader_edge (data->current_loop),
6135 stmts);
6137 gimple_cond_set_lhs (use->stmt, var);
6138 gimple_cond_set_code (use->stmt, compare);
6139 gimple_cond_set_rhs (use->stmt, op);
6140 return;
6141 }
6143 /* The induction variable elimination failed; just express the original
6145 comp = get_computation (data->current_loop, use, cand);
6146 gcc_assert (comp != NULL_TREE);
6148 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6149 gcc_assert (ok);
6151 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6152 true, GSI_SAME_STMT);
6153 }
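/* For example, when the number of iterations is known, an exit test

     if (i_1 < n_2)

   over an eliminable biv may be replaced by a test of the selected
   candidate against the precomputed BOUND, say

     if (p_3 != base_4 + 400B)

   with the bound computation inserted on the preheader edge above.  */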
6155 /* Rewrites USE using candidate CAND. */
6158 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6159 {
6160 switch (use->type)
6161 {
6162 case USE_NONLINEAR_EXPR:
6163 rewrite_use_nonlinear_expr (data, use, cand);
6164 break;
6166 case USE_ADDRESS:
6167 rewrite_use_address (data, use, cand);
6168 break;
6170 case USE_COMPARE:
6171 rewrite_use_compare (data, use, cand);
6172 break;
6174 default:
6175 gcc_unreachable ();
6176 }
6178 update_stmt (use->stmt);
6179 }
6181 /* Rewrite the uses using the selected induction variables. */
6183 static void
6184 rewrite_uses (struct ivopts_data *data)
6185 {
6187 struct iv_cand *cand;
6188 struct iv_use *use;
6189 unsigned i;
6190 for (i = 0; i < n_iv_uses (data); i++)
6192 use = iv_use (data, i);
6193 cand = use->selected;
6194 gcc_assert (cand);
6196 rewrite_use (data, use, cand);
6197 }
6198 }
6200 /* Removes the ivs that are not used after rewriting. */
6202 static void
6203 remove_unused_ivs (struct ivopts_data *data)
6204 {
6205 unsigned j;
6206 bitmap_iterator bi;
6207 bitmap toremove = BITMAP_ALLOC (NULL);
6209 /* Figure out an order in which to release SSA DEFs so that we don't
6210 release something that we'd have to propagate into a debug stmt
6211 afterwards. */
6212 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6214 struct version_info *info;
6216 info = ver_info (data, j);
6217 if (info->iv
6218 && !integer_zerop (info->iv->step)
6219 && !info->inv_id
6220 && !info->iv->have_use_for
6221 && !info->preserve_biv)
6222 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6223 }
6225 release_defs_bitset (toremove);
6227 BITMAP_FREE (toremove);
6228 }
6230 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6231 for pointer_map_traverse. */
6233 static bool
6234 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6235 void *data ATTRIBUTE_UNUSED)
6236 {
6237 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6239 free (niter);
6240 return true;
6241 }
6243 /* Frees data allocated by the optimization of a single loop. */
6245 static void
6246 free_loop_data (struct ivopts_data *data)
6247 {
6248 unsigned i, j;
6249 bitmap_iterator bi;
6250 tree obj;
6252 if (data->niters)
6253 {
6254 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6255 pointer_map_destroy (data->niters);
6256 data->niters = NULL;
6257 }
6259 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6261 struct version_info *info;
6263 info = ver_info (data, i);
6264 free (info->iv);
6265 info->iv = NULL;
6266 info->has_nonlin_use = false;
6267 info->preserve_biv = false;
6268 info->inv_id = 0;
6269 }
6270 bitmap_clear (data->relevant);
6271 bitmap_clear (data->important_candidates);
6273 for (i = 0; i < n_iv_uses (data); i++)
6275 struct iv_use *use = iv_use (data, i);
6277 free (use->iv);
6278 BITMAP_FREE (use->related_cands);
6279 for (j = 0; j < use->n_map_members; j++)
6280 if (use->cost_map[j].depends_on)
6281 BITMAP_FREE (use->cost_map[j].depends_on);
6282 free (use->cost_map);
6283 free (use);
6284 }
6285 VEC_truncate (iv_use_p, data->iv_uses, 0);
6287 for (i = 0; i < n_iv_cands (data); i++)
6289 struct iv_cand *cand = iv_cand (data, i);
6291 free (cand->iv);
6292 if (cand->depends_on)
6293 BITMAP_FREE (cand->depends_on);
6294 free (cand);
6295 }
6296 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6298 if (data->version_info_size < num_ssa_names)
6300 data->version_info_size = 2 * num_ssa_names;
6301 free (data->version_info);
6302 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6303 }
6305 data->max_inv_id = 0;
6307 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6308 SET_DECL_RTL (obj, NULL_RTX);
6310 VEC_truncate (tree, decl_rtl_to_reset, 0);
6312 htab_empty (data->inv_expr_tab);
6313 data->inv_expr_id = 0;
6314 }
6316 /* Finalizes data structures used by the iv optimization pass. */
6319 static void
6320 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6322 free_loop_data (data);
6323 free (data->version_info);
6324 BITMAP_FREE (data->relevant);
6325 BITMAP_FREE (data->important_candidates);
6327 VEC_free (tree, heap, decl_rtl_to_reset);
6328 VEC_free (iv_use_p, heap, data->iv_uses);
6329 VEC_free (iv_cand_p, heap, data->iv_candidates);
6330 htab_delete (data->inv_expr_tab);
6331 }
6333 /* Returns true if the loop body BODY includes any function calls. */
6335 static bool
6336 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6337 {
6338 gimple_stmt_iterator gsi;
6339 unsigned i;
6341 for (i = 0; i < num_nodes; i++)
6342 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6344 gimple stmt = gsi_stmt (gsi);
6345 if (is_gimple_call (stmt)
6346 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6347 return true;
6348 }
6350 return false;
6351 }
6352 /* Optimizes the LOOP. Returns true if anything changed. */
6354 static bool
6355 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6356 {
6357 bool changed = false;
6358 struct iv_ca *iv_ca;
6359 edge exit;
6360 basic_block *body;
6362 gcc_assert (!data->niters);
6363 data->current_loop = loop;
6364 data->speed = optimize_loop_for_speed_p (loop);
6366 if (dump_file && (dump_flags & TDF_DETAILS))
6368 fprintf (dump_file, "Processing loop %d\n", loop->num);
6370 exit = single_dom_exit (loop);
6371 if (exit)
6372 {
6373 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6374 exit->src->index, exit->dest->index);
6375 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6376 fprintf (dump_file, "\n");
6377 }
6379 fprintf (dump_file, "\n");
6380 }
6382 body = get_loop_body (loop);
6383 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6384 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6385 free (body);
6387 /* For each ssa name, determine whether it behaves as an induction variable
6388 in some loop. */
6389 if (!find_induction_variables (data))
6390 goto finish;
6392 /* Finds interesting uses (item 1). */
6393 find_interesting_uses (data);
6394 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6395 goto finish;
6397 /* Finds candidates for the induction variables (item 2). */
6398 find_iv_candidates (data);
6400 /* Calculates the costs (item 3, part 1). */
6401 determine_iv_costs (data);
6402 determine_use_iv_costs (data);
6403 determine_set_costs (data);
6405 /* Find the optimal set of induction variables (item 3, part 2). */
6406 iv_ca = find_optimal_iv_set (data);
6407 if (!iv_ca)
6408 goto finish;
6409 changed = true;
6411 /* Create the new induction variables (item 4, part 1). */
6412 create_new_ivs (data, iv_ca);
6413 iv_ca_free (&iv_ca);
6415 /* Rewrite the uses (item 4, part 2). */
6416 rewrite_uses (data);
6418 /* Remove the ivs that are unused after rewriting. */
6419 remove_unused_ivs (data);
6421 /* We have changed the structure of induction variables; it might happen
6422 that definitions in the scev database refer to some of them that were
6423 eliminated. */
6424 scev_reset ();
6426 finish:
6427 free_loop_data (data);
6429 return changed;
6430 }
6432 /* Main entry point. Optimizes induction variables in loops. */
6434 void
6435 tree_ssa_iv_optimize (void)
6436 {
6437 struct loop *loop;
6438 struct ivopts_data data;
6439 loop_iterator li;
6441 tree_ssa_iv_optimize_init (&data);
6443 /* Optimize the loops starting with the innermost ones. */
6444 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6446 if (dump_file && (dump_flags & TDF_DETAILS))
6447 flow_loop_dump (loop, dump_file, NULL, 1);
6449 tree_ssa_iv_optimize_loop (&data, loop);
6450 }
6452 tree_ssa_iv_optimize_finalize (&data);
6453 }