1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
96 /* The infinite cost. */
97 #define INFTY 10000000
99 /* The expected number of loop iterations. TODO -- use profiling instead of
101 #define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
135 /* Cost of a computation. */
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
148 /* The candidate - cost pair. */
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand *selected;
176 /* The selected candidate. */
179 /* The position where the iv is computed. */
182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */
184 IP_ORIGINAL /* The original biv. */
187 /* The induction variable candidate. */
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
207 /* The data used by the induction variable optimizations. */
209 typedef struct iv_use *iv_use_p;
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
219 /* The currently optimized loop. */
220 struct loop *current_loop;
222 /* Are we optimizing for speed? */
225 /* Number of registers used in it. */
228 /* Numbers of iterations for all exits of the current loop. */
229 struct pointer_map_t *niters;
231 /* The size of version_info array allocated. */
232 unsigned version_info_size;
234 /* The array of information for the ssa names. */
235 struct version_info *version_info;
237 /* The bitmap of indices in version_info whose value was changed. */
240 /* The maximum invariant id. */
243 /* The uses of induction variables. */
244 VEC(iv_use_p,heap) *iv_uses;
246 /* The candidates. */
247 VEC(iv_cand_p,heap) *iv_candidates;
249 /* A bitmap of important candidates. */
250 bitmap important_candidates;
252 /* Whether to consider just related and important candidates when replacing a
254 bool consider_all_candidates;
257 /* An assignment of iv candidates to uses. */
261 /* The number of uses covered by the assignment. */
264 /* Number of uses that cannot be expressed by the candidates in the set. */
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair **cand_for_use;
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses;
273 /* The candidates used. */
276 /* The number of candidates in the set. */
279 /* Total number of registers needed. */
282 /* Total cost of expressing uses. */
283 comp_cost cand_use_cost;
285 /* Total cost of candidates. */
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses;
291 /* Total cost of the assignment. */
295 /* Difference of two iv candidate assignments. */
302 /* An old assignment (for rollback purposes). */
303 struct cost_pair *old_cp;
305 /* A new assignment. */
306 struct cost_pair *new_cp;
308 /* Next change in the list. */
309 struct iv_ca_delta *next_change;
312 /* Bound on number of candidates below that all candidates are considered. */
314 #define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
317 /* If there are more iv occurrences, we just give up (it is quite unlikely that
318 optimizing such a loop would help, and it would take ages). */
320 #define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
323 /* If there are at most this number of ivs in the set, try removing unnecessary
324 ivs from the set always. */
326 #define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
329 /* The list of trees for that the decl_rtl field must be reset is stored
332 static VEC(tree,heap) *decl_rtl_to_reset;
334 /* Number of uses recorded in DATA. */
336 static inline unsigned
337 n_iv_uses (struct ivopts_data *data)
339 return VEC_length (iv_use_p, data->iv_uses);
342 /* Ith use recorded in DATA. */
344 static inline struct iv_use *
345 iv_use (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_use_p, data->iv_uses, i);
350 /* Number of candidates recorded in DATA. */
352 static inline unsigned
353 n_iv_cands (struct ivopts_data *data)
355 return VEC_length (iv_cand_p, data->iv_candidates);
358 /* Ith candidate recorded in DATA. */
360 static inline struct iv_cand *
361 iv_cand (struct ivopts_data *data, unsigned i)
363 return VEC_index (iv_cand_p, data->iv_candidates, i);
366 /* The single loop exit if it dominates the latch, NULL otherwise. */
369 single_dom_exit (struct loop *loop)
371 edge exit = single_exit (loop);
376 if (!just_once_each_iteration_p (loop, exit->src))
382 /* Dumps information about the induction variable IV to FILE. */
384 extern void dump_iv (FILE *, struct iv *);
386 dump_iv (FILE *file, struct iv *iv)
390 fprintf (file, "ssa name ");
391 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " type ");
396 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
397 fprintf (file, "\n");
401 fprintf (file, " base ");
402 print_generic_expr (file, iv->base, TDF_SLIM);
403 fprintf (file, "\n");
405 fprintf (file, " step ");
406 print_generic_expr (file, iv->step, TDF_SLIM);
407 fprintf (file, "\n");
411 fprintf (file, " invariant ");
412 print_generic_expr (file, iv->base, TDF_SLIM);
413 fprintf (file, "\n");
418 fprintf (file, " base object ");
419 print_generic_expr (file, iv->base_object, TDF_SLIM);
420 fprintf (file, "\n");
424 fprintf (file, " is a biv\n");
427 /* Dumps information about the USE to FILE. */
429 extern void dump_use (FILE *, struct iv_use *);
431 dump_use (FILE *file, struct iv_use *use)
433 fprintf (file, "use %d\n", use->id);
437 case USE_NONLINEAR_EXPR:
438 fprintf (file, " generic\n");
442 fprintf (file, " address\n");
446 fprintf (file, " compare\n");
453 fprintf (file, " in statement ");
454 print_gimple_stmt (file, use->stmt, 0, 0);
455 fprintf (file, "\n");
457 fprintf (file, " at position ");
459 print_generic_expr (file, *use->op_p, TDF_SLIM);
460 fprintf (file, "\n");
462 dump_iv (file, use->iv);
464 if (use->related_cands)
466 fprintf (file, " related candidates ");
467 dump_bitmap (file, use->related_cands);
471 /* Dumps information about the uses to FILE. */
473 extern void dump_uses (FILE *, struct ivopts_data *);
475 dump_uses (FILE *file, struct ivopts_data *data)
480 for (i = 0; i < n_iv_uses (data); i++)
482 use = iv_use (data, i);
484 dump_use (file, use);
485 fprintf (file, "\n");
489 /* Dumps information about induction variable candidate CAND to FILE. */
491 extern void dump_cand (FILE *, struct iv_cand *);
493 dump_cand (FILE *file, struct iv_cand *cand)
495 struct iv *iv = cand->iv;
497 fprintf (file, "candidate %d%s\n",
498 cand->id, cand->important ? " (important)" : "");
500 if (cand->depends_on)
502 fprintf (file, " depends on ");
503 dump_bitmap (file, cand->depends_on);
508 fprintf (file, " final value replacement\n");
515 fprintf (file, " incremented before exit test\n");
519 fprintf (file, " incremented at end\n");
523 fprintf (file, " original biv\n");
530 /* Returns the info for ssa version VER. */
532 static inline struct version_info *
533 ver_info (struct ivopts_data *data, unsigned ver)
535 return data->version_info + ver;
538 /* Returns the info for ssa name NAME. */
540 static inline struct version_info *
541 name_info (struct ivopts_data *data, tree name)
543 return ver_info (data, SSA_NAME_VERSION (name));
546 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
550 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
552 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
556 if (sbb == loop->latch)
562 return stmt == last_stmt (bb);
565 /* Returns true if STMT if after the place where the original induction
566 variable CAND is incremented. */
569 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
571 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
578 if (stmt_bb != cand_bb)
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
585 if (gsi_stmt (bsi) == cand->incremented_at)
587 if (gsi_stmt (bsi) == stmt)
592 /* Returns true if STMT if after the place where the induction variable
593 CAND is incremented in LOOP. */
596 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
604 return stmt_after_ip_normal_pos (loop, stmt);
607 return stmt_after_ip_original_pos (cand, stmt);
614 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
617 abnormal_ssa_name_p (tree exp)
622 if (TREE_CODE (exp) != SSA_NAME)
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
628 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
629 abnormal phi node. Callback for for_each_index. */
632 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
633 void *data ATTRIBUTE_UNUSED)
635 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
637 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
639 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
643 return !abnormal_ssa_name_p (*index);
646 /* Returns true if EXPR contains a ssa name that occurs in an
647 abnormal phi node. */
650 contains_abnormal_ssa_name_p (tree expr)
653 enum tree_code_class codeclass;
658 code = TREE_CODE (expr);
659 codeclass = TREE_CODE_CLASS (code);
661 if (code == SSA_NAME)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
664 if (code == INTEGER_CST
665 || is_gimple_min_invariant (expr))
668 if (code == ADDR_EXPR)
669 return !for_each_index (&TREE_OPERAND (expr, 0),
670 idx_contains_abnormal_ssa_name_p,
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
694 /* Returns tree describing number of iterations determined from
695 EXIT of DATA->current_loop, or NULL if something goes wrong. */
698 niter_for_exit (struct ivopts_data *data, edge exit)
700 struct tree_niter_desc desc;
706 data->niters = pointer_map_create ();
710 slot = pointer_map_contains (data->niters, exit);
714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data->current_loop,
721 && integer_zerop (desc.may_be_zero)
722 && !contains_abnormal_ssa_name_p (desc.niter))
727 *pointer_map_insert (data->niters, exit) = niter;
730 niter = (tree) *slot;
735 /* Returns tree describing number of iterations determined from
736 single dominating exit of DATA->current_loop, or NULL if something
740 niter_for_single_dom_exit (struct ivopts_data *data)
742 edge exit = single_dom_exit (data->current_loop);
747 return niter_for_exit (data, exit);
750 /* Initializes data structures used by the iv optimization pass, stored
754 tree_ssa_iv_optimize_init (struct ivopts_data *data)
756 data->version_info_size = 2 * num_ssa_names;
757 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
758 data->relevant = BITMAP_ALLOC (NULL);
759 data->important_candidates = BITMAP_ALLOC (NULL);
760 data->max_inv_id = 0;
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
779 if (CONVERT_EXPR_P (expr))
780 return determine_base_object (TREE_OPERAND (expr, 0));
782 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
791 obj = TREE_OPERAND (expr, 0);
792 base = get_base_address (obj);
797 if (TREE_CODE (base) == INDIRECT_REF)
798 return determine_base_object (TREE_OPERAND (base, 0));
800 return fold_convert (ptr_type_node,
801 build_fold_addr_expr (base));
803 case POINTER_PLUS_EXPR:
804 return determine_base_object (TREE_OPERAND (expr, 0));
808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
812 return fold_convert (ptr_type_node, expr);
816 /* Allocates an induction variable with given initial value BASE and step STEP
820 alloc_iv (tree base, tree step)
822 struct iv *iv = XCNEW (struct iv);
823 gcc_assert (step != NULL_TREE);
826 iv->base_object = determine_base_object (base);
829 iv->have_use_for = false;
831 iv->ssa_name = NULL_TREE;
836 /* Sets STEP and BASE for induction variable IV. */
839 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
841 struct version_info *info = name_info (data, iv);
843 gcc_assert (!info->iv);
845 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
846 info->iv = alloc_iv (base, step);
847 info->iv->ssa_name = iv;
850 /* Finds induction variable declaration for VAR. */
853 get_iv (struct ivopts_data *data, tree var)
856 tree type = TREE_TYPE (var);
858 if (!POINTER_TYPE_P (type)
859 && !INTEGRAL_TYPE_P (type))
862 if (!name_info (data, var)->iv)
864 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
867 || !flow_bb_inside_loop_p (data->current_loop, bb))
868 set_iv (data, var, var, build_int_cst (type, 0));
871 return name_info (data, var)->iv;
874 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
875 not define a simple affine biv with nonzero step. */
878 determine_biv_step (gimple phi)
880 struct loop *loop = gimple_bb (phi)->loop_father;
881 tree name = PHI_RESULT (phi);
884 if (!is_gimple_reg (name))
887 if (!simple_iv (loop, phi, name, &iv, true))
890 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
893 /* Finds basic ivs. */
896 find_bivs (struct ivopts_data *data)
899 tree step, type, base;
901 struct loop *loop = data->current_loop;
902 gimple_stmt_iterator psi;
904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
906 phi = gsi_stmt (psi);
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
911 step = determine_biv_step (phi);
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
916 base = expand_simple_operations (base);
917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
928 step = fold_convert (type, step);
931 set_iv (data, PHI_RESULT (phi), base, step);
938 /* Marks basic ivs. */
941 mark_bivs (struct ivopts_data *data)
945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
948 gimple_stmt_iterator psi;
950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
952 phi = gsi_stmt (psi);
954 iv = get_iv (data, PHI_RESULT (phi));
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
963 /* If the increment is in the subloop, ignore it. */
964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
970 incr_iv->biv_p = true;
974 /* Checks whether STMT defines a linear induction variable and stores its
978 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
981 struct loop *loop = data->current_loop;
983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
989 lhs = gimple_assign_lhs (stmt);
990 if (TREE_CODE (lhs) != SSA_NAME)
993 if (!simple_iv (loop, stmt, lhs, iv, true))
995 iv->base = expand_simple_operations (iv->base);
997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
1004 /* Finds general ivs in statement STMT. */
1007 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1017 /* Finds general ivs in basic block BB. */
1020 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1022 gimple_stmt_iterator bsi;
1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
1028 /* Finds general ivs. */
1031 find_givs (struct ivopts_data *data)
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1042 /* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1046 find_induction_variables (struct ivopts_data *data)
1051 if (!find_bivs (data))
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1059 tree niter = niter_for_single_dom_exit (data);
1063 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
1068 fprintf (dump_file, "Induction variables:\n\n");
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
1080 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1082 static struct iv_use *
1083 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1084 gimple stmt, enum use_type use_type)
1086 struct iv_use *use = XCNEW (struct iv_use);
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1093 use->related_cands = BITMAP_ALLOC (NULL);
1095 /* To avoid showing ssa name in the dumps, if it was not reset by the
1097 iv->ssa_name = NULL_TREE;
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1107 /* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1112 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1115 struct version_info *info;
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1126 info = name_info (data, op);
1128 info->has_nonlin_use |= nonlinear_use;
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1134 /* Checks whether the use OP is interesting and if so, records it. */
1136 static struct iv_use *
1137 find_interesting_uses_op (struct ivopts_data *data, tree op)
1144 if (TREE_CODE (op) != SSA_NAME)
1147 iv = get_iv (data, op);
1151 if (iv->have_use_for)
1153 use = iv_use (data, iv->use_id);
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1159 if (integer_zerop (iv->step))
1161 record_invariant (data, op, true);
1164 iv->have_use_for = true;
1166 civ = XNEW (struct iv);
1169 stmt = SSA_NAME_DEF_STMT (op);
1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1174 iv->use_id = use->id;
1179 /* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
1188 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv;
1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1199 if (gimple_code (stmt) == GIMPLE_COND)
1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
1218 /* Exactly one of the compared values must be an iv, and the other one must
1223 if (integer_zerop (iv0->step))
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1233 *control_var = op0;;
1244 /* Checks whether the condition in STMT is interesting and if so,
1248 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1250 tree *var_p, *bound_p;
1251 struct iv *var_iv, *civ;
1253 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1255 find_interesting_uses_op (data, *var_p);
1256 find_interesting_uses_op (data, *bound_p);
1260 civ = XNEW (struct iv);
1262 record_use (data, NULL, civ, stmt, USE_COMPARE);
1265 /* Returns true if expression EXPR is obviously invariant in LOOP,
1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
1270 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1275 gcc_assert (loop_depth (loop) > 0);
1277 if (is_gimple_min_invariant (expr))
1280 if (TREE_CODE (expr) == SSA_NAME)
1282 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1284 && flow_bb_inside_loop_p (loop, def_bb))
1293 len = TREE_OPERAND_LENGTH (expr);
1294 for (i = 0; i < len; i++)
1295 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1301 /* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1306 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1311 gcc_assert (loop_depth (loop) > 0);
1313 lhs = gimple_get_lhs (stmt);
1314 for (i = 0; i < gimple_num_ops (stmt); i++)
1316 tree op = gimple_op (stmt, i);
1317 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1324 /* Cumulates the steps of indices into DATA and replaces their values with the
1325 initial ones. Returns false when the value of the index cannot be determined.
1326 Callback for for_each_index. */
1328 struct ifs_ivopts_data
1330 struct ivopts_data *ivopts_data;
1336 idx_find_step (tree base, tree *idx, void *data)
1338 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1340 tree step, iv_base, iv_step, lbound, off;
1341 struct loop *loop = dta->ivopts_data->current_loop;
1343 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1347 /* If base is a component ref, require that the offset of the reference
1349 if (TREE_CODE (base) == COMPONENT_REF)
1351 off = component_ref_field_offset (base);
1352 return expr_invariant_in_loop_p (loop, off);
1355 /* If base is array, first check whether we will be able to move the
1356 reference out of the loop (in order to take its address in strength
1357 reduction). In order for this to work we need both lower bound
1358 and step to be loop invariants. */
1359 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1361 /* Moreover, for a range, the size needs to be invariant as well. */
1362 if (TREE_CODE (base) == ARRAY_RANGE_REF
1363 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1366 step = array_ref_element_size (base);
1367 lbound = array_ref_low_bound (base);
1369 if (!expr_invariant_in_loop_p (loop, step)
1370 || !expr_invariant_in_loop_p (loop, lbound))
1374 if (TREE_CODE (*idx) != SSA_NAME)
1377 iv = get_iv (dta->ivopts_data, *idx);
1381 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1382 *&x[0], which is not folded and does not trigger the
1383 ARRAY_REF path below. */
1386 if (integer_zerop (iv->step))
1389 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1391 step = array_ref_element_size (base);
1393 /* We only handle addresses whose step is an integer constant. */
1394 if (TREE_CODE (step) != INTEGER_CST)
1398 /* The step for pointer arithmetics already is 1 byte. */
1399 step = build_int_cst (sizetype, 1);
1403 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1404 sizetype, &iv_base, &iv_step, dta->stmt,
1407 /* The index might wrap. */
1411 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1412 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1421 idx_record_use (tree base, tree *idx,
1424 struct ivopts_data *data = (struct ivopts_data *) vdata;
1425 find_interesting_uses_op (data, *idx);
1426 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1428 find_interesting_uses_op (data, array_ref_element_size (base));
1429 find_interesting_uses_op (data, array_ref_low_bound (base));
1434 /* If we can prove that TOP = cst * BOT for some constant cst,
1435 store cst to MUL and return true.  Otherwise return false.
1436 The returned value is always sign-extended, regardless of the
1437 signedness of TOP and BOT. */
1440 constant_multiple_of (tree top, tree bot, double_int *mul)
1443 enum tree_code code;
1444 double_int res, p0, p1;
/* All double_int arithmetic below is sign-extended to the precision of
   TOP's type, per the contract in the header comment.  */
1445 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
/* Trivial case: TOP and BOT are structurally identical, so cst = 1.  */
1450 if (operand_equal_p (top, bot, 0))
1452 *mul = double_int_one;
1456 code = TREE_CODE (top);
/* MULT_EXPR case: TOP = sub * mby; recurse on sub and scale by mby.  */
1460 mby = TREE_OPERAND (top, 1);
1461 if (TREE_CODE (mby) != INTEGER_CST)
1464 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1467 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
/* PLUS_EXPR / MINUS_EXPR case: both operands must be constant multiples
   of BOT; the results are added (operand 1 negated for MINUS_EXPR).  */
1473 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1474 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1477 if (code == MINUS_EXPR)
1478 p1 = double_int_neg (p1);
1479 *mul = double_int_sext (double_int_add (p0, p1), precision);
/* Fallback: both TOP and BOT are integer constants; divide and succeed
   only if the remainder RES is zero.  */
1483 if (TREE_CODE (bot) != INTEGER_CST)
1486 p0 = double_int_sext (tree_to_double_int (top), precision);
1487 p1 = double_int_sext (tree_to_double_int (bot), precision);
/* Guard against division by zero when BOT is the constant 0.  */
1488 if (double_int_zero_p (p1))
1490 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1492 return double_int_zero_p (res);
1499 /* Returns true if memory reference REF with step STEP may be unaligned. */
1502 may_be_unaligned_p (tree ref, tree step)
1506 HOST_WIDE_INT bitsize;
1507 HOST_WIDE_INT bitpos;
1509 enum machine_mode mode;
1510 int unsignedp, volatilep;
1511 unsigned base_align;
1513 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1514 thus they are not misaligned. */
1515 if (TREE_CODE (ref) == TARGET_MEM_REF)
1518 /* The test below is basically copy of what expr.c:normal_inner_ref
1519 does to check whether the object must be loaded by parts when
1520 STRICT_ALIGNMENT is true. */
1521 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1522 &unsignedp, &volatilep, true);
1523 base_type = TREE_TYPE (base);
1524 base_align = TYPE_ALIGN (base_type);
1526 if (mode != BLKmode)
/* AL is the required alignment of the access, in bytes, expressed in
   STEP's type so it can be compared against the step.  */
1529 tree al = build_int_cst (TREE_TYPE (step),
1530 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
/* The reference may be unaligned if the base object is under-aligned
   for the access mode, or the bit position is not mode- or even
   byte-aligned.  */
1532 if (base_align < GET_MODE_ALIGNMENT (mode)
1533 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1534 || bitpos % BITS_PER_UNIT != 0)
/* The step must also be a multiple of the alignment, otherwise some
   iteration lands on a misaligned address.  */
1537 if (!constant_multiple_of (step, al, &mul))
1544 /* Return true if EXPR may be non-addressable. */
1547 may_be_nonaddressable_p (tree expr)
1549 switch (TREE_CODE (expr))
1551 case TARGET_MEM_REF:
1552 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1553 target, thus they are always addressable. */
/* COMPONENT_REF case (label not visible in this extract): a field
   marked DECL_NONADDRESSABLE_P, or a non-addressable containing
   object, makes the whole reference non-addressable.  */
1557 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1558 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1560 case VIEW_CONVERT_EXPR:
1561 /* This kind of view-conversions may wrap non-addressable objects
1562 and make them look addressable.  After some processing the
1563 non-addressability may be uncovered again, causing ADDR_EXPRs
1564 of inappropriate objects to be built. */
1565 return is_gimple_reg (TREE_OPERAND (expr, 0))
1566 || !is_gimple_addressable (TREE_OPERAND (expr, 0))
1567 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
/* ARRAY_REF shares this handling with ARRAY_RANGE_REF: the component
   may be non-aliased, or the underlying array non-addressable.  */
1570 case ARRAY_RANGE_REF:
1571 return TYPE_NONALIASED_COMPONENT (TREE_TYPE (TREE_OPERAND (expr, 0)))
1572 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1584 /* Finds addresses in *OP_P inside STMT. */
1587 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1589 tree base = *op_p, step = build_int_cst (sizetype, 0);
1591 struct ifs_ivopts_data ifs_ivopts_data;
1593 /* Do not play with volatile memory references.  A bit too conservative,
1594 perhaps, but safe. */
1595 if (gimple_has_volatile_ops (stmt))
1598 /* Ignore bitfields for now.  Not really something terribly complicated
1600 if (TREE_CODE (base) == BIT_FIELD_REF)
/* Unshare so the substitutions below do not clobber trees shared with
   other statements.  */
1603 base = unshare_expr (base);
1605 if (TREE_CODE (base) == TARGET_MEM_REF)
/* TARGET_MEM_REF: substitute the IV bases into TMR_BASE and TMR_INDEX
   directly and accumulate the combined step.  */
1607 tree type = build_pointer_type (TREE_TYPE (base));
1611 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1613 civ = get_iv (data, TMR_BASE (base));
1617 TMR_BASE (base) = civ->base;
1620 if (TMR_INDEX (base)
1621 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1623 civ = get_iv (data, TMR_INDEX (base));
1627 TMR_INDEX (base) = civ->base;
/* The index contribution to the step is scaled by TMR_STEP.  */
1632 if (TMR_STEP (base))
1633 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1635 step = fold_build2 (PLUS_EXPR, type, step, astep);
/* A zero step means the address is loop-invariant here — presumably
   handled as a plain use rather than an address use (the branch body
   is not visible in this extract).  */
1639 if (integer_zerop (step))
1641 base = tree_mem_ref_addr (type, base);
/* Ordinary reference: walk all indices, substituting IVs and summing
   their steps via idx_find_step.  */
1645 ifs_ivopts_data.ivopts_data = data;
1646 ifs_ivopts_data.stmt = stmt;
1647 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1648 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1649 || integer_zerop (ifs_ivopts_data.step))
1651 step = ifs_ivopts_data.step;
1653 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1654 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1656 /* Check that the base expression is addressable.  This needs
1657 to be done after substituting bases of IVs into it. */
1658 if (may_be_nonaddressable_p (base))
1661 /* Moreover, on strict alignment platforms, check that it is
1662 sufficiently aligned. */
1663 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1666 base = build_fold_addr_expr (base);
1668 /* Substituting bases of IVs into the base expression might
1669 have caused folding opportunities. */
1670 if (TREE_CODE (base) == ADDR_EXPR)
1672 tree *ref = &TREE_OPERAND (base, 0);
1673 while (handled_component_p (*ref))
1674 ref = &TREE_OPERAND (*ref, 0);
1675 if (TREE_CODE (*ref) == INDIRECT_REF)
1676 *ref = fold_indirect_ref (*ref);
/* Record the discovered address induction variable as a USE_ADDRESS
   use; the fail paths above presumably fall through to recording the
   indices as plain uses instead.  */
1680 civ = alloc_iv (base, step);
1681 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1685 for_each_index (op_p, idx_record_use, data);
1688 /* Finds and records invariants used in STMT. */
1691 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1694 use_operand_p use_p;
/* Every SSA use operand of STMT (including PHI arguments) is a
   potential invariant; record_invariant filters the rest.  */
1697 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1699 op = USE_FROM_PTR (use_p);
1700 record_invariant (data, op, false);
1704 /* Finds interesting uses of induction variables in the statement STMT. */
1707 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1710 tree op, *lhs, *rhs;
1712 use_operand_p use_p;
1713 enum tree_code code;
1715 find_invariants_stmt (data, stmt);
/* Conditions are a distinct use kind (comparisons of IVs).  */
1717 if (gimple_code (stmt) == GIMPLE_COND)
1719 find_interesting_uses_cond (data, stmt);
1723 if (is_gimple_assign (stmt))
1725 lhs = gimple_assign_lhs_ptr (stmt);
1726 rhs = gimple_assign_rhs1_ptr (stmt);
1728 if (TREE_CODE (*lhs) == SSA_NAME)
1730 /* If the statement defines an induction variable, the uses are not
1731 interesting by themselves. */
1733 iv = get_iv (data, *lhs);
1735 if (iv && !integer_zerop (iv->step))
1739 code = gimple_assign_rhs_code (stmt);
/* Single-operand RHS: a memory reference is an address use, anything
   else a plain (nonlinear) use.  The LHS may be an address use too.  */
1740 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1741 && (REFERENCE_CLASS_P (*rhs)
1742 || is_gimple_val (*rhs)))
1744 if (REFERENCE_CLASS_P (*rhs))
1745 find_interesting_uses_address (data, stmt, rhs);
1747 find_interesting_uses_op (data, *rhs);
1749 if (REFERENCE_CLASS_P (*lhs))
1750 find_interesting_uses_address (data, stmt, lhs);
1753 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1755 find_interesting_uses_cond (data, stmt);
1759 /* TODO -- we should also handle address uses of type
1761 memory = call (whatever);
/* PHI nodes in the loop header that define a (nonzero-step) IV are the
   IVs themselves; their arguments are not interesting uses.  */
1768 if (gimple_code (stmt) == GIMPLE_PHI
1769 && gimple_bb (stmt) == data->current_loop->header)
1771 iv = get_iv (data, PHI_RESULT (stmt));
1773 if (iv && !integer_zerop (iv->step))
/* Fallback: record every remaining SSA operand that is itself an IV
   as a plain use.  */
1777 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1779 op = USE_FROM_PTR (use_p);
1781 if (TREE_CODE (op) != SSA_NAME)
1784 iv = get_iv (data, op);
1788 find_interesting_uses_op (data, op);
1792 /* Finds interesting uses of induction variables outside of loops
1793 on loop exit edge EXIT. */
1796 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1799 gimple_stmt_iterator psi;
/* Values live after the loop appear as PHI arguments on the exit edge;
   each such (register) value is an interesting use.  */
1802 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1804 phi = gsi_stmt (psi);
1805 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1806 if (is_gimple_reg (def))
1807 find_interesting_uses_op (data, def);
1811 /* Finds uses of the induction variables that are interesting. */
1814 find_interesting_uses (struct ivopts_data *data)
1817 gimple_stmt_iterator bsi;
1818 basic_block *body = get_loop_body (data->current_loop);
1820 struct version_info *info;
1823 if (dump_file && (dump_flags & TDF_DETAILS))
1824 fprintf (dump_file, "Uses:\n\n");
/* Scan every block of the loop body.  */
1826 for (i = 0; i < data->current_loop->num_nodes; i++)
/* Edges leaving the loop carry uses that live past it.  */
1831 FOR_EACH_EDGE (e, ei, bb->succs)
1832 if (e->dest != EXIT_BLOCK_PTR
1833 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1834 find_interesting_uses_outside (data, e);
/* PHIs and ordinary statements are scanned with the same worker.  */
1836 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1837 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1838 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1839 find_interesting_uses_stmt (data, gsi_stmt (bsi));
/* Optional detailed dump of the invariants discovered along the way.  */
1842 if (dump_file && (dump_flags & TDF_DETAILS))
1846 fprintf (dump_file, "\n");
1848 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1850 info = ver_info (data, i);
1853 fprintf (dump_file, "  ");
1854 print_generic_expr (dump_file, info->name, TDF_SLIM);
1855 fprintf (dump_file, " is invariant (%d)%s\n",
1856 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1860 fprintf (dump_file, "\n");
1866 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1867 is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1868 we are at the top-level of the processed address. */
1871 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1872 unsigned HOST_WIDE_INT *offset)
1874 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1875 enum tree_code code;
1876 tree type, orig_type = TREE_TYPE (expr);
1877 unsigned HOST_WIDE_INT off0, off1, st;
1878 tree orig_expr = expr;
1882 type = TREE_TYPE (expr);
1883 code = TREE_CODE (expr);
/* INTEGER_CST case: the whole constant becomes the offset and the
   expression collapses to zero (if it fits a HOST_WIDE_INT).  */
1889 if (!cst_and_fits_in_hwi (expr)
1890 || integer_zerop (expr))
1893 *offset = int_cst_value (expr);
1894 return build_int_cst (orig_type, 0);
1896 case POINTER_PLUS_EXPR:
/* PLUS/MINUS/POINTER_PLUS: strip offsets from both operands and
   combine them; rebuild the expression only if something changed.  */
1899 op0 = TREE_OPERAND (expr, 0);
1900 op1 = TREE_OPERAND (expr, 1);
1902 op0 = strip_offset_1 (op0, false, false, &off0);
1903 op1 = strip_offset_1 (op1, false, false, &off1);
1905 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1906 if (op0 == TREE_OPERAND (expr, 0)
1907 && op1 == TREE_OPERAND (expr, 1))
/* If either stripped operand is zero, simplify away the operation.  */
1910 if (integer_zerop (op1))
1912 else if (integer_zerop (op0))
1914 if (code == MINUS_EXPR)
1915 expr = fold_build1 (NEGATE_EXPR, type, op1);
1920 expr = fold_build2 (code, type, op0, op1);
1922 return fold_convert (orig_type, expr);
1925 case ARRAY_RANGE_REF:
/* ARRAY_REF / ARRAY_RANGE_REF: a constant index contributes
   index * element_size to the offset.  */
1929 step = array_ref_element_size (expr);
1930 if (!cst_and_fits_in_hwi (step))
1933 st = int_cst_value (step);
1934 op1 = TREE_OPERAND (expr, 1);
1935 op1 = strip_offset_1 (op1, false, false, &off1);
1936 *offset = off1 * st;
1939 && integer_zerop (op1))
1941 /* Strip the component reference completely. */
1942 op0 = TREE_OPERAND (expr, 0);
1943 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
/* COMPONENT_REF case (label not visible in this extract): a constant
   field offset is folded into the running offset.  */
1953 tmp = component_ref_field_offset (expr);
1955 && cst_and_fits_in_hwi (tmp))
1957 /* Strip the component reference completely. */
1958 op0 = TREE_OPERAND (expr, 0);
1959 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1960 *offset = off0 + int_cst_value (tmp);
/* ADDR_EXPR case: recurse into the addressed object with
   inside_addr/top_compref set, then rebuild the address.  */
1966 op0 = TREE_OPERAND (expr, 0);
1967 op0 = strip_offset_1 (op0, true, true, &off0);
1970 if (op0 == TREE_OPERAND (expr, 0))
1973 expr = build_fold_addr_expr (op0);
1974 return fold_convert (orig_type, expr);
1977 inside_addr = false;
1984 /* Default handling of expressions for that we want to recurse into
1985 the first operand. */
1986 op0 = TREE_OPERAND (expr, 0);
1987 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1990 if (op0 == TREE_OPERAND (expr, 0)
1991 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
/* Copy before mutating — EXPR may be shared.  */
1994 expr = copy_node (expr);
1995 TREE_OPERAND (expr, 0) = op0;
1997 TREE_OPERAND (expr, 1) = op1;
1999 /* Inside address, we might strip the top level component references,
2000 thus changing type of the expression.  Handling of ADDR_EXPR
2002 expr = fold_convert (orig_type, expr);
2007 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* Thin public wrapper over strip_offset_1 with the "not inside an
   address" defaults.  */
2010 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2012 return strip_offset_1 (expr, false, false, offset);
2015 /* Returns variant of TYPE that can be used as base for different uses.
2016 We return unsigned type with the same precision, which avoids problems
2020 generic_type_for (tree type)
/* Pointers and signed types are canonicalized to the unsigned integer
   type of the same precision; unsigned types pass through.  */
2022 if (POINTER_TYPE_P (type))
2023 return unsigned_type_for (type);
2025 if (TYPE_UNSIGNED (type))
2028 return unsigned_type_for (type);
2031 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2032 the bitmap to that we should store it. */
/* Communicates the ivopts context to the walk_tree callback, which has
   no user-data slot for it.  */
2034 static struct ivopts_data *fd_ivopts_data;
2036 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2038 bitmap *depends_on = (bitmap *) data;
2039 struct version_info *info;
/* Only SSA names that are recorded invariants (and have no nonlinear
   use) are interesting here.  */
2041 if (TREE_CODE (*expr_p) != SSA_NAME)
2043 info = name_info (fd_ivopts_data, *expr_p);
2045 if (!info->inv_id || info->has_nonlin_use)
/* Allocate the dependency bitmap lazily on first hit.  */
2049 *depends_on = BITMAP_ALLOC (NULL);
2050 bitmap_set_bit (*depends_on, info->inv_id);
2055 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2056 position to POS.  If USE is not NULL, the candidate is set as related to
2057 it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2058 replacement of the final value of the iv by a direct computation. */
2060 static struct iv_cand *
2061 add_candidate_1 (struct ivopts_data *data,
2062 tree base, tree step, bool important, enum iv_position pos,
2063 struct iv_use *use, gimple incremented_at)
2066 struct iv_cand *cand = NULL;
2067 tree type, orig_type;
2071 orig_type = TREE_TYPE (base);
2072 type = generic_type_for (orig_type);
2073 /* Don't convert the base to the generic type for pointers as the generic
2074 type is an integer type with the same size as the pointer type. */
2075 if (type != orig_type && !POINTER_TYPE_P (orig_type))
2077 base = fold_convert (type, base);
2078 step = fold_convert (type, step);
/* Deduplicate: look for an existing candidate with the same position,
   increment statement, base and step.  */
2082 for (i = 0; i < n_iv_cands (data); i++)
2084 cand = iv_cand (data, i);
2086 if (cand->pos != pos)
2089 if (cand->incremented_at != incremented_at)
2103 if (operand_equal_p (base, cand->iv->base, 0)
2104 && operand_equal_p (step, cand->iv->step, 0))
/* Not found — create a fresh candidate.  */
2108 if (i == n_iv_cands (data))
2110 cand = XCNEW (struct iv_cand)
2116 cand->iv = alloc_iv (base, step);
/* For non-IP_ORIGINAL candidates we create the temporary; the
   incremented copy is the same variable until rewriting splits it.  */
2119 if (pos != IP_ORIGINAL && cand->iv)
2121 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2122 cand->var_after = cand->var_before;
2124 cand->important = important;
2125 cand->incremented_at = incremented_at;
2126 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
/* A non-constant step makes the candidate depend on the invariants
   appearing in it; collect them with walk_tree.  */
2129 && TREE_CODE (step) != INTEGER_CST)
2131 fd_ivopts_data = data;
2132 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2135 if (dump_file && (dump_flags & TDF_DETAILS))
2136 dump_cand (dump_file, cand);
/* An existing candidate may be promoted to important.  */
2139 if (important && !cand->important)
2141 cand->important = true;
2142 if (dump_file && (dump_flags & TDF_DETAILS))
2143 fprintf (dump_file, "Candidate %d is important\n", cand->id);
/* Link the candidate to the use that suggested it (if any).  */
2148 bitmap_set_bit (use->related_cands, i);
2149 if (dump_file && (dump_flags & TDF_DETAILS))
2150 fprintf (dump_file, "Candidate %d is related to use %d\n",
2157 /* Returns true if incrementing the induction variable at the end of the LOOP
2160 The purpose is to avoid splitting latch edge with a biv increment, thus
2161 creating a jump, possibly confusing other optimization passes and leaving
2162 less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2163 is not available (so we do not have a better alternative), or if the latch
2164 edge is already nonempty. */
2167 allow_ip_end_pos_p (struct loop *loop)
2169 if (!ip_normal_pos (loop))
2172 if (!empty_block_p (ip_end_pos (loop)))
2178 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2179 position to POS.  If USE is not NULL, the candidate is set as related to
2180 it.  The candidate computation is scheduled on all available positions. */
2183 add_candidate (struct ivopts_data *data,
2184 tree base, tree step, bool important, struct iv_use *use)
/* Try both increment positions; IP_END only where it will not split
   the latch edge (see allow_ip_end_pos_p).  */
2186 if (ip_normal_pos (data->current_loop))
2187 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2188 if (ip_end_pos (data->current_loop)
2189 && allow_ip_end_pos_p (data->current_loop))
2190 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2193 /* Add a standard "0 + 1 * iteration" iv candidate for a
2194 type with SIZE bits. */
2197 add_standard_iv_candidates_for_size (struct ivopts_data *data,
/* The candidate counts iterations directly: base 0, step 1, always
   marked important.  */
2200 tree type = lang_hooks.types.type_for_size (size, true);
2201 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2205 /* Adds standard iv candidates. */
2208 add_standard_iv_candidates (struct ivopts_data *data)
2210 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2212 /* The same for a double-integer type if it is still fast enough. */
2213 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2214 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2218 /* Adds candidates bases on the old induction variable IV. */
2221 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2225 struct iv_cand *cand;
/* The iv itself, verbatim.  */
2227 add_candidate (data, iv->base, iv->step, true, NULL);
2229 /* The same, but with initial value zero. */
2230 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2231 add_candidate (data, size_int (0), iv->step, true, NULL);
2233 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2234 iv->step, true, NULL);
/* If the iv comes from a loop-header PHI, also offer an IP_ORIGINAL
   candidate that keeps the original variable in place.  */
2236 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2237 if (gimple_code (phi) == GIMPLE_PHI)
2239 /* Additionally record the possibility of leaving the original iv
2241 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2242 cand = add_candidate_1 (data,
2243 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2244 SSA_NAME_DEF_STMT (def));
2245 cand->var_before = iv->ssa_name;
2246 cand->var_after = def;
2250 /* Adds candidates based on the old induction variables. */
2253 add_old_ivs_candidates (struct ivopts_data *data)
/* Walk all relevant SSA versions and pick out the basic induction
   variables with a nonzero step.  */
2259 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2261 iv = ver_info (data, i)->iv;
2262 if (iv && iv->biv_p && !integer_zerop (iv->step))
2263 add_old_iv_candidates (data, iv);
2267 /* Adds candidates based on the value of the induction variable IV and USE. */
2270 add_iv_value_candidates (struct ivopts_data *data,
2271 struct iv *iv, struct iv_use *use)
2273 unsigned HOST_WIDE_INT offset;
/* First candidate: the iv exactly as used.  */
2277 add_candidate (data, iv->base, iv->step, false, use);
2279 /* The same, but with initial value zero.  Make such variable important,
2280 since it is generic enough so that possibly many uses may be based
/* For pointer ivs the zero base must be an integer (sizetype), not a
   null pointer.  */
2282 basetype = TREE_TYPE (iv->base);
2283 if (POINTER_TYPE_P (basetype))
2284 basetype = sizetype;
2285 add_candidate (data, build_int_cst (basetype, 0),
2286 iv->step, true, use);
2288 /* Third, try removing the constant offset.  Make sure to even
2289 add a candidate for &a[0] vs. (T *)&a. */
2290 base = strip_offset (iv->base, &offset);
2292 || base != iv->base)
2293 add_candidate (data, base, iv->step, false, use);
2296 /* Adds candidates based on the uses. */
2299 add_derived_ivs_candidates (struct ivopts_data *data)
2303 for (i = 0; i < n_iv_uses (data); i++)
2305 struct iv_use *use = iv_use (data, i);
/* Dispatch on the use kind; the kinds visible here all derive
   candidates from the value of the iv at the use.  */
2312 case USE_NONLINEAR_EXPR:
2315 /* Just add the ivs based on the value of the iv used here. */
2316 add_iv_value_candidates (data, use->iv, use);
2325 /* Record important candidates and add them to related_cands bitmaps
2329 record_important_candidates (struct ivopts_data *data)
2334 for (i = 0; i < n_iv_cands (data); i++)
2336 struct iv_cand *cand = iv_cand (data, i);
2338 if (cand->important)
2339 bitmap_set_bit (data->important_candidates, i);
/* With few candidates we evaluate all (use, cand) pairs, so the
   per-use related_cands bitmaps become dead weight.  */
2342 data->consider_all_candidates = (n_iv_cands (data)
2343 <= CONSIDER_ALL_CANDIDATES_BOUND);
2345 if (data->consider_all_candidates)
2347 /* We will not need "related_cands" bitmaps in this case,
2348 so release them to decrease peak memory consumption. */
2349 for (i = 0; i < n_iv_uses (data); i++)
2351 use = iv_use (data, i);
2352 BITMAP_FREE (use->related_cands);
2357 /* Add important candidates to the related_cands bitmaps. */
2358 for (i = 0; i < n_iv_uses (data); i++)
2359 bitmap_ior_into (iv_use (data, i)->related_cands,
2360 data->important_candidates);
2364 /* Finds the candidates for the induction variables. */
/* Driver: gathers the three candidate sources, then records which
   ones are important.  */
2367 find_iv_candidates (struct ivopts_data *data)
2369 /* Add commonly used ivs. */
2370 add_standard_iv_candidates (data);
2372 /* Add old induction variables. */
2373 add_old_ivs_candidates (data);
2375 /* Add induction variables derived from uses. */
2376 add_derived_ivs_candidates (data);
2378 /* Record the important candidates. */
2379 record_important_candidates (data);
2382 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2383 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2384 we allocate a simple list to every use. */
2387 alloc_use_cost_map (struct ivopts_data *data)
2389 unsigned i, size, s, j;
2391 for (i = 0; i < n_iv_uses (data); i++)
2393 struct iv_use *use = iv_use (data, i);
/* Full matrix: one slot per candidate, indexed directly by cand->id.  */
2396 if (data->consider_all_candidates)
2397 size = n_iv_cands (data);
/* Otherwise size the open-addressed table by the number of related
   candidates.  */
2401 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2406 /* Round up to the power of two, so that moduling by it is fast. */
2407 for (size = 1; size < s; size <<= 1)
2411 use->n_map_members = size;
2412 use->cost_map = XCNEWVEC (struct cost_pair, size);
2416 /* Returns description of computation cost of expression whose runtime
2417 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2420 new_cost (unsigned runtime, unsigned complexity)
2424 cost.cost = runtime;
2425 cost.complexity = complexity;
2430 /* Adds costs COST1 and COST2. */
/* Component-wise sum; operates on the by-value copy of COST1.  */
2433 add_costs (comp_cost cost1, comp_cost cost2)
2435 cost1.cost += cost2.cost;
2436 cost1.complexity += cost2.complexity;
2440 /* Subtracts costs COST1 and COST2. */
/* Component-wise difference; operates on the by-value copy of COST1.  */
2443 sub_costs (comp_cost cost1, comp_cost cost2)
2445 cost1.cost -= cost2.cost;
2446 cost1.complexity -= cost2.complexity;
2451 /* Returns a negative number if COST1 < COST2, a positive number if
2452 COST1 > COST2, and 0 if COST1 = COST2. */
2455 compare_costs (comp_cost cost1, comp_cost cost2)
/* Complexity is the tie-breaker when runtime costs are equal.  */
2457 if (cost1.cost == cost2.cost)
2458 return cost1.complexity - cost2.complexity;
2460 return cost1.cost - cost2.cost;
2463 /* Returns true if COST is infinite. */
/* INFTY is the sentinel runtime cost used for impossible pairings.  */
2466 infinite_cost_p (comp_cost cost)
2468 return cost.cost == INFTY;
2471 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2472 on invariants DEPENDS_ON and that the value used in expressing it
2476 set_use_iv_cost (struct ivopts_data *data,
2477 struct iv_use *use, struct iv_cand *cand,
2478 comp_cost cost, bitmap depends_on, tree value)
/* Infinite cost means the pairing is not stored at all; release the
   dependency bitmap to avoid a leak.  */
2482 if (infinite_cost_p (cost))
2484 BITMAP_FREE (depends_on);
/* Full-matrix mode: slot is indexed directly by candidate id.  */
2488 if (data->consider_all_candidates)
2490 use->cost_map[cand->id].cand = cand;
2491 use->cost_map[cand->id].cost = cost;
2492 use->cost_map[cand->id].depends_on = depends_on;
2493 use->cost_map[cand->id].value = value;
/* Hash-table mode: open addressing with linear probing, wrapping
   around once past the end.  */
2497 /* n_map_members is a power of two, so this computes modulo. */
2498 s = cand->id & (use->n_map_members - 1);
2499 for (i = s; i < use->n_map_members; i++)
2500 if (!use->cost_map[i].cand)
2502 for (i = 0; i < s; i++)
2503 if (!use->cost_map[i].cand)
2509 use->cost_map[i].cand = cand;
2510 use->cost_map[i].cost = cost;
2511 use->cost_map[i].depends_on = depends_on;
2512 use->cost_map[i].value = value;
2515 /* Gets cost of (USE, CANDIDATE) pair. */
2517 static struct cost_pair *
2518 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2519 struct iv_cand *cand)
2522 struct cost_pair *ret;
/* Full-matrix mode: direct lookup by candidate id.  */
2527 if (data->consider_all_candidates)
2529 ret = use->cost_map + cand->id;
/* Hash-table mode: probe linearly from the candidate's home slot,
   mirroring the insertion order in set_use_iv_cost.  */
2536 /* n_map_members is a power of two, so this computes modulo. */
2537 s = cand->id & (use->n_map_members - 1);
2538 for (i = s; i < use->n_map_members; i++)
2539 if (use->cost_map[i].cand == cand)
2540 return use->cost_map + i;
2542 for (i = 0; i < s; i++)
2543 if (use->cost_map[i].cand == cand)
2544 return use->cost_map + i;
2549 /* Returns estimate on cost of computing SEQ. */
2552 seq_cost (rtx seq, bool speed)
/* Sum rtx_cost over the single-set insns of the sequence; SPEED selects
   speed vs. size costs.  */
2557 for (; seq; seq = NEXT_INSN (seq))
2559 set = single_set (seq);
2561 cost += rtx_cost (set, SET,speed);
2569 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2571 produce_memory_decl_rtl (tree obj, int *regno)
/* Static/external objects get a SYMBOL_REF-based MEM with proper
   section info; everything else a MEM through a fresh fake register.  */
2576 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2578 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2579 x = gen_rtx_SYMBOL_REF (Pmode, name);
2580 SET_SYMBOL_REF_DECL (x, obj);
2581 x = gen_rtx_MEM (DECL_MODE (obj), x);
2582 targetm.encode_section_info (obj, x, true);
2586 x = gen_raw_REG (Pmode, (*regno)++);
2587 x = gen_rtx_MEM (DECL_MODE (obj), x);
2593 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2594 walk_tree.  DATA contains the actual fake register number. */
2597 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2599 tree obj = NULL_TREE;
2601 int *regno = (int *) data;
2603 switch (TREE_CODE (*expr_p))
/* ADDR_EXPR case (label not visible in this extract): strip the
   component refs to find the addressed decl, which must live in
   memory to be addressable.  */
2606 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2607 handled_component_p (*expr_p);
2608 expr_p = &TREE_OPERAND (*expr_p, 0))
2611 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2612 x = produce_memory_decl_rtl (obj, regno);
/* SSA_NAME case: back the name's variable with a fake register.  */
2617 obj = SSA_NAME_VAR (*expr_p);
2618 if (!DECL_RTL_SET_P (obj))
2619 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Plain decl case: BLKmode objects go to memory, the rest to fake
   registers.  */
2628 if (DECL_RTL_SET_P (obj))
2631 if (DECL_MODE (obj) == BLKmode)
2632 x = produce_memory_decl_rtl (obj, regno);
2634 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
2644 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2645 SET_DECL_RTL (obj, x);
2651 /* Determines cost of the computation of EXPR. */
2654 computation_cost (tree expr, bool speed)
2657 tree type = TREE_TYPE (expr);
2659 /* Avoid using hard regs in ways which may be unsupported. */
2660 int regno = LAST_VIRTUAL_REGISTER + 1;
/* Temporarily force a "normal" frequency / hotness so the RTL cost
   hooks answer for the requested SPEED setting; restored below.  */
2661 enum function_frequency real_frequency = cfun->function_frequency;
2663 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2664 crtl->maybe_hot_insn_p = speed;
2665 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
/* Expand to RTL and price the emitted sequence.  */
2667 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2670 default_rtl_profile ();
2671 cfun->function_frequency = real_frequency;
2673 cost = seq_cost (seq, speed);
/* If the result ended up in memory, add the cost of the address.  */
2675 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2680 /* Returns variable containing the value of candidate CAND at statement AT. */
2683 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
/* After the increment point the updated variable holds the value.  */
2685 if (stmt_after_increment (loop, cand, stmt))
2686 return cand->var_after;
2688 return cand->var_before;
2691 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2692 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2695 tree_int_cst_sign_bit (const_tree t)
2697 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2698 unsigned HOST_WIDE_INT w;
/* Pick the low or high word of the double_int depending on where the
   sign bit of the type's precision falls.  */
2700 if (bitno < HOST_BITS_PER_WIDE_INT)
2701 w = TREE_INT_CST_LOW (t);
2704 w = TREE_INT_CST_HIGH (t);
2705 bitno -= HOST_BITS_PER_WIDE_INT;
2708 return (w >> bitno) & 1;
2711 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2712 same precision that is at least as wide as the precision of TYPE, stores
2713 BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2717 determine_common_wider_type (tree *a, tree *b)
2719 tree wider_type = NULL;
2721 tree atype = TREE_TYPE (*a);
/* A must be a conversion from a type at least as wide as ATYPE.  */
2723 if (CONVERT_EXPR_P (*a))
2725 suba = TREE_OPERAND (*a, 0);
2726 wider_type = TREE_TYPE (suba);
2727 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
/* B must be a conversion from a type of the same precision.  */
2733 if (CONVERT_EXPR_P (*b))
2735 subb = TREE_OPERAND (*b, 0);
2736 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2747 /* Determines the expression by that USE is expressed from induction variable
2748 CAND at statement AT in LOOP.  The expression is stored in a decomposed
2749 form into AFF.  Returns false if USE cannot be expressed using CAND. */
2752 get_computation_aff (struct loop *loop,
2753 struct iv_use *use, struct iv_cand *cand, gimple at,
2754 struct affine_tree_combination *aff)
2756 tree ubase = use->iv->base;
2757 tree ustep = use->iv->step;
2758 tree cbase = cand->iv->base;
2759 tree cstep = cand->iv->step, cstep_common;
2760 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2761 tree common_type, var;
2763 aff_tree cbase_aff, var_aff;
2766 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2768 /* We do not have a precision to express the values of use. */
/* VAR is the candidate variable valid at AT; all arithmetic happens
   in the unsigned variant of the use type to avoid overflow issues.  */
2772 var = var_at_stmt (loop, cand, at);
2773 uutype = unsigned_type_for (utype);
2775 /* If the conversion is not noop, perform it. */
2776 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2778 cstep = fold_convert (uutype, cstep);
2779 cbase = fold_convert (uutype, cbase);
2780 var = fold_convert (uutype, var);
/* RAT is the ratio ustep / cstep; without it USE cannot be expressed
   linearly in CAND.  */
2783 if (!constant_multiple_of (ustep, cstep, &rat))
2786 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2787 type, we achieve better folding by computing their difference in this
2788 wider type, and cast the result to UUTYPE.  We do not need to worry about
2789 overflows, as all the arithmetics will in the end be performed in UUTYPE
2791 common_type = determine_common_wider_type (&ubase, &cbase);
2793 /* use = ubase - ratio * cbase + ratio * var. */
2794 tree_to_aff_combination (ubase, common_type, aff);
2795 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2796 tree_to_aff_combination (var, uutype, &var_aff);
2798 /* We need to shift the value if we are after the increment. */
2799 if (stmt_after_increment (loop, cand, at))
2803 if (common_type != uutype)
2804 cstep_common = fold_convert (common_type, cstep)
2806 cstep_common = cstep;
2808 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2809 aff_combination_add (&cbase_aff, &cstep_aff);
/* Assemble aff = ubase - rat * cbase + rat * var.  */
2812 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2813 aff_combination_add (aff, &cbase_aff);
2814 if (common_type != uutype)
2815 aff_combination_convert (aff, uutype);
2817 aff_combination_scale (&var_aff, rat);
2818 aff_combination_add (aff, &var_aff);
2823 /* Determines the expression by that USE is expressed from induction variable
2824 CAND at statement AT in LOOP.  The computation is unshared. */
2827 get_computation_at (struct loop *loop,
2828 struct iv_use *use, struct iv_cand *cand, gimple at)
2831 tree type = TREE_TYPE (use->iv->base);
2833 if (!get_computation_aff (loop, use, cand, at, &aff))
/* Unshare before materializing, so the tree may be inserted freely.  */
2835 unshare_aff_combination (&aff);
2836 return fold_convert (type, aff_combination_to_tree (&aff));
2839 /* Determines the expression by that USE is expressed from induction variable
2840 CAND in LOOP.  The computation is unshared. */
/* Convenience wrapper: evaluate at the use's own statement.  */
2843 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2845 return get_computation_at (loop, use, cand, use->stmt);
2848 /* Returns cost of addition in MODE. */
2851 add_cost (enum machine_mode mode, bool speed)
/* Memoized per mode; computed once by expanding a register-register
   PLUS and pricing the resulting insn sequence.  */
2853 static unsigned costs[NUM_MACHINE_MODES];
2861 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2862 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2863 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2868 cost = seq_cost (seq, speed);
2874 if (dump_file && (dump_flags & TDF_DETAILS))
2875 fprintf (dump_file, "Addition in %s costs %d\n",
2876 GET_MODE_NAME (mode), cost);
2880 /* Entry in a hashtable of already known costs for multiplication. */
2883 HOST_WIDE_INT cst;		/* The constant to multiply by. */
2884 enum machine_mode mode;	/* In mode. */
2885 unsigned cost;		/* The cost. */
2888 /* Counts hash value for the ENTRY. */
2891 mbc_entry_hash (const void *entry)
2893 const struct mbc_entry *e = (const struct mbc_entry *) entry;
/* Mix mode and constant with small primes; collisions are tolerable.  */
2895 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2898 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2901 mbc_entry_eq (const void *entry1, const void *entry2)
2903 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2904 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
/* Entries are equal iff both the mode and the constant match.  */
2906 return (e1->mode == e2->mode
2907 && e1->cst == e2->cst);
2910 /* Returns cost of multiplication by constant CST in MODE. */
2913 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
/* Results are memoized in a lazily created hash table keyed on
   (mode, cst).  */
2915 static htab_t costs;
2916 struct mbc_entry **cached, act;
2921 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2925 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2927 return (*cached)->cost;
2929 *cached = XNEW (struct mbc_entry);
2930 (*cached)->mode = mode;
2931 (*cached)->cst = cst;
/* Price the sequence that expand_mult emits for reg * CST.  */
2934 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2935 gen_int_mode (cst, mode), NULL_RTX, 0);
2939 cost = seq_cost (seq, speed);
2941 if (dump_file && (dump_flags & TDF_DETAILS))
2942 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2943 (int) cst, GET_MODE_NAME (mode), cost);
2945 (*cached)->cost = cost;
2950 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2951 validity for a memory reference accessing memory of mode MODE. */
2954 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2956 #define MAX_RATIO 128
/* One lazily built bitmap per machine mode; bit (i + MAX_RATIO) records
   whether `reg * i' is a valid address for MODE, for i in
   [-MAX_RATIO, MAX_RATIO].  */
2957 static sbitmap valid_mult[MAX_MACHINE_MODE];
2959 if (!valid_mult[mode])
2961 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2965 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2966 sbitmap_zero (valid_mult[mode]);
/* Probe each candidate multiplier by asking the backend whether the MULT
   address form is legitimate.  */
2967 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2968 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2970 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2971 if (memory_address_p (mode, addr))
2972 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2975 if (dump_file && (dump_flags & TDF_DETAILS))
2977 fprintf (dump_file, " allowed multipliers:");
2978 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2979 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2980 fprintf (dump_file, " %d", (int) i);
2981 fprintf (dump_file, "\n");
2982 fprintf (dump_file, "\n");
/* Ratios outside the probed window are conservatively rejected.  */
2986 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2989 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2992 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2993 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2994 variable is omitted. Compute the cost for a memory reference that accesses
2995 a memory location of mode MEM_MODE.
2997 TODO -- there must be some better way. This all is quite crude. */
3000 get_address_cost (bool symbol_present, bool var_present,
3001 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3002 enum machine_mode mem_mode,
/* Per-mode static tables, filled lazily on first call for a given mode:
   rat/off: representative ratio and offset used when probing address forms;
   min_offset/max_offset: largest offset range the target accepts;
   costs[mode][sym][var][off][rat]: cost of each of the 16 address shapes.  */
3005 static bool initialized[MAX_MACHINE_MODE];
3006 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3007 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3008 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3009 unsigned cost, acost, complexity;
3010 bool offset_p, ratio_p;
3011 HOST_WIDE_INT s_offset;
3012 unsigned HOST_WIDE_INT mask;
3015 if (!initialized[mem_mode])
3018 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3019 int old_cse_not_expected;
3020 unsigned sym_p, var_p, off_p, rat_p, add_c;
3021 rtx seq, addr, base;
3024 initialized[mem_mode] = true;
3026 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
/* Find the maximum valid positive offset by doubling from the alignment
   until memory_address_p rejects the form.  */
3028 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3029 for (i = start; i <= 1 << 20; i <<= 1)
3031 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3032 if (!memory_address_p (mem_mode, addr))
3035 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3036 off[mem_mode] = max_offset[mem_mode];
/* Same doubling probe for the most negative valid offset.  */
3038 for (i = start; i <= 1 << 20; i <<= 1)
3040 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3041 if (!memory_address_p (mem_mode, addr))
3044 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3046 if (dump_file && (dump_flags & TDF_DETAILS))
3048 fprintf (dump_file, "get_address_cost:\n");
3049 fprintf (dump_file, " min offset %s %d\n",
3050 GET_MODE_NAME (mem_mode),
3051 (int) min_offset[mem_mode]);
3052 fprintf (dump_file, " max offset %s %d\n",
3053 GET_MODE_NAME (mem_mode),
3054 (int) max_offset[mem_mode]);
/* Pick a representative allowed multiplier (the elided body presumably
   stores it into rat[mem_mode] -- TODO confirm in full source).  */
3058 for (i = 2; i <= MAX_RATIO; i++)
3059 if (multiplier_allowed_in_address_p (i, mem_mode))
3065 /* Compute the cost of various addressing modes. */
3067 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3068 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
/* Enumerate all 16 combinations of symbol/var/offset/ratio presence,
   decoded from the bits of I, and price each address shape.  */
3070 for (i = 0; i < 16; i++)
3073 var_p = (i >> 1) & 1;
3074 off_p = (i >> 2) & 1;
3075 rat_p = (i >> 3) & 1;
3079 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3080 gen_int_mode (rat[mem_mode], Pmode));
3083 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3087 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3088 /* ??? We can run into trouble with some backends by presenting
3089 it with symbols which haven't been properly passed through
3090 targetm.encode_section_info. By setting the local bit, we
3091 enhance the probability of things working. */
3092 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3095 base = gen_rtx_fmt_e (CONST, Pmode,
3096 gen_rtx_fmt_ee (PLUS, Pmode,
3098 gen_int_mode (off[mem_mode],
3102 base = gen_int_mode (off[mem_mode], Pmode);
3107 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3110 /* To avoid splitting addressing modes, pretend that no cse will
3112 old_cse_not_expected = cse_not_expected;
3113 cse_not_expected = true;
3114 addr = memory_address (mem_mode, addr);
3115 cse_not_expected = old_cse_not_expected;
3119 acost = seq_cost (seq, speed);
3120 acost += address_cost (addr, mem_mode, speed);
3124 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3127 /* On some targets, it is quite expensive to load symbol to a register,
3128 which makes addresses that contain symbols look much more expensive.
3129 However, the symbol will have to be loaded in any case before the
3130 loop (and quite likely we have it in register already), so it does not
3131 make much sense to penalize them too heavily. So make some final
3132 tweaks for the SYMBOL_PRESENT modes:
3134 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3135 var is cheaper, use this mode with small penalty.
3136 If VAR_PRESENT is true, try whether the mode with
3137 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3138 if this is the case, use it. */
3139 add_c = add_cost (Pmode, speed);
3140 for (i = 0; i < 8; i++)
3143 off_p = (i >> 1) & 1;
3144 rat_p = (i >> 2) & 1;
3146 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3150 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3151 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3154 if (dump_file && (dump_flags & TDF_DETAILS))
3156 fprintf (dump_file, "Address costs:\n");
3158 for (i = 0; i < 16; i++)
3161 var_p = (i >> 1) & 1;
3162 off_p = (i >> 2) & 1;
3163 rat_p = (i >> 3) & 1;
3165 fprintf (dump_file, " ");
3167 fprintf (dump_file, "sym + ");
3169 fprintf (dump_file, "var + ");
3171 fprintf (dump_file, "cst + ");
3173 fprintf (dump_file, "rat * ");
3175 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3176 fprintf (dump_file, "index costs %d\n", acost);
3178 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from the width of Pmode: MASK keeps the low BITS
   bits, and the test on bit (bits - 1) detects a negative value.  */
3182 bits = GET_MODE_BITSIZE (Pmode);
3183 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3185 if ((offset >> (bits - 1) & 1))
3190 offset_p = (s_offset != 0
3191 && min_offset[mem_mode] <= s_offset
3192 && s_offset <= max_offset[mem_mode])
3193 ratio_p = (ratio != 1
3194 && multiplier_allowed_in_address_p (ratio, mem_mode));
/* Parts that do not fit the addressing mode must be computed
   separately; charge the multiply/add needed to materialize them.  */
3196 if (ratio != 1 && !ratio_p)
3197 cost += multiply_by_cost (ratio, Pmode, speed);
3199 if (s_offset && !offset_p && !symbol_present)
3200 cost += add_cost (Pmode, speed);
3202 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3203 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3204 return new_cost (cost + acost, complexity);
3207 /* Estimates cost of forcing expression EXPR into a variable. */
3210 force_expr_to_var_cost (tree expr, bool speed)
/* Baseline costs measured once per compilation, indexed by SPEED
   (0 = optimize for size, 1 = optimize for speed).  */
3212 static bool costs_initialized = false;
3213 static unsigned integer_cost [2];
3214 static unsigned symbol_cost [2];
3215 static unsigned address_cost [2];
3217 comp_cost cost0, cost1, cost;
3218 enum machine_mode mode;
3220 if (!costs_initialized)
3222 tree type = build_pointer_type (integer_type_node);
/* Build a throw-away static variable so we can price loading an integer
   constant, a symbol address, and a symbol + offset address.  */
3227 var = create_tmp_var_raw (integer_type_node, "test_var");
3228 TREE_STATIC (var) = 1;
3229 x = produce_memory_decl_rtl (var, NULL);
3230 SET_DECL_RTL (var, x);
3232 addr = build1 (ADDR_EXPR, type, var);
3235 for (i = 0; i < 2; i++)
3237 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3240 symbol_cost[i] = computation_cost (addr, i) + 1;
3243 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3245 build_int_cst (sizetype, 2000)), i) + 1;
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3248 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3249 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3250 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3251 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3252 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3253 fprintf (dump_file, "\n");
3257 costs_initialized = true;
/* SSA names and invariants are handled directly via the measured costs.  */
3262 if (SSA_VAR_P (expr))
3265 if (is_gimple_min_invariant (expr))
3267 if (TREE_CODE (expr) == INTEGER_CST)
3268 return new_cost (integer_cost [speed], 0);
3270 if (TREE_CODE (expr) == ADDR_EXPR)
3272 tree obj = TREE_OPERAND (expr, 0);
3274 if (TREE_CODE (obj) == VAR_DECL
3275 || TREE_CODE (obj) == PARM_DECL
3276 || TREE_CODE (obj) == RESULT_DECL)
3277 return new_cost (symbol_cost [speed], 0);
3280 return new_cost (address_cost [speed], 0);
/* Composite expressions: recursively cost the operands, then add the
   cost of the top-level operation.  */
3283 switch (TREE_CODE (expr))
3285 case POINTER_PLUS_EXPR:
3289 op0 = TREE_OPERAND (expr, 0);
3290 op1 = TREE_OPERAND (expr, 1);
3294 if (is_gimple_val (op0))
3297 cost0 = force_expr_to_var_cost (op0, speed);
3299 if (is_gimple_val (op1))
3302 cost1 = force_expr_to_var_cost (op1, speed);
3307 /* Just an arbitrary value, FIXME. */
3308 return new_cost (target_spill_cost[speed], 0);
3311 mode = TYPE_MODE (TREE_TYPE (expr));
3312 switch (TREE_CODE (expr))
3314 case POINTER_PLUS_EXPR:
3317 cost = new_cost (add_cost (mode, speed), 0);
/* Multiplication is only priced when one operand is a host-word
   constant; a variable * variable product is treated as a spill.  */
3321 if (cst_and_fits_in_hwi (op0))
3322 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3323 else if (cst_and_fits_in_hwi (op1))
3324 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3326 return new_cost (target_spill_cost [speed], 0);
3333 cost = add_costs (cost, cost0);
3334 cost = add_costs (cost, cost1);
3336 /* Bound the cost by target_spill_cost. The parts of complicated
3337 computations often are either loop invariant or at least can
3338 be shared between several iv uses, so letting this grow without
3339 limits would not give reasonable results. */
3340 if (cost.cost > target_spill_cost [speed])
3341 cost.cost = target_spill_cost [speed];
3346 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3347 invariants the computation depends on. */
3350 force_var_cost (struct ivopts_data *data,
3351 tree expr, bitmap *depends_on)
/* Record the invariants EXPR depends on (via the find_depends walker),
   then delegate the actual costing to force_expr_to_var_cost.  */
3355 fd_ivopts_data = data;
3356 walk_tree (&expr, find_depends, depends_on, NULL);
3359 return force_expr_to_var_cost (expr, data->speed);
3362 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3363 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3364 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3365 invariants the computation depends on. */
3368 split_address_cost (struct ivopts_data *data,
3369 tree addr, bool *symbol_present, bool *var_present,
3370 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3373 HOST_WIDE_INT bitsize;
3374 HOST_WIDE_INT bitpos;
3376 enum machine_mode mode;
3377 int unsignedp, volatilep;
/* Decompose ADDR into its innermost object plus bit offset.  */
3379 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3380 &unsignedp, &volatilep, false);
/* If the reference is not a plain byte-aligned VAR_DECL access, give up
   on splitting: treat it as an opaque variable with spill-like cost.  */
3383 || bitpos % BITS_PER_UNIT != 0
3384 || TREE_CODE (core) != VAR_DECL)
3386 *symbol_present = false;
3387 *var_present = true;
3388 fd_ivopts_data = data;
3389 walk_tree (&addr, find_depends, depends_on, NULL);
3390 return new_cost (target_spill_cost[data->speed], 0);
3393 *offset += bitpos / BITS_PER_UNIT;
/* A static or external decl has a link-time symbol; otherwise the decl
   behaves as an ordinary variable part.  */
3394 if (TREE_STATIC (core)
3395 || DECL_EXTERNAL (core))
3397 *symbol_present = true;
3398 *var_present = false;
3402 *symbol_present = false;
3403 *var_present = true;
3407 /* Estimates cost of expressing difference of addresses E1 - E2 as
3408 var + symbol + offset. The value of offset is added to OFFSET,
3409 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3410 part is missing. DEPENDS_ON is a set of the invariants the computation
3414 ptr_difference_cost (struct ivopts_data *data,
3415 tree e1, tree e2, bool *symbol_present, bool *var_present,
3416 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3418 HOST_WIDE_INT diff = 0;
3420 bool speed = optimize_loop_for_speed_p (data->current_loop);
3422 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Fast path: the difference folds to a compile-time constant.  */
3424 if (ptr_difference_const (e1, e2, &diff))
3427 *symbol_present = false;
3428 *var_present = false;
/* E2 == 0 reduces to splitting the single address E1.  */
3432 if (integer_zerop (e2))
3433 return split_address_cost (data, TREE_OPERAND (e1, 0),
3434 symbol_present, var_present, offset, depends_on);
3436 *symbol_present = false;
3437 *var_present = true;
/* General case: force both addresses into variables and subtract.  */
3439 cost = force_var_cost (data, e1, depends_on);
3440 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3441 cost.cost += add_cost (Pmode, speed);
3446 /* Estimates cost of expressing difference E1 - E2 as
3447 var + symbol + offset. The value of offset is added to OFFSET,
3448 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3449 part is missing. DEPENDS_ON is a set of the invariants the computation
3453 difference_cost (struct ivopts_data *data,
3454 tree e1, tree e2, bool *symbol_present, bool *var_present,
3455 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3458 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3459 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands first; they combine into the
   caller's accumulated OFFSET.  */
3461 e1 = strip_offset (e1, &off1);
3462 e2 = strip_offset (e2, &off2);
3463 *offset += off1 - off2;
/* Address differences get the pointer-specific treatment.  */
3468 if (TREE_CODE (e1) == ADDR_EXPR)
3469 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3471 *symbol_present = false;
3473 if (operand_equal_p (e1, e2, 0))
3475 *var_present = false;
3478 *var_present = true;
3479 if (integer_zerop (e2))
3480 return force_var_cost (data, e1, depends_on);
/* 0 - E2 costs a negation, modeled as multiplication by -1.  */
3482 if (integer_zerop (e1))
3484 cost = force_var_cost (data, e2, depends_on);
3485 cost.cost += multiply_by_cost (-1, mode, data->speed);
3490 cost = force_var_cost (data, e1, depends_on);
3491 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3492 cost.cost += add_cost (mode, data->speed);
3497 /* Determines the cost of the computation by that USE is expressed
3498 from induction variable CAND. If ADDRESS_P is true, we just need
3499 to create an address from it, otherwise we want to get it into
3500 register. A set of invariants we depend on is stored in
3501 DEPENDS_ON. AT is the statement at that the value is computed. */
3504 get_computation_cost_at (struct ivopts_data *data,
3505 struct iv_use *use, struct iv_cand *cand,
3506 bool address_p, bitmap *depends_on, gimple at)
3508 tree ubase = use->iv->base, ustep = use->iv->step;
3510 tree utype = TREE_TYPE (ubase), ctype;
3511 unsigned HOST_WIDE_INT cstepi, offset = 0;
3512 HOST_WIDE_INT ratio, aratio;
3513 bool var_present, symbol_present;
3517 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3521 /* Only consider real candidates. */
3523 return infinite_cost;
3525 cbase = cand->iv->base;
3526 cstep = cand->iv->step;
3527 ctype = TREE_TYPE (cbase);
3529 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3531 /* We do not have a precision to express the values of use. */
3532 return infinite_cost;
3537 /* Do not try to express address of an object with computation based
3538 on address of a different object. This may cause problems in rtl
3539 level alias analysis (that does not expect this to be happening,
3540 as this is illegal in C), and would be unlikely to be useful
3542 if (use->iv->base_object
3543 && cand->iv->base_object
3544 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3545 return infinite_cost;
3548 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3550 /* TODO -- add direct handling of this case. */
3554 /* CSTEPI is removed from the offset in case statement is after the
3555 increment. If the step is not constant, we use zero instead.
3556 This is a bit imprecise (there is the extra addition), but
3557 redundancy elimination is likely to transform the code so that
3558 it uses value of the variable before increment anyway,
3559 so it is not that much unrealistic. */
3560 if (cst_and_fits_in_hwi (cstep))
3561 cstepi = int_cst_value (cstep);
/* RATIO = ustep / cstep must be a compile-time constant fitting a signed
   host word, else the use cannot be cheaply based on this candidate.  */
3565 if (!constant_multiple_of (ustep, cstep, &rat))
3566 return infinite_cost;
3568 if (double_int_fits_in_shwi_p (rat))
3569 ratio = double_int_to_shwi (rat);
3571 return infinite_cost;
3573 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3574 or ratio == 1, it is better to handle this like
3576 ubase - ratio * cbase + ratio * var
3578 (also holds in the case ratio == -1, TODO. */
3580 if (cst_and_fits_in_hwi (cbase))
3582 offset = - ratio * int_cst_value (cbase);
3583 cost = difference_cost (data,
3584 ubase, build_int_cst (utype, 0),
3585 &symbol_present, &var_present, &offset,
3588 else if (ratio == 1)
3590 cost = difference_cost (data,
3592 &symbol_present, &var_present, &offset,
/* General case: force CBASE into a register, pay one addition, and cost
   the remaining UBASE part as a difference against zero.  */
3597 cost = force_var_cost (data, cbase, depends_on);
3598 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3599 cost = add_costs (cost,
3600 difference_cost (data,
3601 ubase, build_int_cst (utype, 0),
3602 &symbol_present, &var_present,
3603 &offset, depends_on));
3606 /* If we are after the increment, the value of the candidate is higher by
3608 if (stmt_after_increment (data->current_loop, cand, at))
3609 offset -= ratio * cstepi;
3611 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3612 (symbol/var/const parts may be omitted). If we are looking for an address,
3613 find the cost of addressing this. */
3615 return add_costs (cost, get_address_cost (symbol_present, var_present,
3617 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3619 /* Otherwise estimate the costs for computing the expression. */
3620 aratio = ratio > 0 ? ratio : -ratio;
3621 if (!symbol_present && !var_present && !offset)
3624 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3630 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3634 /* Symbol + offset should be compile-time computable. */
3635 && (symbol_present || offset)
3638 /* Having offset does not affect runtime cost in case it is added to
3639 symbol, but it increases complexity. */
3643 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
/* Fallback (reached via an elided branch): build the whole replacement
   expression, expand it, and measure the cost directly.  */
3648 /* Just get the expression, expand it and measure the cost. */
3649 tree comp = get_computation_at (data->current_loop, use, cand, at);
3652 return infinite_cost;
3655 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3657 return new_cost (computation_cost (comp, speed), 0);
3661 /* Determines the cost of the computation by that USE is expressed
3662 from induction variable CAND. If ADDRESS_P is true, we just need
3663 to create an address from it, otherwise we want to get it into
3664 register. A set of invariants we depend on is stored in
/* NOTE(review): wrapper around get_computation_cost_at, anchored at the
   use's own statement.  */
3668 get_computation_cost (struct ivopts_data *data,
3669 struct iv_use *use, struct iv_cand *cand,
3670 bool address_p, bitmap *depends_on)
3672 return get_computation_cost_at (data,
3673 use, cand, address_p, depends_on, use->stmt);
3676 /* Determines cost of basing replacement of USE on CAND in a generic
3680 determine_use_iv_cost_generic (struct ivopts_data *data,
3681 struct iv_use *use, struct iv_cand *cand)
3686 /* The simple case first -- if we need to express value of the preserved
3687 original biv, the cost is 0. This also prevents us from counting the
3688 cost of increment twice -- once at this use and once in the cost of
3690 if (cand->pos == IP_ORIGINAL
3691 && cand->incremented_at == use->stmt)
3693 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3697 cost = get_computation_cost (data, use, cand, false, &depends_on);
3698 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
/* Return value signals whether USE can be based on CAND at all.  */
3700 return !infinite_cost_p (cost);
3703 /* Determines cost of basing replacement of USE on CAND in an address. */
/* Same as the generic case but with ADDRESS_P = true, so the cost of the
   addressing mode itself is included.  */
3706 determine_use_iv_cost_address (struct ivopts_data *data,
3707 struct iv_use *use, struct iv_cand *cand)
3710 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3712 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3714 return !infinite_cost_p (cost);
3717 /* Computes value of candidate CAND at position AT in iteration NITER, and
3718 stores it to VAL. */
3721 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3724 aff_tree step, delta, nit;
3725 struct iv *iv = cand->iv;
3726 tree type = TREE_TYPE (iv->base);
/* Pointer IVs step in sizetype; integer IVs step in their own type.  */
3727 tree steptype = type;
3728 if (POINTER_TYPE_P (type))
3729 steptype = sizetype;
/* VAL = base + niter * step, computed as an affine combination; if AT is
   after the increment, one extra step is added.  */
3731 tree_to_aff_combination (iv->step, steptype, &step);
3732 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3733 aff_combination_convert (&nit, steptype);
3734 aff_combination_mult (&nit, &step, &delta);
3735 if (stmt_after_increment (loop, cand, at))
3736 aff_combination_add (&delta, &step);
3738 tree_to_aff_combination (iv->base, type, val);
3739 aff_combination_add (val, &delta);
3742 /* Returns period of induction variable iv. */
3745 iv_period (struct iv *iv)
3747 tree step = iv->step, period, type;
3750 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3752 /* Period of the iv is gcd (step, type range). Since type range is power
3753 of two, it suffices to determine the maximum power of two that divides
3755 pow2div = num_ending_zeros (step);
3756 type = unsigned_type_for (TREE_TYPE (step));
/* Period = 2^(precision - ctz(step)) - 1, built as a low-bits mask.  */
3758 period = build_low_bits_mask (type,
3759 (TYPE_PRECISION (type)
3760 - tree_low_cst (pow2div, 1)));
3765 /* Returns the comparison operator used when eliminating the iv USE. */
3767 static enum tree_code
3768 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3770 struct loop *loop = data->current_loop;
/* Find the successor edge that leaves the loop; if edge 0 stays inside,
   edge 1 must be the exit.  */
3774 ex_bb = gimple_bb (use->stmt);
3775 exit = EDGE_SUCC (ex_bb, 0);
3776 if (flow_bb_inside_loop_p (loop, exit->dest))
3777 exit = EDGE_SUCC (ex_bb, 1);
/* Exit on the true branch => compare with EQ; otherwise NE.  */
3779 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3782 /* Check whether it is possible to express the condition in USE by comparison
3783 of candidate CAND. If so, store the value compared with to BOUND. */
3786 may_eliminate_iv (struct ivopts_data *data,
3787 struct iv_use *use, struct iv_cand *cand, tree *bound)
3792 struct loop *loop = data->current_loop;
/* Elimination requires a candidate with a constant step.  */
3795 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3798 /* For now works only for exits that dominate the loop latch.
3799 TODO: extend to other conditions inside loop body. */
3800 ex_bb = gimple_bb (use->stmt)
3801 if (use->stmt != last_stmt (ex_bb)
3802 || gimple_code (use->stmt) != GIMPLE_COND
3803 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Locate the exit edge; both successors inside the loop means this
   condition does not exit and cannot be eliminated.  */
3806 exit = EDGE_SUCC (ex_bb, 0);
3807 if (flow_bb_inside_loop_p (loop, exit->dest))
3808 exit = EDGE_SUCC (ex_bb, 1);
3809 if (flow_bb_inside_loop_p (loop, exit->dest))
3812 nit = niter_for_exit (data, exit);
3816 /* Determine whether we can use the variable to test the exit condition.
3817 This is the case iff the period of the induction variable is greater
3818 than the number of iterations for which the exit condition is true. */
3819 period = iv_period (cand->iv);
3821 /* If the number of iterations is constant, compare against it directly. */
3822 if (TREE_CODE (nit) == INTEGER_CST)
3824 if (!tree_int_cst_lt (nit, period))
3828 /* If not, and if this is the only possible exit of the loop, see whether
3829 we can get a conservative estimate on the number of iterations of the
3830 entire loop and compare against that instead. */
3831 else if (loop_only_exit_p (loop, exit))
3833 double_int period_value, max_niter;
3834 if (!estimated_loop_iterations (loop, true, &max_niter))
3836 period_value = tree_to_double_int (period);
3837 if (double_int_ucmp (max_niter, period_value) >= 0)
3841 /* Otherwise, punt. */
/* All checks passed: BOUND is the candidate's value at the exit.  */
3845 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3847 *bound = aff_combination_to_tree (&bnd);
3848 /* It is unlikely that computing the number of iterations using division
3849 would be more profitable than keeping the original induction variable. */
3850 if (expression_expensive_p (*bound))
3855 /* Determines cost of basing replacement of USE on CAND in a condition. */
3858 determine_use_iv_cost_condition (struct ivopts_data *data,
3859 struct iv_use *use, struct iv_cand *cand)
3861 tree bound = NULL_TREE;
3863 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3864 comp_cost elim_cost, express_cost, cost;
3867 /* Only consider real candidates. */
3870 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3874 /* Try iv elimination. */
3875 if (may_eliminate_iv (data, use, cand, &bound))
3877 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3878 /* The bound is a loop invariant, so it will be only computed
3880 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3883 elim_cost = infinite_cost;
3885 /* Try expressing the original giv. If it is compared with an invariant,
3886 note that we cannot get rid of it. */
3887 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3890 express_cost = get_computation_cost (data, use, cand, false,
3891 &depends_on_express);
3892 fd_ivopts_data = data;
3893 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3895 /* Choose the better approach, preferring the eliminated IV. */
3896 if (compare_costs (elim_cost, express_cost) <= 0)
/* Transfer ownership of the winning dependency bitmap by nulling the
   source pointer, so the loser is freed below.  */
3899 depends_on = depends_on_elim;
3900 depends_on_elim = NULL;
3904 cost = express_cost;
3905 depends_on = depends_on_express;
3906 depends_on_express = NULL;
3910 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3912 if (depends_on_elim)
3913 BITMAP_FREE (depends_on_elim);
3914 if (depends_on_express)
3915 BITMAP_FREE (depends_on_express);
3917 return !infinite_cost_p (cost);
3920 /* Determines cost of basing replacement of USE on CAND. Returns false
3921 if USE cannot be based on CAND. */
/* Dispatch on the use kind (nonlinear expression / address / condition).  */
3924 determine_use_iv_cost (struct ivopts_data *data,
3925 struct iv_use *use, struct iv_cand *cand)
3929 case USE_NONLINEAR_EXPR:
3930 return determine_use_iv_cost_generic (data, use, cand);
3933 return determine_use_iv_cost_address (data, use, cand);
3936 return determine_use_iv_cost_condition (data, use, cand);
3943 /* Determines costs of basing the use of the iv on an iv candidate. */
3946 determine_use_iv_costs (struct ivopts_data *data)
3950 struct iv_cand *cand;
3951 bitmap to_clear = BITMAP_ALLOC (NULL);
3953 alloc_use_cost_map (data);
3955 for (i = 0; i < n_iv_uses (data); i++)
3957 use = iv_use (data, i);
/* Either pair every use with every candidate, or only with the use's
   precomputed related candidates.  */
3959 if (data->consider_all_candidates)
3961 for (j = 0; j < n_iv_cands (data); j++)
3963 cand = iv_cand (data, j);
3964 determine_use_iv_cost (data, use, cand);
3971 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3973 cand = iv_cand (data, j);
3974 if (!determine_use_iv_cost (data, use, cand))
3975 bitmap_set_bit (to_clear, j);
3978 /* Remove the candidates for that the cost is infinite from
3979 the list of related candidates. */
3980 bitmap_and_compl_into (use->related_cands, to_clear);
3981 bitmap_clear (to_clear);
3985 BITMAP_FREE (to_clear);
3987 if (dump_file && (dump_flags & TDF_DETAILS))
3989 fprintf (dump_file, "Use-candidate costs:\n");
3991 for (i = 0; i < n_iv_uses (data); i++)
3993 use = iv_use (data, i);
3995 fprintf (dump_file, "Use %d:\n", i);
3996 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3997 for (j = 0; j < use->n_map_members; j++)
3999 if (!use->cost_map[j].cand
4000 || infinite_cost_p (use->cost_map[j].cost))
4003 fprintf (dump_file, " %d\t%d\t%d\t",
4004 use->cost_map[j].cand->id,
4005 use->cost_map[j].cost.cost,
4006 use->cost_map[j].cost.complexity);
4007 if (use->cost_map[j].depends_on)
4008 bitmap_print (dump_file,
4009 use->cost_map[j].depends_on, "","");
4010 fprintf (dump_file, "\n");
4013 fprintf (dump_file, "\n");
4015 fprintf (dump_file, "\n");
4019 /* Determines cost of the candidate CAND. */
4022 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4024 comp_cost cost_base;
4025 unsigned cost, cost_step;
4034 /* There are two costs associated with the candidate -- its increment
4035 and its initialization. The second is almost negligible for any loop
4036 that rolls enough, so we take it just very little into account. */
4038 base = cand->iv->base;
4039 cost_base = force_var_cost (data, base, NULL);
4040 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
/* Initialization cost is amortized over the expected iteration count.  */
4042 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4044 /* Prefer the original ivs unless we may gain something by replacing it.
4045 The reason is to make debugging simpler; so this is not relevant for
4046 artificial ivs created by other optimization passes. */
4047 if (cand->pos != IP_ORIGINAL
4048 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4051 /* Prefer not to insert statements into latch unless there are some
4052 already (so that we do not create unnecessary jumps). */
4053 if (cand->pos == IP_END
4054 && empty_block_p (ip_end_pos (data->current_loop)))
4060 /* Determines costs of computation of the candidates. */
/* Driver: cost every candidate, dumping a table when details requested.  */
4063 determine_iv_costs (struct ivopts_data *data)
4067 if (dump_file && (dump_flags & TDF_DETAILS))
4069 fprintf (dump_file, "Candidate costs:\n");
4070 fprintf (dump_file, " cand\tcost\n");
4073 for (i = 0; i < n_iv_cands (data); i++)
4075 struct iv_cand *cand = iv_cand (data, i);
4077 determine_iv_cost (data, cand);
4079 if (dump_file && (dump_flags & TDF_DETAILS))
4080 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4083 if (dump_file && (dump_flags & TDF_DETAILS))
4084 fprintf (dump_file, "\n");
4087 /* Calculates cost for having SIZE induction variables. */
4090 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4092 /* We add size to the cost, so that we prefer eliminating ivs
/* Cost = SIZE plus the register-pressure penalty for SIZE extra regs.  */
4094 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4097 /* For each size of the induction variable set determine the penalty. */
4100 determine_set_costs (struct ivopts_data *data)
4104 gimple_stmt_iterator psi;
4106 struct loop *loop = data->current_loop;
4109 /* We use the following model (definitely improvable, especially the
4110 cost function -- TODO):
4112 We estimate the number of registers available (using MD data), name it A.
4114 We estimate the number of registers used by the loop, name it U. This
4115 number is obtained as the number of loop phi nodes (not counting virtual
4116 registers and bivs) + the number of variables from outside of the loop.
4118 We set a reserve R (free regs that are used for temporary computations,
4119 etc.). For now the reserve is a constant 3.
4121 Let I be the number of induction variables.
4123 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4124 make a lot of ivs without a reason).
4125 -- if A - R < U + I <= A, the cost is I * PRES_COST
4126 -- if U + I > A, the cost is I * PRES_COST and
4127 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4131 fprintf (dump_file, "Global costs:\n");
4132 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4133 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4134 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
/* Count registers used by the loop: phi results that are real (non-virtual)
   and not already tracked as ivs ...  */
4138 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4140 phi = gsi_stmt (psi);
4141 op = PHI_RESULT (phi);
4143 if (!is_gimple_reg (op))
4146 if (get_iv (data, op))
/* ... plus loop invariants that have nonlinear uses.  */
4152 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4154 struct version_info *info = ver_info (data, j);
4156 if (info->inv_id && info->has_nonlin_use)
4160 data->regs_used = n;
4161 if (dump_file && (dump_flags & TDF_DETAILS))
4162 fprintf (dump_file, " regs_used %d\n", n);
4164 if (dump_file && (dump_flags & TDF_DETAILS))
4166 fprintf (dump_file, " cost for size:\n");
4167 fprintf (dump_file, " ivs\tcost\n");
4168 for (j = 0; j <= 2 * target_avail_regs; j++)
4169 fprintf (dump_file, " %d\t%d\n", j,
4170 ivopts_global_cost_for_size (data, j));
4171 fprintf (dump_file, "\n");
4175 /* Returns true if A is a cheaper cost pair than B. */
4178 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
/* NOTE(review): the NULL-pair guards and the returns driven by the
   comparison result are elided in this excerpt.  */
4188 cmp = compare_costs (a->cost, b->cost);
4195 /* In case the costs are the same, prefer the cheaper candidate. */
4196 if (a->cand->cost < b->cand->cost)
4202 /* Computes the cost field of IVS structure. */
4205 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
/* Total cost = sum of per-use costs + cost of maintaining the candidates
   + global register-pressure cost for IVS->n_regs registers.  */
4207 comp_cost cost = ivs->cand_use_cost;
4208 cost.cost += ivs->cand_cost;
4209 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
/* NOTE(review): the store of COST back into IVS is elided here.  */
4214 /* Remove invariants in set INVS from set IVS. */
4217 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
/* Decrement the reference count of every invariant in INVS; when a count
   drops to zero the invariant no longer occupies a register (the
   n_regs bookkeeping on that path is elided in this excerpt).  */
4225 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4227 ivs->n_invariant_uses[iid]--;
4228 if (ivs->n_invariant_uses[iid] == 0)
4233 /* Set USE not to be expressed by any candidate in IVS. */
4236 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4239 unsigned uid = use->id, cid;
4240 struct cost_pair *cp;
/* CP is the cost pair currently chosen for USE; the early-return when USE
   has no candidate is elided in this excerpt.  */
4242 cp = ivs->cand_for_use[uid];
4248 ivs->cand_for_use[uid] = NULL;
4249 ivs->n_cand_uses[cid]--;
/* If this was the last use expressed by the candidate, drop the candidate
   from the set and stop accounting for its maintenance cost.  */
4251 if (ivs->n_cand_uses[cid] == 0)
4253 bitmap_clear_bit (ivs->cands, cid);
4254 /* Do not count the pseudocandidates. */
4258 ivs->cand_cost -= cp->cand->cost;
4260 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
/* Subtract the per-use cost and release the invariants this cost pair
   depended on, then recompute the total cost of the set.  */
4263 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4265 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4266 iv_ca_recount_cost (data, ivs);
4269 /* Add invariants in set INVS to set IVS. */
4272 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
/* Mirror of iv_ca_set_remove_invariants: bump the reference count of each
   invariant; the first reference starts counting it as a used register
   (that bookkeeping line is elided in this excerpt).  */
4280 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4282 ivs->n_invariant_uses[iid]++;
4283 if (ivs->n_invariant_uses[iid] == 1)
4288 /* Set cost pair for USE in set IVS to CP. */
4291 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4292 struct iv_use *use, struct cost_pair *cp)
4294 unsigned uid = use->id, cid;
/* Nothing to do if USE already uses CP.  */
4296 if (ivs->cand_for_use[uid] == cp)
/* First undo the current assignment for USE, if any.  */
4299 if (ivs->cand_for_use[uid])
4300 iv_ca_set_no_cp (data, ivs, use);
/* NOTE(review): the branch handling CP == NULL is elided in this
   excerpt; the code below installs a non-NULL CP.  */
4307 ivs->cand_for_use[uid] = cp;
4308 ivs->n_cand_uses[cid]++;
/* First use of this candidate: add it to the set and account for its
   maintenance cost (dual of the removal path in iv_ca_set_no_cp).  */
4309 if (ivs->n_cand_uses[cid] == 1)
4311 bitmap_set_bit (ivs->cands, cid);
4312 /* Do not count the pseudocandidates. */
4316 ivs->cand_cost += cp->cand->cost;
4318 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4321 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4322 iv_ca_set_add_invariants (ivs, cp->depends_on);
4323 iv_ca_recount_cost (data, ivs);
4327 /* Extend set IVS by expressing USE by some of the candidates in it
4331 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4334 struct cost_pair *best_cp = NULL, *cp;
/* Uses must be added in order of their ids.  */
4338 gcc_assert (ivs->upto >= use->id);
4340 if (ivs->upto == use->id)
/* Pick the cheapest cost pair for USE among the candidates already in the
   set (the NULL-cp skip inside the loop is elided in this excerpt).  */
4346 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4348 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4350 if (cheaper_cost_pair (cp, best_cp))
4354 iv_ca_set_cp (data, ivs, use, best_cp);
4357 /* Get cost for assignment IVS. */
4360 iv_ca_cost (struct iv_ca *ivs)
4362 /* This was a conditional expression but it triggered a bug in
/* Returns infinite_cost when the set contains a use with no viable
   candidate; otherwise returns IVS->cost (the else-branch return is
   elided in this excerpt).  */
4365 return infinite_cost;
4370 /* Returns true if all dependences of CP are among invariants in IVS. */
4373 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
/* No dependences means the cost pair is trivially usable.  */
4378 if (!cp->depends_on)
/* Every invariant CP depends on must already be referenced in IVS.  */
4381 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4383 if (ivs->n_invariant_uses[i] == 0)
4390 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4391 it before NEXT_CHANGE. */
4393 static struct iv_ca_delta *
4394 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4395 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
/* Allocates one list node; the caller owns the resulting chain and must
   release it via iv_ca_delta_free.  */
4397 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4400 change->old_cp = old_cp;
4401 change->new_cp = new_cp;
4402 change->next_change = next_change;
4407 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4410 static struct iv_ca_delta *
4411 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4413 struct iv_ca_delta *last;
/* Splice L2 onto the tail of L1 (the empty-list shortcuts are elided in
   this excerpt).  Linear in the length of L1.  */
4421 for (last = l1; last->next_change; last = last->next_change)
4423 last->next_change = l2;
4428 /* Returns candidate by that USE is expressed in IVS. */
4430 static struct cost_pair *
4431 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
/* May return NULL when USE is not (yet) expressed by any candidate.  */
4433 return ivs->cand_for_use[use->id];
4436 /* Reverse the list of changes DELTA, forming the inverse to it. */
4438 static struct iv_ca_delta *
4439 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4441 struct iv_ca_delta *act, *next, *prev = NULL;
4442 struct cost_pair *tmp;
/* Standard in-place list reversal; additionally each node's old_cp and
   new_cp are swapped so the reversed list undoes the original changes
   (the swap via TMP is partially elided in this excerpt).  */
4444 for (act = delta; act; act = next)
4446 next = act->next_change;
4447 act->next_change = prev;
4451 act->old_cp = act->new_cp;
4458 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4459 reverted instead. */
4462 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4463 struct iv_ca_delta *delta, bool forward)
4465 struct cost_pair *from, *to;
4466 struct iv_ca_delta *act;
/* To revert, walk the inverse (reversed) list; it is restored to its
   original order before returning so DELTA stays usable.  */
4469 delta = iv_ca_delta_reverse (delta);
4471 for (act = delta; act; act = act->next_change)
/* Sanity check: USE must currently be expressed by FROM before being
   switched to TO.  */
4475 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4476 iv_ca_set_cp (data, ivs, act->use, to);
4480 iv_ca_delta_reverse (delta);
4483 /* Returns true if CAND is used in IVS. */
4486 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4488 return ivs->n_cand_uses[cand->id] > 0;
4491 /* Returns number of induction variable candidates in the set IVS. */
4494 iv_ca_n_cands (struct iv_ca *ivs)
4496 return ivs->n_cands;
4499 /* Free the list of changes DELTA. */
4502 iv_ca_delta_free (struct iv_ca_delta **delta)
4504 struct iv_ca_delta *act, *next;
/* NEXT is saved before freeing ACT; *DELTA is cleared afterwards (the
   free and the NULL store are elided in this excerpt).  */
4506 for (act = *delta; act; act = next)
4508 next = act->next_change;
4515 /* Allocates new iv candidates assignment. */
4517 static struct iv_ca *
4518 iv_ca_new (struct ivopts_data *data)
4520 struct iv_ca *nw = XNEW (struct iv_ca);
/* Arrays are zero-initialized (XCNEWVEC): no use has a candidate yet and
   no candidate/invariant has any references.  */
4524 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4525 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4526 nw->cands = BITMAP_ALLOC (NULL);
4529 nw->cand_use_cost = zero_cost;
4531 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4532 nw->cost = zero_cost;
4537 /* Free memory occupied by the set IVS. */
4540 iv_ca_free (struct iv_ca **ivs)
/* Releases everything allocated by iv_ca_new; *IVS itself is freed and
   cleared in lines elided from this excerpt.  */
4542 free ((*ivs)->cand_for_use);
4543 free ((*ivs)->n_cand_uses);
4544 BITMAP_FREE ((*ivs)->cands);
4545 free ((*ivs)->n_invariant_uses);
4550 /* Dumps IVS to FILE. */
4553 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4555 const char *pref = " invariants ";
4557 comp_cost cost = iv_ca_cost (ivs);
4559 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4560 bitmap_print (file, ivs->cands, " candidates ","\n");
/* List the invariants that are referenced at least once; PREF prints the
   label before the first one only (the reset of PREF to a separator is
   elided in this excerpt).  */
4562 for (i = 1; i <= data->max_inv_id; i++)
4563 if (ivs->n_invariant_uses[i])
4565 fprintf (file, "%s%d", pref, i);
4568 fprintf (file, "\n");
4571 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4572 new set, and store differences in DELTA. Number of induction variables
4573 in the new set is stored to N_IVS. */
4576 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4577 struct iv_cand *cand, struct iv_ca_delta **delta,
4583 struct cost_pair *old_cp, *new_cp;
4586 for (i = 0; i < ivs->upto; i++)
4588 use = iv_use (data, i)
4589 old_cp = iv_ca_cand_for_use (ivs, use);
/* Skip uses already expressed by CAND.  */
4592 && old_cp->cand == cand)
4595 new_cp = get_use_iv_cost (data, use, cand);
/* Only switch a use to CAND when its dependences are satisfied and the
   new pair is strictly cheaper than the current one.  */
4599 if (!iv_ca_has_deps (ivs, new_cp))
4602 if (!cheaper_cost_pair (new_cp, old_cp))
4605 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Evaluate the cost of the modified set, then roll the changes back;
   DELTA is returned for the caller to commit if it likes the cost.  */
4608 iv_ca_delta_commit (data, ivs, *delta, true);
4609 cost = iv_ca_cost (ivs);
4611 *n_ivs = iv_ca_n_cands (ivs);
4612 iv_ca_delta_commit (data, ivs, *delta, false);
4617 /* Try narrowing set IVS by removing CAND. Return the cost of
4618 the new set and store the differences in DELTA. */
4621 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4622 struct iv_cand *cand, struct iv_ca_delta **delta)
4626 struct cost_pair *old_cp, *new_cp, *cp;
4628 struct iv_cand *cnd;
/* For every use currently expressed by CAND, find the best replacement
   among the remaining candidates in the set.  */
4632 for (i = 0; i < n_iv_uses (data); i++)
4634 use = iv_use (data, i);
4636 old_cp = iv_ca_cand_for_use (ivs, use);
4637 if (old_cp->cand != cand)
/* When all candidates are considered, scan the full candidate bitmap;
   otherwise restrict the search to the use's related candidates.  */
4642 if (data->consider_all_candidates)
4644 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4649 cnd = iv_cand (data, ci);
4651 cp = get_use_iv_cost (data, use, cnd);
4654 if (!iv_ca_has_deps (ivs, cp))
4657 if (!cheaper_cost_pair (cp, new_cp))
4665 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4670 cnd = iv_cand (data, ci);
4672 cp = get_use_iv_cost (data, use, cnd);
4675 if (!iv_ca_has_deps (ivs, cp))
4678 if (!cheaper_cost_pair (cp, new_cp))
/* No replacement exists for this use: removing CAND is impossible, so
   discard the accumulated delta and report infinite cost.  */
4687 iv_ca_delta_free (delta);
4688 return infinite_cost;
4691 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Cost the narrowed set, then undo; caller decides whether to commit.  */
4694 iv_ca_delta_commit (data, ivs, *delta, true);
4695 cost = iv_ca_cost (ivs);
4696 iv_ca_delta_commit (data, ivs, *delta, false);
4701 /* Try optimizing the set of candidates IVS by removing candidates different
4702 from to EXCEPT_CAND from it. Return cost of the new set, and store
4703 differences in DELTA. */
4706 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4707 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4710 struct iv_ca_delta *act_delta, *best_delta;
4712 comp_cost best_cost, acost;
4713 struct iv_cand *cand;
4716 best_cost = iv_ca_cost (ivs);
/* Try removing each candidate (except EXCEPT_CAND) in turn and keep the
   cheapest resulting delta.  */
4718 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4720 cand = iv_cand (data, i);
4722 if (cand == except_cand)
4725 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4727 if (compare_costs (acost, best_cost) < 0)
4730 iv_ca_delta_free (&best_delta);
4731 best_delta = act_delta;
4734 iv_ca_delta_free (&act_delta);
/* NOTE(review): the early return when no removal helped is elided in
   this excerpt.  */
4743 /* Recurse to possibly remove other unnecessary ivs. */
4744 iv_ca_delta_commit (data, ivs, best_delta, true);
4745 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4746 iv_ca_delta_commit (data, ivs, best_delta, false);
4747 *delta = iv_ca_delta_join (best_delta, *delta);
4751 /* Tries to extend the sets IVS in the best possible way in order
4752 to express the USE. Returns false when no candidate (even at infinite
cost... NOTE(review): the tail of this comment is elided in the excerpt. */
4755 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4758 comp_cost best_cost, act_cost;
4761 struct iv_cand *cand;
4762 struct iv_ca_delta *best_delta = NULL, *act_delta;
4763 struct cost_pair *cp;
/* Baseline: let iv_ca_add_use pick a candidate already in the set, record
   that choice as the initial best delta, then detach it again so the
   alternatives below start from the same state.  */
4765 iv_ca_add_use (data, ivs, use);
4766 best_cost = iv_ca_cost (ivs);
4768 cp = iv_ca_cand_for_use (ivs, use);
4771 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4772 iv_ca_set_no_cp (data, ivs, use);
4775 /* First try important candidates not based on any memory object. Only if
4776 this fails, try the specific ones. Rationale -- in loops with many
4777 variables the best choice often is to use just one generic biv. If we
4778 added here many ivs specific to the uses, the optimization algorithm later
4779 would be likely to get stuck in a local minimum, thus causing us to create
4780 too many ivs. The approach from few ivs to more seems more likely to be
4781 successful -- starting from few ivs, replacing an expensive use by a
4782 specific iv should always be a win. */
4783 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4785 cand = iv_cand (data, i);
4787 if (cand->iv->base_object != NULL_TREE)
4790 if (iv_ca_cand_used_p (ivs, cand))
4793 cp = get_use_iv_cost (data, use, cand);
/* Tentatively express USE by CAND, see how the rest of the set would
   adapt (iv_ca_extend), then undo.  */
4797 iv_ca_set_cp (data, ivs, use, cp);
4798 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4799 iv_ca_set_no_cp (data, ivs, use);
4800 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4802 if (compare_costs (act_cost, best_cost) < 0)
4804 best_cost = act_cost;
4806 iv_ca_delta_free (&best_delta);
4807 best_delta = act_delta;
4810 iv_ca_delta_free (&act_delta);
/* Fallback: only when no important candidate works, try every candidate
   recorded in the use's cost map.  */
4813 if (infinite_cost_p (best_cost))
4815 for (i = 0; i < use->n_map_members; i++)
4817 cp = use->cost_map + i;
4822 /* Already tried this. */
4823 if (cand->important && cand->iv->base_object == NULL_TREE)
4826 if (iv_ca_cand_used_p (ivs, cand))
4830 iv_ca_set_cp (data, ivs, use, cp);
4831 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4832 iv_ca_set_no_cp (data, ivs, use);
4833 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4836 if (compare_costs (act_cost, best_cost) < 0)
4838 best_cost = act_cost;
4841 iv_ca_delta_free (&best_delta);
4842 best_delta = act_delta;
4845 iv_ca_delta_free (&act_delta);
/* Commit whichever alternative won; failure is signalled by an infinite
   best cost.  */
4849 iv_ca_delta_commit (data, ivs, best_delta, true);
4850 iv_ca_delta_free (&best_delta);
4852 return !infinite_cost_p (best_cost);
4855 /* Finds an initial assignment of candidates to uses. */
4857 static struct iv_ca *
4858 get_initial_solution (struct ivopts_data *data)
4860 struct iv_ca *ivs = iv_ca_new (data);
/* Express each use in turn; on failure the set is freed and NULL is
   returned (the cleanup/return lines are elided in this excerpt).  */
4863 for (i = 0; i < n_iv_uses (data); i++)
4864 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4873 /* Tries to improve set of induction variables IVS. Returns true when an
improvement was made... NOTE(review): tail of comment elided in excerpt. */
4876 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4879 comp_cost acost, best_cost = iv_ca_cost (ivs);
4880 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4881 struct iv_cand *cand;
4883 /* Try extending the set of induction variables by one. */
4884 for (i = 0; i < n_iv_cands (data); i++)
4886 cand = iv_cand (data, i);
4888 if (iv_ca_cand_used_p (ivs, cand))
4891 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4895 /* If we successfully added the candidate and the set is small enough,
4896 try optimizing it by removing other candidates. */
4897 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4899 iv_ca_delta_commit (data, ivs, act_delta, true);
4900 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4901 iv_ca_delta_commit (data, ivs, act_delta, false);
4902 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
/* Keep the cheapest extension seen so far.  */
4905 if (compare_costs (acost, best_cost) < 0)
4908 iv_ca_delta_free (&best_delta);
4909 best_delta = act_delta;
4912 iv_ca_delta_free (&act_delta);
/* NOTE(review): the check that some extension was found is elided.  */
4917 /* Try removing the candidates from the set instead. */
4918 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4920 /* Nothing more we can do. */
/* Apply the winning delta; the committed cost must match the predicted
   one, otherwise the bookkeeping is inconsistent.  */
4925 iv_ca_delta_commit (data, ivs, best_delta, true);
4926 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4927 iv_ca_delta_free (&best_delta);
4931 /* Attempts to find the optimal set of induction variables. We do simple
4932 greedy heuristic -- we try to replace at most one candidate in the selected
4933 solution and remove the unused ivs while this improves the cost. */
4935 static struct iv_ca *
4936 find_optimal_iv_set (struct ivopts_data *data)
4942 /* Get the initial solution. */
4943 set = get_initial_solution (data);
/* NULL means some use could not be expressed by any candidate; give up
   (the NULL return is elided in this excerpt).  */
4946 if (dump_file && (dump_flags & TDF_DETAILS))
4947 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4951 if (dump_file && (dump_flags & TDF_DETAILS))
4953 fprintf (dump_file, "Initial set of candidates:\n");
4954 iv_ca_dump (data, dump_file, set);
/* Hill-climb: keep applying single-candidate improvements until none
   lowers the cost.  */
4957 while (try_improve_iv_set (data, set))
4959 if (dump_file && (dump_flags & TDF_DETAILS))
4961 fprintf (dump_file, "Improved to:\n");
4962 iv_ca_dump (data, dump_file, set);
4966 if (dump_file && (dump_flags & TDF_DETAILS))
4968 comp_cost cost = iv_ca_cost (set);
4969 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
/* Record the chosen candidate on each use for the rewrite phase.  */
4972 for (i = 0; i < n_iv_uses (data); i++)
4974 use = iv_use (data, i);
4975 use->selected = iv_ca_cand_for_use (set, use)->cand;
4981 /* Creates a new induction variable corresponding to CAND. */
4984 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4986 gimple_stmt_iterator incr_pos;
/* Choose where the increment statement goes, based on the candidate's
   position (the switch/case structure is partially elided here).  */
4996 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5000 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
/* IP_ORIGINAL candidates reuse an existing biv instead of creating a
   fresh one.  */
5005 /* Mark that the iv is preserved. */
5006 name_info (data, cand->var_before)->preserve_biv = true;
5007 name_info (data, cand->var_after)->preserve_biv = true;
5009 /* Rewrite the increment so that it uses var_before directly. */
5010 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5015 gimple_add_tmp_var (cand->var_before);
5016 add_referenced_var (cand->var_before);
5018 base = unshare_expr (cand->iv->base);
5020 create_iv (base, unshare_expr (cand->iv->step),
5021 cand->var_before, data->current_loop,
5022 &incr_pos, after, &cand->var_before, &cand->var_after);
5025 /* Creates new induction variables described in SET. */
5028 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5031 struct iv_cand *cand;
/* Materialize every candidate that ended up in the chosen set.  */
5034 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5036 cand = iv_cand (data, i);
5037 create_new_iv (data, cand);
5041 /* Returns the phi-node in BB with result RESULT. */
5044 get_phi_with_result (basic_block bb, tree result)
5046 gimple_stmt_iterator i = gsi_start_phis (bb);
/* Linear scan over the phi nodes of BB; the not-found fallthrough
   (gcc_unreachable or NULL) is elided in this excerpt.  */
5048 for (; !gsi_end_p (i); gsi_next (&i))
5049 if (gimple_phi_result (gsi_stmt (i)) == result)
5050 return gsi_stmt (i);
5057 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5058 is true, remove also the ssa name defined by the statement. */
5061 remove_statement (gimple stmt, bool including_defined_name)
5063 if (gimple_code (stmt) == GIMPLE_PHI)
/* Phi nodes need a fresh iterator positioned on the phi with the same
   result, since STMT's own iterator may be stale.  */
5065 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5066 gimple_phi_result (stmt));
5067 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5068 remove_phi_node (&bsi, including_defined_name);
/* Ordinary statements: remove from the sequence and release the SSA
   names it defines.  */
5072 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5073 gsi_remove (&bsi, true);
5074 release_defs (stmt);
5078 /* Rewrites USE (definition of iv used in a nonlinear expression)
5079 using candidate CAND. */
5082 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5083 struct iv_use *use, struct iv_cand *cand)
5088 gimple_stmt_iterator bsi;
5090 /* An important special case -- if we are asked to express value of
5091 the original iv by itself, just exit; there is no need to
5092 introduce a new computation (that might also need casting the
5093 variable to unsigned and back). */
5094 if (cand->pos == IP_ORIGINAL
5095 && cand->incremented_at == use->stmt)
5097 tree step, ctype, utype;
5098 enum tree_code incr_code = PLUS_EXPR, old_code;
5100 gcc_assert (is_gimple_assign (use->stmt));
5101 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5103 step = cand->iv->step;
5104 ctype = TREE_TYPE (step);
5105 utype = TREE_TYPE (cand->var_after);
/* A negated step is expressed as a subtraction of the positive step.  */
5106 if (TREE_CODE (step) == NEGATE_EXPR)
5108 incr_code = MINUS_EXPR;
5109 step = TREE_OPERAND (step, 0);
5112 /* Check whether we may leave the computation unchanged.
5113 This is the case only if it does not rely on other
5114 computations in the loop -- otherwise, the computation
5115 we rely upon may be removed in remove_unused_ivs,
5116 thus leading to ICE. */
5117 old_code = gimple_assign_rhs_code (use->stmt);
5118 if (old_code == PLUS_EXPR
5119 || old_code == MINUS_EXPR
5120 || old_code == POINTER_PLUS_EXPR)
/* Locate the non-iv operand; for MINUS only the second operand may be
   the varying part.  */
5122 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5123 op = gimple_assign_rhs2 (use->stmt);
5124 else if (old_code != MINUS_EXPR
5125 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5126 op = gimple_assign_rhs1 (use->stmt);
5134 && (TREE_CODE (op) == INTEGER_CST
5135 || operand_equal_p (op, step, 0)))
5138 /* Otherwise, add the necessary computations to express
the iv in terms of var_before (tail of comment elided in excerpt). */
5140 op = fold_convert (ctype, cand->var_before);
5141 comp = fold_convert (utype,
5142 build2 (incr_code, ctype, op,
5143 unshare_expr (step)));
/* General case: compute USE's value from CAND.  */
5147 comp = get_computation (data->current_loop, use, cand);
5148 gcc_assert (comp != NULL_TREE);
/* Pick the statement/insertion point depending on whether USE is a phi
   node or an assignment.  */
5151 switch (gimple_code (use->stmt))
5154 tgt = PHI_RESULT (use->stmt);
5156 /* If we should keep the biv, do not replace it. */
5157 if (name_info (data, tgt)->preserve_biv)
5160 bsi = gsi_after_labels (gimple_bb (use->stmt));
5164 tgt = gimple_assign_lhs (use->stmt);
5165 bsi = gsi_for_stmt (use->stmt);
5172 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5173 true, GSI_SAME_STMT);
/* Phis are replaced by an explicit assignment; plain assignments just
   get a new RHS.  */
5175 if (gimple_code (use->stmt) == GIMPLE_PHI)
5177 ass = gimple_build_assign (tgt, op);
5178 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5179 remove_statement (use->stmt, false);
5183 gimple_assign_set_rhs_from_tree (&bsi, op);
5184 use->stmt = gsi_stmt (bsi);
5188 /* Replaces ssa name in index IDX by its basic variable. Callback for
for_each_index (tail of comment elided in excerpt). */
5192 idx_remove_ssa_names (tree base, tree *idx,
5193 void *data ATTRIBUTE_UNUSED)
5197 if (TREE_CODE (*idx) == SSA_NAME)
5198 *idx = SSA_NAME_VAR (*idx);
/* Array refs may also carry SSA names in operands 2 (lower bound) and
   3 (element size); strip those too when present.  */
5200 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5202 op = &TREE_OPERAND (base, 2);
5204 && TREE_CODE (*op) == SSA_NAME)
5205 *op = SSA_NAME_VAR (*op);
5206 op = &TREE_OPERAND (base, 3);
5208 && TREE_CODE (*op) == SSA_NAME)
5209 *op = SSA_NAME_VAR (*op);
5215 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5218 unshare_and_remove_ssa_names (tree ref)
/* Work on a private copy so the original tree is left untouched.  */
5220 ref = unshare_expr (ref);
5221 for_each_index (&ref, idx_remove_ssa_names, NULL);
5226 /* Extract the alias analysis info for the memory reference REF. There are
5227 several ways how this information may be stored and what precisely is
5228 its semantics depending on the type of the reference, but there always is
5229 somewhere hidden one _DECL node that is used to determine the set of
5230 virtual operands for the reference. The code below deciphers this jungle
5231 and extracts this single useful piece of information. */
5234 get_ref_tag (tree ref, tree orig)
5236 tree var = get_base_address (ref);
5237 tree aref = NULL_TREE, tag, sv;
5238 HOST_WIDE_INT offset, size, maxsize;
/* Walk ORIG down through component refs looking for the innermost base
   with a known extent (the loop-exit conditions are elided here).  */
5240 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5242 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5250 if (TREE_CODE (var) == INDIRECT_REF)
5252 /* If the base is a dereference of a pointer, first check its name memory
5253 tag. If it does not have one, use its symbol memory tag. */
5254 var = TREE_OPERAND (var, 0);
5255 if (TREE_CODE (var) != SSA_NAME)
5258 if (SSA_NAME_PTR_INFO (var))
5260 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5265 var = SSA_NAME_VAR (var);
5266 tag = symbol_mem_tag (var);
5267 gcc_assert (tag != NULL_TREE);
/* Non-pointer bases: take the symbol memory tag of the DECL directly
   (the surrounding conditionals are elided in this excerpt).  */
5275 tag = symbol_mem_tag (var);
5283 /* Copies the reference information from OLD_REF to NEW_REF. */
5286 copy_ref_info (tree new_ref, tree old_ref)
5288 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5289 copy_mem_ref_info (new_ref, old_ref);
/* For other references, rebuild the TMR_ORIGINAL (SSA names stripped)
   and recompute the alias tag from it.  */
5292 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5293 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5297 /* Rewrites USE (address that is an iv) using candidate CAND. */
5300 rewrite_use_address (struct ivopts_data *data,
5301 struct iv_use *use, struct iv_cand *cand)
5304 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
/* Express the address as an affine combination of CAND, then emit a
   TARGET_MEM_REF for it in place of the original operand.  */
5308 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5310 unshare_aff_combination (&aff);
5312 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5313 copy_ref_info (ref, *use->op_p);
5317 /* Rewrites USE (the condition such that one of the arguments is an iv) using
candidate CAND (tail of comment elided in excerpt). */
5321 rewrite_use_compare (struct ivopts_data *data,
5322 struct iv_use *use, struct iv_cand *cand)
5324 tree comp, *var_p, op, bound;
5325 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5326 enum tree_code compare;
5327 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
/* Fast path: the exit test can be replaced entirely by comparing CAND
   against a precomputed bound (iv elimination).  */
5333 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5334 tree var_type = TREE_TYPE (var);
5337 compare = iv_elimination_compare (data, use);
5338 bound = unshare_expr (fold_convert (var_type, bound));
5339 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
/* The bound computation is loop invariant; emit it on the preheader
   edge.  */
5341 gsi_insert_seq_on_edge_immediate (
5342 loop_preheader_edge (data->current_loop),
5345 gimple_cond_set_lhs (use->stmt, var);
5346 gimple_cond_set_code (use->stmt, compare);
5347 gimple_cond_set_rhs (use->stmt, op);
5351 /* The induction variable elimination failed; just express the original
biv in terms of the candidate (tail of comment elided in excerpt). */
5353 comp = get_computation (data->current_loop, use, cand);
5354 gcc_assert (comp != NULL_TREE);
5356 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5359 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5360 true, GSI_SAME_STMT);
5363 /* Rewrites USE using candidate CAND. */
5366 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
/* Bracket the rewrite with push/pop of statement changes so operand
   caches are updated after modification.  */
5368 push_stmt_changes (&use->stmt);
/* Dispatch on the kind of use (the default/gcc_unreachable arm is
   elided in this excerpt).  */
5372 case USE_NONLINEAR_EXPR:
5373 rewrite_use_nonlinear_expr (data, use, cand);
5377 rewrite_use_address (data, use, cand);
5381 rewrite_use_compare (data, use, cand);
5388 pop_stmt_changes (&use->stmt);
5391 /* Rewrite the uses using the selected induction variables. */
5394 rewrite_uses (struct ivopts_data *data)
5397 struct iv_cand *cand;
/* USE->selected was filled in by find_optimal_iv_set.  */
5400 for (i = 0; i < n_iv_uses (data); i++)
5402 use = iv_use (data, i);
5403 cand = use->selected;
5406 rewrite_use (data, use, cand);
5410 /* Removes the ivs that are not used after rewriting. */
5413 remove_unused_ivs (struct ivopts_data *data)
5418 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5420 struct version_info *info;
5422 info = ver_info (data, j);
/* Delete an iv's defining statement only when it is a real (nonzero
   step) iv with no remaining uses and is not marked for preservation
   (additional conditions are elided in this excerpt).  */
5424 && !integer_zerop (info->iv->step)
5426 && !info->iv->have_use_for
5427 && !info->preserve_biv
5428 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5432 /* Frees data allocated by the optimization of a single loop. */
5435 free_loop_data (struct ivopts_data *data)
/* The niters cache is per-loop; drop it entirely.  */
5443 pointer_map_destroy (data->niters);
5444 data->niters = NULL;
/* Reset the per-SSA-name flags for names touched by this loop.  */
5447 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5449 struct version_info *info;
5451 info = ver_info (data, i);
5455 info->has_nonlin_use = false;
5456 info->preserve_biv = false;
5459 bitmap_clear (data->relevant);
5460 bitmap_clear (data->important_candidates);
/* Release per-use data: related-candidate bitmaps and cost maps
   (including each entry's depends_on bitmap).  */
5462 for (i = 0; i < n_iv_uses (data); i++)
5464 struct iv_use *use = iv_use (data, i);
5467 BITMAP_FREE (use->related_cands);
5468 for (j = 0; j < use->n_map_members; j++)
5469 if (use->cost_map[j].depends_on)
5470 BITMAP_FREE (use->cost_map[j].depends_on);
5471 free (use->cost_map);
5474 VEC_truncate (iv_use_p, data->iv_uses, 0);
/* Release per-candidate data.  */
5476 for (i = 0; i < n_iv_cands (data); i++)
5478 struct iv_cand *cand = iv_cand (data, i);
5482 if (cand->depends_on)
5483 BITMAP_FREE (cand->depends_on);
5486 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
/* Grow the version_info array if new SSA names were created; the old
   array is discarded wholesale since all entries were reset above.  */
5488 if (data->version_info_size < num_ssa_names)
5490 data->version_info_size = 2 * num_ssa_names;
5491 free (data->version_info);
5492 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5495 data->max_inv_id = 0;
/* Clear DECL_RTLs that were temporarily set for cost estimation.  */
5497 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5498 SET_DECL_RTL (obj, NULL_RTX);
5500 VEC_truncate (tree, decl_rtl_to_reset, 0);
5503 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
array of loops... NOTE(review): tail of comment elided in excerpt. */
5507 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
/* Per-loop state first, then the pass-lifetime allocations.  */
5509 free_loop_data (data);
5510 free (data->version_info);
5511 BITMAP_FREE (data->relevant);
5512 BITMAP_FREE (data->important_candidates);
5514 VEC_free (tree, heap, decl_rtl_to_reset);
5515 VEC_free (iv_use_p, heap, data->iv_uses);
5516 VEC_free (iv_cand_p, heap, data->iv_candidates);
5519 /* Optimizes the LOOP. Returns true if anything changed. */
5522 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5524 bool changed = false;
5525 struct iv_ca *iv_ca;
/* free_loop_data must have been run after the previous loop.  */
5528 gcc_assert (!data->niters);
5529 data->current_loop = loop;
5530 data->speed = optimize_loop_for_speed_p (loop);
5532 if (dump_file && (dump_flags & TDF_DETAILS))
5534 fprintf (dump_file, "Processing loop %d\n", loop->num);
5536 exit = single_dom_exit (loop);
5539 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5540 exit->src->index, exit->dest->index);
5541 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5542 fprintf (dump_file, "\n");
5545 fprintf (dump_file, "\n");
5548 /* For each ssa name determines whether it behaves as an induction variable
in LOOP (tail of comment elided in excerpt). */
5550 if (!find_induction_variables (data))
5553 /* Finds interesting uses (item 1). */
5554 find_interesting_uses (data);
/* Bail out on pathologically large loops to keep compile time bounded.  */
5555 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5558 /* Finds candidates for the induction variables (item 2). */
5559 find_iv_candidates (data);
5561 /* Calculates the costs (item 3, part 1). */
5562 determine_use_iv_costs (data);
5563 determine_iv_costs (data);
5564 determine_set_costs (data);
5566 /* Find the optimal set of induction variables (item 3, part 2). */
5567 iv_ca = find_optimal_iv_set (data);
/* NOTE(review): the NULL check and the setting of CHANGED are elided
   in this excerpt.  */
5572 /* Create the new induction variables (item 4, part 1). */
5573 create_new_ivs (data, iv_ca);
5574 iv_ca_free (&iv_ca);
5576 /* Rewrite the uses (item 4, part 2). */
5577 rewrite_uses (data);
5579 /* Remove the ivs that are unused after rewriting. */
5580 remove_unused_ivs (data);
5582 /* We have changed the structure of induction variables; it might happen
5583 that definitions in the scev database refer to some of them that were
eliminated (tail of comment elided in excerpt). */
5588 free_loop_data (data);
5593 /* Main entry point. Optimizes induction variables in loops. */
5596 tree_ssa_iv_optimize (void)
5599 struct ivopts_data data;
5602 tree_ssa_iv_optimize_init (&data);
5604 /* Optimize the loops starting with the innermost ones. */
5605 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5607 if (dump_file && (dump_flags & TDF_DETAILS))
5608 flow_loop_dump (loop, dump_file, NULL, 1);
5610 tree_ssa_iv_optimize_loop (&data, loop);
5613 tree_ssa_iv_optimize_finalize (&data);