1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
96 /* The infinite cost. */
97 #define INFTY 10000000
99 /* The expected number of loop iterations. TODO -- use profiling instead of
/* NOTE(review): AVG_LOOP_NITER ignores its LOOP argument; 5 is a fixed
   guess standing in for real profile data (see TODO above).  */
101 #define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
/* NOTE(review): this extract drops interior source lines (the embedded
   original line numbers are non-contiguous); struct/enum openers and
   closers are among the missing lines.  Fields are kept verbatim.  */
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
/* Fields of struct version_info; ver_info () below indexes an array of
   these by SSA name version.  */
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* The kinds of interesting induction-variable uses (enum use_type).  */
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
135 /* The candidate - cost pair. */
138 struct iv_cand *cand; /* The candidate. */
139 unsigned cost; /* The cost. */
140 bitmap depends_on; /* The list of invariants that have to be
142 tree value; /* For final value elimination, the expression for
143 the final value of the iv. For iv elimination,
144 the new bound to compare with. */
/* Fields of struct iv_use -- one record per interesting use of an iv.  */
150 unsigned id; /* The id of the use. */
151 enum use_type type; /* Type of the use. */
152 struct iv *iv; /* The induction variable it is based on. */
153 tree stmt; /* Statement in that it occurs. */
154 tree *op_p; /* The place where it occurs. */
155 bitmap related_cands; /* The set of "related" iv candidates, plus the common
158 unsigned n_map_members; /* Number of candidates in the cost_map list. */
159 struct cost_pair *cost_map;
160 /* The costs wrto the iv candidates. */
162 struct iv_cand *selected;
163 /* The selected candidate. */
166 /* The position where the iv is computed. */
169 IP_NORMAL, /* At the end, just before the exit condition. */
170 IP_END, /* At the end of the latch block. */
171 IP_ORIGINAL /* The original biv. */
174 /* The induction variable candidate. */
177 unsigned id; /* The number of the candidate. */
178 bool important; /* Whether this is an "important" candidate, i.e. such
179 that it should be considered by all uses. */
180 enum iv_position pos; /* Where it is computed. */
181 tree incremented_at; /* For original biv, the statement where it is
183 tree var_before; /* The variable used for it before increment. */
184 tree var_after; /* The variable used for it after increment. */
185 struct iv *iv; /* The value of the candidate. NULL for
186 "pseudocandidate" used to indicate the possibility
187 to replace the final value of an iv by direct
188 computation of the value. */
189 unsigned cost; /* Cost of the candidate. */
190 bitmap depends_on; /* The list of invariants that are used in step of the
194 /* The data used by the induction variable optimizations. */
/* Heap-allocated vectors of use/candidate pointers (old GCC VEC API).  */
196 typedef struct iv_use *iv_use_p;
198 DEF_VEC_ALLOC_P(iv_use_p,heap);
200 typedef struct iv_cand *iv_cand_p;
201 DEF_VEC_P(iv_cand_p);
202 DEF_VEC_ALLOC_P(iv_cand_p,heap);
/* Fields of struct ivopts_data -- the per-pass state threaded through
   every routine in this file.  NOTE(review): the struct opener and some
   field lines are missing from this extract.  */
206 /* The currently optimized loop. */
207 struct loop *current_loop;
209 /* Number of registers used in it. */
212 /* Numbers of iterations for all exits of the current loop. */
213 struct pointer_map_t *niters;
215 /* The size of version_info array allocated. */
216 unsigned version_info_size;
218 /* The array of information for the ssa names. */
219 struct version_info *version_info;
221 /* The bitmap of indices in version_info whose value was changed. */
224 /* The maximum invariant id. */
227 /* The uses of induction variables. */
228 VEC(iv_use_p,heap) *iv_uses;
230 /* The candidates. */
231 VEC(iv_cand_p,heap) *iv_candidates;
233 /* A bitmap of important candidates. */
234 bitmap important_candidates;
236 /* Whether to consider just related and important candidates when replacing a
238 bool consider_all_candidates;
241 /* An assignment of iv candidates to uses. */
/* Fields of struct iv_ca -- a candidate-to-use assignment with its
   aggregate cost bookkeeping, used by the set-selection search.  */
245 /* The number of uses covered by the assignment. */
248 /* Number of uses that cannot be expressed by the candidates in the set. */
251 /* Candidate assigned to a use, together with the related costs. */
252 struct cost_pair **cand_for_use;
254 /* Number of times each candidate is used. */
255 unsigned *n_cand_uses;
257 /* The candidates used. */
260 /* The number of candidates in the set. */
263 /* Total number of registers needed. */
266 /* Total cost of expressing uses. */
267 unsigned cand_use_cost;
269 /* Total cost of candidates. */
272 /* Number of times each invariant is used. */
273 unsigned *n_invariant_uses;
275 /* Total cost of the assignment. */
279 /* Difference of two iv candidate assignments. */
/* Fields of struct iv_ca_delta -- a linked list of per-use changes that
   can be committed or rolled back.  */
286 /* An old assignment (for rollback purposes). */
287 struct cost_pair *old_cp;
289 /* A new assignment. */
290 struct cost_pair *new_cp;
292 /* Next change in the list. */
293 struct iv_ca_delta *next_change;
296 /* Bound on number of candidates below that all candidates are considered. */
298 #define CONSIDER_ALL_CANDIDATES_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301 /* If there are more iv occurrences, we just give up (it is quite unlikely that
302 optimizing such a loop would help, and it would take ages). */
304 #define MAX_CONSIDERED_USES \
305 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307 /* If there are at most this number of ivs in the set, try removing unnecessary
308 ivs from the set always. */
310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313 /* The list of trees for that the decl_rtl field must be reset is stored
/* File-scope list; filled during cost estimation, flushed at pass end
   (the code doing so is outside this extract).  */
316 static VEC(tree,heap) *decl_rtl_to_reset;
318 /* Number of uses recorded in DATA. */
/* Trivial accessors over the two VECs in struct ivopts_data.  I is not
   range-checked; callers iterate up to n_iv_uses ()/n_iv_cands ().
   NOTE(review): function bodies' braces are missing from this extract.  */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data *data)
323 return VEC_length (iv_use_p, data->iv_uses);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use *
329 iv_use (struct ivopts_data *data, unsigned i)
331 return VEC_index (iv_use_p, data->iv_uses, i);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data *data)
339 return VEC_length (iv_cand_p, data->iv_candidates);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand *
345 iv_cand (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_cand_p, data->iv_candidates, i);
350 /* The single loop exit if it dominates the latch, NULL otherwise. */
/* NOTE(review): the return-type line (presumably "static edge") and the
   NULL-returning early exits are missing from this extract; only the
   single_exit () fetch and the dominance filter are visible.  */
353 single_dom_exit (struct loop *loop)
355 edge exit = single_exit (loop);
360 if (!just_once_each_iteration_p (loop, exit->src))
366 /* Dumps information about the induction variable IV to FILE. */
/* Debug-dump helpers.  They are declared extern (not static) so they can
   be called by hand from a debugger.  NOTE(review): the conditionals
   guarding several of the fprintf branches (e.g. base vs. invariant,
   base_object presence, biv_p) are missing from this extract.  */
368 extern void dump_iv (FILE *, struct iv *);
370 dump_iv (FILE *file, struct iv *iv)
374 fprintf (file, "ssa name ");
375 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
376 fprintf (file, "\n");
379 fprintf (file, " type ");
380 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
381 fprintf (file, "\n");
385 fprintf (file, " base ");
386 print_generic_expr (file, iv->base, TDF_SLIM);
387 fprintf (file, "\n");
389 fprintf (file, " step ");
390 print_generic_expr (file, iv->step, TDF_SLIM);
391 fprintf (file, "\n");
395 fprintf (file, " invariant ");
396 print_generic_expr (file, iv->base, TDF_SLIM);
397 fprintf (file, "\n");
402 fprintf (file, " base object ");
403 print_generic_expr (file, iv->base_object, TDF_SLIM);
404 fprintf (file, "\n");
408 fprintf (file, " is a biv\n");
411 /* Dumps information about the USE to FILE. */
/* NOTE(review): the switch over use->type and its break/default lines are
   partly missing; the three case labels print "generic", "address" and
   "compare" respectively.  */
413 extern void dump_use (FILE *, struct iv_use *);
415 dump_use (FILE *file, struct iv_use *use)
417 fprintf (file, "use %d\n", use->id);
421 case USE_NONLINEAR_EXPR:
422 fprintf (file, " generic\n");
426 fprintf (file, " address\n");
430 fprintf (file, " compare\n");
437 fprintf (file, " in statement ");
438 print_generic_expr (file, use->stmt, TDF_SLIM);
439 fprintf (file, "\n");
441 fprintf (file, " at position ");
443 print_generic_expr (file, *use->op_p, TDF_SLIM);
444 fprintf (file, "\n");
446 dump_iv (file, use->iv);
448 if (use->related_cands)
450 fprintf (file, " related candidates ");
451 dump_bitmap (file, use->related_cands);
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data *);
459 dump_uses (FILE *file, struct ivopts_data *data)
464 for (i = 0; i < n_iv_uses (data); i++)
466 use = iv_use (data, i);
468 dump_use (file, use);
469 fprintf (file, "\n");
473 /* Dumps information about induction variable candidate CAND to FILE. */
475 extern void dump_cand (FILE *, struct iv_cand *);
477 dump_cand (FILE *file, struct iv_cand *cand)
479 struct iv *iv = cand->iv;
481 fprintf (file, "candidate %d%s\n",
482 cand->id, cand->important ? " (important)" : "");
484 if (cand->depends_on)
486 fprintf (file, " depends on ");
487 dump_bitmap (file, cand->depends_on);
/* NOTE(review): the branches below are the arms of tests on iv (NULL =>
   final value replacement) and cand->pos (IP_NORMAL / IP_END /
   IP_ORIGINAL); the tests themselves are missing from this extract.  */
492 fprintf (file, " final value replacement\n");
499 fprintf (file, " incremented before exit test\n");
503 fprintf (file, " incremented at end\n");
507 fprintf (file, " original biv\n");
514 /* Returns the info for ssa version VER. */
/* No bounds check: VER is presumably kept below version_info_size by the
   callers -- TODO confirm against the resize logic outside this extract.  */
516 static inline struct version_info *
517 ver_info (struct ivopts_data *data, unsigned ver)
519 return data->version_info + ver;
522 /* Returns the info for ssa name NAME. */
524 static inline struct version_info *
525 name_info (struct ivopts_data *data, tree name)
527 return ver_info (data, SSA_NAME_VERSION (name));
530 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
/* NOTE(review): early-return lines are missing throughout this group; the
   visible lines show only the main tests.  */
534 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
536 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
540 if (sbb == loop->latch)
546 return stmt == last_stmt (bb);
549 /* Returns true if STMT if after the place where the original induction
550 variable CAND is incremented. */
553 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
555 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
556 basic_block stmt_bb = bb_for_stmt (stmt);
557 block_stmt_iterator bsi;
559 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
562 if (stmt_bb != cand_bb)
565 /* Scan the block from the end, since the original ivs are usually
566 incremented at the end of the loop body. */
/* Whichever of the increment or STMT is found first (from the end) decides
   the ordering; the two returns inside the loop are on the missing lines.  */
567 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
569 if (bsi_stmt (bsi) == cand->incremented_at)
571 if (bsi_stmt (bsi) == stmt)
576 /* Returns true if STMT if after the place where the induction variable
577 CAND is incremented in LOOP. */
/* Dispatches on cand->pos; the switch scaffolding is on missing lines.  */
580 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
588 return stmt_after_ip_normal_pos (loop, stmt);
591 return stmt_after_ip_original_pos (cand, stmt);
598 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
601 abnormal_ssa_name_p (tree exp)
606 if (TREE_CODE (exp) != SSA_NAME)
609 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
612 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
613 abnormal phi node. Callback for for_each_index. */
/* For ARRAY_REFs, operands 2 and 3 are the low bound and element size;
   both are checked in addition to the index itself.  */
616 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
617 void *data ATTRIBUTE_UNUSED)
619 if (TREE_CODE (base) == ARRAY_REF)
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
623 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
627 return !abnormal_ssa_name_p (*index);
630 /* Returns true if EXPR contains a ssa name that occurs in an
631 abnormal phi node. */
/* Recursive walk over the operands of EXPR.  NOTE(review): the switch on
   the tree-code class that selects how many operands to recurse into is
   on missing lines; only the per-operand recursions are visible.  */
634 contains_abnormal_ssa_name_p (tree expr)
637 enum tree_code_class class;
642 code = TREE_CODE (expr);
643 class = TREE_CODE_CLASS (code);
645 if (code == SSA_NAME)
646 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
648 if (code == INTEGER_CST
649 || is_gimple_min_invariant (expr))
652 if (code == ADDR_EXPR)
653 return !for_each_index (&TREE_OPERAND (expr, 0),
654 idx_contains_abnormal_ssa_name_p,
661 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
666 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
678 /* Returns tree describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
/* Results are memoized in DATA->niters, keyed by the exit edge; the map
   is created lazily on first call.  NOTE(review): the cache-hit return
   path and the NULL fallback are on missing lines.  */
682 niter_for_exit (struct ivopts_data *data, edge exit)
684 struct tree_niter_desc desc;
690 data->niters = pointer_map_create ();
694 slot = pointer_map_contains (data->niters, exit);
698 /* Try to determine number of iterations. We must know it
699 unconditionally (i.e., without possibility of # of iterations
700 being zero). Also, we cannot safely work with ssa names that
701 appear in phi nodes on abnormal edges, so that we do not create
702 overlapping life ranges for them (PR 27283). */
703 if (number_of_iterations_exit (data->current_loop,
705 && integer_zerop (desc.may_be_zero)
706 && !contains_abnormal_ssa_name_p (desc.niter))
711 *pointer_map_insert (data->niters, exit) = niter;
719 /* Returns tree describing number of iterations determined from
720 single dominating exit of DATA->current_loop, or NULL if something
/* Thin wrapper: NULL when the loop has no single dominating exit (the
   guard is on a missing line), otherwise delegates to niter_for_exit.  */
724 niter_for_single_dom_exit (struct ivopts_data *data)
726 edge exit = single_dom_exit (data->current_loop);
731 return niter_for_exit (data, exit);
734 /* Initializes data structures used by the iv optimization pass, stored
/* Allocates the per-pass state: version_info sized at twice the current
   number of SSA names (headroom for names created during the pass),
   bitmaps, and the use/candidate vectors.  Counterpart teardown is
   outside this extract.  */
738 tree_ssa_iv_optimize_init (struct ivopts_data *data)
740 data->version_info_size = 2 * num_ssa_names;
741 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
742 data->relevant = BITMAP_ALLOC (NULL);
743 data->important_candidates = BITMAP_ALLOC (NULL);
744 data->max_inv_id = 0;
746 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
747 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
748 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
751 /* Returns a memory object to that EXPR points. In case we are able to
752 determine that it does not point to any such object, NULL is returned. */
/* Recursive over the expression; the result is always of ptr_type_node.
   NOTE(review): the switch on CODE (ADDR_EXPR / PLUS_EXPR / MINUS_EXPR /
   default arms) is on missing lines; the visible returns belong to those
   arms.  */
755 determine_base_object (tree expr)
757 enum tree_code code = TREE_CODE (expr);
758 tree base, obj, op0, op1;
760 /* If this is a pointer casted to any type, we need to determine
761 the base object for the pointer; so handle conversions before
762 throwing away non-pointer expressions. */
763 if (TREE_CODE (expr) == NOP_EXPR
764 || TREE_CODE (expr) == CONVERT_EXPR)
765 return determine_base_object (TREE_OPERAND (expr, 0));
767 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
/* ADDR_EXPR arm: strip to the base address of the referenced object.  */
776 obj = TREE_OPERAND (expr, 0);
777 base = get_base_address (obj);
782 if (TREE_CODE (base) == INDIRECT_REF)
783 return determine_base_object (TREE_OPERAND (base, 0));
785 return fold_convert (ptr_type_node,
786 build_fold_addr_expr (base));
/* PLUS_EXPR/MINUS_EXPR arm: combine base objects of both operands.  */
790 op0 = determine_base_object (TREE_OPERAND (expr, 0));
791 op1 = determine_base_object (TREE_OPERAND (expr, 1));
797 return (code == PLUS_EXPR
799 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
801 return fold_build2 (code, ptr_type_node, op0, op1);
804 return fold_convert (ptr_type_node, expr);
808 /* Allocates an induction variable with given initial value BASE and step STEP
/* Returns a heap-allocated, zero-initialized struct iv (XCNEW); the
   assignments of base/step/biv_p are on missing lines.  Caller owns the
   returned iv.  STEP must be non-NULL (asserted).  */
812 alloc_iv (tree base, tree step)
814 struct iv *iv = XCNEW (struct iv);
815 gcc_assert (step != NULL_TREE);
818 iv->base_object = determine_base_object (base);
821 iv->have_use_for = false;
823 iv->ssa_name = NULL_TREE;
828 /* Sets STEP and BASE for induction variable IV. */
/* IV here is the SSA name; its version_info record must not already
   carry an iv (asserted).  Marks the name as relevant for the pass.  */
831 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
833 struct version_info *info = name_info (data, iv);
835 gcc_assert (!info->iv);
837 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
838 info->iv = alloc_iv (base, step);
839 info->iv->ssa_name = iv;
842 /* Finds induction variable declaration for VAR. */
/* Returns the iv description for VAR, or NULL for non-pointer/non-integral
   types (that early return is on a missing line).  Names defined outside
   the current loop are lazily registered as zero-step "invariant" ivs.  */
845 get_iv (struct ivopts_data *data, tree var)
848 tree type = TREE_TYPE (var);
850 if (!POINTER_TYPE_P (type)
851 && !INTEGRAL_TYPE_P (type))
854 if (!name_info (data, var)->iv)
856 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
859 || !flow_bb_inside_loop_p (data->current_loop, bb))
860 set_iv (data, var, var, build_int_cst (type, 0));
863 return name_info (data, var)->iv;
866 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
867 not define a simple affine biv with nonzero step. */
/* Uses scalar evolution (simple_iv) on the phi result; virtual operands
   (!is_gimple_reg) are rejected.  */
870 determine_biv_step (tree phi)
872 struct loop *loop = bb_for_stmt (phi)->loop_father;
873 tree name = PHI_RESULT (phi);
876 if (!is_gimple_reg (name))
879 if (!simple_iv (loop, phi, name, &iv, true))
882 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
885 /* Finds basic ivs. */
/* Walks the phi nodes of the loop header; registers each simple affine
   biv via set_iv.  Skips names on abnormal phis and bases/steps that
   contain such names.  The bool "found" result and its returns are on
   missing lines.  */
888 find_bivs (struct ivopts_data *data)
890 tree phi, step, type, base;
892 struct loop *loop = data->current_loop;
894 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
896 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
899 step = determine_biv_step (phi);
/* The initial value comes in on the preheader edge; expand trivial
   copies/computations so the base is in a canonical form.  */
903 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
904 base = expand_simple_operations (base);
905 if (contains_abnormal_ssa_name_p (base)
906 || contains_abnormal_ssa_name_p (step))
909 type = TREE_TYPE (PHI_RESULT (phi));
910 base = fold_convert (type, base);
912 step = fold_convert (type, step);
914 set_iv (data, PHI_RESULT (phi), base, step);
921 /* Marks basic ivs. */
/* Second pass over the header phis: flags both the phi result's iv and
   the latch-edge increment's iv as biv_p, unless the increment lives in
   a subloop or an irreducible region.  */
924 mark_bivs (struct ivopts_data *data)
927 struct iv *iv, *incr_iv;
928 struct loop *loop = data->current_loop;
931 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
933 iv = get_iv (data, PHI_RESULT (phi));
937 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
938 incr_iv = get_iv (data, var);
942 /* If the increment is in the subloop, ignore it. */
943 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
944 if (incr_bb->loop_father != data->current_loop
945 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
949 incr_iv->biv_p = true;
953 /* Checks whether STMT defines a linear induction variable and stores its
/* On success IV receives the affine description; on any of the early
   rejections (non-assignment, non-SSA lhs, scev failure, abnormal names)
   the false returns are on missing lines.  */
957 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
960 struct loop *loop = data->current_loop;
962 iv->base = NULL_TREE;
963 iv->step = NULL_TREE;
965 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
968 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
969 if (TREE_CODE (lhs) != SSA_NAME)
972 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
974 iv->base = expand_simple_operations (iv->base);
976 if (contains_abnormal_ssa_name_p (iv->base)
977 || contains_abnormal_ssa_name_p (iv->step))
983 /* Finds general ivs in statement STMT. */
986 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
990 if (!find_givs_in_stmt_scev (data, stmt, &iv))
993 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
996 /* Finds general ivs in basic block BB. */
999 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1001 block_stmt_iterator bsi;
1003 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1004 find_givs_in_stmt (data, bsi_stmt (bsi));
1007 /* Finds general ivs. */
/* Visits loop body blocks in dominator order so defs are seen before
   uses.  NOTE(review): the free (body) cleanup is on a missing line --
   get_loop_body_in_dom_order allocates.  */
1010 find_givs (struct ivopts_data *data)
1012 struct loop *loop = data->current_loop;
1013 basic_block *body = get_loop_body_in_dom_order (loop);
1016 for (i = 0; i < loop->num_nodes; i++)
1017 find_givs_in_bb (data, body[i]);
1021 /* For each ssa name defined in LOOP determines whether it is an induction
1022 variable and if so, its initial value and step. */
/* Driver: find_bivs, then (on missing lines) find_givs/mark_bivs, then an
   optional detailed dump of the iteration count and all discovered ivs.  */
1025 find_induction_variables (struct ivopts_data *data)
1030 if (!find_bivs (data))
1036 if (dump_file && (dump_flags & TDF_DETAILS))
1038 tree niter = niter_for_single_dom_exit (data);
1042 fprintf (dump_file, " number of iterations ");
1043 print_generic_expr (dump_file, niter, TDF_SLIM);
1044 fprintf (dump_file, "\n\n");
1047 fprintf (dump_file, "Induction variables:\n\n");
1049 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1051 if (ver_info (data, i)->iv)
1052 dump_iv (dump_file, ver_info (data, i)->iv);
1059 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
/* Takes ownership of IV; assigns the next sequential use id and appends
   the new use to data->iv_uses.  The "return use;" is on a missing
   line.  */
1061 static struct iv_use *
1062 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1063 tree stmt, enum use_type use_type)
1065 struct iv_use *use = XCNEW (struct iv_use);
1067 use->id = n_iv_uses (data);
1068 use->type = use_type;
1072 use->related_cands = BITMAP_ALLOC (NULL);
1074 /* To avoid showing ssa name in the dumps, if it was not reset by the
1076 iv->ssa_name = NULL_TREE;
1078 if (dump_file && (dump_flags & TDF_DETAILS))
1079 dump_use (dump_file, use);
1081 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1086 /* Checks whether OP is a loop-level invariant and if so, records it.
1087 NONLINEAR_USE is true if the invariant is used in a way we do not
1088 handle specially. */
/* Only real (gimple register) SSA names defined outside the current loop
   qualify; each gets a fresh invariant id the first time (the !inv_id
   guard is on a missing line) and is marked relevant.  */
1091 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1094 struct version_info *info;
1096 if (TREE_CODE (op) != SSA_NAME
1097 || !is_gimple_reg (op))
1100 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1102 && flow_bb_inside_loop_p (data->current_loop, bb))
1105 info = name_info (data, op);
1107 info->has_nonlin_use |= nonlinear_use;
1109 info->inv_id = ++data->max_inv_id;
1110 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1113 /* Checks whether the use OP is interesting and if so, records it. */
/* Returns the (possibly pre-existing) USE_NONLINEAR_EXPR use for OP, or
   NULL (returns on missing lines) when OP is not an SSA name, has no iv,
   or is a zero-step invariant (then it is recorded via record_invariant
   instead).  A copy of the iv (civ) is handed to record_use, which owns
   it.  */
1115 static struct iv_use *
1116 find_interesting_uses_op (struct ivopts_data *data, tree op)
1123 if (TREE_CODE (op) != SSA_NAME)
1126 iv = get_iv (data, op)
1130 if (iv->have_use_for)
1132 use = iv_use (data, iv->use_id);
1134 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1138 if (integer_zerop (iv->step))
1140 record_invariant (data, op, true);
1143 iv->have_use_for = true;
1145 civ = XNEW (struct iv);
1148 stmt = SSA_NAME_DEF_STMT (op);
1149 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1150 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1152 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1153 iv->use_id = use->id;
1158 /* Given a condition *COND_P, checks whether it is a compare of an induction
1159 variable and an invariant. If this is the case, CONTROL_VAR is set
1160 to location of the iv, BOUND to the location of the invariant,
1161 IV_VAR and IV_BOUND are set to the corresponding induction variable
1162 descriptions, and true is returned. If this is not the case,
1163 CONTROL_VAR and BOUND are set to the arguments of the condition and
1164 false is returned. */
/* All four out-parameters appear to be optional (presumably NULL-checked
   before the stores, on missing lines -- find_interesting_uses_cond below
   passes NULL for IV_BOUND).  */
1167 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1168 tree **control_var, tree **bound,
1169 struct iv **iv_var, struct iv **iv_bound)
1171 /* The nodes returned when COND has just one operand. Note that you should
1172 not modify anything in BOUND or IV_BOUND because of this. */
1173 static struct iv const_iv;
1175 tree cond = *cond_p;
1176 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1177 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1180 zero = integer_zero_node;
1181 const_iv.step = integer_zero_node;
1183 if (TREE_CODE (cond) == SSA_NAME)
1186 iv0 = get_iv (data, cond);
1187 ret = (iv0 && !integer_zerop (iv0->step));
1191 if (!COMPARISON_CLASS_P (cond))
1197 op0 = &TREE_OPERAND (cond, 0);
1198 op1 = &TREE_OPERAND (cond, 1);
1199 if (TREE_CODE (*op0) == SSA_NAME)
1200 iv0 = get_iv (data, *op0);
1201 if (TREE_CODE (*op1) == SSA_NAME)
1202 iv1 = get_iv (data, *op1);
1204 /* Exactly one of the compared values must be an iv, and the other one must
1209 if (integer_zerop (iv0->step))
1211 /* Control variable may be on the other side. */
1212 tmp_op = op0; op0 = op1; op1 = tmp_op;
1213 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1215 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
/* NOTE(review): stray double semicolon below -- harmless (empty
   statement) but worth cleaning up in the upstream source.  */
1219 *control_var = op0;;
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231 and if so, records it. */
/* If the condition is not iv-vs-invariant, both operands are recorded as
   plain operand uses; otherwise a USE_COMPARE use is recorded with a copy
   of the control iv (the copy into CIV is on a missing line).  */
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1236 tree *var_p, *bound_p;
1237 struct iv *var_iv, *civ;
1239 if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1241 find_interesting_uses_op (data, *var_p);
1242 find_interesting_uses_op (data, *bound_p);
1246 civ = XNEW (struct iv);
1248 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1251 /* Returns true if expression EXPR is obviously invariant in LOOP,
1252 i.e. if all its operands are defined outside of the LOOP. */
/* Conservative recursive check: constants are invariant; an SSA name is
   invariant iff its definition is not inside LOOP; compound expressions
   require all operands invariant.  The true/false returns sit on missing
   lines.  */
1255 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1260 if (is_gimple_min_invariant (expr))
1263 if (TREE_CODE (expr) == SSA_NAME)
1265 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1267 && flow_bb_inside_loop_p (loop, def_bb))
1273 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1276 len = TREE_OPERAND_LENGTH (expr);
1277 for (i = 0; i < len; i++)
1278 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1284 /* Cumulates the steps of indices into DATA and replaces their values with the
1285 initial ones. Returns false when the value of the index cannot be determined.
1286 Callback for for_each_index. */
/* Context threaded through for_each_index; DTA->step accumulates the
   combined byte step of all iv indices of the reference.  NOTE(review):
   the struct's "stmt" and "step" fields are on missing lines.  */
1288 struct ifs_ivopts_data
1290 struct ivopts_data *ivopts_data;
1296 idx_find_step (tree base, tree *idx, void *data)
1298 struct ifs_ivopts_data *dta = data;
1300 tree step, iv_base, iv_step, lbound, off;
1301 struct loop *loop = dta->ivopts_data->current_loop;
/* Aligned/misaligned indirect refs are rejected outright (return on a
   missing line).  */
1303 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1304 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1307 /* If base is a component ref, require that the offset of the reference
1309 if (TREE_CODE (base) == COMPONENT_REF)
1311 off = component_ref_field_offset (base);
1312 return expr_invariant_in_loop_p (loop, off);
1315 /* If base is array, first check whether we will be able to move the
1316 reference out of the loop (in order to take its address in strength
1317 reduction). In order for this to work we need both lower bound
1318 and step to be loop invariants. */
1319 if (TREE_CODE (base) == ARRAY_REF)
1321 step = array_ref_element_size (base);
1322 lbound = array_ref_low_bound (base);
1324 if (!expr_invariant_in_loop_p (loop, step)
1325 || !expr_invariant_in_loop_p (loop, lbound))
1329 if (TREE_CODE (*idx) != SSA_NAME)
1332 iv = get_iv (dta->ivopts_data, *idx);
1336 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1337 *&x[0], which is not folded and does not trigger the
1338 ARRAY_REF path below. */
/* *idx is replaced by the iv's initial value (the store is on a missing
   line); zero-step ivs contribute nothing further.  */
1341 if (integer_zerop (iv->step))
1344 if (TREE_CODE (base) == ARRAY_REF)
1346 step = array_ref_element_size (base);
1348 /* We only handle addresses whose step is an integer constant. */
1349 if (TREE_CODE (step) != INTEGER_CST)
1353 /* The step for pointer arithmetics already is 1 byte. */
1354 step = build_int_cst (sizetype, 1);
/* Cast the iv to sizetype, verifying the conversion cannot wrap.  */
1358 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1359 sizetype, &iv_base, &iv_step, dta->stmt,
1362 /* The index might wrap. */
1366 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1367 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1372 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1373 object is passed to it in DATA. */
/* Fallback when the whole address cannot be treated as one iv: record the
   index (and, for arrays, element size and low bound) as individual
   operand uses.  */
1376 idx_record_use (tree base, tree *idx,
1379 find_interesting_uses_op (data, *idx);
1380 if (TREE_CODE (base) == ARRAY_REF)
1382 find_interesting_uses_op (data, array_ref_element_size (base));
1383 find_interesting_uses_op (data, array_ref_low_bound (base));
1388 /* Returns true if memory reference REF may be unaligned. */
/* Mirrors expr.c:normal_inner_ref's strict-alignment test; conservative
   answer is "true" (may be unaligned).  NOTE(review): declarations of
   base/toffset/base_type and the mode-dependence guard around the final
   test are on missing lines.  */
1391 may_be_unaligned_p (tree ref)
1395 HOST_WIDE_INT bitsize;
1396 HOST_WIDE_INT bitpos;
1398 enum machine_mode mode;
1399 int unsignedp, volatilep;
1400 unsigned base_align;
1402 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1403 thus they are not misaligned. */
1404 if (TREE_CODE (ref) == TARGET_MEM_REF)
1407 /* The test below is basically copy of what expr.c:normal_inner_ref
1408 does to check whether the object must be loaded by parts when
1409 STRICT_ALIGNMENT is true. */
1410 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1411 &unsignedp, &volatilep, true);
1412 base_type = TREE_TYPE (base);
1413 base_align = TYPE_ALIGN (base_type);
1416 && (base_align < GET_MODE_ALIGNMENT (mode)
1417 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1418 || bitpos % BITS_PER_UNIT != 0))
1424 /* Return true if EXPR may be non-addressable. */
/* The COMPONENT_REF case label precedes the first return (on a missing
   line); ARRAY_REF shares the ARRAY_RANGE_REF arm.  */
1427 may_be_nonaddressable_p (tree expr)
1429 switch (TREE_CODE (expr))
1432 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1433 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1436 case ARRAY_RANGE_REF:
1437 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1439 case VIEW_CONVERT_EXPR:
1440 /* This kind of view-conversions may wrap non-addressable objects
1441 and make them look addressable. After some processing the
1442 non-addressability may be uncovered again, causing ADDR_EXPRs
1443 of inappropriate objects to be built. */
1444 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1445 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1454 /* Finds addresses in *OP_P inside STMT. */
/* Builds an address iv for the memory reference *OP_P and records a
   USE_ADDRESS use for it.  On any rejection (volatile, bitfield,
   non-addressable, misaligned, non-iv indices) control jumps to a
   fallback (label on missing lines) that merely records the individual
   indices as operand uses -- see the trailing for_each_index call.  */
1457 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1459 tree base = *op_p, step = build_int_cst (sizetype, 0);
1461 struct ifs_ivopts_data ifs_ivopts_data;
1463 /* Do not play with volatile memory references. A bit too conservative,
1464 perhaps, but safe. */
1465 if (stmt_ann (stmt)->has_volatile_ops)
1468 /* Ignore bitfields for now. Not really something terribly complicated
1470 if (TREE_CODE (base) == BIT_FIELD_REF)
1473 if (may_be_nonaddressable_p (base))
1476 if (STRICT_ALIGNMENT
1477 && may_be_unaligned_p (base))
1480 base = unshare_expr (base);
/* TARGET_MEM_REF path: substitute iv bases for the TMR base and index,
   accumulating the combined step.  */
1482 if (TREE_CODE (base) == TARGET_MEM_REF)
1484 tree type = build_pointer_type (TREE_TYPE (base));
1488 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1490 civ = get_iv (data, TMR_BASE (base));
1494 TMR_BASE (base) = civ->base;
1497 if (TMR_INDEX (base)
1498 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1500 civ = get_iv (data, TMR_INDEX (base));
1504 TMR_INDEX (base) = civ->base;
1509 if (TMR_STEP (base))
1510 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1512 step = fold_build2 (PLUS_EXPR, type, step, astep);
1516 if (integer_zerop (step))
1518 base = tree_mem_ref_addr (type, base);
/* Generic path: fold the steps of all indices into ifs_ivopts_data.step
   and replace the indices by their initial values (idx_find_step).  */
1522 ifs_ivopts_data.ivopts_data = data;
1523 ifs_ivopts_data.stmt = stmt;
1524 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1525 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1526 || integer_zerop (ifs_ivopts_data.step))
1528 step = ifs_ivopts_data.step;
1530 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1531 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1533 base = build_fold_addr_expr (base);
1535 /* Substituting bases of IVs into the base expression might
1536 have caused folding opportunities. */
1537 if (TREE_CODE (base) == ADDR_EXPR)
1539 tree *ref = &TREE_OPERAND (base, 0);
1540 while (handled_component_p (*ref))
1541 ref = &TREE_OPERAND (*ref, 0);
1542 if (TREE_CODE (*ref) == INDIRECT_REF)
1543 *ref = fold_indirect_ref (*ref);
1547 civ = alloc_iv (base, step);
1548 record_use (data, op_p, civ, stmt, USE_ADDRESS);
/* Fallback: record individual index uses only.  */
1552 for_each_index (op_p, idx_record_use, data);
1555 /* Finds and records invariants used in STMT. */
1558 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1561 use_operand_p use_p;
/* Record every SSA use operand of STMT (statement or PHI) as a
   possible invariant.  */
1564 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1566 op = USE_FROM_PTR (use_p);
1567 record_invariant (data, op, false);
1571 /* Finds interesting uses of induction variables in the statement STMT. */
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1579 use_operand_p use_p;
1581 find_invariants_stmt (data, stmt);
1583 if (TREE_CODE (stmt) == COND_EXPR)
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1589 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1591 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1592 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1594 if (TREE_CODE (lhs) == SSA_NAME)
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1599 iv = get_iv (data, lhs);
1601 if (iv && !integer_zerop (iv->step))
/* Dispatch on the shape of the right-hand side: comparisons become
   condition uses, memory references become address uses.  */
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1607 case tcc_comparison:
1608 find_interesting_uses_cond (data, stmt,
1609 &GIMPLE_STMT_OPERAND (stmt, 1));
1613 find_interesting_uses_address (data, stmt,
1614 &GIMPLE_STMT_OPERAND (stmt, 1));
1615 if (REFERENCE_CLASS_P (lhs))
1616 find_interesting_uses_address (data, stmt,
1617 &GIMPLE_STMT_OPERAND (stmt, 0));
/* Store of a plain value into memory: the destination address is an
   address use, the stored value an ordinary operand use.  */
1623 if (REFERENCE_CLASS_P (lhs)
1624 && is_gimple_val (rhs))
1626 find_interesting_uses_address (data, stmt,
1627 &GIMPLE_STMT_OPERAND (stmt, 0));
1628 find_interesting_uses_op (data, rhs);
1632 /* TODO -- we should also handle address uses of type
1634 memory = call (whatever);
/* Header PHIs defining a nonzero-step iv are definitions, not uses.  */
1641 if (TREE_CODE (stmt) == PHI_NODE
1642 && bb_for_stmt (stmt) == data->current_loop->header)
1644 lhs = PHI_RESULT (stmt);
1645 iv = get_iv (data, lhs);
1647 if (iv && !integer_zerop (iv->step))
/* Otherwise, record every SSA name operand that has an associated iv
   as a generic operand use.  */
1651 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1653 op = USE_FROM_PTR (use_p);
1655 if (TREE_CODE (op) != SSA_NAME)
1658 iv = get_iv (data, op);
1662 find_interesting_uses_op (data, op);
1666 /* Finds interesting uses of induction variables outside of loops
1667 on loop exit edge EXIT. */
1670 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
/* Values live after the loop appear as PHI arguments coming in over
   the exit edge; only register (non-virtual) values are of interest.  */
1674 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1676 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1677 if (is_gimple_reg (def))
1678 find_interesting_uses_op (data, def);
1682 /* Finds uses of the induction variables that are interesting. */
1685 find_interesting_uses (struct ivopts_data *data)
1688 block_stmt_iterator bsi;
1690 basic_block *body = get_loop_body (data->current_loop);
1692 struct version_info *info;
1695 if (dump_file && (dump_flags & TDF_DETAILS))
1696 fprintf (dump_file, "Uses:\n\n");
/* Walk every block of the loop body; edges leaving the loop are
   handled by find_interesting_uses_outside, everything else by
   find_interesting_uses_stmt (PHIs and ordinary statements).  */
1698 for (i = 0; i < data->current_loop->num_nodes; i++)
1703 FOR_EACH_EDGE (e, ei, bb->succs)
1704 if (e->dest != EXIT_BLOCK_PTR
1705 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1706 find_interesting_uses_outside (data, e);
1708 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1709 find_interesting_uses_stmt (data, phi);
1710 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1711 find_interesting_uses_stmt (data, bsi_stmt (bsi));
/* With TDF_DETAILS, dump the invariants that were recorded.  */
1714 if (dump_file && (dump_flags & TDF_DETAILS))
1718 fprintf (dump_file, "\n");
1720 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1722 info = ver_info (data, i);
1725 fprintf (dump_file, " ");
1726 print_generic_expr (dump_file, info->name, TDF_SLIM);
1727 fprintf (dump_file, " is invariant (%d)%s\n",
1728 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1732 fprintf (dump_file, "\n");
1738 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1739 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1740 we are at the top-level of the processed address. */
1743 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1744 unsigned HOST_WIDE_INT *offset)
1746 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1747 enum tree_code code;
1748 tree type, orig_type = TREE_TYPE (expr);
1749 unsigned HOST_WIDE_INT off0, off1, st;
1750 tree orig_expr = expr;
1754 type = TREE_TYPE (expr);
1755 code = TREE_CODE (expr);
/* NOTE(review): switch case labels elided throughout.  This appears to
   be the INTEGER_CST case: the whole constant becomes the offset and a
   zero of the original type is returned.  */
1761 if (!cst_and_fits_in_hwi (expr)
1762 || integer_zerop (expr))
1765 *offset = int_cst_value (expr);
1766 return build_int_cst (orig_type, 0);
/* PLUS_EXPR/MINUS_EXPR: strip offsets from both operands and combine
   them according to the operation.  */
1770 op0 = TREE_OPERAND (expr, 0);
1771 op1 = TREE_OPERAND (expr, 1);
1773 op0 = strip_offset_1 (op0, false, false, &off0);
1774 op1 = strip_offset_1 (op1, false, false, &off1);
1776 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1777 if (op0 == TREE_OPERAND (expr, 0)
1778 && op1 == TREE_OPERAND (expr, 1))
1781 if (integer_zerop (op1))
1783 else if (integer_zerop (op0))
1785 if (code == PLUS_EXPR)
1788 expr = fold_build1 (NEGATE_EXPR, type, op1);
1791 expr = fold_build2 (code, type, op0, op1);
1793 return fold_convert (orig_type, expr);
/* ARRAY_REF-style case: a constant index contributes
   index * element-size to the offset.  */
1799 step = array_ref_element_size (expr);
1800 if (!cst_and_fits_in_hwi (step))
1803 st = int_cst_value (step);
1804 op1 = TREE_OPERAND (expr, 1);
1805 op1 = strip_offset_1 (op1, false, false, &off1);
1806 *offset = off1 * st;
1809 && integer_zerop (op1))
1811 /* Strip the component reference completely. */
1812 op0 = TREE_OPERAND (expr, 0);
1813 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
/* COMPONENT_REF-style case: a constant field offset is added in and
   the reference stripped.  */
1823 tmp = component_ref_field_offset (expr);
1825 && cst_and_fits_in_hwi (tmp))
1827 /* Strip the component reference completely. */
1828 op0 = TREE_OPERAND (expr, 0);
1829 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1830 *offset = off0 + int_cst_value (tmp);
/* ADDR_EXPR-style case: recurse into the addressed object, noting we
   are now inside an address (and at its top-level component).  */
1836 op0 = TREE_OPERAND (expr, 0);
1837 op0 = strip_offset_1 (op0, true, true, &off0);
1840 if (op0 == TREE_OPERAND (expr, 0))
1843 expr = build_fold_addr_expr (op0);
1844 return fold_convert (orig_type, expr);
1847 inside_addr = false;
1854 /* Default handling of expressions for that we want to recurse into
1855 the first operand. */
1856 op0 = TREE_OPERAND (expr, 0);
1857 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1860 if (op0 == TREE_OPERAND (expr, 0)
1861 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1864 expr = copy_node (expr);
1865 TREE_OPERAND (expr, 0) = op0;
1867 TREE_OPERAND (expr, 1) = op1;
1869 /* Inside address, we might strip the top level component references,
1870 thus changing type of the expression. Handling of ADDR_EXPR
1872 expr = fold_convert (orig_type, expr);
1877 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* Convenience wrapper around strip_offset_1 for a top-level expression
   (not inside an address).  */
1880 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1882 return strip_offset_1 (expr, false, false, offset);
1885 /* Returns variant of TYPE that can be used as base for different uses.
1886 We return unsigned type with the same precision, which avoids problems
1890 generic_type_for (tree type)
1892 if (POINTER_TYPE_P (type))
1893 return unsigned_type_for (type);
/* NOTE(review): body between the two ifs is elided -- presumably an
   already-unsigned TYPE is returned unchanged, and only signed integer
   types fall through to the conversion below.  Confirm.  */
1895 if (TYPE_UNSIGNED (type))
1898 return unsigned_type_for (type);
1901 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1902 the bitmap to that we should store it. */
1904 static struct ivopts_data *fd_ivopts_data;
1906 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1908 bitmap *depends_on = data;
1909 struct version_info *info;
1911 if (TREE_CODE (*expr_p) != SSA_NAME)
1913 info = name_info (fd_ivopts_data, *expr_p);
/* Only names recorded as invariants (having an inv_id and no
   nonlinear use) go into the dependence bitmap.  */
1915 if (!info->inv_id || info->has_nonlin_use)
/* The bitmap is allocated lazily, on the first dependence found.  */
1919 *depends_on = BITMAP_ALLOC (NULL);
1920 bitmap_set_bit (*depends_on, info->inv_id);
1925 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1926 position to POS. If USE is not NULL, the candidate is set as related to
1927 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1928 replacement of the final value of the iv by a direct computation. */
1930 static struct iv_cand *
1931 add_candidate_1 (struct ivopts_data *data,
1932 tree base, tree step, bool important, enum iv_position pos,
1933 struct iv_use *use, tree incremented_at)
1936 struct iv_cand *cand = NULL;
1937 tree type, orig_type;
/* Canonicalize base and step into the generic (unsigned) type so that
   equivalent candidates compare equal below.  */
1941 orig_type = TREE_TYPE (base);
1942 type = generic_type_for (orig_type);
1943 if (type != orig_type)
1945 base = fold_convert (type, base);
1946 step = fold_convert (type, step);
/* Search for an existing candidate with the same position, increment
   statement, base and step; reuse it rather than duplicating.  */
1950 for (i = 0; i < n_iv_cands (data); i++)
1952 cand = iv_cand (data, i);
1954 if (cand->pos != pos)
1957 if (cand->incremented_at != incremented_at)
1971 if (operand_equal_p (base, cand->iv->base, 0)
1972 && operand_equal_p (step, cand->iv->step, 0))
1976 if (i == n_iv_cands (data))
1978 cand = XCNEW (struct iv_cand);
1984 cand->iv = alloc_iv (base, step);
/* Except for candidates kept in the original variable, create a fresh
   temporary to hold the iv's value.  */
1987 if (pos != IP_ORIGINAL && cand->iv)
1989 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1990 cand->var_after = cand->var_before;
1992 cand->important = important;
1993 cand->incremented_at = incremented_at;
1994 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
/* A non-constant step may reference loop invariants; record them in
   the candidate's depends_on bitmap.  */
1997 && TREE_CODE (step) != INTEGER_CST)
1999 fd_ivopts_data = data;
2000 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2003 if (dump_file && (dump_flags & TDF_DETAILS))
2004 dump_cand (dump_file, cand);
2007 if (important && !cand->important)
2009 cand->important = true;
2010 if (dump_file && (dump_flags & TDF_DETAILS))
2011 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2016 bitmap_set_bit (use->related_cands, i);
2017 if (dump_file && (dump_flags & TDF_DETAILS))
2018 fprintf (dump_file, "Candidate %d is related to use %d\n",
2025 /* Returns true if incrementing the induction variable at the end of the LOOP
2028 The purpose is to avoid splitting latch edge with a biv increment, thus
2029 creating a jump, possibly confusing other optimization passes and leaving
2030 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2031 is not available (so we do not have a better alternative), or if the latch
2032 edge is already nonempty. */
2035 allow_ip_end_pos_p (struct loop *loop)
2037 if (!ip_normal_pos (loop))
/* The end-position block already contains code, so placing the
   increment there does not create a new jump.  */
2040 if (!empty_block_p (ip_end_pos (loop)))
2046 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2047 position to POS. If USE is not NULL, the candidate is set as related to
2048 it. The candidate computation is scheduled on all available positions. */
2051 add_candidate (struct ivopts_data *data,
2052 tree base, tree step, bool important, struct iv_use *use)
/* Schedule the candidate at both increment positions; IP_END is
   restricted by allow_ip_end_pos_p (see its comment).  */
2054 if (ip_normal_pos (data->current_loop))
2055 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2056 if (ip_end_pos (data->current_loop)
2057 && allow_ip_end_pos_p (data->current_loop))
2058 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2061 /* Add a standard "0 + 1 * iteration" iv candidate for a
2062 type with SIZE bits. */
2065 add_standard_iv_candidates_for_size (struct ivopts_data *data,
/* The candidate is base 0, step 1 in the unsigned type of SIZE bits.  */
2068 tree type = lang_hooks.types.type_for_size (size, true);
2069 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2073 /* Adds standard iv candidates. */
2076 add_standard_iv_candidates (struct ivopts_data *data)
/* A plain int-sized counter first.  */
2078 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2080 /* The same for a double-integer type if it is still fast enough. */
2081 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2082 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2086 /* Adds candidates bases on the old induction variable IV. */
2089 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2092 struct iv_cand *cand;
2094 add_candidate (data, iv->base, iv->step, true, NULL);
2096 /* The same, but with initial value zero. */
2097 add_candidate (data,
2098 build_int_cst (TREE_TYPE (iv->base), 0),
2099 iv->step, true, NULL);
/* If the biv is defined by a PHI node, also consider keeping it in its
   original variable (position IP_ORIGINAL), incremented at the
   statement that defines the latch-edge value.  */
2101 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2102 if (TREE_CODE (phi) == PHI_NODE)
2104 /* Additionally record the possibility of leaving the original iv
2106 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2107 cand = add_candidate_1 (data,
2108 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2109 SSA_NAME_DEF_STMT (def));
2110 cand->var_before = iv->ssa_name;
2111 cand->var_after = def;
2115 /* Adds candidates based on the old induction variables. */
2118 add_old_ivs_candidates (struct ivopts_data *data)
/* Iterate all relevant SSA versions, picking the bivs with a nonzero
   step.  */
2124 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2126 iv = ver_info (data, i)->iv;
2127 if (iv && iv->biv_p && !integer_zerop (iv->step))
2128 add_old_iv_candidates (data, iv);
2132 /* Adds candidates based on the value of the induction variable IV and USE. */
2135 add_iv_value_candidates (struct ivopts_data *data,
2136 struct iv *iv, struct iv_use *use)
2138 unsigned HOST_WIDE_INT offset;
/* First, the candidate with the use's own base and step.  */
2141 add_candidate (data, iv->base, iv->step, false, use);
2143 /* The same, but with initial value zero. Make such variable important,
2144 since it is generic enough so that possibly many uses may be based
2146 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2147 iv->step, true, use);
2149 /* Third, try removing the constant offset. */
2150 base = strip_offset (iv->base, &offset);
2152 add_candidate (data, base, iv->step, false, use);
2155 /* Adds candidates based on the uses. */
2158 add_derived_ivs_candidates (struct ivopts_data *data)
2162 for (i = 0; i < n_iv_uses (data); i++)
2164 struct iv_use *use = iv_use (data, i);
/* NOTE(review): switch on the use type elided here; the nonlinear case
   (and presumably the other fall-through cases) share the value-based
   candidates below.  */
2171 case USE_NONLINEAR_EXPR:
2174 /* Just add the ivs based on the value of the iv used here. */
2175 add_iv_value_candidates (data, use->iv, use);
2184 /* Record important candidates and add them to related_cands bitmaps
2188 record_important_candidates (struct ivopts_data *data)
2193 for (i = 0; i < n_iv_cands (data); i++)
2195 struct iv_cand *cand = iv_cand (data, i);
2197 if (cand->important)
2198 bitmap_set_bit (data->important_candidates, i);
/* With few enough candidates we can afford to examine every
   (use, candidate) pair, making per-use related_cands bitmaps
   unnecessary.  */
2201 data->consider_all_candidates = (n_iv_cands (data)
2202 <= CONSIDER_ALL_CANDIDATES_BOUND);
2204 if (data->consider_all_candidates)
2206 /* We will not need "related_cands" bitmaps in this case,
2207 so release them to decrease peak memory consumption. */
2208 for (i = 0; i < n_iv_uses (data); i++)
2210 use = iv_use (data, i);
2211 BITMAP_FREE (use->related_cands);
2216 /* Add important candidates to the related_cands bitmaps. */
2217 for (i = 0; i < n_iv_uses (data); i++)
2218 bitmap_ior_into (iv_use (data, i)->related_cands,
2219 data->important_candidates);
2223 /* Finds the candidates for the induction variables. */
/* Driver: gathers candidates from the generic to the use-specific, then
   marks the important ones.  */
2226 find_iv_candidates (struct ivopts_data *data)
2228 /* Add commonly used ivs. */
2229 add_standard_iv_candidates (data);
2231 /* Add old induction variables. */
2232 add_old_ivs_candidates (data);
2234 /* Add induction variables derived from uses. */
2235 add_derived_ivs_candidates (data);
2237 /* Record the important candidates. */
2238 record_important_candidates (data);
2241 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2242 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2243 we allocate a simple list to every use. */
2246 alloc_use_cost_map (struct ivopts_data *data)
2248 unsigned i, size, s, j;
2250 for (i = 0; i < n_iv_uses (data); i++)
2252 struct iv_use *use = iv_use (data, i);
2255 if (data->consider_all_candidates)
2256 size = n_iv_cands (data);
/* Otherwise size the map from the number of related candidates.  */
2260 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2265 /* Round up to the power of two, so that moduling by it is fast. */
2266 for (size = 1; size < s; size <<= 1)
2270 use->n_map_members = size;
2271 use->cost_map = XCNEWVEC (struct cost_pair, size);
2275 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2276 on invariants DEPENDS_ON and that the value used in expressing it
2280 set_use_iv_cost (struct ivopts_data *data,
2281 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2282 bitmap depends_on, tree value)
/* NOTE(review): the guarding condition is elided -- presumably an
   unusable (infinite-cost) pair is not stored, only its bitmap is
   released here.  */
2288 BITMAP_FREE (depends_on);
2292 if (data->consider_all_candidates)
2294 use->cost_map[cand->id].cand = cand;
2295 use->cost_map[cand->id].cost = cost;
2296 use->cost_map[cand->id].depends_on = depends_on;
2297 use->cost_map[cand->id].value = value;
/* Open-addressed insert: probe from id mod table-size, wrapping
   around, until a free slot is found.  */
2301 /* n_map_members is a power of two, so this computes modulo. */
2302 s = cand->id & (use->n_map_members - 1);
2303 for (i = s; i < use->n_map_members; i++)
2304 if (!use->cost_map[i].cand)
2306 for (i = 0; i < s; i++)
2307 if (!use->cost_map[i].cand)
2313 use->cost_map[i].cand = cand;
2314 use->cost_map[i].cost = cost;
2315 use->cost_map[i].depends_on = depends_on;
2316 use->cost_map[i].value = value;
2319 /* Gets cost of (USE, CANDIDATE) pair. */
2321 static struct cost_pair *
2322 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2323 struct iv_cand *cand)
2326 struct cost_pair *ret;
2331 if (data->consider_all_candidates)
2333 ret = use->cost_map + cand->id;
/* Hashed case: same probe sequence as set_use_iv_cost uses.  */
2340 /* n_map_members is a power of two, so this computes modulo. */
2341 s = cand->id & (use->n_map_members - 1);
2342 for (i = s; i < use->n_map_members; i++)
2343 if (use->cost_map[i].cand == cand)
2344 return use->cost_map + i;
2346 for (i = 0; i < s; i++)
2347 if (use->cost_map[i].cand == cand)
2348 return use->cost_map + i;
2353 /* Returns estimate on cost of computing SEQ. */
/* Sum rtx_cost over the single-set insns of the RTL sequence SEQ.
   NOTE(review): the function header and the non-single-set branch are
   elided in this excerpt.  */
2361 for (; seq; seq = NEXT_INSN (seq))
2363 set = single_set (seq);
2365 cost += rtx_cost (set, SET);
2373 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2375 produce_memory_decl_rtl (tree obj, int *regno)
/* Static/external objects get a SYMBOL_REF-based MEM with section info
   encoded; everything else is addressed through a fresh fake register.  */
2380 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2382 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2383 x = gen_rtx_SYMBOL_REF (Pmode, name);
2384 SET_SYMBOL_REF_DECL (x, obj);
2385 x = gen_rtx_MEM (DECL_MODE (obj), x);
2386 targetm.encode_section_info (obj, x, true);
2390 x = gen_raw_REG (Pmode, (*regno)++);
2391 x = gen_rtx_MEM (DECL_MODE (obj), x);
2397 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2398 walk_tree. DATA contains the actual fake register number. */
2401 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2403 tree obj = NULL_TREE;
2407 switch (TREE_CODE (*expr_p))
/* For component references, walk down to the underlying base object.  */
2410 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2411 handled_component_p (*expr_p);
2412 expr_p = &TREE_OPERAND (*expr_p, 0))
2415 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2416 x = produce_memory_decl_rtl (obj, regno);
/* SSA names use the RTL of their underlying variable.  */
2421 obj = SSA_NAME_VAR (*expr_p);
2422 if (!DECL_RTL_SET_P (obj))
2423 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2432 if (DECL_RTL_SET_P (obj))
/* BLKmode objects must live in memory; others get a fake register.  */
2435 if (DECL_MODE (obj) == BLKmode)
2436 x = produce_memory_decl_rtl (obj, regno);
2438 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Remember the decl so the fake RTL can be reset afterwards.  */
2448 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2449 SET_DECL_RTL (obj, x);
2455 /* Determines cost of the computation of EXPR. */
2458 computation_cost (tree expr)
2461 tree type = TREE_TYPE (expr);
2463 /* Avoid using hard regs in ways which may be unsupported. */
2464 int regno = LAST_VIRTUAL_REGISTER + 1;
/* Give every referenced decl a fake RTL home (mojibake fixed here:
   "®no" restored to "&regno" -- the address of the fake register
   counter), then expand EXPR and cost the generated sequence.  */
2466 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2468 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2472 cost = seq_cost (seq);
/* NOTE(review): presumably guarded by a MEM check (elided) -- add the
   cost of the result's address as well.  */
2474 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2479 /* Returns variable containing the value of candidate CAND at statement AT. */
/* After the increment point the candidate's value lives in var_after;
   before it, in var_before.  */
2482 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2484 if (stmt_after_increment (loop, cand, stmt))
2485 return cand->var_after;
2487 return cand->var_before;
2490 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2491 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2494 tree_int_cst_sign_bit (tree t)
2496 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2497 unsigned HOST_WIDE_INT w;
/* Pick the low or high word of the double-int constant depending on
   which one holds the sign bit.  */
2499 if (bitno < HOST_BITS_PER_WIDE_INT)
2500 w = TREE_INT_CST_LOW (t);
2503 w = TREE_INT_CST_HIGH (t);
2504 bitno -= HOST_BITS_PER_WIDE_INT;
2507 return (w >> bitno) & 1;
2510 /* If we can prove that TOP = cst * BOT for some constant cst,
2511 store cst to MUL and return true. Otherwise return false.
2512 The returned value is always sign-extended, regardless of the
2513 signedness of TOP and BOT. */
2516 constant_multiple_of (tree top, tree bot, double_int *mul)
2519 enum tree_code code;
2520 double_int res, p0, p1;
2521 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
/* Identical operands: ratio is exactly one.  */
2526 if (operand_equal_p (top, bot, 0))
2528 *mul = double_int_one;
2532 code = TREE_CODE (top);
/* Multiplication by a constant: scale the recursively-found ratio.  */
2536 mby = TREE_OPERAND (top, 1);
2537 if (TREE_CODE (mby) != INTEGER_CST)
2540 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2543 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
/* Addition/subtraction: both operands must be constant multiples of
   BOT; combine their ratios (negating for MINUS_EXPR).  */
2549 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2550 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2553 if (code == MINUS_EXPR)
2554 p1 = double_int_neg (p1);
2555 *mul = double_int_sext (double_int_add (p0, p1), precision);
/* Constant over constant: succeed only if the signed floor division
   leaves no remainder.  */
2559 if (TREE_CODE (bot) != INTEGER_CST)
2562 p0 = double_int_sext (tree_to_double_int (top), precision);
2563 p1 = double_int_sext (tree_to_double_int (bot), precision);
2564 if (double_int_zero_p (p1))
2566 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2568 return double_int_zero_p (res);
2575 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2576 same precision that is at least as wide as the precision of TYPE, stores
2577 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2581 determine_common_wider_type (tree *a, tree *b)
2583 tree wider_type = NULL;
2585 tree atype = TREE_TYPE (*a);
/* A must be a conversion from a type at least as wide as its own.  */
2587 if ((TREE_CODE (*a) == NOP_EXPR
2588 || TREE_CODE (*a) == CONVERT_EXPR))
2590 suba = TREE_OPERAND (*a, 0);
2591 wider_type = TREE_TYPE (suba);
2592 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
/* B must be a conversion from a type of exactly the same precision as
   A's inner type.  */
2598 if ((TREE_CODE (*b) == NOP_EXPR
2599 || TREE_CODE (*b) == CONVERT_EXPR))
2601 subb = TREE_OPERAND (*b, 0);
2602 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2613 /* Determines the expression by that USE is expressed from induction variable
2614 CAND at statement AT in LOOP. The expression is stored in a decomposed
2615 form into AFF. Returns false if USE cannot be expressed using CAND. */
2618 get_computation_aff (struct loop *loop,
2619 struct iv_use *use, struct iv_cand *cand, tree at,
2620 struct affine_tree_combination *aff)
2622 tree ubase = use->iv->base;
2623 tree ustep = use->iv->step;
2624 tree cbase = cand->iv->base;
2625 tree cstep = cand->iv->step, cstep_common;
2626 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2627 tree common_type, var;
2629 aff_tree cbase_aff, var_aff;
2632 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2634 /* We do not have a precision to express the values of use. */
2638 var = var_at_stmt (loop, cand, at);
2639 uutype = unsigned_type_for (utype);
2641 /* If the conversion is not noop, perform it. */
2642 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2644 cstep = fold_convert (uutype, cstep);
2645 cbase = fold_convert (uutype, cbase);
2646 var = fold_convert (uutype, var);
/* RAT is the constant ratio ustep / cstep; without such a ratio USE
   cannot be expressed from CAND.  */
2649 if (!constant_multiple_of (ustep, cstep, &rat))
2652 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2653 type, we achieve better folding by computing their difference in this
2654 wider type, and cast the result to UUTYPE. We do not need to worry about
2655 overflows, as all the arithmetics will in the end be performed in UUTYPE
2657 common_type = determine_common_wider_type (&ubase, &cbase);
2659 /* use = ubase - ratio * cbase + ratio * var. */
2660 tree_to_aff_combination (ubase, common_type, aff);
2661 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2662 tree_to_aff_combination (var, uutype, &var_aff);
2664 /* We need to shift the value if we are after the increment. */
2665 if (stmt_after_increment (loop, cand, at))
2669 if (common_type != uutype)
2670 cstep_common = fold_convert (common_type, cstep);
2672 cstep_common = cstep;
2674 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2675 aff_combination_add (&cbase_aff, &cstep_aff);
/* Assemble the final combination per the formula above.  */
2678 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2679 aff_combination_add (aff, &cbase_aff);
2680 if (common_type != uutype)
2681 aff_combination_convert (aff, uutype);
2683 aff_combination_scale (&var_aff, rat);
2684 aff_combination_add (aff, &var_aff);
2689 /* Determines the expression by that USE is expressed from induction variable
2690 CAND at statement AT in LOOP. The computation is unshared. */
2693 get_computation_at (struct loop *loop,
2694 struct iv_use *use, struct iv_cand *cand, tree at)
2697 tree type = TREE_TYPE (use->iv->base);
2699 if (!get_computation_aff (loop, use, cand, at, &aff))
/* Unshare before materializing so the built tree may be modified.  */
2701 unshare_aff_combination (&aff);
2702 return fold_convert (type, aff_combination_to_tree (&aff));
2705 /* Determines the expression by that USE is expressed from induction variable
2706 CAND in LOOP. The computation is unshared. */
/* Convenience wrapper for get_computation_at at the use's own
   statement.  */
2709 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2711 return get_computation_at (loop, use, cand, use->stmt);
2714 /* Returns cost of addition in MODE. */
2717 add_cost (enum machine_mode mode)
/* Costs are computed once per machine mode and cached here.  */
2719 static unsigned costs[NUM_MACHINE_MODES];
/* Synthesize an addition of two fake registers and cost the resulting
   RTL sequence.  */
2727 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2728 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2729 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2734 cost = seq_cost (seq);
2740 if (dump_file && (dump_flags & TDF_DETAILS))
2741 fprintf (dump_file, "Addition in %s costs %d\n",
2742 GET_MODE_NAME (mode), cost);
2746 /* Entry in a hashtable of already known costs for multiplication. */
/* Keyed by the (cst, mode) pair; see mbc_entry_hash and mbc_entry_eq.  */
2749 HOST_WIDE_INT cst; /* The constant to multiply by. */
2750 enum machine_mode mode; /* In mode. */
2751 unsigned cost; /* The cost. */
2754 /* Counts hash value for the ENTRY. */
2757 mbc_entry_hash (const void *entry)
2759 const struct mbc_entry *e = entry;
/* Mix the mode and the constant; 57 and 877 are ad-hoc mixing
   constants.  */
2761 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2764 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2767 mbc_entry_eq (const void *entry1, const void *entry2)
2769 const struct mbc_entry *e1 = entry1;
2770 const struct mbc_entry *e2 = entry2;
/* Entries are equal iff both the mode and the constant match.  */
2772 return (e1->mode == e2->mode
2773 && e1->cst == e2->cst);
2776 /* Returns cost of multiplication by constant CST in MODE. */
2779 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2781 static htab_t costs;
2782 struct mbc_entry **cached, act;
/* Lazily create the cache; entries are freed by the hash table.  */
2787 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2791 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2793 return (*cached)->cost;
/* Not cached yet: synthesize the multiplication with expand_mult and
   cost the resulting RTL sequence.  */
2795 *cached = XNEW (struct mbc_entry);
2796 (*cached)->mode = mode;
2797 (*cached)->cst = cst;
2800 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2801 gen_int_mode (cst, mode), NULL_RTX, 0);
2805 cost = seq_cost (seq);
2807 if (dump_file && (dump_flags & TDF_DETAILS))
2808 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2809 (int) cst, GET_MODE_NAME (mode), cost);
2811 (*cached)->cost = cost;
2816 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2817 validity for a memory reference accessing memory of mode MODE. */
2820 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2822 #define MAX_RATIO 128
2823 static sbitmap valid_mult[MAX_MACHINE_MODE];
/* On the first query for MODE, probe every ratio in
   [-MAX_RATIO, MAX_RATIO] against memory_address_p and cache the
   answers in a bitmap.  */
2825 if (!valid_mult[mode])
2827 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2831 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2832 sbitmap_zero (valid_mult[mode]);
2833 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2834 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2836 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2837 if (memory_address_p (mode, addr))
2838 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2841 if (dump_file && (dump_flags & TDF_DETAILS))
2843 fprintf (dump_file, " allowed multipliers:");
2844 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2845 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2846 fprintf (dump_file, " %d", (int) i);
2847 fprintf (dump_file, "\n");
2848 fprintf (dump_file, "\n");
/* Ratios outside the probed range are conservatively rejected
   (return elided here).  */
2852 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2855 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2858 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2859 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2860 variable is omitted. Compute the cost for a memory reference that accesses
2861 a memory location of mode MEM_MODE.
2863 TODO -- there must be some better way. This all is quite crude. */
2866 get_address_cost (bool symbol_present, bool var_present,
2867 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2868 enum machine_mode mem_mode)
2870 static bool initialized[MAX_MACHINE_MODE];
2871 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2872 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2873 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2874 unsigned cost, acost;
2875 bool offset_p, ratio_p;
2876 HOST_WIDE_INT s_offset;
2877 unsigned HOST_WIDE_INT mask;
2880 if (!initialized[mem_mode])
2883 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2884 int old_cse_not_expected;
2885 unsigned sym_p, var_p, off_p, rat_p, add_c;
2886 rtx seq, addr, base;
2889 initialized[mem_mode] = true;
2891 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2893 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2894 for (i = start; i <= 1 << 20; i <<= 1)
2896 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2897 if (!memory_address_p (mem_mode, addr))
2900 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2901 off[mem_mode] = max_offset[mem_mode];
2903 for (i = start; i <= 1 << 20; i <<= 1)
2905 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2906 if (!memory_address_p (mem_mode, addr))
2909 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2911 if (dump_file && (dump_flags & TDF_DETAILS))
2913 fprintf (dump_file, "get_address_cost:\n");
2914 fprintf (dump_file, " min offset %s %d\n",
2915 GET_MODE_NAME (mem_mode),
2916 (int) min_offset[mem_mode]);
2917 fprintf (dump_file, " max offset %s %d\n",
2918 GET_MODE_NAME (mem_mode),
2919 (int) max_offset[mem_mode]);
2923 for (i = 2; i <= MAX_RATIO; i++)
2924 if (multiplier_allowed_in_address_p (i, mem_mode))
2930 /* Compute the cost of various addressing modes. */
2932 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2933 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2935 for (i = 0; i < 16; i++)
2938 var_p = (i >> 1) & 1;
2939 off_p = (i >> 2) & 1;
2940 rat_p = (i >> 3) & 1;
2944 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2945 gen_int_mode (rat[mem_mode], Pmode));
2948 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2952 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2953 /* ??? We can run into trouble with some backends by presenting
2954 it with symbols which havn't been properly passed through
2955 targetm.encode_section_info. By setting the local bit, we
2956 enhance the probability of things working. */
2957 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2960 base = gen_rtx_fmt_e (CONST, Pmode,
2961 gen_rtx_fmt_ee (PLUS, Pmode,
2963 gen_int_mode (off[mem_mode],
2967 base = gen_int_mode (off[mem_mode], Pmode);
2972 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2975 /* To avoid splitting addressing modes, pretend that no cse will
2977 old_cse_not_expected = cse_not_expected;
2978 cse_not_expected = true;
2979 addr = memory_address (mem_mode, addr);
2980 cse_not_expected = old_cse_not_expected;
2984 acost = seq_cost (seq);
2985 acost += address_cost (addr, mem_mode);
2989 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2992 /* On some targets, it is quite expensive to load symbol to a register,
2993 which makes addresses that contain symbols look much more expensive.
2994 However, the symbol will have to be loaded in any case before the
2995 loop (and quite likely we have it in register already), so it does not
2996 make much sense to penalize them too heavily. So make some final
2997 tweaks for the SYMBOL_PRESENT modes:
2999 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3000 var is cheaper, use this mode with small penalty.
3001 If VAR_PRESENT is true, try whether the mode with
3002 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3003 if this is the case, use it. */
3004 add_c = add_cost (Pmode);
3005 for (i = 0; i < 8; i++)
3008 off_p = (i >> 1) & 1;
3009 rat_p = (i >> 2) & 1;
3011 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3015 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3016 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3019 if (dump_file && (dump_flags & TDF_DETAILS))
3021 fprintf (dump_file, "Address costs:\n");
3023 for (i = 0; i < 16; i++)
3026 var_p = (i >> 1) & 1;
3027 off_p = (i >> 2) & 1;
3028 rat_p = (i >> 3) & 1;
3030 fprintf (dump_file, " ");
3032 fprintf (dump_file, "sym + ");
3034 fprintf (dump_file, "var + ");
3036 fprintf (dump_file, "cst + ");
3038 fprintf (dump_file, "rat * ");
3040 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3041 fprintf (dump_file, "index costs %d\n", acost);
3043 fprintf (dump_file, "\n");
3047 bits = GET_MODE_BITSIZE (Pmode);
3048 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3050 if ((offset >> (bits - 1) & 1))
3055 offset_p = (s_offset != 0
3056 && min_offset[mem_mode] <= s_offset
3057 && s_offset <= max_offset[mem_mode]);
3058 ratio_p = (ratio != 1
3059 && multiplier_allowed_in_address_p (ratio, mem_mode));
3061 if (ratio != 1 && !ratio_p)
3062 cost += multiply_by_cost (ratio, Pmode);
3064 if (s_offset && !offset_p && !symbol_present)
3065 cost += add_cost (Pmode);
3067 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3068 return cost + acost;
3071 /* Estimates cost of forcing expression EXPR into a variable. */
/* NOTE(review): this excerpt elides intermediate source lines (some braces
   and statements are missing); code is kept byte-identical, comments only. */
3074 force_expr_to_var_cost (tree expr)
/* One-time lazily measured reference costs, shared across all calls. */
3076 static bool costs_initialized = false;
3077 static unsigned integer_cost;
3078 static unsigned symbol_cost;
3079 static unsigned address_cost;
3081 unsigned cost0, cost1, cost;
3082 enum machine_mode mode;
/* First call: measure representative costs using a dummy static variable. */
3084 if (!costs_initialized)
3086 tree type = build_pointer_type (integer_type_node);
3090 var = create_tmp_var_raw (integer_type_node, "test_var");
3091 TREE_STATIC (var) = 1;
3092 x = produce_memory_decl_rtl (var, NULL);
3093 SET_DECL_RTL (var, x);
3095 integer_cost = computation_cost (build_int_cst (integer_type_node,
3098 addr = build1 (ADDR_EXPR, type, var);
/* The "+ 1" mildly penalizes symbol/address forms relative to plain
   constants -- presumably a tie-breaker; confirm against full source. */
3099 symbol_cost = computation_cost (addr) + 1;
3102 = computation_cost (build2 (PLUS_EXPR, type,
3104 build_int_cst (type, 2000))) + 1;
3105 if (dump_file && (dump_flags & TDF_DETAILS))
3107 fprintf (dump_file, "force_expr_to_var_cost:\n");
3108 fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3109 fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3110 fprintf (dump_file, "  address %d\n", (int) address_cost);
3111 fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3112 fprintf (dump_file, "\n");
3115 costs_initialized = true;
/* SSA names and invariant constants/decls are cheap or free. */
3120 if (SSA_VAR_P (expr))
3123 if (TREE_INVARIANT (expr))
3125 if (TREE_CODE (expr) == INTEGER_CST)
3126 return integer_cost;
3128 if (TREE_CODE (expr) == ADDR_EXPR)
3130 tree obj = TREE_OPERAND (expr, 0);
3132 if (TREE_CODE (obj) == VAR_DECL
3133 || TREE_CODE (obj) == PARM_DECL
3134 || TREE_CODE (obj) == RESULT_DECL)
3138 return address_cost;
3141 switch (TREE_CODE (expr))
/* Binary arithmetic: recursively cost operands that are not already
   gimple values. */
3146 op0 = TREE_OPERAND (expr, 0);
3147 op1 = TREE_OPERAND (expr, 1);
3151 if (is_gimple_val (op0))
3154 cost0 = force_expr_to_var_cost (op0);
3156 if (is_gimple_val (op1))
3159 cost1 = force_expr_to_var_cost (op1);
3164 /* Just an arbitrary value, FIXME. */
3165 return target_spill_cost;
3168 mode = TYPE_MODE (TREE_TYPE (expr));
3169 switch (TREE_CODE (expr))
3173 cost = add_cost (mode);
/* Multiplication is costed only when one operand is a constant fitting
   in a HOST_WIDE_INT; otherwise fall back to the spill cost. */
3177 if (cst_and_fits_in_hwi (op0))
3178 cost = multiply_by_cost (int_cst_value (op0), mode);
3179 else if (cst_and_fits_in_hwi (op1))
3180 cost = multiply_by_cost (int_cst_value (op1), mode);
3182 return target_spill_cost;
3192 /* Bound the cost by target_spill_cost.  The parts of complicated
3193 computations often are either loop invariant or at least can
3194 be shared between several iv uses, so letting this grow without
3195 limits would not give reasonable results. */
3196 return cost < target_spill_cost ? cost : target_spill_cost;
3199 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3200 invariants the computation depends on. */
3203 force_var_cost (struct ivopts_data *data,
3204 tree expr, bitmap *depends_on)
/* Record the invariants EXPR depends on into DEPENDS_ON, then delegate
   the actual cost estimate to force_expr_to_var_cost. */
3208 fd_ivopts_data = data;
3209 walk_tree (&expr, find_depends, depends_on, NULL);
3212 return force_expr_to_var_cost (expr);
3215 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3216 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3217 to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3218 invariants the computation depends on. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3221 split_address_cost (struct ivopts_data *data,
3222 tree addr, bool *symbol_present, bool *var_present,
3223 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3226 HOST_WIDE_INT bitsize;
3227 HOST_WIDE_INT bitpos;
3229 enum machine_mode mode;
3230 int unsignedp, volatilep;
/* Decompose the reference; only the simple case is handled below. */
3232 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3233 &unsignedp, &volatilep, false);
/* Not a byte-aligned access to a plain VAR_DECL: treat the whole address
   as an opaque variable and charge the spill cost. */
3236 || bitpos % BITS_PER_UNIT != 0
3237 || TREE_CODE (core) != VAR_DECL)
3239 *symbol_present = false;
3240 *var_present = true;
3241 fd_ivopts_data = data;
3242 walk_tree (&addr, find_depends, depends_on, NULL);
3243 return target_spill_cost;
3246 *offset += bitpos / BITS_PER_UNIT;
/* Static/external decls contribute a symbol part; locals a variable part. */
3247 if (TREE_STATIC (core)
3248 || DECL_EXTERNAL (core))
3250 *symbol_present = true;
3251 *var_present = false;
3255 *symbol_present = false;
3256 *var_present = true;
3260 /* Estimates cost of expressing difference of addresses E1 - E2 as
3261 var + symbol + offset.  The value of offset is added to OFFSET,
3262 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3263 part is missing.  DEPENDS_ON is a set of the invariants the computation
3267 ptr_difference_cost (struct ivopts_data *data,
3268 tree e1, tree e2, bool *symbol_present, bool *var_present,
3269 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3271 HOST_WIDE_INT diff = 0;
/* The caller guarantees E1 is an address. */
3274 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Fast path: a compile-time constant difference folds entirely into
   OFFSET, leaving no symbol or variable part. */
3276 if (ptr_difference_const (e1, e2, &diff))
3279 *symbol_present = false;
3280 *var_present = false;
/* E2 == 0: just split E1's address into its symbol/var/offset parts. */
3284 if (e2 == integer_zero_node)
3285 return split_address_cost (data, TREE_OPERAND (e1, 0),
3286 symbol_present, var_present, offset, depends_on);
3288 *symbol_present = false;
3289 *var_present = true;
/* General case: force both operands to variables and pay for the add. */
3291 cost = force_var_cost (data, e1, depends_on);
3292 cost += force_var_cost (data, e2, depends_on);
3293 cost += add_cost (Pmode);
3298 /* Estimates cost of expressing difference E1 - E2 as
3299 var + symbol + offset.  The value of offset is added to OFFSET,
3300 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3301 part is missing.  DEPENDS_ON is a set of the invariants the computation
3305 difference_cost (struct ivopts_data *data,
3306 tree e1, tree e2, bool *symbol_present, bool *var_present,
3307 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3310 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3311 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands first; they go to OFFSET. */
3313 e1 = strip_offset (e1, &off1);
3314 e2 = strip_offset (e2, &off2);
3315 *offset += off1 - off2;
/* Pointer operands are handled by the address-difference variant. */
3320 if (TREE_CODE (e1) == ADDR_EXPR)
3321 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3323 *symbol_present = false;
/* Identical operands cancel out: no variable part remains. */
3325 if (operand_equal_p (e1, e2, 0))
3327 *var_present = false;
3330 *var_present = true;
3331 if (integer_zerop (e2))
3332 return force_var_cost (data, e1, depends_on);
/* E1 == 0: the result is -E2, costed as a multiply by -1. */
3334 if (integer_zerop (e1))
3336 cost = force_var_cost (data, e2, depends_on);
3337 cost += multiply_by_cost (-1, mode);
/* General case: two forced variables plus one subtraction. */
3342 cost = force_var_cost (data, e1, depends_on);
3343 cost += force_var_cost (data, e2, depends_on);
3344 cost += add_cost (mode);
3349 /* Determines the cost of the computation by that USE is expressed
3350 from induction variable CAND.  If ADDRESS_P is true, we just need
3351 to create an address from it, otherwise we want to get it into
3352 register.  A set of invariants we depend on is stored in
3353 DEPENDS_ON.  AT is the statement at that the value is computed. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3356 get_computation_cost_at (struct ivopts_data *data,
3357 struct iv_use *use, struct iv_cand *cand,
3358 bool address_p, bitmap *depends_on, tree at)
3360 tree ubase = use->iv->base, ustep = use->iv->step;
3362 tree utype = TREE_TYPE (ubase), ctype;
3363 unsigned HOST_WIDE_INT cstepi, offset = 0;
3364 HOST_WIDE_INT ratio, aratio;
3365 bool var_present, symbol_present;
3366 unsigned cost = 0, n_sums;
3371 /* Only consider real candidates. */
3375 cbase = cand->iv->base;
3376 cstep = cand->iv->step;
3377 ctype = TREE_TYPE (cbase);
3379 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3381 /* We do not have a precision to express the values of use. */
3387 /* Do not try to express address of an object with computation based
3388 on address of a different object.  This may cause problems in rtl
3389 level alias analysis (that does not expect this to be happening,
3390 as this is illegal in C), and would be unlikely to be useful
3392 if (use->iv->base_object
3393 && cand->iv->base_object
3394 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3398 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3400 /* TODO -- add direct handling of this case.  */
3404 /* CSTEPI is removed from the offset in case statement is after the
3405 increment.  If the step is not constant, we use zero instead.
3406 This is a bit imprecise (there is the extra addition), but
3407 redundancy elimination is likely to transform the code so that
3408 it uses value of the variable before increment anyway,
3409 so it is not that much unrealistic. */
3410 if (cst_and_fits_in_hwi (cstep))
3411 cstepi = int_cst_value (cstep);
/* RATIO = ustep / cstep; give up unless it fits a signed HWI. */
3415 if (!constant_multiple_of (ustep, cstep, &rat))
3418 if (double_int_fits_in_shwi_p (rat))
3419 ratio = double_int_to_shwi (rat);
3423 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3424 or ratio == 1, it is better to handle this like
3426 ubase - ratio * cbase + ratio * var
3428 (also holds in the case ratio == -1, TODO. */
3430 if (cst_and_fits_in_hwi (cbase))
3432 offset = - ratio * int_cst_value (cbase);
3433 cost += difference_cost (data,
3434 ubase, integer_zero_node,
3435 &symbol_present, &var_present, &offset,
3438 else if (ratio == 1)
3440 cost += difference_cost (data,
3442 &symbol_present, &var_present, &offset,
/* General case: force cbase to a variable and pay for the extra add. */
3447 cost += force_var_cost (data, cbase, depends_on);
3448 cost += add_cost (TYPE_MODE (ctype));
3449 cost += difference_cost (data,
3450 ubase, integer_zero_node,
3451 &symbol_present, &var_present, &offset,
3455 /* If we are after the increment, the value of the candidate is higher by
3457 if (stmt_after_increment (data->current_loop, cand, at))
3458 offset -= ratio * cstepi;
3460 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3461 (symbol/var/const parts may be omitted).  If we are looking for an address,
3462 find the cost of addressing this. */
3464 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3465 TYPE_MODE (TREE_TYPE (*use->op_p)));
3467 /* Otherwise estimate the costs for computing the expression. */
3468 aratio = ratio > 0 ? ratio : -ratio;
3469 if (!symbol_present && !var_present && !offset)
3472 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3478 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3482 /* Symbol + offset should be compile-time computable. */
3483 && (symbol_present || offset))
3486 return cost + n_sums * add_cost (TYPE_MODE (ctype));
/* Fallback path (apparently when the analytic estimate is unavailable). */
3490 /* Just get the expression, expand it and measure the cost. */
3491 tree comp = get_computation_at (data->current_loop, use, cand, at);
3497 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3499 return computation_cost (comp);
3503 /* Determines the cost of the computation by that USE is expressed
3504 from induction variable CAND.  If ADDRESS_P is true, we just need
3505 to create an address from it, otherwise we want to get it into
3506 register.  A set of invariants we depend on is stored in
/* Thin wrapper: evaluate the cost at the use's own statement. */
3510 get_computation_cost (struct ivopts_data *data,
3511 struct iv_use *use, struct iv_cand *cand,
3512 bool address_p, bitmap *depends_on)
3514 return get_computation_cost_at (data,
3515 use, cand, address_p, depends_on, use->stmt);
3518 /* Determines cost of basing replacement of USE on CAND in a generic
3522 determine_use_iv_cost_generic (struct ivopts_data *data,
3523 struct iv_use *use, struct iv_cand *cand)
3528 /* The simple case first -- if we need to express value of the preserved
3529 original biv, the cost is 0.  This also prevents us from counting the
3530 cost of increment twice -- once at this use and once in the cost of
3532 if (cand->pos == IP_ORIGINAL
3533 && cand->incremented_at == use->stmt)
3535 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
/* Otherwise compute and record the full expression cost. */
3539 cost = get_computation_cost (data, use, cand, false, &depends_on);
3540 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
/* Return whether CAND is usable for USE at all (finite cost). */
3542 return cost != INFTY;
3545 /* Determines cost of basing replacement of USE on CAND in an address. */
3548 determine_use_iv_cost_address (struct ivopts_data *data,
3549 struct iv_use *use, struct iv_cand *cand)
/* Address variant: ask for the cost of forming an address (true flag). */
3552 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3554 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
/* Finite cost means CAND can express this address use. */
3556 return cost != INFTY;
3559 /* Computes value of candidate CAND at position AT in iteration NITER, and
3560 stores it to VAL. */
3563 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3566 aff_tree step, delta, nit;
3567 struct iv *iv = cand->iv;
3568 tree type = TREE_TYPE (iv->base);
/* DELTA = NITER * STEP, computed in affine-combination form. */
3570 tree_to_aff_combination (iv->step, type, &step);
3571 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3572 aff_combination_convert (&nit, type);
3573 aff_combination_mult (&nit, &step, &delta);
/* After the increment the candidate is one extra step ahead. */
3574 if (stmt_after_increment (loop, cand, at))
3575 aff_combination_add (&delta, &step);
3577 tree_to_aff_combination (iv->base, type, val);
3578 aff_combination_add (val, &delta);
3581 /* Returns period of induction variable iv. */
3584 iv_period (struct iv *iv)
3586 tree step = iv->step, period, type;
/* Only constant steps are supported here. */
3589 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3591 /* Period of the iv is gcd (step, type range).  Since type range is power
3592 of two, it suffices to determine the maximum power of two that divides
3594 pow2div = num_ending_zeros (step);
3595 type = unsigned_type_for (TREE_TYPE (step));
/* period = 2^(precision - pow2div) - 1, as an all-ones low-bits mask. */
3597 period = build_low_bits_mask (type,
3598 (TYPE_PRECISION (type)
3599 - tree_low_cst (pow2div, 1)));
3604 /* Returns the comparison operator used when eliminating the iv USE. */
3606 static enum tree_code
3607 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3609 struct loop *loop = data->current_loop;
/* Find the successor edge that actually leaves the loop. */
3613 ex_bb = bb_for_stmt (use->stmt);
3614 exit = EDGE_SUCC (ex_bb, 0);
3615 if (flow_bb_inside_loop_p (loop, exit->dest))
3616 exit = EDGE_SUCC (ex_bb, 1);
/* Exit on "true" means the loop continues while the values differ. */
3618 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3621 /* Check whether it is possible to express the condition in USE by comparison
3622 of candidate CAND.  If so, store the value compared with to BOUND. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3625 may_eliminate_iv (struct ivopts_data *data,
3626 struct iv_use *use, struct iv_cand *cand, tree *bound)
3631 tree wider_type, period, per_type;
3632 struct loop *loop = data->current_loop;
/* Elimination needs a constant candidate step. */
3635 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3638 /* For now works only for exits that dominate the loop latch.  TODO -- extend
3639 for other conditions inside loop body. */
3640 ex_bb = bb_for_stmt (use->stmt);
3641 if (use->stmt != last_stmt (ex_bb)
3642 || TREE_CODE (use->stmt) != COND_EXPR)
3644 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Locate the (unique) edge leaving the loop; bail out if both successors
   stay inside. */
3647 exit = EDGE_SUCC (ex_bb, 0);
3648 if (flow_bb_inside_loop_p (loop, exit->dest))
3649 exit = EDGE_SUCC (ex_bb, 1);
3650 if (flow_bb_inside_loop_p (loop, exit->dest))
/* Need a known iteration count for this exit. */
3653 nit = niter_for_exit (data, exit);
3657 nit_type = TREE_TYPE (nit);
3659 /* Determine whether we may use the variable to test whether niter iterations
3660 elapsed.  This is the case iff the period of the induction variable is
3661 greater than the number of iterations. */
3662 period = iv_period (cand->iv);
3665 per_type = TREE_TYPE (period);
/* Compare period and niter in the wider of the two types. */
3667 wider_type = TREE_TYPE (period);
3668 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3669 wider_type = per_type;
3671 wider_type = nit_type;
3673 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3674 fold_convert (wider_type, period),
3675 fold_convert (wider_type, nit))))
/* BOUND = value of CAND after NIT iterations; the eliminated condition
   compares CAND against it. */
3678 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3679 *bound = aff_combination_to_tree (&bnd);
3683 /* Determines cost of basing replacement of USE on CAND in a condition. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3686 determine_use_iv_cost_condition (struct ivopts_data *data,
3687 struct iv_use *use, struct iv_cand *cand)
3689 tree bound = NULL_TREE;
3691 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3692 unsigned elim_cost, express_cost, cost;
3695 /* Only consider real candidates. */
3698 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3702 /* Try iv elimination. */
3703 if (may_eliminate_iv (data, use, cand, &bound))
3704 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3708 /* Try expressing the original giv.  If it is compared with an invariant,
3709 note that we cannot get rid of it. */
3710 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3713 express_cost = get_computation_cost (data, use, cand, false,
3714 &depends_on_express);
3715 fd_ivopts_data = data;
3716 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3718 /* Choose the better approach. */
3719 if (elim_cost < express_cost)
/* Take ownership of the winning depends-on bitmap by nulling the source
   so it is not freed below. */
3722 depends_on = depends_on_elim;
3723 depends_on_elim = NULL;
3727 cost = express_cost;
3728 depends_on = depends_on_express;
3729 depends_on_express = NULL;
3733 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
/* Free whichever bitmap was not handed to set_use_iv_cost. */
3735 if (depends_on_elim)
3736 BITMAP_FREE (depends_on_elim);
3737 if (depends_on_express)
3738 BITMAP_FREE (depends_on_express);
3740 return cost != INFTY;
3743 /* Determines cost of basing replacement of USE on CAND.  Returns false
3744 if USE cannot be based on CAND. */
/* Dispatch on the kind of use to the matching cost routine. */
3747 determine_use_iv_cost (struct ivopts_data *data,
3748 struct iv_use *use, struct iv_cand *cand)
3752 case USE_NONLINEAR_EXPR:
3753 return determine_use_iv_cost_generic (data, use, cand);
3756 return determine_use_iv_cost_address (data, use, cand);
3759 return determine_use_iv_cost_condition (data, use, cand);
3766 /* Determines costs of basing the use of the iv on an iv candidate. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3769 determine_use_iv_costs (struct ivopts_data *data)
3773 struct iv_cand *cand;
3774 bitmap to_clear = BITMAP_ALLOC (NULL);
3776 alloc_use_cost_map (data);
3778 for (i = 0; i < n_iv_uses (data); i++)
3780 use = iv_use (data, i);
/* Either cost every (use, cand) pair, or only the related candidates. */
3782 if (data->consider_all_candidates)
3784 for (j = 0; j < n_iv_cands (data); j++)
3786 cand = iv_cand (data, j);
3787 determine_use_iv_cost (data, use, cand);
3794 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3796 cand = iv_cand (data, j);
3797 if (!determine_use_iv_cost (data, use, cand))
3798 bitmap_set_bit (to_clear, j);
3801 /* Remove the candidates for that the cost is infinite from
3802 the list of related candidates. */
3803 bitmap_and_compl_into (use->related_cands, to_clear);
3804 bitmap_clear (to_clear);
3808 BITMAP_FREE (to_clear);
/* Optional debug dump of the full use/candidate cost table. */
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, "Use-candidate costs:\n");
3814 for (i = 0; i < n_iv_uses (data); i++)
3816 use = iv_use (data, i);
3818 fprintf (dump_file, "Use %d:\n", i);
3819 fprintf (dump_file, "  cand\tcost\tdepends on\n");
3820 for (j = 0; j < use->n_map_members; j++)
3822 if (!use->cost_map[j].cand
3823 || use->cost_map[j].cost == INFTY)
3826 fprintf (dump_file, "  %d\t%d\t",
3827 use->cost_map[j].cand->id,
3828 use->cost_map[j].cost);
3829 if (use->cost_map[j].depends_on)
3830 bitmap_print (dump_file,
3831 use->cost_map[j].depends_on, "","");
3832 fprintf (dump_file, "\n");
3835 fprintf (dump_file, "\n");
3837 fprintf (dump_file, "\n");
3841 /* Determines cost of the candidate CAND. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3844 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3846 unsigned cost_base, cost_step;
3855 /* There are two costs associated with the candidate -- its increment
3856 and its initialization.  The second is almost negligible for any loop
3857 that rolls enough, so we take it just very little into account. */
3859 base = cand->iv->base;
3860 cost_base = force_var_cost (data, base, NULL);
3861 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
/* Initialization is amortized over the expected iteration count. */
3863 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3865 /* Prefer the original iv unless we may gain something by replacing it;
3866 this is not really relevant for artificial ivs created by other
3868 if (cand->pos == IP_ORIGINAL
3869 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3872 /* Prefer not to insert statements into latch unless there are some
3873 already (so that we do not create unnecessary jumps). */
3874 if (cand->pos == IP_END
3875 && empty_block_p (ip_end_pos (data->current_loop)))
3879 /* Determines costs of computation of the candidates. */
3882 determine_iv_costs (struct ivopts_data *data)
3886 if (dump_file && (dump_flags & TDF_DETAILS))
3888 fprintf (dump_file, "Candidate costs:\n");
3889 fprintf (dump_file, "  cand\tcost\n");
/* Cost each candidate and optionally dump the result. */
3892 for (i = 0; i < n_iv_cands (data); i++)
3894 struct iv_cand *cand = iv_cand (data, i);
3896 determine_iv_cost (data, cand);
3898 if (dump_file && (dump_flags & TDF_DETAILS))
3899 fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3903 fprintf (dump_file, "\n");
3906 /* Calculates cost for having SIZE induction variables. */
/* Delegates to the target-dependent register-pressure model. */
3909 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3911 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3914 /* For each size of the induction variable set determine the penalty. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
3917 determine_set_costs (struct ivopts_data *data)
3921 struct loop *loop = data->current_loop;
3924 /* We use the following model (definitely improvable, especially the
3925 cost function -- TODO):
3927 We estimate the number of registers available (using MD data), name it A.
3929 We estimate the number of registers used by the loop, name it U.  This
3930 number is obtained as the number of loop phi nodes (not counting virtual
3931 registers and bivs) + the number of variables from outside of the loop.
3933 We set a reserve R (free regs that are used for temporary computations,
3934 etc.).  For now the reserve is a constant 3.
3936 Let I be the number of induction variables.
3938 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3939 make a lot of ivs without a reason).
3940 -- if A - R < U + I <= A, the cost is I * PRES_COST
3941 -- if U + I > A, the cost is I * PRES_COST and
3942 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3944 if (dump_file && (dump_flags & TDF_DETAILS))
3946 fprintf (dump_file, "Global costs:\n");
3947 fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3948 fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3949 fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3950 fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
/* Count registers used: non-virtual, non-biv header phis ... */
3954 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3956 op = PHI_RESULT (phi);
3958 if (!is_gimple_reg (op))
3961 if (get_iv (data, op))
/* ... plus invariants with nonlinear uses. */
3967 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3969 struct version_info *info = ver_info (data, j);
3971 if (info->inv_id && info->has_nonlin_use)
3975 data->regs_used = n;
3976 if (dump_file && (dump_flags & TDF_DETAILS))
3977 fprintf (dump_file, "  regs_used %d\n", n);
3979 if (dump_file && (dump_flags & TDF_DETAILS))
3981 fprintf (dump_file, "  cost for size:\n");
3982 fprintf (dump_file, "  ivs\tcost\n");
3983 for (j = 0; j <= 2 * target_avail_regs; j++)
3984 fprintf (dump_file, "  %d\t%d\n", j,
3985 ivopts_global_cost_for_size (data, j));
3986 fprintf (dump_file, "\n");
3990 /* Returns true if A is a cheaper cost pair than B. */
3993 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
/* Compare primarily by pair cost; ties broken by candidate cost. */
4001 if (a->cost < b->cost)
4004 if (a->cost > b->cost)
4007 /* In case the costs are the same, prefer the cheaper candidate. */
4008 if (a->cand->cost < b->cand->cost)
4014 /* Computes the cost field of IVS structure. */
4017 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
/* Total = per-use costs + candidate costs + register-pressure penalty. */
4021 cost += ivs->cand_use_cost;
4022 cost += ivs->cand_cost;
4023 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4028 /* Remove invariants in set INVS to set IVS. */
4031 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
/* Decrement the per-invariant use counters for every member of INVS. */
4039 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4041 ivs->n_invariant_uses[iid]--;
4042 if (ivs->n_invariant_uses[iid] == 0)
4047 /* Set USE not to be expressed by any candidate in IVS. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
4050 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4053 unsigned uid = use->id, cid;
4054 struct cost_pair *cp;
4056 cp = ivs->cand_for_use[uid];
/* Detach the cost pair and drop the candidate's use count. */
4062 ivs->cand_for_use[uid] = NULL;
4063 ivs->n_cand_uses[cid]--;
/* Last use of this candidate: remove it from the set entirely. */
4065 if (ivs->n_cand_uses[cid] == 0)
4067 bitmap_clear_bit (ivs->cands, cid);
4068 /* Do not count the pseudocandidates. */
4072 ivs->cand_cost -= cp->cand->cost;
4074 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4077 ivs->cand_use_cost -= cp->cost;
4079 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4080 iv_ca_recount_cost (data, ivs);
4083 /* Add invariants in set INVS to set IVS. */
4086 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
/* Increment the per-invariant use counters for every member of INVS. */
4094 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4096 ivs->n_invariant_uses[iid]++;
4097 if (ivs->n_invariant_uses[iid] == 1)
4102 /* Set cost pair for USE in set IVS to CP. */
/* NOTE(review): intermediate lines are elided in this excerpt. */
4105 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4106 struct iv_use *use, struct cost_pair *cp)
4108 unsigned uid = use->id, cid;
/* No-op if USE already uses CP. */
4110 if (ivs->cand_for_use[uid] == cp)
/* Drop the previous assignment before installing the new one. */
4113 if (ivs->cand_for_use[uid])
4114 iv_ca_set_no_cp (data, ivs, use);
4121 ivs->cand_for_use[uid] = cp;
4122 ivs->n_cand_uses[cid]++;
/* First use of this candidate: add it (and its costs) to the set. */
4123 if (ivs->n_cand_uses[cid] == 1)
4125 bitmap_set_bit (ivs->cands, cid);
4126 /* Do not count the pseudocandidates. */
4130 ivs->cand_cost += cp->cand->cost;
4132 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4135 ivs->cand_use_cost += cp->cost;
4136 iv_ca_set_add_invariants (ivs, cp->depends_on);
4137 iv_ca_recount_cost (data, ivs);
4141 /* Extend set IVS by expressing USE by some of the candidates in it
4145 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4148 struct cost_pair *best_cp = NULL, *cp;
/* Uses must be added in id order. */
4152 gcc_assert (ivs->upto >= use->id);
4154 if (ivs->upto == use->id)
/* Pick the cheapest cost pair among candidates already in the set. */
4160 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4162 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4164 if (cheaper_cost_pair (cp, best_cp))
4168 iv_ca_set_cp (data, ivs, use, best_cp);
4171 /* Get cost for assignment IVS. */
/* Infinite if some use could not be expressed by any candidate. */
4174 iv_ca_cost (struct iv_ca *ivs)
4176 return (ivs->bad_uses ? INFTY : ivs->cost);
4179 /* Returns true if all dependences of CP are among invariants in IVS. */
4182 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
/* No dependences at all trivially satisfies the check. */
4187 if (!cp->depends_on)
/* Every required invariant must already have a nonzero use count. */
4190 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4192 if (ivs->n_invariant_uses[i] == 0)
4199 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4200 it before NEXT_CHANGE. */
4202 static struct iv_ca_delta *
4203 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4204 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
/* Allocate a new list node and prepend it to NEXT_CHANGE. */
4206 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4209 change->old_cp = old_cp;
4210 change->new_cp = new_cp;
4211 change->next_change = next_change;
4216 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4219 static struct iv_ca_delta *
4220 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4222 struct iv_ca_delta *last;
/* Append L2 after the last node of L1. */
4230 for (last = l1; last->next_change; last = last->next_change)
4232 last->next_change = l2;
4237 /* Returns candidate by that USE is expressed in IVS. */
/* Simple accessor into the per-use cost-pair table. */
4239 static struct cost_pair *
4240 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4242 return ivs->cand_for_use[use->id];
4245 /* Reverse the list of changes DELTA, forming the inverse to it. */
4247 static struct iv_ca_delta *
4248 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4250 struct iv_ca_delta *act, *next, *prev = NULL;
4251 struct cost_pair *tmp;
/* Reverse the links and swap old/new in each node, so applying the
   result undoes the original delta. */
4253 for (act = delta; act; act = next)
4255 next = act->next_change;
4256 act->next_change = prev;
4260 act->old_cp = act->new_cp;
4267 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4268 reverted instead. */
4271 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4272 struct iv_ca_delta *delta, bool forward)
4274 struct cost_pair *from, *to;
4275 struct iv_ca_delta *act;
/* For a revert, walk the inverse delta instead. */
4278 delta = iv_ca_delta_reverse (delta);
4280 for (act = delta; act; act = act->next_change)
/* Sanity check: the set must currently hold the "from" pair. */
4284 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4285 iv_ca_set_cp (data, ivs, act->use, to);
/* Restore DELTA to its original orientation. */
4289 iv_ca_delta_reverse (delta);
4292 /* Returns true if CAND is used in IVS. */
4295 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4297 return ivs->n_cand_uses[cand->id] > 0;
4300 /* Returns number of induction variable candidates in the set IVS. */
4303 iv_ca_n_cands (struct iv_ca *ivs)
4305 return ivs->n_cands;
4308 /* Free the list of changes DELTA. */
4311 iv_ca_delta_free (struct iv_ca_delta **delta)
4313 struct iv_ca_delta *act, *next;
/* Save the next pointer before each node is freed. */
4315 for (act = *delta; act; act = next)
4317 next = act->next_change;
4324 /* Allocates new iv candidates assignment. */
4326 static struct iv_ca *
4327 iv_ca_new (struct ivopts_data *data)
4329 struct iv_ca *nw = XNEW (struct iv_ca);
/* Zero-initialized per-use and per-candidate bookkeeping arrays. */
4333 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4334 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4335 nw->cands = BITMAP_ALLOC (NULL);
4338 nw->cand_use_cost = 0;
4340 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4346 /* Free memory occupied by the set IVS. */
/* Releases every array allocated by iv_ca_new. */
4349 iv_ca_free (struct iv_ca **ivs)
4351 free ((*ivs)->cand_for_use);
4352 free ((*ivs)->n_cand_uses);
4353 BITMAP_FREE ((*ivs)->cands);
4354 free ((*ivs)->n_invariant_uses);
4359 /* Dumps IVS to FILE. */
4362 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4364 const char *pref = "  invariants ";
/* Print total cost, candidate bitmap, and referenced invariants. */
4367 fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4368 bitmap_print (file, ivs->cands, "  candidates ","\n");
4370 for (i = 1; i <= data->max_inv_id; i++)
4371 if (ivs->n_invariant_uses[i])
4373 fprintf (file, "%s%d", pref, i);
4376 fprintf (file, "\n");
4379 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4380 new set, and store differences in DELTA. Number of induction variables
4381 in the new set is stored to N_IVS. */
4384 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4385 struct iv_cand *cand, struct iv_ca_delta **delta,
4390 struct cost_pair *old_cp, *new_cp;
4393 for (i = 0; i < ivs->upto; i++)
4395 use = iv_use (data, i);
4396 old_cp = iv_ca_cand_for_use (ivs, use);
/* Skip uses that are already expressed by CAND.  */
4399 && old_cp->cand == cand)
4402 new_cp = get_use_iv_cost (data, use, cand);
/* CAND's invariant dependencies for this use must be satisfiable
   within IVS.  */
4406 if (!iv_ca_has_deps (ivs, new_cp))
/* Record the change only when it improves the cost for this use.  */
4409 if (!cheaper_cost_pair (new_cp, old_cp))
4412 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Temporarily apply the accumulated delta to measure the cost of the
   extended set, then revert so IVS is left unchanged.  */
4415 iv_ca_delta_commit (data, ivs, *delta, true);
4416 cost = iv_ca_cost (ivs);
4418 *n_ivs = iv_ca_n_cands (ivs);
4419 iv_ca_delta_commit (data, ivs, *delta, false);
4424 /* Try narrowing set IVS by removing CAND. Return the cost of
4425 the new set and store the differences in DELTA. */
4428 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4429 struct iv_cand *cand, struct iv_ca_delta **delta)
4433 struct cost_pair *old_cp, *new_cp, *cp;
4435 struct iv_cand *cnd;
4439 for (i = 0; i < n_iv_uses (data); i++)
4441 use = iv_use (data, i);
/* Only uses currently expressed by CAND need a replacement.  */
4443 old_cp = iv_ca_cand_for_use (ivs, use);
4444 if (old_cp->cand != cand)
/* Search the remaining candidates of IVS for the cheapest valid
   replacement cost pair for USE.  */
4449 if (data->consider_all_candidates)
4451 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4456 cnd = iv_cand (data, ci);
4458 cp = get_use_iv_cost (data, use, cnd);
4461 if (!iv_ca_has_deps (ivs, cp))
4464 if (!cheaper_cost_pair (cp, new_cp))
/* Otherwise restrict the search to candidates related to USE that
   are also in the set.  */
4472 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4477 cnd = iv_cand (data, ci);
4479 cp = get_use_iv_cost (data, use, cnd);
4482 if (!iv_ca_has_deps (ivs, cp))
4485 if (!cheaper_cost_pair (cp, new_cp))
/* No replacement exists for some use: removing CAND is impossible.
   Discard the partial delta (presumably an infinite cost is returned
   here; the return is not visible in this excerpt).  */
4494 iv_ca_delta_free (delta);
4498 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Measure the cost of the narrowed set, then revert.  */
4501 iv_ca_delta_commit (data, ivs, *delta, true);
4502 cost = iv_ca_cost (ivs);
4503 iv_ca_delta_commit (data, ivs, *delta, false);
4508 /* Try optimizing the set of candidates IVS by removing candidates different
4509 from to EXCEPT_CAND from it. Return cost of the new set, and store
4510 differences in DELTA. */
4513 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4514 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4517 struct iv_ca_delta *act_delta, *best_delta;
4518 unsigned i, best_cost, acost;
4519 struct iv_cand *cand;
4522 best_cost = iv_ca_cost (ivs);
/* Try removing each candidate of the set in turn, keeping the
   cheapest resulting delta.  */
4524 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4526 cand = iv_cand (data, i);
/* EXCEPT_CAND is protected from removal.  */
4528 if (cand == except_cand)
4531 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4533 if (acost < best_cost)
4536 iv_ca_delta_free (&best_delta);
4537 best_delta = act_delta;
4540 iv_ca_delta_free (&act_delta);
4549 /* Recurse to possibly remove other unnecessary ivs. */
4550 iv_ca_delta_commit (data, ivs, best_delta, true);
4551 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4552 iv_ca_delta_commit (data, ivs, best_delta, false);
/* Chain the removal found at this level with those found by the
   recursive call.  */
4553 *delta = iv_ca_delta_join (best_delta, *delta);
4557 /* Tries to extend the sets IVS in the best possible way in order
4558 to express the USE. */
4561 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4564 unsigned best_cost, act_cost;
4567 struct iv_cand *cand;
4568 struct iv_ca_delta *best_delta = NULL, *act_delta;
4569 struct cost_pair *cp;
/* Register USE in the set and take the current assignment (if any)
   as the baseline; then clear USE's pairing so alternatives can be
   evaluated from a common state.  */
4571 iv_ca_add_use (data, ivs, use);
4572 best_cost = iv_ca_cost (ivs);
4574 cp = iv_ca_cand_for_use (ivs, use);
4577 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4578 iv_ca_set_no_cp (data, ivs, use);
4581 /* First try important candidates. Only if it fails, try the specific ones.
4582 Rationale -- in loops with many variables the best choice often is to use
4583 just one generic biv. If we added here many ivs specific to the uses,
4584 the optimization algorithm later would be likely to get stuck in a local
4585 minimum, thus causing us to create too many ivs. The approach from
4586 few ivs to more seems more likely to be successful -- starting from few
4587 ivs, replacing an expensive use by a specific iv should always be a
4589 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4591 cand = iv_cand (data, i);
/* Candidates already in the set were evaluated by the baseline.  */
4593 if (iv_ca_cand_used_p (ivs, cand))
4596 cp = get_use_iv_cost (data, use, cand);
/* Tentatively assign CAND to USE, cost the extended set, then
   undo the tentative assignment.  */
4600 iv_ca_set_cp (data, ivs, use, cp);
4601 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4602 iv_ca_set_no_cp (data, ivs, use);
4603 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4605 if (act_cost < best_cost)
4607 best_cost = act_cost;
4609 iv_ca_delta_free (&best_delta);
4610 best_delta = act_delta;
4613 iv_ca_delta_free (&act_delta);
/* Fall back to use-specific candidates only when no important
   candidate could express USE at finite cost.  */
4616 if (best_cost == INFTY)
4618 for (i = 0; i < use->n_map_members; i++)
4620 cp = use->cost_map + i;
4625 /* Already tried this. */
4626 if (cand->important)
4629 if (iv_ca_cand_used_p (ivs, cand))
4633 iv_ca_set_cp (data, ivs, use, cp);
4634 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4635 iv_ca_set_no_cp (data, ivs, use);
4636 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4639 if (act_cost < best_cost)
4641 best_cost = act_cost;
4644 iv_ca_delta_free (&best_delta);
4645 best_delta = act_delta;
4648 iv_ca_delta_free (&act_delta);
/* Commit the best alternative found; returns false when USE could
   not be expressed by any candidate (cost stayed infinite).  */
4652 iv_ca_delta_commit (data, ivs, best_delta, true);
4653 iv_ca_delta_free (&best_delta);
4655 return (best_cost != INFTY);
4658 /* Finds an initial assignment of candidates to uses. */
4660 static struct iv_ca *
4661 get_initial_solution (struct ivopts_data *data)
4663 struct iv_ca *ivs = iv_ca_new (data);
/* Express each use in turn; on failure the set is presumably freed
   and NULL returned (the failure path and the final return are not
   visible in this excerpt -- see the NULL check in the caller).  */
4666 for (i = 0; i < n_iv_uses (data); i++)
4667 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4676 /* Tries to improve set of induction variables IVS. */
4679 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4681 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4682 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4683 struct iv_cand *cand;
4685 /* Try extending the set of induction variables by one. */
4686 for (i = 0; i < n_iv_cands (data); i++)
4688 cand = iv_cand (data, i);
4690 if (iv_ca_cand_used_p (ivs, cand))
4693 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4697 /* If we successfully added the candidate and the set is small enough,
4698 try optimizing it by removing other candidates. */
4699 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
/* Apply the extension temporarily so pruning sees the extended set;
   CAND itself is protected from removal.  */
4701 iv_ca_delta_commit (data, ivs, act_delta, true);
4702 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4703 iv_ca_delta_commit (data, ivs, act_delta, false);
4704 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4707 if (acost < best_cost)
4710 iv_ca_delta_free (&best_delta);
4711 best_delta = act_delta;
4714 iv_ca_delta_free (&act_delta);
4719 /* Try removing the candidates from the set instead. */
4720 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4722 /* Nothing more we can do. */
/* Commit the best improvement found; the sanity check confirms the
   predicted cost matches the committed set's actual cost.  */
4727 iv_ca_delta_commit (data, ivs, best_delta, true);
4728 gcc_assert (best_cost == iv_ca_cost (ivs));
4729 iv_ca_delta_free (&best_delta);
4733 /* Attempts to find the optimal set of induction variables. We do simple
4734 greedy heuristic -- we try to replace at most one candidate in the selected
4735 solution and remove the unused ivs while this improves the cost. */
4737 static struct iv_ca *
4738 find_optimal_iv_set (struct ivopts_data *data)
4744 /* Get the initial solution. */
4745 set = get_initial_solution (data);
/* A NULL initial solution means some use could not be expressed.  */
4748 if (dump_file && (dump_flags & TDF_DETAILS))
4749 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4753 if (dump_file && (dump_flags & TDF_DETAILS))
4755 fprintf (dump_file, "Initial set of candidates:\n");
4756 iv_ca_dump (data, dump_file, set);
/* Iterate the greedy improvement step until it no longer lowers
   the cost.  */
4759 while (try_improve_iv_set (data, set))
4761 if (dump_file && (dump_flags & TDF_DETAILS))
4763 fprintf (dump_file, "Improved to:\n");
4764 iv_ca_dump (data, dump_file, set);
4768 if (dump_file && (dump_flags & TDF_DETAILS))
4769 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
/* Record the chosen candidate on each use for the rewrite phase.  */
4771 for (i = 0; i < n_iv_uses (data); i++)
4773 use = iv_use (data, i);
4774 use->selected = iv_ca_cand_for_use (set, use)->cand;
4780 /* Creates a new induction variable corresponding to CAND. */
4783 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4785 block_stmt_iterator incr_pos;
/* Choose where the increment statement is inserted; the dispatch on
   cand->pos (IP_NORMAL / IP_END / IP_ORIGINAL, presumably) is partly
   missing from this excerpt.  */
4795 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4799 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4804 /* Mark that the iv is preserved. */
4805 name_info (data, cand->var_before)->preserve_biv = true;
4806 name_info (data, cand->var_after)->preserve_biv = true;
4808 /* Rewrite the increment so that it uses var_before directly. */
4809 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4814 gimple_add_tmp_var (cand->var_before);
4815 add_referenced_var (cand->var_before);
/* Unshare base and step so the iv creation does not share trees with
   the analysis data.  */
4817 base = unshare_expr (cand->iv->base);
4819 create_iv (base, unshare_expr (cand->iv->step),
4820 cand->var_before, data->current_loop,
4821 &incr_pos, after, &cand->var_before, &cand->var_after);
4824 /* Creates new induction variables described in SET. */
4827 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4830 struct iv_cand *cand;
/* Materialize every candidate selected in the final set.  */
4833 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4835 cand = iv_cand (data, i);
4836 create_new_iv (data, cand);
4840 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4841 is true, remove also the ssa name defined by the statement. */
4844 remove_statement (tree stmt, bool including_defined_name)
/* PHI nodes have their own removal primitive ...  */
4846 if (TREE_CODE (stmt) == PHI_NODE)
4848 remove_phi_node (stmt, NULL_TREE, including_defined_name);
/* ... ordinary statements are removed through a bsi and their defs
   released.  */
4852 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4854 bsi_remove (&bsi, true);
4855 release_defs (stmt);
4859 /* Rewrites USE (definition of iv used in a nonlinear expression)
4860 using candidate CAND. */
4863 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4864 struct iv_use *use, struct iv_cand *cand)
4867 tree op, stmts, tgt, ass;
4868 block_stmt_iterator bsi, pbsi;
4870 /* An important special case -- if we are asked to express value of
4871 the original iv by itself, just exit; there is no need to
4872 introduce a new computation (that might also need casting the
4873 variable to unsigned and back). */
4874 if (cand->pos == IP_ORIGINAL
4875 && cand->incremented_at == use->stmt)
4877 tree step, ctype, utype;
4878 enum tree_code incr_code = PLUS_EXPR;
4880 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4881 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4883 step = cand->iv->step;
4884 ctype = TREE_TYPE (step);
4885 utype = TREE_TYPE (cand->var_after);
/* A negated step becomes a subtraction of the positive step.  */
4886 if (TREE_CODE (step) == NEGATE_EXPR)
4888 incr_code = MINUS_EXPR;
4889 step = TREE_OPERAND (step, 0);
4892 /* Check whether we may leave the computation unchanged.
4893 This is the case only if it does not rely on other
4894 computations in the loop -- otherwise, the computation
4895 we rely upon may be removed in remove_unused_ivs,
4896 thus leading to ICE. */
4897 op = GIMPLE_STMT_OPERAND (use->stmt, 1)
4898 if (TREE_CODE (op) == PLUS_EXPR
4899 || TREE_CODE (op) == MINUS_EXPR)
/* Isolate the non-iv operand of the increment expression.  */
4901 if (TREE_OPERAND (op, 0) == cand->var_before)
4902 op = TREE_OPERAND (op, 1);
4903 else if (TREE_CODE (op) == PLUS_EXPR
4904 && TREE_OPERAND (op, 1) == cand->var_before)
4905 op = TREE_OPERAND (op, 0);
/* The statement may stay as-is when the remaining operand is a
   constant or literally the step.  */
4913 && (TREE_CODE (op) == INTEGER_CST
4914 || operand_equal_p (op, step, 0)))
4917 /* Otherwise, add the necessary computations to express
4919 op = fold_convert (ctype, cand->var_before);
4920 comp = fold_convert (utype,
4921 build2 (incr_code, ctype, op,
4922 unshare_expr (step)));
/* General case: express USE in terms of CAND.  */
4926 comp = get_computation (data->current_loop, use, cand);
4927 gcc_assert (comp != NULL_TREE);
/* Find the insertion point and the target ssa name, depending on
   whether USE's statement is a PHI or a plain assignment.  */
4930 switch (TREE_CODE (use->stmt))
4933 tgt = PHI_RESULT (use->stmt);
4935 /* If we should keep the biv, do not replace it. */
4936 if (name_info (data, tgt)->preserve_biv)
/* Skip past leading labels of the block to find the insertion
   point after the PHI's block starts.  */
4939 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4940 while (!bsi_end_p (pbsi)
4941 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4948 case GIMPLE_MODIFY_STMT:
4949 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4950 bsi = bsi_for_stmt (use->stmt);
/* Gimplify the computed expression into OP plus a statement list.  */
4957 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4959 if (TREE_CODE (use->stmt) == PHI_NODE)
/* PHI case: emit the computation and a fresh assignment, then drop
   the PHI (keeping its result, now defined by the assignment).  */
4962 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4963 ass = build_gimple_modify_stmt (tgt, op);
4964 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4965 remove_statement (use->stmt, false);
4966 SSA_NAME_DEF_STMT (tgt) = ass;
/* Assignment case: emit the computation before the statement and
   replace its RHS in place.  */
4971 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4972 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4976 /* Replaces ssa name in index IDX by its basic variable. Callback for
4980 idx_remove_ssa_names (tree base, tree *idx,
4981 void *data ATTRIBUTE_UNUSED)
4985 if (TREE_CODE (*idx) == SSA_NAME)
4986 *idx = SSA_NAME_VAR (*idx);
/* For ARRAY_REFs also strip ssa names from the optional element-size
   operands (positions 2 and 3).  */
4988 if (TREE_CODE (base) == ARRAY_REF)
4990 op = &TREE_OPERAND (base, 2);
4992 && TREE_CODE (*op) == SSA_NAME)
4993 *op = SSA_NAME_VAR (*op);
4994 op = &TREE_OPERAND (base, 3);
4996 && TREE_CODE (*op) == SSA_NAME)
4997 *op = SSA_NAME_VAR (*op);
5003 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5006 unshare_and_remove_ssa_names (tree ref)
/* Copy first so the stripping below cannot clobber shared trees.  */
5008 ref = unshare_expr (ref);
5009 for_each_index (&ref, idx_remove_ssa_names, NULL);
5014 /* Extract the alias analysis info for the memory reference REF. There are
5015 several ways how this information may be stored and what precisely is
5016 its semantics depending on the type of the reference, but there always is
5017 somewhere hidden one _DECL node that is used to determine the set of
5018 virtual operands for the reference. The code below deciphers this jungle
5019 and extracts this single useful piece of information. */
5022 get_ref_tag (tree ref, tree orig)
5024 tree var = get_base_address (ref);
5025 tree aref = NULL_TREE, tag, sv;
5026 HOST_WIDE_INT offset, size, maxsize;
/* Walk the component refs of ORIG looking for a base with subvars;
   if found, the (partially stripped) reference itself serves as the
   tag.  */
5028 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5030 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5035 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5036 return unshare_expr (sv);
5041 if (TREE_CODE (var) == INDIRECT_REF)
5043 /* If the base is a dereference of a pointer, first check its name memory
5044 tag. If it does not have one, use its symbol memory tag. */
5045 var = TREE_OPERAND (var, 0);
5046 if (TREE_CODE (var) != SSA_NAME)
5049 if (SSA_NAME_PTR_INFO (var))
5051 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
/* No name tag -- fall back to the symbol memory tag of the
   pointer's underlying variable.  */
5056 var = SSA_NAME_VAR (var);
5057 tag = symbol_mem_tag (var);
5058 gcc_assert (tag != NULL_TREE);
/* Non-INDIRECT_REF base: a DECL's own symbol tag (or, presumably,
   the DECL itself when it has none -- part of this path is not
   visible in this excerpt).  */
5066 tag = symbol_mem_tag (var);
5074 /* Copies the reference information from OLD_REF to NEW_REF. */
5077 copy_ref_info (tree new_ref, tree old_ref)
/* TARGET_MEM_REFs carry their info in dedicated fields and can be
   copied wholesale ...  */
5079 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5080 copy_mem_ref_info (new_ref, old_ref);
/* ... otherwise rebuild ORIGINAL/TAG from the old reference.  */
5083 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5084 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5088 /* Rewrites USE (address that is an iv) using candidate CAND. */
5091 rewrite_use_address (struct ivopts_data *data,
5092 struct iv_use *use, struct iv_cand *cand)
5095 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
/* Express the address as an affine combination of CAND, build a
   TARGET_MEM_REF from it and splice it in place of the old operand
   (the final *use->op_p store is not visible in this excerpt).  */
5099 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5101 unshare_aff_combination (&aff);
5103 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5104 copy_ref_info (ref, *use->op_p);
5108 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5112 rewrite_use_compare (struct ivopts_data *data,
5113 struct iv_use *use, struct iv_cand *cand)
5115 tree comp, *var_p, op, bound;
5116 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5117 enum tree_code compare;
5118 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
/* Preferred path: the iv can be eliminated entirely by comparing
   CAND against a precomputed bound.  */
5124 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5125 tree var_type = TREE_TYPE (var);
5127 compare = iv_elimination_compare (data, use);
5128 bound = unshare_expr (fold_convert (var_type, bound));
5129 op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);
5131 *use->op_p = build2 (compare, boolean_type_node, var, op);
5135 /* The induction variable elimination failed; just express the original
5137 comp = get_computation (data->current_loop, use, cand);
5138 gcc_assert (comp != NULL_TREE);
/* Replace only the iv operand of the condition, leaving the
   comparison's shape intact.  */
5140 ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5143 *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
5146 /* Rewrites USE using candidate CAND. */
/* Dispatches on the use kind; push/pop_stmt_changes keep the operand
   caches of the modified statement up to date.  */
5149 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5151 push_stmt_changes (&use->stmt);
5155 case USE_NONLINEAR_EXPR:
5156 rewrite_use_nonlinear_expr (data, use, cand);
5160 rewrite_use_address (data, use, cand);
5164 rewrite_use_compare (data, use, cand);
5171 pop_stmt_changes (&use->stmt);
5174 /* Rewrite the uses using the selected induction variables. */
5177 rewrite_uses (struct ivopts_data *data)
5180 struct iv_cand *cand;
/* Each use was annotated with its chosen candidate by
   find_optimal_iv_set.  */
5183 for (i = 0; i < n_iv_uses (data); i++)
5185 use = iv_use (data, i);
5186 cand = use->selected;
5189 rewrite_use (data, use, cand);
5193 /* Removes the ivs that are not used after rewriting. */
5196 remove_unused_ivs (struct ivopts_data *data)
5201 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5203 struct version_info *info;
5205 info = ver_info (data, j);
/* Delete the defining statement of every iv that has a nonzero step,
   is no longer used by any interesting use, and is not marked as a
   biv to preserve.  */
5207 && !integer_zerop (info->iv->step)
5209 && !info->iv->have_use_for
5210 && !info->preserve_biv)
5211 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5215 /* Frees data allocated by the optimization of a single loop. */
5218 free_loop_data (struct ivopts_data *data)
/* Dispose of the per-loop niter cache.  */
5226 pointer_map_destroy (data->niters);
5227 data->niters = NULL;
/* Reset the per-ssa-name version info for names touched this loop.  */
5230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5232 struct version_info *info;
5234 info = ver_info (data, i);
5238 info->has_nonlin_use = false;
5239 info->preserve_biv = false;
5242 bitmap_clear (data->relevant);
5243 bitmap_clear (data->important_candidates);
/* Free every use record together with its cost map and dependency
   bitmaps, then empty the vector.  */
5245 for (i = 0; i < n_iv_uses (data); i++)
5247 struct iv_use *use = iv_use (data, i);
5250 BITMAP_FREE (use->related_cands);
5251 for (j = 0; j < use->n_map_members; j++)
5252 if (use->cost_map[j].depends_on)
5253 BITMAP_FREE (use->cost_map[j].depends_on);
5254 free (use->cost_map);
5257 VEC_truncate (iv_use_p, data->iv_uses, 0);
/* Likewise for candidate records.  */
5259 for (i = 0; i < n_iv_cands (data); i++)
5261 struct iv_cand *cand = iv_cand (data, i);
5265 if (cand->depends_on)
5266 BITMAP_FREE (cand->depends_on);
5269 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
/* Grow the version_info array if more ssa names appeared; fresh
   XCNEWVEC gives zeroed entries.  */
5271 if (data->version_info_size < num_ssa_names)
5273 data->version_info_size = 2 * num_ssa_names;
5274 free (data->version_info);
5275 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5278 data->max_inv_id = 0;
/* Drop the DECL_RTL of decls we set it on during cost estimation.  */
5280 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5281 SET_DECL_RTL (obj, NULL_RTX);
5283 VEC_truncate (tree, decl_rtl_to_reset, 0);
5286 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
/* Releases everything the pass allocated: per-loop data first, then
   the pass-global arrays, bitmaps and vectors.  */
5290 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5292 free_loop_data (data);
5293 free (data->version_info);
5294 BITMAP_FREE (data->relevant);
5295 BITMAP_FREE (data->important_candidates);
5297 VEC_free (tree, heap, decl_rtl_to_reset);
5298 VEC_free (iv_use_p, heap, data->iv_uses);
5299 VEC_free (iv_cand_p, heap, data->iv_candidates);
5302 /* Optimizes the LOOP. Returns true if anything changed. */
5305 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5307 bool changed = false;
5308 struct iv_ca *iv_ca;
/* niters must have been cleared by free_loop_data after the previous
   loop.  */
5311 gcc_assert (!data->niters);
5312 data->current_loop = loop;
5314 if (dump_file && (dump_flags & TDF_DETAILS))
5316 fprintf (dump_file, "Processing loop %d\n", loop->num);
/* Dump the single dominating exit and its condition, if there is
   one.  */
5318 exit = single_dom_exit (loop);
5321 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5322 exit->src->index, exit->dest->index);
5323 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5324 fprintf (dump_file, "\n");
5327 fprintf (dump_file, "\n");
5330 /* For each ssa name determines whether it behaves as an induction variable
/* No induction variables found -- nothing to do for this loop.  */
5332 if (!find_induction_variables (data))
5335 /* Finds interesting uses (item 1). */
5336 find_interesting_uses (data);
/* Bail out on pathological loops to keep compile time bounded.  */
5337 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5340 /* Finds candidates for the induction variables (item 2). */
5341 find_iv_candidates (data);
5343 /* Calculates the costs (item 3, part 1). */
5344 determine_use_iv_costs (data);
5345 determine_iv_costs (data);
5346 determine_set_costs (data);
5348 /* Find the optimal set of induction variables (item 3, part 2). */
5349 iv_ca = find_optimal_iv_set (data);
5354 /* Create the new induction variables (item 4, part 1). */
5355 create_new_ivs (data, iv_ca);
5356 iv_ca_free (&iv_ca);
5358 /* Rewrite the uses (item 4, part 2). */
5359 rewrite_uses (data);
5361 /* Remove the ivs that are unused after rewriting. */
5362 remove_unused_ivs (data);
5364 /* We have changed the structure of induction variables; it might happen
5365 that definitions in the scev database refer to some of them that were
/* Common exit: release per-loop data before returning.  */
5370 free_loop_data (data);
5375 /* Main entry point. Optimizes induction variables in loops. */
5378 tree_ssa_iv_optimize (void)
5381 struct ivopts_data data;
5384 tree_ssa_iv_optimize_init (&data);
5386 /* Optimize the loops starting with the innermost ones. */
5387 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5389 if (dump_file && (dump_flags & TDF_DETAILS))
5390 flow_loop_dump (loop, dump_file, NULL, 1);
5392 tree_ssa_iv_optimize_loop (&data, loop);
5395 tree_ssa_iv_optimize_finalize (&data);