1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
94 /* The infinite cost. */
95 #define INFTY 10000000
/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
99 #define AVG_LOOP_NITER(LOOP) 5
102 /* Representation of the induction variable. */
105 tree base; /* Initial value of the iv. */
106 tree base_object; /* A memory object to that the induction variable points. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
133 /* The candidate - cost pair. */
136 struct iv_cand *cand; /* The candidate. */
137 unsigned cost; /* The cost. */
138 bitmap depends_on; /* The list of invariants that have to be
140 tree value; /* For final value elimination, the expression for
141 the final value of the iv. For iv elimination,
142 the new bound to compare with. */
148 unsigned id; /* The id of the use. */
149 enum use_type type; /* Type of the use. */
150 struct iv *iv; /* The induction variable it is based on. */
151 tree stmt; /* Statement in that it occurs. */
152 tree *op_p; /* The place where it occurs. */
153 bitmap related_cands; /* The set of "related" iv candidates, plus the common
156 unsigned n_map_members; /* Number of candidates in the cost_map list. */
157 struct cost_pair *cost_map;
158 /* The costs wrto the iv candidates. */
160 struct iv_cand *selected;
161 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
172 /* The induction variable candidate. */
175 unsigned id; /* The number of the candidate. */
176 bool important; /* Whether this is an "important" candidate, i.e. such
177 that it should be considered by all uses. */
178 enum iv_position pos; /* Where it is computed. */
179 tree incremented_at; /* For original biv, the statement where it is
181 tree var_before; /* The variable used for it before increment. */
182 tree var_after; /* The variable used for it after increment. */
183 struct iv *iv; /* The value of the candidate. NULL for
184 "pseudocandidate" used to indicate the possibility
185 to replace the final value of an iv by direct
186 computation of the value. */
187 unsigned cost; /* Cost of the candidate. */
188 bitmap depends_on; /* The list of invariants that are used in step of the
192 /* The data used by the induction variable optimizations. */
194 typedef struct iv_use *iv_use_p;
196 DEF_VEC_ALLOC_P(iv_use_p,heap);
198 typedef struct iv_cand *iv_cand_p;
199 DEF_VEC_P(iv_cand_p);
200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
204 /* The currently optimized loop. */
205 struct loop *current_loop;
207 /* Number of registers used in it. */
210 /* Numbers of iterations for all exits of the current loop. */
213 /* The size of version_info array allocated. */
214 unsigned version_info_size;
216 /* The array of information for the ssa names. */
217 struct version_info *version_info;
219 /* The bitmap of indices in version_info whose value was changed. */
222 /* The maximum invariant id. */
225 /* The uses of induction variables. */
226 VEC(iv_use_p,heap) *iv_uses;
228 /* The candidates. */
229 VEC(iv_cand_p,heap) *iv_candidates;
231 /* A bitmap of important candidates. */
232 bitmap important_candidates;
234 /* Whether to consider just related and important candidates when replacing a
236 bool consider_all_candidates;
239 /* An assignment of iv candidates to uses. */
243 /* The number of uses covered by the assignment. */
246 /* Number of uses that cannot be expressed by the candidates in the set. */
249 /* Candidate assigned to a use, together with the related costs. */
250 struct cost_pair **cand_for_use;
252 /* Number of times each candidate is used. */
253 unsigned *n_cand_uses;
255 /* The candidates used. */
258 /* The number of candidates in the set. */
261 /* Total number of registers needed. */
264 /* Total cost of expressing uses. */
265 unsigned cand_use_cost;
267 /* Total cost of candidates. */
270 /* Number of times each invariant is used. */
271 unsigned *n_invariant_uses;
273 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
294 /* Bound on number of candidates below that all candidates are considered. */
296 #define CONSIDER_ALL_CANDIDATES_BOUND \
297 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
300 optimizing such a loop would help, and it would take ages). */
302 #define MAX_CONSIDERED_USES \
303 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
305 /* If there are at most this number of ivs in the set, try removing unnecessary
306 ivs from the set always. */
308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
309 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */
314 static VEC(tree,heap) *decl_rtl_to_reset;
316 /* Number of uses recorded in DATA. */
318 static inline unsigned
319 n_iv_uses (struct ivopts_data *data)
321 return VEC_length (iv_use_p, data->iv_uses);
324 /* Ith use recorded in DATA. */
326 static inline struct iv_use *
327 iv_use (struct ivopts_data *data, unsigned i)
329 return VEC_index (iv_use_p, data->iv_uses, i);
332 /* Number of candidates recorded in DATA. */
334 static inline unsigned
335 n_iv_cands (struct ivopts_data *data)
337 return VEC_length (iv_cand_p, data->iv_candidates);
340 /* Ith candidate recorded in DATA. */
342 static inline struct iv_cand *
343 iv_cand (struct ivopts_data *data, unsigned i)
345 return VEC_index (iv_cand_p, data->iv_candidates, i);
348 /* The single loop exit if it dominates the latch, NULL otherwise. */
351 single_dom_exit (struct loop *loop)
353 edge exit = single_exit (loop);
358 if (!just_once_each_iteration_p (loop, exit->src))
364 /* Dumps information about the induction variable IV to FILE. */
366 extern void dump_iv (FILE *, struct iv *);
368 dump_iv (FILE *file, struct iv *iv)
372 fprintf (file, "ssa name ");
373 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
374 fprintf (file, "\n");
377 fprintf (file, " type ");
378 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
379 fprintf (file, "\n");
383 fprintf (file, " base ");
384 print_generic_expr (file, iv->base, TDF_SLIM);
385 fprintf (file, "\n");
387 fprintf (file, " step ");
388 print_generic_expr (file, iv->step, TDF_SLIM);
389 fprintf (file, "\n");
393 fprintf (file, " invariant ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
400 fprintf (file, " base object ");
401 print_generic_expr (file, iv->base_object, TDF_SLIM);
402 fprintf (file, "\n");
406 fprintf (file, " is a biv\n");
409 /* Dumps information about the USE to FILE. */
411 extern void dump_use (FILE *, struct iv_use *);
413 dump_use (FILE *file, struct iv_use *use)
415 fprintf (file, "use %d\n", use->id);
419 case USE_NONLINEAR_EXPR:
420 fprintf (file, " generic\n");
424 fprintf (file, " address\n");
428 fprintf (file, " compare\n");
435 fprintf (file, " in statement ");
436 print_generic_expr (file, use->stmt, TDF_SLIM);
437 fprintf (file, "\n");
439 fprintf (file, " at position ");
441 print_generic_expr (file, *use->op_p, TDF_SLIM);
442 fprintf (file, "\n");
444 dump_iv (file, use->iv);
446 if (use->related_cands)
448 fprintf (file, " related candidates ");
449 dump_bitmap (file, use->related_cands);
453 /* Dumps information about the uses to FILE. */
455 extern void dump_uses (FILE *, struct ivopts_data *);
457 dump_uses (FILE *file, struct ivopts_data *data)
462 for (i = 0; i < n_iv_uses (data); i++)
464 use = iv_use (data, i);
466 dump_use (file, use);
467 fprintf (file, "\n");
471 /* Dumps information about induction variable candidate CAND to FILE. */
473 extern void dump_cand (FILE *, struct iv_cand *);
475 dump_cand (FILE *file, struct iv_cand *cand)
477 struct iv *iv = cand->iv;
479 fprintf (file, "candidate %d%s\n",
480 cand->id, cand->important ? " (important)" : "");
482 if (cand->depends_on)
484 fprintf (file, " depends on ");
485 dump_bitmap (file, cand->depends_on);
490 fprintf (file, " final value replacement\n");
497 fprintf (file, " incremented before exit test\n");
501 fprintf (file, " incremented at end\n");
505 fprintf (file, " original biv\n");
512 /* Returns the info for ssa version VER. */
514 static inline struct version_info *
515 ver_info (struct ivopts_data *data, unsigned ver)
517 return data->version_info + ver;
520 /* Returns the info for ssa name NAME. */
522 static inline struct version_info *
523 name_info (struct ivopts_data *data, tree name)
525 return ver_info (data, SSA_NAME_VERSION (name));
528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
534 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
538 if (sbb == loop->latch)
544 return stmt == last_stmt (bb);
547 /* Returns true if STMT if after the place where the original induction
548 variable CAND is incremented. */
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
560 if (stmt_bb != cand_bb)
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 if (bsi_stmt (bsi) == cand->incremented_at)
569 if (bsi_stmt (bsi) == stmt)
574 /* Returns true if STMT if after the place where the induction variable
575 CAND is incremented in LOOP. */
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
586 return stmt_after_ip_normal_pos (loop, stmt);
589 return stmt_after_ip_original_pos (cand, stmt);
596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
599 abnormal_ssa_name_p (tree exp)
604 if (TREE_CODE (exp) != SSA_NAME)
607 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
611 abnormal phi node. Callback for for_each_index. */
614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
615 void *data ATTRIBUTE_UNUSED)
617 if (TREE_CODE (base) == ARRAY_REF)
619 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
625 return !abnormal_ssa_name_p (*index);
628 /* Returns true if EXPR contains a ssa name that occurs in an
629 abnormal phi node. */
632 contains_abnormal_ssa_name_p (tree expr)
635 enum tree_code_class class;
640 code = TREE_CODE (expr);
641 class = TREE_CODE_CLASS (code);
643 if (code == SSA_NAME)
644 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
646 if (code == INTEGER_CST
647 || is_gimple_min_invariant (expr))
650 if (code == ADDR_EXPR)
651 return !for_each_index (&TREE_OPERAND (expr, 0),
652 idx_contains_abnormal_ssa_name_p,
659 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
664 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
676 /* Element of the table in that we cache the numbers of iterations obtained
677 from exits of the loop. */
681 /* The edge for that the number of iterations is cached. */
684 /* Number of iterations corresponding to this exit, or NULL if it cannot be
689 /* Hash function for nfe_cache_elt E. */
692 nfe_hash (const void *e)
694 const struct nfe_cache_elt *elt = e;
696 return htab_hash_pointer (elt->exit);
699 /* Equality function for nfe_cache_elt E1 and edge E2. */
702 nfe_eq (const void *e1, const void *e2)
704 const struct nfe_cache_elt *elt1 = e1;
706 return elt1->exit == e2;
709 /* Returns tree describing number of iterations determined from
710 EXIT of DATA->current_loop, or NULL if something goes wrong. */
713 niter_for_exit (struct ivopts_data *data, edge exit)
715 struct nfe_cache_elt *nfe_desc;
716 struct tree_niter_desc desc;
719 slot = htab_find_slot_with_hash (data->niters, exit,
720 htab_hash_pointer (exit),
725 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
726 nfe_desc->exit = exit;
728 /* Try to determine number of iterations. We must know it
729 unconditionally (i.e., without possibility of # of iterations
730 being zero). Also, we cannot safely work with ssa names that
731 appear in phi nodes on abnormal edges, so that we do not create
732 overlapping life ranges for them (PR 27283). */
733 if (number_of_iterations_exit (data->current_loop,
735 && zero_p (desc.may_be_zero)
736 && !contains_abnormal_ssa_name_p (desc.niter))
737 nfe_desc->niter = desc.niter;
739 nfe_desc->niter = NULL_TREE;
744 return nfe_desc->niter;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
752 niter_for_single_dom_exit (struct ivopts_data *data)
754 edge exit = single_dom_exit (data->current_loop);
759 return niter_for_exit (data, exit);
762 /* Initializes data structures used by the iv optimization pass, stored
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
768 data->version_info_size = 2 * num_ssa_names;
769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
772 data->max_inv_id = 0;
773 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
783 determine_base_object (tree expr)
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj, op0, op1;
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (TREE_CODE (expr) == NOP_EXPR
792 || TREE_CODE (expr) == CONVERT_EXPR)
793 return determine_base_object (TREE_OPERAND (expr, 0));
795 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
804 obj = TREE_OPERAND (expr, 0);
805 base = get_base_address (obj);
810 if (TREE_CODE (base) == INDIRECT_REF)
811 return determine_base_object (TREE_OPERAND (base, 0));
813 return fold_convert (ptr_type_node,
814 build_fold_addr_expr (base));
818 op0 = determine_base_object (TREE_OPERAND (expr, 0));
819 op1 = determine_base_object (TREE_OPERAND (expr, 1));
825 return (code == PLUS_EXPR
827 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
829 return fold_build2 (code, ptr_type_node, op0, op1);
832 return fold_convert (ptr_type_node, expr);
836 /* Allocates an induction variable with given initial value BASE and step STEP
840 alloc_iv (tree base, tree step)
842 struct iv *iv = XCNEW (struct iv);
844 if (step && integer_zerop (step))
848 iv->base_object = determine_base_object (base);
851 iv->have_use_for = false;
853 iv->ssa_name = NULL_TREE;
858 /* Sets STEP and BASE for induction variable IV. */
861 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
863 struct version_info *info = name_info (data, iv);
865 gcc_assert (!info->iv);
867 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
868 info->iv = alloc_iv (base, step);
869 info->iv->ssa_name = iv;
872 /* Finds induction variable declaration for VAR. */
875 get_iv (struct ivopts_data *data, tree var)
879 if (!name_info (data, var)->iv)
881 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
884 || !flow_bb_inside_loop_p (data->current_loop, bb))
885 set_iv (data, var, var, NULL_TREE);
888 return name_info (data, var)->iv;
891 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
892 not define a simple affine biv with nonzero step. */
895 determine_biv_step (tree phi)
897 struct loop *loop = bb_for_stmt (phi)->loop_father;
898 tree name = PHI_RESULT (phi);
901 if (!is_gimple_reg (name))
904 if (!simple_iv (loop, phi, name, &iv, true))
907 return (zero_p (iv.step) ? NULL_TREE : iv.step);
910 /* Finds basic ivs. */
913 find_bivs (struct ivopts_data *data)
915 tree phi, step, type, base;
917 struct loop *loop = data->current_loop;
919 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
921 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
924 step = determine_biv_step (phi);
928 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
929 base = expand_simple_operations (base);
930 if (contains_abnormal_ssa_name_p (base)
931 || contains_abnormal_ssa_name_p (step))
934 type = TREE_TYPE (PHI_RESULT (phi));
935 base = fold_convert (type, base);
937 step = fold_convert (type, step);
939 set_iv (data, PHI_RESULT (phi), base, step);
946 /* Marks basic ivs. */
949 mark_bivs (struct ivopts_data *data)
952 struct iv *iv, *incr_iv;
953 struct loop *loop = data->current_loop;
956 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
958 iv = get_iv (data, PHI_RESULT (phi));
962 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
963 incr_iv = get_iv (data, var);
967 /* If the increment is in the subloop, ignore it. */
968 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
969 if (incr_bb->loop_father != data->current_loop
970 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
974 incr_iv->biv_p = true;
978 /* Checks whether STMT defines a linear induction variable and stores its
982 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
985 struct loop *loop = data->current_loop;
987 iv->base = NULL_TREE;
988 iv->step = NULL_TREE;
990 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
993 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
994 if (TREE_CODE (lhs) != SSA_NAME)
997 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
999 iv->base = expand_simple_operations (iv->base);
1001 if (contains_abnormal_ssa_name_p (iv->base)
1002 || contains_abnormal_ssa_name_p (iv->step))
1008 /* Finds general ivs in statement STMT. */
1011 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1015 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1018 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1021 /* Finds general ivs in basic block BB. */
1024 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1026 block_stmt_iterator bsi;
1028 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1029 find_givs_in_stmt (data, bsi_stmt (bsi));
1032 /* Finds general ivs. */
1035 find_givs (struct ivopts_data *data)
1037 struct loop *loop = data->current_loop;
1038 basic_block *body = get_loop_body_in_dom_order (loop);
1041 for (i = 0; i < loop->num_nodes; i++)
1042 find_givs_in_bb (data, body[i]);
1046 /* For each ssa name defined in LOOP determines whether it is an induction
1047 variable and if so, its initial value and step. */
1050 find_induction_variables (struct ivopts_data *data)
1055 if (!find_bivs (data))
1061 if (dump_file && (dump_flags & TDF_DETAILS))
1063 tree niter = niter_for_single_dom_exit (data);
1067 fprintf (dump_file, " number of iterations ");
1068 print_generic_expr (dump_file, niter, TDF_SLIM);
1069 fprintf (dump_file, "\n\n");
1072 fprintf (dump_file, "Induction variables:\n\n");
1074 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1076 if (ver_info (data, i)->iv)
1077 dump_iv (dump_file, ver_info (data, i)->iv);
1084 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1086 static struct iv_use *
1087 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1088 tree stmt, enum use_type use_type)
1090 struct iv_use *use = XCNEW (struct iv_use);
1092 use->id = n_iv_uses (data);
1093 use->type = use_type;
1097 use->related_cands = BITMAP_ALLOC (NULL);
1099 /* To avoid showing ssa name in the dumps, if it was not reset by the
1101 iv->ssa_name = NULL_TREE;
1103 if (dump_file && (dump_flags & TDF_DETAILS))
1104 dump_use (dump_file, use);
1106 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1111 /* Checks whether OP is a loop-level invariant and if so, records it.
1112 NONLINEAR_USE is true if the invariant is used in a way we do not
1113 handle specially. */
1116 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1119 struct version_info *info;
1121 if (TREE_CODE (op) != SSA_NAME
1122 || !is_gimple_reg (op))
1125 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1127 && flow_bb_inside_loop_p (data->current_loop, bb))
1130 info = name_info (data, op);
1132 info->has_nonlin_use |= nonlinear_use;
1134 info->inv_id = ++data->max_inv_id;
1135 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1138 /* Checks whether the use OP is interesting and if so, records it. */
1140 static struct iv_use *
1141 find_interesting_uses_op (struct ivopts_data *data, tree op)
1148 if (TREE_CODE (op) != SSA_NAME)
1151 iv = get_iv (data, op);
1155 if (iv->have_use_for)
1157 use = iv_use (data, iv->use_id);
1159 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1163 if (zero_p (iv->step))
1165 record_invariant (data, op, true);
1168 iv->have_use_for = true;
1170 civ = XNEW (struct iv);
1173 stmt = SSA_NAME_DEF_STMT (op);
1174 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1175 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1177 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1178 iv->use_id = use->id;
1183 /* Checks whether the condition *COND_P in STMT is interesting
1184 and if so, records it. */
1187 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1191 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1193 tree zero = integer_zero_node;
1195 const_iv.step = NULL_TREE;
1197 if (TREE_CODE (*cond_p) != SSA_NAME
1198 && !COMPARISON_CLASS_P (*cond_p))
1201 if (TREE_CODE (*cond_p) == SSA_NAME)
1208 op0_p = &TREE_OPERAND (*cond_p, 0);
1209 op1_p = &TREE_OPERAND (*cond_p, 1);
1212 if (TREE_CODE (*op0_p) == SSA_NAME)
1213 iv0 = get_iv (data, *op0_p);
1217 if (TREE_CODE (*op1_p) == SSA_NAME)
1218 iv1 = get_iv (data, *op1_p);
1222 if (/* When comparing with non-invariant value, we may not do any senseful
1223 induction variable elimination. */
1225 /* Eliminating condition based on two ivs would be nontrivial.
1226 ??? TODO -- it is not really important to handle this case. */
1227 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1229 find_interesting_uses_op (data, *op0_p);
1230 find_interesting_uses_op (data, *op1_p);
1234 if (zero_p (iv0->step) && zero_p (iv1->step))
1236 /* If both are invariants, this is a work for unswitching. */
1240 civ = XNEW (struct iv);
1241 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1242 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1245 /* Returns true if expression EXPR is obviously invariant in LOOP,
1246 i.e. if all its operands are defined outside of the LOOP. */
1249 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1254 if (is_gimple_min_invariant (expr))
1257 if (TREE_CODE (expr) == SSA_NAME)
1259 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1261 && flow_bb_inside_loop_p (loop, def_bb))
1267 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1270 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1271 for (i = 0; i < len; i++)
1272 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1278 /* Cumulates the steps of indices into DATA and replaces their values with the
1279 initial ones. Returns false when the value of the index cannot be determined.
1280 Callback for for_each_index. */
1282 struct ifs_ivopts_data
1284 struct ivopts_data *ivopts_data;
1290 idx_find_step (tree base, tree *idx, void *data)
1292 struct ifs_ivopts_data *dta = data;
1294 tree step, iv_base, iv_step, lbound, off;
1295 struct loop *loop = dta->ivopts_data->current_loop;
1297 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1298 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1301 /* If base is a component ref, require that the offset of the reference
1303 if (TREE_CODE (base) == COMPONENT_REF)
1305 off = component_ref_field_offset (base);
1306 return expr_invariant_in_loop_p (loop, off);
1309 /* If base is array, first check whether we will be able to move the
1310 reference out of the loop (in order to take its address in strength
1311 reduction). In order for this to work we need both lower bound
1312 and step to be loop invariants. */
1313 if (TREE_CODE (base) == ARRAY_REF)
1315 step = array_ref_element_size (base);
1316 lbound = array_ref_low_bound (base);
1318 if (!expr_invariant_in_loop_p (loop, step)
1319 || !expr_invariant_in_loop_p (loop, lbound))
1323 if (TREE_CODE (*idx) != SSA_NAME)
1326 iv = get_iv (dta->ivopts_data, *idx);
1330 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1331 *&x[0], which is not folded and does not trigger the
1332 ARRAY_REF path below. */
1338 if (TREE_CODE (base) == ARRAY_REF)
1340 step = array_ref_element_size (base);
1342 /* We only handle addresses whose step is an integer constant. */
1343 if (TREE_CODE (step) != INTEGER_CST)
1347 /* The step for pointer arithmetics already is 1 byte. */
1348 step = build_int_cst (sizetype, 1);
1352 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1353 sizetype, &iv_base, &iv_step, dta->stmt,
1356 /* The index might wrap. */
1360 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1363 *dta->step_p = step;
1365 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1370 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1371 object is passed to it in DATA. */
1374 idx_record_use (tree base, tree *idx,
1377 find_interesting_uses_op (data, *idx);
1378 if (TREE_CODE (base) == ARRAY_REF)
1380 find_interesting_uses_op (data, array_ref_element_size (base));
1381 find_interesting_uses_op (data, array_ref_low_bound (base));
1386 /* Returns true if memory reference REF may be unaligned. */
1389 may_be_unaligned_p (tree ref)
1393 HOST_WIDE_INT bitsize;
1394 HOST_WIDE_INT bitpos;
1396 enum machine_mode mode;
1397 int unsignedp, volatilep;
1398 unsigned base_align;
1400 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1401 thus they are not misaligned. */
1402 if (TREE_CODE (ref) == TARGET_MEM_REF)
1405 /* The test below is basically copy of what expr.c:normal_inner_ref
1406 does to check whether the object must be loaded by parts when
1407 STRICT_ALIGNMENT is true. */
1408 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1409 &unsignedp, &volatilep, true);
1410 base_type = TREE_TYPE (base);
1411 base_align = TYPE_ALIGN (base_type);
1414 && (base_align < GET_MODE_ALIGNMENT (mode)
1415 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1416 || bitpos % BITS_PER_UNIT != 0))
1422 /* Return true if EXPR may be non-addressable. */
1425 may_be_nonaddressable_p (tree expr)
1427 switch (TREE_CODE (expr))
1430 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1431 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1434 case ARRAY_RANGE_REF:
1435 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1437 case VIEW_CONVERT_EXPR:
1438 /* This kind of view-conversions may wrap non-addressable objects
1439 and make them look addressable. After some processing the
1440 non-addressability may be uncovered again, causing ADDR_EXPRs
1441 of inappropriate objects to be built. */
1442 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1443 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1452 /* Finds addresses in *OP_P inside STMT.  */
     /* If the address computation involves an induction variable, records a
        USE_ADDRESS use for it; otherwise falls through to recording plain
        index uses via idx_record_use (the "fail" path at the end).  Handles
        both TARGET_MEM_REF and ordinary reference trees.  */
1455 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1457 tree base = *op_p, step = NULL;
1459 struct ifs_ivopts_data ifs_ivopts_data;
1461 /* Do not play with volatile memory references. A bit too conservative,
1462 perhaps, but safe. */
1463 if (stmt_ann (stmt)->has_volatile_ops)
1466 /* Ignore bitfields for now. Not really something terribly complicated
1468 if (TREE_CODE (base) == BIT_FIELD_REF)
     /* Taking the address of a non-addressable object would be invalid.  */
1471 if (may_be_nonaddressable_p (base))
     /* On strict-alignment targets, do not create address uses for accesses
        that may_be_unaligned_p flags as possibly misaligned.  */
1474 if (STRICT_ALIGNMENT
1475 && may_be_unaligned_p (base))
1478 base = unshare_expr (base);
1480 if (TREE_CODE (base) == TARGET_MEM_REF)
1482 tree type = build_pointer_type (TREE_TYPE (base));
     /* Substitute the IV bases for the SSA names appearing in TMR_BASE and
        TMR_INDEX, accumulating the combined step (TMR_STEP scales the index
        part).  */
1486 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1488 civ = get_iv (data, TMR_BASE (base));
1492 TMR_BASE (base) = civ->base;
1495 if (TMR_INDEX (base)
1496 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1498 civ = get_iv (data, TMR_INDEX (base));
1502 TMR_INDEX (base) = civ->base;
1507 if (TMR_STEP (base))
1508 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1511 step = fold_build2 (PLUS_EXPR, type, step, astep);
1519 base = tree_mem_ref_addr (type, base);
     /* Ordinary references: walk the indices looking for an IV step.  */
1523 ifs_ivopts_data.ivopts_data = data;
1524 ifs_ivopts_data.stmt = stmt;
1525 ifs_ivopts_data.step_p = &step;
1526 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1530 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1531 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1533 base = build_fold_addr_expr (base);
1535 /* Substituting bases of IVs into the base expression might
1536 have caused folding opportunities. */
1537 if (TREE_CODE (base) == ADDR_EXPR)
1539 tree *ref = &TREE_OPERAND (base, 0);
1540 while (handled_component_p (*ref))
1541 ref = &TREE_OPERAND (*ref, 0);
1542 if (TREE_CODE (*ref) == INDIRECT_REF)
1543 *ref = fold_indirect_ref (*ref);
     /* Record the address use based on the computed BASE/STEP iv.  */
1547 civ = alloc_iv (base, step);
1548 record_use (data, op_p, civ, stmt, USE_ADDRESS);
     /* Fallback (reached via elided "fail" label -- TODO confirm): no iv in
        the address, just record invariant index uses.  */
1552 for_each_index (op_p, idx_record_use, data);
1555 /* Finds and records invariants used in STMT.  */
     /* Iterates over all SSA uses of STMT (PHI or ordinary statement) and
        records each operand as a (non-nonlinear) invariant.  */
1558 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1561 use_operand_p use_p;
1564 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1566 op = USE_FROM_PTR (use_p);
     /* false = this use is not a nonlinear one.  */
1567 record_invariant (data, op, false);
1571 /* Finds interesting uses of induction variables in the statement STMT.  */
     /* Dispatches on the statement shape: COND_EXPR conditions, the RHS/LHS
        of GIMPLE_MODIFY_STMTs (comparisons, address uses, plain operands),
        loop-header PHI nodes, and finally any remaining SSA operands.  */
1574 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1579 use_operand_p use_p;
1581 find_invariants_stmt (data, stmt);
1583 if (TREE_CODE (stmt) == COND_EXPR)
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1589 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1591 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1592 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1594 if (TREE_CODE (lhs) == SSA_NAME)
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1599 iv = get_iv (data, lhs);
     /* A nonzero-step iv definition: its operands need no separate uses.  */
1601 if (iv && !zero_p (iv->step))
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1607 case tcc_comparison:
1608 find_interesting_uses_cond (data, stmt,
1609 &GIMPLE_STMT_OPERAND (stmt, 1));
     /* NOTE(review): the case label for reference-class RHS (address uses)
        is elided here -- confirm against the full file.  */
1613 find_interesting_uses_address (data, stmt,
1614 &GIMPLE_STMT_OPERAND (stmt, 1));
1615 if (REFERENCE_CLASS_P (lhs))
1616 find_interesting_uses_address (data, stmt,
1617 &GIMPLE_STMT_OPERAND (stmt, 0));
     /* Store of a gimple value into memory: the store address is an
        interesting use, and so is the stored operand.  */
1623 if (REFERENCE_CLASS_P (lhs)
1624 && is_gimple_val (rhs))
1626 find_interesting_uses_address (data, stmt,
1627 &GIMPLE_STMT_OPERAND (stmt, 0));
1628 find_interesting_uses_op (data, rhs);
1632 /* TODO -- we should also handle address uses of type
1634 memory = call (whatever);
     /* Loop-header PHIs defining a nonzero-step iv need no further uses.  */
1641 if (TREE_CODE (stmt) == PHI_NODE
1642 && bb_for_stmt (stmt) == data->current_loop->header)
1644 lhs = PHI_RESULT (stmt);
1645 iv = get_iv (data, lhs);
1647 if (iv && !zero_p (iv->step))
     /* Remaining SSA operands: record a use for every operand that is an
        iv (get_iv check; early-continue paths are elided here).  */
1651 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1653 op = USE_FROM_PTR (use_p);
1655 if (TREE_CODE (op) != SSA_NAME)
1658 iv = get_iv (data, op);
1662 find_interesting_uses_op (data, op);
1666 /* Finds interesting uses of induction variables outside of loops
1667 on loop exit edge EXIT.  */
     /* Each PHI in the exit destination block receives a value from inside
        the loop; that value is an interesting (outside) use.  */
1670 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1674 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1676 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1677 find_interesting_uses_op (data, def);
1681 /* Finds uses of the induction variables that are interesting.  */
     /* Walks every basic block of the current loop: first uses on exit edges
        leaving the loop, then PHIs and statements inside each block.  With
        TDF_DETAILS dumping enabled, also prints the recorded invariants.  */
1684 find_interesting_uses (struct ivopts_data *data)
1687 block_stmt_iterator bsi;
1689 basic_block *body = get_loop_body (data->current_loop);
1691 struct version_info *info;
1694 if (dump_file && (dump_flags & TDF_DETAILS))
1695 fprintf (dump_file, "Uses:\n\n");
1697 for (i = 0; i < data->current_loop->num_nodes; i++)
     /* Edges leaving the loop (but not to EXIT_BLOCK) carry outside uses.  */
1702 FOR_EACH_EDGE (e, ei, bb->succs)
1703 if (e->dest != EXIT_BLOCK_PTR
1704 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705 find_interesting_uses_outside (data, e);
1707 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708 find_interesting_uses_stmt (data, phi);
1709 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710 find_interesting_uses_stmt (data, bsi_stmt (bsi));
     /* Dump the invariants found among the relevant SSA versions.  */
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1717 fprintf (dump_file, "\n");
1719 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1721 info = ver_info (data, i);
1724 fprintf (dump_file, " ");
1725 print_generic_expr (dump_file, info->name, TDF_SLIM);
1726 fprintf (dump_file, " is invariant (%d)%s\n",
1727 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1731 fprintf (dump_file, "\n");
1737 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1738 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1739 we are at the top-level of the processed address.  */
     /* Returns EXPR with the stripped-off constant parts removed; *OFFSET
        accumulates the removed constant (as an unsigned HOST_WIDE_INT).
        Recurses into PLUS/MINUS, ARRAY_REF indices, COMPONENT_REF field
        offsets and ADDR_EXPR operands.  */
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743 unsigned HOST_WIDE_INT *offset)
1745 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746 enum tree_code code;
1747 tree type, orig_type = TREE_TYPE (expr);
1748 unsigned HOST_WIDE_INT off0, off1, st;
1749 tree orig_expr = expr;
1753 type = TREE_TYPE (expr);
1754 code = TREE_CODE (expr);
     /* INTEGER_CST case (label elided): the whole constant becomes the
        offset, replaced by zero of the original type.  */
1760 if (!cst_and_fits_in_hwi (expr)
1764 *offset = int_cst_value (expr);
1765 return build_int_cst (orig_type, 0);
     /* PLUS_EXPR / MINUS_EXPR: strip offsets from both operands and combine
        them with the operation's sign.  */
1769 op0 = TREE_OPERAND (expr, 0);
1770 op1 = TREE_OPERAND (expr, 1);
1772 op0 = strip_offset_1 (op0, false, false, &off0);
1773 op1 = strip_offset_1 (op1, false, false, &off1);
1775 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776 if (op0 == TREE_OPERAND (expr, 0)
1777 && op1 == TREE_OPERAND (expr, 1))
     /* If one operand folded away entirely, simplify the result (the zero_p
        (op1) branch is elided here).  */
1782 else if (zero_p (op0))
1784 if (code == PLUS_EXPR)
     /* 0 - op1 ==> -op1.  */
1787 expr = fold_build1 (NEGATE_EXPR, type, op1);
1790 expr = fold_build2 (code, type, op0, op1);
1792 return fold_convert (orig_type, expr);
     /* ARRAY_REF: a constant index times the element size contributes
        index * size to the offset.  */
1798 step = array_ref_element_size (expr);
1799 if (!cst_and_fits_in_hwi (step))
1802 st = int_cst_value (step);
1803 op1 = TREE_OPERAND (expr, 1);
1804 op1 = strip_offset_1 (op1, false, false, &off1);
1805 *offset = off1 * st;
1810 /* Strip the component reference completely. */
1811 op0 = TREE_OPERAND (expr, 0);
1812 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
     /* COMPONENT_REF: a constant field offset is folded into *OFFSET.  */
1822 tmp = component_ref_field_offset (expr);
1824 && cst_and_fits_in_hwi (tmp))
1826 /* Strip the component reference completely. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829 *offset = off0 + int_cst_value (tmp);
     /* ADDR_EXPR: recurse into the addressed object (inside_addr = true,
        top_compref = true) and rebuild the address of the stripped object.  */
1835 op0 = TREE_OPERAND (expr, 0);
1836 op0 = strip_offset_1 (op0, true, true, &off0);
1839 if (op0 == TREE_OPERAND (expr, 0))
1842 expr = build_fold_addr_expr (op0);
1843 return fold_convert (orig_type, expr);
1846 inside_addr = false;
1853 /* Default handling of expressions for that we want to recurse into
1854 the first operand. */
1855 op0 = TREE_OPERAND (expr, 0);
1856 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1859 if (op0 == TREE_OPERAND (expr, 0)
1860 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
     /* Rebuild the node with the stripped operands (copy, do not mutate the
        shared original).  */
1863 expr = copy_node (expr);
1864 TREE_OPERAND (expr, 0) = op0;
1866 TREE_OPERAND (expr, 1) = op1;
1868 /* Inside address, we might strip the top level component references,
1869 thus changing type of the expression. Handling of ADDR_EXPR
1871 expr = fold_convert (orig_type, expr);
1876 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
     /* Thin wrapper: top-level call of strip_offset_1 (not inside an
        address, not at a top-level component reference).  */
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1881 return strip_offset_1 (expr, false, false, offset);
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885 We return unsigned type with the same precision, which avoids problems
     with overflow (per the elided tail of this comment -- TODO confirm).  */
1889 generic_type_for (tree type)
1891 if (POINTER_TYPE_P (type))
1892 return unsigned_type_for (type);
     /* Already-unsigned types are returned unchanged (elided return),
        signed ones are converted to their unsigned counterpart.  */
1894 if (TYPE_UNSIGNED (type))
1897 return unsigned_type_for (type);
1900 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1901 the bitmap to that we should store it.  */
     /* fd_ivopts_data carries the pass data into the walk_tree callback,
        since walk_tree only forwards a single void* argument.  */
1903 static struct ivopts_data *fd_ivopts_data;
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1907 bitmap *depends_on = data;
1908 struct version_info *info;
     /* Only SSA names that are recorded invariants (inv_id set, no nonlinear
        use) are added to the dependence bitmap.  */
1910 if (TREE_CODE (*expr_p) != SSA_NAME)
1912 info = name_info (fd_ivopts_data, *expr_p);
1914 if (!info->inv_id || info->has_nonlin_use)
     /* Lazily allocate the bitmap on first dependence found.  */
1918 *depends_on = BITMAP_ALLOC (NULL);
1919 bitmap_set_bit (*depends_on, info->inv_id);
1924 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1925 position to POS. If USE is not NULL, the candidate is set as related to
1926 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1927 replacement of the final value of the iv by a direct computation.  */
     /* Returns the (possibly pre-existing) candidate; duplicates with the
        same position, increment statement, base and step are reused.  */
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931 tree base, tree step, bool important, enum iv_position pos,
1932 struct iv_use *use, tree incremented_at)
1935 struct iv_cand *cand = NULL;
1936 tree type, orig_type;
     /* Normalize BASE/STEP to the generic (unsigned) type of the base.  */
1940 orig_type = TREE_TYPE (base);
1941 type = generic_type_for (orig_type);
1942 if (type != orig_type)
1944 base = fold_convert (type, base);
1946 step = fold_convert (type, step);
     /* Look for an existing equivalent candidate.  */
1950 for (i = 0; i < n_iv_cands (data); i++)
1952 cand = iv_cand (data, i);
1954 if (cand->pos != pos)
1957 if (cand->incremented_at != incremented_at)
1971 if (!operand_equal_p (base, cand->iv->base, 0))
1974 if (zero_p (cand->iv->step))
1981 if (step && operand_equal_p (step, cand->iv->step, 0))
     /* Not found: create and register a new candidate.  */
1986 if (i == n_iv_cands (data))
1988 cand = XCNEW (struct iv_cand)
1994 cand->iv = alloc_iv (base, step);
     /* For non-original candidates, a fresh temporary holds the iv value;
        var_after is updated later when the increment is created -- TODO
        confirm (the updating code is outside this excerpt).  */
1997 if (pos != IP_ORIGINAL && cand->iv)
1999 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2000 cand->var_after = cand->var_before;
2002 cand->important = important;
2003 cand->incremented_at = incremented_at;
2004 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
     /* A non-constant step may depend on invariants; record them.  */
2007 && TREE_CODE (step) != INTEGER_CST)
2009 fd_ivopts_data = data;
2010 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2013 if (dump_file && (dump_flags & TDF_DETAILS))
2014 dump_cand (dump_file, cand);
     /* Reused candidate may need upgrading to "important".  */
2017 if (important && !cand->important)
2019 cand->important = true;
2020 if (dump_file && (dump_flags & TDF_DETAILS))
2021 fprintf (dump_file, "Candidate %d is important\n", cand->id);
     /* Link the candidate to the requesting use, if any.  */
2026 bitmap_set_bit (use->related_cands, i);
2027 if (dump_file && (dump_flags & TDF_DETAILS))
2028 fprintf (dump_file, "Candidate %d is related to use %d\n",
2035 /* Returns true if incrementing the induction variable at the end of the LOOP
     is allowed (continuation of this comment is elided).
2038 The purpose is to avoid splitting latch edge with a biv increment, thus
2039 creating a jump, possibly confusing other optimization passes and leaving
2040 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2041 is not available (so we do not have a better alternative), or if the latch
2042 edge is already nonempty.  */
2045 allow_ip_end_pos_p (struct loop *loop)
     /* No normal position => IP_END is the only option.  Nonempty latch
        block => nothing is lost by putting the increment there.  The
        elided returns follow the comment above -- TODO confirm.  */
2047 if (!ip_normal_pos (loop))
2050 if (!empty_block_p (ip_end_pos (loop)))
2056 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2057 position to POS. If USE is not NULL, the candidate is set as related to
2058 it. The candidate computation is scheduled on all available positions.  */
     /* Registers the candidate once for IP_NORMAL (if a normal position
        exists) and once for IP_END (if allowed -- see allow_ip_end_pos_p).  */
2061 add_candidate (struct ivopts_data *data,
2062 tree base, tree step, bool important, struct iv_use *use)
2064 if (ip_normal_pos (data->current_loop))
2065 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2066 if (ip_end_pos (data->current_loop)
2067 && allow_ip_end_pos_p (data->current_loop))
2068 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2071 /* Add a standard "0 + 1 * iteration" iv candidate for a
2072 type with SIZE bits.  */
2075 add_standard_iv_candidates_for_size (struct ivopts_data *data,
     /* Uses the language hook to obtain an unsigned integer type of the
        requested bit size; candidate base is 0, step is 1.  */
2078 tree type = lang_hooks.types.type_for_size (size, true);
2079 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2083 /* Adds standard iv candidates.  */
     /* Always a plain int-sized counter; additionally a double-width one
        when the target word is wide enough for it to stay cheap.  */
2086 add_standard_iv_candidates (struct ivopts_data *data)
2088 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2090 /* The same for a double-integer type if it is still fast enough. */
2091 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2092 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2096 /* Adds candidates bases on the old induction variable IV.  */
     /* Adds: (1) the iv itself, (2) the same step with zero base, and, when
        the iv is defined by a PHI node, (3) an IP_ORIGINAL candidate so the
        original variable can be preserved unchanged.  */
2099 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2102 struct iv_cand *cand;
2104 add_candidate (data, iv->base, iv->step, true, NULL);
2106 /* The same, but with initial value zero. */
2107 add_candidate (data,
2108 build_int_cst (TREE_TYPE (iv->base), 0),
2109 iv->step, true, NULL);
2111 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2112 if (TREE_CODE (phi) == PHI_NODE)
2114 /* Additionally record the possibility of leaving the original iv
2116 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2117 cand = add_candidate_1 (data,
2118 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2119 SSA_NAME_DEF_STMT (def));
     /* var_before/var_after are the original PHI result and its latch-edge
        value, so no new temporary is needed for this candidate.  */
2120 cand->var_before = iv->ssa_name;
2121 cand->var_after = def;
2125 /* Adds candidates based on the old induction variables.  */
     /* Scans all relevant SSA versions for recorded bivs with nonzero step
        and adds candidates for each.  */
2128 add_old_ivs_candidates (struct ivopts_data *data)
2134 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2136 iv = ver_info (data, i)->iv;
2137 if (iv && iv->biv_p && !zero_p (iv->step))
2138 add_old_iv_candidates (data, iv);
2142 /* Adds candidates based on the value of the induction variable IV and USE.  */
     /* Three variants: the iv as-is, the iv with zero base (marked
        important, as many uses can share it), and the iv with its constant
        offset stripped.  */
2145 add_iv_value_candidates (struct ivopts_data *data,
2146 struct iv *iv, struct iv_use *use)
2148 unsigned HOST_WIDE_INT offset;
2151 add_candidate (data, iv->base, iv->step, false, use);
2153 /* The same, but with initial value zero. Make such variable important,
2154 since it is generic enough so that possibly many uses may be based
2156 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2157 iv->step, true, use);
2159 /* Third, try removing the constant offset. */
2160 base = strip_offset (iv->base, &offset);
2162 add_candidate (data, base, iv->step, false, use);
2165 /* Adds candidates based on the uses.  */
     /* For every recorded use, derives value-based candidates from the use's
        iv (the switch over use->type is partially elided here).  */
2168 add_derived_ivs_candidates (struct ivopts_data *data)
2172 for (i = 0; i < n_iv_uses (data); i++)
2174 struct iv_use *use = iv_use (data, i);
2181 case USE_NONLINEAR_EXPR:
2184 /* Just add the ivs based on the value of the iv used here. */
2185 add_iv_value_candidates (data, use->iv, use);
2194 /* Record important candidates and add them to related_cands bitmaps
     of the uses (tail of comment elided).  */
2198 record_important_candidates (struct ivopts_data *data)
2203 for (i = 0; i < n_iv_cands (data); i++)
2205 struct iv_cand *cand = iv_cand (data, i);
2207 if (cand->important)
2208 bitmap_set_bit (data->important_candidates, i);
     /* With few candidates we simply consider all of them for every use,
        and the per-use related_cands bitmaps are no longer needed.  */
2211 data->consider_all_candidates = (n_iv_cands (data)
2212 <= CONSIDER_ALL_CANDIDATES_BOUND);
2214 if (data->consider_all_candidates)
2216 /* We will not need "related_cands" bitmaps in this case,
2217 so release them to decrease peak memory consumption. */
2218 for (i = 0; i < n_iv_uses (data); i++)
2220 use = iv_use (data, i);
2221 BITMAP_FREE (use->related_cands);
2226 /* Add important candidates to the related_cands bitmaps. */
2227 for (i = 0; i < n_iv_uses (data); i++)
2228 bitmap_ior_into (iv_use (data, i)->related_cands,
2229 data->important_candidates);
2233 /* Finds the candidates for the induction variables.  */
     /* Driver: standard ivs, old (existing) ivs, use-derived ivs, then
        marks/records the important ones.  */
2236 find_iv_candidates (struct ivopts_data *data)
2238 /* Add commonly used ivs. */
2239 add_standard_iv_candidates (data);
2241 /* Add old induction variables. */
2242 add_old_ivs_candidates (data);
2244 /* Add induction variables derived from uses. */
2245 add_derived_ivs_candidates (data);
2247 /* Record the important candidates. */
2248 record_important_candidates (data);
2251 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2252 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2253 we allocate a simple list to every use.  */
2256 alloc_use_cost_map (struct ivopts_data *data)
2258 unsigned i, size, s, j;
2260 for (i = 0; i < n_iv_uses (data); i++)
2262 struct iv_use *use = iv_use (data, i);
2265 if (data->consider_all_candidates)
2266 size = n_iv_cands (data);
     /* Otherwise count the related candidates (count accumulation in the
        bitmap loop is partially elided) and size a small hash table.  */
2270 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2275 /* Round up to the power of two, so that moduling by it is fast. */
2276 for (size = 1; size < s; size <<= 1)
2280 use->n_map_members = size;
2281 use->cost_map = XCNEWVEC (struct cost_pair, size);
2285 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2286 on invariants DEPENDS_ON and that the value used in expressing it
     is VALUE (tail of comment elided).  */
2290 set_use_iv_cost (struct ivopts_data *data,
2291 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2292 bitmap depends_on, tree value)
     /* Infinite-cost entries are not stored; their depends_on bitmap is
        released here (surrounding condition elided -- TODO confirm).  */
2298 BITMAP_FREE (depends_on);
2302 if (data->consider_all_candidates)
     /* Dense mode: direct indexing by candidate id.  */
2304 use->cost_map[cand->id].cand = cand;
2305 use->cost_map[cand->id].cost = cost;
2306 use->cost_map[cand->id].depends_on = depends_on;
2307 use->cost_map[cand->id].value = value;
2311 /* n_map_members is a power of two, so this computes modulo. */
2312 s = cand->id & (use->n_map_members - 1);
     /* Open-addressing insert: linear probe from the hash slot, wrapping
        around once.  */
2313 for (i = s; i < use->n_map_members; i++)
2314 if (!use->cost_map[i].cand)
2316 for (i = 0; i < s; i++)
2317 if (!use->cost_map[i].cand)
2323 use->cost_map[i].cand = cand;
2324 use->cost_map[i].cost = cost;
2325 use->cost_map[i].depends_on = depends_on;
2326 use->cost_map[i].value = value;
2329 /* Gets cost of (USE, CANDIDATE) pair.  */
     /* Returns the stored cost_pair, or (via elided paths) NULL when the
        pair was never recorded -- TODO confirm the miss return.  */
2331 static struct cost_pair *
2332 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2333 struct iv_cand *cand)
2336 struct cost_pair *ret;
2341 if (data->consider_all_candidates)
2343 ret = use->cost_map + cand->id;
2350 /* n_map_members is a power of two, so this computes modulo. */
2351 s = cand->id & (use->n_map_members - 1);
     /* Linear probe mirroring the insertion order in set_use_iv_cost.  */
2352 for (i = s; i < use->n_map_members; i++)
2353 if (use->cost_map[i].cand == cand)
2354 return use->cost_map + i;
2356 for (i = 0; i < s; i++)
2357 if (use->cost_map[i].cand == cand)
2358 return use->cost_map + i;
2363 /* Returns estimate on cost of computing SEQ.  */
     /* Sums rtx_cost over the single-set insns of the sequence (non-single-
        set insns are handled by elided code -- TODO confirm).  */
2371 for (; seq; seq = NEXT_INSN (seq))
2373 set = single_set (seq);
2375 cost += rtx_cost (set, SET);
2383 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
     /* Static/external objects get a SYMBOL_REF address; others get a fresh
        fake pseudo register as their address.  */
2385 produce_memory_decl_rtl (tree obj, int *regno)
2390 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2392 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2393 x = gen_rtx_SYMBOL_REF (Pmode, name);
2396 x = gen_raw_REG (Pmode, (*regno)++);
2398 return gen_rtx_MEM (DECL_MODE (obj), x);
2401 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2402 walk_tree. DATA contains the actual fake register number.  */
     /* Assigns fake DECL_RTL (registers or memory) to every decl found so
        the expression can be expanded for cost estimation; records each
        touched decl in decl_rtl_to_reset for later cleanup.  */
2405 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2407 tree obj = NULL_TREE;
2411 switch (TREE_CODE (*expr_p))
     /* ADDR_EXPR (label elided): dig to the base object of the reference.  */
2414 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2415 handled_component_p (*expr_p);
2416 expr_p = &TREE_OPERAND (*expr_p, 0))
2419 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2420 x = produce_memory_decl_rtl (obj, regno);
     /* SSA_NAME case: give the underlying variable a fake register.  */
2425 obj = SSA_NAME_VAR (*expr_p);
2426 if (!DECL_RTL_SET_P (obj))
2427 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
     /* Plain decls: BLKmode objects live in memory, the rest in registers.  */
2436 if (DECL_RTL_SET_P (obj))
2439 if (DECL_MODE (obj) == BLKmode)
2440 x = produce_memory_decl_rtl (obj, regno);
2442 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
     /* Remember the decl so its fake DECL_RTL can be reset afterwards.  */
2452 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2453 SET_DECL_RTL (obj, x);
2459 /* Determines cost of the computation of EXPR.  */
     /* Expands EXPR to an RTL sequence with fake registers and sums the
        insn costs; a memory result adds its address cost as well.  */
2462 computation_cost (tree expr)
2465 tree type = TREE_TYPE (expr);
2467 /* Avoid using hard regs in ways which may be unsupported. */
2468 int regno = LAST_VIRTUAL_REGISTER + 1;
2470 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2472 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2476 cost = seq_cost (seq);
     /* When the expansion yields a MEM, count its addressing cost too
        (the MEM_P guard is elided here -- TODO confirm).  */
2478 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2483 /* Returns variable containing the value of candidate CAND at statement AT.  */
     /* Before the increment the value lives in var_before, after it in
        var_after.  */
2486 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2488 if (stmt_after_increment (loop, cand, stmt))
2489 return cand->var_after;
2491 return cand->var_before;
2494 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2495 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2498 tree_int_cst_sign_bit (tree t)
2500 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2501 unsigned HOST_WIDE_INT w;
     /* The sign bit may live in the low or the high word of the two-word
        INTEGER_CST representation.  */
2503 if (bitno < HOST_BITS_PER_WIDE_INT)
2504 w = TREE_INT_CST_LOW (t);
2507 w = TREE_INT_CST_HIGH (t);
2508 bitno -= HOST_BITS_PER_WIDE_INT;
2511 return (w >> bitno) & 1;
2514 /* If we can prove that TOP = cst * BOT for some constant cst,
2515 store cst to MUL and return true. Otherwise return false.
2516 The returned value is always sign-extended, regardless of the
2517 signedness of TOP and BOT.  */
     /* Recursive over MULT/PLUS/MINUS; the base cases are syntactic equality
        and exact integer division of constants.  */
2520 constant_multiple_of (tree top, tree bot, double_int *mul)
2523 enum tree_code code;
2524 double_int res, p0, p1;
2525 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2530 if (operand_equal_p (top, bot, 0))
2532 *mul = double_int_one;
2536 code = TREE_CODE (top);
     /* MULT_EXPR by a constant: multiply the recursive result by it.  */
2540 mby = TREE_OPERAND (top, 1);
2541 if (TREE_CODE (mby) != INTEGER_CST)
2544 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2547 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
     /* PLUS/MINUS: both operands must be multiples; combine with sign.  */
2553 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2554 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2557 if (code == MINUS_EXPR)
2558 p1 = double_int_neg (p1);
2559 *mul = double_int_sext (double_int_add (p0, p1), precision);
     /* INTEGER_CST base case: TOP/BOT must divide exactly (zero remainder
        after floor division).  */
2563 if (TREE_CODE (bot) != INTEGER_CST)
2566 p0 = double_int_sext (tree_to_double_int (top), precision);
2567 p1 = double_int_sext (tree_to_double_int (bot), precision);
2568 if (double_int_zero_p (p1))
2570 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2572 return double_int_zero_p (res);
2579 /* Folds EXPR using the affine expressions framework.  */
     /* Converts to an affine combination and back, which canonicalizes and
        simplifies; bails out (elided return) for types wider than a
        HOST_WIDE_INT.  */
2582 fold_affine_expr (tree expr)
2584 tree type = TREE_TYPE (expr);
2585 struct affine_tree_combination comb;
2587 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2590 tree_to_aff_combination (expr, type, &comb);
2591 return aff_combination_to_tree (&comb);
2594 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2595 same precision that is at least as wide as the precision of TYPE, stores
2596 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
     type of A (tail of comment elided -- TODO confirm).  */
2600 determine_common_wider_type (tree *a, tree *b)
2602 tree wider_type = NULL;
2604 tree atype = TREE_TYPE (*a);
     /* Peel a widening conversion off *a, if present.  */
2606 if ((TREE_CODE (*a) == NOP_EXPR
2607 || TREE_CODE (*a) == CONVERT_EXPR))
2609 suba = TREE_OPERAND (*a, 0);
2610 wider_type = TREE_TYPE (suba);
2611 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
     /* And the matching conversion off *b; precisions must agree.  */
2617 if ((TREE_CODE (*b) == NOP_EXPR
2618 || TREE_CODE (*b) == CONVERT_EXPR))
2620 subb = TREE_OPERAND (*b, 0);
2621 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2632 /* Determines the expression by that USE is expressed from induction variable
2633 CAND at statement AT in LOOP. The expression is stored in a decomposed
2634 form into AFF. Returns false if USE cannot be expressed using CAND.  */
     /* Computes use = ubase - ratio * cbase + ratio * var in affine form,
        where ratio = ustep / cstep (must be a constant multiple).  */
2637 get_computation_aff (struct loop *loop,
2638 struct iv_use *use, struct iv_cand *cand, tree at,
2639 struct affine_tree_combination *aff)
2641 tree ubase = use->iv->base;
2642 tree ustep = use->iv->step;
2643 tree cbase = cand->iv->base;
2644 tree cstep = cand->iv->step, cstep_common;
2645 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2646 tree common_type, var;
2648 aff_tree cbase_aff, var_aff;
2651 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2653 /* We do not have a precision to express the values of use. */
2657 var = var_at_stmt (loop, cand, at);
     /* All arithmetic is done in the unsigned variant of the use type, so
        any overflow wraps instead of being undefined.  */
2658 uutype = unsigned_type_for (utype);
2660 /* If the conversion is not noop, perform it. */
2661 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2663 cstep = fold_convert (uutype, cstep);
2664 cbase = fold_convert (uutype, cbase);
2665 var = fold_convert (uutype, var);
     /* The candidate step must evenly divide the use step.  */
2668 if (!constant_multiple_of (ustep, cstep, &rat))
2671 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2672 type, we achieve better folding by computing their difference in this
2673 wider type, and cast the result to UUTYPE. We do not need to worry about
2674 overflows, as all the arithmetics will in the end be performed in UUTYPE
2676 common_type = determine_common_wider_type (&ubase, &cbase);
2678 /* use = ubase - ratio * cbase + ratio * var. */
2679 tree_to_aff_combination (ubase, common_type, aff);
2680 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2681 tree_to_aff_combination (var, uutype, &var_aff);
2683 /* We need to shift the value if we are after the increment. */
2684 if (stmt_after_increment (loop, cand, at))
     /* Add one candidate step to cbase so the post-increment value of VAR
        still yields the right use value.  */
2688 if (common_type != uutype)
2689 cstep_common = fold_convert (common_type, cstep);
2691 cstep_common = cstep;
2693 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2694 aff_combination_add (&cbase_aff, &cstep_aff);
2697 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2698 aff_combination_add (aff, &cbase_aff);
2699 if (common_type != uutype)
2700 aff_combination_convert (aff, uutype);
2702 aff_combination_scale (&var_aff, rat);
2703 aff_combination_add (aff, &var_aff);
2708 /* Determines the expression by that USE is expressed from induction variable
2709 CAND at statement AT in LOOP. The computation is unshared.  */
     /* Tree-form wrapper over get_computation_aff; converts the affine
        result back to a tree of the use's base type.  */
2712 get_computation_at (struct loop *loop,
2713 struct iv_use *use, struct iv_cand *cand, tree at)
2716 tree type = TREE_TYPE (use->iv->base);
     /* NULL_TREE is returned when the use cannot be expressed (elided
        return after this check -- TODO confirm).  */
2718 if (!get_computation_aff (loop, use, cand, at, &aff))
2720 unshare_aff_combination (&aff);
2721 return fold_convert (type, aff_combination_to_tree (&aff));
2724 /* Determines the expression by that USE is expressed from induction variable
2725 CAND in LOOP. The computation is unshared.  */
     /* Convenience wrapper: the position is the use's own statement.  */
2728 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2730 return get_computation_at (loop, use, cand, use->stmt)
2733 /* Returns cost of addition in MODE.  */
     /* Memoized per mode: expands a register-register PLUS once and caches
        the sequence cost in a static table.  */
2736 add_cost (enum machine_mode mode)
2738 static unsigned costs[NUM_MACHINE_MODES];
2746 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2747 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2748 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2753 cost = seq_cost (seq);
     /* NOTE(review): cache store and a zero-cost fallback are elided in
        this excerpt -- confirm against the full file.  */
2759 if (dump_file && (dump_flags & TDF_DETAILS))
2760 fprintf (dump_file, "Addition in %s costs %d\n",
2761 GET_MODE_NAME (mode), cost);
2765 /* Entry in a hashtable of already known costs for multiplication.  */
2768 HOST_WIDE_INT cst; /* The constant to multiply by. */
2769 enum machine_mode mode; /* In mode. */
2770 unsigned cost; /* The cost. */
2773 /* Counts hash value for the ENTRY.  */
     /* Hash combines the mode and the constant (mod a prime).  */
2776 mbc_entry_hash (const void *entry)
2778 const struct mbc_entry *e = entry;
2780 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2783 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
     /* Equality requires both the mode and the constant to match.  */
2786 mbc_entry_eq (const void *entry1, const void *entry2)
2788 const struct mbc_entry *e1 = entry1;
2789 const struct mbc_entry *e2 = entry2;
2791 return (e1->mode == e2->mode
2792 && e1->cst == e2->cst);
2795 /* Returns cost of multiplication by constant CST in MODE.  */
     /* Memoized in a lazily-created hash table keyed by (mode, cst); on a
        miss, expands the multiplication and caches its sequence cost.  */
2798 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2800 static htab_t costs;
2801 struct mbc_entry **cached, act;
2806 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2810 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
     /* Cache hit (non-NULL slot check elided) -- return memoized cost.  */
2812 return (*cached)->cost;
2814 *cached = XNEW (struct mbc_entry);
2815 (*cached)->mode = mode;
2816 (*cached)->cst = cst;
2819 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2820 gen_int_mode (cst, mode), NULL_RTX, 0);
2824 cost = seq_cost (seq);
2826 if (dump_file && (dump_flags & TDF_DETAILS))
2827 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2828 (int) cst, GET_MODE_NAME (mode), cost);
2830 (*cached)->cost = cost;
2835 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2836 validity for a memory reference accessing memory of mode MODE.  */
     /* Lazily probes, once per mode, every multiplier in [-MAX_RATIO,
        MAX_RATIO] with memory_address_p and records the valid ones in a
        static sbitmap (index shifted by +MAX_RATIO).  */
2839 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2841 #define MAX_RATIO 128
2842 static sbitmap valid_mult[MAX_MACHINE_MODE];
2844 if (!valid_mult[mode])
2846 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2850 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2851 sbitmap_zero (valid_mult[mode]);
2852 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2853 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2855 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2856 if (memory_address_p (mode, addr))
2857 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2860 if (dump_file && (dump_flags & TDF_DETAILS))
2862 fprintf (dump_file, " allowed multipliers:");
2863 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2864 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2865 fprintf (dump_file, " %d", (int) i);
2866 fprintf (dump_file, "\n");
2867 fprintf (dump_file, "\n");
     /* Out-of-range ratios are never allowed (elided false return).  */
2871 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2874 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2877 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2878 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2879 variable is omitted. Compute the cost for a memory reference that accesses
2880 a memory location of mode MEM_MODE.
2882 TODO -- there must be some better way. This all is quite crude. */
2885 get_address_cost (bool symbol_present, bool var_present,
2886 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2887 enum machine_mode mem_mode)
2889 static bool initialized[MAX_MACHINE_MODE];
2890 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2891 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2892 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2893 unsigned cost, acost;
2894 bool offset_p, ratio_p;
2895 HOST_WIDE_INT s_offset;
2896 unsigned HOST_WIDE_INT mask;
2899 if (!initialized[mem_mode])
2902 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2903 int old_cse_not_expected;
2904 unsigned sym_p, var_p, off_p, rat_p, add_c;
2905 rtx seq, addr, base;
2908 initialized[mem_mode] = true;
2910 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2912 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2913 for (i = start; i <= 1 << 20; i <<= 1)
2915 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2916 if (!memory_address_p (mem_mode, addr))
2919 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2920 off[mem_mode] = max_offset[mem_mode];
2922 for (i = start; i <= 1 << 20; i <<= 1)
2924 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2925 if (!memory_address_p (mem_mode, addr))
2928 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2930 if (dump_file && (dump_flags & TDF_DETAILS))
2932 fprintf (dump_file, "get_address_cost:\n");
2933 fprintf (dump_file, " min offset %s %d\n",
2934 GET_MODE_NAME (mem_mode),
2935 (int) min_offset[mem_mode]);
2936 fprintf (dump_file, " max offset %s %d\n",
2937 GET_MODE_NAME (mem_mode),
2938 (int) max_offset[mem_mode]);
2942 for (i = 2; i <= MAX_RATIO; i++)
2943 if (multiplier_allowed_in_address_p (i, mem_mode))
2949 /* Compute the cost of various addressing modes. */
2951 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2952 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2954 for (i = 0; i < 16; i++)
2957 var_p = (i >> 1) & 1;
2958 off_p = (i >> 2) & 1;
2959 rat_p = (i >> 3) & 1;
2963 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2964 gen_int_mode (rat[mem_mode], Pmode));
2967 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2971 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2973 base = gen_rtx_fmt_e (CONST, Pmode,
2974 gen_rtx_fmt_ee (PLUS, Pmode,
2976 gen_int_mode (off[mem_mode],
2980 base = gen_int_mode (off[mem_mode], Pmode);
2985 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2988 /* To avoid splitting addressing modes, pretend that no cse will
2990 old_cse_not_expected = cse_not_expected;
2991 cse_not_expected = true;
2992 addr = memory_address (mem_mode, addr);
2993 cse_not_expected = old_cse_not_expected;
2997 acost = seq_cost (seq);
2998 acost += address_cost (addr, mem_mode);
3002 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3005 /* On some targets, it is quite expensive to load symbol to a register,
3006 which makes addresses that contain symbols look much more expensive.
3007 However, the symbol will have to be loaded in any case before the
3008 loop (and quite likely we have it in register already), so it does not
3009 make much sense to penalize them too heavily. So make some final
3010 tweaks for the SYMBOL_PRESENT modes:
3012 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3013 var is cheaper, use this mode with small penalty.
3014 If VAR_PRESENT is true, try whether the mode with
3015 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3016 if this is the case, use it. */
3017 add_c = add_cost (Pmode);
3018 for (i = 0; i < 8; i++)
3021 off_p = (i >> 1) & 1;
3022 rat_p = (i >> 2) & 1;
3024 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3028 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3029 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3032 if (dump_file && (dump_flags & TDF_DETAILS))
3034 fprintf (dump_file, "Address costs:\n");
3036 for (i = 0; i < 16; i++)
3039 var_p = (i >> 1) & 1;
3040 off_p = (i >> 2) & 1;
3041 rat_p = (i >> 3) & 1;
3043 fprintf (dump_file, " ");
3045 fprintf (dump_file, "sym + ");
3047 fprintf (dump_file, "var + ");
3049 fprintf (dump_file, "cst + ");
3051 fprintf (dump_file, "rat * ");
3053 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3054 fprintf (dump_file, "index costs %d\n", acost);
3056 fprintf (dump_file, "\n");
3060 bits = GET_MODE_BITSIZE (Pmode);
3061 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3063 if ((offset >> (bits - 1) & 1))
3068 offset_p = (s_offset != 0
3069 && min_offset[mem_mode] <= s_offset
3070 && s_offset <= max_offset[mem_mode]);
3071 ratio_p = (ratio != 1
3072 && multiplier_allowed_in_address_p (ratio, mem_mode));
3074 if (ratio != 1 && !ratio_p)
3075 cost += multiply_by_cost (ratio, Pmode);
3077 if (s_offset && !offset_p && !symbol_present)
3078 cost += add_cost (Pmode);
3080 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3081 return cost + acost;
3084 /* Estimates cost of forcing expression EXPR into a variable. */
/* NOTE(review): this chunk is elided (interior lines missing); code below is
   kept byte-identical.  On first call the function measures, once per
   compilation, the cost of materializing an integer constant, a symbol
   address, and a symbol+offset address, caching them in function-static
   variables.  */
3087 force_expr_to_var_cost (tree expr)
3089 static bool costs_initialized = false;
3090 static unsigned integer_cost;
3091 static unsigned symbol_cost;
3092 static unsigned address_cost;
3094 unsigned cost0, cost1, cost;
3095 enum machine_mode mode;
3097 if (!costs_initialized)
/* One-time calibration: build a dummy static variable and measure the
   RTL cost of computing a constant, its address, and address + offset.  */
3099 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3100 rtx x = gen_rtx_MEM (DECL_MODE (var),
3101 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3103 tree type = build_pointer_type (integer_type_node);
3105 integer_cost = computation_cost (build_int_cst (integer_type_node,
3108 SET_DECL_RTL (var, x);
3109 TREE_STATIC (var) = 1;
3110 addr = build1 (ADDR_EXPR, type, var);
/* The "+ 1" biases symbol/address costs slightly above a plain register.  */
3111 symbol_cost = computation_cost (addr) + 1;
3114 = computation_cost (build2 (PLUS_EXPR, type,
3116 build_int_cst (type, 2000))) + 1;
3117 if (dump_file && (dump_flags & TDF_DETAILS))
3119 fprintf (dump_file, "force_expr_to_var_cost:\n");
3120 fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3121 fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3122 fprintf (dump_file, "  address %d\n", (int) address_cost);
3123 fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3124 fprintf (dump_file, "\n");
3127 costs_initialized = true;
/* Cheap cases: SSA variables and invariant leaves.  */
3132 if (SSA_VAR_P (expr))
3135 if (TREE_INVARIANT (expr))
3137 if (TREE_CODE (expr) == INTEGER_CST)
3138 return integer_cost;
3140 if (TREE_CODE (expr) == ADDR_EXPR)
3142 tree obj = TREE_OPERAND (expr, 0);
3144 if (TREE_CODE (obj) == VAR_DECL
3145 || TREE_CODE (obj) == PARM_DECL
3146 || TREE_CODE (obj) == RESULT_DECL)
3150 return address_cost;
/* Composite expressions: recurse into operands, then add the cost of
   the top-level operation.  */
3153 switch (TREE_CODE (expr))
3158 op0 = TREE_OPERAND (expr, 0);
3159 op1 = TREE_OPERAND (expr, 1);
3163 if (is_gimple_val (op0))
3166 cost0 = force_expr_to_var_cost (op0);
3168 if (is_gimple_val (op1))
3171 cost1 = force_expr_to_var_cost (op1);
3176 /* Just an arbitrary value, FIXME. */
3177 return target_spill_cost;
3180 mode = TYPE_MODE (TREE_TYPE (expr));
3181 switch (TREE_CODE (expr))
3185 cost = add_cost (mode);
/* Multiplication by a constant that fits in a HOST_WIDE_INT is costed
   precisely; anything else falls back to the spill cost.  */
3189 if (cst_and_fits_in_hwi (op0))
3190 cost = multiply_by_cost (int_cst_value (op0), mode);
3191 else if (cst_and_fits_in_hwi (op1))
3192 cost = multiply_by_cost (int_cst_value (op1), mode);
3194 return target_spill_cost;
3204 /* Bound the cost by target_spill_cost.  The parts of complicated
3205 computations often are either loop invariant or at least can
3206 be shared between several iv uses, so letting this grow without
3207 limits would not give reasonable results. */
3208 return cost < target_spill_cost ? cost : target_spill_cost;
3211 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3212 invariants the computation depends on. */
/* NOTE(review): lines elided in this chunk; presumably the walk_tree call is
   guarded on DEPENDS_ON being non-NULL — confirm against the full file.  */
3215 force_var_cost (struct ivopts_data *data,
3216 tree expr, bitmap *depends_on)
/* Record which loop invariants EXPR depends on via the find_depends
   callback, then delegate the actual costing.  */
3220 fd_ivopts_data = data;
3221 walk_tree (&expr, find_depends, depends_on, NULL);
3224 return force_expr_to_var_cost (expr);
3227 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3228 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3229 to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3230 invariants the computation depends on. */
3233 split_address_cost (struct ivopts_data *data,
3234 tree addr, bool *symbol_present, bool *var_present,
3235 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3238 HOST_WIDE_INT bitsize;
3239 HOST_WIDE_INT bitpos;
3241 enum machine_mode mode;
3242 int unsignedp, volatilep;
/* Decompose ADDR into its base object and bit offset.  */
3244 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3245 &unsignedp, &volatilep, false);
/* If the reference is not a plain, byte-aligned VAR_DECL access, we
   cannot split it; treat the whole address as a variable part and
   charge the spill cost.  */
3248 || bitpos % BITS_PER_UNIT != 0
3249 || TREE_CODE (core) != VAR_DECL)
3251 *symbol_present = false;
3252 *var_present = true;
3253 fd_ivopts_data = data;
3254 walk_tree (&addr, find_depends, depends_on, NULL);
3255 return target_spill_cost;
3258 *offset += bitpos / BITS_PER_UNIT;
/* Static/external decls have link-time-constant addresses, so they
   count as the "symbol" part; automatic variables are the "var" part.  */
3259 if (TREE_STATIC (core)
3260 || DECL_EXTERNAL (core))
3262 *symbol_present = true;
3263 *var_present = false;
3267 *symbol_present = false;
3268 *var_present = true;
3272 /* Estimates cost of expressing difference of addresses E1 - E2 as
3273 var + symbol + offset.  The value of offset is added to OFFSET,
3274 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3275 part is missing.  DEPENDS_ON is a set of the invariants the computation
3279 ptr_difference_cost (struct ivopts_data *data,
3280 tree e1, tree e2, bool *symbol_present, bool *var_present,
3281 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3283 HOST_WIDE_INT diff = 0;
/* E1 must be the address of an object; E2 may be NULL/zero (below).  */
3286 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Best case: the difference folds to a compile-time constant.  */
3288 if (ptr_difference_const (e1, e2, &diff))
3291 *symbol_present = false;
3292 *var_present = false;
/* E2 == 0 means we are really costing E1 itself; split it into
   symbol/var/offset parts.  */
3296 if (e2 == integer_zero_node)
3297 return split_address_cost (data, TREE_OPERAND (e1, 0),
3298 symbol_present, var_present, offset, depends_on);
/* General case: force both addresses to variables and add them.  */
3300 *symbol_present = false;
3301 *var_present = true;
3303 cost = force_var_cost (data, e1, depends_on);
3304 cost += force_var_cost (data, e2, depends_on);
3305 cost += add_cost (Pmode);
3310 /* Estimates cost of expressing difference E1 - E2 as
3311 var + symbol + offset.  The value of offset is added to OFFSET,
3312 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3313 part is missing.  DEPENDS_ON is a set of the invariants the computation
3317 difference_cost (struct ivopts_data *data,
3318 tree e1, tree e2, bool *symbol_present, bool *var_present,
3319 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3322 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3323 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands first; only the net offset
   matters.  */
3325 e1 = strip_offset (e1, &off1);
3326 e2 = strip_offset (e2, &off2);
3327 *offset += off1 - off2;
/* Address difference gets the specialized pointer handling.  */
3332 if (TREE_CODE (e1) == ADDR_EXPR)
3333 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3335 *symbol_present = false;
/* Identical bases cancel out entirely.  */
3337 if (operand_equal_p (e1, e2, 0))
3339 *var_present = false;
3342 *var_present = true;
/* NOTE(review): elided branches here; from the visible code, one side
   being trivial leaves force_var_cost of the other, negating E2 costs a
   multiply by -1, and the general case is var(e1) + var(e2) + add.  */
3344 return force_var_cost (data, e1, depends_on);
3348 cost = force_var_cost (data, e2, depends_on);
3349 cost += multiply_by_cost (-1, mode);
3354 cost = force_var_cost (data, e1, depends_on);
3355 cost += force_var_cost (data, e2, depends_on);
3356 cost += add_cost (mode);
3361 /* Determines the cost of the computation by that USE is expressed
3362 from induction variable CAND.  If ADDRESS_P is true, we just need
3363 to create an address from it, otherwise we want to get it into
3364 register.  A set of invariants we depend on is stored in
3365 DEPENDS_ON.  AT is the statement at that the value is computed. */
/* NOTE(review): heavily elided chunk; code kept byte-identical.  */
3368 get_computation_cost_at (struct ivopts_data *data,
3369 struct iv_use *use, struct iv_cand *cand,
3370 bool address_p, bitmap *depends_on, tree at)
3372 tree ubase = use->iv->base, ustep = use->iv->step;
3374 tree utype = TREE_TYPE (ubase), ctype;
3375 unsigned HOST_WIDE_INT cstepi, offset = 0;
3376 HOST_WIDE_INT ratio, aratio;
3377 bool var_present, symbol_present;
3378 unsigned cost = 0, n_sums;
3383 /* Only consider real candidates. */
3387 cbase = cand->iv->base;
3388 cstep = cand->iv->step;
3389 ctype = TREE_TYPE (cbase);
3391 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3393 /* We do not have a precision to express the values of use. */
3399 /* Do not try to express address of an object with computation based
3400 on address of a different object.  This may cause problems in rtl
3401 level alias analysis (that does not expect this to be happening,
3402 as this is illegal in C), and would be unlikely to be useful
3404 if (use->iv->base_object
3405 && cand->iv->base_object
3406 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3410 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3412 /* TODO -- add direct handling of this case. */
3416 /* CSTEPI is removed from the offset in case statement is after the
3417 increment.  If the step is not constant, we use zero instead.
3418 This is a bit imprecise (there is the extra addition), but
3419 redundancy elimination is likely to transform the code so that
3420 it uses value of the variable before increment anyway,
3421 so it is not that much unrealistic. */
3422 if (cst_and_fits_in_hwi (cstep))
3423 cstepi = int_cst_value (cstep);
/* RATIO = ustep / cstep must be a known compile-time constant that
   fits in a signed HOST_WIDE_INT, otherwise we give up.  */
3427 if (!constant_multiple_of (ustep, cstep, &rat))
3430 if (double_int_fits_in_shwi_p (rat))
3431 ratio = double_int_to_shwi (rat);
3435 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3436 or ratio == 1, it is better to handle this like
3438 ubase - ratio * cbase + ratio * var
3440 (also holds in the case ratio == -1, TODO. */
3442 if (cst_and_fits_in_hwi (cbase))
3444 offset = - ratio * int_cst_value (cbase);
3445 cost += difference_cost (data,
3446 ubase, integer_zero_node,
3447 &symbol_present, &var_present, &offset,
3450 else if (ratio == 1)
3452 cost += difference_cost (data,
3454 &symbol_present, &var_present, &offset,
3459 cost += force_var_cost (data, cbase, depends_on);
3460 cost += add_cost (TYPE_MODE (ctype));
3461 cost += difference_cost (data,
3462 ubase, integer_zero_node,
3463 &symbol_present, &var_present, &offset,
3467 /* If we are after the increment, the value of the candidate is higher by
3469 if (stmt_after_increment (data->current_loop, cand, at))
3470 offset -= ratio * cstepi;
3472 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3473 (symbol/var/const parts may be omitted).  If we are looking for an address,
3474 find the cost of addressing this. */
3476 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3477 TYPE_MODE (TREE_TYPE (*use->op_p)));
3479 /* Otherwise estimate the costs for computing the expression. */
3480 aratio = ratio > 0 ? ratio : -ratio;
3481 if (!symbol_present && !var_present && !offset)
/* Pure multiple of the candidate: just a multiply.  */
3484 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3490 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3494 /* Symbol + offset should be compile-time computable. */
3495 && (symbol_present || offset))
/* N_SUMS additions combine the symbol/var/const/product parts.  */
3498 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3502 /* Just get the expression, expand it and measure the cost. */
/* Fallback path (presumably reached via goto from the bail-outs above
   — confirm against the full file): expand the computation to RTL and
   measure it directly.  */
3503 tree comp = get_computation_at (data->current_loop, use, cand, at);
3509 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3511 return computation_cost (comp);
3515 /* Determines the cost of the computation by that USE is expressed
3516 from induction variable CAND.  If ADDRESS_P is true, we just need
3517 to create an address from it, otherwise we want to get it into
3518 register.  A set of invariants we depend on is stored in
/* Convenience wrapper: cost the computation at USE's own statement.  */
3522 get_computation_cost (struct ivopts_data *data,
3523 struct iv_use *use, struct iv_cand *cand,
3524 bool address_p, bitmap *depends_on)
3526 return get_computation_cost_at (data,
3527 use, cand, address_p, depends_on, use->stmt);
3530 /* Determines cost of basing replacement of USE on CAND in a generic
/* Returns false iff the use cannot be based on the candidate (infinite
   cost); records the (cost, depends_on) pair either way.  */
3534 determine_use_iv_cost_generic (struct ivopts_data *data,
3535 struct iv_use *use, struct iv_cand *cand)
3540 /* The simple case first -- if we need to express value of the preserved
3541 original biv, the cost is 0.  This also prevents us from counting the
3542 cost of increment twice -- once at this use and once in the cost of
3544 if (cand->pos == IP_ORIGINAL
3545 && cand->incremented_at == use->stmt)
3547 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3551 cost = get_computation_cost (data, use, cand, false, &depends_on);
3552 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3554 return cost != INFTY;
3557 /* Determines cost of basing replacement of USE on CAND in an address. */
/* Same contract as determine_use_iv_cost_generic, but costs the
   address form (ADDRESS_P = true).  */
3560 determine_use_iv_cost_address (struct ivopts_data *data,
3561 struct iv_use *use, struct iv_cand *cand)
3564 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3566 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3568 return cost != INFTY;
3571 /* Computes value of induction variable IV in iteration NITER. */
/* Result is IV->base + IV->step * NITER, folded in IV's base type.  */
3574 iv_value (struct iv *iv, tree niter)
3577 tree type = TREE_TYPE (iv->base);
3579 niter = fold_convert (type, niter);
3580 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3582 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3585 /* Computes value of candidate CAND at position AT in iteration NITER. */
/* If AT executes after the candidate's increment, the value seen there
   is one step ahead of the iteration-NITER value.  */
3588 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3590 tree val = iv_value (cand->iv, niter);
3591 tree type = TREE_TYPE (cand->iv->base);
3593 if (stmt_after_increment (loop, cand, at))
3594 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3599 /* Returns period of induction variable iv. */
3602 iv_period (struct iv *iv)
3604 tree step = iv->step, period, type;
/* Only constant steps are supported.  */
3607 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3609 /* Period of the iv is gcd (step, type range).  Since type range is power
3610 of two, it suffices to determine the maximum power of two that divides
3612 pow2div = num_ending_zeros (step);
3613 type = unsigned_type_for (TREE_TYPE (step));
/* Period = 2^(precision - pow2div) - 1, expressed as a low-bits mask.  */
3615 period = build_low_bits_mask (type,
3616 (TYPE_PRECISION (type)
3617 - tree_low_cst (pow2div, 1)));
3622 /* Returns the comparison operator used when eliminating the iv USE. */
/* Picks the loop-exit successor edge of USE's block; if the exit is
   taken on the true branch we compare with EQ, otherwise with NE.  */
3624 static enum tree_code
3625 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3627 struct loop *loop = data->current_loop;
3631 ex_bb = bb_for_stmt (use->stmt);
3632 exit = EDGE_SUCC (ex_bb, 0);
/* If successor 0 stays inside the loop, the exit must be successor 1.  */
3633 if (flow_bb_inside_loop_p (loop, exit->dest))
3634 exit = EDGE_SUCC (ex_bb, 1);
3636 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3639 /* Check whether it is possible to express the condition in USE by comparison
3640 of candidate CAND.  If so, store the value compared with to BOUND. */
3643 may_eliminate_iv (struct ivopts_data *data,
3644 struct iv_use *use, struct iv_cand *cand, tree *bound)
3649 tree wider_type, period, per_type;
3650 struct loop *loop = data->current_loop;
/* Candidate must have a constant step, since we compare its period
   with the iteration count below.  */
3652 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3655 /* For now works only for exits that dominate the loop latch.  TODO -- extend
3656 for other conditions inside loop body. */
3657 ex_bb = bb_for_stmt (use->stmt);
3658 if (use->stmt != last_stmt (ex_bb)
3659 || TREE_CODE (use->stmt) != COND_EXPR)
3661 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Identify the exit edge; both successors inside the loop means this
   is not an exit test at all.  */
3664 exit = EDGE_SUCC (ex_bb, 0);
3665 if (flow_bb_inside_loop_p (loop, exit->dest))
3666 exit = EDGE_SUCC (ex_bb, 1);
3667 if (flow_bb_inside_loop_p (loop, exit->dest))
/* Number of iterations before this exit must be known.  */
3670 nit = niter_for_exit (data, exit);
3674 nit_type = TREE_TYPE (nit);
3676 /* Determine whether we may use the variable to test whether niter iterations
3677 elapsed.  This is the case iff the period of the induction variable is
3678 greater than the number of iterations. */
3679 period = iv_period (cand->iv);
3682 per_type = TREE_TYPE (period);
/* Compare period and niter in the wider of their two types to avoid
   truncation.  */
3684 wider_type = TREE_TYPE (period);
3685 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3686 wider_type = per_type;
3688 wider_type = nit_type;
3690 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3691 fold_convert (wider_type, period),
3692 fold_convert (wider_type, nit))))
/* The bound is the candidate's value at the final iteration.  */
3695 *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
3699 /* Determines cost of basing replacement of USE on CAND in a condition. */
3702 determine_use_iv_cost_condition (struct ivopts_data *data,
3703 struct iv_use *use, struct iv_cand *cand)
3705 tree bound = NULL_TREE, op, cond;
3706 bitmap depends_on = NULL;
3709 /* Only consider real candidates. */
3712 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
/* Preferred case: the whole exit test can be replaced by comparing
   CAND against a precomputed BOUND; cost is that of forcing BOUND
   into a variable.  */
3716 if (may_eliminate_iv (data, use, cand, &bound))
3718 cost = force_var_cost (data, bound, &depends_on);
3720 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3721 return cost != INFTY;
3724 /* The induction variable elimination failed; just express the original
3725 giv.  If it is compared with an invariant, note that we cannot get
3727 cost = get_computation_cost (data, use, cand, false, &depends_on);
/* Find the non-iv operand of the comparison and record the invariants
   its base depends on.  */
3730 if (TREE_CODE (cond) != SSA_NAME)
3732 op = TREE_OPERAND (cond, 0);
3733 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
3734 op = TREE_OPERAND (cond, 1);
3735 if (TREE_CODE (op) == SSA_NAME)
3737 op = get_iv (data, op)->base;
3738 fd_ivopts_data = data;
3739 walk_tree (&op, find_depends, &depends_on, NULL);
3743 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3744 return cost != INFTY;
3747 /* Determines cost of basing replacement of USE on CAND.  Returns false
3748 if USE cannot be based on CAND. */
/* Dispatches on the use kind to the specialized cost routines.  */
3751 determine_use_iv_cost (struct ivopts_data *data,
3752 struct iv_use *use, struct iv_cand *cand)
3756 case USE_NONLINEAR_EXPR:
3757 return determine_use_iv_cost_generic (data, use, cand);
3760 return determine_use_iv_cost_address (data, use, cand);
3763 return determine_use_iv_cost_condition (data, use, cand);
3770 /* Determines costs of basing the use of the iv on an iv candidate. */
3773 determine_use_iv_costs (struct ivopts_data *data)
3777 struct iv_cand *cand;
3778 bitmap to_clear = BITMAP_ALLOC (NULL);
3780 alloc_use_cost_map (data);
/* For every use, cost every candidate (or just the related ones when
   not considering all candidates).  */
3782 for (i = 0; i < n_iv_uses (data); i++)
3784 use = iv_use (data, i);
3786 if (data->consider_all_candidates)
3788 for (j = 0; j < n_iv_cands (data); j++)
3790 cand = iv_cand (data, j);
3791 determine_use_iv_cost (data, use, cand);
3798 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3800 cand = iv_cand (data, j);
3801 if (!determine_use_iv_cost (data, use, cand))
3802 bitmap_set_bit (to_clear, j);
3805 /* Remove the candidates for that the cost is infinite from
3806 the list of related candidates. */
3807 bitmap_and_compl_into (use->related_cands, to_clear);
/* TO_CLEAR is reused for the next use, so reset it here.  */
3808 bitmap_clear (to_clear);
3812 BITMAP_FREE (to_clear);
/* Dump the resulting use/candidate cost table.  */
3814 if (dump_file && (dump_flags & TDF_DETAILS))
3816 fprintf (dump_file, "Use-candidate costs:\n");
3818 for (i = 0; i < n_iv_uses (data); i++)
3820 use = iv_use (data, i);
3822 fprintf (dump_file, "Use %d:\n", i);
3823 fprintf (dump_file, "  cand\tcost\tdepends on\n");
3824 for (j = 0; j < use->n_map_members; j++)
3826 if (!use->cost_map[j].cand
3827 || use->cost_map[j].cost == INFTY)
3830 fprintf (dump_file, "  %d\t%d\t",
3831 use->cost_map[j].cand->id,
3832 use->cost_map[j].cost);
3833 if (use->cost_map[j].depends_on)
3834 bitmap_print (dump_file,
3835 use->cost_map[j].depends_on, "","");
3836 fprintf (dump_file, "\n");
3839 fprintf (dump_file, "\n");
3841 fprintf (dump_file, "\n");
3845 /* Determines cost of the candidate CAND. */
3848 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3850 unsigned cost_base, cost_step;
3859 /* There are two costs associated with the candidate -- its increment
3860 and its initialization.  The second is almost negligible for any loop
3861 that rolls enough, so we take it just very little into account. */
3863 base = cand->iv->base;
3864 cost_base = force_var_cost (data, base, NULL);
3865 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
/* Amortize the one-time base cost over the expected iteration count.  */
3867 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3869 /* Prefer the original iv unless we may gain something by replacing it;
3870 this is not really relevant for artificial ivs created by other
3872 if (cand->pos == IP_ORIGINAL
3873 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3876 /* Prefer not to insert statements into latch unless there are some
3877 already (so that we do not create unnecessary jumps). */
3878 if (cand->pos == IP_END
3879 && empty_block_p (ip_end_pos (data->current_loop)))
3883 /* Determines costs of computation of the candidates. */
/* Runs determine_iv_cost over every candidate, dumping a table when
   detailed dumps are requested.  */
3886 determine_iv_costs (struct ivopts_data *data)
3890 if (dump_file && (dump_flags & TDF_DETAILS))
3892 fprintf (dump_file, "Candidate costs:\n");
3893 fprintf (dump_file, "  cand\tcost\n");
3896 for (i = 0; i < n_iv_cands (data); i++)
3898 struct iv_cand *cand = iv_cand (data, i);
3900 determine_iv_cost (data, cand);
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3903 fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
3906 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file, "\n");
3910 /* Calculates cost for having SIZE induction variables. */
/* Thin wrapper passing the register-pressure estimate recorded in DATA.  */
3913 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3915 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3918 /* For each size of the induction variable set determine the penalty. */
3921 determine_set_costs (struct ivopts_data *data)
3925 struct loop *loop = data->current_loop;
3928 /* We use the following model (definitely improvable, especially the
3929 cost function -- TODO):
3931 We estimate the number of registers available (using MD data), name it A.
3933 We estimate the number of registers used by the loop, name it U.  This
3934 number is obtained as the number of loop phi nodes (not counting virtual
3935 registers and bivs) + the number of variables from outside of the loop.
3937 We set a reserve R (free regs that are used for temporary computations,
3938 etc.).  For now the reserve is a constant 3.
3940 Let I be the number of induction variables.
3942 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3943 make a lot of ivs without a reason).
3944 -- if A - R < U + I <= A, the cost is I * PRES_COST
3945 -- if U + I > A, the cost is I * PRES_COST and
3946 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3948 if (dump_file && (dump_flags & TDF_DETAILS))
3950 fprintf (dump_file, "Global costs:\n");
3951 fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
3952 fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
3953 fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
3954 fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
/* Count registers used by the loop: header phi results that are real
   (non-virtual) registers and are not recognized ivs...  */
3958 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3960 op = PHI_RESULT (phi);
3962 if (!is_gimple_reg (op))
3965 if (get_iv (data, op))
/* ...plus invariants with nonlinear uses.  */
3971 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3973 struct version_info *info = ver_info (data, j);
3975 if (info->inv_id && info->has_nonlin_use)
3979 data->regs_used = n;
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3981 fprintf (dump_file, "  regs_used %d\n", n);
3983 if (dump_file && (dump_flags & TDF_DETAILS))
3985 fprintf (dump_file, "  cost for size:\n");
3986 fprintf (dump_file, "  ivs\tcost\n");
3987 for (j = 0; j <= 2 * target_avail_regs; j++)
3988 fprintf (dump_file, "  %d\t%d\n", j,
3989 ivopts_global_cost_for_size (data, j));
3990 fprintf (dump_file, "\n");
3994 /* Returns true if A is a cheaper cost pair than B. */
3997 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
/* Primary key is the pair's use cost...  */
4005 if (a->cost < b->cost)
4008 if (a->cost > b->cost)
4011 /* In case the costs are the same, prefer the cheaper candidate. */
4012 if (a->cand->cost < b->cand->cost)
4018 /* Computes the cost field of IVS structure. */
/* Total = per-use costs + per-candidate costs + global register-pressure
   cost for the set's size.  */
4021 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4025 cost += ivs->cand_use_cost;
4026 cost += ivs->cand_cost;
4027 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4032 /* Remove invariants in set INVS to set IVS. */
/* Decrements the reference count of each invariant in INVS; counts
   dropping to zero indicate the invariant is no longer needed.  */
4035 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4043 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4045 ivs->n_invariant_uses[iid]--;
4046 if (ivs->n_invariant_uses[iid] == 0)
4051 /* Set USE not to be expressed by any candidate in IVS. */
4054 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4057 unsigned uid = use->id, cid;
4058 struct cost_pair *cp;
4060 cp = ivs->cand_for_use[uid];
/* Unlink the current cost pair and update the bookkeeping counters.  */
4066 ivs->cand_for_use[uid] = NULL;
4067 ivs->n_cand_uses[cid]--;
/* Last use of this candidate: drop it from the set and give back its
   per-candidate cost and invariant dependencies.  */
4069 if (ivs->n_cand_uses[cid] == 0)
4071 bitmap_clear_bit (ivs->cands, cid);
4072 /* Do not count the pseudocandidates. */
4076 ivs->cand_cost -= cp->cand->cost;
4078 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4081 ivs->cand_use_cost -= cp->cost;
4083 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4084 iv_ca_recount_cost (data, ivs);
4087 /* Add invariants in set INVS to set IVS. */
/* Mirror of iv_ca_set_remove_invariants: bumps reference counts; a
   count reaching one means the invariant just became live.  */
4090 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4098 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4100 ivs->n_invariant_uses[iid]++;
4101 if (ivs->n_invariant_uses[iid] == 1)
4106 /* Set cost pair for USE in set IVS to CP. */
4109 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4110 struct iv_use *use, struct cost_pair *cp)
4112 unsigned uid = use->id, cid;
/* No-op if the use already has this pair.  */
4114 if (ivs->cand_for_use[uid] == cp)
/* Detach any previous pair before installing the new one.  */
4117 if (ivs->cand_for_use[uid])
4118 iv_ca_set_no_cp (data, ivs, use);
4125 ivs->cand_for_use[uid] = cp;
4126 ivs->n_cand_uses[cid]++;
/* First use of this candidate: add it to the set with its cost and
   invariant dependencies.  */
4127 if (ivs->n_cand_uses[cid] == 1)
4129 bitmap_set_bit (ivs->cands, cid);
4130 /* Do not count the pseudocandidates. */
4134 ivs->cand_cost += cp->cand->cost;
4136 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4139 ivs->cand_use_cost += cp->cost;
4140 iv_ca_set_add_invariants (ivs, cp->depends_on);
4141 iv_ca_recount_cost (data, ivs);
4145 /* Extend set IVS by expressing USE by some of the candidates in it
4149 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4152 struct cost_pair *best_cp = NULL, *cp;
/* Uses are added strictly in id order.  */
4156 gcc_assert (ivs->upto >= use->id);
4158 if (ivs->upto == use->id)
/* Among the candidates already in the set, pick the cheapest pair
   that can express this use.  */
4164 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4166 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4168 if (cheaper_cost_pair (cp, best_cp))
4172 iv_ca_set_cp (data, ivs, use, best_cp);
4175 /* Get cost for assignment IVS. */
/* Infinite if some use could not be expressed (bad_uses nonzero).  */
4178 iv_ca_cost (struct iv_ca *ivs)
4180 return (ivs->bad_uses ? INFTY : ivs->cost);
4183 /* Returns true if all dependences of CP are among invariants in IVS. */
4186 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
/* No dependencies -> trivially satisfied.  */
4191 if (!cp->depends_on)
/* Every invariant CP depends on must already have a nonzero use count
   in IVS.  */
4194 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4196 if (ivs->n_invariant_uses[i] == 0)
4203 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4204 it before NEXT_CHANGE. */
/* Allocates a new delta node; caller owns it (freed by iv_ca_delta_free).  */
4206 static struct iv_ca_delta *
4207 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4208 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4210 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4213 change->old_cp = old_cp;
4214 change->new_cp = new_cp;
4215 change->next_change = next_change;
4220 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
/* Appends L2 to the tail of L1 in place; elided lines presumably handle
   one of the lists being empty — confirm against the full file.  */
4223 static struct iv_ca_delta *
4224 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4226 struct iv_ca_delta *last;
4234 for (last = l1; last->next_change; last = last->next_change)
4236 last->next_change = l2;
4241 /* Returns candidate by that USE is expressed in IVS. */
/* Simple accessor into the per-use cost-pair table.  */
4243 static struct cost_pair *
4244 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4246 return ivs->cand_for_use[use->id];
4249 /* Reverse the list of changes DELTA, forming the inverse to it. */
/* Besides reversing the list links, each node's old/new cost pairs are
   swapped so that committing the result undoes the original delta.  */
4251 static struct iv_ca_delta *
4252 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4254 struct iv_ca_delta *act, *next, *prev = NULL;
4255 struct cost_pair *tmp;
4257 for (act = delta; act; act = next)
4259 next = act->next_change;
4260 act->next_change = prev;
/* Swap old_cp/new_cp (elided line presumably completes the swap via TMP).  */
4264 act->old_cp = act->new_cp;
4271 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4272 reverted instead. */
4275 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4276 struct iv_ca_delta *delta, bool forward)
4278 struct cost_pair *from, *to;
4279 struct iv_ca_delta *act;
/* For a revert, walk the inverse delta...  */
4282 delta = iv_ca_delta_reverse (delta);
4284 for (act = delta; act; act = act->next_change)
/* Each change must apply on top of the expected current state.  */
4288 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4289 iv_ca_set_cp (data, ivs, act->use, to);
/* ...and restore the delta's original orientation afterwards.  */
4293 iv_ca_delta_reverse (delta);
4296 /* Returns true if CAND is used in IVS. */
/* A candidate is in the set iff at least one use is expressed by it.  */
4299 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4301 return ivs->n_cand_uses[cand->id] > 0;
4304 /* Returns number of induction variable candidates in the set IVS. */
/* Cached count; maintained by the set/unset operations above.  */
4307 iv_ca_n_cands (struct iv_ca *ivs)
4309 return ivs->n_cands;
4312 /* Free the list of changes DELTA. */
/* Walks the chain freeing each node; DELTA itself is passed by address
   so it can be cleared (elided lines) after freeing.  */
4315 iv_ca_delta_free (struct iv_ca_delta **delta)
4317 struct iv_ca_delta *act, *next;
4319 for (act = *delta; act; act = next)
4321 next = act->next_change;
4328 /* Allocates new iv candidates assignment. */
/* Returns an empty assignment: no uses covered, no candidates chosen,
   all counters zeroed (XCNEWVEC zero-initializes).  */
4330 static struct iv_ca *
4331 iv_ca_new (struct ivopts_data *data)
4333 struct iv_ca *nw = XNEW (struct iv_ca);
4337 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4338 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4339 nw->cands = BITMAP_ALLOC (NULL);
4342 nw->cand_use_cost = 0;
4344 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4350 /* Free memory occupied by the set IVS. */
/* Releases every array/bitmap allocated by iv_ca_new, then the struct.  */
4353 iv_ca_free (struct iv_ca **ivs)
4355 free ((*ivs)->cand_for_use);
4356 free ((*ivs)->n_cand_uses);
4357 BITMAP_FREE ((*ivs)->cands);
4358 free ((*ivs)->n_invariant_uses);
4363 /* Dumps IVS to FILE. */
4366 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4368 const char *pref = "  invariants ";
4371 fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4372 bitmap_print (file, ivs->cands, "  candidates ","\n");
/* Invariant ids are 1-based; list only those actually referenced.  */
4374 for (i = 1; i <= data->max_inv_id; i++)
4375 if (ivs->n_invariant_uses[i])
4377 fprintf (file, "%s%d", pref, i);
4380 fprintf (file, "\n");
4383 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4384 new set, and store differences in DELTA.  Number of induction variables
4385 in the new set is stored to N_IVS. */
4388 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4389 struct iv_cand *cand, struct iv_ca_delta **delta,
4394 struct cost_pair *old_cp, *new_cp;
4397 for (i = 0; i < ivs->upto; i++)
4399 use = iv_use (data, i);
4400 old_cp = iv_ca_cand_for_use (ivs, use);
/* Skip uses already expressed by CAND.  */
4403 && old_cp->cand == cand)
4406 new_cp = get_use_iv_cost (data, use, cand);
/* Only switch when the dependencies are satisfied and it is strictly
   cheaper than the current pair.  */
4410 if (!iv_ca_has_deps (ivs, new_cp))
4413 if (!cheaper_cost_pair (new_cp, old_cp))
4416 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Temporarily apply the delta to read the new cost, then roll back;
   the caller decides whether to commit for real.  */
4419 iv_ca_delta_commit (data, ivs, *delta, true);
4420 cost = iv_ca_cost (ivs);
4422 *n_ivs = iv_ca_n_cands (ivs);
4423 iv_ca_delta_commit (data, ivs, *delta, false);
4428 /* Try narrowing set IVS by removing CAND. Return the cost of
4429 the new set and store the differences in DELTA. */
4432 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4433 struct iv_cand *cand, struct iv_ca_delta **delta)
4437 struct cost_pair *old_cp, *new_cp, *cp;
4439 struct iv_cand *cnd;
/* For every use currently served by CAND, look for the best
   replacement among the remaining candidates in the set.  */
4443 for (i = 0; i < n_iv_uses (data); i++)
4445 use = iv_use (data, i);
4447 old_cp = iv_ca_cand_for_use (ivs, use);
/* Uses not depending on CAND are unaffected by its removal.  */
4448 if (old_cp->cand != cand)
/* Two search strategies: scan all candidates in the set, or only
   those recorded as related to this use.  */
4453 if (data->consider_all_candidates)
4455 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4460 cnd = iv_cand (data, ci);
4462 cp = get_use_iv_cost (data, use, cnd);
4465 if (!iv_ca_has_deps (ivs, cp))
4468 if (!cheaper_cost_pair (cp, new_cp))
4476 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4481 cnd = iv_cand (data, ci);
4483 cp = get_use_iv_cost (data, use, cnd);
4486 if (!iv_ca_has_deps (ivs, cp))
4489 if (!cheaper_cost_pair (cp, new_cp))
/* No replacement found for some use -- removal is impossible;
   discard the partial delta.  */
4498 iv_ca_delta_free (delta);
4502 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Apply tentatively to measure the new cost, then undo.  */
4505 iv_ca_delta_commit (data, ivs, *delta, true);
4506 cost = iv_ca_cost (ivs);
4507 iv_ca_delta_commit (data, ivs, *delta, false);
4512 /* Try optimizing the set of candidates IVS by removing candidates different
4513 from to EXCEPT_CAND from it. Return cost of the new set, and store
4514 differences in DELTA. */
4517 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4518 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4521 struct iv_ca_delta *act_delta, *best_delta;
4522 unsigned i, best_cost, acost;
4523 struct iv_cand *cand;
/* The current cost is the baseline; only strictly cheaper removals
   are accepted.  */
4526 best_cost = iv_ca_cost (ivs);
4528 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4530 cand = iv_cand (data, i);
/* EXCEPT_CAND is exempt from removal (e.g. a freshly added one).  */
4532 if (cand == except_cand)
4535 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4537 if (acost < best_cost)
/* Found a better removal; keep its delta, drop the previous best.  */
4540 iv_ca_delta_free (&best_delta);
4541 best_delta = act_delta;
4544 iv_ca_delta_free (&act_delta);
4553 /* Recurse to possibly remove other unnecessary ivs. */
4554 iv_ca_delta_commit (data, ivs, best_delta, true);
4555 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4556 iv_ca_delta_commit (data, ivs, best_delta, false);
/* Chain this level's removal in front of the deltas found deeper in
   the recursion so the caller can commit them all at once.  */
4557 *delta = iv_ca_delta_join (best_delta, *delta);
4561 /* Tries to extend the sets IVS in the best possible way in order
4562 to express the USE.  Returns false (via the final comparison against
4563 INFTY) when no candidate can express the use at finite cost. */
4565 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4568 unsigned best_cost, act_cost;
4571 struct iv_cand *cand;
4572 struct iv_ca_delta *best_delta = NULL, *act_delta;
4573 struct cost_pair *cp;
4575 iv_ca_add_use (data, ivs, use);
4576 best_cost = iv_ca_cost (ivs);
4578 cp = iv_ca_cand_for_use (ivs, use);
/* If a candidate was assigned on addition, remember that assignment as
   the initial best and detach it again so alternatives start from a
   clean state.  */
4581 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4582 iv_ca_set_no_cp (data, ivs, use);
4585 /* First try important candidates. Only if it fails, try the specific ones.
4586 Rationale -- in loops with many variables the best choice often is to use
4587 just one generic biv. If we added here many ivs specific to the uses,
4588 the optimization algorithm later would be likely to get stuck in a local
4589 minimum, thus causing us to create too many ivs. The approach from
4590 few ivs to more seems more likely to be successful -- starting from few
4591 ivs, replacing an expensive use by a specific iv should always be a
4593 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4595 cand = iv_cand (data, i);
/* Candidates already in the set were evaluated elsewhere.  */
4597 if (iv_ca_cand_used_p (ivs, cand))
4600 cp = get_use_iv_cost (data, use, cand);
/* Tentatively assign CAND to USE, measure the extension cost, then
   detach again; the delta records the trial.  */
4604 iv_ca_set_cp (data, ivs, use, cp);
4605 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4606 iv_ca_set_no_cp (data, ivs, use);
4607 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4609 if (act_cost < best_cost)
4611 best_cost = act_cost;
4613 iv_ca_delta_free (&best_delta);
4614 best_delta = act_delta;
4617 iv_ca_delta_free (&act_delta);
/* No important candidate worked at finite cost -- fall back to the
   use-specific candidates recorded in its cost map.  */
4620 if (best_cost == INFTY)
4622 for (i = 0; i < use->n_map_members; i++)
4624 cp = use->cost_map + i;
4629 /* Already tried this. */
4630 if (cand->important)
4633 if (iv_ca_cand_used_p (ivs, cand))
4637 iv_ca_set_cp (data, ivs, use, cp);
4638 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4639 iv_ca_set_no_cp (data, ivs, use);
4640 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4643 if (act_cost < best_cost)
4645 best_cost = act_cost;
4648 iv_ca_delta_free (&best_delta);
4649 best_delta = act_delta;
4652 iv_ca_delta_free (&act_delta);
/* Commit the winning assignment for real.  */
4656 iv_ca_delta_commit (data, ivs, best_delta, true);
4657 iv_ca_delta_free (&best_delta);
4659 return (best_cost != INFTY);
4662 /* Finds an initial assignment of candidates to uses.  Returns the new
4663 set, or fails when some use cannot be expressed by any candidate. */
4664 static struct iv_ca *
4665 get_initial_solution (struct ivopts_data *data)
4667 struct iv_ca *ivs = iv_ca_new (data);
/* Add the uses one by one; bail out (elided failure path) as soon as
   one of them cannot be covered.  */
4670 for (i = 0; i < n_iv_uses (data); i++)
4671 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4680 /* Tries to improve set of induction variables IVS.  One step of the
4681 greedy local search: either add one candidate (possibly pruning
4682 afterwards) or remove candidates, whichever lowers the cost. */
4683 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4685 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4686 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4687 struct iv_cand *cand;
4689 /* Try extending the set of induction variables by one. */
4690 for (i = 0; i < n_iv_cands (data); i++)
4692 cand = iv_cand (data, i);
/* Skip candidates already in the set.  */
4694 if (iv_ca_cand_used_p (ivs, cand))
4697 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4701 /* If we successfully added the candidate and the set is small enough,
4702 try optimizing it by removing other candidates. */
4703 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
/* Apply the extension, prune around the new candidate, undo, and
   merge both deltas into a single trial change.  */
4705 iv_ca_delta_commit (data, ivs, act_delta, true);
4706 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4707 iv_ca_delta_commit (data, ivs, act_delta, false);
4708 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4711 if (acost < best_cost)
4714 iv_ca_delta_free (&best_delta);
4715 best_delta = act_delta;
4718 iv_ca_delta_free (&act_delta);
4723 /* Try removing the candidates from the set instead. */
4724 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4726 /* Nothing more we can do. */
/* Commit the best improvement found; the cost must match what the
   trial commit measured.  */
4731 iv_ca_delta_commit (data, ivs, best_delta, true);
4732 gcc_assert (best_cost == iv_ca_cost (ivs));
4733 iv_ca_delta_free (&best_delta);
4737 /* Attempts to find the optimal set of induction variables. We do simple
4738 greedy heuristic -- we try to replace at most one candidate in the selected
4739 solution and remove the unused ivs while this improves the cost. */
4741 static struct iv_ca *
4742 find_optimal_iv_set (struct ivopts_data *data)
4748 /* Get the initial solution. */
4749 set = get_initial_solution (data);
/* No feasible assignment exists -- give up on this loop.  */
4752 if (dump_file && (dump_flags & TDF_DETAILS))
4753 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4757 if (dump_file && (dump_flags & TDF_DETAILS))
4759 fprintf (dump_file, "Initial set of candidates:\n");
4760 iv_ca_dump (data, dump_file, set);
/* Iterate local-search steps until no single change improves cost.  */
4763 while (try_improve_iv_set (data, set))
4765 if (dump_file && (dump_flags & TDF_DETAILS))
4767 fprintf (dump_file, "Improved to:\n");
4768 iv_ca_dump (data, dump_file, set);
4772 if (dump_file && (dump_flags & TDF_DETAILS))
4773 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
/* Record the chosen candidate on each use for the rewrite phase.  */
4775 for (i = 0; i < n_iv_uses (data); i++)
4777 use = iv_use (data, i);
4778 use->selected = iv_ca_cand_for_use (set, use)->cand;
4784 /* Creates a new induction variable corresponding to CAND. */
4787 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4789 block_stmt_iterator incr_pos;
/* Choose where the increment statement goes, depending on the
   candidate's position kind (dispatch lines elided in this excerpt).  */
4799 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4803 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4808 /* Mark that the iv is preserved. */
4809 name_info (data, cand->var_before)->preserve_biv = true;
4810 name_info (data, cand->var_after)->preserve_biv = true;
4812 /* Rewrite the increment so that it uses var_before directly. */
4813 find_interesting_uses_op (data, cand->var_after)->selected = cand;
/* Make the new variable known to the function before building the iv.  */
4818 gimple_add_tmp_var (cand->var_before);
4819 add_referenced_var (cand->var_before);
/* Unshare base and step so the candidate's trees are not reused
   destructively by create_iv.  */
4821 base = unshare_expr (cand->iv->base);
4823 create_iv (base, unshare_expr (cand->iv->step),
4824 cand->var_before, data->current_loop,
4825 &incr_pos, after, &cand->var_before, &cand->var_after);
4828 /* Creates new induction variables described in SET. */
4831 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4834 struct iv_cand *cand;
/* Materialize every candidate chosen into the final set.  */
4837 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4839 cand = iv_cand (data, i);
4840 create_new_iv (data, cand);
4844 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4845 is true, remove also the ssa name defined by the statement. */
4848 remove_statement (tree stmt, bool including_defined_name)
4850 if (TREE_CODE (stmt) == PHI_NODE)
/* PHI nodes have their own removal primitive.  */
4852 remove_phi_node (stmt, NULL_TREE, including_defined_name);
/* Ordinary statements are removed through a block iterator.  */
4856 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4858 bsi_remove (&bsi, true);
4862 /* Rewrites USE (definition of iv used in a nonlinear expression)
4863 using candidate CAND. */
4866 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4867 struct iv_use *use, struct iv_cand *cand)
4870 tree op, stmts, tgt, ass;
4871 block_stmt_iterator bsi, pbsi;
4873 /* An important special case -- if we are asked to express value of
4874 the original iv by itself, just exit; there is no need to
4875 introduce a new computation (that might also need casting the
4876 variable to unsigned and back). */
4877 if (cand->pos == IP_ORIGINAL
4878 && cand->incremented_at == use->stmt)
4880 tree step, ctype, utype;
4881 enum tree_code incr_code = PLUS_EXPR;
4883 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4884 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4886 step = cand->iv->step;
4887 ctype = TREE_TYPE (step);
4888 utype = TREE_TYPE (cand->var_after);
/* A negated step is canonicalized into a MINUS_EXPR increment.  */
4889 if (TREE_CODE (step) == NEGATE_EXPR)
4891 incr_code = MINUS_EXPR;
4892 step = TREE_OPERAND (step, 0);
4895 /* Check whether we may leave the computation unchanged.
4896 This is the case only if it does not rely on other
4897 computations in the loop -- otherwise, the computation
4898 we rely upon may be removed in remove_unused_ivs,
4899 thus leading to ICE. */
4900 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4901 if (TREE_CODE (op) == PLUS_EXPR
4902 || TREE_CODE (op) == MINUS_EXPR)
/* Pick out the non-iv operand of the increment.  */
4904 if (TREE_OPERAND (op, 0) == cand->var_before)
4905 op = TREE_OPERAND (op, 1);
4906 else if (TREE_CODE (op) == PLUS_EXPR
4907 && TREE_OPERAND (op, 1) == cand->var_before)
4908 op = TREE_OPERAND (op, 0);
/* Safe to keep as-is only when that operand is a constant or
   literally the step itself.  */
4916 && (TREE_CODE (op) == INTEGER_CST
4917 || operand_equal_p (op, step, 0)))
4920 /* Otherwise, add the necessary computations to express
4921 the iv:  var_after = (utype)((ctype)var_before +/- step).  */
4922 op = fold_convert (ctype, cand->var_before);
4923 comp = fold_convert (utype,
4924 build2 (incr_code, ctype, op,
4925 unshare_expr (step)));
/* General case: express the use's value in terms of CAND.  */
4929 comp = get_computation (data->current_loop, use, cand);
4930 gcc_assert (comp != NULL_TREE);
/* Find the target SSA name and an insertion point, depending on
   whether the use's statement is a PHI or an assignment.  */
4933 switch (TREE_CODE (use->stmt))
4936 tgt = PHI_RESULT (use->stmt);
4938 /* If we should keep the biv, do not replace it. */
4939 if (name_info (data, tgt)->preserve_biv)
/* Skip past leading labels to find a legal insertion point in the
   PHI's block.  */
4942 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4943 while (!bsi_end_p (pbsi)
4944 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4951 case GIMPLE_MODIFY_STMT:
4952 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4953 bsi = bsi_for_stmt (use->stmt);
/* Gimplify COMP, reusing the target's base variable for temporaries.  */
4960 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4962 if (TREE_CODE (use->stmt) == PHI_NODE)
/* PHI case: emit the computation plus a fresh assignment to TGT and
   delete the PHI (keeping TGT, now defined by the assignment).  */
4965 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4966 ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
4967 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4968 remove_statement (use->stmt, false);
4969 SSA_NAME_DEF_STMT (tgt) = ass;
/* Assignment case: insert the computation before the statement and
   replace its RHS in place.  */
4974 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4975 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4979 /* Replaces ssa name in index IDX by its basic variable. Callback for
4980 for_each_index. */
4983 idx_remove_ssa_names (tree base, tree *idx,
4984 void *data ATTRIBUTE_UNUSED)
4988 if (TREE_CODE (*idx) == SSA_NAME)
4989 *idx = SSA_NAME_VAR (*idx);
/* For ARRAY_REFs, also strip SSA names from the lower-bound and
   element-size operands (operands 2 and 3).  */
4991 if (TREE_CODE (base) == ARRAY_REF)
4993 op = &TREE_OPERAND (base, 2);
4995 && TREE_CODE (*op) == SSA_NAME)
4996 *op = SSA_NAME_VAR (*op);
4997 op = &TREE_OPERAND (base, 3);
4999 && TREE_CODE (*op) == SSA_NAME)
5000 *op = SSA_NAME_VAR (*op);
5006 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5009 unshare_and_remove_ssa_names (tree ref)
/* Copy first so the stripping below never mutates shared trees.  */
5011 ref = unshare_expr (ref);
5012 for_each_index (&ref, idx_remove_ssa_names, NULL);
5017 /* Extract the alias analysis info for the memory reference REF. There are
5018 several ways how this information may be stored and what precisely is
5019 its semantics depending on the type of the reference, but there always is
5020 somewhere hidden one _DECL node that is used to determine the set of
5021 virtual operands for the reference. The code below deciphers this jungle
5022 and extracts this single useful piece of information. */
5025 get_ref_tag (tree ref, tree orig)
5027 tree var = get_base_address (ref);
5028 tree aref = NULL_TREE, tag, sv;
5029 HOST_WIDE_INT offset, size, maxsize;
/* Walk down the component chain of ORIG looking for a base with
   structure field subvariables.  */
5031 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5033 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
/* A variable with subvars: return (a copy of) the reference itself so
   the right subvariables are used for virtual operands.  */
5038 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5039 return unshare_expr (sv);
5044 if (TREE_CODE (var) == INDIRECT_REF)
5046 /* If the base is a dereference of a pointer, first check its name memory
5047 tag. If it does not have one, use its symbol memory tag. */
5048 var = TREE_OPERAND (var, 0);
5049 if (TREE_CODE (var) != SSA_NAME)
5052 if (SSA_NAME_PTR_INFO (var))
5054 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
/* No name tag: fall back to the symbol memory tag of the pointer's
   underlying variable, which must exist here.  */
5059 var = SSA_NAME_VAR (var);
5060 tag = symbol_mem_tag (var);
5061 gcc_assert (tag != NULL_TREE);
/* Non-INDIRECT_REF base: use the decl's symbol tag if any.  */
5069 tag = symbol_mem_tag (var);
5077 /* Copies the reference information from OLD_REF to NEW_REF. */
5080 copy_ref_info (tree new_ref, tree old_ref)
/* TARGET_MEM_REF sources already carry mem-ref info to copy wholesale.  */
5082 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5083 copy_mem_ref_info (new_ref, old_ref);
/* Otherwise record a cleaned-up copy of the original reference and
   derive the alias tag from it.  */
5086 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5087 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5091 /* Rewrites USE (address that is an iv) using candidate CAND. */
5094 rewrite_use_address (struct ivopts_data *data,
5095 struct iv_use *use, struct iv_cand *cand)
5098 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
/* Express the address as an affine combination of CAND.  */
5102 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5104 unshare_aff_combination (&aff);
/* Build a TARGET_MEM_REF for the combination and splice it in,
   carrying the original reference's alias info over.  */
5106 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5107 copy_ref_info (ref, *use->op_p);
5111 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5112 candidate CAND. */
5115 rewrite_use_compare (struct ivopts_data *data,
5116 struct iv_use *use, struct iv_cand *cand)
5119 tree *op_p, cond, op, stmts, bound;
5120 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5121 enum tree_code compare;
5122 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
/* Preferred path: eliminate the iv from the condition entirely by
   comparing CAND's variable against a precomputed bound.  */
5127 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5128 tree var_type = TREE_TYPE (var);
5130 compare = iv_elimination_compare (data, use);
5131 bound = fold_convert (var_type, bound);
5132 op = force_gimple_operand (unshare_expr (bound), &stmts,
/* Insert any statements the gimplification produced before the
   condition.  */
5136 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5138 *use->op_p = build2 (compare, boolean_type_node, var, op);
5139 update_stmt (use->stmt);
5143 /* The induction variable elimination failed; just express the original
5144 iv in terms of CAND.  */
5145 comp = get_computation (data->current_loop, use, cand);
5146 gcc_assert (comp != NULL_TREE);
/* Locate the operand of the condition that carries the iv; prefer
   operand 0 unless it is not an iv (non-SSA or zero step).  */
5149 op_p = &TREE_OPERAND (cond, 0);
5150 if (TREE_CODE (*op_p) != SSA_NAME
5151 || zero_p (get_iv (data, *op_p)->step))
5152 op_p = &TREE_OPERAND (cond, 1);
5154 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5156 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5161 /* Rewrites USE using candidate CAND. */
5164 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
/* Bracket the rewrite so operand caches are updated consistently.  */
5166 push_stmt_changes (&use->stmt);
/* Dispatch on the kind of the use.  */
5170 case USE_NONLINEAR_EXPR:
5171 rewrite_use_nonlinear_expr (data, use, cand);
5175 rewrite_use_address (data, use, cand);
5179 rewrite_use_compare (data, use, cand);
5186 pop_stmt_changes (&use->stmt);
5189 /* Rewrite the uses using the selected induction variables. */
5192 rewrite_uses (struct ivopts_data *data)
5195 struct iv_cand *cand;
/* Each use was annotated with its chosen candidate by
   find_optimal_iv_set; apply that choice now.  */
5198 for (i = 0; i < n_iv_uses (data); i++)
5200 use = iv_use (data, i);
5201 cand = use->selected;
5204 rewrite_use (data, use, cand);
5208 /* Removes the ivs that are not used after rewriting. */
5211 remove_unused_ivs (struct ivopts_data *data)
/* Scan all relevant SSA versions for dead induction variables.  */
5216 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5218 struct version_info *info;
5220 info = ver_info (data, j);
/* Delete only genuine ivs (non-zero step) that no use was rewritten
   to rely on and that were not explicitly marked for preservation.  */
5222 && !zero_p (info->iv->step)
5224 && !info->iv->have_use_for
5225 && !info->preserve_biv)
5226 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5230 /* Frees data allocated by the optimization of a single loop. */
5233 free_loop_data (struct ivopts_data *data)
/* Discard cached number-of-iterations entries for this loop.  */
5239 htab_empty (data->niters);
/* Reset the per-SSA-version bookkeeping for every relevant name.  */
5241 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5243 struct version_info *info;
5245 info = ver_info (data, i);
5249 info->has_nonlin_use = false;
5250 info->preserve_biv = false;
5253 bitmap_clear (data->relevant);
5254 bitmap_clear (data->important_candidates);
/* Free per-use data: related-candidate bitmaps and cost maps,
   including the depends_on bitmaps inside each cost pair.  */
5256 for (i = 0; i < n_iv_uses (data); i++)
5258 struct iv_use *use = iv_use (data, i);
5261 BITMAP_FREE (use->related_cands);
5262 for (j = 0; j < use->n_map_members; j++)
5263 if (use->cost_map[j].depends_on)
5264 BITMAP_FREE (use->cost_map[j].depends_on);
5265 free (use->cost_map);
5268 VEC_truncate (iv_use_p, data->iv_uses, 0);
/* Free per-candidate data.  */
5270 for (i = 0; i < n_iv_cands (data); i++)
5272 struct iv_cand *cand = iv_cand (data, i);
5276 if (cand->depends_on)
5277 BITMAP_FREE (cand->depends_on);
5280 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
/* Grow the version-info array if new SSA names appeared; doubling
   amortizes reallocation across loops.  */
5282 if (data->version_info_size < num_ssa_names)
5284 data->version_info_size = 2 * num_ssa_names;
5285 free (data->version_info);
5286 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5289 data->max_inv_id = 0;
/* Drop the DECL_RTL values we set temporarily (for cost estimation)
   so they do not leak into later compilation stages.  */
5291 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5292 SET_DECL_RTL (obj, NULL_RTX);
5294 VEC_truncate (tree, decl_rtl_to_reset, 0);
5297 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5298 loop tree (NOTE(review): parameter list below takes only DATA -- the
5299 comment's mention of LOOPS appears stale; confirm against full file). */
5301 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
/* Per-loop state first, then the pass-lifetime allocations.  */
5303 free_loop_data (data);
5304 free (data->version_info);
5305 BITMAP_FREE (data->relevant);
5306 BITMAP_FREE (data->important_candidates);
5307 htab_delete (data->niters);
5309 VEC_free (tree, heap, decl_rtl_to_reset);
5310 VEC_free (iv_use_p, heap, data->iv_uses);
5311 VEC_free (iv_cand_p, heap, data->iv_candidates);
5314 /* Optimizes the LOOP. Returns true if anything changed. */
5317 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5319 bool changed = false;
5320 struct iv_ca *iv_ca;
5323 data->current_loop = loop;
5325 if (dump_file && (dump_flags & TDF_DETAILS))
5327 fprintf (dump_file, "Processing loop %d\n", loop->num);
/* Dump the single dominating exit if the loop has one.  */
5329 exit = single_dom_exit (loop);
5332 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5333 exit->src->index, exit->dest->index);
5334 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5335 fprintf (dump_file, "\n");
5338 fprintf (dump_file, "\n");
5341 /* For each ssa name determines whether it behaves as an induction variable
5342 in some loop.  */
5343 if (!find_induction_variables (data))
5346 /* Finds interesting uses (item 1). */
5347 find_interesting_uses (data);
/* Bail out on pathologically large use sets to bound compile time.  */
5348 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5351 /* Finds candidates for the induction variables (item 2). */
5352 find_iv_candidates (data);
5354 /* Calculates the costs (item 3, part 1). */
5355 determine_use_iv_costs (data);
5356 determine_iv_costs (data);
5357 determine_set_costs (data);
5359 /* Find the optimal set of induction variables (item 3, part 2). */
5360 iv_ca = find_optimal_iv_set (data);
5365 /* Create the new induction variables (item 4, part 1). */
5366 create_new_ivs (data, iv_ca);
5367 iv_ca_free (&iv_ca);
5369 /* Rewrite the uses (item 4, part 2). */
5370 rewrite_uses (data);
5372 /* Remove the ivs that are unused after rewriting. */
5373 remove_unused_ivs (data);
5375 /* We have changed the structure of induction variables; it might happen
5376 that definitions in the scev database refer to some of them that were
5377 eliminated.  */
5381 free_loop_data (data);
5386 /* Main entry point. Optimizes induction variables in loops. */
5389 tree_ssa_iv_optimize (void)
5392 struct ivopts_data data;
5395 tree_ssa_iv_optimize_init (&data);
5397 /* Optimize the loops starting with the innermost ones. */
5398 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5400 if (dump_file && (dump_flags & TDF_DETAILS))
5401 flow_loop_dump (loop, dump_file, NULL, 1);
5403 tree_ssa_iv_optimize_loop (&data, loop);
5406 tree_ssa_iv_optimize_finalize (&data);