1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
94 /* The infinite cost. */
95 #define INFTY 10000000
97 /* The expected number of loop iterations. TODO -- use profiling instead of
99 #define AVG_LOOP_NITER(LOOP) 5
102 /* Representation of the induction variable. */
105 tree base; /* Initial value of the iv. */
106 tree base_object; /* A memory object to that the induction variable points. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
128 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
129 USE_ADDRESS, /* Use in an address. */
130 USE_COMPARE /* Use is a compare. */
133 /* The candidate - cost pair. */
136 struct iv_cand *cand; /* The candidate. */
137 unsigned cost; /* The cost. */
138 bitmap depends_on; /* The list of invariants that have to be
140 tree value; /* For final value elimination, the expression for
141 the final value of the iv. For iv elimination,
142 the new bound to compare with. */
148 unsigned id; /* The id of the use. */
149 enum use_type type; /* Type of the use. */
150 struct iv *iv; /* The induction variable it is based on. */
151 tree stmt; /* Statement in that it occurs. */
152 tree *op_p; /* The place where it occurs. */
153 bitmap related_cands; /* The set of "related" iv candidates, plus the common
156 unsigned n_map_members; /* Number of candidates in the cost_map list. */
157 struct cost_pair *cost_map;
158 /* The costs wrto the iv candidates. */
160 struct iv_cand *selected;
161 /* The selected candidate. */
164 /* The position where the iv is computed. */
167 IP_NORMAL, /* At the end, just before the exit condition. */
168 IP_END, /* At the end of the latch block. */
169 IP_ORIGINAL /* The original biv. */
172 /* The induction variable candidate. */
175 unsigned id; /* The number of the candidate. */
176 bool important; /* Whether this is an "important" candidate, i.e. such
177 that it should be considered by all uses. */
178 enum iv_position pos; /* Where it is computed. */
179 tree incremented_at; /* For original biv, the statement where it is
181 tree var_before; /* The variable used for it before increment. */
182 tree var_after; /* The variable used for it after increment. */
183 struct iv *iv; /* The value of the candidate. NULL for
184 "pseudocandidate" used to indicate the possibility
185 to replace the final value of an iv by direct
186 computation of the value. */
187 unsigned cost; /* Cost of the candidate. */
188 bitmap depends_on; /* The list of invariants that are used in step of the
192 /* The data used by the induction variable optimizations. */
194 typedef struct iv_use *iv_use_p;
196 DEF_VEC_ALLOC_P(iv_use_p,heap);
198 typedef struct iv_cand *iv_cand_p;
199 DEF_VEC_P(iv_cand_p);
200 DEF_VEC_ALLOC_P(iv_cand_p,heap);
204 /* The currently optimized loop. */
205 struct loop *current_loop;
207 /* Number of registers used in it. */
210 /* Numbers of iterations for all exits of the current loop. */
213 /* The size of version_info array allocated. */
214 unsigned version_info_size;
216 /* The array of information for the ssa names. */
217 struct version_info *version_info;
219 /* The bitmap of indices in version_info whose value was changed. */
222 /* The maximum invariant id. */
225 /* The uses of induction variables. */
226 VEC(iv_use_p,heap) *iv_uses;
228 /* The candidates. */
229 VEC(iv_cand_p,heap) *iv_candidates;
231 /* A bitmap of important candidates. */
232 bitmap important_candidates;
234 /* Whether to consider just related and important candidates when replacing a
236 bool consider_all_candidates;
239 /* An assignment of iv candidates to uses. */
243 /* The number of uses covered by the assignment. */
246 /* Number of uses that cannot be expressed by the candidates in the set. */
249 /* Candidate assigned to a use, together with the related costs. */
250 struct cost_pair **cand_for_use;
252 /* Number of times each candidate is used. */
253 unsigned *n_cand_uses;
255 /* The candidates used. */
258 /* The number of candidates in the set. */
261 /* Total number of registers needed. */
264 /* Total cost of expressing uses. */
265 unsigned cand_use_cost;
267 /* Total cost of candidates. */
270 /* Number of times each invariant is used. */
271 unsigned *n_invariant_uses;
273 /* Total cost of the assignment. */
277 /* Difference of two iv candidate assignments. */
284 /* An old assignment (for rollback purposes). */
285 struct cost_pair *old_cp;
287 /* A new assignment. */
288 struct cost_pair *new_cp;
290 /* Next change in the list. */
291 struct iv_ca_delta *next_change;
294 /* Bound on number of candidates below that all candidates are considered. */
296 #define CONSIDER_ALL_CANDIDATES_BOUND \
297 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
299 /* If there are more iv occurrences, we just give up (it is quite unlikely that
300 optimizing such a loop would help, and it would take ages). */
302 #define MAX_CONSIDERED_USES \
303 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
305 /* If there are at most this number of ivs in the set, try removing unnecessary
306 ivs from the set always. */
308 #define ALWAYS_PRUNE_CAND_SET_BOUND \
309 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
311 /* The list of trees for that the decl_rtl field must be reset is stored
314 static VEC(tree,heap) *decl_rtl_to_reset;
316 /* Number of uses recorded in DATA. */
318 static inline unsigned
319 n_iv_uses (struct ivopts_data *data)
321 return VEC_length (iv_use_p, data->iv_uses);
324 /* Ith use recorded in DATA. */
326 static inline struct iv_use *
327 iv_use (struct ivopts_data *data, unsigned i)
329 return VEC_index (iv_use_p, data->iv_uses, i);
332 /* Number of candidates recorded in DATA. */
334 static inline unsigned
335 n_iv_cands (struct ivopts_data *data)
337 return VEC_length (iv_cand_p, data->iv_candidates);
340 /* Ith candidate recorded in DATA. */
342 static inline struct iv_cand *
343 iv_cand (struct ivopts_data *data, unsigned i)
345 return VEC_index (iv_cand_p, data->iv_candidates, i);
348 /* The single loop exit if it dominates the latch, NULL otherwise. */
351 single_dom_exit (struct loop *loop)
353 edge exit = single_exit (loop);
358 if (!just_once_each_iteration_p (loop, exit->src))
364 /* Dumps information about the induction variable IV to FILE. */
366 extern void dump_iv (FILE *, struct iv *);
368 dump_iv (FILE *file, struct iv *iv)
372 fprintf (file, "ssa name ");
373 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
374 fprintf (file, "\n");
377 fprintf (file, " type ");
378 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
379 fprintf (file, "\n");
383 fprintf (file, " base ");
384 print_generic_expr (file, iv->base, TDF_SLIM);
385 fprintf (file, "\n");
387 fprintf (file, " step ");
388 print_generic_expr (file, iv->step, TDF_SLIM);
389 fprintf (file, "\n");
393 fprintf (file, " invariant ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
400 fprintf (file, " base object ");
401 print_generic_expr (file, iv->base_object, TDF_SLIM);
402 fprintf (file, "\n");
406 fprintf (file, " is a biv\n");
409 /* Dumps information about the USE to FILE. */
411 extern void dump_use (FILE *, struct iv_use *);
413 dump_use (FILE *file, struct iv_use *use)
415 fprintf (file, "use %d\n", use->id);
419 case USE_NONLINEAR_EXPR:
420 fprintf (file, " generic\n");
424 fprintf (file, " address\n");
428 fprintf (file, " compare\n");
435 fprintf (file, " in statement ");
436 print_generic_expr (file, use->stmt, TDF_SLIM);
437 fprintf (file, "\n");
439 fprintf (file, " at position ");
441 print_generic_expr (file, *use->op_p, TDF_SLIM);
442 fprintf (file, "\n");
444 dump_iv (file, use->iv);
446 if (use->related_cands)
448 fprintf (file, " related candidates ");
449 dump_bitmap (file, use->related_cands);
453 /* Dumps information about the uses to FILE. */
455 extern void dump_uses (FILE *, struct ivopts_data *);
457 dump_uses (FILE *file, struct ivopts_data *data)
462 for (i = 0; i < n_iv_uses (data); i++)
464 use = iv_use (data, i);
466 dump_use (file, use);
467 fprintf (file, "\n");
471 /* Dumps information about induction variable candidate CAND to FILE. */
473 extern void dump_cand (FILE *, struct iv_cand *);
475 dump_cand (FILE *file, struct iv_cand *cand)
477 struct iv *iv = cand->iv;
479 fprintf (file, "candidate %d%s\n",
480 cand->id, cand->important ? " (important)" : "");
482 if (cand->depends_on)
484 fprintf (file, " depends on ");
485 dump_bitmap (file, cand->depends_on);
490 fprintf (file, " final value replacement\n");
497 fprintf (file, " incremented before exit test\n");
501 fprintf (file, " incremented at end\n");
505 fprintf (file, " original biv\n");
512 /* Returns the info for ssa version VER. */
514 static inline struct version_info *
515 ver_info (struct ivopts_data *data, unsigned ver)
517 return data->version_info + ver;
520 /* Returns the info for ssa name NAME. */
522 static inline struct version_info *
523 name_info (struct ivopts_data *data, tree name)
525 return ver_info (data, SSA_NAME_VERSION (name));
528 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
532 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
534 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
538 if (sbb == loop->latch)
544 return stmt == last_stmt (bb);
547 /* Returns true if STMT if after the place where the original induction
548 variable CAND is incremented. */
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
560 if (stmt_bb != cand_bb)
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 if (bsi_stmt (bsi) == cand->incremented_at)
569 if (bsi_stmt (bsi) == stmt)
574 /* Returns true if STMT if after the place where the induction variable
575 CAND is incremented in LOOP. */
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
586 return stmt_after_ip_normal_pos (loop, stmt);
589 return stmt_after_ip_original_pos (cand, stmt);
596 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
599 abnormal_ssa_name_p (tree exp)
604 if (TREE_CODE (exp) != SSA_NAME)
607 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
610 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
611 abnormal phi node. Callback for for_each_index. */
614 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
615 void *data ATTRIBUTE_UNUSED)
617 if (TREE_CODE (base) == ARRAY_REF)
619 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
621 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
625 return !abnormal_ssa_name_p (*index);
628 /* Returns true if EXPR contains a ssa name that occurs in an
629 abnormal phi node. */
632 contains_abnormal_ssa_name_p (tree expr)
635 enum tree_code_class class;
640 code = TREE_CODE (expr);
641 class = TREE_CODE_CLASS (code);
643 if (code == SSA_NAME)
644 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
646 if (code == INTEGER_CST
647 || is_gimple_min_invariant (expr))
650 if (code == ADDR_EXPR)
651 return !for_each_index (&TREE_OPERAND (expr, 0),
652 idx_contains_abnormal_ssa_name_p,
659 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
664 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
676 /* Element of the table in that we cache the numbers of iterations obtained
677 from exits of the loop. */
681 /* The edge for that the number of iterations is cached. */
684 /* Number of iterations corresponding to this exit, or NULL if it cannot be
689 /* Hash function for nfe_cache_elt E. */
692 nfe_hash (const void *e)
694 const struct nfe_cache_elt *elt = e;
696 return htab_hash_pointer (elt->exit);
699 /* Equality function for nfe_cache_elt E1 and edge E2. */
702 nfe_eq (const void *e1, const void *e2)
704 const struct nfe_cache_elt *elt1 = e1;
706 return elt1->exit == e2;
709 /* Returns tree describing number of iterations determined from
710 EXIT of DATA->current_loop, or NULL if something goes wrong. */
713 niter_for_exit (struct ivopts_data *data, edge exit)
715 struct nfe_cache_elt *nfe_desc;
716 struct tree_niter_desc desc;
719 slot = htab_find_slot_with_hash (data->niters, exit,
720 htab_hash_pointer (exit),
725 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
726 nfe_desc->exit = exit;
728 /* Try to determine number of iterations. We must know it
729 unconditionally (i.e., without possibility of # of iterations
730 being zero). Also, we cannot safely work with ssa names that
731 appear in phi nodes on abnormal edges, so that we do not create
732 overlapping life ranges for them (PR 27283). */
733 if (number_of_iterations_exit (data->current_loop,
735 && integer_zerop (desc.may_be_zero)
736 && !contains_abnormal_ssa_name_p (desc.niter))
737 nfe_desc->niter = desc.niter;
739 nfe_desc->niter = NULL_TREE;
744 return nfe_desc->niter;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
752 niter_for_single_dom_exit (struct ivopts_data *data)
754 edge exit = single_dom_exit (data->current_loop);
759 return niter_for_exit (data, exit);
762 /* Initializes data structures used by the iv optimization pass, stored
766 tree_ssa_iv_optimize_init (struct ivopts_data *data)
768 data->version_info_size = 2 * num_ssa_names;
769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
772 data->max_inv_id = 0;
773 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
783 determine_base_object (tree expr)
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj, op0, op1;
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (TREE_CODE (expr) == NOP_EXPR
792 || TREE_CODE (expr) == CONVERT_EXPR)
793 return determine_base_object (TREE_OPERAND (expr, 0));
795 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
804 obj = TREE_OPERAND (expr, 0);
805 base = get_base_address (obj);
810 if (TREE_CODE (base) == INDIRECT_REF)
811 return determine_base_object (TREE_OPERAND (base, 0));
813 return fold_convert (ptr_type_node,
814 build_fold_addr_expr (base));
818 op0 = determine_base_object (TREE_OPERAND (expr, 0));
819 op1 = determine_base_object (TREE_OPERAND (expr, 1));
825 return (code == PLUS_EXPR
827 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
829 return fold_build2 (code, ptr_type_node, op0, op1);
832 return fold_convert (ptr_type_node, expr);
836 /* Allocates an induction variable with given initial value BASE and step STEP
840 alloc_iv (tree base, tree step)
842 struct iv *iv = XCNEW (struct iv);
843 gcc_assert (step != NULL_TREE);
846 iv->base_object = determine_base_object (base);
849 iv->have_use_for = false;
851 iv->ssa_name = NULL_TREE;
856 /* Sets STEP and BASE for induction variable IV. */
859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
861 struct version_info *info = name_info (data, iv);
863 gcc_assert (!info->iv);
865 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
866 info->iv = alloc_iv (base, step);
867 info->iv->ssa_name = iv;
870 /* Finds induction variable declaration for VAR. */
873 get_iv (struct ivopts_data *data, tree var)
876 tree type = TREE_TYPE (var);
878 if (!POINTER_TYPE_P (type)
879 && !INTEGRAL_TYPE_P (type))
882 if (!name_info (data, var)->iv)
884 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
887 || !flow_bb_inside_loop_p (data->current_loop, bb))
888 set_iv (data, var, var, build_int_cst (type, 0));
891 return name_info (data, var)->iv;
894 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
895 not define a simple affine biv with nonzero step. */
898 determine_biv_step (tree phi)
900 struct loop *loop = bb_for_stmt (phi)->loop_father;
901 tree name = PHI_RESULT (phi);
904 if (!is_gimple_reg (name))
907 if (!simple_iv (loop, phi, name, &iv, true))
910 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
913 /* Finds basic ivs. */
916 find_bivs (struct ivopts_data *data)
918 tree phi, step, type, base;
920 struct loop *loop = data->current_loop;
922 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
924 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
927 step = determine_biv_step (phi);
931 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
932 base = expand_simple_operations (base);
933 if (contains_abnormal_ssa_name_p (base)
934 || contains_abnormal_ssa_name_p (step))
937 type = TREE_TYPE (PHI_RESULT (phi));
938 base = fold_convert (type, base);
940 step = fold_convert (type, step);
942 set_iv (data, PHI_RESULT (phi), base, step);
949 /* Marks basic ivs. */
952 mark_bivs (struct ivopts_data *data)
955 struct iv *iv, *incr_iv;
956 struct loop *loop = data->current_loop;
959 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
961 iv = get_iv (data, PHI_RESULT (phi));
965 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
966 incr_iv = get_iv (data, var);
970 /* If the increment is in the subloop, ignore it. */
971 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
972 if (incr_bb->loop_father != data->current_loop
973 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
977 incr_iv->biv_p = true;
981 /* Checks whether STMT defines a linear induction variable and stores its
985 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
988 struct loop *loop = data->current_loop;
990 iv->base = NULL_TREE;
991 iv->step = NULL_TREE;
993 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
996 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
997 if (TREE_CODE (lhs) != SSA_NAME)
1000 if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
1002 iv->base = expand_simple_operations (iv->base);
1004 if (contains_abnormal_ssa_name_p (iv->base)
1005 || contains_abnormal_ssa_name_p (iv->step))
1011 /* Finds general ivs in statement STMT. */
1014 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1018 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1021 set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1024 /* Finds general ivs in basic block BB. */
1027 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1029 block_stmt_iterator bsi;
1031 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1032 find_givs_in_stmt (data, bsi_stmt (bsi));
1035 /* Finds general ivs. */
1038 find_givs (struct ivopts_data *data)
1040 struct loop *loop = data->current_loop;
1041 basic_block *body = get_loop_body_in_dom_order (loop);
1044 for (i = 0; i < loop->num_nodes; i++)
1045 find_givs_in_bb (data, body[i]);
1049 /* For each ssa name defined in LOOP determines whether it is an induction
1050 variable and if so, its initial value and step. */
1053 find_induction_variables (struct ivopts_data *data)
1058 if (!find_bivs (data))
1064 if (dump_file && (dump_flags & TDF_DETAILS))
1066 tree niter = niter_for_single_dom_exit (data);
1070 fprintf (dump_file, " number of iterations ");
1071 print_generic_expr (dump_file, niter, TDF_SLIM);
1072 fprintf (dump_file, "\n\n");
1075 fprintf (dump_file, "Induction variables:\n\n");
1077 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1079 if (ver_info (data, i)->iv)
1080 dump_iv (dump_file, ver_info (data, i)->iv);
1087 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1089 static struct iv_use *
1090 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1091 tree stmt, enum use_type use_type)
1093 struct iv_use *use = XCNEW (struct iv_use);
1095 use->id = n_iv_uses (data);
1096 use->type = use_type;
1100 use->related_cands = BITMAP_ALLOC (NULL);
1102 /* To avoid showing ssa name in the dumps, if it was not reset by the
1104 iv->ssa_name = NULL_TREE;
1106 if (dump_file && (dump_flags & TDF_DETAILS))
1107 dump_use (dump_file, use);
1109 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1114 /* Checks whether OP is a loop-level invariant and if so, records it.
1115 NONLINEAR_USE is true if the invariant is used in a way we do not
1116 handle specially. */
1119 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1122 struct version_info *info;
1124 if (TREE_CODE (op) != SSA_NAME
1125 || !is_gimple_reg (op))
1128 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1130 && flow_bb_inside_loop_p (data->current_loop, bb))
1133 info = name_info (data, op);
1135 info->has_nonlin_use |= nonlinear_use;
1137 info->inv_id = ++data->max_inv_id;
1138 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1141 /* Checks whether the use OP is interesting and if so, records it. */
1143 static struct iv_use *
1144 find_interesting_uses_op (struct ivopts_data *data, tree op)
1151 if (TREE_CODE (op) != SSA_NAME)
1154 iv = get_iv (data, op);
1158 if (iv->have_use_for)
1160 use = iv_use (data, iv->use_id);
1162 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1166 if (integer_zerop (iv->step))
1168 record_invariant (data, op, true);
1171 iv->have_use_for = true;
1173 civ = XNEW (struct iv);
1176 stmt = SSA_NAME_DEF_STMT (op);
1177 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1178 || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1180 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1181 iv->use_id = use->id;
1186 /* Checks whether the condition *COND_P in STMT is interesting
1187 and if so, records it. */
1190 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1194 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1196 tree zero = integer_zero_node;
1198 const_iv.step = integer_zero_node;
1200 if (TREE_CODE (*cond_p) != SSA_NAME
1201 && !COMPARISON_CLASS_P (*cond_p))
1204 if (TREE_CODE (*cond_p) == SSA_NAME)
1211 op0_p = &TREE_OPERAND (*cond_p, 0);
1212 op1_p = &TREE_OPERAND (*cond_p, 1);
1215 if (TREE_CODE (*op0_p) == SSA_NAME)
1216 iv0 = get_iv (data, *op0_p);
1220 if (TREE_CODE (*op1_p) == SSA_NAME)
1221 iv1 = get_iv (data, *op1_p);
1225 if (/* When comparing with non-invariant value, we may not do any senseful
1226 induction variable elimination. */
1228 /* Eliminating condition based on two ivs would be nontrivial.
1229 ??? TODO -- it is not really important to handle this case. */
1230 || (!integer_zerop (iv0->step)
1231 && !integer_zerop (iv1->step)))
1233 find_interesting_uses_op (data, *op0_p);
1234 find_interesting_uses_op (data, *op1_p);
1238 if (integer_zerop (iv0->step)
1239 && integer_zerop (iv1->step))
1241 /* If both are invariants, this is a work for unswitching. */
1245 civ = XNEW (struct iv);
1246 *civ = integer_zerop (iv0->step) ? *iv1: *iv0;
1247 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1250 /* Returns true if expression EXPR is obviously invariant in LOOP,
1251 i.e. if all its operands are defined outside of the LOOP. */
1254 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1259 if (is_gimple_min_invariant (expr))
1262 if (TREE_CODE (expr) == SSA_NAME)
1264 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1266 && flow_bb_inside_loop_p (loop, def_bb))
1272 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1275 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1276 for (i = 0; i < len; i++)
1277 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1283 /* Cumulates the steps of indices into DATA and replaces their values with the
1284 initial ones. Returns false when the value of the index cannot be determined.
1285 Callback for for_each_index. */
1287 struct ifs_ivopts_data
1289 struct ivopts_data *ivopts_data;
1295 idx_find_step (tree base, tree *idx, void *data)
1297 struct ifs_ivopts_data *dta = data;
1299 tree step, iv_base, iv_step, lbound, off;
1300 struct loop *loop = dta->ivopts_data->current_loop;
1302 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1303 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1306 /* If base is a component ref, require that the offset of the reference
1308 if (TREE_CODE (base) == COMPONENT_REF)
1310 off = component_ref_field_offset (base);
1311 return expr_invariant_in_loop_p (loop, off);
1314 /* If base is array, first check whether we will be able to move the
1315 reference out of the loop (in order to take its address in strength
1316 reduction). In order for this to work we need both lower bound
1317 and step to be loop invariants. */
1318 if (TREE_CODE (base) == ARRAY_REF)
1320 step = array_ref_element_size (base);
1321 lbound = array_ref_low_bound (base);
1323 if (!expr_invariant_in_loop_p (loop, step)
1324 || !expr_invariant_in_loop_p (loop, lbound))
1328 if (TREE_CODE (*idx) != SSA_NAME)
1331 iv = get_iv (dta->ivopts_data, *idx);
1335 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1336 *&x[0], which is not folded and does not trigger the
1337 ARRAY_REF path below. */
1340 if (integer_zerop (iv->step))
1343 if (TREE_CODE (base) == ARRAY_REF)
1345 step = array_ref_element_size (base);
1347 /* We only handle addresses whose step is an integer constant. */
1348 if (TREE_CODE (step) != INTEGER_CST)
1352 /* The step for pointer arithmetics already is 1 byte. */
1353 step = build_int_cst (sizetype, 1);
1357 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1358 sizetype, &iv_base, &iv_step, dta->stmt,
1361 /* The index might wrap. */
1365 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1366 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1371 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1372 object is passed to it in DATA. */
1375 idx_record_use (tree base, tree *idx,
1378 find_interesting_uses_op (data, *idx);
1379 if (TREE_CODE (base) == ARRAY_REF)
1381 find_interesting_uses_op (data, array_ref_element_size (base));
1382 find_interesting_uses_op (data, array_ref_low_bound (base));
1387 /* Returns true if memory reference REF may be unaligned. */
1390 may_be_unaligned_p (tree ref)
1394 HOST_WIDE_INT bitsize;
1395 HOST_WIDE_INT bitpos;
1397 enum machine_mode mode;
1398 int unsignedp, volatilep;
1399 unsigned base_align;
1401 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1402 thus they are not misaligned. */
1403 if (TREE_CODE (ref) == TARGET_MEM_REF)
1406 /* The test below is basically copy of what expr.c:normal_inner_ref
1407 does to check whether the object must be loaded by parts when
1408 STRICT_ALIGNMENT is true. */
1409 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1410 &unsignedp, &volatilep, true);
1411 base_type = TREE_TYPE (base);
1412 base_align = TYPE_ALIGN (base_type);
1415 && (base_align < GET_MODE_ALIGNMENT (mode)
1416 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1417 || bitpos % BITS_PER_UNIT != 0))
1423 /* Return true if EXPR may be non-addressable. */
1426 may_be_nonaddressable_p (tree expr)
1428 switch (TREE_CODE (expr))
1431 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1432 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1435 case ARRAY_RANGE_REF:
1436 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1438 case VIEW_CONVERT_EXPR:
1439 /* This kind of view-conversions may wrap non-addressable objects
1440 and make them look addressable. After some processing the
1441 non-addressability may be uncovered again, causing ADDR_EXPRs
1442 of inappropriate objects to be built. */
1443 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1444 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1453 /* Finds addresses in *OP_P inside STMT. */
1456 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1458 tree base = *op_p, step = build_int_cst (sizetype, 0);
1460 struct ifs_ivopts_data ifs_ivopts_data;
1462 /* Do not play with volatile memory references. A bit too conservative,
1463 perhaps, but safe. */
1464 if (stmt_ann (stmt)->has_volatile_ops)
1467 /* Ignore bitfields for now. Not really something terribly complicated
1469 if (TREE_CODE (base) == BIT_FIELD_REF)
1472 if (may_be_nonaddressable_p (base))
1475 if (STRICT_ALIGNMENT
1476 && may_be_unaligned_p (base))
1479 base = unshare_expr (base);
1481 if (TREE_CODE (base) == TARGET_MEM_REF)
1483 tree type = build_pointer_type (TREE_TYPE (base));
1487 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1489 civ = get_iv (data, TMR_BASE (base));
1493 TMR_BASE (base) = civ->base;
1496 if (TMR_INDEX (base)
1497 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1499 civ = get_iv (data, TMR_INDEX (base));
1503 TMR_INDEX (base) = civ->base;
1508 if (TMR_STEP (base))
1509 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1511 step = fold_build2 (PLUS_EXPR, type, step, astep);
1515 if (integer_zerop (step))
1517 base = tree_mem_ref_addr (type, base);
1521 ifs_ivopts_data.ivopts_data = data;
1522 ifs_ivopts_data.stmt = stmt;
1523 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1524 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1525 || integer_zerop (ifs_ivopts_data.step))
1527 step = ifs_ivopts_data.step;
1529 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1530 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1532 base = build_fold_addr_expr (base);
1534 /* Substituting bases of IVs into the base expression might
1535 have caused folding opportunities. */
1536 if (TREE_CODE (base) == ADDR_EXPR)
1538 tree *ref = &TREE_OPERAND (base, 0);
1539 while (handled_component_p (*ref))
1540 ref = &TREE_OPERAND (*ref, 0);
1541 if (TREE_CODE (*ref) == INDIRECT_REF)
1542 *ref = fold_indirect_ref (*ref);
1546 civ = alloc_iv (base, step);
1547 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1551 for_each_index (op_p, idx_record_use, data);
1554 /* Finds and records invariants used in STMT. */
1557 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1560 use_operand_p use_p;
1563 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1565 op = USE_FROM_PTR (use_p);
1566 record_invariant (data, op, false);
1570 /* Finds interesting uses of induction variables in the statement STMT. */
1573 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1578 use_operand_p use_p;
1580 find_invariants_stmt (data, stmt);
1582 if (TREE_CODE (stmt) == COND_EXPR)
1584 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1588 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1590 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1591 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1593 if (TREE_CODE (lhs) == SSA_NAME)
1595 /* If the statement defines an induction variable, the uses are not
1596 interesting by themselves. */
1598 iv = get_iv (data, lhs);
1600 if (iv && !integer_zerop (iv->step))
1604 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1606 case tcc_comparison:
1607 find_interesting_uses_cond (data, stmt,
1608 &GIMPLE_STMT_OPERAND (stmt, 1));
1612 find_interesting_uses_address (data, stmt,
1613 &GIMPLE_STMT_OPERAND (stmt, 1));
1614 if (REFERENCE_CLASS_P (lhs))
1615 find_interesting_uses_address (data, stmt,
1616 &GIMPLE_STMT_OPERAND (stmt, 0));
1622 if (REFERENCE_CLASS_P (lhs)
1623 && is_gimple_val (rhs))
1625 find_interesting_uses_address (data, stmt,
1626 &GIMPLE_STMT_OPERAND (stmt, 0));
1627 find_interesting_uses_op (data, rhs);
1631 /* TODO -- we should also handle address uses of type
1633 memory = call (whatever);
1640 if (TREE_CODE (stmt) == PHI_NODE
1641 && bb_for_stmt (stmt) == data->current_loop->header)
1643 lhs = PHI_RESULT (stmt);
1644 iv = get_iv (data, lhs);
1646 if (iv && !integer_zerop (iv->step))
1650 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1652 op = USE_FROM_PTR (use_p);
1654 if (TREE_CODE (op) != SSA_NAME)
1657 iv = get_iv (data, op);
1661 find_interesting_uses_op (data, op);
1665 /* Finds interesting uses of induction variables outside of loops
1666 on loop exit edge EXIT. */
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1673 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1675 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676 if (is_gimple_reg (def))
1677 find_interesting_uses_op (data, def);
1681 /* Finds uses of the induction variables that are interesting. */
1684 find_interesting_uses (struct ivopts_data *data)
1687 block_stmt_iterator bsi;
1689 basic_block *body = get_loop_body (data->current_loop);
1691 struct version_info *info;
1694 if (dump_file && (dump_flags & TDF_DETAILS))
1695 fprintf (dump_file, "Uses:\n\n");
1697 for (i = 0; i < data->current_loop->num_nodes; i++)
1702 FOR_EACH_EDGE (e, ei, bb->succs)
1703 if (e->dest != EXIT_BLOCK_PTR
1704 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1705 find_interesting_uses_outside (data, e);
1707 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1708 find_interesting_uses_stmt (data, phi);
1709 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1710 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1717 fprintf (dump_file, "\n");
1719 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1721 info = ver_info (data, i);
1724 fprintf (dump_file, " ");
1725 print_generic_expr (dump_file, info->name, TDF_SLIM);
1726 fprintf (dump_file, " is invariant (%d)%s\n",
1727 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1731 fprintf (dump_file, "\n");
1737 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1738 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1739 we are at the top-level of the processed address. */
1742 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1743 unsigned HOST_WIDE_INT *offset)
1745 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1746 enum tree_code code;
1747 tree type, orig_type = TREE_TYPE (expr);
1748 unsigned HOST_WIDE_INT off0, off1, st;
1749 tree orig_expr = expr;
1753 type = TREE_TYPE (expr);
1754 code = TREE_CODE (expr);
1760 if (!cst_and_fits_in_hwi (expr)
1761 || integer_zerop (expr))
1764 *offset = int_cst_value (expr);
1765 return build_int_cst (orig_type, 0);
1769 op0 = TREE_OPERAND (expr, 0);
1770 op1 = TREE_OPERAND (expr, 1);
1772 op0 = strip_offset_1 (op0, false, false, &off0);
1773 op1 = strip_offset_1 (op1, false, false, &off1);
1775 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1776 if (op0 == TREE_OPERAND (expr, 0)
1777 && op1 == TREE_OPERAND (expr, 1))
1780 if (integer_zerop (op1))
1782 else if (integer_zerop (op0))
1784 if (code == PLUS_EXPR)
1787 expr = fold_build1 (NEGATE_EXPR, type, op1);
1790 expr = fold_build2 (code, type, op0, op1);
1792 return fold_convert (orig_type, expr);
1798 step = array_ref_element_size (expr);
1799 if (!cst_and_fits_in_hwi (step))
1802 st = int_cst_value (step);
1803 op1 = TREE_OPERAND (expr, 1);
1804 op1 = strip_offset_1 (op1, false, false, &off1);
1805 *offset = off1 * st;
1808 && integer_zerop (op1))
1810 /* Strip the component reference completely. */
1811 op0 = TREE_OPERAND (expr, 0);
1812 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1822 tmp = component_ref_field_offset (expr);
1824 && cst_and_fits_in_hwi (tmp))
1826 /* Strip the component reference completely. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1829 *offset = off0 + int_cst_value (tmp);
1835 op0 = TREE_OPERAND (expr, 0);
1836 op0 = strip_offset_1 (op0, true, true, &off0);
1839 if (op0 == TREE_OPERAND (expr, 0))
1842 expr = build_fold_addr_expr (op0);
1843 return fold_convert (orig_type, expr);
1846 inside_addr = false;
1853 /* Default handling of expressions for that we want to recurse into
1854 the first operand. */
1855 op0 = TREE_OPERAND (expr, 0);
1856 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1859 if (op0 == TREE_OPERAND (expr, 0)
1860 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1863 expr = copy_node (expr);
1864 TREE_OPERAND (expr, 0) = op0;
1866 TREE_OPERAND (expr, 1) = op1;
1868 /* Inside address, we might strip the top level component references,
1869 thus changing type of the expression. Handling of ADDR_EXPR
1871 expr = fold_convert (orig_type, expr);
1876 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1879 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1881 return strip_offset_1 (expr, false, false, offset);
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885 We return unsigned type with the same precision, which avoids problems
1889 generic_type_for (tree type)
1891 if (POINTER_TYPE_P (type))
1892 return unsigned_type_for (type);
1894 if (TYPE_UNSIGNED (type))
1897 return unsigned_type_for (type);
1900 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1901 the bitmap to that we should store it. */
1903 static struct ivopts_data *fd_ivopts_data;
1905 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1907 bitmap *depends_on = data;
1908 struct version_info *info;
1910 if (TREE_CODE (*expr_p) != SSA_NAME)
1912 info = name_info (fd_ivopts_data, *expr_p);
1914 if (!info->inv_id || info->has_nonlin_use)
1918 *depends_on = BITMAP_ALLOC (NULL);
1919 bitmap_set_bit (*depends_on, info->inv_id);
1924 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1925 position to POS. If USE is not NULL, the candidate is set as related to
1926 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1927 replacement of the final value of the iv by a direct computation. */
1929 static struct iv_cand *
1930 add_candidate_1 (struct ivopts_data *data,
1931 tree base, tree step, bool important, enum iv_position pos,
1932 struct iv_use *use, tree incremented_at)
1935 struct iv_cand *cand = NULL;
1936 tree type, orig_type;
1940 orig_type = TREE_TYPE (base);
1941 type = generic_type_for (orig_type);
1942 if (type != orig_type)
1944 base = fold_convert (type, base);
1945 step = fold_convert (type, step);
1949 for (i = 0; i < n_iv_cands (data); i++)
1951 cand = iv_cand (data, i);
1953 if (cand->pos != pos)
1956 if (cand->incremented_at != incremented_at)
1970 if (operand_equal_p (base, cand->iv->base, 0)
1971 && operand_equal_p (step, cand->iv->step, 0))
1975 if (i == n_iv_cands (data))
1977 cand = XCNEW (struct iv_cand);
1983 cand->iv = alloc_iv (base, step);
1986 if (pos != IP_ORIGINAL && cand->iv)
1988 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1989 cand->var_after = cand->var_before;
1991 cand->important = important;
1992 cand->incremented_at = incremented_at;
1993 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
1996 && TREE_CODE (step) != INTEGER_CST)
1998 fd_ivopts_data = data;
1999 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2002 if (dump_file && (dump_flags & TDF_DETAILS))
2003 dump_cand (dump_file, cand);
2006 if (important && !cand->important)
2008 cand->important = true;
2009 if (dump_file && (dump_flags & TDF_DETAILS))
2010 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2015 bitmap_set_bit (use->related_cands, i);
2016 if (dump_file && (dump_flags & TDF_DETAILS))
2017 fprintf (dump_file, "Candidate %d is related to use %d\n",
2024 /* Returns true if incrementing the induction variable at the end of the LOOP
2027 The purpose is to avoid splitting latch edge with a biv increment, thus
2028 creating a jump, possibly confusing other optimization passes and leaving
2029 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2030 is not available (so we do not have a better alternative), or if the latch
2031 edge is already nonempty. */
2034 allow_ip_end_pos_p (struct loop *loop)
2036 if (!ip_normal_pos (loop))
2039 if (!empty_block_p (ip_end_pos (loop)))
2045 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2046 position to POS. If USE is not NULL, the candidate is set as related to
2047 it. The candidate computation is scheduled on all available positions. */
2050 add_candidate (struct ivopts_data *data,
2051 tree base, tree step, bool important, struct iv_use *use)
2053 if (ip_normal_pos (data->current_loop))
2054 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2055 if (ip_end_pos (data->current_loop)
2056 && allow_ip_end_pos_p (data->current_loop))
2057 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2060 /* Add a standard "0 + 1 * iteration" iv candidate for a
2061 type with SIZE bits. */
2064 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2067 tree type = lang_hooks.types.type_for_size (size, true);
2068 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2072 /* Adds standard iv candidates. */
2075 add_standard_iv_candidates (struct ivopts_data *data)
2077 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2079 /* The same for a double-integer type if it is still fast enough. */
2080 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2081 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2085 /* Adds candidates bases on the old induction variable IV. */
2088 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2091 struct iv_cand *cand;
2093 add_candidate (data, iv->base, iv->step, true, NULL);
2095 /* The same, but with initial value zero. */
2096 add_candidate (data,
2097 build_int_cst (TREE_TYPE (iv->base), 0),
2098 iv->step, true, NULL);
2100 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2101 if (TREE_CODE (phi) == PHI_NODE)
2103 /* Additionally record the possibility of leaving the original iv
2105 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2106 cand = add_candidate_1 (data,
2107 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2108 SSA_NAME_DEF_STMT (def));
2109 cand->var_before = iv->ssa_name;
2110 cand->var_after = def;
2114 /* Adds candidates based on the old induction variables. */
2117 add_old_ivs_candidates (struct ivopts_data *data)
2123 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2125 iv = ver_info (data, i)->iv;
2126 if (iv && iv->biv_p && !integer_zerop (iv->step))
2127 add_old_iv_candidates (data, iv);
2131 /* Adds candidates based on the value of the induction variable IV and USE. */
2134 add_iv_value_candidates (struct ivopts_data *data,
2135 struct iv *iv, struct iv_use *use)
2137 unsigned HOST_WIDE_INT offset;
2140 add_candidate (data, iv->base, iv->step, false, use);
2142 /* The same, but with initial value zero. Make such variable important,
2143 since it is generic enough so that possibly many uses may be based
2145 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2146 iv->step, true, use);
2148 /* Third, try removing the constant offset. */
2149 base = strip_offset (iv->base, &offset);
2151 add_candidate (data, base, iv->step, false, use);
2154 /* Adds candidates based on the uses. */
2157 add_derived_ivs_candidates (struct ivopts_data *data)
2161 for (i = 0; i < n_iv_uses (data); i++)
2163 struct iv_use *use = iv_use (data, i);
2170 case USE_NONLINEAR_EXPR:
2173 /* Just add the ivs based on the value of the iv used here. */
2174 add_iv_value_candidates (data, use->iv, use);
2183 /* Record important candidates and add them to related_cands bitmaps
2187 record_important_candidates (struct ivopts_data *data)
2192 for (i = 0; i < n_iv_cands (data); i++)
2194 struct iv_cand *cand = iv_cand (data, i);
2196 if (cand->important)
2197 bitmap_set_bit (data->important_candidates, i);
2200 data->consider_all_candidates = (n_iv_cands (data)
2201 <= CONSIDER_ALL_CANDIDATES_BOUND);
2203 if (data->consider_all_candidates)
2205 /* We will not need "related_cands" bitmaps in this case,
2206 so release them to decrease peak memory consumption. */
2207 for (i = 0; i < n_iv_uses (data); i++)
2209 use = iv_use (data, i);
2210 BITMAP_FREE (use->related_cands);
2215 /* Add important candidates to the related_cands bitmaps. */
2216 for (i = 0; i < n_iv_uses (data); i++)
2217 bitmap_ior_into (iv_use (data, i)->related_cands,
2218 data->important_candidates);
2222 /* Finds the candidates for the induction variables. */
2225 find_iv_candidates (struct ivopts_data *data)
2227 /* Add commonly used ivs. */
2228 add_standard_iv_candidates (data);
2230 /* Add old induction variables. */
2231 add_old_ivs_candidates (data);
2233 /* Add induction variables derived from uses. */
2234 add_derived_ivs_candidates (data);
2236 /* Record the important candidates. */
2237 record_important_candidates (data);
2240 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2241 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2242 we allocate a simple list to every use. */
2245 alloc_use_cost_map (struct ivopts_data *data)
2247 unsigned i, size, s, j;
2249 for (i = 0; i < n_iv_uses (data); i++)
2251 struct iv_use *use = iv_use (data, i);
2254 if (data->consider_all_candidates)
2255 size = n_iv_cands (data);
2259 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2264 /* Round up to the power of two, so that moduling by it is fast. */
2265 for (size = 1; size < s; size <<= 1)
2269 use->n_map_members = size;
2270 use->cost_map = XCNEWVEC (struct cost_pair, size);
2274 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2275 on invariants DEPENDS_ON and that the value used in expressing it
2279 set_use_iv_cost (struct ivopts_data *data,
2280 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2281 bitmap depends_on, tree value)
2287 BITMAP_FREE (depends_on);
2291 if (data->consider_all_candidates)
2293 use->cost_map[cand->id].cand = cand;
2294 use->cost_map[cand->id].cost = cost;
2295 use->cost_map[cand->id].depends_on = depends_on;
2296 use->cost_map[cand->id].value = value;
2300 /* n_map_members is a power of two, so this computes modulo. */
2301 s = cand->id & (use->n_map_members - 1);
2302 for (i = s; i < use->n_map_members; i++)
2303 if (!use->cost_map[i].cand)
2305 for (i = 0; i < s; i++)
2306 if (!use->cost_map[i].cand)
2312 use->cost_map[i].cand = cand;
2313 use->cost_map[i].cost = cost;
2314 use->cost_map[i].depends_on = depends_on;
2315 use->cost_map[i].value = value;
2318 /* Gets cost of (USE, CANDIDATE) pair. */
2320 static struct cost_pair *
2321 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2322 struct iv_cand *cand)
2325 struct cost_pair *ret;
2330 if (data->consider_all_candidates)
2332 ret = use->cost_map + cand->id;
2339 /* n_map_members is a power of two, so this computes modulo. */
2340 s = cand->id & (use->n_map_members - 1);
2341 for (i = s; i < use->n_map_members; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2345 for (i = 0; i < s; i++)
2346 if (use->cost_map[i].cand == cand)
2347 return use->cost_map + i;
2352 /* Returns estimate on cost of computing SEQ. */
2360 for (; seq; seq = NEXT_INSN (seq))
2362 set = single_set (seq);
2364 cost += rtx_cost (set, SET);
2372 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2374 produce_memory_decl_rtl (tree obj, int *regno)
2379 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2381 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2382 x = gen_rtx_SYMBOL_REF (Pmode, name);
2385 x = gen_raw_REG (Pmode, (*regno)++);
2387 return gen_rtx_MEM (DECL_MODE (obj), x);
2390 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2391 walk_tree. DATA contains the actual fake register number. */
2394 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2396 tree obj = NULL_TREE;
2400 switch (TREE_CODE (*expr_p))
2403 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2404 handled_component_p (*expr_p);
2405 expr_p = &TREE_OPERAND (*expr_p, 0))
2408 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2409 x = produce_memory_decl_rtl (obj, regno);
2414 obj = SSA_NAME_VAR (*expr_p);
2415 if (!DECL_RTL_SET_P (obj))
2416 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2425 if (DECL_RTL_SET_P (obj))
2428 if (DECL_MODE (obj) == BLKmode)
2429 x = produce_memory_decl_rtl (obj, regno);
2431 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2441 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2442 SET_DECL_RTL (obj, x);
2448 /* Determines cost of the computation of EXPR. */
2451 computation_cost (tree expr)
2454 tree type = TREE_TYPE (expr);
2456 /* Avoid using hard regs in ways which may be unsupported. */
2457 int regno = LAST_VIRTUAL_REGISTER + 1;
2459 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2461 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2465 cost = seq_cost (seq);
2467 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2472 /* Returns variable containing the value of candidate CAND at statement AT. */
2475 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2477 if (stmt_after_increment (loop, cand, stmt))
2478 return cand->var_after;
2480 return cand->var_before;
2483 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2484 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2487 tree_int_cst_sign_bit (tree t)
2489 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2490 unsigned HOST_WIDE_INT w;
2492 if (bitno < HOST_BITS_PER_WIDE_INT)
2493 w = TREE_INT_CST_LOW (t);
2496 w = TREE_INT_CST_HIGH (t);
2497 bitno -= HOST_BITS_PER_WIDE_INT;
2500 return (w >> bitno) & 1;
2503 /* If we can prove that TOP = cst * BOT for some constant cst,
2504 store cst to MUL and return true. Otherwise return false.
2505 The returned value is always sign-extended, regardless of the
2506 signedness of TOP and BOT. */
2509 constant_multiple_of (tree top, tree bot, double_int *mul)
2512 enum tree_code code;
2513 double_int res, p0, p1;
2514 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2519 if (operand_equal_p (top, bot, 0))
2521 *mul = double_int_one;
2525 code = TREE_CODE (top);
2529 mby = TREE_OPERAND (top, 1);
2530 if (TREE_CODE (mby) != INTEGER_CST)
2533 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2536 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2542 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2543 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2546 if (code == MINUS_EXPR)
2547 p1 = double_int_neg (p1);
2548 *mul = double_int_sext (double_int_add (p0, p1), precision);
2552 if (TREE_CODE (bot) != INTEGER_CST)
2555 p0 = double_int_sext (tree_to_double_int (top), precision);
2556 p1 = double_int_sext (tree_to_double_int (bot), precision);
2557 if (double_int_zero_p (p1))
2559 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2561 return double_int_zero_p (res);
2568 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2569 same precision that is at least as wide as the precision of TYPE, stores
2570 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2574 determine_common_wider_type (tree *a, tree *b)
2576 tree wider_type = NULL;
2578 tree atype = TREE_TYPE (*a);
2580 if ((TREE_CODE (*a) == NOP_EXPR
2581 || TREE_CODE (*a) == CONVERT_EXPR))
2583 suba = TREE_OPERAND (*a, 0);
2584 wider_type = TREE_TYPE (suba);
2585 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2591 if ((TREE_CODE (*b) == NOP_EXPR
2592 || TREE_CODE (*b) == CONVERT_EXPR))
2594 subb = TREE_OPERAND (*b, 0);
2595 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2606 /* Determines the expression by that USE is expressed from induction variable
2607 CAND at statement AT in LOOP. The expression is stored in a decomposed
2608 form into AFF. Returns false if USE cannot be expressed using CAND. */
2611 get_computation_aff (struct loop *loop,
2612 struct iv_use *use, struct iv_cand *cand, tree at,
2613 struct affine_tree_combination *aff)
2615 tree ubase = use->iv->base;
2616 tree ustep = use->iv->step;
2617 tree cbase = cand->iv->base;
2618 tree cstep = cand->iv->step, cstep_common;
2619 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2620 tree common_type, var;
2622 aff_tree cbase_aff, var_aff;
2625 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2627 /* We do not have a precision to express the values of use. */
2631 var = var_at_stmt (loop, cand, at);
2632 uutype = unsigned_type_for (utype);
2634 /* If the conversion is not noop, perform it. */
2635 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2637 cstep = fold_convert (uutype, cstep);
2638 cbase = fold_convert (uutype, cbase);
2639 var = fold_convert (uutype, var);
2642 if (!constant_multiple_of (ustep, cstep, &rat))
2645 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2646 type, we achieve better folding by computing their difference in this
2647 wider type, and cast the result to UUTYPE. We do not need to worry about
2648 overflows, as all the arithmetics will in the end be performed in UUTYPE
2650 common_type = determine_common_wider_type (&ubase, &cbase);
2652 /* use = ubase - ratio * cbase + ratio * var. */
2653 tree_to_aff_combination (ubase, common_type, aff);
2654 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2655 tree_to_aff_combination (var, uutype, &var_aff);
2657 /* We need to shift the value if we are after the increment. */
2658 if (stmt_after_increment (loop, cand, at))
2662 if (common_type != uutype)
2663 cstep_common = fold_convert (common_type, cstep);
2665 cstep_common = cstep;
2667 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2668 aff_combination_add (&cbase_aff, &cstep_aff);
2671 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2672 aff_combination_add (aff, &cbase_aff);
2673 if (common_type != uutype)
2674 aff_combination_convert (aff, uutype);
2676 aff_combination_scale (&var_aff, rat);
2677 aff_combination_add (aff, &var_aff);
2682 /* Determines the expression by that USE is expressed from induction variable
2683 CAND at statement AT in LOOP. The computation is unshared. */
2686 get_computation_at (struct loop *loop,
2687 struct iv_use *use, struct iv_cand *cand, tree at)
2690 tree type = TREE_TYPE (use->iv->base);
2692 if (!get_computation_aff (loop, use, cand, at, &aff))
2694 unshare_aff_combination (&aff);
2695 return fold_convert (type, aff_combination_to_tree (&aff));
2698 /* Determines the expression by that USE is expressed from induction variable
2699 CAND in LOOP. The computation is unshared. */
2702 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2704 return get_computation_at (loop, use, cand, use->stmt);
2707 /* Returns cost of addition in MODE. */
2710 add_cost (enum machine_mode mode)
2712 static unsigned costs[NUM_MACHINE_MODES];
2720 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2721 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2722 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2727 cost = seq_cost (seq);
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 fprintf (dump_file, "Addition in %s costs %d\n",
2735 GET_MODE_NAME (mode), cost);
2739 /* Entry in a hashtable of already known costs for multiplication. */
2742 HOST_WIDE_INT cst; /* The constant to multiply by. */
2743 enum machine_mode mode; /* In mode. */
2744 unsigned cost; /* The cost. */
2747 /* Counts hash value for the ENTRY. */
2750 mbc_entry_hash (const void *entry)
2752 const struct mbc_entry *e = entry;
2754 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2757 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2760 mbc_entry_eq (const void *entry1, const void *entry2)
2762 const struct mbc_entry *e1 = entry1;
2763 const struct mbc_entry *e2 = entry2;
2765 return (e1->mode == e2->mode
2766 && e1->cst == e2->cst);
2769 /* Returns cost of multiplication by constant CST in MODE. */
2772 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2774 static htab_t costs;
2775 struct mbc_entry **cached, act;
2780 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2784 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2786 return (*cached)->cost;
2788 *cached = XNEW (struct mbc_entry);
2789 (*cached)->mode = mode;
2790 (*cached)->cst = cst;
2793 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2794 gen_int_mode (cst, mode), NULL_RTX, 0);
2798 cost = seq_cost (seq);
2800 if (dump_file && (dump_flags & TDF_DETAILS))
2801 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2802 (int) cst, GET_MODE_NAME (mode), cost);
2804 (*cached)->cost = cost;
2809 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2810 validity for a memory reference accessing memory of mode MODE. */
2813 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2815 #define MAX_RATIO 128
2816 static sbitmap valid_mult[MAX_MACHINE_MODE];
2818 if (!valid_mult[mode])
2820 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2824 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2825 sbitmap_zero (valid_mult[mode]);
2826 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2827 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2829 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2830 if (memory_address_p (mode, addr))
2831 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2834 if (dump_file && (dump_flags & TDF_DETAILS))
2836 fprintf (dump_file, " allowed multipliers:");
2837 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2838 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2839 fprintf (dump_file, " %d", (int) i);
2840 fprintf (dump_file, "\n");
2841 fprintf (dump_file, "\n");
2845 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2848 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2851 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2852 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2853 variable is omitted. Compute the cost for a memory reference that accesses
2854 a memory location of mode MEM_MODE.
2856 TODO -- there must be some better way. This all is quite crude. */
2859 get_address_cost (bool symbol_present, bool var_present,
2860 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2861 enum machine_mode mem_mode)
2863 static bool initialized[MAX_MACHINE_MODE];
2864 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2865 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2866 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2867 unsigned cost, acost;
2868 bool offset_p, ratio_p;
2869 HOST_WIDE_INT s_offset;
2870 unsigned HOST_WIDE_INT mask;
2873 if (!initialized[mem_mode])
2876 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2877 int old_cse_not_expected;
2878 unsigned sym_p, var_p, off_p, rat_p, add_c;
2879 rtx seq, addr, base;
2882 initialized[mem_mode] = true;
2884 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2886 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2887 for (i = start; i <= 1 << 20; i <<= 1)
2889 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2890 if (!memory_address_p (mem_mode, addr))
2893 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2894 off[mem_mode] = max_offset[mem_mode];
2896 for (i = start; i <= 1 << 20; i <<= 1)
2898 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2899 if (!memory_address_p (mem_mode, addr))
2902 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2904 if (dump_file && (dump_flags & TDF_DETAILS))
2906 fprintf (dump_file, "get_address_cost:\n");
2907 fprintf (dump_file, " min offset %s %d\n",
2908 GET_MODE_NAME (mem_mode),
2909 (int) min_offset[mem_mode]);
2910 fprintf (dump_file, " max offset %s %d\n",
2911 GET_MODE_NAME (mem_mode),
2912 (int) max_offset[mem_mode]);
2916 for (i = 2; i <= MAX_RATIO; i++)
2917 if (multiplier_allowed_in_address_p (i, mem_mode))
2923 /* Compute the cost of various addressing modes. */
2925 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2926 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2928 for (i = 0; i < 16; i++)
2931 var_p = (i >> 1) & 1;
2932 off_p = (i >> 2) & 1;
2933 rat_p = (i >> 3) & 1;
2937 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2938 gen_int_mode (rat[mem_mode], Pmode));
2941 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2945 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2947 base = gen_rtx_fmt_e (CONST, Pmode,
2948 gen_rtx_fmt_ee (PLUS, Pmode,
2950 gen_int_mode (off[mem_mode],
2954 base = gen_int_mode (off[mem_mode], Pmode);
2959 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2962 /* To avoid splitting addressing modes, pretend that no cse will
2964 old_cse_not_expected = cse_not_expected;
2965 cse_not_expected = true;
2966 addr = memory_address (mem_mode, addr);
2967 cse_not_expected = old_cse_not_expected;
2971 acost = seq_cost (seq);
2972 acost += address_cost (addr, mem_mode);
2976 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2979 /* On some targets, it is quite expensive to load symbol to a register,
2980 which makes addresses that contain symbols look much more expensive.
2981 However, the symbol will have to be loaded in any case before the
2982 loop (and quite likely we have it in register already), so it does not
2983 make much sense to penalize them too heavily. So make some final
2984 tweaks for the SYMBOL_PRESENT modes:
2986 If VAR_PRESENT is false, and the mode obtained by changing symbol to
2987 var is cheaper, use this mode with small penalty.
2988 If VAR_PRESENT is true, try whether the mode with
2989 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
2990 if this is the case, use it. */
2991 add_c = add_cost (Pmode);
2992 for (i = 0; i < 8; i++)
2995 off_p = (i >> 1) & 1;
2996 rat_p = (i >> 2) & 1;
2998 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3002 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3003 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3006 if (dump_file && (dump_flags & TDF_DETAILS))
3008 fprintf (dump_file, "Address costs:\n");
3010 for (i = 0; i < 16; i++)
3013 var_p = (i >> 1) & 1;
3014 off_p = (i >> 2) & 1;
3015 rat_p = (i >> 3) & 1;
3017 fprintf (dump_file, " ");
3019 fprintf (dump_file, "sym + ");
3021 fprintf (dump_file, "var + ");
3023 fprintf (dump_file, "cst + ");
3025 fprintf (dump_file, "rat * ");
3027 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3028 fprintf (dump_file, "index costs %d\n", acost);
3030 fprintf (dump_file, "\n");
3034 bits = GET_MODE_BITSIZE (Pmode);
3035 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3037 if ((offset >> (bits - 1) & 1))
3042 offset_p = (s_offset != 0
3043 && min_offset[mem_mode] <= s_offset
3044 && s_offset <= max_offset[mem_mode]);
3045 ratio_p = (ratio != 1
3046 && multiplier_allowed_in_address_p (ratio, mem_mode));
3048 if (ratio != 1 && !ratio_p)
3049 cost += multiply_by_cost (ratio, Pmode);
3051 if (s_offset && !offset_p && !symbol_present)
3052 cost += add_cost (Pmode);
3054 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3055 return cost + acost;
3058 /* Estimates cost of forcing expression EXPR into a variable. */
3061 force_expr_to_var_cost (tree expr)
3063 static bool costs_initialized = false;
3064 static unsigned integer_cost;
3065 static unsigned symbol_cost;
3066 static unsigned address_cost;
3068 unsigned cost0, cost1, cost;
3069 enum machine_mode mode;
3071 if (!costs_initialized)
3073 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3074 rtx x = gen_rtx_MEM (DECL_MODE (var),
3075 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3077 tree type = build_pointer_type (integer_type_node);
3079 integer_cost = computation_cost (build_int_cst (integer_type_node,
3082 SET_DECL_RTL (var, x);
3083 TREE_STATIC (var) = 1;
3084 addr = build1 (ADDR_EXPR, type, var);
3085 symbol_cost = computation_cost (addr) + 1;
3088 = computation_cost (build2 (PLUS_EXPR, type,
3090 build_int_cst (type, 2000))) + 1;
3091 if (dump_file && (dump_flags & TDF_DETAILS))
3093 fprintf (dump_file, "force_expr_to_var_cost:\n");
3094 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3095 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3096 fprintf (dump_file, " address %d\n", (int) address_cost);
3097 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3098 fprintf (dump_file, "\n");
3101 costs_initialized = true;
3106 if (SSA_VAR_P (expr))
3109 if (TREE_INVARIANT (expr))
3111 if (TREE_CODE (expr) == INTEGER_CST)
3112 return integer_cost;
3114 if (TREE_CODE (expr) == ADDR_EXPR)
3116 tree obj = TREE_OPERAND (expr, 0);
3118 if (TREE_CODE (obj) == VAR_DECL
3119 || TREE_CODE (obj) == PARM_DECL
3120 || TREE_CODE (obj) == RESULT_DECL)
3124 return address_cost;
3127 switch (TREE_CODE (expr))
3132 op0 = TREE_OPERAND (expr, 0);
3133 op1 = TREE_OPERAND (expr, 1);
3137 if (is_gimple_val (op0))
3140 cost0 = force_expr_to_var_cost (op0);
3142 if (is_gimple_val (op1))
3145 cost1 = force_expr_to_var_cost (op1);
3150 /* Just an arbitrary value, FIXME. */
3151 return target_spill_cost;
3154 mode = TYPE_MODE (TREE_TYPE (expr));
3155 switch (TREE_CODE (expr))
3159 cost = add_cost (mode);
3163 if (cst_and_fits_in_hwi (op0))
3164 cost = multiply_by_cost (int_cst_value (op0), mode);
3165 else if (cst_and_fits_in_hwi (op1))
3166 cost = multiply_by_cost (int_cst_value (op1), mode);
3168 return target_spill_cost;
3178 /* Bound the cost by target_spill_cost. The parts of complicated
3179 computations often are either loop invariant or at least can
3180 be shared between several iv uses, so letting this grow without
3181 limits would not give reasonable results. */
3182 return cost < target_spill_cost ? cost : target_spill_cost;
3185 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3186 invariants the computation depends on. */
3189 force_var_cost (struct ivopts_data *data,
3190 tree expr, bitmap *depends_on)
3194 fd_ivopts_data = data;
3195 walk_tree (&expr, find_depends, depends_on, NULL);
3198 return force_expr_to_var_cost (expr);
3201 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3202 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3203 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3204 invariants the computation depends on. */
3207 split_address_cost (struct ivopts_data *data,
3208 tree addr, bool *symbol_present, bool *var_present,
3209 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3212 HOST_WIDE_INT bitsize;
3213 HOST_WIDE_INT bitpos;
3215 enum machine_mode mode;
3216 int unsignedp, volatilep;
3218 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3219 &unsignedp, &volatilep, false);
3222 || bitpos % BITS_PER_UNIT != 0
3223 || TREE_CODE (core) != VAR_DECL)
3225 *symbol_present = false;
3226 *var_present = true;
3227 fd_ivopts_data = data;
3228 walk_tree (&addr, find_depends, depends_on, NULL);
3229 return target_spill_cost;
3232 *offset += bitpos / BITS_PER_UNIT;
3233 if (TREE_STATIC (core)
3234 || DECL_EXTERNAL (core))
3236 *symbol_present = true;
3237 *var_present = false;
3241 *symbol_present = false;
3242 *var_present = true;
3246 /* Estimates cost of expressing difference of addresses E1 - E2 as
3247 var + symbol + offset. The value of offset is added to OFFSET,
3248 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3249 part is missing. DEPENDS_ON is a set of the invariants the computation
3253 ptr_difference_cost (struct ivopts_data *data,
3254 tree e1, tree e2, bool *symbol_present, bool *var_present,
3255 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3257 HOST_WIDE_INT diff = 0;
3260 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3262 if (ptr_difference_const (e1, e2, &diff))
3265 *symbol_present = false;
3266 *var_present = false;
3270 if (e2 == integer_zero_node)
3271 return split_address_cost (data, TREE_OPERAND (e1, 0),
3272 symbol_present, var_present, offset, depends_on);
3274 *symbol_present = false;
3275 *var_present = true;
3277 cost = force_var_cost (data, e1, depends_on);
3278 cost += force_var_cost (data, e2, depends_on);
3279 cost += add_cost (Pmode);
3284 /* Estimates cost of expressing difference E1 - E2 as
3285 var + symbol + offset. The value of offset is added to OFFSET,
3286 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3287 part is missing. DEPENDS_ON is a set of the invariants the computation
3291 difference_cost (struct ivopts_data *data,
3292 tree e1, tree e2, bool *symbol_present, bool *var_present,
3293 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3296 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3297 unsigned HOST_WIDE_INT off1, off2;
3299 e1 = strip_offset (e1, &off1);
3300 e2 = strip_offset (e2, &off2);
3301 *offset += off1 - off2;
3306 if (TREE_CODE (e1) == ADDR_EXPR)
3307 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3309 *symbol_present = false;
3311 if (operand_equal_p (e1, e2, 0))
3313 *var_present = false;
3316 *var_present = true;
3317 if (integer_zerop (e2))
3318 return force_var_cost (data, e1, depends_on);
3320 if (integer_zerop (e1))
3322 cost = force_var_cost (data, e2, depends_on);
3323 cost += multiply_by_cost (-1, mode);
3328 cost = force_var_cost (data, e1, depends_on);
3329 cost += force_var_cost (data, e2, depends_on);
3330 cost += add_cost (mode);
3335 /* Determines the cost of the computation by that USE is expressed
3336 from induction variable CAND. If ADDRESS_P is true, we just need
3337 to create an address from it, otherwise we want to get it into
3338 register. A set of invariants we depend on is stored in
3339 DEPENDS_ON. AT is the statement at that the value is computed. */
3342 get_computation_cost_at (struct ivopts_data *data,
3343 struct iv_use *use, struct iv_cand *cand,
3344 bool address_p, bitmap *depends_on, tree at)
3346 tree ubase = use->iv->base, ustep = use->iv->step;
3348 tree utype = TREE_TYPE (ubase), ctype;
3349 unsigned HOST_WIDE_INT cstepi, offset = 0;
3350 HOST_WIDE_INT ratio, aratio;
3351 bool var_present, symbol_present;
3352 unsigned cost = 0, n_sums;
3357 /* Only consider real candidates. */
3361 cbase = cand->iv->base;
3362 cstep = cand->iv->step;
3363 ctype = TREE_TYPE (cbase);
3365 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3367 /* We do not have a precision to express the values of use. */
3373 /* Do not try to express address of an object with computation based
3374 on address of a different object. This may cause problems in rtl
3375 level alias analysis (that does not expect this to be happening,
3376 as this is illegal in C), and would be unlikely to be useful
3378 if (use->iv->base_object
3379 && cand->iv->base_object
3380 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3384 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3386 /* TODO -- add direct handling of this case. */
3390 /* CSTEPI is removed from the offset in case statement is after the
3391 increment. If the step is not constant, we use zero instead.
3392 This is a bit imprecise (there is the extra addition), but
3393 redundancy elimination is likely to transform the code so that
3394 it uses value of the variable before increment anyway,
3395 so it is not that much unrealistic. */
3396 if (cst_and_fits_in_hwi (cstep))
3397 cstepi = int_cst_value (cstep);
3401 if (!constant_multiple_of (ustep, cstep, &rat))
3404 if (double_int_fits_in_shwi_p (rat))
3405 ratio = double_int_to_shwi (rat);
3409 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3410 or ratio == 1, it is better to handle this like
3412 ubase - ratio * cbase + ratio * var
3414 (also holds in the case ratio == -1, TODO. */
3416 if (cst_and_fits_in_hwi (cbase))
3418 offset = - ratio * int_cst_value (cbase);
3419 cost += difference_cost (data,
3420 ubase, integer_zero_node,
3421 &symbol_present, &var_present, &offset,
3424 else if (ratio == 1)
3426 cost += difference_cost (data,
3428 &symbol_present, &var_present, &offset,
3433 cost += force_var_cost (data, cbase, depends_on);
3434 cost += add_cost (TYPE_MODE (ctype));
3435 cost += difference_cost (data,
3436 ubase, integer_zero_node,
3437 &symbol_present, &var_present, &offset,
3441 /* If we are after the increment, the value of the candidate is higher by
3443 if (stmt_after_increment (data->current_loop, cand, at))
3444 offset -= ratio * cstepi;
3446 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3447 (symbol/var/const parts may be omitted). If we are looking for an address,
3448 find the cost of addressing this. */
3450 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3451 TYPE_MODE (TREE_TYPE (*use->op_p)));
3453 /* Otherwise estimate the costs for computing the expression. */
3454 aratio = ratio > 0 ? ratio : -ratio;
3455 if (!symbol_present && !var_present && !offset)
3458 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3464 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3468 /* Symbol + offset should be compile-time computable. */
3469 && (symbol_present || offset))
3472 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3476 /* Just get the expression, expand it and measure the cost. */
3477 tree comp = get_computation_at (data->current_loop, use, cand, at);
3483 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3485 return computation_cost (comp);
3489 /* Determines the cost of the computation by that USE is expressed
3490 from induction variable CAND. If ADDRESS_P is true, we just need
3491 to create an address from it, otherwise we want to get it into
3492 register. A set of invariants we depend on is stored in
3496 get_computation_cost (struct ivopts_data *data,
3497 struct iv_use *use, struct iv_cand *cand,
3498 bool address_p, bitmap *depends_on)
3500 return get_computation_cost_at (data,
3501 use, cand, address_p, depends_on, use->stmt);
3504 /* Determines cost of basing replacement of USE on CAND in a generic
3508 determine_use_iv_cost_generic (struct ivopts_data *data,
3509 struct iv_use *use, struct iv_cand *cand)
3514 /* The simple case first -- if we need to express value of the preserved
3515 original biv, the cost is 0. This also prevents us from counting the
3516 cost of increment twice -- once at this use and once in the cost of
3518 if (cand->pos == IP_ORIGINAL
3519 && cand->incremented_at == use->stmt)
3521 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3525 cost = get_computation_cost (data, use, cand, false, &depends_on);
3526 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3528 return cost != INFTY;
3531 /* Determines cost of basing replacement of USE on CAND in an address. */
3534 determine_use_iv_cost_address (struct ivopts_data *data,
3535 struct iv_use *use, struct iv_cand *cand)
3538 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3540 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3542 return cost != INFTY;
3545 /* Computes value of candidate CAND at position AT in iteration NITER, and
3546 stores it to VAL. */
3549 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3552 aff_tree step, delta, nit;
3553 struct iv *iv = cand->iv;
3554 tree type = TREE_TYPE (iv->base);
3556 tree_to_aff_combination (iv->step, type, &step);
3557 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3558 aff_combination_convert (&nit, type);
3559 aff_combination_mult (&nit, &step, &delta);
3560 if (stmt_after_increment (loop, cand, at))
3561 aff_combination_add (&delta, &step);
3563 tree_to_aff_combination (iv->base, type, val);
3564 aff_combination_add (val, &delta);
3567 /* Returns period of induction variable iv. */
3570 iv_period (struct iv *iv)
3572 tree step = iv->step, period, type;
3575 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3577 /* Period of the iv is gcd (step, type range). Since type range is power
3578 of two, it suffices to determine the maximum power of two that divides
3580 pow2div = num_ending_zeros (step);
3581 type = unsigned_type_for (TREE_TYPE (step));
3583 period = build_low_bits_mask (type,
3584 (TYPE_PRECISION (type)
3585 - tree_low_cst (pow2div, 1)));
3590 /* Returns the comparison operator used when eliminating the iv USE. */
3592 static enum tree_code
3593 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3595 struct loop *loop = data->current_loop;
3599 ex_bb = bb_for_stmt (use->stmt);
3600 exit = EDGE_SUCC (ex_bb, 0);
3601 if (flow_bb_inside_loop_p (loop, exit->dest))
3602 exit = EDGE_SUCC (ex_bb, 1);
3604 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3607 /* Check whether it is possible to express the condition in USE by comparison
3608 of candidate CAND. If so, store the value compared with to BOUND. */
3611 may_eliminate_iv (struct ivopts_data *data,
3612 struct iv_use *use, struct iv_cand *cand, tree *bound)
3617 tree wider_type, period, per_type;
3618 struct loop *loop = data->current_loop;
3621 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3624 /* For now works only for exits that dominate the loop latch. TODO -- extend
3625 for other conditions inside loop body. */
3626 ex_bb = bb_for_stmt (use->stmt);
3627 if (use->stmt != last_stmt (ex_bb)
3628 || TREE_CODE (use->stmt) != COND_EXPR)
3630 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3633 exit = EDGE_SUCC (ex_bb, 0);
3634 if (flow_bb_inside_loop_p (loop, exit->dest))
3635 exit = EDGE_SUCC (ex_bb, 1);
3636 if (flow_bb_inside_loop_p (loop, exit->dest))
3639 nit = niter_for_exit (data, exit);
3643 nit_type = TREE_TYPE (nit);
3645 /* Determine whether we may use the variable to test whether niter iterations
3646 elapsed. This is the case iff the period of the induction variable is
3647 greater than the number of iterations. */
3648 period = iv_period (cand->iv);
3651 per_type = TREE_TYPE (period);
3653 wider_type = TREE_TYPE (period);
3654 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3655 wider_type = per_type;
3657 wider_type = nit_type;
3659 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3660 fold_convert (wider_type, period),
3661 fold_convert (wider_type, nit))))
3664 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3665 *bound = aff_combination_to_tree (&bnd);
3669 /* Determines cost of basing replacement of USE on CAND in a condition. */
3672 determine_use_iv_cost_condition (struct ivopts_data *data,
3673 struct iv_use *use, struct iv_cand *cand)
3675 tree bound = NULL_TREE, op, cond;
3676 bitmap depends_on = NULL;
3679 /* Only consider real candidates. */
3682 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3686 if (may_eliminate_iv (data, use, cand, &bound))
3688 cost = force_var_cost (data, bound, &depends_on);
3690 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3691 return cost != INFTY;
3694 /* The induction variable elimination failed; just express the original
3695 giv. If it is compared with an invariant, note that we cannot get
3697 cost = get_computation_cost (data, use, cand, false, &depends_on);
3700 if (TREE_CODE (cond) != SSA_NAME)
3702 op = TREE_OPERAND (cond, 0);
3703 if (TREE_CODE (op) == SSA_NAME
3704 && !integer_zerop (get_iv (data, op)->step))
3705 op = TREE_OPERAND (cond, 1);
3706 if (TREE_CODE (op) == SSA_NAME)
3708 op = get_iv (data, op)->base;
3709 fd_ivopts_data = data;
3710 walk_tree (&op, find_depends, &depends_on, NULL);
3714 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
3715 return cost != INFTY;
3718 /* Determines cost of basing replacement of USE on CAND. Returns false
3719 if USE cannot be based on CAND. */
3722 determine_use_iv_cost (struct ivopts_data *data,
3723 struct iv_use *use, struct iv_cand *cand)
3727 case USE_NONLINEAR_EXPR:
3728 return determine_use_iv_cost_generic (data, use, cand);
3731 return determine_use_iv_cost_address (data, use, cand);
3734 return determine_use_iv_cost_condition (data, use, cand);
3741 /* Determines costs of basing the use of the iv on an iv candidate. */
3744 determine_use_iv_costs (struct ivopts_data *data)
3748 struct iv_cand *cand;
3749 bitmap to_clear = BITMAP_ALLOC (NULL);
3751 alloc_use_cost_map (data);
3753 for (i = 0; i < n_iv_uses (data); i++)
3755 use = iv_use (data, i);
3757 if (data->consider_all_candidates)
3759 for (j = 0; j < n_iv_cands (data); j++)
3761 cand = iv_cand (data, j);
3762 determine_use_iv_cost (data, use, cand);
3769 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3771 cand = iv_cand (data, j);
3772 if (!determine_use_iv_cost (data, use, cand))
3773 bitmap_set_bit (to_clear, j);
3776 /* Remove the candidates for that the cost is infinite from
3777 the list of related candidates. */
3778 bitmap_and_compl_into (use->related_cands, to_clear);
3779 bitmap_clear (to_clear);
3783 BITMAP_FREE (to_clear);
3785 if (dump_file && (dump_flags & TDF_DETAILS))
3787 fprintf (dump_file, "Use-candidate costs:\n");
3789 for (i = 0; i < n_iv_uses (data); i++)
3791 use = iv_use (data, i);
3793 fprintf (dump_file, "Use %d:\n", i);
3794 fprintf (dump_file, " cand\tcost\tdepends on\n");
3795 for (j = 0; j < use->n_map_members; j++)
3797 if (!use->cost_map[j].cand
3798 || use->cost_map[j].cost == INFTY)
3801 fprintf (dump_file, " %d\t%d\t",
3802 use->cost_map[j].cand->id,
3803 use->cost_map[j].cost);
3804 if (use->cost_map[j].depends_on)
3805 bitmap_print (dump_file,
3806 use->cost_map[j].depends_on, "","");
3807 fprintf (dump_file, "\n");
3810 fprintf (dump_file, "\n");
3812 fprintf (dump_file, "\n");
3816 /* Determines cost of the candidate CAND. */
3819 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3821 unsigned cost_base, cost_step;
3830 /* There are two costs associated with the candidate -- its increment
3831 and its initialization. The second is almost negligible for any loop
3832 that rolls enough, so we take it just very little into account. */
3834 base = cand->iv->base;
3835 cost_base = force_var_cost (data, base, NULL);
3836 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3838 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3840 /* Prefer the original iv unless we may gain something by replacing it;
3841 this is not really relevant for artificial ivs created by other
3843 if (cand->pos == IP_ORIGINAL
3844 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3847 /* Prefer not to insert statements into latch unless there are some
3848 already (so that we do not create unnecessary jumps). */
3849 if (cand->pos == IP_END
3850 && empty_block_p (ip_end_pos (data->current_loop)))
3854 /* Determines costs of computation of the candidates. */
3857 determine_iv_costs (struct ivopts_data *data)
3861 if (dump_file && (dump_flags & TDF_DETAILS))
3863 fprintf (dump_file, "Candidate costs:\n");
3864 fprintf (dump_file, " cand\tcost\n");
3867 for (i = 0; i < n_iv_cands (data); i++)
3869 struct iv_cand *cand = iv_cand (data, i);
3871 determine_iv_cost (data, cand);
3873 if (dump_file && (dump_flags & TDF_DETAILS))
3874 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3877 if (dump_file && (dump_flags & TDF_DETAILS))
3878 fprintf (dump_file, "\n");
3881 /* Calculates cost for having SIZE induction variables. */
3884 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3886 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
3889 /* For each size of the induction variable set determine the penalty. */
3892 determine_set_costs (struct ivopts_data *data)
3896 struct loop *loop = data->current_loop;
3899 /* We use the following model (definitely improvable, especially the
3900 cost function -- TODO):
3902 We estimate the number of registers available (using MD data), name it A.
3904 We estimate the number of registers used by the loop, name it U. This
3905 number is obtained as the number of loop phi nodes (not counting virtual
3906 registers and bivs) + the number of variables from outside of the loop.
3908 We set a reserve R (free regs that are used for temporary computations,
3909 etc.). For now the reserve is a constant 3.
3911 Let I be the number of induction variables.
3913 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3914 make a lot of ivs without a reason).
3915 -- if A - R < U + I <= A, the cost is I * PRES_COST
3916 -- if U + I > A, the cost is I * PRES_COST and
3917 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3919 if (dump_file && (dump_flags & TDF_DETAILS))
3921 fprintf (dump_file, "Global costs:\n");
3922 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3923 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3924 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3925 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3929 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3931 op = PHI_RESULT (phi);
3933 if (!is_gimple_reg (op))
3936 if (get_iv (data, op))
3942 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3944 struct version_info *info = ver_info (data, j);
3946 if (info->inv_id && info->has_nonlin_use)
3950 data->regs_used = n;
3951 if (dump_file && (dump_flags & TDF_DETAILS))
3952 fprintf (dump_file, " regs_used %d\n", n);
3954 if (dump_file && (dump_flags & TDF_DETAILS))
3956 fprintf (dump_file, " cost for size:\n");
3957 fprintf (dump_file, " ivs\tcost\n");
3958 for (j = 0; j <= 2 * target_avail_regs; j++)
3959 fprintf (dump_file, " %d\t%d\n", j,
3960 ivopts_global_cost_for_size (data, j));
3961 fprintf (dump_file, "\n");
3965 /* Returns true if A is a cheaper cost pair than B. */
3968 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3976 if (a->cost < b->cost)
3979 if (a->cost > b->cost)
3982 /* In case the costs are the same, prefer the cheaper candidate. */
3983 if (a->cand->cost < b->cand->cost)
3989 /* Computes the cost field of IVS structure. */
3992 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3996 cost += ivs->cand_use_cost;
3997 cost += ivs->cand_cost;
3998 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4003 /* Remove invariants in set INVS to set IVS. */
4006 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4014 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4016 ivs->n_invariant_uses[iid]--;
4017 if (ivs->n_invariant_uses[iid] == 0)
4022 /* Set USE not to be expressed by any candidate in IVS. */
4025 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4028 unsigned uid = use->id, cid;
4029 struct cost_pair *cp;
4031 cp = ivs->cand_for_use[uid];
4037 ivs->cand_for_use[uid] = NULL;
4038 ivs->n_cand_uses[cid]--;
4040 if (ivs->n_cand_uses[cid] == 0)
4042 bitmap_clear_bit (ivs->cands, cid);
4043 /* Do not count the pseudocandidates. */
4047 ivs->cand_cost -= cp->cand->cost;
4049 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4052 ivs->cand_use_cost -= cp->cost;
4054 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4055 iv_ca_recount_cost (data, ivs);
4058 /* Add invariants in set INVS to set IVS. */
4061 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4069 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4071 ivs->n_invariant_uses[iid]++;
4072 if (ivs->n_invariant_uses[iid] == 1)
4077 /* Set cost pair for USE in set IVS to CP. */
4080 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4081 struct iv_use *use, struct cost_pair *cp)
4083 unsigned uid = use->id, cid;
4085 if (ivs->cand_for_use[uid] == cp)
4088 if (ivs->cand_for_use[uid])
4089 iv_ca_set_no_cp (data, ivs, use);
4096 ivs->cand_for_use[uid] = cp;
4097 ivs->n_cand_uses[cid]++;
4098 if (ivs->n_cand_uses[cid] == 1)
4100 bitmap_set_bit (ivs->cands, cid);
4101 /* Do not count the pseudocandidates. */
4105 ivs->cand_cost += cp->cand->cost;
4107 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4110 ivs->cand_use_cost += cp->cost;
4111 iv_ca_set_add_invariants (ivs, cp->depends_on);
4112 iv_ca_recount_cost (data, ivs);
4116 /* Extend set IVS by expressing USE by some of the candidates in it
4120 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4123 struct cost_pair *best_cp = NULL, *cp;
4127 gcc_assert (ivs->upto >= use->id);
4129 if (ivs->upto == use->id)
4135 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4137 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4139 if (cheaper_cost_pair (cp, best_cp))
4143 iv_ca_set_cp (data, ivs, use, best_cp);
4146 /* Get cost for assignment IVS. */
4149 iv_ca_cost (struct iv_ca *ivs)
4151 return (ivs->bad_uses ? INFTY : ivs->cost);
4154 /* Returns true if all dependences of CP are among invariants in IVS. */
4157 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4162 if (!cp->depends_on)
4165 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4167 if (ivs->n_invariant_uses[i] == 0)
4174 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4175 it before NEXT_CHANGE. */
4177 static struct iv_ca_delta *
4178 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4179 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4181 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4184 change->old_cp = old_cp;
4185 change->new_cp = new_cp;
4186 change->next_change = next_change;
4191 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4194 static struct iv_ca_delta *
4195 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4197 struct iv_ca_delta *last;
4205 for (last = l1; last->next_change; last = last->next_change)
4207 last->next_change = l2;
4212 /* Returns candidate by that USE is expressed in IVS. */
4214 static struct cost_pair *
4215 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4217 return ivs->cand_for_use[use->id];
4220 /* Reverse the list of changes DELTA, forming the inverse to it. */
4222 static struct iv_ca_delta *
4223 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4225 struct iv_ca_delta *act, *next, *prev = NULL;
4226 struct cost_pair *tmp;
4228 for (act = delta; act; act = next)
4230 next = act->next_change;
4231 act->next_change = prev;
4235 act->old_cp = act->new_cp;
4242 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4243 reverted instead. */
4246 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4247 struct iv_ca_delta *delta, bool forward)
4249 struct cost_pair *from, *to;
4250 struct iv_ca_delta *act;
4253 delta = iv_ca_delta_reverse (delta);
4255 for (act = delta; act; act = act->next_change)
4259 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4260 iv_ca_set_cp (data, ivs, act->use, to);
4264 iv_ca_delta_reverse (delta);
4267 /* Returns true if CAND is used in IVS. */
4270 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4272 return ivs->n_cand_uses[cand->id] > 0;
4275 /* Returns number of induction variable candidates in the set IVS. */
4278 iv_ca_n_cands (struct iv_ca *ivs)
4280 return ivs->n_cands;
4283 /* Free the list of changes DELTA. */
4286 iv_ca_delta_free (struct iv_ca_delta **delta)
4288 struct iv_ca_delta *act, *next;
4290 for (act = *delta; act; act = next)
4292 next = act->next_change;
4299 /* Allocates new iv candidates assignment. */
4301 static struct iv_ca *
4302 iv_ca_new (struct ivopts_data *data)
4304 struct iv_ca *nw = XNEW (struct iv_ca);
4308 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4309 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4310 nw->cands = BITMAP_ALLOC (NULL);
4313 nw->cand_use_cost = 0;
4315 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4321 /* Free memory occupied by the set IVS. */
4324 iv_ca_free (struct iv_ca **ivs)
4326 free ((*ivs)->cand_for_use);
4327 free ((*ivs)->n_cand_uses);
4328 BITMAP_FREE ((*ivs)->cands);
4329 free ((*ivs)->n_invariant_uses);
4334 /* Dumps IVS to FILE. */
4337 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4339 const char *pref = " invariants ";
4342 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4343 bitmap_print (file, ivs->cands, " candidates ","\n");
4345 for (i = 1; i <= data->max_inv_id; i++)
4346 if (ivs->n_invariant_uses[i])
4348 fprintf (file, "%s%d", pref, i);
4351 fprintf (file, "\n");
4354 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4355 new set, and store differences in DELTA. Number of induction variables
4356 in the new set is stored to N_IVS. */
4359 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4360 struct iv_cand *cand, struct iv_ca_delta **delta,
4365 struct cost_pair *old_cp, *new_cp;
4368 for (i = 0; i < ivs->upto; i++)
4370 use = iv_use (data, i);
4371 old_cp = iv_ca_cand_for_use (ivs, use);
4374 && old_cp->cand == cand)
4377 new_cp = get_use_iv_cost (data, use, cand);
4381 if (!iv_ca_has_deps (ivs, new_cp))
4384 if (!cheaper_cost_pair (new_cp, old_cp))
4387 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4390 iv_ca_delta_commit (data, ivs, *delta, true);
4391 cost = iv_ca_cost (ivs);
4393 *n_ivs = iv_ca_n_cands (ivs);
4394 iv_ca_delta_commit (data, ivs, *delta, false);
4399 /* Try narrowing set IVS by removing CAND. Return the cost of
4400 the new set and store the differences in DELTA. */
4403 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4404 struct iv_cand *cand, struct iv_ca_delta **delta)
4408 struct cost_pair *old_cp, *new_cp, *cp;
4410 struct iv_cand *cnd;
4414 for (i = 0; i < n_iv_uses (data); i++)
4416 use = iv_use (data, i);
4418 old_cp = iv_ca_cand_for_use (ivs, use);
4419 if (old_cp->cand != cand)
4424 if (data->consider_all_candidates)
4426 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4431 cnd = iv_cand (data, ci);
4433 cp = get_use_iv_cost (data, use, cnd);
4436 if (!iv_ca_has_deps (ivs, cp))
4439 if (!cheaper_cost_pair (cp, new_cp))
4447 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4452 cnd = iv_cand (data, ci);
4454 cp = get_use_iv_cost (data, use, cnd);
4457 if (!iv_ca_has_deps (ivs, cp))
4460 if (!cheaper_cost_pair (cp, new_cp))
4469 iv_ca_delta_free (delta);
4473 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4476 iv_ca_delta_commit (data, ivs, *delta, true);
4477 cost = iv_ca_cost (ivs);
4478 iv_ca_delta_commit (data, ivs, *delta, false);
4483 /* Try optimizing the set of candidates IVS by removing candidates different
4484 from to EXCEPT_CAND from it. Return cost of the new set, and store
4485 differences in DELTA. */
4488 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4489 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4492 struct iv_ca_delta *act_delta, *best_delta;
4493 unsigned i, best_cost, acost;
4494 struct iv_cand *cand;
4497 best_cost = iv_ca_cost (ivs);
4499 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4501 cand = iv_cand (data, i);
4503 if (cand == except_cand)
4506 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4508 if (acost < best_cost)
4511 iv_ca_delta_free (&best_delta);
4512 best_delta = act_delta;
4515 iv_ca_delta_free (&act_delta);
4524 /* Recurse to possibly remove other unnecessary ivs. */
4525 iv_ca_delta_commit (data, ivs, best_delta, true);
4526 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4527 iv_ca_delta_commit (data, ivs, best_delta, false);
4528 *delta = iv_ca_delta_join (best_delta, *delta);
4532 /* Tries to extend the sets IVS in the best possible way in order
4533 to express the USE. */
4536 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4539 unsigned best_cost, act_cost;
4542 struct iv_cand *cand;
4543 struct iv_ca_delta *best_delta = NULL, *act_delta;
4544 struct cost_pair *cp;
4546 iv_ca_add_use (data, ivs, use);
4547 best_cost = iv_ca_cost (ivs);
4549 cp = iv_ca_cand_for_use (ivs, use);
4552 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4553 iv_ca_set_no_cp (data, ivs, use);
4556 /* First try important candidates. Only if it fails, try the specific ones.
4557 Rationale -- in loops with many variables the best choice often is to use
4558 just one generic biv. If we added here many ivs specific to the uses,
4559 the optimization algorithm later would be likely to get stuck in a local
4560 minimum, thus causing us to create too many ivs. The approach from
4561 few ivs to more seems more likely to be successful -- starting from few
4562 ivs, replacing an expensive use by a specific iv should always be a
4564 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4566 cand = iv_cand (data, i);
4568 if (iv_ca_cand_used_p (ivs, cand))
4571 cp = get_use_iv_cost (data, use, cand);
4575 iv_ca_set_cp (data, ivs, use, cp);
4576 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4577 iv_ca_set_no_cp (data, ivs, use);
4578 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4580 if (act_cost < best_cost)
4582 best_cost = act_cost;
4584 iv_ca_delta_free (&best_delta);
4585 best_delta = act_delta;
4588 iv_ca_delta_free (&act_delta);
4591 if (best_cost == INFTY)
4593 for (i = 0; i < use->n_map_members; i++)
4595 cp = use->cost_map + i;
4600 /* Already tried this. */
4601 if (cand->important)
4604 if (iv_ca_cand_used_p (ivs, cand))
4608 iv_ca_set_cp (data, ivs, use, cp);
4609 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4610 iv_ca_set_no_cp (data, ivs, use);
4611 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4614 if (act_cost < best_cost)
4616 best_cost = act_cost;
4619 iv_ca_delta_free (&best_delta);
4620 best_delta = act_delta;
4623 iv_ca_delta_free (&act_delta);
4627 iv_ca_delta_commit (data, ivs, best_delta, true);
4628 iv_ca_delta_free (&best_delta);
4630 return (best_cost != INFTY);
4633 /* Finds an initial assignment of candidates to uses. */
4635 static struct iv_ca *
4636 get_initial_solution (struct ivopts_data *data)
4638 struct iv_ca *ivs = iv_ca_new (data);
4641 for (i = 0; i < n_iv_uses (data); i++)
4642 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4651 /* Tries to improve set of induction variables IVS. */
4654 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4656 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4657 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4658 struct iv_cand *cand;
4660 /* Try extending the set of induction variables by one. */
4661 for (i = 0; i < n_iv_cands (data); i++)
4663 cand = iv_cand (data, i);
4665 if (iv_ca_cand_used_p (ivs, cand))
4668 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4672 /* If we successfully added the candidate and the set is small enough,
4673 try optimizing it by removing other candidates. */
4674 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4676 iv_ca_delta_commit (data, ivs, act_delta, true);
4677 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4678 iv_ca_delta_commit (data, ivs, act_delta, false);
4679 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4682 if (acost < best_cost)
4685 iv_ca_delta_free (&best_delta);
4686 best_delta = act_delta;
4689 iv_ca_delta_free (&act_delta);
4694 /* Try removing the candidates from the set instead. */
4695 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4697 /* Nothing more we can do. */
4702 iv_ca_delta_commit (data, ivs, best_delta, true);
4703 gcc_assert (best_cost == iv_ca_cost (ivs));
4704 iv_ca_delta_free (&best_delta);
4708 /* Attempts to find the optimal set of induction variables. We do simple
4709 greedy heuristic -- we try to replace at most one candidate in the selected
4710 solution and remove the unused ivs while this improves the cost. */
4712 static struct iv_ca *
4713 find_optimal_iv_set (struct ivopts_data *data)
4719 /* Get the initial solution. */
4720 set = get_initial_solution (data);
4723 if (dump_file && (dump_flags & TDF_DETAILS))
4724 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4728 if (dump_file && (dump_flags & TDF_DETAILS))
4730 fprintf (dump_file, "Initial set of candidates:\n");
4731 iv_ca_dump (data, dump_file, set);
4734 while (try_improve_iv_set (data, set))
4736 if (dump_file && (dump_flags & TDF_DETAILS))
4738 fprintf (dump_file, "Improved to:\n");
4739 iv_ca_dump (data, dump_file, set);
4743 if (dump_file && (dump_flags & TDF_DETAILS))
4744 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
4746 for (i = 0; i < n_iv_uses (data); i++)
4748 use = iv_use (data, i);
4749 use->selected = iv_ca_cand_for_use (set, use)->cand;
4755 /* Creates a new induction variable corresponding to CAND. */
4758 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4760 block_stmt_iterator incr_pos;
4770 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4774 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4779 /* Mark that the iv is preserved. */
4780 name_info (data, cand->var_before)->preserve_biv = true;
4781 name_info (data, cand->var_after)->preserve_biv = true;
4783 /* Rewrite the increment so that it uses var_before directly. */
4784 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4789 gimple_add_tmp_var (cand->var_before);
4790 add_referenced_var (cand->var_before);
4792 base = unshare_expr (cand->iv->base);
4794 create_iv (base, unshare_expr (cand->iv->step),
4795 cand->var_before, data->current_loop,
4796 &incr_pos, after, &cand->var_before, &cand->var_after);
4799 /* Creates new induction variables described in SET. */
4802 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4805 struct iv_cand *cand;
4808 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4810 cand = iv_cand (data, i);
4811 create_new_iv (data, cand);
4815 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4816 is true, remove also the ssa name defined by the statement. */
4819 remove_statement (tree stmt, bool including_defined_name)
4821 if (TREE_CODE (stmt) == PHI_NODE)
4823 remove_phi_node (stmt, NULL_TREE, including_defined_name);
4827 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4829 bsi_remove (&bsi, true);
4833 /* Rewrites USE (definition of iv used in a nonlinear expression)
4834 using candidate CAND. */
4837 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4838 struct iv_use *use, struct iv_cand *cand)
4841 tree op, stmts, tgt, ass;
4842 block_stmt_iterator bsi, pbsi;
4844 /* An important special case -- if we are asked to express value of
4845 the original iv by itself, just exit; there is no need to
4846 introduce a new computation (that might also need casting the
4847 variable to unsigned and back). */
4848 if (cand->pos == IP_ORIGINAL
4849 && cand->incremented_at == use->stmt)
4851 tree step, ctype, utype;
4852 enum tree_code incr_code = PLUS_EXPR;
4854 gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
4855 gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
4857 step = cand->iv->step;
4858 ctype = TREE_TYPE (step);
4859 utype = TREE_TYPE (cand->var_after);
4860 if (TREE_CODE (step) == NEGATE_EXPR)
4862 incr_code = MINUS_EXPR;
4863 step = TREE_OPERAND (step, 0);
4866 /* Check whether we may leave the computation unchanged.
4867 This is the case only if it does not rely on other
4868 computations in the loop -- otherwise, the computation
4869 we rely upon may be removed in remove_unused_ivs,
4870 thus leading to ICE. */
4871 op = GIMPLE_STMT_OPERAND (use->stmt, 1);
4872 if (TREE_CODE (op) == PLUS_EXPR
4873 || TREE_CODE (op) == MINUS_EXPR)
4875 if (TREE_OPERAND (op, 0) == cand->var_before)
4876 op = TREE_OPERAND (op, 1);
4877 else if (TREE_CODE (op) == PLUS_EXPR
4878 && TREE_OPERAND (op, 1) == cand->var_before)
4879 op = TREE_OPERAND (op, 0);
4887 && (TREE_CODE (op) == INTEGER_CST
4888 || operand_equal_p (op, step, 0)))
4891 /* Otherwise, add the necessary computations to express
4893 op = fold_convert (ctype, cand->var_before);
4894 comp = fold_convert (utype,
4895 build2 (incr_code, ctype, op,
4896 unshare_expr (step)));
4900 comp = get_computation (data->current_loop, use, cand);
4901 gcc_assert (comp != NULL_TREE);
4904 switch (TREE_CODE (use->stmt))
4907 tgt = PHI_RESULT (use->stmt);
4909 /* If we should keep the biv, do not replace it. */
4910 if (name_info (data, tgt)->preserve_biv)
4913 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4914 while (!bsi_end_p (pbsi)
4915 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4922 case GIMPLE_MODIFY_STMT:
4923 tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
4924 bsi = bsi_for_stmt (use->stmt);
4931 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4933 if (TREE_CODE (use->stmt) == PHI_NODE)
4936 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4937 ass = build2_gimple (GIMPLE_MODIFY_STMT, tgt, op);
4938 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4939 remove_statement (use->stmt, false);
4940 SSA_NAME_DEF_STMT (tgt) = ass;
4945 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4946 GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
4950 /* Replaces ssa name in index IDX by its basic variable. Callback for
4954 idx_remove_ssa_names (tree base, tree *idx,
4955 void *data ATTRIBUTE_UNUSED)
4959 if (TREE_CODE (*idx) == SSA_NAME)
4960 *idx = SSA_NAME_VAR (*idx);
4962 if (TREE_CODE (base) == ARRAY_REF)
4964 op = &TREE_OPERAND (base, 2);
4966 && TREE_CODE (*op) == SSA_NAME)
4967 *op = SSA_NAME_VAR (*op);
4968 op = &TREE_OPERAND (base, 3);
4970 && TREE_CODE (*op) == SSA_NAME)
4971 *op = SSA_NAME_VAR (*op);
4977 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4980 unshare_and_remove_ssa_names (tree ref)
4982 ref = unshare_expr (ref);
4983 for_each_index (&ref, idx_remove_ssa_names, NULL);
4988 /* Extract the alias analysis info for the memory reference REF. There are
4989 several ways how this information may be stored and what precisely is
4990 its semantics depending on the type of the reference, but there always is
4991 somewhere hidden one _DECL node that is used to determine the set of
4992 virtual operands for the reference. The code below deciphers this jungle
4993 and extracts this single useful piece of information. */
4996 get_ref_tag (tree ref, tree orig)
4998 tree var = get_base_address (ref);
4999 tree aref = NULL_TREE, tag, sv;
5000 HOST_WIDE_INT offset, size, maxsize;
5002 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5004 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5009 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5010 return unshare_expr (sv);
5015 if (TREE_CODE (var) == INDIRECT_REF)
5017 /* If the base is a dereference of a pointer, first check its name memory
5018 tag. If it does not have one, use its symbol memory tag. */
5019 var = TREE_OPERAND (var, 0);
5020 if (TREE_CODE (var) != SSA_NAME)
5023 if (SSA_NAME_PTR_INFO (var))
5025 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5030 var = SSA_NAME_VAR (var);
5031 tag = symbol_mem_tag (var);
5032 gcc_assert (tag != NULL_TREE);
5040 tag = symbol_mem_tag (var);
5048 /* Copies the reference information from OLD_REF to NEW_REF. */
5051 copy_ref_info (tree new_ref, tree old_ref)
5053 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5054 copy_mem_ref_info (new_ref, old_ref);
5057 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5058 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5062 /* Rewrites USE (address that is an iv) using candidate CAND. */
5065 rewrite_use_address (struct ivopts_data *data,
5066 struct iv_use *use, struct iv_cand *cand)
5069 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5073 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5075 unshare_aff_combination (&aff);
5077 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5078 copy_ref_info (ref, *use->op_p);
5082 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5086 rewrite_use_compare (struct ivopts_data *data,
5087 struct iv_use *use, struct iv_cand *cand)
5090 tree *op_p, cond, op, stmts, bound;
5091 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5092 enum tree_code compare;
5093 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5098 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5099 tree var_type = TREE_TYPE (var);
5101 compare = iv_elimination_compare (data, use);
5102 bound = fold_convert (var_type, bound);
5103 op = force_gimple_operand (unshare_expr (bound), &stmts,
5107 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5109 *use->op_p = build2 (compare, boolean_type_node, var, op);
5110 update_stmt (use->stmt);
5114 /* The induction variable elimination failed; just express the original
5116 comp = get_computation (data->current_loop, use, cand);
5117 gcc_assert (comp != NULL_TREE);
5120 op_p = &TREE_OPERAND (cond, 0);
5121 if (TREE_CODE (*op_p) != SSA_NAME
5122 || integer_zerop (get_iv (data, *op_p)->step))
5123 op_p = &TREE_OPERAND (cond, 1);
5125 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5127 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5132 /* Rewrites USE using candidate CAND. */
5135 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5137 push_stmt_changes (&use->stmt);
5141 case USE_NONLINEAR_EXPR:
5142 rewrite_use_nonlinear_expr (data, use, cand);
5146 rewrite_use_address (data, use, cand);
5150 rewrite_use_compare (data, use, cand);
5157 pop_stmt_changes (&use->stmt);
5160 /* Rewrite the uses using the selected induction variables. */
5163 rewrite_uses (struct ivopts_data *data)
5166 struct iv_cand *cand;
5169 for (i = 0; i < n_iv_uses (data); i++)
5171 use = iv_use (data, i);
5172 cand = use->selected;
5175 rewrite_use (data, use, cand);
5179 /* Removes the ivs that are not used after rewriting. */
5182 remove_unused_ivs (struct ivopts_data *data)
5187 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5189 struct version_info *info;
5191 info = ver_info (data, j);
5193 && !integer_zerop (info->iv->step)
5195 && !info->iv->have_use_for
5196 && !info->preserve_biv)
5197 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5201 /* Frees data allocated by the optimization of a single loop. */
5204 free_loop_data (struct ivopts_data *data)
5210 htab_empty (data->niters);
5212 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5214 struct version_info *info;
5216 info = ver_info (data, i);
5220 info->has_nonlin_use = false;
5221 info->preserve_biv = false;
5224 bitmap_clear (data->relevant);
5225 bitmap_clear (data->important_candidates);
5227 for (i = 0; i < n_iv_uses (data); i++)
5229 struct iv_use *use = iv_use (data, i);
5232 BITMAP_FREE (use->related_cands);
5233 for (j = 0; j < use->n_map_members; j++)
5234 if (use->cost_map[j].depends_on)
5235 BITMAP_FREE (use->cost_map[j].depends_on);
5236 free (use->cost_map);
5239 VEC_truncate (iv_use_p, data->iv_uses, 0);
5241 for (i = 0; i < n_iv_cands (data); i++)
5243 struct iv_cand *cand = iv_cand (data, i);
5247 if (cand->depends_on)
5248 BITMAP_FREE (cand->depends_on);
5251 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5253 if (data->version_info_size < num_ssa_names)
5255 data->version_info_size = 2 * num_ssa_names;
5256 free (data->version_info);
5257 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5260 data->max_inv_id = 0;
5262 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5263 SET_DECL_RTL (obj, NULL_RTX);
5265 VEC_truncate (tree, decl_rtl_to_reset, 0);
5268 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5272 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5274 free_loop_data (data);
5275 free (data->version_info);
5276 BITMAP_FREE (data->relevant);
5277 BITMAP_FREE (data->important_candidates);
5278 htab_delete (data->niters);
5280 VEC_free (tree, heap, decl_rtl_to_reset);
5281 VEC_free (iv_use_p, heap, data->iv_uses);
5282 VEC_free (iv_cand_p, heap, data->iv_candidates);
5285 /* Optimizes the LOOP. Returns true if anything changed. */
5288 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5290 bool changed = false;
5291 struct iv_ca *iv_ca;
5294 data->current_loop = loop;
5296 if (dump_file && (dump_flags & TDF_DETAILS))
5298 fprintf (dump_file, "Processing loop %d\n", loop->num);
5300 exit = single_dom_exit (loop);
5303 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5304 exit->src->index, exit->dest->index);
5305 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5306 fprintf (dump_file, "\n");
5309 fprintf (dump_file, "\n");
5312 /* For each ssa name determines whether it behaves as an induction variable
5314 if (!find_induction_variables (data))
5317 /* Finds interesting uses (item 1). */
5318 find_interesting_uses (data);
5319 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5322 /* Finds candidates for the induction variables (item 2). */
5323 find_iv_candidates (data);
5325 /* Calculates the costs (item 3, part 1). */
5326 determine_use_iv_costs (data);
5327 determine_iv_costs (data);
5328 determine_set_costs (data);
5330 /* Find the optimal set of induction variables (item 3, part 2). */
5331 iv_ca = find_optimal_iv_set (data);
5336 /* Create the new induction variables (item 4, part 1). */
5337 create_new_ivs (data, iv_ca);
5338 iv_ca_free (&iv_ca);
5340 /* Rewrite the uses (item 4, part 2). */
5341 rewrite_uses (data);
5343 /* Remove the ivs that are unused after rewriting. */
5344 remove_unused_ivs (data);
5346 /* We have changed the structure of induction variables; it might happen
5347 that definitions in the scev database refer to some of them that were
5352 free_loop_data (data);
5357 /* Main entry point. Optimizes induction variables in loops. */
5360 tree_ssa_iv_optimize (void)
5363 struct ivopts_data data;
5366 tree_ssa_iv_optimize_init (&data);
5368 /* Optimize the loops starting with the innermost ones. */
5369 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5371 if (dump_file && (dump_flags & TDF_DETAILS))
5372 flow_loop_dump (loop, dump_file, NULL, 1);
5374 tree_ssa_iv_optimize_loop (&data, loop);
5377 tree_ssa_iv_optimize_finalize (&data);