1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
81 #include "tree-pass.h"
83 #include "insn-config.h"
85 #include "pointer-set.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
95 /* The infinite cost. */
96 #define INFTY 10000000
98 /* The expected number of loop iterations. TODO -- use profiling instead of
100 #define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 unsigned inv_id; /* Id of an invariant. */
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
134 /* Cost of a computation. */
137 int cost; /* The runtime cost. */
138 unsigned complexity; /* The estimate of the complexity of the code for
139 the computation (in no concrete units --
140 complexity field should be larger for more
141 complex expressions and addressing modes). */
144 static const comp_cost zero_cost = {0, 0};
145 static const comp_cost infinite_cost = {INFTY, INFTY};
147 /* The candidate - cost pair. */
150 struct iv_cand *cand; /* The candidate. */
151 comp_cost cost; /* The cost. */
152 bitmap depends_on; /* The list of invariants that have to be
154 tree value; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
162 unsigned id; /* The id of the use. */
163 enum use_type type; /* Type of the use. */
164 struct iv *iv; /* The induction variable it is based on. */
165 gimple stmt; /* Statement in that it occurs. */
166 tree *op_p; /* The place where it occurs. */
167 bitmap related_cands; /* The set of "related" iv candidates, plus the common
170 unsigned n_map_members; /* Number of candidates in the cost_map list. */
171 struct cost_pair *cost_map;
172 /* The costs wrto the iv candidates. */
174 struct iv_cand *selected;
175 /* The selected candidate. */
178 /* The position where the iv is computed. */
181 IP_NORMAL, /* At the end, just before the exit condition. */
182 IP_END, /* At the end of the latch block. */
183 IP_BEFORE_USE, /* Immediately before a specific use. */
184 IP_AFTER_USE, /* Immediately after a specific use. */
185 IP_ORIGINAL /* The original biv. */
188 /* The induction variable candidate. */
191 unsigned id; /* The number of the candidate. */
192 bool important; /* Whether this is an "important" candidate, i.e. such
193 that it should be considered by all uses. */
194 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
195 gimple incremented_at;/* For original biv, the statement where it is
197 tree var_before; /* The variable used for it before increment. */
198 tree var_after; /* The variable used for it after increment. */
199 struct iv *iv; /* The value of the candidate. NULL for
200 "pseudocandidate" used to indicate the possibility
201 to replace the final value of an iv by direct
202 computation of the value. */
203 unsigned cost; /* Cost of the candidate. */
204 unsigned cost_step; /* Cost of the candidate's increment operation. */
205 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
206 where it is incremented. */
207 bitmap depends_on; /* The list of invariants that are used in step of the
211 /* The data used by the induction variable optimizations. */
213 typedef struct iv_use *iv_use_p;
215 DEF_VEC_ALLOC_P(iv_use_p,heap);
217 typedef struct iv_cand *iv_cand_p;
218 DEF_VEC_P(iv_cand_p);
219 DEF_VEC_ALLOC_P(iv_cand_p,heap);
223 /* The currently optimized loop. */
224 struct loop *current_loop;
226 /* Numbers of iterations for all exits of the current loop. */
227 struct pointer_map_t *niters;
229 /* Number of registers used in it. */
232 /* The size of version_info array allocated. */
233 unsigned version_info_size;
235 /* The array of information for the ssa names. */
236 struct version_info *version_info;
238 /* The bitmap of indices in version_info whose value was changed. */
241 /* The uses of induction variables. */
242 VEC(iv_use_p,heap) *iv_uses;
244 /* The candidates. */
245 VEC(iv_cand_p,heap) *iv_candidates;
247 /* A bitmap of important candidates. */
248 bitmap important_candidates;
250 /* The maximum invariant id. */
253 /* Whether to consider just related and important candidates when replacing a
255 bool consider_all_candidates;
257 /* Are we optimizing for speed? */
261 /* An assignment of iv candidates to uses. */
265 /* The number of uses covered by the assignment. */
268 /* Number of uses that cannot be expressed by the candidates in the set. */
271 /* Candidate assigned to a use, together with the related costs. */
272 struct cost_pair **cand_for_use;
274 /* Number of times each candidate is used. */
275 unsigned *n_cand_uses;
277 /* The candidates used. */
280 /* The number of candidates in the set. */
283 /* Total number of registers needed. */
286 /* Total cost of expressing uses. */
287 comp_cost cand_use_cost;
289 /* Total cost of candidates. */
292 /* Number of times each invariant is used. */
293 unsigned *n_invariant_uses;
295 /* Total cost of the assignment. */
299 /* Difference of two iv candidate assignments. */
306 /* An old assignment (for rollback purposes). */
307 struct cost_pair *old_cp;
309 /* A new assignment. */
310 struct cost_pair *new_cp;
312 /* Next change in the list. */
313 struct iv_ca_delta *next_change;
316 /* Bound on number of candidates below that all candidates are considered. */
318 #define CONSIDER_ALL_CANDIDATES_BOUND \
319 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321 /* If there are more iv occurrences, we just give up (it is quite unlikely that
322 optimizing such a loop would help, and it would take ages). */
324 #define MAX_CONSIDERED_USES \
325 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
327 /* If there are at most this number of ivs in the set, try removing unnecessary
328 ivs from the set always. */
330 #define ALWAYS_PRUNE_CAND_SET_BOUND \
331 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
333 /* The list of trees for that the decl_rtl field must be reset is stored
336 static VEC(tree,heap) *decl_rtl_to_reset;
338 /* Number of uses recorded in DATA. */
340 static inline unsigned
341 n_iv_uses (struct ivopts_data *data)
343 return VEC_length (iv_use_p, data->iv_uses);
346 /* Ith use recorded in DATA. */
348 static inline struct iv_use *
349 iv_use (struct ivopts_data *data, unsigned i)
351 return VEC_index (iv_use_p, data->iv_uses, i);
354 /* Number of candidates recorded in DATA. */
356 static inline unsigned
357 n_iv_cands (struct ivopts_data *data)
359 return VEC_length (iv_cand_p, data->iv_candidates);
362 /* Ith candidate recorded in DATA. */
364 static inline struct iv_cand *
365 iv_cand (struct ivopts_data *data, unsigned i)
367 return VEC_index (iv_cand_p, data->iv_candidates, i);
370 /* The single loop exit if it dominates the latch, NULL otherwise. */
373 single_dom_exit (struct loop *loop)
375 edge exit = single_exit (loop);
380 if (!just_once_each_iteration_p (loop, exit->src))
386 /* Dumps information about the induction variable IV to FILE. */
388 extern void dump_iv (FILE *, struct iv *);
390 dump_iv (FILE *file, struct iv *iv)
394 fprintf (file, "ssa name ");
395 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
396 fprintf (file, "\n");
399 fprintf (file, " type ");
400 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
401 fprintf (file, "\n");
405 fprintf (file, " base ");
406 print_generic_expr (file, iv->base, TDF_SLIM);
407 fprintf (file, "\n");
409 fprintf (file, " step ");
410 print_generic_expr (file, iv->step, TDF_SLIM);
411 fprintf (file, "\n");
415 fprintf (file, " invariant ");
416 print_generic_expr (file, iv->base, TDF_SLIM);
417 fprintf (file, "\n");
422 fprintf (file, " base object ");
423 print_generic_expr (file, iv->base_object, TDF_SLIM);
424 fprintf (file, "\n");
428 fprintf (file, " is a biv\n");
431 /* Dumps information about the USE to FILE. */
433 extern void dump_use (FILE *, struct iv_use *);
435 dump_use (FILE *file, struct iv_use *use)
437 fprintf (file, "use %d\n", use->id);
441 case USE_NONLINEAR_EXPR:
442 fprintf (file, " generic\n");
446 fprintf (file, " address\n");
450 fprintf (file, " compare\n");
457 fprintf (file, " in statement ");
458 print_gimple_stmt (file, use->stmt, 0, 0);
459 fprintf (file, "\n");
461 fprintf (file, " at position ");
463 print_generic_expr (file, *use->op_p, TDF_SLIM);
464 fprintf (file, "\n");
466 dump_iv (file, use->iv);
468 if (use->related_cands)
470 fprintf (file, " related candidates ");
471 dump_bitmap (file, use->related_cands);
475 /* Dumps information about the uses to FILE. */
477 extern void dump_uses (FILE *, struct ivopts_data *);
479 dump_uses (FILE *file, struct ivopts_data *data)
484 for (i = 0; i < n_iv_uses (data); i++)
486 use = iv_use (data, i);
488 dump_use (file, use);
489 fprintf (file, "\n");
493 /* Dumps information about induction variable candidate CAND to FILE. */
495 extern void dump_cand (FILE *, struct iv_cand *);
497 dump_cand (FILE *file, struct iv_cand *cand)
499 struct iv *iv = cand->iv;
501 fprintf (file, "candidate %d%s\n",
502 cand->id, cand->important ? " (important)" : "");
504 if (cand->depends_on)
506 fprintf (file, " depends on ");
507 dump_bitmap (file, cand->depends_on);
512 fprintf (file, " final value replacement\n");
519 fprintf (file, " incremented before exit test\n");
523 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
527 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
531 fprintf (file, " incremented at end\n");
535 fprintf (file, " original biv\n");
542 /* Returns the info for ssa version VER. */
544 static inline struct version_info *
545 ver_info (struct ivopts_data *data, unsigned ver)
547 return data->version_info + ver;
550 /* Returns the info for ssa name NAME. */
552 static inline struct version_info *
553 name_info (struct ivopts_data *data, tree name)
555 return ver_info (data, SSA_NAME_VERSION (name));
558 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
562 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
564 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
568 if (sbb == loop->latch)
574 return stmt == last_stmt (bb);
577 /* Returns true if STMT if after the place where the original induction
578 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
579 if the positions are identical. */
582 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
584 basic_block cand_bb = gimple_bb (cand->incremented_at);
585 basic_block stmt_bb = gimple_bb (stmt);
587 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
590 if (stmt_bb != cand_bb)
594 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
599 /* Returns true if STMT if after the place where the induction variable
600 CAND is incremented in LOOP. */
603 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
611 return stmt_after_ip_normal_pos (loop, stmt);
615 return stmt_after_inc_pos (cand, stmt, false);
618 return stmt_after_inc_pos (cand, stmt, true);
625 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
628 abnormal_ssa_name_p (tree exp)
633 if (TREE_CODE (exp) != SSA_NAME)
636 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
639 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
640 abnormal phi node. Callback for for_each_index. */
643 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
644 void *data ATTRIBUTE_UNUSED)
646 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
648 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
654 return !abnormal_ssa_name_p (*index);
657 /* Returns true if EXPR contains a ssa name that occurs in an
658 abnormal phi node. */
661 contains_abnormal_ssa_name_p (tree expr)
664 enum tree_code_class codeclass;
669 code = TREE_CODE (expr);
670 codeclass = TREE_CODE_CLASS (code);
672 if (code == SSA_NAME)
673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675 if (code == INTEGER_CST
676 || is_gimple_min_invariant (expr))
679 if (code == ADDR_EXPR)
680 return !for_each_index (&TREE_OPERAND (expr, 0),
681 idx_contains_abnormal_ssa_name_p,
688 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
693 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
705 /* Returns tree describing number of iterations determined from
706 EXIT of DATA->current_loop, or NULL if something goes wrong. */
709 niter_for_exit (struct ivopts_data *data, edge exit)
711 struct tree_niter_desc desc;
717 data->niters = pointer_map_create ();
721 slot = pointer_map_contains (data->niters, exit);
725 /* Try to determine number of iterations. We must know it
726 unconditionally (i.e., without possibility of # of iterations
727 being zero). Also, we cannot safely work with ssa names that
728 appear in phi nodes on abnormal edges, so that we do not create
729 overlapping life ranges for them (PR 27283). */
730 if (number_of_iterations_exit (data->current_loop,
732 && integer_zerop (desc.may_be_zero)
733 && !contains_abnormal_ssa_name_p (desc.niter))
738 *pointer_map_insert (data->niters, exit) = niter;
741 niter = (tree) *slot;
746 /* Returns tree describing number of iterations determined from
747 single dominating exit of DATA->current_loop, or NULL if something
751 niter_for_single_dom_exit (struct ivopts_data *data)
753 edge exit = single_dom_exit (data->current_loop);
758 return niter_for_exit (data, exit);
761 /* Initializes data structures used by the iv optimization pass, stored
765 tree_ssa_iv_optimize_init (struct ivopts_data *data)
767 data->version_info_size = 2 * num_ssa_names;
768 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
769 data->relevant = BITMAP_ALLOC (NULL);
770 data->important_candidates = BITMAP_ALLOC (NULL);
771 data->max_inv_id = 0;
773 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
774 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
775 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
778 /* Returns a memory object to that EXPR points. In case we are able to
779 determine that it does not point to any such object, NULL is returned. */
782 determine_base_object (tree expr)
784 enum tree_code code = TREE_CODE (expr);
787 /* If this is a pointer casted to any type, we need to determine
788 the base object for the pointer; so handle conversions before
789 throwing away non-pointer expressions. */
790 if (CONVERT_EXPR_P (expr))
791 return determine_base_object (TREE_OPERAND (expr, 0));
793 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
802 obj = TREE_OPERAND (expr, 0);
803 base = get_base_address (obj);
808 if (TREE_CODE (base) == INDIRECT_REF)
809 return determine_base_object (TREE_OPERAND (base, 0));
811 return fold_convert (ptr_type_node,
812 build_fold_addr_expr (base));
814 case POINTER_PLUS_EXPR:
815 return determine_base_object (TREE_OPERAND (expr, 0));
819 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
823 return fold_convert (ptr_type_node, expr);
827 /* Allocates an induction variable with given initial value BASE and step STEP
831 alloc_iv (tree base, tree step)
833 struct iv *iv = XCNEW (struct iv);
834 gcc_assert (step != NULL_TREE);
837 iv->base_object = determine_base_object (base);
840 iv->have_use_for = false;
842 iv->ssa_name = NULL_TREE;
847 /* Sets STEP and BASE for induction variable IV. */
850 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
852 struct version_info *info = name_info (data, iv);
854 gcc_assert (!info->iv);
856 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
857 info->iv = alloc_iv (base, step);
858 info->iv->ssa_name = iv;
861 /* Finds induction variable declaration for VAR. */
864 get_iv (struct ivopts_data *data, tree var)
867 tree type = TREE_TYPE (var);
869 if (!POINTER_TYPE_P (type)
870 && !INTEGRAL_TYPE_P (type))
873 if (!name_info (data, var)->iv)
875 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
878 || !flow_bb_inside_loop_p (data->current_loop, bb))
879 set_iv (data, var, var, build_int_cst (type, 0));
882 return name_info (data, var)->iv;
885 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
886 not define a simple affine biv with nonzero step. */
889 determine_biv_step (gimple phi)
891 struct loop *loop = gimple_bb (phi)->loop_father;
892 tree name = PHI_RESULT (phi);
895 if (!is_gimple_reg (name))
898 if (!simple_iv (loop, loop, name, &iv, true))
901 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
904 /* Finds basic ivs. */
907 find_bivs (struct ivopts_data *data)
910 tree step, type, base;
912 struct loop *loop = data->current_loop;
913 gimple_stmt_iterator psi;
915 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
917 phi = gsi_stmt (psi);
919 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
922 step = determine_biv_step (phi);
926 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
927 base = expand_simple_operations (base);
928 if (contains_abnormal_ssa_name_p (base)
929 || contains_abnormal_ssa_name_p (step))
932 type = TREE_TYPE (PHI_RESULT (phi));
933 base = fold_convert (type, base);
936 if (POINTER_TYPE_P (type))
937 step = fold_convert (sizetype, step);
939 step = fold_convert (type, step);
942 set_iv (data, PHI_RESULT (phi), base, step);
949 /* Marks basic ivs. */
952 mark_bivs (struct ivopts_data *data)
956 struct iv *iv, *incr_iv;
957 struct loop *loop = data->current_loop;
959 gimple_stmt_iterator psi;
961 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
963 phi = gsi_stmt (psi);
965 iv = get_iv (data, PHI_RESULT (phi));
969 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
970 incr_iv = get_iv (data, var);
974 /* If the increment is in the subloop, ignore it. */
975 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
976 if (incr_bb->loop_father != data->current_loop
977 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
981 incr_iv->biv_p = true;
985 /* Checks whether STMT defines a linear induction variable and stores its
989 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
992 struct loop *loop = data->current_loop;
994 iv->base = NULL_TREE;
995 iv->step = NULL_TREE;
997 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1000 lhs = gimple_assign_lhs (stmt);
1001 if (TREE_CODE (lhs) != SSA_NAME)
1004 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1006 iv->base = expand_simple_operations (iv->base);
1008 if (contains_abnormal_ssa_name_p (iv->base)
1009 || contains_abnormal_ssa_name_p (iv->step))
1015 /* Finds general ivs in statement STMT. */
1018 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1022 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1025 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1028 /* Finds general ivs in basic block BB. */
1031 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1033 gimple_stmt_iterator bsi;
1035 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1036 find_givs_in_stmt (data, gsi_stmt (bsi));
1039 /* Finds general ivs. */
1042 find_givs (struct ivopts_data *data)
1044 struct loop *loop = data->current_loop;
1045 basic_block *body = get_loop_body_in_dom_order (loop);
1048 for (i = 0; i < loop->num_nodes; i++)
1049 find_givs_in_bb (data, body[i]);
1053 /* For each ssa name defined in LOOP determines whether it is an induction
1054 variable and if so, its initial value and step. */
1057 find_induction_variables (struct ivopts_data *data)
1062 if (!find_bivs (data))
1068 if (dump_file && (dump_flags & TDF_DETAILS))
1070 tree niter = niter_for_single_dom_exit (data);
1074 fprintf (dump_file, " number of iterations ");
1075 print_generic_expr (dump_file, niter, TDF_SLIM);
1076 fprintf (dump_file, "\n\n");
1079 fprintf (dump_file, "Induction variables:\n\n");
1081 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1083 if (ver_info (data, i)->iv)
1084 dump_iv (dump_file, ver_info (data, i)->iv);
1091 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1093 static struct iv_use *
1094 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1095 gimple stmt, enum use_type use_type)
1097 struct iv_use *use = XCNEW (struct iv_use);
1099 use->id = n_iv_uses (data);
1100 use->type = use_type;
1104 use->related_cands = BITMAP_ALLOC (NULL);
1106 /* To avoid showing ssa name in the dumps, if it was not reset by the
1108 iv->ssa_name = NULL_TREE;
1110 if (dump_file && (dump_flags & TDF_DETAILS))
1111 dump_use (dump_file, use);
1113 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1118 /* Checks whether OP is a loop-level invariant and if so, records it.
1119 NONLINEAR_USE is true if the invariant is used in a way we do not
1120 handle specially. */
1123 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1126 struct version_info *info;
1128 if (TREE_CODE (op) != SSA_NAME
1129 || !is_gimple_reg (op))
1132 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1134 && flow_bb_inside_loop_p (data->current_loop, bb))
1137 info = name_info (data, op);
1139 info->has_nonlin_use |= nonlinear_use;
1141 info->inv_id = ++data->max_inv_id;
1142 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1145 /* Checks whether the use OP is interesting and if so, records it. */
1147 static struct iv_use *
1148 find_interesting_uses_op (struct ivopts_data *data, tree op)
1155 if (TREE_CODE (op) != SSA_NAME)
1158 iv = get_iv (data, op);
1162 if (iv->have_use_for)
1164 use = iv_use (data, iv->use_id);
1166 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1170 if (integer_zerop (iv->step))
1172 record_invariant (data, op, true);
1175 iv->have_use_for = true;
1177 civ = XNEW (struct iv);
1180 stmt = SSA_NAME_DEF_STMT (op);
1181 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1182 || is_gimple_assign (stmt));
1184 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1185 iv->use_id = use->id;
1190 /* Given a condition in statement STMT, checks whether it is a compare
1191 of an induction variable and an invariant. If this is the case,
1192 CONTROL_VAR is set to location of the iv, BOUND to the location of
1193 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1194 induction variable descriptions, and true is returned. If this is not
1195 the case, CONTROL_VAR and BOUND are set to the arguments of the
1196 condition and false is returned. */
1199 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1200 tree **control_var, tree **bound,
1201 struct iv **iv_var, struct iv **iv_bound)
1203 /* The objects returned when COND has constant operands. */
1204 static struct iv const_iv;
1206 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1207 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1210 if (gimple_code (stmt) == GIMPLE_COND)
1212 op0 = gimple_cond_lhs_ptr (stmt);
1213 op1 = gimple_cond_rhs_ptr (stmt);
1217 op0 = gimple_assign_rhs1_ptr (stmt);
1218 op1 = gimple_assign_rhs2_ptr (stmt);
1221 zero = integer_zero_node;
1222 const_iv.step = integer_zero_node;
1224 if (TREE_CODE (*op0) == SSA_NAME)
1225 iv0 = get_iv (data, *op0);
1226 if (TREE_CODE (*op1) == SSA_NAME)
1227 iv1 = get_iv (data, *op1);
1229 /* Exactly one of the compared values must be an iv, and the other one must
1234 if (integer_zerop (iv0->step))
1236 /* Control variable may be on the other side. */
1237 tmp_op = op0; op0 = op1; op1 = tmp_op;
1238 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1240 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1244 *control_var = op0;;
1255 /* Checks whether the condition in STMT is interesting and if so,
1259 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1261 tree *var_p, *bound_p;
1262 struct iv *var_iv, *civ;
1264 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1266 find_interesting_uses_op (data, *var_p);
1267 find_interesting_uses_op (data, *bound_p);
1271 civ = XNEW (struct iv);
1273 record_use (data, NULL, civ, stmt, USE_COMPARE);
1276 /* Returns true if expression EXPR is obviously invariant in LOOP,
1277 i.e. if all its operands are defined outside of the LOOP. LOOP
1278 should not be the function body. */
1281 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1286 gcc_assert (loop_depth (loop) > 0);
1288 if (is_gimple_min_invariant (expr))
1291 if (TREE_CODE (expr) == SSA_NAME)
1293 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1295 && flow_bb_inside_loop_p (loop, def_bb))
1304 len = TREE_OPERAND_LENGTH (expr);
1305 for (i = 0; i < len; i++)
1306 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1312 /* Returns true if statement STMT is obviously invariant in LOOP,
1313 i.e. if all its operands on the RHS are defined outside of the LOOP.
1314 LOOP should not be the function body. */
1317 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1322 gcc_assert (loop_depth (loop) > 0);
1324 lhs = gimple_get_lhs (stmt);
1325 for (i = 0; i < gimple_num_ops (stmt); i++)
1327 tree op = gimple_op (stmt, i);
1328 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1335 /* Cumulates the steps of indices into DATA and replaces their values with the
1336 initial ones. Returns false when the value of the index cannot be determined.
1337 Callback for for_each_index. */
1339 struct ifs_ivopts_data
1341 struct ivopts_data *ivopts_data;
1347 idx_find_step (tree base, tree *idx, void *data)
1349 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1351 tree step, iv_base, iv_step, lbound, off;
1352 struct loop *loop = dta->ivopts_data->current_loop;
1354 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1355 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1358 /* If base is a component ref, require that the offset of the reference
1360 if (TREE_CODE (base) == COMPONENT_REF)
1362 off = component_ref_field_offset (base);
1363 return expr_invariant_in_loop_p (loop, off);
1366 /* If base is array, first check whether we will be able to move the
1367 reference out of the loop (in order to take its address in strength
1368 reduction). In order for this to work we need both lower bound
1369 and step to be loop invariants. */
1370 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1372 /* Moreover, for a range, the size needs to be invariant as well. */
1373 if (TREE_CODE (base) == ARRAY_RANGE_REF
1374 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1377 step = array_ref_element_size (base);
1378 lbound = array_ref_low_bound (base);
1380 if (!expr_invariant_in_loop_p (loop, step)
1381 || !expr_invariant_in_loop_p (loop, lbound))
1385 if (TREE_CODE (*idx) != SSA_NAME)
1388 iv = get_iv (dta->ivopts_data, *idx);
1392 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1393 *&x[0], which is not folded and does not trigger the
1394 ARRAY_REF path below. */
1397 if (integer_zerop (iv->step))
1400 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1402 step = array_ref_element_size (base);
1404 /* We only handle addresses whose step is an integer constant. */
1405 if (TREE_CODE (step) != INTEGER_CST)
1409 /* The step for pointer arithmetics already is 1 byte. */
1410 step = build_int_cst (sizetype, 1);
1414 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1415 sizetype, &iv_base, &iv_step, dta->stmt,
1418 /* The index might wrap. */
1422 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1423 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1428 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1429 object is passed to it in DATA. */
1432 idx_record_use (tree base, tree *idx,
1435 struct ivopts_data *data = (struct ivopts_data *) vdata;
1436 find_interesting_uses_op (data, *idx);
1437 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1439 find_interesting_uses_op (data, array_ref_element_size (base));
1440 find_interesting_uses_op (data, array_ref_low_bound (base));
1445 /* If we can prove that TOP = cst * BOT for some constant cst,
1446 store cst to MUL and return true. Otherwise return false.
1447 The returned value is always sign-extended, regardless of the
1448 signedness of TOP and BOT. */
1451 constant_multiple_of (tree top, tree bot, double_int *mul)
1454 enum tree_code code;
1455 double_int res, p0, p1;
1456 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1461 if (operand_equal_p (top, bot, 0))
1463 *mul = double_int_one;
1467 code = TREE_CODE (top);
1471 mby = TREE_OPERAND (top, 1);
1472 if (TREE_CODE (mby) != INTEGER_CST)
1475 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1478 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1484 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1485 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1488 if (code == MINUS_EXPR)
1489 p1 = double_int_neg (p1);
1490 *mul = double_int_sext (double_int_add (p0, p1), precision);
1494 if (TREE_CODE (bot) != INTEGER_CST)
1497 p0 = double_int_sext (tree_to_double_int (top), precision);
1498 p1 = double_int_sext (tree_to_double_int (bot), precision);
1499 if (double_int_zero_p (p1))
1501 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503 return double_int_zero_p (res);
1510 /* Returns true if memory reference REF with step STEP may be unaligned. */
1513 may_be_unaligned_p (tree ref, tree step)
1517 HOST_WIDE_INT bitsize;
1518 HOST_WIDE_INT bitpos;
1520 enum machine_mode mode;
1521 int unsignedp, volatilep;
1522 unsigned base_align;
1524 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1525 thus they are not misaligned. */
1526 if (TREE_CODE (ref) == TARGET_MEM_REF)
1529 /* The test below is basically copy of what expr.c:normal_inner_ref
1530 does to check whether the object must be loaded by parts when
1531 STRICT_ALIGNMENT is true. */
1532 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1533 &unsignedp, &volatilep, true);
1534 base_type = TREE_TYPE (base);
1535 base_align = TYPE_ALIGN (base_type);
1537 if (mode != BLKmode)
1540 tree al = build_int_cst (TREE_TYPE (step),
1541 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1543 if (base_align < GET_MODE_ALIGNMENT (mode)
1544 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1545 || bitpos % BITS_PER_UNIT != 0)
1548 if (!constant_multiple_of (step, al, &mul))
1555 /* Return true if EXPR may be non-addressable. */
1558 may_be_nonaddressable_p (tree expr)
1560 switch (TREE_CODE (expr))
1562 case TARGET_MEM_REF:
1563 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1564 target, thus they are always addressable. */
1568 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1569 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1571 case VIEW_CONVERT_EXPR:
1572 /* This kind of view-conversions may wrap non-addressable objects
1573 and make them look addressable. After some processing the
1574 non-addressability may be uncovered again, causing ADDR_EXPRs
1575 of inappropriate objects to be built. */
1576 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1577 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1580 /* ... fall through ... */
1583 case ARRAY_RANGE_REF:
1584 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1596 /* Finds addresses in *OP_P inside STMT. */
1599 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1601 tree base = *op_p, step = build_int_cst (sizetype, 0);
1603 struct ifs_ivopts_data ifs_ivopts_data;
1605 /* Do not play with volatile memory references. A bit too conservative,
1606 perhaps, but safe. */
1607 if (gimple_has_volatile_ops (stmt))
1610 /* Ignore bitfields for now. Not really something terribly complicated
1612 if (TREE_CODE (base) == BIT_FIELD_REF)
1615 base = unshare_expr (base);
1617 if (TREE_CODE (base) == TARGET_MEM_REF)
1619 tree type = build_pointer_type (TREE_TYPE (base));
1623 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1625 civ = get_iv (data, TMR_BASE (base));
1629 TMR_BASE (base) = civ->base;
1632 if (TMR_INDEX (base)
1633 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1635 civ = get_iv (data, TMR_INDEX (base));
1639 TMR_INDEX (base) = civ->base;
1644 if (TMR_STEP (base))
1645 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1647 step = fold_build2 (PLUS_EXPR, type, step, astep);
1651 if (integer_zerop (step))
1653 base = tree_mem_ref_addr (type, base);
1657 ifs_ivopts_data.ivopts_data = data;
1658 ifs_ivopts_data.stmt = stmt;
1659 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1660 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1661 || integer_zerop (ifs_ivopts_data.step))
1663 step = ifs_ivopts_data.step;
1665 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1666 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1668 /* Check that the base expression is addressable. This needs
1669 to be done after substituting bases of IVs into it. */
1670 if (may_be_nonaddressable_p (base))
1673 /* Moreover, on strict alignment platforms, check that it is
1674 sufficiently aligned. */
1675 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1678 base = build_fold_addr_expr (base);
1680 /* Substituting bases of IVs into the base expression might
1681 have caused folding opportunities. */
1682 if (TREE_CODE (base) == ADDR_EXPR)
1684 tree *ref = &TREE_OPERAND (base, 0);
1685 while (handled_component_p (*ref))
1686 ref = &TREE_OPERAND (*ref, 0);
1687 if (TREE_CODE (*ref) == INDIRECT_REF)
1689 tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
1696 civ = alloc_iv (base, step);
1697 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1701 for_each_index (op_p, idx_record_use, data);
1704 /* Finds and records invariants used in STMT. */
1707 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1710 use_operand_p use_p;
1713 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1715 op = USE_FROM_PTR (use_p);
1716 record_invariant (data, op, false);
1720 /* Finds interesting uses of induction variables in the statement STMT. */
1723 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1726 tree op, *lhs, *rhs;
1728 use_operand_p use_p;
1729 enum tree_code code;
1731 find_invariants_stmt (data, stmt);
1733 if (gimple_code (stmt) == GIMPLE_COND)
1735 find_interesting_uses_cond (data, stmt);
1739 if (is_gimple_assign (stmt))
1741 lhs = gimple_assign_lhs_ptr (stmt);
1742 rhs = gimple_assign_rhs1_ptr (stmt);
1744 if (TREE_CODE (*lhs) == SSA_NAME)
1746 /* If the statement defines an induction variable, the uses are not
1747 interesting by themselves. */
1749 iv = get_iv (data, *lhs);
1751 if (iv && !integer_zerop (iv->step))
1755 code = gimple_assign_rhs_code (stmt);
1756 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1757 && (REFERENCE_CLASS_P (*rhs)
1758 || is_gimple_val (*rhs)))
1760 if (REFERENCE_CLASS_P (*rhs))
1761 find_interesting_uses_address (data, stmt, rhs);
1763 find_interesting_uses_op (data, *rhs);
1765 if (REFERENCE_CLASS_P (*lhs))
1766 find_interesting_uses_address (data, stmt, lhs);
1769 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1771 find_interesting_uses_cond (data, stmt);
1775 /* TODO -- we should also handle address uses of type
1777 memory = call (whatever);
1784 if (gimple_code (stmt) == GIMPLE_PHI
1785 && gimple_bb (stmt) == data->current_loop->header)
1787 iv = get_iv (data, PHI_RESULT (stmt));
1789 if (iv && !integer_zerop (iv->step))
1793 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1795 op = USE_FROM_PTR (use_p);
1797 if (TREE_CODE (op) != SSA_NAME)
1800 iv = get_iv (data, op);
1804 find_interesting_uses_op (data, op);
1808 /* Finds interesting uses of induction variables outside of loops
1809 on loop exit edge EXIT. */
1812 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1815 gimple_stmt_iterator psi;
1818 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1820 phi = gsi_stmt (psi);
1821 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1822 if (is_gimple_reg (def))
1823 find_interesting_uses_op (data, def);
1827 /* Finds uses of the induction variables that are interesting. */
1830 find_interesting_uses (struct ivopts_data *data)
1833 gimple_stmt_iterator bsi;
1834 basic_block *body = get_loop_body (data->current_loop);
1836 struct version_info *info;
1839 if (dump_file && (dump_flags & TDF_DETAILS))
1840 fprintf (dump_file, "Uses:\n\n");
1842 for (i = 0; i < data->current_loop->num_nodes; i++)
1847 FOR_EACH_EDGE (e, ei, bb->succs)
1848 if (e->dest != EXIT_BLOCK_PTR
1849 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1850 find_interesting_uses_outside (data, e);
1852 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1853 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1854 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1855 if (!is_gimple_debug (gsi_stmt (bsi)))
1856 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1859 if (dump_file && (dump_flags & TDF_DETAILS))
1863 fprintf (dump_file, "\n");
1865 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1867 info = ver_info (data, i);
1870 fprintf (dump_file, " ");
1871 print_generic_expr (dump_file, info->name, TDF_SLIM);
1872 fprintf (dump_file, " is invariant (%d)%s\n",
1873 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1877 fprintf (dump_file, "\n");
1883 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1884 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1885 we are at the top-level of the processed address. */
1888 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1889 unsigned HOST_WIDE_INT *offset)
1891 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1892 enum tree_code code;
1893 tree type, orig_type = TREE_TYPE (expr);
1894 unsigned HOST_WIDE_INT off0, off1, st;
1895 tree orig_expr = expr;
1899 type = TREE_TYPE (expr);
1900 code = TREE_CODE (expr);
1906 if (!cst_and_fits_in_hwi (expr)
1907 || integer_zerop (expr))
1910 *offset = int_cst_value (expr);
1911 return build_int_cst (orig_type, 0);
1913 case POINTER_PLUS_EXPR:
1916 op0 = TREE_OPERAND (expr, 0);
1917 op1 = TREE_OPERAND (expr, 1);
1919 op0 = strip_offset_1 (op0, false, false, &off0);
1920 op1 = strip_offset_1 (op1, false, false, &off1);
1922 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1923 if (op0 == TREE_OPERAND (expr, 0)
1924 && op1 == TREE_OPERAND (expr, 1))
1927 if (integer_zerop (op1))
1929 else if (integer_zerop (op0))
1931 if (code == MINUS_EXPR)
1932 expr = fold_build1 (NEGATE_EXPR, type, op1);
1937 expr = fold_build2 (code, type, op0, op1);
1939 return fold_convert (orig_type, expr);
1942 op1 = TREE_OPERAND (expr, 1);
1943 if (!cst_and_fits_in_hwi (op1))
1946 op0 = TREE_OPERAND (expr, 0);
1947 op0 = strip_offset_1 (op0, false, false, &off0);
1948 if (op0 == TREE_OPERAND (expr, 0))
1951 *offset = off0 * int_cst_value (op1);
1952 if (integer_zerop (op0))
1955 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1957 return fold_convert (orig_type, expr);
1960 case ARRAY_RANGE_REF:
1964 step = array_ref_element_size (expr);
1965 if (!cst_and_fits_in_hwi (step))
1968 st = int_cst_value (step);
1969 op1 = TREE_OPERAND (expr, 1);
1970 op1 = strip_offset_1 (op1, false, false, &off1);
1971 *offset = off1 * st;
1974 && integer_zerop (op1))
1976 /* Strip the component reference completely. */
1977 op0 = TREE_OPERAND (expr, 0);
1978 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1988 tmp = component_ref_field_offset (expr);
1990 && cst_and_fits_in_hwi (tmp))
1992 /* Strip the component reference completely. */
1993 op0 = TREE_OPERAND (expr, 0);
1994 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1995 *offset = off0 + int_cst_value (tmp);
2001 op0 = TREE_OPERAND (expr, 0);
2002 op0 = strip_offset_1 (op0, true, true, &off0);
2005 if (op0 == TREE_OPERAND (expr, 0))
2008 expr = build_fold_addr_expr (op0);
2009 return fold_convert (orig_type, expr);
2012 inside_addr = false;
2019 /* Default handling of expressions for that we want to recurse into
2020 the first operand. */
2021 op0 = TREE_OPERAND (expr, 0);
2022 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2025 if (op0 == TREE_OPERAND (expr, 0)
2026 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2029 expr = copy_node (expr);
2030 TREE_OPERAND (expr, 0) = op0;
2032 TREE_OPERAND (expr, 1) = op1;
2034 /* Inside address, we might strip the top level component references,
2035 thus changing type of the expression. Handling of ADDR_EXPR
2037 expr = fold_convert (orig_type, expr);
2042 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2045 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2047 return strip_offset_1 (expr, false, false, offset);
2050 /* Returns variant of TYPE that can be used as base for different uses.
2051 We return unsigned type with the same precision, which avoids problems
2055 generic_type_for (tree type)
2057 if (POINTER_TYPE_P (type))
2058 return unsigned_type_for (type);
2060 if (TYPE_UNSIGNED (type))
2063 return unsigned_type_for (type);
2066 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2067 the bitmap to that we should store it. */
2069 static struct ivopts_data *fd_ivopts_data;
2071 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2073 bitmap *depends_on = (bitmap *) data;
2074 struct version_info *info;
2076 if (TREE_CODE (*expr_p) != SSA_NAME)
2078 info = name_info (fd_ivopts_data, *expr_p);
2080 if (!info->inv_id || info->has_nonlin_use)
2084 *depends_on = BITMAP_ALLOC (NULL);
2085 bitmap_set_bit (*depends_on, info->inv_id);
2090 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2091 position to POS. If USE is not NULL, the candidate is set as related to
2092 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2093 replacement of the final value of the iv by a direct computation. */
2095 static struct iv_cand *
2096 add_candidate_1 (struct ivopts_data *data,
2097 tree base, tree step, bool important, enum iv_position pos,
2098 struct iv_use *use, gimple incremented_at)
2101 struct iv_cand *cand = NULL;
2102 tree type, orig_type;
2106 orig_type = TREE_TYPE (base);
2107 type = generic_type_for (orig_type);
2108 if (type != orig_type)
2110 base = fold_convert (type, base);
2111 step = fold_convert (type, step);
2115 for (i = 0; i < n_iv_cands (data); i++)
2117 cand = iv_cand (data, i);
2119 if (cand->pos != pos)
2122 if (cand->incremented_at != incremented_at
2123 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2124 && cand->ainc_use != use))
2138 if (operand_equal_p (base, cand->iv->base, 0)
2139 && operand_equal_p (step, cand->iv->step, 0))
2143 if (i == n_iv_cands (data))
2145 cand = XCNEW (struct iv_cand);
2151 cand->iv = alloc_iv (base, step);
2154 if (pos != IP_ORIGINAL && cand->iv)
2156 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2157 cand->var_after = cand->var_before;
2159 cand->important = important;
2160 cand->incremented_at = incremented_at;
2161 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2164 && TREE_CODE (step) != INTEGER_CST)
2166 fd_ivopts_data = data;
2167 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2170 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2171 cand->ainc_use = use;
2173 cand->ainc_use = NULL;
2175 if (dump_file && (dump_flags & TDF_DETAILS))
2176 dump_cand (dump_file, cand);
2179 if (important && !cand->important)
2181 cand->important = true;
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2188 bitmap_set_bit (use->related_cands, i);
2189 if (dump_file && (dump_flags & TDF_DETAILS))
2190 fprintf (dump_file, "Candidate %d is related to use %d\n",
2197 /* Returns true if incrementing the induction variable at the end of the LOOP
2200 The purpose is to avoid splitting latch edge with a biv increment, thus
2201 creating a jump, possibly confusing other optimization passes and leaving
2202 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2203 is not available (so we do not have a better alternative), or if the latch
2204 edge is already nonempty. */
2207 allow_ip_end_pos_p (struct loop *loop)
2209 if (!ip_normal_pos (loop))
2212 if (!empty_block_p (ip_end_pos (loop)))
2218 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2219 Important field is set to IMPORTANT. */
2222 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2223 bool important, struct iv_use *use)
2225 basic_block use_bb = gimple_bb (use->stmt);
2226 enum machine_mode mem_mode;
2227 unsigned HOST_WIDE_INT cstepi;
2229 /* If we insert the increment in any position other than the standard
2230 ones, we must ensure that it is incremented once per iteration.
2231 It must not be in an inner nested loop, or one side of an if
2233 if (use_bb->loop_father != data->current_loop
2234 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2235 || stmt_could_throw_p (use->stmt)
2236 || !cst_and_fits_in_hwi (step))
2239 cstepi = int_cst_value (step);
2241 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2242 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2243 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2245 enum tree_code code = MINUS_EXPR;
2247 tree new_step = step;
2249 if (POINTER_TYPE_P (TREE_TYPE (base)))
2251 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2252 code = POINTER_PLUS_EXPR;
2255 new_step = fold_convert (TREE_TYPE (base), new_step);
2256 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2257 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2260 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2261 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2263 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2268 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2269 position to POS. If USE is not NULL, the candidate is set as related to
2270 it. The candidate computation is scheduled on all available positions. */
2273 add_candidate (struct ivopts_data *data,
2274 tree base, tree step, bool important, struct iv_use *use)
2276 if (ip_normal_pos (data->current_loop))
2277 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2278 if (ip_end_pos (data->current_loop)
2279 && allow_ip_end_pos_p (data->current_loop))
2280 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2282 if (use != NULL && use->type == USE_ADDRESS)
2283 add_autoinc_candidates (data, base, step, important, use);
2286 /* Add a standard "0 + 1 * iteration" iv candidate for a
2287 type with SIZE bits. */
2290 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2293 tree type = lang_hooks.types.type_for_size (size, true);
2294 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2298 /* Adds standard iv candidates. */
2301 add_standard_iv_candidates (struct ivopts_data *data)
2303 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2305 /* The same for a double-integer type if it is still fast enough. */
2306 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2307 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2311 /* Adds candidates bases on the old induction variable IV. */
2314 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2318 struct iv_cand *cand;
2320 add_candidate (data, iv->base, iv->step, true, NULL);
2322 /* The same, but with initial value zero. */
2323 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2324 add_candidate (data, size_int (0), iv->step, true, NULL);
2326 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2327 iv->step, true, NULL);
2329 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2330 if (gimple_code (phi) == GIMPLE_PHI)
2332 /* Additionally record the possibility of leaving the original iv
2334 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2335 cand = add_candidate_1 (data,
2336 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2337 SSA_NAME_DEF_STMT (def));
2338 cand->var_before = iv->ssa_name;
2339 cand->var_after = def;
2343 /* Adds candidates based on the old induction variables. */
2346 add_old_ivs_candidates (struct ivopts_data *data)
2352 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2354 iv = ver_info (data, i)->iv;
2355 if (iv && iv->biv_p && !integer_zerop (iv->step))
2356 add_old_iv_candidates (data, iv);
2360 /* Adds candidates based on the value of the induction variable IV and USE. */
2363 add_iv_value_candidates (struct ivopts_data *data,
2364 struct iv *iv, struct iv_use *use)
2366 unsigned HOST_WIDE_INT offset;
2370 add_candidate (data, iv->base, iv->step, false, use);
2372 /* The same, but with initial value zero. Make such variable important,
2373 since it is generic enough so that possibly many uses may be based
2375 basetype = TREE_TYPE (iv->base);
2376 if (POINTER_TYPE_P (basetype))
2377 basetype = sizetype;
2378 add_candidate (data, build_int_cst (basetype, 0),
2379 iv->step, true, use);
2381 /* Third, try removing the constant offset. Make sure to even
2382 add a candidate for &a[0] vs. (T *)&a. */
2383 base = strip_offset (iv->base, &offset);
2385 || base != iv->base)
2386 add_candidate (data, base, iv->step, false, use);
2389 /* Adds candidates based on the uses. */
2392 add_derived_ivs_candidates (struct ivopts_data *data)
2396 for (i = 0; i < n_iv_uses (data); i++)
2398 struct iv_use *use = iv_use (data, i);
2405 case USE_NONLINEAR_EXPR:
2408 /* Just add the ivs based on the value of the iv used here. */
2409 add_iv_value_candidates (data, use->iv, use);
2418 /* Record important candidates and add them to related_cands bitmaps
2422 record_important_candidates (struct ivopts_data *data)
2427 for (i = 0; i < n_iv_cands (data); i++)
2429 struct iv_cand *cand = iv_cand (data, i);
2431 if (cand->important)
2432 bitmap_set_bit (data->important_candidates, i);
2435 data->consider_all_candidates = (n_iv_cands (data)
2436 <= CONSIDER_ALL_CANDIDATES_BOUND);
2438 if (data->consider_all_candidates)
2440 /* We will not need "related_cands" bitmaps in this case,
2441 so release them to decrease peak memory consumption. */
2442 for (i = 0; i < n_iv_uses (data); i++)
2444 use = iv_use (data, i);
2445 BITMAP_FREE (use->related_cands);
2450 /* Add important candidates to the related_cands bitmaps. */
2451 for (i = 0; i < n_iv_uses (data); i++)
2452 bitmap_ior_into (iv_use (data, i)->related_cands,
2453 data->important_candidates);
2457 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2458 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2459 we allocate a simple list to every use. */
2462 alloc_use_cost_map (struct ivopts_data *data)
2464 unsigned i, size, s, j;
2466 for (i = 0; i < n_iv_uses (data); i++)
2468 struct iv_use *use = iv_use (data, i);
2471 if (data->consider_all_candidates)
2472 size = n_iv_cands (data);
2476 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2481 /* Round up to the power of two, so that moduling by it is fast. */
2482 for (size = 1; size < s; size <<= 1)
2486 use->n_map_members = size;
2487 use->cost_map = XCNEWVEC (struct cost_pair, size);
2491 /* Returns description of computation cost of expression whose runtime
2492 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2495 new_cost (unsigned runtime, unsigned complexity)
2499 cost.cost = runtime;
2500 cost.complexity = complexity;
2505 /* Adds costs COST1 and COST2. */
2508 add_costs (comp_cost cost1, comp_cost cost2)
2510 cost1.cost += cost2.cost;
2511 cost1.complexity += cost2.complexity;
2515 /* Subtracts costs COST1 and COST2. */
2518 sub_costs (comp_cost cost1, comp_cost cost2)
2520 cost1.cost -= cost2.cost;
2521 cost1.complexity -= cost2.complexity;
2526 /* Returns a negative number if COST1 < COST2, a positive number if
2527 COST1 > COST2, and 0 if COST1 = COST2. */
2530 compare_costs (comp_cost cost1, comp_cost cost2)
2532 if (cost1.cost == cost2.cost)
2533 return cost1.complexity - cost2.complexity;
2535 return cost1.cost - cost2.cost;
2538 /* Returns true if COST is infinite. */
2541 infinite_cost_p (comp_cost cost)
2543 return cost.cost == INFTY;
2546 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2547 on invariants DEPENDS_ON and that the value used in expressing it
2551 set_use_iv_cost (struct ivopts_data *data,
2552 struct iv_use *use, struct iv_cand *cand,
2553 comp_cost cost, bitmap depends_on, tree value)
2557 if (infinite_cost_p (cost))
2559 BITMAP_FREE (depends_on);
2563 if (data->consider_all_candidates)
2565 use->cost_map[cand->id].cand = cand;
2566 use->cost_map[cand->id].cost = cost;
2567 use->cost_map[cand->id].depends_on = depends_on;
2568 use->cost_map[cand->id].value = value;
2572 /* n_map_members is a power of two, so this computes modulo. */
2573 s = cand->id & (use->n_map_members - 1);
2574 for (i = s; i < use->n_map_members; i++)
2575 if (!use->cost_map[i].cand)
2577 for (i = 0; i < s; i++)
2578 if (!use->cost_map[i].cand)
2584 use->cost_map[i].cand = cand;
2585 use->cost_map[i].cost = cost;
2586 use->cost_map[i].depends_on = depends_on;
2587 use->cost_map[i].value = value;
2590 /* Gets cost of (USE, CANDIDATE) pair. */
2592 static struct cost_pair *
2593 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2594 struct iv_cand *cand)
2597 struct cost_pair *ret;
2602 if (data->consider_all_candidates)
2604 ret = use->cost_map + cand->id;
2611 /* n_map_members is a power of two, so this computes modulo. */
2612 s = cand->id & (use->n_map_members - 1);
2613 for (i = s; i < use->n_map_members; i++)
2614 if (use->cost_map[i].cand == cand)
2615 return use->cost_map + i;
2617 for (i = 0; i < s; i++)
2618 if (use->cost_map[i].cand == cand)
2619 return use->cost_map + i;
2624 /* Returns estimate on cost of computing SEQ. */
2627 seq_cost (rtx seq, bool speed)
2632 for (; seq; seq = NEXT_INSN (seq))
2634 set = single_set (seq);
2636 cost += rtx_cost (set, SET,speed);
2644 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2646 produce_memory_decl_rtl (tree obj, int *regno)
2648 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2649 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2653 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2655 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2656 x = gen_rtx_SYMBOL_REF (address_mode, name);
2657 SET_SYMBOL_REF_DECL (x, obj);
2658 x = gen_rtx_MEM (DECL_MODE (obj), x);
2659 set_mem_addr_space (x, as);
2660 targetm.encode_section_info (obj, x, true);
2664 x = gen_raw_REG (address_mode, (*regno)++);
2665 x = gen_rtx_MEM (DECL_MODE (obj), x);
2666 set_mem_addr_space (x, as);
2672 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2673 walk_tree. DATA contains the actual fake register number. */
2676 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2678 tree obj = NULL_TREE;
2680 int *regno = (int *) data;
2682 switch (TREE_CODE (*expr_p))
2685 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2686 handled_component_p (*expr_p);
2687 expr_p = &TREE_OPERAND (*expr_p, 0))
2690 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2691 x = produce_memory_decl_rtl (obj, regno);
2696 obj = SSA_NAME_VAR (*expr_p);
2697 if (!DECL_RTL_SET_P (obj))
2698 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2707 if (DECL_RTL_SET_P (obj))
2710 if (DECL_MODE (obj) == BLKmode)
2711 x = produce_memory_decl_rtl (obj, regno);
2713 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2723 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2724 SET_DECL_RTL (obj, x);
2730 /* Determines cost of the computation of EXPR. */
2733 computation_cost (tree expr, bool speed)
2736 tree type = TREE_TYPE (expr);
2738 /* Avoid using hard regs in ways which may be unsupported. */
2739 int regno = LAST_VIRTUAL_REGISTER + 1;
2740 struct cgraph_node *node = cgraph_node (current_function_decl);
2741 enum node_frequency real_frequency = node->frequency;
2743 node->frequency = NODE_FREQUENCY_NORMAL;
2744 crtl->maybe_hot_insn_p = speed;
2745 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2747 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2750 default_rtl_profile ();
2751 node->frequency = real_frequency;
2753 cost = seq_cost (seq, speed);
2755 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2756 TYPE_ADDR_SPACE (type), speed);
2761 /* Returns variable containing the value of candidate CAND at statement AT. */
2764 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2766 if (stmt_after_increment (loop, cand, stmt))
2767 return cand->var_after;
2769 return cand->var_before;
2772 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2773 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2776 tree_int_cst_sign_bit (const_tree t)
2778 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2779 unsigned HOST_WIDE_INT w;
2781 if (bitno < HOST_BITS_PER_WIDE_INT)
2782 w = TREE_INT_CST_LOW (t);
2785 w = TREE_INT_CST_HIGH (t);
2786 bitno -= HOST_BITS_PER_WIDE_INT;
2789 return (w >> bitno) & 1;
2792 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2793 same precision that is at least as wide as the precision of TYPE, stores
2794 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2798 determine_common_wider_type (tree *a, tree *b)
2800 tree wider_type = NULL;
2802 tree atype = TREE_TYPE (*a);
2804 if (CONVERT_EXPR_P (*a))
2806 suba = TREE_OPERAND (*a, 0);
2807 wider_type = TREE_TYPE (suba);
2808 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2814 if (CONVERT_EXPR_P (*b))
2816 subb = TREE_OPERAND (*b, 0);
2817 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2828 /* Determines the expression by that USE is expressed from induction variable
2829 CAND at statement AT in LOOP. The expression is stored in a decomposed
2830 form into AFF. Returns false if USE cannot be expressed using CAND. */
2833 get_computation_aff (struct loop *loop,
2834 struct iv_use *use, struct iv_cand *cand, gimple at,
2835 struct affine_tree_combination *aff)
2837 tree ubase = use->iv->base;
2838 tree ustep = use->iv->step;
2839 tree cbase = cand->iv->base;
2840 tree cstep = cand->iv->step, cstep_common;
2841 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2842 tree common_type, var;
2844 aff_tree cbase_aff, var_aff;
2847 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2849 /* We do not have a precision to express the values of use. */
2853 var = var_at_stmt (loop, cand, at);
2854 uutype = unsigned_type_for (utype);
2856 /* If the conversion is not noop, perform it. */
2857 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2859 cstep = fold_convert (uutype, cstep);
2860 cbase = fold_convert (uutype, cbase);
2861 var = fold_convert (uutype, var);
2864 if (!constant_multiple_of (ustep, cstep, &rat))
2867 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2868 type, we achieve better folding by computing their difference in this
2869 wider type, and cast the result to UUTYPE. We do not need to worry about
2870 overflows, as all the arithmetics will in the end be performed in UUTYPE
2872 common_type = determine_common_wider_type (&ubase, &cbase);
2874 /* use = ubase - ratio * cbase + ratio * var. */
2875 tree_to_aff_combination (ubase, common_type, aff);
2876 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2877 tree_to_aff_combination (var, uutype, &var_aff);
2879 /* We need to shift the value if we are after the increment. */
2880 if (stmt_after_increment (loop, cand, at))
2884 if (common_type != uutype)
2885 cstep_common = fold_convert (common_type, cstep);
2887 cstep_common = cstep;
2889 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2890 aff_combination_add (&cbase_aff, &cstep_aff);
2893 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2894 aff_combination_add (aff, &cbase_aff);
2895 if (common_type != uutype)
2896 aff_combination_convert (aff, uutype);
2898 aff_combination_scale (&var_aff, rat);
2899 aff_combination_add (aff, &var_aff);
2904 /* Determines the expression by that USE is expressed from induction variable
2905 CAND at statement AT in LOOP. The computation is unshared. */
2908 get_computation_at (struct loop *loop,
2909 struct iv_use *use, struct iv_cand *cand, gimple at)
2912 tree type = TREE_TYPE (use->iv->base);
2914 if (!get_computation_aff (loop, use, cand, at, &aff))
2916 unshare_aff_combination (&aff);
2917 return fold_convert (type, aff_combination_to_tree (&aff));
2920 /* Determines the expression by that USE is expressed from induction variable
2921 CAND in LOOP. The computation is unshared. */
2924 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2926 return get_computation_at (loop, use, cand, use->stmt);
2929 /* Returns cost of addition in MODE. */
2932 add_cost (enum machine_mode mode, bool speed)
2934 static unsigned costs[NUM_MACHINE_MODES];
2942 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2943 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2944 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2949 cost = seq_cost (seq, speed);
2955 if (dump_file && (dump_flags & TDF_DETAILS))
2956 fprintf (dump_file, "Addition in %s costs %d\n",
2957 GET_MODE_NAME (mode), cost);
2961 /* Entry in a hashtable of already known costs for multiplication. */
2964 HOST_WIDE_INT cst; /* The constant to multiply by. */
2965 enum machine_mode mode; /* In mode. */
2966 unsigned cost; /* The cost. */
2969 /* Counts hash value for the ENTRY. */
2972 mbc_entry_hash (const void *entry)
2974 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2976 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2979 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2982 mbc_entry_eq (const void *entry1, const void *entry2)
2984 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2985 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2987 return (e1->mode == e2->mode
2988 && e1->cst == e2->cst);
2991 /* Returns cost of multiplication by constant CST in MODE. */
2994 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2996 static htab_t costs;
2997 struct mbc_entry **cached, act;
3002 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3006 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3008 return (*cached)->cost;
3010 *cached = XNEW (struct mbc_entry);
3011 (*cached)->mode = mode;
3012 (*cached)->cst = cst;
3015 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3016 gen_int_mode (cst, mode), NULL_RTX, 0);
3020 cost = seq_cost (seq, speed);
3022 if (dump_file && (dump_flags & TDF_DETAILS))
3023 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3024 (int) cst, GET_MODE_NAME (mode), cost);
3026 (*cached)->cost = cost;
3031 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3032 validity for a memory reference accessing memory of mode MODE in
3033 address space AS. */
3035 DEF_VEC_P (sbitmap);
3036 DEF_VEC_ALLOC_P (sbitmap, heap);
3039 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3042 #define MAX_RATIO 128
3043 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3044 static VEC (sbitmap, heap) *valid_mult_list;
3047 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3048 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3050 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3053 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3054 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3058 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3059 sbitmap_zero (valid_mult);
3060 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3061 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3063 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3064 if (memory_address_addr_space_p (mode, addr, as))
3065 SET_BIT (valid_mult, i + MAX_RATIO);
3068 if (dump_file && (dump_flags & TDF_DETAILS))
3070 fprintf (dump_file, " allowed multipliers:");
3071 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3072 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3073 fprintf (dump_file, " %d", (int) i);
3074 fprintf (dump_file, "\n");
3075 fprintf (dump_file, "\n");
3078 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3081 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3084 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3087 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3088 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3089 variable is omitted. Compute the cost for a memory reference that accesses
3090 a memory location of mode MEM_MODE in address space AS.
3092 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3093 size of MEM_MODE / RATIO) is available. To make this determination, we
3094 look at the size of the increment to be made, which is given in CSTEP.
3095 CSTEP may be zero if the step is unknown.
3096 STMT_AFTER_INC is true iff the statement we're looking at is after the
3097 increment of the original biv.
3099 TODO -- there must be some better way. This all is quite crude. */
3103 HOST_WIDE_INT min_offset, max_offset;
3104 unsigned costs[2][2][2][2];
3105 } *address_cost_data;
3107 DEF_VEC_P (address_cost_data);
3108 DEF_VEC_ALLOC_P (address_cost_data, heap);
3111 get_address_cost (bool symbol_present, bool var_present,
3112 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3113 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3114 addr_space_t as, bool speed,
3115 bool stmt_after_inc, bool *may_autoinc)
3117 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3118 static VEC(address_cost_data, heap) *address_cost_data_list;
3119 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3120 address_cost_data data;
3121 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3122 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3123 unsigned cost, acost, complexity;
3124 bool offset_p, ratio_p, autoinc;
3125 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3126 unsigned HOST_WIDE_INT mask;
3129 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3130 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3133 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3137 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3138 HOST_WIDE_INT rat, off;
3139 int old_cse_not_expected;
3140 unsigned sym_p, var_p, off_p, rat_p, add_c;
3141 rtx seq, addr, base;
3144 data = (address_cost_data) xcalloc (1, sizeof (*data));
3146 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3148 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3149 for (i = start; i <= 1 << 20; i <<= 1)
3151 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3152 if (!memory_address_addr_space_p (mem_mode, addr, as))
3155 data->max_offset = i == start ? 0 : i >> 1;
3156 off = data->max_offset;
3158 for (i = start; i <= 1 << 20; i <<= 1)
3160 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3161 if (!memory_address_addr_space_p (mem_mode, addr, as))
3164 data->min_offset = i == start ? 0 : -(i >> 1);
3166 if (dump_file && (dump_flags & TDF_DETAILS))
3168 fprintf (dump_file, "get_address_cost:\n");
3169 fprintf (dump_file, " min offset %s %d\n",
3170 GET_MODE_NAME (mem_mode),
3171 (int) data->min_offset);
3172 fprintf (dump_file, " max offset %s %d\n",
3173 GET_MODE_NAME (mem_mode),
3174 (int) data->max_offset);
3178 for (i = 2; i <= MAX_RATIO; i++)
3179 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3185 /* Compute the cost of various addressing modes. */
3187 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3188 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3190 if (HAVE_PRE_DECREMENT)
3192 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3193 has_predec[mem_mode]
3194 = memory_address_addr_space_p (mem_mode, addr, as);
3196 if (HAVE_POST_DECREMENT)
3198 addr = gen_rtx_POST_DEC (address_mode, reg0);
3199 has_postdec[mem_mode]
3200 = memory_address_addr_space_p (mem_mode, addr, as);
3202 if (HAVE_PRE_INCREMENT)
3204 addr = gen_rtx_PRE_INC (address_mode, reg0);
3205 has_preinc[mem_mode]
3206 = memory_address_addr_space_p (mem_mode, addr, as);
3208 if (HAVE_POST_INCREMENT)
3210 addr = gen_rtx_POST_INC (address_mode, reg0);
3211 has_postinc[mem_mode]
3212 = memory_address_addr_space_p (mem_mode, addr, as);
3214 for (i = 0; i < 16; i++)
3217 var_p = (i >> 1) & 1;
3218 off_p = (i >> 2) & 1;
3219 rat_p = (i >> 3) & 1;
3223 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3224 gen_int_mode (rat, address_mode));
3227 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3231 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3232 /* ??? We can run into trouble with some backends by presenting
3233 it with symbols which haven't been properly passed through
3234 targetm.encode_section_info. By setting the local bit, we
3235 enhance the probability of things working. */
3236 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3239 base = gen_rtx_fmt_e (CONST, address_mode,
3241 (PLUS, address_mode, base,
3242 gen_int_mode (off, address_mode)));
3245 base = gen_int_mode (off, address_mode);
3250 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3253 /* To avoid splitting addressing modes, pretend that no cse will
3255 old_cse_not_expected = cse_not_expected;
3256 cse_not_expected = true;
3257 addr = memory_address_addr_space (mem_mode, addr, as);
3258 cse_not_expected = old_cse_not_expected;
3262 acost = seq_cost (seq, speed);
3263 acost += address_cost (addr, mem_mode, as, speed);
3267 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3270 /* On some targets, it is quite expensive to load symbol to a register,
3271 which makes addresses that contain symbols look much more expensive.
3272 However, the symbol will have to be loaded in any case before the
3273 loop (and quite likely we have it in register already), so it does not
3274 make much sense to penalize them too heavily. So make some final
3275 tweaks for the SYMBOL_PRESENT modes:
3277 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3278 var is cheaper, use this mode with small penalty.
3279 If VAR_PRESENT is true, try whether the mode with
3280 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3281 if this is the case, use it. */
3282 add_c = add_cost (address_mode, speed);
3283 for (i = 0; i < 8; i++)
3286 off_p = (i >> 1) & 1;
3287 rat_p = (i >> 2) & 1;
3289 acost = data->costs[0][1][off_p][rat_p] + 1;
3293 if (acost < data->costs[1][var_p][off_p][rat_p])
3294 data->costs[1][var_p][off_p][rat_p] = acost;
3297 if (dump_file && (dump_flags & TDF_DETAILS))
3299 fprintf (dump_file, "Address costs:\n");
3301 for (i = 0; i < 16; i++)
3304 var_p = (i >> 1) & 1;
3305 off_p = (i >> 2) & 1;
3306 rat_p = (i >> 3) & 1;
3308 fprintf (dump_file, " ");
3310 fprintf (dump_file, "sym + ");
3312 fprintf (dump_file, "var + ");
3314 fprintf (dump_file, "cst + ");
3316 fprintf (dump_file, "rat * ");
3318 acost = data->costs[sym_p][var_p][off_p][rat_p];
3319 fprintf (dump_file, "index costs %d\n", acost);
3321 if (has_predec[mem_mode] || has_postdec[mem_mode]
3322 || has_preinc[mem_mode] || has_postinc[mem_mode])
3323 fprintf (dump_file, " May include autoinc/dec\n");
3324 fprintf (dump_file, "\n");
3327 VEC_replace (address_cost_data, address_cost_data_list,
3331 bits = GET_MODE_BITSIZE (address_mode);
3332 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3334 if ((offset >> (bits - 1) & 1))
3339 msize = GET_MODE_SIZE (mem_mode);
3340 autoinc_offset = offset;
3342 autoinc_offset += ratio * cstep;
3343 if (symbol_present || var_present || ratio != 1)
3345 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3347 || (has_postdec[mem_mode] && autoinc_offset == 0
3349 || (has_preinc[mem_mode] && autoinc_offset == msize
3351 || (has_predec[mem_mode] && autoinc_offset == -msize
3352 && msize == -cstep))
3356 offset_p = (s_offset != 0
3357 && data->min_offset <= s_offset
3358 && s_offset <= data->max_offset);
3359 ratio_p = (ratio != 1
3360 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3362 if (ratio != 1 && !ratio_p)
3363 cost += multiply_by_cost (ratio, address_mode, speed);
3365 if (s_offset && !offset_p && !symbol_present)
3366 cost += add_cost (address_mode, speed);
3369 *may_autoinc = autoinc;
3370 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3371 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3372 return new_cost (cost + acost, complexity);
3375 /* Estimates cost of forcing expression EXPR into a variable. */
3378 force_expr_to_var_cost (tree expr, bool speed)
3380 static bool costs_initialized = false;
3381 static unsigned integer_cost [2];
3382 static unsigned symbol_cost [2];
3383 static unsigned address_cost [2];
3385 comp_cost cost0, cost1, cost;
3386 enum machine_mode mode;
3388 if (!costs_initialized)
3390 tree type = build_pointer_type (integer_type_node);
3395 var = create_tmp_var_raw (integer_type_node, "test_var");
3396 TREE_STATIC (var) = 1;
3397 x = produce_memory_decl_rtl (var, NULL);
3398 SET_DECL_RTL (var, x);
3400 addr = build1 (ADDR_EXPR, type, var);
3403 for (i = 0; i < 2; i++)
3405 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3408 symbol_cost[i] = computation_cost (addr, i) + 1;
3411 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3413 build_int_cst (sizetype, 2000)), i) + 1;
3414 if (dump_file && (dump_flags & TDF_DETAILS))
3416 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3417 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3418 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3419 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3420 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3421 fprintf (dump_file, "\n");
3425 costs_initialized = true;
3430 if (SSA_VAR_P (expr))
3433 if (is_gimple_min_invariant (expr))
3435 if (TREE_CODE (expr) == INTEGER_CST)
3436 return new_cost (integer_cost [speed], 0);
3438 if (TREE_CODE (expr) == ADDR_EXPR)
3440 tree obj = TREE_OPERAND (expr, 0);
3442 if (TREE_CODE (obj) == VAR_DECL
3443 || TREE_CODE (obj) == PARM_DECL
3444 || TREE_CODE (obj) == RESULT_DECL)
3445 return new_cost (symbol_cost [speed], 0);
3448 return new_cost (address_cost [speed], 0);
3451 switch (TREE_CODE (expr))
3453 case POINTER_PLUS_EXPR:
3457 op0 = TREE_OPERAND (expr, 0);
3458 op1 = TREE_OPERAND (expr, 1);
3462 if (is_gimple_val (op0))
3465 cost0 = force_expr_to_var_cost (op0, speed);
3467 if (is_gimple_val (op1))
3470 cost1 = force_expr_to_var_cost (op1, speed);
3475 op0 = TREE_OPERAND (expr, 0);
3479 if (is_gimple_val (op0))
3482 cost0 = force_expr_to_var_cost (op0, speed);
3488 /* Just an arbitrary value, FIXME. */
3489 return new_cost (target_spill_cost[speed], 0);
3492 mode = TYPE_MODE (TREE_TYPE (expr));
3493 switch (TREE_CODE (expr))
3495 case POINTER_PLUS_EXPR:
3499 cost = new_cost (add_cost (mode, speed), 0);
3503 if (cst_and_fits_in_hwi (op0))
3504 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3505 else if (cst_and_fits_in_hwi (op1))
3506 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3508 return new_cost (target_spill_cost [speed], 0);
3515 cost = add_costs (cost, cost0);
3516 cost = add_costs (cost, cost1);
3518 /* Bound the cost by target_spill_cost. The parts of complicated
3519 computations often are either loop invariant or at least can
3520 be shared between several iv uses, so letting this grow without
3521 limits would not give reasonable results. */
3522 if (cost.cost > (int) target_spill_cost [speed])
3523 cost.cost = target_spill_cost [speed];
3528 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3529 invariants the computation depends on. */
3532 force_var_cost (struct ivopts_data *data,
3533 tree expr, bitmap *depends_on)
3537 fd_ivopts_data = data;
3538 walk_tree (&expr, find_depends, depends_on, NULL);
3541 return force_expr_to_var_cost (expr, data->speed);
3544 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3545 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3546 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3547 invariants the computation depends on. */
3550 split_address_cost (struct ivopts_data *data,
3551 tree addr, bool *symbol_present, bool *var_present,
3552 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3555 HOST_WIDE_INT bitsize;
3556 HOST_WIDE_INT bitpos;
3558 enum machine_mode mode;
3559 int unsignedp, volatilep;
3561 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3562 &unsignedp, &volatilep, false);
3565 || bitpos % BITS_PER_UNIT != 0
3566 || TREE_CODE (core) != VAR_DECL)
3568 *symbol_present = false;
3569 *var_present = true;
3570 fd_ivopts_data = data;
3571 walk_tree (&addr, find_depends, depends_on, NULL);
3572 return new_cost (target_spill_cost[data->speed], 0);
3575 *offset += bitpos / BITS_PER_UNIT;
3576 if (TREE_STATIC (core)
3577 || DECL_EXTERNAL (core))
3579 *symbol_present = true;
3580 *var_present = false;
3584 *symbol_present = false;
3585 *var_present = true;
3589 /* Estimates cost of expressing difference of addresses E1 - E2 as
3590 var + symbol + offset. The value of offset is added to OFFSET,
3591 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3592 part is missing. DEPENDS_ON is a set of the invariants the computation
3596 ptr_difference_cost (struct ivopts_data *data,
3597 tree e1, tree e2, bool *symbol_present, bool *var_present,
3598 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3600 HOST_WIDE_INT diff = 0;
3601 aff_tree aff_e1, aff_e2;
3604 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3606 if (ptr_difference_const (e1, e2, &diff))
3609 *symbol_present = false;
3610 *var_present = false;
3614 if (integer_zerop (e2))
3615 return split_address_cost (data, TREE_OPERAND (e1, 0),
3616 symbol_present, var_present, offset, depends_on);
3618 *symbol_present = false;
3619 *var_present = true;
3621 type = signed_type_for (TREE_TYPE (e1));
3622 tree_to_aff_combination (e1, type, &aff_e1);
3623 tree_to_aff_combination (e2, type, &aff_e2);
3624 aff_combination_scale (&aff_e2, double_int_minus_one);
3625 aff_combination_add (&aff_e1, &aff_e2);
3627 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3630 /* Estimates cost of expressing difference E1 - E2 as
3631 var + symbol + offset. The value of offset is added to OFFSET,
3632 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3633 part is missing. DEPENDS_ON is a set of the invariants the computation
3637 difference_cost (struct ivopts_data *data,
3638 tree e1, tree e2, bool *symbol_present, bool *var_present,
3639 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3641 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3642 unsigned HOST_WIDE_INT off1, off2;
3643 aff_tree aff_e1, aff_e2;
3646 e1 = strip_offset (e1, &off1);
3647 e2 = strip_offset (e2, &off2);
3648 *offset += off1 - off2;
3653 if (TREE_CODE (e1) == ADDR_EXPR)
3654 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3655 offset, depends_on);
3656 *symbol_present = false;
3658 if (operand_equal_p (e1, e2, 0))
3660 *var_present = false;
3664 *var_present = true;
3666 if (integer_zerop (e2))
3667 return force_var_cost (data, e1, depends_on);
3669 if (integer_zerop (e1))
3671 comp_cost cost = force_var_cost (data, e2, depends_on);
3672 cost.cost += multiply_by_cost (-1, mode, data->speed);
3676 type = signed_type_for (TREE_TYPE (e1));
3677 tree_to_aff_combination (e1, type, &aff_e1);
3678 tree_to_aff_combination (e2, type, &aff_e2);
3679 aff_combination_scale (&aff_e2, double_int_minus_one);
3680 aff_combination_add (&aff_e1, &aff_e2);
3682 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3685 /* Determines the cost of the computation by that USE is expressed
3686 from induction variable CAND. If ADDRESS_P is true, we just need
3687 to create an address from it, otherwise we want to get it into
3688 register. A set of invariants we depend on is stored in
3689 DEPENDS_ON. AT is the statement at that the value is computed.
3690 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3691 addressing is likely. */
3694 get_computation_cost_at (struct ivopts_data *data,
3695 struct iv_use *use, struct iv_cand *cand,
3696 bool address_p, bitmap *depends_on, gimple at,
3699 tree ubase = use->iv->base, ustep = use->iv->step;
3701 tree utype = TREE_TYPE (ubase), ctype;
3702 unsigned HOST_WIDE_INT cstepi, offset = 0;
3703 HOST_WIDE_INT ratio, aratio;
3704 bool var_present, symbol_present, stmt_is_after_inc;
3707 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3711 /* Only consider real candidates. */
3713 return infinite_cost;
3715 cbase = cand->iv->base;
3716 cstep = cand->iv->step;
3717 ctype = TREE_TYPE (cbase);
3719 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3721 /* We do not have a precision to express the values of use. */
3722 return infinite_cost;
3727 /* Do not try to express address of an object with computation based
3728 on address of a different object. This may cause problems in rtl
3729 level alias analysis (that does not expect this to be happening,
3730 as this is illegal in C), and would be unlikely to be useful
3732 if (use->iv->base_object
3733 && cand->iv->base_object
3734 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3735 return infinite_cost;
3738 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3740 /* TODO -- add direct handling of this case. */
3744 /* CSTEPI is removed from the offset in case statement is after the
3745 increment. If the step is not constant, we use zero instead.
3746 This is a bit imprecise (there is the extra addition), but
3747 redundancy elimination is likely to transform the code so that
3748 it uses value of the variable before increment anyway,
3749 so it is not that much unrealistic. */
3750 if (cst_and_fits_in_hwi (cstep))
3751 cstepi = int_cst_value (cstep);
3755 if (!constant_multiple_of (ustep, cstep, &rat))
3756 return infinite_cost;
3758 if (double_int_fits_in_shwi_p (rat))
3759 ratio = double_int_to_shwi (rat);
3761 return infinite_cost;
3764 ctype = TREE_TYPE (cbase);
3766 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3767 or ratio == 1, it is better to handle this like
3769 ubase - ratio * cbase + ratio * var
3771 (also holds in the case ratio == -1, TODO. */
3773 if (cst_and_fits_in_hwi (cbase))
3775 offset = - ratio * int_cst_value (cbase);
3776 cost = difference_cost (data,
3777 ubase, build_int_cst (utype, 0),
3778 &symbol_present, &var_present, &offset,
3781 else if (ratio == 1)
3783 cost = difference_cost (data,
3785 &symbol_present, &var_present, &offset,
3789 && !POINTER_TYPE_P (ctype)
3790 && multiplier_allowed_in_address_p
3791 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3792 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3795 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3796 cost = difference_cost (data,
3798 &symbol_present, &var_present, &offset,
3803 cost = force_var_cost (data, cbase, depends_on);
3804 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3805 cost = add_costs (cost,
3806 difference_cost (data,
3807 ubase, build_int_cst (utype, 0),
3808 &symbol_present, &var_present,
3809 &offset, depends_on));
3812 /* If we are after the increment, the value of the candidate is higher by
3814 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3815 if (stmt_is_after_inc)
3816 offset -= ratio * cstepi;
3818 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3819 (symbol/var1/const parts may be omitted). If we are looking for an
3820 address, find the cost of addressing this. */
3822 return add_costs (cost,
3823 get_address_cost (symbol_present, var_present,
3824 offset, ratio, cstepi,
3825 TYPE_MODE (TREE_TYPE (utype)),
3826 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3827 speed, stmt_is_after_inc,
3830 /* Otherwise estimate the costs for computing the expression. */
3831 if (!symbol_present && !var_present && !offset)
3834 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3838 /* Symbol + offset should be compile-time computable so consider that they
3839 are added once to the variable, if present. */
3840 if (var_present && (symbol_present || offset))
3841 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3842 / AVG_LOOP_NITER (data->current_loop);
3844 /* Having offset does not affect runtime cost in case it is added to
3845 symbol, but it increases complexity. */
3849 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3851 aratio = ratio > 0 ? ratio : -ratio;
3853 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3857 *can_autoinc = false;
3860 /* Just get the expression, expand it and measure the cost. */
3861 tree comp = get_computation_at (data->current_loop, use, cand, at);
3864 return infinite_cost;
3867 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3869 return new_cost (computation_cost (comp, speed), 0);
3873 /* Determines the cost of the computation by that USE is expressed
3874 from induction variable CAND. If ADDRESS_P is true, we just need
3875 to create an address from it, otherwise we want to get it into
3876 register. A set of invariants we depend on is stored in
3877 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3878 autoinc addressing is likely. */
3881 get_computation_cost (struct ivopts_data *data,
3882 struct iv_use *use, struct iv_cand *cand,
3883 bool address_p, bitmap *depends_on, bool *can_autoinc)
3885 return get_computation_cost_at (data,
3886 use, cand, address_p, depends_on, use->stmt,
3890 /* Determines cost of basing replacement of USE on CAND in a generic
3894 determine_use_iv_cost_generic (struct ivopts_data *data,
3895 struct iv_use *use, struct iv_cand *cand)
3900 /* The simple case first -- if we need to express value of the preserved
3901 original biv, the cost is 0. This also prevents us from counting the
3902 cost of increment twice -- once at this use and once in the cost of
3904 if (cand->pos == IP_ORIGINAL
3905 && cand->incremented_at == use->stmt)
3907 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3911 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3912 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3914 return !infinite_cost_p (cost);
3917 /* Determines cost of basing replacement of USE on CAND in an address. */
3920 determine_use_iv_cost_address (struct ivopts_data *data,
3921 struct iv_use *use, struct iv_cand *cand)
3925 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3928 if (cand->ainc_use == use)
3931 cost.cost -= cand->cost_step;
3932 /* If we generated the candidate solely for exploiting autoincrement
3933 opportunities, and it turns out it can't be used, set the cost to
3934 infinity to make sure we ignore it. */
3935 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3936 cost = infinite_cost;
3938 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3940 return !infinite_cost_p (cost);
3943 /* Computes value of candidate CAND at position AT in iteration NITER, and
3944 stores it to VAL. */
3947 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3950 aff_tree step, delta, nit;
3951 struct iv *iv = cand->iv;
3952 tree type = TREE_TYPE (iv->base);
3953 tree steptype = type;
3954 if (POINTER_TYPE_P (type))
3955 steptype = sizetype;
3957 tree_to_aff_combination (iv->step, steptype, &step);
3958 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3959 aff_combination_convert (&nit, steptype);
3960 aff_combination_mult (&nit, &step, &delta);
3961 if (stmt_after_increment (loop, cand, at))
3962 aff_combination_add (&delta, &step);
3964 tree_to_aff_combination (iv->base, type, val);
3965 aff_combination_add (val, &delta);
3968 /* Returns period of induction variable iv. */
3971 iv_period (struct iv *iv)
3973 tree step = iv->step, period, type;
3976 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3978 /* Period of the iv is gcd (step, type range). Since type range is power
3979 of two, it suffices to determine the maximum power of two that divides
3981 pow2div = num_ending_zeros (step);
3982 type = unsigned_type_for (TREE_TYPE (step));
3984 period = build_low_bits_mask (type,
3985 (TYPE_PRECISION (type)
3986 - tree_low_cst (pow2div, 1)));
3991 /* Returns the comparison operator used when eliminating the iv USE. */
3993 static enum tree_code
3994 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3996 struct loop *loop = data->current_loop;
4000 ex_bb = gimple_bb (use->stmt);
4001 exit = EDGE_SUCC (ex_bb, 0);
4002 if (flow_bb_inside_loop_p (loop, exit->dest))
4003 exit = EDGE_SUCC (ex_bb, 1);
4005 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4008 /* Check whether it is possible to express the condition in USE by comparison
4009 of candidate CAND. If so, store the value compared with to BOUND. */
4012 may_eliminate_iv (struct ivopts_data *data,
4013 struct iv_use *use, struct iv_cand *cand, tree *bound)
4018 struct loop *loop = data->current_loop;
4021 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4024 /* For now works only for exits that dominate the loop latch.
4025 TODO: extend to other conditions inside loop body. */
4026 ex_bb = gimple_bb (use->stmt);
4027 if (use->stmt != last_stmt (ex_bb)
4028 || gimple_code (use->stmt) != GIMPLE_COND
4029 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4032 exit = EDGE_SUCC (ex_bb, 0);
4033 if (flow_bb_inside_loop_p (loop, exit->dest))
4034 exit = EDGE_SUCC (ex_bb, 1);
4035 if (flow_bb_inside_loop_p (loop, exit->dest))
4038 nit = niter_for_exit (data, exit);
4042 /* Determine whether we can use the variable to test the exit condition.
4043 This is the case iff the period of the induction variable is greater
4044 than the number of iterations for which the exit condition is true. */
4045 period = iv_period (cand->iv);
4047 /* If the number of iterations is constant, compare against it directly. */
4048 if (TREE_CODE (nit) == INTEGER_CST)
4050 if (!tree_int_cst_lt (nit, period))
4054 /* If not, and if this is the only possible exit of the loop, see whether
4055 we can get a conservative estimate on the number of iterations of the
4056 entire loop and compare against that instead. */
4057 else if (loop_only_exit_p (loop, exit))
4059 double_int period_value, max_niter;
4060 if (!estimated_loop_iterations (loop, true, &max_niter))
4062 period_value = tree_to_double_int (period);
4063 if (double_int_ucmp (max_niter, period_value) >= 0)
4067 /* Otherwise, punt. */
4071 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4073 *bound = aff_combination_to_tree (&bnd);
4074 /* It is unlikely that computing the number of iterations using division
4075 would be more profitable than keeping the original induction variable. */
4076 if (expression_expensive_p (*bound))
4081 /* Determines cost of basing replacement of USE on CAND in a condition. */
4084 determine_use_iv_cost_condition (struct ivopts_data *data,
4085 struct iv_use *use, struct iv_cand *cand)
4087 tree bound = NULL_TREE;
4089 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4090 comp_cost elim_cost, express_cost, cost;
4092 tree *control_var, *bound_cst;
4094 /* Only consider real candidates. */
4097 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4101 /* Try iv elimination. */
4102 if (may_eliminate_iv (data, use, cand, &bound))
4104 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4105 /* The bound is a loop invariant, so it will be only computed
4107 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4110 elim_cost = infinite_cost;
4112 /* Try expressing the original giv. If it is compared with an invariant,
4113 note that we cannot get rid of it. */
4114 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4118 /* When the condition is a comparison of the candidate IV against
4119 zero, prefer this IV.
4121 TODO: The constant that we're substracting from the cost should
4122 be target-dependent. This information should be added to the
4123 target costs for each backend. */
4124 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4125 && integer_zerop (*bound_cst)
4126 && (operand_equal_p (*control_var, cand->var_after, 0)
4127 || operand_equal_p (*control_var, cand->var_before, 0)))
4128 elim_cost.cost -= 1;
4130 express_cost = get_computation_cost (data, use, cand, false,
4131 &depends_on_express, NULL);
4132 fd_ivopts_data = data;
4133 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4135 /* Choose the better approach, preferring the eliminated IV. */
4136 if (compare_costs (elim_cost, express_cost) <= 0)
4139 depends_on = depends_on_elim;
4140 depends_on_elim = NULL;
4144 cost = express_cost;
4145 depends_on = depends_on_express;
4146 depends_on_express = NULL;
4150 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4152 if (depends_on_elim)
4153 BITMAP_FREE (depends_on_elim);
4154 if (depends_on_express)
4155 BITMAP_FREE (depends_on_express);
4157 return !infinite_cost_p (cost);
4160 /* Determines cost of basing replacement of USE on CAND. Returns false
4161 if USE cannot be based on CAND. */
4164 determine_use_iv_cost (struct ivopts_data *data,
4165 struct iv_use *use, struct iv_cand *cand)
4169 case USE_NONLINEAR_EXPR:
4170 return determine_use_iv_cost_generic (data, use, cand);
4173 return determine_use_iv_cost_address (data, use, cand);
4176 return determine_use_iv_cost_condition (data, use, cand);
4183 /* Return true if get_computation_cost indicates that autoincrement is
4184 a possibility for the pair of USE and CAND, false otherwise. */
4187 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4188 struct iv_cand *cand)
4194 if (use->type != USE_ADDRESS)
4197 cost = get_computation_cost (data, use, cand, true, &depends_on,
4200 BITMAP_FREE (depends_on);
4202 return !infinite_cost_p (cost) && can_autoinc;
4205 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4206 use that allows autoincrement, and set their AINC_USE if possible. */
4209 set_autoinc_for_original_candidates (struct ivopts_data *data)
4213 for (i = 0; i < n_iv_cands (data); i++)
4215 struct iv_cand *cand = iv_cand (data, i);
4216 struct iv_use *closest = NULL;
4217 if (cand->pos != IP_ORIGINAL)
4219 for (j = 0; j < n_iv_uses (data); j++)
4221 struct iv_use *use = iv_use (data, j);
4222 unsigned uid = gimple_uid (use->stmt);
4223 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4224 || uid > gimple_uid (cand->incremented_at))
4226 if (closest == NULL || uid > gimple_uid (closest->stmt))
4229 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4231 cand->ainc_use = closest;
4235 /* Finds the candidates for the induction variables. */
4238 find_iv_candidates (struct ivopts_data *data)
4240 /* Add commonly used ivs. */
4241 add_standard_iv_candidates (data);
4243 /* Add old induction variables. */
4244 add_old_ivs_candidates (data);
4246 /* Add induction variables derived from uses. */
4247 add_derived_ivs_candidates (data);
4249 set_autoinc_for_original_candidates (data);
4251 /* Record the important candidates. */
4252 record_important_candidates (data);
4255 /* Determines costs of basing the use of the iv on an iv candidate. */
4258 determine_use_iv_costs (struct ivopts_data *data)
4262 struct iv_cand *cand;
4263 bitmap to_clear = BITMAP_ALLOC (NULL);
4265 alloc_use_cost_map (data);
4267 for (i = 0; i < n_iv_uses (data); i++)
4269 use = iv_use (data, i);
4271 if (data->consider_all_candidates)
4273 for (j = 0; j < n_iv_cands (data); j++)
4275 cand = iv_cand (data, j);
4276 determine_use_iv_cost (data, use, cand);
4283 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4285 cand = iv_cand (data, j);
4286 if (!determine_use_iv_cost (data, use, cand))
4287 bitmap_set_bit (to_clear, j);
4290 /* Remove the candidates for that the cost is infinite from
4291 the list of related candidates. */
4292 bitmap_and_compl_into (use->related_cands, to_clear);
4293 bitmap_clear (to_clear);
4297 BITMAP_FREE (to_clear);
4299 if (dump_file && (dump_flags & TDF_DETAILS))
4301 fprintf (dump_file, "Use-candidate costs:\n");
4303 for (i = 0; i < n_iv_uses (data); i++)
4305 use = iv_use (data, i);
4307 fprintf (dump_file, "Use %d:\n", i);
4308 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4309 for (j = 0; j < use->n_map_members; j++)
4311 if (!use->cost_map[j].cand
4312 || infinite_cost_p (use->cost_map[j].cost))
4315 fprintf (dump_file, " %d\t%d\t%d\t",
4316 use->cost_map[j].cand->id,
4317 use->cost_map[j].cost.cost,
4318 use->cost_map[j].cost.complexity);
4319 if (use->cost_map[j].depends_on)
4320 bitmap_print (dump_file,
4321 use->cost_map[j].depends_on, "","");
4322 fprintf (dump_file, "\n");
4325 fprintf (dump_file, "\n");
4327 fprintf (dump_file, "\n");
4331 /* Determines cost of the candidate CAND. */
4334 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4336 comp_cost cost_base;
4337 unsigned cost, cost_step;
4346 /* There are two costs associated with the candidate -- its increment
4347 and its initialization. The second is almost negligible for any loop
4348 that rolls enough, so we take it just very little into account. */
4350 base = cand->iv->base;
4351 cost_base = force_var_cost (data, base, NULL);
4352 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4354 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4356 /* Prefer the original ivs unless we may gain something by replacing it.
4357 The reason is to make debugging simpler; so this is not relevant for
4358 artificial ivs created by other optimization passes. */
4359 if (cand->pos != IP_ORIGINAL
4360 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4363 /* Prefer not to insert statements into latch unless there are some
4364 already (so that we do not create unnecessary jumps). */
4365 if (cand->pos == IP_END
4366 && empty_block_p (ip_end_pos (data->current_loop)))
4370 cand->cost_step = cost_step;
4373 /* Determines costs of computation of the candidates. */
4376 determine_iv_costs (struct ivopts_data *data)
4380 if (dump_file && (dump_flags & TDF_DETAILS))
4382 fprintf (dump_file, "Candidate costs:\n");
4383 fprintf (dump_file, " cand\tcost\n");
4386 for (i = 0; i < n_iv_cands (data); i++)
4388 struct iv_cand *cand = iv_cand (data, i);
4390 determine_iv_cost (data, cand);
4392 if (dump_file && (dump_flags & TDF_DETAILS))
4393 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4396 if (dump_file && (dump_flags & TDF_DETAILS))
4397 fprintf (dump_file, "\n");
4400 /* Calculates cost for having SIZE induction variables. */
4403 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4405 /* We add size to the cost, so that we prefer eliminating ivs
4407 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4410 /* For each size of the induction variable set determine the penalty. */
4413 determine_set_costs (struct ivopts_data *data)
4417 gimple_stmt_iterator psi;
4419 struct loop *loop = data->current_loop;
4422 /* We use the following model (definitely improvable, especially the
4423 cost function -- TODO):
4425 We estimate the number of registers available (using MD data), name it A.
4427 We estimate the number of registers used by the loop, name it U. This
4428 number is obtained as the number of loop phi nodes (not counting virtual
4429 registers and bivs) + the number of variables from outside of the loop.
4431 We set a reserve R (free regs that are used for temporary computations,
4432 etc.). For now the reserve is a constant 3.
4434 Let I be the number of induction variables.
4436 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4437 make a lot of ivs without a reason).
4438 -- if A - R < U + I <= A, the cost is I * PRES_COST
4439 -- if U + I > A, the cost is I * PRES_COST and
4440 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4442 if (dump_file && (dump_flags & TDF_DETAILS))
4444 fprintf (dump_file, "Global costs:\n");
4445 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4446 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4447 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4451 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4453 phi = gsi_stmt (psi);
4454 op = PHI_RESULT (phi);
4456 if (!is_gimple_reg (op))
4459 if (get_iv (data, op))
4465 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4467 struct version_info *info = ver_info (data, j);
4469 if (info->inv_id && info->has_nonlin_use)
4473 data->regs_used = n;
4474 if (dump_file && (dump_flags & TDF_DETAILS))
4475 fprintf (dump_file, " regs_used %d\n", n);
4477 if (dump_file && (dump_flags & TDF_DETAILS))
4479 fprintf (dump_file, " cost for size:\n");
4480 fprintf (dump_file, " ivs\tcost\n");
4481 for (j = 0; j <= 2 * target_avail_regs; j++)
4482 fprintf (dump_file, " %d\t%d\n", j,
4483 ivopts_global_cost_for_size (data, j));
4484 fprintf (dump_file, "\n");
4488 /* Returns true if A is a cheaper cost pair than B. */
4491 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4501 cmp = compare_costs (a->cost, b->cost);
4508 /* In case the costs are the same, prefer the cheaper candidate. */
4509 if (a->cand->cost < b->cand->cost)
4515 /* Computes the cost field of IVS structure. */
4518 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4520 comp_cost cost = ivs->cand_use_cost;
4521 cost.cost += ivs->cand_cost;
4522 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4527 /* Remove invariants in set INVS to set IVS. */
4530 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4538 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4540 ivs->n_invariant_uses[iid]--;
4541 if (ivs->n_invariant_uses[iid] == 0)
4546 /* Set USE not to be expressed by any candidate in IVS. */
4549 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4552 unsigned uid = use->id, cid;
4553 struct cost_pair *cp;
4555 cp = ivs->cand_for_use[uid];
4561 ivs->cand_for_use[uid] = NULL;
4562 ivs->n_cand_uses[cid]--;
4564 if (ivs->n_cand_uses[cid] == 0)
4566 bitmap_clear_bit (ivs->cands, cid);
4567 /* Do not count the pseudocandidates. */
4571 ivs->cand_cost -= cp->cand->cost;
4573 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4576 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4578 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4579 iv_ca_recount_cost (data, ivs);
4582 /* Add invariants in set INVS to set IVS. */
4585 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4593 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4595 ivs->n_invariant_uses[iid]++;
4596 if (ivs->n_invariant_uses[iid] == 1)
4601 /* Set cost pair for USE in set IVS to CP. */
4604 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4605 struct iv_use *use, struct cost_pair *cp)
4607 unsigned uid = use->id, cid;
4609 if (ivs->cand_for_use[uid] == cp)
4612 if (ivs->cand_for_use[uid])
4613 iv_ca_set_no_cp (data, ivs, use);
4620 ivs->cand_for_use[uid] = cp;
4621 ivs->n_cand_uses[cid]++;
4622 if (ivs->n_cand_uses[cid] == 1)
4624 bitmap_set_bit (ivs->cands, cid);
4625 /* Do not count the pseudocandidates. */
4629 ivs->cand_cost += cp->cand->cost;
4631 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4634 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4635 iv_ca_set_add_invariants (ivs, cp->depends_on);
4636 iv_ca_recount_cost (data, ivs);
4640 /* Extend set IVS by expressing USE by some of the candidates in it
4644 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4647 struct cost_pair *best_cp = NULL, *cp;
4651 gcc_assert (ivs->upto >= use->id);
4653 if (ivs->upto == use->id)
4659 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4661 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4663 if (cheaper_cost_pair (cp, best_cp))
4667 iv_ca_set_cp (data, ivs, use, best_cp);
4670 /* Get cost for assignment IVS. */
4673 iv_ca_cost (struct iv_ca *ivs)
4675 /* This was a conditional expression but it triggered a bug in
4678 return infinite_cost;
4683 /* Returns true if all dependences of CP are among invariants in IVS. */
4686 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4691 if (!cp->depends_on)
4694 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4696 if (ivs->n_invariant_uses[i] == 0)
4703 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4704 it before NEXT_CHANGE. */
4706 static struct iv_ca_delta *
4707 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4708 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4710 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4713 change->old_cp = old_cp;
4714 change->new_cp = new_cp;
4715 change->next_change = next_change;
4720 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4723 static struct iv_ca_delta *
4724 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4726 struct iv_ca_delta *last;
4734 for (last = l1; last->next_change; last = last->next_change)
4736 last->next_change = l2;
4741 /* Returns candidate by that USE is expressed in IVS. */
4743 static struct cost_pair *
4744 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4746 return ivs->cand_for_use[use->id];
4749 /* Reverse the list of changes DELTA, forming the inverse to it. */
4751 static struct iv_ca_delta *
4752 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4754 struct iv_ca_delta *act, *next, *prev = NULL;
4755 struct cost_pair *tmp;
4757 for (act = delta; act; act = next)
4759 next = act->next_change;
4760 act->next_change = prev;
4764 act->old_cp = act->new_cp;
4771 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4772 reverted instead. */
4775 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4776 struct iv_ca_delta *delta, bool forward)
4778 struct cost_pair *from, *to;
4779 struct iv_ca_delta *act;
4782 delta = iv_ca_delta_reverse (delta);
4784 for (act = delta; act; act = act->next_change)
4788 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4789 iv_ca_set_cp (data, ivs, act->use, to);
4793 iv_ca_delta_reverse (delta);
4796 /* Returns true if CAND is used in IVS. */
4799 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4801 return ivs->n_cand_uses[cand->id] > 0;
4804 /* Returns number of induction variable candidates in the set IVS. */
4807 iv_ca_n_cands (struct iv_ca *ivs)
4809 return ivs->n_cands;
4812 /* Free the list of changes DELTA. */
4815 iv_ca_delta_free (struct iv_ca_delta **delta)
4817 struct iv_ca_delta *act, *next;
4819 for (act = *delta; act; act = next)
4821 next = act->next_change;
4828 /* Allocates new iv candidates assignment. */
4830 static struct iv_ca *
4831 iv_ca_new (struct ivopts_data *data)
4833 struct iv_ca *nw = XNEW (struct iv_ca);
4837 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4838 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4839 nw->cands = BITMAP_ALLOC (NULL);
4842 nw->cand_use_cost = zero_cost;
4844 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4845 nw->cost = zero_cost;
4850 /* Free memory occupied by the set IVS. */
4853 iv_ca_free (struct iv_ca **ivs)
4855 free ((*ivs)->cand_for_use);
4856 free ((*ivs)->n_cand_uses);
4857 BITMAP_FREE ((*ivs)->cands);
4858 free ((*ivs)->n_invariant_uses);
4863 /* Dumps IVS to FILE. */
4866 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4868 const char *pref = " invariants ";
4870 comp_cost cost = iv_ca_cost (ivs);
4872 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4873 bitmap_print (file, ivs->cands, " candidates ","\n");
4875 for (i = 1; i <= data->max_inv_id; i++)
4876 if (ivs->n_invariant_uses[i])
4878 fprintf (file, "%s%d", pref, i);
4881 fprintf (file, "\n");
4884 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4885 new set, and store differences in DELTA. Number of induction variables
4886 in the new set is stored to N_IVS. */
4889 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4890 struct iv_cand *cand, struct iv_ca_delta **delta,
4896 struct cost_pair *old_cp, *new_cp;
4899 for (i = 0; i < ivs->upto; i++)
4901 use = iv_use (data, i);
4902 old_cp = iv_ca_cand_for_use (ivs, use);
4905 && old_cp->cand == cand)
4908 new_cp = get_use_iv_cost (data, use, cand);
4912 if (!iv_ca_has_deps (ivs, new_cp))
4915 if (!cheaper_cost_pair (new_cp, old_cp))
4918 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4921 iv_ca_delta_commit (data, ivs, *delta, true);
4922 cost = iv_ca_cost (ivs);
4924 *n_ivs = iv_ca_n_cands (ivs);
4925 iv_ca_delta_commit (data, ivs, *delta, false);
4930 /* Try narrowing set IVS by removing CAND. Return the cost of
4931 the new set and store the differences in DELTA. */
4934 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4935 struct iv_cand *cand, struct iv_ca_delta **delta)
4939 struct cost_pair *old_cp, *new_cp, *cp;
4941 struct iv_cand *cnd;
4945 for (i = 0; i < n_iv_uses (data); i++)
4947 use = iv_use (data, i);
4949 old_cp = iv_ca_cand_for_use (ivs, use);
4950 if (old_cp->cand != cand)
4955 if (data->consider_all_candidates)
4957 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4962 cnd = iv_cand (data, ci);
4964 cp = get_use_iv_cost (data, use, cnd);
4967 if (!iv_ca_has_deps (ivs, cp))
4970 if (!cheaper_cost_pair (cp, new_cp))
4978 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4983 cnd = iv_cand (data, ci);
4985 cp = get_use_iv_cost (data, use, cnd);
4988 if (!iv_ca_has_deps (ivs, cp))
4991 if (!cheaper_cost_pair (cp, new_cp))
5000 iv_ca_delta_free (delta);
5001 return infinite_cost;
5004 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5007 iv_ca_delta_commit (data, ivs, *delta, true);
5008 cost = iv_ca_cost (ivs);
5009 iv_ca_delta_commit (data, ivs, *delta, false);
5014 /* Try optimizing the set of candidates IVS by removing candidates different
5015 from to EXCEPT_CAND from it. Return cost of the new set, and store
5016 differences in DELTA. */
5019 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5020 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5023 struct iv_ca_delta *act_delta, *best_delta;
5025 comp_cost best_cost, acost;
5026 struct iv_cand *cand;
5029 best_cost = iv_ca_cost (ivs);
5031 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5033 cand = iv_cand (data, i);
5035 if (cand == except_cand)
5038 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5040 if (compare_costs (acost, best_cost) < 0)
5043 iv_ca_delta_free (&best_delta);
5044 best_delta = act_delta;
5047 iv_ca_delta_free (&act_delta);
5056 /* Recurse to possibly remove other unnecessary ivs. */
5057 iv_ca_delta_commit (data, ivs, best_delta, true);
5058 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5059 iv_ca_delta_commit (data, ivs, best_delta, false);
5060 *delta = iv_ca_delta_join (best_delta, *delta);
5064 /* Tries to extend the sets IVS in the best possible way in order
5065 to express the USE. */
5068 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5071 comp_cost best_cost, act_cost;
5074 struct iv_cand *cand;
5075 struct iv_ca_delta *best_delta = NULL, *act_delta;
5076 struct cost_pair *cp;
5078 iv_ca_add_use (data, ivs, use);
5079 best_cost = iv_ca_cost (ivs);
5081 cp = iv_ca_cand_for_use (ivs, use);
5084 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5085 iv_ca_set_no_cp (data, ivs, use);
5088 /* First try important candidates not based on any memory object. Only if
5089 this fails, try the specific ones. Rationale -- in loops with many
5090 variables the best choice often is to use just one generic biv. If we
5091 added here many ivs specific to the uses, the optimization algorithm later
5092 would be likely to get stuck in a local minimum, thus causing us to create
5093 too many ivs. The approach from few ivs to more seems more likely to be
5094 successful -- starting from few ivs, replacing an expensive use by a
5095 specific iv should always be a win. */
5096 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5098 cand = iv_cand (data, i);
5100 if (cand->iv->base_object != NULL_TREE)
5103 if (iv_ca_cand_used_p (ivs, cand))
5106 cp = get_use_iv_cost (data, use, cand);
5110 iv_ca_set_cp (data, ivs, use, cp);
5111 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5112 iv_ca_set_no_cp (data, ivs, use);
5113 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5115 if (compare_costs (act_cost, best_cost) < 0)
5117 best_cost = act_cost;
5119 iv_ca_delta_free (&best_delta);
5120 best_delta = act_delta;
5123 iv_ca_delta_free (&act_delta);
5126 if (infinite_cost_p (best_cost))
5128 for (i = 0; i < use->n_map_members; i++)
5130 cp = use->cost_map + i;
5135 /* Already tried this. */
5136 if (cand->important && cand->iv->base_object == NULL_TREE)
5139 if (iv_ca_cand_used_p (ivs, cand))
5143 iv_ca_set_cp (data, ivs, use, cp);
5144 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5145 iv_ca_set_no_cp (data, ivs, use);
5146 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5149 if (compare_costs (act_cost, best_cost) < 0)
5151 best_cost = act_cost;
5154 iv_ca_delta_free (&best_delta);
5155 best_delta = act_delta;
5158 iv_ca_delta_free (&act_delta);
5162 iv_ca_delta_commit (data, ivs, best_delta, true);
5163 iv_ca_delta_free (&best_delta);
5165 return !infinite_cost_p (best_cost);
5168 /* Finds an initial assignment of candidates to uses. */
5170 static struct iv_ca *
5171 get_initial_solution (struct ivopts_data *data)
5173 struct iv_ca *ivs = iv_ca_new (data);
5176 for (i = 0; i < n_iv_uses (data); i++)
5177 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5186 /* Tries to improve set of induction variables IVS. */
5189 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5192 comp_cost acost, best_cost = iv_ca_cost (ivs);
5193 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5194 struct iv_cand *cand;
5196 /* Try extending the set of induction variables by one. */
5197 for (i = 0; i < n_iv_cands (data); i++)
5199 cand = iv_cand (data, i);
5201 if (iv_ca_cand_used_p (ivs, cand))
5204 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5208 /* If we successfully added the candidate and the set is small enough,
5209 try optimizing it by removing other candidates. */
5210 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5212 iv_ca_delta_commit (data, ivs, act_delta, true);
5213 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5214 iv_ca_delta_commit (data, ivs, act_delta, false);
5215 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5218 if (compare_costs (acost, best_cost) < 0)
5221 iv_ca_delta_free (&best_delta);
5222 best_delta = act_delta;
5225 iv_ca_delta_free (&act_delta);
5230 /* Try removing the candidates from the set instead. */
5231 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5233 /* Nothing more we can do. */
5238 iv_ca_delta_commit (data, ivs, best_delta, true);
5239 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5240 iv_ca_delta_free (&best_delta);
5244 /* Attempts to find the optimal set of induction variables. We do simple
5245 greedy heuristic -- we try to replace at most one candidate in the selected
5246 solution and remove the unused ivs while this improves the cost. */
5248 static struct iv_ca *
5249 find_optimal_iv_set (struct ivopts_data *data)
5255 /* Get the initial solution. */
5256 set = get_initial_solution (data);
5259 if (dump_file && (dump_flags & TDF_DETAILS))
5260 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5264 if (dump_file && (dump_flags & TDF_DETAILS))
5266 fprintf (dump_file, "Initial set of candidates:\n");
5267 iv_ca_dump (data, dump_file, set);
5270 while (try_improve_iv_set (data, set))
5272 if (dump_file && (dump_flags & TDF_DETAILS))
5274 fprintf (dump_file, "Improved to:\n");
5275 iv_ca_dump (data, dump_file, set);
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5281 comp_cost cost = iv_ca_cost (set);
5282 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5285 for (i = 0; i < n_iv_uses (data); i++)
5287 use = iv_use (data, i);
5288 use->selected = iv_ca_cand_for_use (set, use)->cand;
5294 /* Creates a new induction variable corresponding to CAND. */
5297 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5299 gimple_stmt_iterator incr_pos;
5309 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5313 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5321 incr_pos = gsi_for_stmt (cand->incremented_at);
5325 /* Mark that the iv is preserved. */
5326 name_info (data, cand->var_before)->preserve_biv = true;
5327 name_info (data, cand->var_after)->preserve_biv = true;
5329 /* Rewrite the increment so that it uses var_before directly. */
5330 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5335 gimple_add_tmp_var (cand->var_before);
5336 add_referenced_var (cand->var_before);
5338 base = unshare_expr (cand->iv->base);
5340 create_iv (base, unshare_expr (cand->iv->step),
5341 cand->var_before, data->current_loop,
5342 &incr_pos, after, &cand->var_before, &cand->var_after);
5345 /* Creates new induction variables described in SET. */
5348 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5351 struct iv_cand *cand;
5354 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5356 cand = iv_cand (data, i);
5357 create_new_iv (data, cand);
5362 /* Rewrites USE (definition of iv used in a nonlinear expression)
5363 using candidate CAND. */
5366 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5367 struct iv_use *use, struct iv_cand *cand)
5372 gimple_stmt_iterator bsi;
5374 /* An important special case -- if we are asked to express value of
5375 the original iv by itself, just exit; there is no need to
5376 introduce a new computation (that might also need casting the
5377 variable to unsigned and back). */
5378 if (cand->pos == IP_ORIGINAL
5379 && cand->incremented_at == use->stmt)
5381 tree step, ctype, utype;
5382 enum tree_code incr_code = PLUS_EXPR, old_code;
5384 gcc_assert (is_gimple_assign (use->stmt));
5385 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5387 step = cand->iv->step;
5388 ctype = TREE_TYPE (step);
5389 utype = TREE_TYPE (cand->var_after);
5390 if (TREE_CODE (step) == NEGATE_EXPR)
5392 incr_code = MINUS_EXPR;
5393 step = TREE_OPERAND (step, 0);
5396 /* Check whether we may leave the computation unchanged.
5397 This is the case only if it does not rely on other
5398 computations in the loop -- otherwise, the computation
5399 we rely upon may be removed in remove_unused_ivs,
5400 thus leading to ICE. */
5401 old_code = gimple_assign_rhs_code (use->stmt);
5402 if (old_code == PLUS_EXPR
5403 || old_code == MINUS_EXPR
5404 || old_code == POINTER_PLUS_EXPR)
5406 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5407 op = gimple_assign_rhs2 (use->stmt);
5408 else if (old_code != MINUS_EXPR
5409 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5410 op = gimple_assign_rhs1 (use->stmt);
5418 && (TREE_CODE (op) == INTEGER_CST
5419 || operand_equal_p (op, step, 0)))
5422 /* Otherwise, add the necessary computations to express
5424 op = fold_convert (ctype, cand->var_before);
5425 comp = fold_convert (utype,
5426 build2 (incr_code, ctype, op,
5427 unshare_expr (step)));
5431 comp = get_computation (data->current_loop, use, cand);
5432 gcc_assert (comp != NULL_TREE);
5435 switch (gimple_code (use->stmt))
5438 tgt = PHI_RESULT (use->stmt);
5440 /* If we should keep the biv, do not replace it. */
5441 if (name_info (data, tgt)->preserve_biv)
5444 bsi = gsi_after_labels (gimple_bb (use->stmt));
5448 tgt = gimple_assign_lhs (use->stmt);
5449 bsi = gsi_for_stmt (use->stmt);
5456 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5457 true, GSI_SAME_STMT);
5459 if (gimple_code (use->stmt) == GIMPLE_PHI)
5461 ass = gimple_build_assign (tgt, op);
5462 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5464 bsi = gsi_for_stmt (use->stmt);
5465 remove_phi_node (&bsi, false);
5469 gimple_assign_set_rhs_from_tree (&bsi, op);
5470 use->stmt = gsi_stmt (bsi);
5474 /* Replaces ssa name in index IDX by its basic variable. Callback for
5478 idx_remove_ssa_names (tree base, tree *idx,
5479 void *data ATTRIBUTE_UNUSED)
5483 if (TREE_CODE (*idx) == SSA_NAME)
5484 *idx = SSA_NAME_VAR (*idx);
5486 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5488 op = &TREE_OPERAND (base, 2);
5490 && TREE_CODE (*op) == SSA_NAME)
5491 *op = SSA_NAME_VAR (*op);
5492 op = &TREE_OPERAND (base, 3);
5494 && TREE_CODE (*op) == SSA_NAME)
5495 *op = SSA_NAME_VAR (*op);
5501 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5504 unshare_and_remove_ssa_names (tree ref)
5506 ref = unshare_expr (ref);
5507 for_each_index (&ref, idx_remove_ssa_names, NULL);
5512 /* Copies the reference information from OLD_REF to NEW_REF. */
5515 copy_ref_info (tree new_ref, tree old_ref)
5517 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5518 copy_mem_ref_info (new_ref, old_ref);
5521 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5522 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5523 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5527 /* Rewrites USE (address that is an iv) using candidate CAND. */
5530 rewrite_use_address (struct ivopts_data *data,
5531 struct iv_use *use, struct iv_cand *cand)
5534 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5535 tree base_hint = NULL_TREE;
5539 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5541 unshare_aff_combination (&aff);
5543 /* To avoid undefined overflow problems, all IV candidates use unsigned
5544 integer types. The drawback is that this makes it impossible for
5545 create_mem_ref to distinguish an IV that is based on a memory object
5546 from one that represents simply an offset.
5548 To work around this problem, we pass a hint to create_mem_ref that
5549 indicates which variable (if any) in aff is an IV based on a memory
5550 object. Note that we only consider the candidate. If this is not
5551 based on an object, the base of the reference is in some subexpression
5552 of the use -- but these will use pointer types, so they are recognized
5553 by the create_mem_ref heuristics anyway. */
5554 if (cand->iv->base_object)
5555 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5557 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5559 copy_ref_info (ref, *use->op_p);
5563 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5567 rewrite_use_compare (struct ivopts_data *data,
5568 struct iv_use *use, struct iv_cand *cand)
5570 tree comp, *var_p, op, bound;
5571 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5572 enum tree_code compare;
5573 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5579 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5580 tree var_type = TREE_TYPE (var);
5583 compare = iv_elimination_compare (data, use);
5584 bound = unshare_expr (fold_convert (var_type, bound));
5585 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5587 gsi_insert_seq_on_edge_immediate (
5588 loop_preheader_edge (data->current_loop),
5591 gimple_cond_set_lhs (use->stmt, var);
5592 gimple_cond_set_code (use->stmt, compare);
5593 gimple_cond_set_rhs (use->stmt, op);
5597 /* The induction variable elimination failed; just express the original
5599 comp = get_computation (data->current_loop, use, cand);
5600 gcc_assert (comp != NULL_TREE);
5602 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5605 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5606 true, GSI_SAME_STMT);
5609 /* Rewrites USE using candidate CAND. */
5612 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5616 case USE_NONLINEAR_EXPR:
5617 rewrite_use_nonlinear_expr (data, use, cand);
5621 rewrite_use_address (data, use, cand);
5625 rewrite_use_compare (data, use, cand);
5632 update_stmt (use->stmt);
5635 /* Rewrite the uses using the selected induction variables. */
5638 rewrite_uses (struct ivopts_data *data)
5641 struct iv_cand *cand;
5644 for (i = 0; i < n_iv_uses (data); i++)
5646 use = iv_use (data, i);
5647 cand = use->selected;
5650 rewrite_use (data, use, cand);
5654 /* Removes the ivs that are not used after rewriting. */
5657 remove_unused_ivs (struct ivopts_data *data)
5661 bitmap toremove = BITMAP_ALLOC (NULL);
5663 /* Figure out an order in which to release SSA DEFs so that we don't
5664 release something that we'd have to propagate into a debug stmt
5666 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5668 struct version_info *info;
5670 info = ver_info (data, j);
5672 && !integer_zerop (info->iv->step)
5674 && !info->iv->have_use_for
5675 && !info->preserve_biv)
5676 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5679 release_defs_bitset (toremove);
5681 BITMAP_FREE (toremove);
5684 /* Frees data allocated by the optimization of a single loop. */
5687 free_loop_data (struct ivopts_data *data)
5695 pointer_map_destroy (data->niters);
5696 data->niters = NULL;
5699 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5701 struct version_info *info;
5703 info = ver_info (data, i);
5707 info->has_nonlin_use = false;
5708 info->preserve_biv = false;
5711 bitmap_clear (data->relevant);
5712 bitmap_clear (data->important_candidates);
5714 for (i = 0; i < n_iv_uses (data); i++)
5716 struct iv_use *use = iv_use (data, i);
5719 BITMAP_FREE (use->related_cands);
5720 for (j = 0; j < use->n_map_members; j++)
5721 if (use->cost_map[j].depends_on)
5722 BITMAP_FREE (use->cost_map[j].depends_on);
5723 free (use->cost_map);
5726 VEC_truncate (iv_use_p, data->iv_uses, 0);
5728 for (i = 0; i < n_iv_cands (data); i++)
5730 struct iv_cand *cand = iv_cand (data, i);
5734 if (cand->depends_on)
5735 BITMAP_FREE (cand->depends_on);
5738 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5740 if (data->version_info_size < num_ssa_names)
5742 data->version_info_size = 2 * num_ssa_names;
5743 free (data->version_info);
5744 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5747 data->max_inv_id = 0;
5749 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5750 SET_DECL_RTL (obj, NULL_RTX);
5752 VEC_truncate (tree, decl_rtl_to_reset, 0);
5755 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5759 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5761 free_loop_data (data);
5762 free (data->version_info);
5763 BITMAP_FREE (data->relevant);
5764 BITMAP_FREE (data->important_candidates);
5766 VEC_free (tree, heap, decl_rtl_to_reset);
5767 VEC_free (iv_use_p, heap, data->iv_uses);
5768 VEC_free (iv_cand_p, heap, data->iv_candidates);
5771 /* Optimizes the LOOP. Returns true if anything changed. */
5774 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5776 bool changed = false;
5777 struct iv_ca *iv_ca;
5781 gcc_assert (!data->niters);
5782 data->current_loop = loop;
5783 data->speed = optimize_loop_for_speed_p (loop);
5785 if (dump_file && (dump_flags & TDF_DETAILS))
5787 fprintf (dump_file, "Processing loop %d\n", loop->num);
5789 exit = single_dom_exit (loop);
5792 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5793 exit->src->index, exit->dest->index);
5794 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5795 fprintf (dump_file, "\n");
5798 fprintf (dump_file, "\n");
5801 body = get_loop_body (loop);
5802 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5805 /* For each ssa name determines whether it behaves as an induction variable
5807 if (!find_induction_variables (data))
5810 /* Finds interesting uses (item 1). */
5811 find_interesting_uses (data);
5812 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5815 /* Finds candidates for the induction variables (item 2). */
5816 find_iv_candidates (data);
5818 /* Calculates the costs (item 3, part 1). */
5819 determine_iv_costs (data);
5820 determine_use_iv_costs (data);
5821 determine_set_costs (data);
5823 /* Find the optimal set of induction variables (item 3, part 2). */
5824 iv_ca = find_optimal_iv_set (data);
5829 /* Create the new induction variables (item 4, part 1). */
5830 create_new_ivs (data, iv_ca);
5831 iv_ca_free (&iv_ca);
5833 /* Rewrite the uses (item 4, part 2). */
5834 rewrite_uses (data);
5836 /* Remove the ivs that are unused after rewriting. */
5837 remove_unused_ivs (data);
5839 /* We have changed the structure of induction variables; it might happen
5840 that definitions in the scev database refer to some of them that were
5845 free_loop_data (data);
5850 /* Main entry point. Optimizes induction variables in loops. */
5853 tree_ssa_iv_optimize (void)
5856 struct ivopts_data data;
5859 tree_ssa_iv_optimize_init (&data);
5861 /* Optimize the loops starting with the innermost ones. */
5862 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5864 if (dump_file && (dump_flags & TDF_DETAILS))
5865 flow_loop_dump (loop, dump_file, NULL, 1);
5867 tree_ssa_iv_optimize_loop (&data, loop);
5870 tree_ssa_iv_optimize_finalize (&data);