/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
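
/* As a small illustration (not part of the original description), a loop
   such as

       for (i = 0; i < n; i++)
         a[i] = 0;

   may end up using a pointer induction variable instead of the index:

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   Here the multiplication hidden in the address computation &a[i] is
   replaced by a simple pointer increment (strength reduction), and the
   counter i can be removed if it has no other uses (induction variable
   elimination).  */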
67 #include "coretypes.h"
71 #include "basic-block.h"
73 #include "diagnostic.h"
74 #include "tree-pretty-print.h"
75 #include "gimple-pretty-print.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
81 #include "tree-pass.h"
83 #include "insn-config.h"
85 #include "pointer-set.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
92 #include "tree-affine.h"
95 /* The infinite cost. */
96 #define INFTY 10000000
/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
100 #define AVG_LOOP_NITER(LOOP) 5
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
  tree base_object;   /* A memory object to which the induction variable
                         points.  */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
123 unsigned inv_id; /* Id of an invariant. */
129 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
130 USE_ADDRESS, /* Use in an address. */
131 USE_COMPARE /* Use is a compare. */
134 /* Cost of a computation. */
137 int cost; /* The runtime cost. */
138 unsigned complexity; /* The estimate of the complexity of the code for
139 the computation (in no concrete units --
140 complexity field should be larger for more
141 complex expressions and addressing modes). */
144 static const comp_cost zero_cost = {0, 0};
145 static const comp_cost infinite_cost = {INFTY, INFTY};
147 /* The candidate - cost pair. */
150 struct iv_cand *cand; /* The candidate. */
151 comp_cost cost; /* The cost. */
  bitmap depends_on;  /* The list of invariants that have to be
                         preserved.  */
154 tree value; /* For final value elimination, the expression for
155 the final value of the iv. For iv elimination,
156 the new bound to compare with. */
162 unsigned id; /* The id of the use. */
163 enum use_type type; /* Type of the use. */
164 struct iv *iv; /* The induction variable it is based on. */
165 gimple stmt; /* Statement in that it occurs. */
166 tree *op_p; /* The place where it occurs. */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */
170 unsigned n_map_members; /* Number of candidates in the cost_map list. */
171 struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */
174 struct iv_cand *selected;
175 /* The selected candidate. */
178 /* The position where the iv is computed. */
181 IP_NORMAL, /* At the end, just before the exit condition. */
182 IP_END, /* At the end of the latch block. */
183 IP_BEFORE_USE, /* Immediately before a specific use. */
184 IP_AFTER_USE, /* Immediately after a specific use. */
185 IP_ORIGINAL /* The original biv. */
188 /* The induction variable candidate. */
191 unsigned id; /* The number of the candidate. */
192 bool important; /* Whether this is an "important" candidate, i.e. such
193 that it should be considered by all uses. */
194 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
197 tree var_before; /* The variable used for it before increment. */
198 tree var_after; /* The variable used for it after increment. */
199 struct iv *iv; /* The value of the candidate. NULL for
200 "pseudocandidate" used to indicate the possibility
201 to replace the final value of an iv by direct
202 computation of the value. */
203 unsigned cost; /* Cost of the candidate. */
204 unsigned cost_step; /* Cost of the candidate's increment operation. */
205 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
206 where it is incremented. */
  bitmap depends_on;  /* The list of invariants that are used in step of the
                         biv.  */
211 /* The data used by the induction variable optimizations. */
typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);
217 typedef struct iv_cand *iv_cand_p;
218 DEF_VEC_P(iv_cand_p);
219 DEF_VEC_ALLOC_P(iv_cand_p,heap);
223 /* The currently optimized loop. */
224 struct loop *current_loop;
226 /* Numbers of iterations for all exits of the current loop. */
227 struct pointer_map_t *niters;
229 /* Number of registers used in it. */
232 /* The size of version_info array allocated. */
233 unsigned version_info_size;
235 /* The array of information for the ssa names. */
236 struct version_info *version_info;
238 /* The bitmap of indices in version_info whose value was changed. */
241 /* The uses of induction variables. */
242 VEC(iv_use_p,heap) *iv_uses;
244 /* The candidates. */
245 VEC(iv_cand_p,heap) *iv_candidates;
247 /* A bitmap of important candidates. */
248 bitmap important_candidates;
250 /* The maximum invariant id. */
  /* Whether to consider just related and important candidates when replacing a
     use.  */
255 bool consider_all_candidates;
257 /* Are we optimizing for speed? */
261 /* An assignment of iv candidates to uses. */
265 /* The number of uses covered by the assignment. */
268 /* Number of uses that cannot be expressed by the candidates in the set. */
271 /* Candidate assigned to a use, together with the related costs. */
272 struct cost_pair **cand_for_use;
274 /* Number of times each candidate is used. */
275 unsigned *n_cand_uses;
277 /* The candidates used. */
280 /* The number of candidates in the set. */
283 /* Total number of registers needed. */
286 /* Total cost of expressing uses. */
287 comp_cost cand_use_cost;
289 /* Total cost of candidates. */
292 /* Number of times each invariant is used. */
293 unsigned *n_invariant_uses;
295 /* Total cost of the assignment. */
299 /* Difference of two iv candidate assignments. */
306 /* An old assignment (for rollback purposes). */
307 struct cost_pair *old_cp;
309 /* A new assignment. */
310 struct cost_pair *new_cp;
312 /* Next change in the list. */
313 struct iv_ca_delta *next_change;
/* Bound on number of candidates below which all candidates are considered
   in each use.  */
318 #define CONSIDER_ALL_CANDIDATES_BOUND \
319 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321 /* If there are more iv occurrences, we just give up (it is quite unlikely that
322 optimizing such a loop would help, and it would take ages). */
324 #define MAX_CONSIDERED_USES \
325 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */
330 #define ALWAYS_PRUNE_CAND_SET_BOUND \
331 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */
336 static VEC(tree,heap) *decl_rtl_to_reset;
338 /* Number of uses recorded in DATA. */
340 static inline unsigned
341 n_iv_uses (struct ivopts_data *data)
343 return VEC_length (iv_use_p, data->iv_uses);
346 /* Ith use recorded in DATA. */
348 static inline struct iv_use *
349 iv_use (struct ivopts_data *data, unsigned i)
351 return VEC_index (iv_use_p, data->iv_uses, i);
354 /* Number of candidates recorded in DATA. */
356 static inline unsigned
357 n_iv_cands (struct ivopts_data *data)
359 return VEC_length (iv_cand_p, data->iv_candidates);
362 /* Ith candidate recorded in DATA. */
364 static inline struct iv_cand *
365 iv_cand (struct ivopts_data *data, unsigned i)
367 return VEC_index (iv_cand_p, data->iv_candidates, i);
370 /* The single loop exit if it dominates the latch, NULL otherwise. */
373 single_dom_exit (struct loop *loop)
375 edge exit = single_exit (loop);
380 if (!just_once_each_iteration_p (loop, exit->src))
386 /* Dumps information about the induction variable IV to FILE. */
388 extern void dump_iv (FILE *, struct iv *);
390 dump_iv (FILE *file, struct iv *iv)
394 fprintf (file, "ssa name ");
395 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
396 fprintf (file, "\n");
399 fprintf (file, " type ");
400 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
401 fprintf (file, "\n");
405 fprintf (file, " base ");
406 print_generic_expr (file, iv->base, TDF_SLIM);
407 fprintf (file, "\n");
409 fprintf (file, " step ");
410 print_generic_expr (file, iv->step, TDF_SLIM);
411 fprintf (file, "\n");
415 fprintf (file, " invariant ");
416 print_generic_expr (file, iv->base, TDF_SLIM);
417 fprintf (file, "\n");
422 fprintf (file, " base object ");
423 print_generic_expr (file, iv->base_object, TDF_SLIM);
424 fprintf (file, "\n");
428 fprintf (file, " is a biv\n");
431 /* Dumps information about the USE to FILE. */
433 extern void dump_use (FILE *, struct iv_use *);
435 dump_use (FILE *file, struct iv_use *use)
437 fprintf (file, "use %d\n", use->id);
441 case USE_NONLINEAR_EXPR:
442 fprintf (file, " generic\n");
446 fprintf (file, " address\n");
450 fprintf (file, " compare\n");
457 fprintf (file, " in statement ");
458 print_gimple_stmt (file, use->stmt, 0, 0);
459 fprintf (file, "\n");
461 fprintf (file, " at position ");
463 print_generic_expr (file, *use->op_p, TDF_SLIM);
464 fprintf (file, "\n");
466 dump_iv (file, use->iv);
468 if (use->related_cands)
470 fprintf (file, " related candidates ");
471 dump_bitmap (file, use->related_cands);
475 /* Dumps information about the uses to FILE. */
477 extern void dump_uses (FILE *, struct ivopts_data *);
479 dump_uses (FILE *file, struct ivopts_data *data)
484 for (i = 0; i < n_iv_uses (data); i++)
486 use = iv_use (data, i);
488 dump_use (file, use);
489 fprintf (file, "\n");
493 /* Dumps information about induction variable candidate CAND to FILE. */
495 extern void dump_cand (FILE *, struct iv_cand *);
497 dump_cand (FILE *file, struct iv_cand *cand)
499 struct iv *iv = cand->iv;
501 fprintf (file, "candidate %d%s\n",
502 cand->id, cand->important ? " (important)" : "");
504 if (cand->depends_on)
506 fprintf (file, " depends on ");
507 dump_bitmap (file, cand->depends_on);
512 fprintf (file, " final value replacement\n");
519 fprintf (file, " incremented before exit test\n");
523 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
527 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
531 fprintf (file, " incremented at end\n");
535 fprintf (file, " original biv\n");
542 /* Returns the info for ssa version VER. */
544 static inline struct version_info *
545 ver_info (struct ivopts_data *data, unsigned ver)
547 return data->version_info + ver;
550 /* Returns the info for ssa name NAME. */
552 static inline struct version_info *
553 name_info (struct ivopts_data *data, tree name)
555 return ver_info (data, SSA_NAME_VERSION (name));
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */
562 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
564 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
568 if (sbb == loop->latch)
574 return stmt == last_stmt (bb);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */
582 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
584 basic_block cand_bb = gimple_bb (cand->incremented_at);
585 basic_block stmt_bb = gimple_bb (stmt);
587 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
590 if (stmt_bb != cand_bb)
594 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */
603 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
611 return stmt_after_ip_normal_pos (loop, stmt);
615 return stmt_after_inc_pos (cand, stmt, false);
618 return stmt_after_inc_pos (cand, stmt, true);
625 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
628 abnormal_ssa_name_p (tree exp)
633 if (TREE_CODE (exp) != SSA_NAME)
636 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
639 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
640 abnormal phi node. Callback for for_each_index. */
643 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
644 void *data ATTRIBUTE_UNUSED)
646 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
648 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
654 return !abnormal_ssa_name_p (*index);
657 /* Returns true if EXPR contains a ssa name that occurs in an
658 abnormal phi node. */
661 contains_abnormal_ssa_name_p (tree expr)
664 enum tree_code_class codeclass;
669 code = TREE_CODE (expr);
670 codeclass = TREE_CODE_CLASS (code);
672 if (code == SSA_NAME)
673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675 if (code == INTEGER_CST
676 || is_gimple_min_invariant (expr))
679 if (code == ADDR_EXPR)
680 return !for_each_index (&TREE_OPERAND (expr, 0),
681 idx_contains_abnormal_ssa_name_p,
684 if (code == COND_EXPR)
685 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
686 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
687 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
693 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
698 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
710 /* Returns tree describing number of iterations determined from
711 EXIT of DATA->current_loop, or NULL if something goes wrong. */
714 niter_for_exit (struct ivopts_data *data, edge exit)
716 struct tree_niter_desc desc;
722 data->niters = pointer_map_create ();
726 slot = pointer_map_contains (data->niters, exit);
730 /* Try to determine number of iterations. We must know it
731 unconditionally (i.e., without possibility of # of iterations
732 being zero). Also, we cannot safely work with ssa names that
733 appear in phi nodes on abnormal edges, so that we do not create
734 overlapping life ranges for them (PR 27283). */
735 if (number_of_iterations_exit (data->current_loop,
737 && integer_zerop (desc.may_be_zero)
738 && !contains_abnormal_ssa_name_p (desc.niter))
743 *pointer_map_insert (data->niters, exit) = niter;
746 niter = (tree) *slot;
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */
756 niter_for_single_dom_exit (struct ivopts_data *data)
758 edge exit = single_dom_exit (data->current_loop);
763 return niter_for_exit (data, exit);
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */
770 tree_ssa_iv_optimize_init (struct ivopts_data *data)
772 data->version_info_size = 2 * num_ssa_names;
773 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
774 data->relevant = BITMAP_ALLOC (NULL);
775 data->important_candidates = BITMAP_ALLOC (NULL);
776 data->max_inv_id = 0;
778 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
779 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
780 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
787 determine_base_object (tree expr)
789 enum tree_code code = TREE_CODE (expr);
792 /* If this is a pointer casted to any type, we need to determine
793 the base object for the pointer; so handle conversions before
794 throwing away non-pointer expressions. */
795 if (CONVERT_EXPR_P (expr))
796 return determine_base_object (TREE_OPERAND (expr, 0));
798 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
807 obj = TREE_OPERAND (expr, 0);
808 base = get_base_address (obj);
813 if (TREE_CODE (base) == INDIRECT_REF)
814 return determine_base_object (TREE_OPERAND (base, 0));
816 return fold_convert (ptr_type_node,
817 build_fold_addr_expr (base));
819 case POINTER_PLUS_EXPR:
820 return determine_base_object (TREE_OPERAND (expr, 0));
824 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
828 return fold_convert (ptr_type_node, expr);
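
/* For illustration (these examples are not from the original sources): for
   an address such as &a[i] the base object determined above is &a; for a
   pointer expression p + 4 it is whatever the base object of p is (here p
   itself, if p is just an SSA name); and for a non-pointer expression such
   as i * 4 NULL_TREE is returned.  */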
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */
836 alloc_iv (tree base, tree step)
838 struct iv *iv = XCNEW (struct iv);
839 gcc_assert (step != NULL_TREE);
842 iv->base_object = determine_base_object (base);
845 iv->have_use_for = false;
847 iv->ssa_name = NULL_TREE;
852 /* Sets STEP and BASE for induction variable IV. */
855 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
857 struct version_info *info = name_info (data, iv);
859 gcc_assert (!info->iv);
861 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
862 info->iv = alloc_iv (base, step);
863 info->iv->ssa_name = iv;
866 /* Finds induction variable declaration for VAR. */
869 get_iv (struct ivopts_data *data, tree var)
872 tree type = TREE_TYPE (var);
874 if (!POINTER_TYPE_P (type)
875 && !INTEGRAL_TYPE_P (type))
878 if (!name_info (data, var)->iv)
880 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
883 || !flow_bb_inside_loop_p (data->current_loop, bb))
884 set_iv (data, var, var, build_int_cst (type, 0));
887 return name_info (data, var)->iv;
890 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
891 not define a simple affine biv with nonzero step. */
894 determine_biv_step (gimple phi)
896 struct loop *loop = gimple_bb (phi)->loop_father;
897 tree name = PHI_RESULT (phi);
900 if (!is_gimple_reg (name))
903 if (!simple_iv (loop, loop, name, &iv, true))
906 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
909 /* Finds basic ivs. */
912 find_bivs (struct ivopts_data *data)
915 tree step, type, base;
917 struct loop *loop = data->current_loop;
918 gimple_stmt_iterator psi;
920 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
922 phi = gsi_stmt (psi);
924 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
927 step = determine_biv_step (phi);
931 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
932 base = expand_simple_operations (base);
933 if (contains_abnormal_ssa_name_p (base)
934 || contains_abnormal_ssa_name_p (step))
937 type = TREE_TYPE (PHI_RESULT (phi));
938 base = fold_convert (type, base);
941 if (POINTER_TYPE_P (type))
942 step = fold_convert (sizetype, step);
944 step = fold_convert (type, step);
947 set_iv (data, PHI_RESULT (phi), base, step);
954 /* Marks basic ivs. */
957 mark_bivs (struct ivopts_data *data)
961 struct iv *iv, *incr_iv;
962 struct loop *loop = data->current_loop;
964 gimple_stmt_iterator psi;
966 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
968 phi = gsi_stmt (psi);
970 iv = get_iv (data, PHI_RESULT (phi));
974 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
975 incr_iv = get_iv (data, var);
979 /* If the increment is in the subloop, ignore it. */
980 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
981 if (incr_bb->loop_father != data->current_loop
982 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
986 incr_iv->biv_p = true;
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */
994 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
997 struct loop *loop = data->current_loop;
999 iv->base = NULL_TREE;
1000 iv->step = NULL_TREE;
1002 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1005 lhs = gimple_assign_lhs (stmt);
1006 if (TREE_CODE (lhs) != SSA_NAME)
1009 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1011 iv->base = expand_simple_operations (iv->base);
1013 if (contains_abnormal_ssa_name_p (iv->base)
1014 || contains_abnormal_ssa_name_p (iv->step))
1020 /* Finds general ivs in statement STMT. */
1023 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1027 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1030 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1033 /* Finds general ivs in basic block BB. */
1036 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1038 gimple_stmt_iterator bsi;
1040 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1041 find_givs_in_stmt (data, gsi_stmt (bsi));
1044 /* Finds general ivs. */
1047 find_givs (struct ivopts_data *data)
1049 struct loop *loop = data->current_loop;
1050 basic_block *body = get_loop_body_in_dom_order (loop);
1053 for (i = 0; i < loop->num_nodes; i++)
1054 find_givs_in_bb (data, body[i]);
1058 /* For each ssa name defined in LOOP determines whether it is an induction
1059 variable and if so, its initial value and step. */
1062 find_induction_variables (struct ivopts_data *data)
1067 if (!find_bivs (data))
1073 if (dump_file && (dump_flags & TDF_DETAILS))
1075 tree niter = niter_for_single_dom_exit (data);
1079 fprintf (dump_file, " number of iterations ");
1080 print_generic_expr (dump_file, niter, TDF_SLIM);
1081 fprintf (dump_file, "\n\n");
1084 fprintf (dump_file, "Induction variables:\n\n");
1086 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1088 if (ver_info (data, i)->iv)
1089 dump_iv (dump_file, ver_info (data, i)->iv);
1096 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1098 static struct iv_use *
1099 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1100 gimple stmt, enum use_type use_type)
1102 struct iv_use *use = XCNEW (struct iv_use);
1104 use->id = n_iv_uses (data);
1105 use->type = use_type;
1109 use->related_cands = BITMAP_ALLOC (NULL);
  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
1113 iv->ssa_name = NULL_TREE;
1115 if (dump_file && (dump_flags & TDF_DETAILS))
1116 dump_use (dump_file, use);
1118 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1123 /* Checks whether OP is a loop-level invariant and if so, records it.
1124 NONLINEAR_USE is true if the invariant is used in a way we do not
1125 handle specially. */
1128 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1131 struct version_info *info;
1133 if (TREE_CODE (op) != SSA_NAME
1134 || !is_gimple_reg (op))
1137 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1139 && flow_bb_inside_loop_p (data->current_loop, bb))
1142 info = name_info (data, op);
1144 info->has_nonlin_use |= nonlinear_use;
1146 info->inv_id = ++data->max_inv_id;
1147 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1150 /* Checks whether the use OP is interesting and if so, records it. */
1152 static struct iv_use *
1153 find_interesting_uses_op (struct ivopts_data *data, tree op)
1160 if (TREE_CODE (op) != SSA_NAME)
1163 iv = get_iv (data, op);
1167 if (iv->have_use_for)
1169 use = iv_use (data, iv->use_id);
1171 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1175 if (integer_zerop (iv->step))
1177 record_invariant (data, op, true);
1180 iv->have_use_for = true;
1182 civ = XNEW (struct iv);
1185 stmt = SSA_NAME_DEF_STMT (op);
1186 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1187 || is_gimple_assign (stmt));
1189 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1190 iv->use_id = use->id;
1195 /* Given a condition in statement STMT, checks whether it is a compare
1196 of an induction variable and an invariant. If this is the case,
1197 CONTROL_VAR is set to location of the iv, BOUND to the location of
1198 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1199 induction variable descriptions, and true is returned. If this is not
1200 the case, CONTROL_VAR and BOUND are set to the arguments of the
1201 condition and false is returned. */
1204 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1205 tree **control_var, tree **bound,
1206 struct iv **iv_var, struct iv **iv_bound)
1208 /* The objects returned when COND has constant operands. */
1209 static struct iv const_iv;
1211 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1212 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1215 if (gimple_code (stmt) == GIMPLE_COND)
1217 op0 = gimple_cond_lhs_ptr (stmt);
1218 op1 = gimple_cond_rhs_ptr (stmt);
1222 op0 = gimple_assign_rhs1_ptr (stmt);
1223 op1 = gimple_assign_rhs2_ptr (stmt);
1226 zero = integer_zero_node;
1227 const_iv.step = integer_zero_node;
1229 if (TREE_CODE (*op0) == SSA_NAME)
1230 iv0 = get_iv (data, *op0);
1231 if (TREE_CODE (*op1) == SSA_NAME)
1232 iv1 = get_iv (data, *op1);
  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
1239 if (integer_zerop (iv0->step))
1241 /* Control variable may be on the other side. */
1242 tmp_op = op0; op0 = op1; op1 = tmp_op;
1243 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1245 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
    *control_var = op0;
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */
1264 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1266 tree *var_p, *bound_p;
1267 struct iv *var_iv, *civ;
1269 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1271 find_interesting_uses_op (data, *var_p);
1272 find_interesting_uses_op (data, *bound_p);
1276 civ = XNEW (struct iv);
1278 record_use (data, NULL, civ, stmt, USE_COMPARE);
1281 /* Returns true if expression EXPR is obviously invariant in LOOP,
1282 i.e. if all its operands are defined outside of the LOOP. LOOP
1283 should not be the function body. */
1286 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1291 gcc_assert (loop_depth (loop) > 0);
1293 if (is_gimple_min_invariant (expr))
1296 if (TREE_CODE (expr) == SSA_NAME)
1298 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1300 && flow_bb_inside_loop_p (loop, def_bb))
1309 len = TREE_OPERAND_LENGTH (expr);
1310 for (i = 0; i < len; i++)
1311 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1317 /* Returns true if statement STMT is obviously invariant in LOOP,
1318 i.e. if all its operands on the RHS are defined outside of the LOOP.
1319 LOOP should not be the function body. */
1322 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1327 gcc_assert (loop_depth (loop) > 0);
1329 lhs = gimple_get_lhs (stmt);
1330 for (i = 0; i < gimple_num_ops (stmt); i++)
1332 tree op = gimple_op (stmt, i);
1333 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1340 /* Cumulates the steps of indices into DATA and replaces their values with the
1341 initial ones. Returns false when the value of the index cannot be determined.
1342 Callback for for_each_index. */
1344 struct ifs_ivopts_data
1346 struct ivopts_data *ivopts_data;
1352 idx_find_step (tree base, tree *idx, void *data)
1354 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1356 tree step, iv_base, iv_step, lbound, off;
1357 struct loop *loop = dta->ivopts_data->current_loop;
1359 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1360 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
1365 if (TREE_CODE (base) == COMPONENT_REF)
1367 off = component_ref_field_offset (base);
1368 return expr_invariant_in_loop_p (loop, off);
1371 /* If base is array, first check whether we will be able to move the
1372 reference out of the loop (in order to take its address in strength
1373 reduction). In order for this to work we need both lower bound
1374 and step to be loop invariants. */
1375 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1377 /* Moreover, for a range, the size needs to be invariant as well. */
1378 if (TREE_CODE (base) == ARRAY_RANGE_REF
1379 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1382 step = array_ref_element_size (base);
1383 lbound = array_ref_low_bound (base);
1385 if (!expr_invariant_in_loop_p (loop, step)
1386 || !expr_invariant_in_loop_p (loop, lbound))
1390 if (TREE_CODE (*idx) != SSA_NAME)
1393 iv = get_iv (dta->ivopts_data, *idx);
1397 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1398 *&x[0], which is not folded and does not trigger the
1399 ARRAY_REF path below. */
1402 if (integer_zerop (iv->step))
1405 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1407 step = array_ref_element_size (base);
1409 /* We only handle addresses whose step is an integer constant. */
1410 if (TREE_CODE (step) != INTEGER_CST)
1414 /* The step for pointer arithmetics already is 1 byte. */
1415 step = build_int_cst (sizetype, 1);
1419 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1420 sizetype, &iv_base, &iv_step, dta->stmt,
1423 /* The index might wrap. */
1427 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1428 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1433 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1434 object is passed to it in DATA. */
1437 idx_record_use (tree base, tree *idx,
1440 struct ivopts_data *data = (struct ivopts_data *) vdata;
1441 find_interesting_uses_op (data, *idx);
1442 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1444 find_interesting_uses_op (data, array_ref_element_size (base));
1445 find_interesting_uses_op (data, array_ref_low_bound (base));
1450 /* If we can prove that TOP = cst * BOT for some constant cst,
1451 store cst to MUL and return true. Otherwise return false.
1452 The returned value is always sign-extended, regardless of the
1453 signedness of TOP and BOT. */
1456 constant_multiple_of (tree top, tree bot, double_int *mul)
1459 enum tree_code code;
1460 double_int res, p0, p1;
1461 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1466 if (operand_equal_p (top, bot, 0))
1468 *mul = double_int_one;
1472 code = TREE_CODE (top);
1476 mby = TREE_OPERAND (top, 1);
1477 if (TREE_CODE (mby) != INTEGER_CST)
1480 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1483 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1489 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1490 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1493 if (code == MINUS_EXPR)
1494 p1 = double_int_neg (p1);
1495 *mul = double_int_sext (double_int_add (p0, p1), precision);
1499 if (TREE_CODE (bot) != INTEGER_CST)
1502 p0 = double_int_sext (tree_to_double_int (top), precision);
1503 p1 = double_int_sext (tree_to_double_int (bot), precision);
1504 if (double_int_zero_p (p1))
1506 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1508 return double_int_zero_p (res);
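
/* A worked example (illustrative only): constant_multiple_of (x * 6, x, &mul)
   recurses through the MULT_EXPR, finds that its first operand is equal to
   BOT and sets *MUL to 6; constant_multiple_of (24, 4, &mul) divides the two
   constants and also sets *MUL to 6; constant_multiple_of (x + 1, x, &mul)
   fails, since 1 is not a multiple of x.  */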
1515 /* Returns true if memory reference REF with step STEP may be unaligned. */
1518 may_be_unaligned_p (tree ref, tree step)
1522 HOST_WIDE_INT bitsize;
1523 HOST_WIDE_INT bitpos;
1525 enum machine_mode mode;
1526 int unsignedp, volatilep;
1527 unsigned base_align;
1529 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1530 thus they are not misaligned. */
1531 if (TREE_CODE (ref) == TARGET_MEM_REF)
  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
1537 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1538 &unsignedp, &volatilep, true);
1539 base_type = TREE_TYPE (base);
1540 base_align = TYPE_ALIGN (base_type);
1542 if (mode != BLKmode)
1544 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1546 if (base_align < mode_align
1547 || (bitpos % mode_align) != 0
1548 || (bitpos % BITS_PER_UNIT) != 0)
1552 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1555 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1562 /* Return true if EXPR may be non-addressable. */
1565 may_be_nonaddressable_p (tree expr)
1567 switch (TREE_CODE (expr))
1569 case TARGET_MEM_REF:
1570 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1571 target, thus they are always addressable. */
1575 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1576 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1578 case VIEW_CONVERT_EXPR:
1579 /* This kind of view-conversions may wrap non-addressable objects
1580 and make them look addressable. After some processing the
1581 non-addressability may be uncovered again, causing ADDR_EXPRs
1582 of inappropriate objects to be built. */
1583 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1584 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1587 /* ... fall through ... */
1590 case ARRAY_RANGE_REF:
1591 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1603 /* Finds addresses in *OP_P inside STMT. */
1606 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1608 tree base = *op_p, step = build_int_cst (sizetype, 0);
1610 struct ifs_ivopts_data ifs_ivopts_data;
1612 /* Do not play with volatile memory references. A bit too conservative,
1613 perhaps, but safe. */
1614 if (gimple_has_volatile_ops (stmt))
  /* Ignore bitfields for now.  Not really something terribly complicated
     anyway.  */
1619 if (TREE_CODE (base) == BIT_FIELD_REF)
1622 base = unshare_expr (base);
1624 if (TREE_CODE (base) == TARGET_MEM_REF)
1626 tree type = build_pointer_type (TREE_TYPE (base));
1630 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1632 civ = get_iv (data, TMR_BASE (base));
1636 TMR_BASE (base) = civ->base;
1639 if (TMR_INDEX (base)
1640 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1642 civ = get_iv (data, TMR_INDEX (base));
1646 TMR_INDEX (base) = civ->base;
1651 if (TMR_STEP (base))
1652 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1654 step = fold_build2 (PLUS_EXPR, type, step, astep);
1658 if (integer_zerop (step))
1660 base = tree_mem_ref_addr (type, base);
1664 ifs_ivopts_data.ivopts_data = data;
1665 ifs_ivopts_data.stmt = stmt;
1666 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1667 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1668 || integer_zerop (ifs_ivopts_data.step))
1670 step = ifs_ivopts_data.step;
1672 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1673 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1675 /* Check that the base expression is addressable. This needs
1676 to be done after substituting bases of IVs into it. */
1677 if (may_be_nonaddressable_p (base))
1680 /* Moreover, on strict alignment platforms, check that it is
1681 sufficiently aligned. */
1682 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1685 base = build_fold_addr_expr (base);
1687 /* Substituting bases of IVs into the base expression might
1688 have caused folding opportunities. */
1689 if (TREE_CODE (base) == ADDR_EXPR)
1691 tree *ref = &TREE_OPERAND (base, 0);
1692 while (handled_component_p (*ref))
1693 ref = &TREE_OPERAND (*ref, 0);
1694 if (TREE_CODE (*ref) == INDIRECT_REF)
1696 tree tem = gimple_fold_indirect_ref (TREE_OPERAND (*ref, 0));
1703 civ = alloc_iv (base, step);
1704 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1708 for_each_index (op_p, idx_record_use, data);
1711 /* Finds and records invariants used in STMT. */
1714 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1717 use_operand_p use_p;
1720 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1722 op = USE_FROM_PTR (use_p);
1723 record_invariant (data, op, false);
1727 /* Finds interesting uses of induction variables in the statement STMT. */
1730 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1733 tree op, *lhs, *rhs;
1735 use_operand_p use_p;
1736 enum tree_code code;
1738 find_invariants_stmt (data, stmt);
1740 if (gimple_code (stmt) == GIMPLE_COND)
1742 find_interesting_uses_cond (data, stmt);
1746 if (is_gimple_assign (stmt))
1748 lhs = gimple_assign_lhs_ptr (stmt);
1749 rhs = gimple_assign_rhs1_ptr (stmt);
1751 if (TREE_CODE (*lhs) == SSA_NAME)
1753 /* If the statement defines an induction variable, the uses are not
1754 interesting by themselves. */
1756 iv = get_iv (data, *lhs);
1758 if (iv && !integer_zerop (iv->step))
1762 code = gimple_assign_rhs_code (stmt);
1763 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1764 && (REFERENCE_CLASS_P (*rhs)
1765 || is_gimple_val (*rhs)))
1767 if (REFERENCE_CLASS_P (*rhs))
1768 find_interesting_uses_address (data, stmt, rhs);
1770 find_interesting_uses_op (data, *rhs);
1772 if (REFERENCE_CLASS_P (*lhs))
1773 find_interesting_uses_address (data, stmt, lhs);
1776 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1778 find_interesting_uses_cond (data, stmt);
  /* TODO -- we should also handle address uses of type
     memory = call (whatever); and call (memory).  */
1791 if (gimple_code (stmt) == GIMPLE_PHI
1792 && gimple_bb (stmt) == data->current_loop->header)
1794 iv = get_iv (data, PHI_RESULT (stmt));
1796 if (iv && !integer_zerop (iv->step))
1800 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1802 op = USE_FROM_PTR (use_p);
1804 if (TREE_CODE (op) != SSA_NAME)
1807 iv = get_iv (data, op);
1811 find_interesting_uses_op (data, op);
1815 /* Finds interesting uses of induction variables outside of loops
1816 on loop exit edge EXIT. */
1819 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1822 gimple_stmt_iterator psi;
1825 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1827 phi = gsi_stmt (psi);
1828 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1829 if (is_gimple_reg (def))
1830 find_interesting_uses_op (data, def);
1834 /* Finds uses of the induction variables that are interesting. */
1837 find_interesting_uses (struct ivopts_data *data)
1840 gimple_stmt_iterator bsi;
1841 basic_block *body = get_loop_body (data->current_loop);
1843 struct version_info *info;
1846 if (dump_file && (dump_flags & TDF_DETAILS))
1847 fprintf (dump_file, "Uses:\n\n");
1849 for (i = 0; i < data->current_loop->num_nodes; i++)
1854 FOR_EACH_EDGE (e, ei, bb->succs)
1855 if (e->dest != EXIT_BLOCK_PTR
1856 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1857 find_interesting_uses_outside (data, e);
1859 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1860 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1861 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1862 if (!is_gimple_debug (gsi_stmt (bsi)))
1863 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1866 if (dump_file && (dump_flags & TDF_DETAILS))
1870 fprintf (dump_file, "\n");
1872 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1874 info = ver_info (data, i);
1877 fprintf (dump_file, " ");
1878 print_generic_expr (dump_file, info->name, TDF_SLIM);
1879 fprintf (dump_file, " is invariant (%d)%s\n",
1880 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1884 fprintf (dump_file, "\n");
1890 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1891 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1892 we are at the top-level of the processed address. */
1895 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1896 unsigned HOST_WIDE_INT *offset)
1898 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1899 enum tree_code code;
1900 tree type, orig_type = TREE_TYPE (expr);
1901 unsigned HOST_WIDE_INT off0, off1, st;
1902 tree orig_expr = expr;
1906 type = TREE_TYPE (expr);
1907 code = TREE_CODE (expr);
1913 if (!cst_and_fits_in_hwi (expr)
1914 || integer_zerop (expr))
1917 *offset = int_cst_value (expr);
1918 return build_int_cst (orig_type, 0);
1920 case POINTER_PLUS_EXPR:
1923 op0 = TREE_OPERAND (expr, 0);
1924 op1 = TREE_OPERAND (expr, 1);
1926 op0 = strip_offset_1 (op0, false, false, &off0);
1927 op1 = strip_offset_1 (op1, false, false, &off1);
1929 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1930 if (op0 == TREE_OPERAND (expr, 0)
1931 && op1 == TREE_OPERAND (expr, 1))
1934 if (integer_zerop (op1))
1936 else if (integer_zerop (op0))
1938 if (code == MINUS_EXPR)
1939 expr = fold_build1 (NEGATE_EXPR, type, op1);
1944 expr = fold_build2 (code, type, op0, op1);
1946 return fold_convert (orig_type, expr);
1949 op1 = TREE_OPERAND (expr, 1);
1950 if (!cst_and_fits_in_hwi (op1))
1953 op0 = TREE_OPERAND (expr, 0);
1954 op0 = strip_offset_1 (op0, false, false, &off0);
1955 if (op0 == TREE_OPERAND (expr, 0))
1958 *offset = off0 * int_cst_value (op1);
1959 if (integer_zerop (op0))
1962 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1964 return fold_convert (orig_type, expr);
1967 case ARRAY_RANGE_REF:
1971 step = array_ref_element_size (expr);
1972 if (!cst_and_fits_in_hwi (step))
1975 st = int_cst_value (step);
1976 op1 = TREE_OPERAND (expr, 1);
1977 op1 = strip_offset_1 (op1, false, false, &off1);
1978 *offset = off1 * st;
1981 && integer_zerop (op1))
1983 /* Strip the component reference completely. */
1984 op0 = TREE_OPERAND (expr, 0);
1985 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1995 tmp = component_ref_field_offset (expr);
1997 && cst_and_fits_in_hwi (tmp))
1999 /* Strip the component reference completely. */
2000 op0 = TREE_OPERAND (expr, 0);
2001 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2002 *offset = off0 + int_cst_value (tmp);
2008 op0 = TREE_OPERAND (expr, 0);
2009 op0 = strip_offset_1 (op0, true, true, &off0);
2012 if (op0 == TREE_OPERAND (expr, 0))
2015 expr = build_fold_addr_expr (op0);
2016 return fold_convert (orig_type, expr);
2019 inside_addr = false;
      /* Default handling of expressions for which we want to recurse into
         the first operand.  */
2028 op0 = TREE_OPERAND (expr, 0);
2029 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2032 if (op0 == TREE_OPERAND (expr, 0)
2033 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2036 expr = copy_node (expr);
2037 TREE_OPERAND (expr, 0) = op0;
2039 TREE_OPERAND (expr, 1) = op1;
  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
2044 expr = fold_convert (orig_type, expr);
2049 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2052 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2054 return strip_offset_1 (expr, false, false, offset);
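
/* For illustration (not from the original comments): assuming a declaration
   like "int a[100];", stripping offsets from the address &a[i + 3] would
   return &a[i] and set *OFFSET to 3 * sizeof (int), while stripping them
   from a POINTER_PLUS_EXPR such as p + 8 would return p and set *OFFSET
   to 8.  */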
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */
2062 generic_type_for (tree type)
2064 if (POINTER_TYPE_P (type))
2065 return unsigned_type_for (type);
2067 if (TYPE_UNSIGNED (type))
2070 return unsigned_type_for (type);
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap into which we should store it.  */
2076 static struct ivopts_data *fd_ivopts_data;
2078 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2080 bitmap *depends_on = (bitmap *) data;
2081 struct version_info *info;
2083 if (TREE_CODE (*expr_p) != SSA_NAME)
2085 info = name_info (fd_ivopts_data, *expr_p);
2087 if (!info->inv_id || info->has_nonlin_use)
2091 *depends_on = BITMAP_ALLOC (NULL);
2092 bitmap_set_bit (*depends_on, info->inv_id);
2097 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2098 position to POS. If USE is not NULL, the candidate is set as related to
2099 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2100 replacement of the final value of the iv by a direct computation. */
2102 static struct iv_cand *
2103 add_candidate_1 (struct ivopts_data *data,
2104 tree base, tree step, bool important, enum iv_position pos,
2105 struct iv_use *use, gimple incremented_at)
2108 struct iv_cand *cand = NULL;
2109 tree type, orig_type;
2113 orig_type = TREE_TYPE (base);
2114 type = generic_type_for (orig_type);
2115 if (type != orig_type)
2117 base = fold_convert (type, base);
2118 step = fold_convert (type, step);
2122 for (i = 0; i < n_iv_cands (data); i++)
2124 cand = iv_cand (data, i);
2126 if (cand->pos != pos)
2129 if (cand->incremented_at != incremented_at
2130 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2131 && cand->ainc_use != use))
2145 if (operand_equal_p (base, cand->iv->base, 0)
2146 && operand_equal_p (step, cand->iv->step, 0))
2150 if (i == n_iv_cands (data))
2152 cand = XCNEW (struct iv_cand);
2158 cand->iv = alloc_iv (base, step);
2161 if (pos != IP_ORIGINAL && cand->iv)
2163 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2164 cand->var_after = cand->var_before;
2166 cand->important = important;
2167 cand->incremented_at = incremented_at;
2168 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2171 && TREE_CODE (step) != INTEGER_CST)
2173 fd_ivopts_data = data;
2174 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2177 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2178 cand->ainc_use = use;
2180 cand->ainc_use = NULL;
2182 if (dump_file && (dump_flags & TDF_DETAILS))
2183 dump_cand (dump_file, cand);
2186 if (important && !cand->important)
2188 cand->important = true;
2189 if (dump_file && (dump_flags & TDF_DETAILS))
2190 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2195 bitmap_set_bit (use->related_cands, i);
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 fprintf (dump_file, "Candidate %d is related to use %d\n",
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */
2214 allow_ip_end_pos_p (struct loop *loop)
2216 if (!ip_normal_pos (loop))
2219 if (!empty_block_p (ip_end_pos (loop)))
2225 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2226 Important field is set to IMPORTANT. */
2229 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2230 bool important, struct iv_use *use)
2232 basic_block use_bb = gimple_bb (use->stmt);
2233 enum machine_mode mem_mode;
2234 unsigned HOST_WIDE_INT cstepi;
2236 /* If we insert the increment in any position other than the standard
2237 ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
2240 if (use_bb->loop_father != data->current_loop
2241 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2242 || stmt_could_throw_p (use->stmt)
2243 || !cst_and_fits_in_hwi (step))
2246 cstepi = int_cst_value (step);
2248 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2249 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2250 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2252 enum tree_code code = MINUS_EXPR;
2254 tree new_step = step;
2256 if (POINTER_TYPE_P (TREE_TYPE (base)))
2258 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2259 code = POINTER_PLUS_EXPR;
2262 new_step = fold_convert (TREE_TYPE (base), new_step);
2263 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2264 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2267 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2268 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2270 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2275 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2276 position to POS. If USE is not NULL, the candidate is set as related to
2277 it. The candidate computation is scheduled on all available positions. */
2280 add_candidate (struct ivopts_data *data,
2281 tree base, tree step, bool important, struct iv_use *use)
2283 if (ip_normal_pos (data->current_loop))
2284 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2285 if (ip_end_pos (data->current_loop)
2286 && allow_ip_end_pos_p (data->current_loop))
2287 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2289 if (use != NULL && use->type == USE_ADDRESS)
2290 add_autoinc_candidates (data, base, step, important, use);
2293 /* Add a standard "0 + 1 * iteration" iv candidate for a
2294 type with SIZE bits. */
2297 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2300 tree type = lang_hooks.types.type_for_size (size, true);
2301 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2305 /* Adds standard iv candidates. */
2308 add_standard_iv_candidates (struct ivopts_data *data)
2310 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2312 /* The same for a double-integer type if it is still fast enough. */
2313 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2314 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
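
/* For illustration (target parameters assumed, not taken from the sources):
   on a target where int is 32 bits and the word size is 64 bits, the two
   calls above add the candidate "0 + 1 * iteration" both as a 32-bit and as
   a 64-bit unsigned induction variable; on a 32-bit target only the first
   one is added.  */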
/* Adds candidates based on the old induction variable IV.  */
2321 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2325 struct iv_cand *cand;
2327 add_candidate (data, iv->base, iv->step, true, NULL);
2329 /* The same, but with initial value zero. */
2330 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2331 add_candidate (data, size_int (0), iv->step, true, NULL);
2333 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2334 iv->step, true, NULL);
2336 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2337 if (gimple_code (phi) == GIMPLE_PHI)
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
2341 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2342 cand = add_candidate_1 (data,
2343 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2344 SSA_NAME_DEF_STMT (def));
2345 cand->var_before = iv->ssa_name;
2346 cand->var_after = def;
2350 /* Adds candidates based on the old induction variables. */
2353 add_old_ivs_candidates (struct ivopts_data *data)
2359 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2361 iv = ver_info (data, i)->iv;
2362 if (iv && iv->biv_p && !integer_zerop (iv->step))
2363 add_old_iv_candidates (data, iv);
2367 /* Adds candidates based on the value of the induction variable IV and USE. */
2370 add_iv_value_candidates (struct ivopts_data *data,
2371 struct iv *iv, struct iv_use *use)
2373 unsigned HOST_WIDE_INT offset;
2377 add_candidate (data, iv->base, iv->step, false, use);
2379 /* The same, but with initial value zero. Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
2382 basetype = TREE_TYPE (iv->base);
2383 if (POINTER_TYPE_P (basetype))
2384 basetype = sizetype;
2385 add_candidate (data, build_int_cst (basetype, 0),
2386 iv->step, true, use);
2388 /* Third, try removing the constant offset. Make sure to even
2389 add a candidate for &a[0] vs. (T *)&a. */
2390 base = strip_offset (iv->base, &offset);
2392 || base != iv->base)
2393 add_candidate (data, base, iv->step, false, use);
2396 /* Adds candidates based on the uses. */
2399 add_derived_ivs_candidates (struct ivopts_data *data)
2403 for (i = 0; i < n_iv_uses (data); i++)
2405 struct iv_use *use = iv_use (data, i);
2412 case USE_NONLINEAR_EXPR:
2415 /* Just add the ivs based on the value of the iv used here. */
2416 add_iv_value_candidates (data, use->iv, use);
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */
2429 record_important_candidates (struct ivopts_data *data)
2434 for (i = 0; i < n_iv_cands (data); i++)
2436 struct iv_cand *cand = iv_cand (data, i);
2438 if (cand->important)
2439 bitmap_set_bit (data->important_candidates, i);
2442 data->consider_all_candidates = (n_iv_cands (data)
2443 <= CONSIDER_ALL_CANDIDATES_BOUND);
2445 if (data->consider_all_candidates)
2447 /* We will not need "related_cands" bitmaps in this case,
2448 so release them to decrease peak memory consumption. */
2449 for (i = 0; i < n_iv_uses (data); i++)
2451 use = iv_use (data, i);
2452 BITMAP_FREE (use->related_cands);
2457 /* Add important candidates to the related_cands bitmaps. */
2458 for (i = 0; i < n_iv_uses (data); i++)
2459 bitmap_ior_into (iv_use (data, i)->related_cands,
2460 data->important_candidates);
2464 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2465 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2466 we allocate a simple list to every use. */
2469 alloc_use_cost_map (struct ivopts_data *data)
2471 unsigned i, size, s, j;
2473 for (i = 0; i < n_iv_uses (data); i++)
2475 struct iv_use *use = iv_use (data, i);
2478 if (data->consider_all_candidates)
2479 size = n_iv_cands (data);
2483 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
          /* Round up to a power of two, so that taking the modulo by it
             is fast.  */
2489 for (size = 1; size < s; size <<= 1)
2493 use->n_map_members = size;
2494 use->cost_map = XCNEWVEC (struct cost_pair, size);
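
/* For illustration (numbers made up): a use with five related candidates gets
   a cost_map of size 8 (the next power of two), and set_use_iv_cost /
   get_use_iv_cost below then probe the map starting at slot
   cand->id & (8 - 1), walking forward until a free slot or the candidate is
   found -- a simple open-addressing hash keyed by the candidate id.  */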
2498 /* Returns description of computation cost of expression whose runtime
2499 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2502 new_cost (unsigned runtime, unsigned complexity)
2506 cost.cost = runtime;
2507 cost.complexity = complexity;
2512 /* Adds costs COST1 and COST2. */
2515 add_costs (comp_cost cost1, comp_cost cost2)
2517 cost1.cost += cost2.cost;
2518 cost1.complexity += cost2.complexity;
/* Subtracts cost COST2 from COST1.  */
2525 sub_costs (comp_cost cost1, comp_cost cost2)
2527 cost1.cost -= cost2.cost;
2528 cost1.complexity -= cost2.complexity;
2533 /* Returns a negative number if COST1 < COST2, a positive number if
2534 COST1 > COST2, and 0 if COST1 = COST2. */
2537 compare_costs (comp_cost cost1, comp_cost cost2)
2539 if (cost1.cost == cost2.cost)
2540 return cost1.complexity - cost2.complexity;
2542 return cost1.cost - cost2.cost;
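
/* A worked example (illustrative only): with the definitions above,
   compare_costs ({4, 1}, {3, 2}) is positive, so the second cost is cheaper
   even though it is more complex, while compare_costs ({4, 1}, {4, 2}) is
   negative, i.e. equal runtime costs are ordered by complexity.  */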
2545 /* Returns true if COST is infinite. */
2548 infinite_cost_p (comp_cost cost)
2550 return cost.cost == INFTY;
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */
2558 set_use_iv_cost (struct ivopts_data *data,
2559 struct iv_use *use, struct iv_cand *cand,
2560 comp_cost cost, bitmap depends_on, tree value)
2564 if (infinite_cost_p (cost))
2566 BITMAP_FREE (depends_on);
2570 if (data->consider_all_candidates)
2572 use->cost_map[cand->id].cand = cand;
2573 use->cost_map[cand->id].cost = cost;
2574 use->cost_map[cand->id].depends_on = depends_on;
2575 use->cost_map[cand->id].value = value;
2579 /* n_map_members is a power of two, so this computes modulo. */
2580 s = cand->id & (use->n_map_members - 1);
2581 for (i = s; i < use->n_map_members; i++)
2582 if (!use->cost_map[i].cand)
2584 for (i = 0; i < s; i++)
2585 if (!use->cost_map[i].cand)
2591 use->cost_map[i].cand = cand;
2592 use->cost_map[i].cost = cost;
2593 use->cost_map[i].depends_on = depends_on;
2594 use->cost_map[i].value = value;
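/* Both set_use_iv_cost above and get_use_iv_cost below probe the map with
   simple open addressing and linear probing with wrap-around.  For instance
   (hypothetical sizes), with n_map_members == 8 and cand->id == 11 the
   search starts at slot 11 & 7 == 3 and scans slots 3..7, then 0..2, for a
   free or matching entry.  */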
2597 /* Gets cost of (USE, CANDIDATE) pair. */
2599 static struct cost_pair *
2600 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2601 struct iv_cand *cand)
2604 struct cost_pair *ret;
2609 if (data->consider_all_candidates)
2611 ret = use->cost_map + cand->id;
2618 /* n_map_members is a power of two, so this computes modulo. */
2619 s = cand->id & (use->n_map_members - 1);
2620 for (i = s; i < use->n_map_members; i++)
2621 if (use->cost_map[i].cand == cand)
2622 return use->cost_map + i;
2624 for (i = 0; i < s; i++)
2625 if (use->cost_map[i].cand == cand)
2626 return use->cost_map + i;
2631 /* Returns an estimate of the cost of computing SEQ. */
2634 seq_cost (rtx seq, bool speed)
2639 for (; seq; seq = NEXT_INSN (seq))
2641 set = single_set (seq);
2643 cost += rtx_cost (set, SET, speed);
2651 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2653 produce_memory_decl_rtl (tree obj, int *regno)
2655 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2656 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2660 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2662 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2663 x = gen_rtx_SYMBOL_REF (address_mode, name);
2664 SET_SYMBOL_REF_DECL (x, obj);
2665 x = gen_rtx_MEM (DECL_MODE (obj), x);
2666 set_mem_addr_space (x, as);
2667 targetm.encode_section_info (obj, x, true);
2671 x = gen_raw_REG (address_mode, (*regno)++);
2672 x = gen_rtx_MEM (DECL_MODE (obj), x);
2673 set_mem_addr_space (x, as);
2679 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2680 walk_tree. DATA contains the actual fake register number. */
2683 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2685 tree obj = NULL_TREE;
2687 int *regno = (int *) data;
2689 switch (TREE_CODE (*expr_p))
2692 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2693 handled_component_p (*expr_p);
2694 expr_p = &TREE_OPERAND (*expr_p, 0))
2697 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2698 x = produce_memory_decl_rtl (obj, regno);
2703 obj = SSA_NAME_VAR (*expr_p);
2704 if (!DECL_RTL_SET_P (obj))
2705 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2714 if (DECL_RTL_SET_P (obj))
2717 if (DECL_MODE (obj) == BLKmode)
2718 x = produce_memory_decl_rtl (obj, regno);
2720 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2730 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2731 SET_DECL_RTL (obj, x);
2737 /* Determines cost of the computation of EXPR. */
2740 computation_cost (tree expr, bool speed)
2743 tree type = TREE_TYPE (expr);
2745 /* Avoid using hard regs in ways which may be unsupported. */
2746 int regno = LAST_VIRTUAL_REGISTER + 1;
2747 struct cgraph_node *node = cgraph_node (current_function_decl);
2748 enum node_frequency real_frequency = node->frequency;
2750 node->frequency = NODE_FREQUENCY_NORMAL;
2751 crtl->maybe_hot_insn_p = speed;
2752 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2754 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2757 default_rtl_profile ();
2758 node->frequency = real_frequency;
2760 cost = seq_cost (seq, speed);
2762 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2763 TYPE_ADDR_SPACE (type), speed);
2768 /* Returns variable containing the value of candidate CAND at statement AT. */
2771 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2773 if (stmt_after_increment (loop, cand, stmt))
2774 return cand->var_after;
2776 return cand->var_before;
2779 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2780 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2783 tree_int_cst_sign_bit (const_tree t)
2785 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2786 unsigned HOST_WIDE_INT w;
2788 if (bitno < HOST_BITS_PER_WIDE_INT)
2789 w = TREE_INT_CST_LOW (t);
2792 w = TREE_INT_CST_HIGH (t);
2793 bitno -= HOST_BITS_PER_WIDE_INT;
2796 return (w >> bitno) & 1;
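/* For example, for a 16-bit type BITNO is 15, so for the constant 0x8000 the
   function returns 1 and for 0x7fff it returns 0; for types wider than
   HOST_BITS_PER_WIDE_INT the bit is read from TREE_INT_CST_HIGH instead.  */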
2799 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2800 same precision, at least as wide as the precision of TYPE, stores
2801 BA to A and BB to B, and returns the type of BA. Otherwise, returns the type of A. */
2805 determine_common_wider_type (tree *a, tree *b)
2807 tree wider_type = NULL;
2809 tree atype = TREE_TYPE (*a);
2811 if (CONVERT_EXPR_P (*a))
2813 suba = TREE_OPERAND (*a, 0);
2814 wider_type = TREE_TYPE (suba);
2815 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2821 if (CONVERT_EXPR_P (*b))
2823 subb = TREE_OPERAND (*b, 0);
2824 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2835 /* Determines the expression by which USE is expressed from induction variable
2836 CAND at statement AT in LOOP. The expression is stored in decomposed
2837 form in AFF. Returns false if USE cannot be expressed using CAND. */
2840 get_computation_aff (struct loop *loop,
2841 struct iv_use *use, struct iv_cand *cand, gimple at,
2842 struct affine_tree_combination *aff)
2844 tree ubase = use->iv->base;
2845 tree ustep = use->iv->step;
2846 tree cbase = cand->iv->base;
2847 tree cstep = cand->iv->step, cstep_common;
2848 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2849 tree common_type, var;
2851 aff_tree cbase_aff, var_aff;
2854 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2856 /* We do not have enough precision to express the values of USE. */
2860 var = var_at_stmt (loop, cand, at);
2861 uutype = unsigned_type_for (utype);
2863 /* If the conversion is not a no-op, perform it. */
2864 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2866 cstep = fold_convert (uutype, cstep);
2867 cbase = fold_convert (uutype, cbase);
2868 var = fold_convert (uutype, var);
2871 if (!constant_multiple_of (ustep, cstep, &rat))
2874 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2875 type, we achieve better folding by computing their difference in this
2876 wider type and casting the result to UUTYPE. We do not need to worry about
2877 overflows, as all the arithmetic will in the end be performed in UUTYPE anyway. */
2879 common_type = determine_common_wider_type (&ubase, &cbase);
2881 /* use = ubase - ratio * cbase + ratio * var. */
2882 tree_to_aff_combination (ubase, common_type, aff);
2883 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2884 tree_to_aff_combination (var, uutype, &var_aff);
2886 /* We need to shift the value if we are after the increment. */
2887 if (stmt_after_increment (loop, cand, at))
2891 if (common_type != uutype)
2892 cstep_common = fold_convert (common_type, cstep);
2894 cstep_common = cstep;
2896 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2897 aff_combination_add (&cbase_aff, &cstep_aff);
2900 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2901 aff_combination_add (aff, &cbase_aff);
2902 if (common_type != uutype)
2903 aff_combination_convert (aff, uutype);
2905 aff_combination_scale (&var_aff, rat);
2906 aff_combination_add (aff, &var_aff);
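/* A worked example (hypothetical ivs): for a use with base &a[0] and step 4
   based on a candidate with base 0 and step 1, the ratio is 4 and the
   combination built above is &a[0] - 4 * 0 + 4 * var, i.e. the array address
   is recovered by scaling the candidate variable.  */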
2911 /* Determines the expression by which USE is expressed from induction variable
2912 CAND at statement AT in LOOP. The computation is unshared. */
2915 get_computation_at (struct loop *loop,
2916 struct iv_use *use, struct iv_cand *cand, gimple at)
2919 tree type = TREE_TYPE (use->iv->base);
2921 if (!get_computation_aff (loop, use, cand, at, &aff))
2923 unshare_aff_combination (&aff);
2924 return fold_convert (type, aff_combination_to_tree (&aff));
2927 /* Determines the expression by which USE is expressed from induction variable
2928 CAND in LOOP. The computation is unshared. */
2931 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2933 return get_computation_at (loop, use, cand, use->stmt);
2936 /* Returns cost of addition in MODE. */
2939 add_cost (enum machine_mode mode, bool speed)
2941 static unsigned costs[NUM_MACHINE_MODES];
2949 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2950 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2951 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2956 cost = seq_cost (seq, speed);
2962 if (dump_file && (dump_flags & TDF_DETAILS))
2963 fprintf (dump_file, "Addition in %s costs %d\n",
2964 GET_MODE_NAME (mode), cost);
2968 /* Entry in a hashtable of already known costs for multiplication. */
2971 HOST_WIDE_INT cst; /* The constant to multiply by. */
2972 enum machine_mode mode; /* In mode. */
2973 unsigned cost; /* The cost. */
2976 /* Computes the hash value for ENTRY. */
2979 mbc_entry_hash (const void *entry)
2981 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2983 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2986 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2989 mbc_entry_eq (const void *entry1, const void *entry2)
2991 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2992 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2994 return (e1->mode == e2->mode
2995 && e1->cst == e2->cst);
2998 /* Returns cost of multiplication by constant CST in MODE. */
3001 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3003 static htab_t costs;
3004 struct mbc_entry **cached, act;
3009 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3013 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3015 return (*cached)->cost;
3017 *cached = XNEW (struct mbc_entry);
3018 (*cached)->mode = mode;
3019 (*cached)->cst = cst;
3022 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3023 gen_int_mode (cst, mode), NULL_RTX, 0);
3027 cost = seq_cost (seq, speed);
3029 if (dump_file && (dump_flags & TDF_DETAILS))
3030 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3031 (int) cst, GET_MODE_NAME (mode), cost);
3033 (*cached)->cost = cost;
3038 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3039 validity for a memory reference accessing memory of mode MODE in
3040 address space AS. */
3042 DEF_VEC_P (sbitmap);
3043 DEF_VEC_ALLOC_P (sbitmap, heap);
3046 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3049 #define MAX_RATIO 128
3050 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3051 static VEC (sbitmap, heap) *valid_mult_list;
3054 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3055 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3057 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3060 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3061 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3065 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3066 sbitmap_zero (valid_mult);
3067 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3068 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3070 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3071 if (memory_address_addr_space_p (mode, addr, as))
3072 SET_BIT (valid_mult, i + MAX_RATIO);
3075 if (dump_file && (dump_flags & TDF_DETAILS))
3077 fprintf (dump_file, " allowed multipliers:");
3078 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3079 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3080 fprintf (dump_file, " %d", (int) i);
3081 fprintf (dump_file, "\n");
3082 fprintf (dump_file, "\n");
3085 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3088 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3091 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
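/* For instance, on a target whose addressing modes allow base + index *
   {1, 2, 4, 8} (a purely illustrative assumption), only those four bits of
   VALID_MULT would end up set for positive ratios, and any RATIO outside
   [-MAX_RATIO, MAX_RATIO] is rejected without consulting the bitmap.  */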
3094 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3095 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3096 variable is omitted. Compute the cost for a memory reference that accesses
3097 a memory location of mode MEM_MODE in address space AS.
3099 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3100 size of MEM_MODE / RATIO) is available. To make this determination, we
3101 look at the size of the increment to be made, which is given in CSTEP.
3102 CSTEP may be zero if the step is unknown.
3103 STMT_AFTER_INC is true iff the statement we're looking at is after the
3104 increment of the original biv.
3106 TODO -- there must be some better way. This all is quite crude. */
3110 HOST_WIDE_INT min_offset, max_offset;
3111 unsigned costs[2][2][2][2];
3112 } *address_cost_data;
3114 DEF_VEC_P (address_cost_data);
3115 DEF_VEC_ALLOC_P (address_cost_data, heap);
3118 get_address_cost (bool symbol_present, bool var_present,
3119 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3120 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3121 addr_space_t as, bool speed,
3122 bool stmt_after_inc, bool *may_autoinc)
3124 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3125 static VEC(address_cost_data, heap) *address_cost_data_list;
3126 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3127 address_cost_data data;
3128 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3129 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3130 unsigned cost, acost, complexity;
3131 bool offset_p, ratio_p, autoinc;
3132 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3133 unsigned HOST_WIDE_INT mask;
3136 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3137 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3140 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3144 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3145 HOST_WIDE_INT rat, off;
3146 int old_cse_not_expected;
3147 unsigned sym_p, var_p, off_p, rat_p, add_c;
3148 rtx seq, addr, base;
3151 data = (address_cost_data) xcalloc (1, sizeof (*data));
3153 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3155 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3156 for (i = start; i <= 1 << 20; i <<= 1)
3158 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3159 if (!memory_address_addr_space_p (mem_mode, addr, as))
3162 data->max_offset = i == start ? 0 : i >> 1;
3163 off = data->max_offset;
3165 for (i = start; i <= 1 << 20; i <<= 1)
3167 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3168 if (!memory_address_addr_space_p (mem_mode, addr, as))
3171 data->min_offset = i == start ? 0 : -(i >> 1);
3173 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, "get_address_cost:\n");
3176 fprintf (dump_file, " min offset %s %d\n",
3177 GET_MODE_NAME (mem_mode),
3178 (int) data->min_offset);
3179 fprintf (dump_file, " max offset %s %d\n",
3180 GET_MODE_NAME (mem_mode),
3181 (int) data->max_offset);
3185 for (i = 2; i <= MAX_RATIO; i++)
3186 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3192 /* Compute the cost of various addressing modes. */
3194 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3195 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3197 if (HAVE_PRE_DECREMENT)
3199 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3200 has_predec[mem_mode]
3201 = memory_address_addr_space_p (mem_mode, addr, as);
3203 if (HAVE_POST_DECREMENT)
3205 addr = gen_rtx_POST_DEC (address_mode, reg0);
3206 has_postdec[mem_mode]
3207 = memory_address_addr_space_p (mem_mode, addr, as);
3209 if (HAVE_PRE_INCREMENT)
3211 addr = gen_rtx_PRE_INC (address_mode, reg0);
3212 has_preinc[mem_mode]
3213 = memory_address_addr_space_p (mem_mode, addr, as);
3215 if (HAVE_POST_INCREMENT)
3217 addr = gen_rtx_POST_INC (address_mode, reg0);
3218 has_postinc[mem_mode]
3219 = memory_address_addr_space_p (mem_mode, addr, as);
3221 for (i = 0; i < 16; i++)
3224 var_p = (i >> 1) & 1;
3225 off_p = (i >> 2) & 1;
3226 rat_p = (i >> 3) & 1;
3230 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3231 gen_int_mode (rat, address_mode));
3234 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3238 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3239 /* ??? We can run into trouble with some backends by presenting
3240 them with symbols which haven't been properly passed through
3241 targetm.encode_section_info. By setting the local bit, we
3242 enhance the probability of things working. */
3243 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3246 base = gen_rtx_fmt_e (CONST, address_mode,
3248 (PLUS, address_mode, base,
3249 gen_int_mode (off, address_mode)));
3252 base = gen_int_mode (off, address_mode);
3257 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3260 /* To avoid splitting addressing modes, pretend that no cse will take place. */
3262 old_cse_not_expected = cse_not_expected;
3263 cse_not_expected = true;
3264 addr = memory_address_addr_space (mem_mode, addr, as);
3265 cse_not_expected = old_cse_not_expected;
3269 acost = seq_cost (seq, speed);
3270 acost += address_cost (addr, mem_mode, as, speed);
3274 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3277 /* On some targets, it is quite expensive to load a symbol into a register,
3278 which makes addresses that contain symbols look much more expensive.
3279 However, the symbol will have to be loaded in any case before the
3280 loop (and quite likely we have it in a register already), so it does not
3281 make much sense to penalize them too heavily. So make some final
3282 tweaks for the SYMBOL_PRESENT modes:
3284 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3285 var is cheaper, use this mode with a small penalty.
3286 If VAR_PRESENT is true, check whether the mode with
3287 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3288 if this is the case, use it. */
3289 add_c = add_cost (address_mode, speed);
3290 for (i = 0; i < 8; i++)
3293 off_p = (i >> 1) & 1;
3294 rat_p = (i >> 2) & 1;
3296 acost = data->costs[0][1][off_p][rat_p] + 1;
3300 if (acost < data->costs[1][var_p][off_p][rat_p])
3301 data->costs[1][var_p][off_p][rat_p] = acost;
3304 if (dump_file && (dump_flags & TDF_DETAILS))
3306 fprintf (dump_file, "Address costs:\n");
3308 for (i = 0; i < 16; i++)
3311 var_p = (i >> 1) & 1;
3312 off_p = (i >> 2) & 1;
3313 rat_p = (i >> 3) & 1;
3315 fprintf (dump_file, " ");
3317 fprintf (dump_file, "sym + ");
3319 fprintf (dump_file, "var + ");
3321 fprintf (dump_file, "cst + ");
3323 fprintf (dump_file, "rat * ");
3325 acost = data->costs[sym_p][var_p][off_p][rat_p];
3326 fprintf (dump_file, "index costs %d\n", acost);
3328 if (has_predec[mem_mode] || has_postdec[mem_mode]
3329 || has_preinc[mem_mode] || has_postinc[mem_mode])
3330 fprintf (dump_file, " May include autoinc/dec\n");
3331 fprintf (dump_file, "\n");
3334 VEC_replace (address_cost_data, address_cost_data_list,
3338 bits = GET_MODE_BITSIZE (address_mode);
3339 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3341 if ((offset >> (bits - 1) & 1))
3346 msize = GET_MODE_SIZE (mem_mode);
3347 autoinc_offset = offset;
3349 autoinc_offset += ratio * cstep;
3350 if (symbol_present || var_present || ratio != 1)
3352 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3354 || (has_postdec[mem_mode] && autoinc_offset == 0
3356 || (has_preinc[mem_mode] && autoinc_offset == msize
3358 || (has_predec[mem_mode] && autoinc_offset == -msize
3359 && msize == -cstep))
3363 offset_p = (s_offset != 0
3364 && data->min_offset <= s_offset
3365 && s_offset <= data->max_offset);
3366 ratio_p = (ratio != 1
3367 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3369 if (ratio != 1 && !ratio_p)
3370 cost += multiply_by_cost (ratio, address_mode, speed);
3372 if (s_offset && !offset_p && !symbol_present)
3373 cost += add_cost (address_mode, speed);
3376 *may_autoinc = autoinc;
3377 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3378 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3379 return new_cost (cost + acost, complexity);
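/* The COSTS table above is indexed by four presence bits; e.g. the loop
   index i == 13 (binary 1101) corresponds to sym_p = 1, var_p = 0,
   off_p = 1, rat_p = 1, i.e. an address of the shape
   symbol + offset + ratio * index.  The returned complexity simply counts
   how many of the four parts are present.  */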
3382 /* Estimates cost of forcing expression EXPR into a variable. */
3385 force_expr_to_var_cost (tree expr, bool speed)
3387 static bool costs_initialized = false;
3388 static unsigned integer_cost [2];
3389 static unsigned symbol_cost [2];
3390 static unsigned address_cost [2];
3392 comp_cost cost0, cost1, cost;
3393 enum machine_mode mode;
3395 if (!costs_initialized)
3397 tree type = build_pointer_type (integer_type_node);
3402 var = create_tmp_var_raw (integer_type_node, "test_var");
3403 TREE_STATIC (var) = 1;
3404 x = produce_memory_decl_rtl (var, NULL);
3405 SET_DECL_RTL (var, x);
3407 addr = build1 (ADDR_EXPR, type, var);
3410 for (i = 0; i < 2; i++)
3412 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3415 symbol_cost[i] = computation_cost (addr, i) + 1;
3418 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3420 build_int_cst (sizetype, 2000)), i) + 1;
3421 if (dump_file && (dump_flags & TDF_DETAILS))
3423 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3424 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3425 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3426 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3427 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3428 fprintf (dump_file, "\n");
3432 costs_initialized = true;
3437 if (SSA_VAR_P (expr))
3440 if (is_gimple_min_invariant (expr))
3442 if (TREE_CODE (expr) == INTEGER_CST)
3443 return new_cost (integer_cost [speed], 0);
3445 if (TREE_CODE (expr) == ADDR_EXPR)
3447 tree obj = TREE_OPERAND (expr, 0);
3449 if (TREE_CODE (obj) == VAR_DECL
3450 || TREE_CODE (obj) == PARM_DECL
3451 || TREE_CODE (obj) == RESULT_DECL)
3452 return new_cost (symbol_cost [speed], 0);
3455 return new_cost (address_cost [speed], 0);
3458 switch (TREE_CODE (expr))
3460 case POINTER_PLUS_EXPR:
3464 op0 = TREE_OPERAND (expr, 0);
3465 op1 = TREE_OPERAND (expr, 1);
3469 if (is_gimple_val (op0))
3472 cost0 = force_expr_to_var_cost (op0, speed);
3474 if (is_gimple_val (op1))
3477 cost1 = force_expr_to_var_cost (op1, speed);
3482 op0 = TREE_OPERAND (expr, 0);
3486 if (is_gimple_val (op0))
3489 cost0 = force_expr_to_var_cost (op0, speed);
3495 /* Just an arbitrary value, FIXME. */
3496 return new_cost (target_spill_cost[speed], 0);
3499 mode = TYPE_MODE (TREE_TYPE (expr));
3500 switch (TREE_CODE (expr))
3502 case POINTER_PLUS_EXPR:
3506 cost = new_cost (add_cost (mode, speed), 0);
3510 if (cst_and_fits_in_hwi (op0))
3511 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3512 else if (cst_and_fits_in_hwi (op1))
3513 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3515 return new_cost (target_spill_cost [speed], 0);
3522 cost = add_costs (cost, cost0);
3523 cost = add_costs (cost, cost1);
3525 /* Bound the cost by target_spill_cost. The parts of complicated
3526 computations are often either loop invariant or can at least
3527 be shared between several iv uses, so letting this grow without
3528 limits would not give reasonable results. */
3529 if (cost.cost > (int) target_spill_cost [speed])
3530 cost.cost = target_spill_cost [speed];
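/* As a rough illustration (hypothetical operands): forcing a + 4 * b into a
   variable is costed as add_cost (mode) + multiply_by_cost (4, mode) plus
   the costs of the operands themselves, and the total is capped at
   target_spill_cost so that huge invariant expressions do not dominate the
   decision.  */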
3535 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3536 invariants the computation depends on. */
3539 force_var_cost (struct ivopts_data *data,
3540 tree expr, bitmap *depends_on)
3544 fd_ivopts_data = data;
3545 walk_tree (&expr, find_depends, depends_on, NULL);
3548 return force_expr_to_var_cost (expr, data->speed);
3551 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3552 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3553 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3554 invariants the computation depends on. */
3557 split_address_cost (struct ivopts_data *data,
3558 tree addr, bool *symbol_present, bool *var_present,
3559 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3562 HOST_WIDE_INT bitsize;
3563 HOST_WIDE_INT bitpos;
3565 enum machine_mode mode;
3566 int unsignedp, volatilep;
3568 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3569 &unsignedp, &volatilep, false);
3572 || bitpos % BITS_PER_UNIT != 0
3573 || TREE_CODE (core) != VAR_DECL)
3575 *symbol_present = false;
3576 *var_present = true;
3577 fd_ivopts_data = data;
3578 walk_tree (&addr, find_depends, depends_on, NULL);
3579 return new_cost (target_spill_cost[data->speed], 0);
3582 *offset += bitpos / BITS_PER_UNIT;
3583 if (TREE_STATIC (core)
3584 || DECL_EXTERNAL (core))
3586 *symbol_present = true;
3587 *var_present = false;
3591 *symbol_present = false;
3592 *var_present = true;
3596 /* Estimates cost of expressing difference of addresses E1 - E2 as
3597 var + symbol + offset. The value of offset is added to OFFSET,
3598 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3599 part is missing. DEPENDS_ON is a set of the invariants the computation
3603 ptr_difference_cost (struct ivopts_data *data,
3604 tree e1, tree e2, bool *symbol_present, bool *var_present,
3605 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3607 HOST_WIDE_INT diff = 0;
3608 aff_tree aff_e1, aff_e2;
3611 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3613 if (ptr_difference_const (e1, e2, &diff))
3616 *symbol_present = false;
3617 *var_present = false;
3621 if (integer_zerop (e2))
3622 return split_address_cost (data, TREE_OPERAND (e1, 0),
3623 symbol_present, var_present, offset, depends_on);
3625 *symbol_present = false;
3626 *var_present = true;
3628 type = signed_type_for (TREE_TYPE (e1));
3629 tree_to_aff_combination (e1, type, &aff_e1);
3630 tree_to_aff_combination (e2, type, &aff_e2);
3631 aff_combination_scale (&aff_e2, double_int_minus_one);
3632 aff_combination_add (&aff_e1, &aff_e2);
3634 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3637 /* Estimates cost of expressing difference E1 - E2 as
3638 var + symbol + offset. The value of offset is added to OFFSET,
3639 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3640 part is missing. DEPENDS_ON is a set of the invariants the computation
3644 difference_cost (struct ivopts_data *data,
3645 tree e1, tree e2, bool *symbol_present, bool *var_present,
3646 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3648 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3649 unsigned HOST_WIDE_INT off1, off2;
3650 aff_tree aff_e1, aff_e2;
3653 e1 = strip_offset (e1, &off1);
3654 e2 = strip_offset (e2, &off2);
3655 *offset += off1 - off2;
3660 if (TREE_CODE (e1) == ADDR_EXPR)
3661 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3662 offset, depends_on);
3663 *symbol_present = false;
3665 if (operand_equal_p (e1, e2, 0))
3667 *var_present = false;
3671 *var_present = true;
3673 if (integer_zerop (e2))
3674 return force_var_cost (data, e1, depends_on);
3676 if (integer_zerop (e1))
3678 comp_cost cost = force_var_cost (data, e2, depends_on);
3679 cost.cost += multiply_by_cost (-1, mode, data->speed);
3683 type = signed_type_for (TREE_TYPE (e1));
3684 tree_to_aff_combination (e1, type, &aff_e1);
3685 tree_to_aff_combination (e2, type, &aff_e2);
3686 aff_combination_scale (&aff_e2, double_int_minus_one);
3687 aff_combination_add (&aff_e1, &aff_e2);
3689 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
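/* Example (made-up addresses): for E1 = p + 16 and E2 = p, strip_offset
   folds the difference into *OFFSET (16), the remaining operands compare
   equal, and the result has no symbol or variable part; only when the
   stripped operands still differ do we fall back to forcing the affine
   difference into a variable.  */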
3692 /* Determines the cost of the computation by which USE is expressed
3693 from induction variable CAND. If ADDRESS_P is true, we just need
3694 to create an address from it, otherwise we want to get it into a
3695 register. A set of invariants we depend on is stored in
3696 DEPENDS_ON. AT is the statement at which the value is computed.
3697 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3698 addressing is likely. */
3701 get_computation_cost_at (struct ivopts_data *data,
3702 struct iv_use *use, struct iv_cand *cand,
3703 bool address_p, bitmap *depends_on, gimple at,
3706 tree ubase = use->iv->base, ustep = use->iv->step;
3708 tree utype = TREE_TYPE (ubase), ctype;
3709 unsigned HOST_WIDE_INT cstepi, offset = 0;
3710 HOST_WIDE_INT ratio, aratio;
3711 bool var_present, symbol_present, stmt_is_after_inc;
3714 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3718 /* Only consider real candidates. */
3720 return infinite_cost;
3722 cbase = cand->iv->base;
3723 cstep = cand->iv->step;
3724 ctype = TREE_TYPE (cbase);
3726 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3728 /* We do not have enough precision to express the values of USE. */
3729 return infinite_cost;
3734 /* Do not try to express the address of an object with a computation based
3735 on the address of a different object. This may cause problems in RTL-level
3736 alias analysis (which does not expect this to happen,
3737 as it is illegal in C), and would be unlikely to be useful anyway. */
3739 if (use->iv->base_object
3740 && cand->iv->base_object
3741 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3742 return infinite_cost;
3745 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3747 /* TODO -- add direct handling of this case. */
3751 /* CSTEPI is removed from the offset in case the statement is after the
3752 increment. If the step is not constant, we use zero instead.
3753 This is a bit imprecise (there is the extra addition), but
3754 redundancy elimination is likely to transform the code so that
3755 it uses the value of the variable before the increment anyway,
3756 so this is not all that unrealistic. */
3757 if (cst_and_fits_in_hwi (cstep))
3758 cstepi = int_cst_value (cstep);
3762 if (!constant_multiple_of (ustep, cstep, &rat))
3763 return infinite_cost;
3765 if (double_int_fits_in_shwi_p (rat))
3766 ratio = double_int_to_shwi (rat);
3768 return infinite_cost;
3771 ctype = TREE_TYPE (cbase);
3773 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3774 or ratio == 1, it is better to handle this like
3776 ubase - ratio * cbase + ratio * var
3778 (this also holds in the case ratio == -1, TODO). */
3780 if (cst_and_fits_in_hwi (cbase))
3782 offset = - ratio * int_cst_value (cbase);
3783 cost = difference_cost (data,
3784 ubase, build_int_cst (utype, 0),
3785 &symbol_present, &var_present, &offset,
3788 else if (ratio == 1)
3790 cost = difference_cost (data,
3792 &symbol_present, &var_present, &offset,
3796 && !POINTER_TYPE_P (ctype)
3797 && multiplier_allowed_in_address_p
3798 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3799 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3802 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3803 cost = difference_cost (data,
3805 &symbol_present, &var_present, &offset,
3810 cost = force_var_cost (data, cbase, depends_on);
3811 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3812 cost = add_costs (cost,
3813 difference_cost (data,
3814 ubase, build_int_cst (utype, 0),
3815 &symbol_present, &var_present,
3816 &offset, depends_on));
3819 /* If we are after the increment, the value of the candidate is higher by one iteration. */
3821 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3822 if (stmt_is_after_inc)
3823 offset -= ratio * cstepi;
3825 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3826 (symbol/var1/const parts may be omitted). If we are looking for an
3827 address, find the cost of addressing this. */
3829 return add_costs (cost,
3830 get_address_cost (symbol_present, var_present,
3831 offset, ratio, cstepi,
3832 TYPE_MODE (TREE_TYPE (utype)),
3833 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3834 speed, stmt_is_after_inc,
3837 /* Otherwise estimate the costs for computing the expression. */
3838 if (!symbol_present && !var_present && !offset)
3841 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3845 /* Symbol + offset should be compile-time computable, so consider that they
3846 are added to the variable only once, if present. */
3847 if (var_present && (symbol_present || offset))
3848 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3849 / AVG_LOOP_NITER (data->current_loop);
3851 /* Having an offset does not affect the runtime cost if it is added to the
3852 symbol, but it increases complexity. */
3856 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3858 aratio = ratio > 0 ? ratio : -ratio;
3860 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3864 *can_autoinc = false;
3867 /* Just get the expression, expand it and measure the cost. */
3868 tree comp = get_computation_at (data->current_loop, use, cand, at);
3871 return infinite_cost;
3874 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3876 return new_cost (computation_cost (comp, speed), 0);
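/* Putting the pieces together on a made-up example: expressing the address
   use a[i] (base &a[0], step 4) from the candidate i (base 0, step 1) gives
   ratio == 4 and a constant cbase, so the cost reduces to difference_cost
   of the bases plus get_address_cost for an address of the shape
   symbol + ratio * index.  */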
3880 /* Determines the cost of the computation by which USE is expressed
3881 from induction variable CAND. If ADDRESS_P is true, we just need
3882 to create an address from it, otherwise we want to get it into a
3883 register. A set of invariants we depend on is stored in
3884 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3885 autoinc addressing is likely. */
3888 get_computation_cost (struct ivopts_data *data,
3889 struct iv_use *use, struct iv_cand *cand,
3890 bool address_p, bitmap *depends_on, bool *can_autoinc)
3892 return get_computation_cost_at (data,
3893 use, cand, address_p, depends_on, use->stmt,
3897 /* Determines cost of basing replacement of USE on CAND in a generic expression. */
3901 determine_use_iv_cost_generic (struct ivopts_data *data,
3902 struct iv_use *use, struct iv_cand *cand)
3907 /* The simple case first -- if we need to express the value of the preserved
3908 original biv, the cost is 0. This also prevents us from counting the
3909 cost of the increment twice -- once at this use and once in the cost of the candidate. */
3911 if (cand->pos == IP_ORIGINAL
3912 && cand->incremented_at == use->stmt)
3914 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3918 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3919 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3921 return !infinite_cost_p (cost);
3924 /* Determines cost of basing replacement of USE on CAND in an address. */
3927 determine_use_iv_cost_address (struct ivopts_data *data,
3928 struct iv_use *use, struct iv_cand *cand)
3932 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3935 if (cand->ainc_use == use)
3938 cost.cost -= cand->cost_step;
3939 /* If we generated the candidate solely for exploiting autoincrement
3940 opportunities, and it turns out it can't be used, set the cost to
3941 infinity to make sure we ignore it. */
3942 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3943 cost = infinite_cost;
3945 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3947 return !infinite_cost_p (cost);
3950 /* Computes value of candidate CAND at position AT in iteration NITER, and
3951 stores it to VAL. */
3954 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3957 aff_tree step, delta, nit;
3958 struct iv *iv = cand->iv;
3959 tree type = TREE_TYPE (iv->base);
3960 tree steptype = type;
3961 if (POINTER_TYPE_P (type))
3962 steptype = sizetype;
3964 tree_to_aff_combination (iv->step, steptype, &step);
3965 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3966 aff_combination_convert (&nit, steptype);
3967 aff_combination_mult (&nit, &step, &delta);
3968 if (stmt_after_increment (loop, cand, at))
3969 aff_combination_add (&delta, &step);
3971 tree_to_aff_combination (iv->base, type, val);
3972 aff_combination_add (val, &delta);
3975 /* Returns period of induction variable iv. */
3978 iv_period (struct iv *iv)
3980 tree step = iv->step, period, type;
3983 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3985 /* The period of the iv is gcd (step, type range). Since the type range is a power
3986 of two, it suffices to determine the maximum power of two that divides the step. */
3988 pow2div = num_ending_zeros (step);
3989 type = unsigned_type_for (TREE_TYPE (step));
3991 period = build_low_bits_mask (type,
3992 (TYPE_PRECISION (type)
3993 - tree_low_cst (pow2div, 1)));
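/* For example, a 16-bit unsigned iv with step 4 has num_ending_zeros == 2,
   so the period computed here is a mask of the low 14 bits, i.e. 0x3fff;
   the iv wraps around only after 0x4000 increments, so it can drive an exit
   test for any loop iterating fewer times than that period.  */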
3998 /* Returns the comparison operator used when eliminating the iv USE. */
4000 static enum tree_code
4001 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4003 struct loop *loop = data->current_loop;
4007 ex_bb = gimple_bb (use->stmt);
4008 exit = EDGE_SUCC (ex_bb, 0);
4009 if (flow_bb_inside_loop_p (loop, exit->dest))
4010 exit = EDGE_SUCC (ex_bb, 1);
4012 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4015 /* Check whether it is possible to express the condition in USE by a comparison
4016 of candidate CAND. If so, store the value to compare against in BOUND. */
4019 may_eliminate_iv (struct ivopts_data *data,
4020 struct iv_use *use, struct iv_cand *cand, tree *bound)
4025 struct loop *loop = data->current_loop;
4028 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4031 /* For now this works only for exits that dominate the loop latch.
4032 TODO: extend to other conditions inside the loop body. */
4033 ex_bb = gimple_bb (use->stmt);
4034 if (use->stmt != last_stmt (ex_bb)
4035 || gimple_code (use->stmt) != GIMPLE_COND
4036 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4039 exit = EDGE_SUCC (ex_bb, 0);
4040 if (flow_bb_inside_loop_p (loop, exit->dest))
4041 exit = EDGE_SUCC (ex_bb, 1);
4042 if (flow_bb_inside_loop_p (loop, exit->dest))
4045 nit = niter_for_exit (data, exit);
4049 /* Determine whether we can use the variable to test the exit condition.
4050 This is the case iff the period of the induction variable is greater
4051 than the number of iterations for which the exit condition is true. */
4052 period = iv_period (cand->iv);
4054 /* If the number of iterations is constant, compare against it directly. */
4055 if (TREE_CODE (nit) == INTEGER_CST)
4057 if (!tree_int_cst_lt (nit, period))
4061 /* If not, and if this is the only possible exit of the loop, see whether
4062 we can get a conservative estimate on the number of iterations of the
4063 entire loop and compare against that instead. */
4064 else if (loop_only_exit_p (loop, exit))
4066 double_int period_value, max_niter;
4067 if (!estimated_loop_iterations (loop, true, &max_niter))
4069 period_value = tree_to_double_int (period);
4070 if (double_int_ucmp (max_niter, period_value) >= 0)
4074 /* Otherwise, punt. */
4078 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4080 *bound = aff_combination_to_tree (&bnd);
4081 /* It is unlikely that computing the number of iterations using division
4082 would be more profitable than keeping the original induction variable. */
4083 if (expression_expensive_p (*bound))
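/* A sketch of the transformation this analysis enables (purely illustrative
   loop): for "for (i = 0; i < n; i++) { ... p += 4; }" the exit test i < n
   can be replaced by a test of the candidate against the bound computed by
   cand_value_at, e.g. p != base + 4 * n, provided the candidate's period
   exceeds the number of iterations.  */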
4088 /* Determines cost of basing replacement of USE on CAND in a condition. */
4091 determine_use_iv_cost_condition (struct ivopts_data *data,
4092 struct iv_use *use, struct iv_cand *cand)
4094 tree bound = NULL_TREE;
4096 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4097 comp_cost elim_cost, express_cost, cost;
4099 tree *control_var, *bound_cst;
4101 /* Only consider real candidates. */
4104 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4108 /* Try iv elimination. */
4109 if (may_eliminate_iv (data, use, cand, &bound))
4111 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4112 /* The bound is a loop invariant, so it will be computed only once. */
4114 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4117 elim_cost = infinite_cost;
4119 /* Try expressing the original giv. If it is compared with an invariant,
4120 note that we cannot get rid of it. */
4121 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4125 /* When the condition is a comparison of the candidate IV against
4126 zero, prefer this IV.
4128 TODO: The constant that we're subtracting from the cost should
4129 be target-dependent. This information should be added to the
4130 target costs for each backend. */
4131 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4132 && integer_zerop (*bound_cst)
4133 && (operand_equal_p (*control_var, cand->var_after, 0)
4134 || operand_equal_p (*control_var, cand->var_before, 0)))
4135 elim_cost.cost -= 1;
4137 express_cost = get_computation_cost (data, use, cand, false,
4138 &depends_on_express, NULL);
4139 fd_ivopts_data = data;
4140 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4142 /* Choose the better approach, preferring the eliminated IV. */
4143 if (compare_costs (elim_cost, express_cost) <= 0)
4146 depends_on = depends_on_elim;
4147 depends_on_elim = NULL;
4151 cost = express_cost;
4152 depends_on = depends_on_express;
4153 depends_on_express = NULL;
4157 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4159 if (depends_on_elim)
4160 BITMAP_FREE (depends_on_elim);
4161 if (depends_on_express)
4162 BITMAP_FREE (depends_on_express);
4164 return !infinite_cost_p (cost);
4167 /* Determines cost of basing replacement of USE on CAND. Returns false
4168 if USE cannot be based on CAND. */
4171 determine_use_iv_cost (struct ivopts_data *data,
4172 struct iv_use *use, struct iv_cand *cand)
4176 case USE_NONLINEAR_EXPR:
4177 return determine_use_iv_cost_generic (data, use, cand);
4180 return determine_use_iv_cost_address (data, use, cand);
4183 return determine_use_iv_cost_condition (data, use, cand);
4190 /* Return true if get_computation_cost indicates that autoincrement is
4191 a possibility for the pair of USE and CAND, false otherwise. */
4194 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4195 struct iv_cand *cand)
4201 if (use->type != USE_ADDRESS)
4204 cost = get_computation_cost (data, use, cand, true, &depends_on,
4207 BITMAP_FREE (depends_on);
4209 return !infinite_cost_p (cost) && can_autoinc;
4212 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4213 use that allows autoincrement, and set their AINC_USE if possible. */
4216 set_autoinc_for_original_candidates (struct ivopts_data *data)
4220 for (i = 0; i < n_iv_cands (data); i++)
4222 struct iv_cand *cand = iv_cand (data, i);
4223 struct iv_use *closest = NULL;
4224 if (cand->pos != IP_ORIGINAL)
4226 for (j = 0; j < n_iv_uses (data); j++)
4228 struct iv_use *use = iv_use (data, j);
4229 unsigned uid = gimple_uid (use->stmt);
4230 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4231 || uid > gimple_uid (cand->incremented_at))
4233 if (closest == NULL || uid > gimple_uid (closest->stmt))
4236 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4238 cand->ainc_use = closest;
4242 /* Finds the candidates for the induction variables. */
4245 find_iv_candidates (struct ivopts_data *data)
4247 /* Add commonly used ivs. */
4248 add_standard_iv_candidates (data);
4250 /* Add old induction variables. */
4251 add_old_ivs_candidates (data);
4253 /* Add induction variables derived from uses. */
4254 add_derived_ivs_candidates (data);
4256 set_autoinc_for_original_candidates (data);
4258 /* Record the important candidates. */
4259 record_important_candidates (data);
4262 /* Determines costs of basing the use of the iv on an iv candidate. */
4265 determine_use_iv_costs (struct ivopts_data *data)
4269 struct iv_cand *cand;
4270 bitmap to_clear = BITMAP_ALLOC (NULL);
4272 alloc_use_cost_map (data);
4274 for (i = 0; i < n_iv_uses (data); i++)
4276 use = iv_use (data, i);
4278 if (data->consider_all_candidates)
4280 for (j = 0; j < n_iv_cands (data); j++)
4282 cand = iv_cand (data, j);
4283 determine_use_iv_cost (data, use, cand);
4290 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4292 cand = iv_cand (data, j);
4293 if (!determine_use_iv_cost (data, use, cand))
4294 bitmap_set_bit (to_clear, j);
4297 /* Remove the candidates for which the cost is infinite from
4298 the list of related candidates. */
4299 bitmap_and_compl_into (use->related_cands, to_clear);
4300 bitmap_clear (to_clear);
4304 BITMAP_FREE (to_clear);
4306 if (dump_file && (dump_flags & TDF_DETAILS))
4308 fprintf (dump_file, "Use-candidate costs:\n");
4310 for (i = 0; i < n_iv_uses (data); i++)
4312 use = iv_use (data, i);
4314 fprintf (dump_file, "Use %d:\n", i);
4315 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4316 for (j = 0; j < use->n_map_members; j++)
4318 if (!use->cost_map[j].cand
4319 || infinite_cost_p (use->cost_map[j].cost))
4322 fprintf (dump_file, " %d\t%d\t%d\t",
4323 use->cost_map[j].cand->id,
4324 use->cost_map[j].cost.cost,
4325 use->cost_map[j].cost.complexity);
4326 if (use->cost_map[j].depends_on)
4327 bitmap_print (dump_file,
4328 use->cost_map[j].depends_on, "","");
4329 fprintf (dump_file, "\n");
4332 fprintf (dump_file, "\n");
4334 fprintf (dump_file, "\n");
4338 /* Determines cost of the candidate CAND. */
4341 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4343 comp_cost cost_base;
4344 unsigned cost, cost_step;
4353 /* There are two costs associated with the candidate -- its increment
4354 and its initialization. The second is almost negligible for any loop
4355 that rolls enough, so we take it into account only very little. */
4357 base = cand->iv->base;
4358 cost_base = force_var_cost (data, base, NULL);
4359 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4361 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4363 /* Prefer the original ivs unless we may gain something by replacing them;
4364 the reason is to make debugging simpler. This is not relevant for
4365 artificial ivs created by other optimization passes. */
4366 if (cand->pos != IP_ORIGINAL
4367 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4370 /* Prefer not to insert statements into the latch unless there are some
4371 already (so that we do not create unnecessary jumps). */
4372 if (cand->pos == IP_END
4373 && empty_block_p (ip_end_pos (data->current_loop)))
4377 cand->cost_step = cost_step;
4380 /* Determines costs of computation of the candidates. */
4383 determine_iv_costs (struct ivopts_data *data)
4387 if (dump_file && (dump_flags & TDF_DETAILS))
4389 fprintf (dump_file, "Candidate costs:\n");
4390 fprintf (dump_file, " cand\tcost\n");
4393 for (i = 0; i < n_iv_cands (data); i++)
4395 struct iv_cand *cand = iv_cand (data, i);
4397 determine_iv_cost (data, cand);
4399 if (dump_file && (dump_flags & TDF_DETAILS))
4400 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4403 if (dump_file && (dump_flags & TDF_DETAILS))
4404 fprintf (dump_file, "\n");
4407 /* Calculates cost for having SIZE induction variables. */
4410 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4412 /* We add size to the cost, so that we prefer eliminating ivs if possible. */
4414 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4417 /* For each size of the induction variable set determine the penalty. */
4420 determine_set_costs (struct ivopts_data *data)
4424 gimple_stmt_iterator psi;
4426 struct loop *loop = data->current_loop;
4429 /* We use the following model (definitely improvable, especially the
4430 cost function -- TODO):
4432 We estimate the number of registers available (using MD data), name it A.
4434 We estimate the number of registers used by the loop, name it U. This
4435 number is obtained as the number of loop phi nodes (not counting virtual
4436 registers and bivs) + the number of variables from outside of the loop.
4438 We set a reserve R (free regs that are used for temporary computations,
4439 etc.). For now the reserve is a constant 3.
4441 Let I be the number of induction variables.
4443 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to encourage
4444 making a lot of ivs without a reason).
4445 -- if A - R < U + I <= A, the cost is I * PRES_COST
4446 -- if U + I > A, the cost is I * PRES_COST and
4447 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
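/* A worked instance of the model with made-up numbers: A = 16, R = 3,
   U = 10.  Then I = 2 ivs give U + I + R = 15 <= A (the cheap case), I = 5
   give A - R < 15 <= A (charged at PRES_COST per iv), and I = 8 make
   U + I exceed A, so the spill term kicks in on top of the PRES_COST
   charge.  */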
4449 if (dump_file && (dump_flags & TDF_DETAILS))
4451 fprintf (dump_file, "Global costs:\n");
4452 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4453 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4454 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4458 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4460 phi = gsi_stmt (psi);
4461 op = PHI_RESULT (phi);
4463 if (!is_gimple_reg (op))
4466 if (get_iv (data, op))
4472 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4474 struct version_info *info = ver_info (data, j);
4476 if (info->inv_id && info->has_nonlin_use)
4480 data->regs_used = n;
4481 if (dump_file && (dump_flags & TDF_DETAILS))
4482 fprintf (dump_file, " regs_used %d\n", n);
4484 if (dump_file && (dump_flags & TDF_DETAILS))
4486 fprintf (dump_file, " cost for size:\n");
4487 fprintf (dump_file, " ivs\tcost\n");
4488 for (j = 0; j <= 2 * target_avail_regs; j++)
4489 fprintf (dump_file, " %d\t%d\n", j,
4490 ivopts_global_cost_for_size (data, j));
4491 fprintf (dump_file, "\n");
4495 /* Returns true if A is a cheaper cost pair than B. */
4498 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4508 cmp = compare_costs (a->cost, b->cost);
4515 /* In case the costs are the same, prefer the cheaper candidate. */
4516 if (a->cand->cost < b->cand->cost)
4522 /* Computes the cost field of IVS structure. */
4525 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4527 comp_cost cost = ivs->cand_use_cost;
4528 cost.cost += ivs->cand_cost;
4529 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4534 /* Remove the invariants in set INVS from set IVS. */
4537 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4545 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4547 ivs->n_invariant_uses[iid]--;
4548 if (ivs->n_invariant_uses[iid] == 0)
4553 /* Set USE not to be expressed by any candidate in IVS. */
4556 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4559 unsigned uid = use->id, cid;
4560 struct cost_pair *cp;
4562 cp = ivs->cand_for_use[uid];
4568 ivs->cand_for_use[uid] = NULL;
4569 ivs->n_cand_uses[cid]--;
4571 if (ivs->n_cand_uses[cid] == 0)
4573 bitmap_clear_bit (ivs->cands, cid);
4574 /* Do not count the pseudocandidates. */
4578 ivs->cand_cost -= cp->cand->cost;
4580 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4583 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4585 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4586 iv_ca_recount_cost (data, ivs);
4589 /* Add invariants in set INVS to set IVS. */
4592 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4600 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4602 ivs->n_invariant_uses[iid]++;
4603 if (ivs->n_invariant_uses[iid] == 1)
4608 /* Set cost pair for USE in set IVS to CP. */
4611 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4612 struct iv_use *use, struct cost_pair *cp)
4614 unsigned uid = use->id, cid;
4616 if (ivs->cand_for_use[uid] == cp)
4619 if (ivs->cand_for_use[uid])
4620 iv_ca_set_no_cp (data, ivs, use);
4627 ivs->cand_for_use[uid] = cp;
4628 ivs->n_cand_uses[cid]++;
4629 if (ivs->n_cand_uses[cid] == 1)
4631 bitmap_set_bit (ivs->cands, cid);
4632 /* Do not count the pseudocandidates. */
4636 ivs->cand_cost += cp->cand->cost;
4638 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4641 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4642 iv_ca_set_add_invariants (ivs, cp->depends_on);
4643 iv_ca_recount_cost (data, ivs);
4647 /* Extend set IVS by expressing USE by some of the candidates in it
4651 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4654 struct cost_pair *best_cp = NULL, *cp;
4658 gcc_assert (ivs->upto >= use->id);
4660 if (ivs->upto == use->id)
4666 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4668 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4670 if (cheaper_cost_pair (cp, best_cp))
4674 iv_ca_set_cp (data, ivs, use, best_cp);
4677 /* Get cost for assignment IVS. */
4680 iv_ca_cost (struct iv_ca *ivs)
4682 /* This was a conditional expression but it triggered a bug in
4685 return infinite_cost;
4690 /* Returns true if all dependences of CP are among invariants in IVS. */
4693 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4698 if (!cp->depends_on)
4701 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4703 if (ivs->n_invariant_uses[i] == 0)
4710 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
4711 it before NEXT_CHANGE. */
4713 static struct iv_ca_delta *
4714 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4715 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4717 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4720 change->old_cp = old_cp;
4721 change->new_cp = new_cp;
4722 change->next_change = next_change;
4727 /* Joins two lists of changes L1 and L2. Destructive -- the old lists are unusable afterwards. */
4730 static struct iv_ca_delta *
4731 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4733 struct iv_ca_delta *last;
4741 for (last = l1; last->next_change; last = last->next_change)
4743 last->next_change = l2;
4748 /* Returns the cost pair by which USE is expressed in IVS. */
4750 static struct cost_pair *
4751 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4753 return ivs->cand_for_use[use->id];
4756 /* Reverse the list of changes DELTA, forming the inverse to it. */
4758 static struct iv_ca_delta *
4759 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4761 struct iv_ca_delta *act, *next, *prev = NULL;
4762 struct cost_pair *tmp;
4764 for (act = delta; act; act = next)
4766 next = act->next_change;
4767 act->next_change = prev;
4771 act->old_cp = act->new_cp;
4778 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4779 reverted instead. */
4782 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4783 struct iv_ca_delta *delta, bool forward)
4785 struct cost_pair *from, *to;
4786 struct iv_ca_delta *act;
4789 delta = iv_ca_delta_reverse (delta);
4791 for (act = delta; act; act = act->next_change)
4795 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4796 iv_ca_set_cp (data, ivs, act->use, to);
4800 iv_ca_delta_reverse (delta);
4803 /* Returns true if CAND is used in IVS. */
4806 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4808 return ivs->n_cand_uses[cand->id] > 0;
4811 /* Returns number of induction variable candidates in the set IVS. */
4814 iv_ca_n_cands (struct iv_ca *ivs)
4816 return ivs->n_cands;
4819 /* Free the list of changes DELTA. */
4822 iv_ca_delta_free (struct iv_ca_delta **delta)
4824 struct iv_ca_delta *act, *next;
4826 for (act = *delta; act; act = next)
4828 next = act->next_change;
4835 /* Allocates a new iv candidate assignment. */
4837 static struct iv_ca *
4838 iv_ca_new (struct ivopts_data *data)
4840 struct iv_ca *nw = XNEW (struct iv_ca);
4844 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4845 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4846 nw->cands = BITMAP_ALLOC (NULL);
4849 nw->cand_use_cost = zero_cost;
4851 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4852 nw->cost = zero_cost;
4857 /* Free memory occupied by the set IVS. */
4860 iv_ca_free (struct iv_ca **ivs)
4862 free ((*ivs)->cand_for_use);
4863 free ((*ivs)->n_cand_uses);
4864 BITMAP_FREE ((*ivs)->cands);
4865 free ((*ivs)->n_invariant_uses);
4870 /* Dumps IVS to FILE. */
4873 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4875 const char *pref = " invariants ";
4877 comp_cost cost = iv_ca_cost (ivs);
4879 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4880 bitmap_print (file, ivs->cands, " candidates ","\n");
4882 for (i = 1; i <= data->max_inv_id; i++)
4883 if (ivs->n_invariant_uses[i])
4885 fprintf (file, "%s%d", pref, i);
4888 fprintf (file, "\n");
4891 /* Try changing the candidate in IVS to CAND for each use. Return the cost of the
4892 new set, and store the differences in DELTA. The number of induction variables
4893 in the new set is stored in N_IVS. */
4896 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4897 struct iv_cand *cand, struct iv_ca_delta **delta,
4903 struct cost_pair *old_cp, *new_cp;
4906 for (i = 0; i < ivs->upto; i++)
4908 use = iv_use (data, i);
4909 old_cp = iv_ca_cand_for_use (ivs, use);
4912 && old_cp->cand == cand)
4915 new_cp = get_use_iv_cost (data, use, cand);
4919 if (!iv_ca_has_deps (ivs, new_cp))
4922 if (!cheaper_cost_pair (new_cp, old_cp))
4925 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4928 iv_ca_delta_commit (data, ivs, *delta, true);
4929 cost = iv_ca_cost (ivs);
4931 *n_ivs = iv_ca_n_cands (ivs);
4932 iv_ca_delta_commit (data, ivs, *delta, false);
4937 /* Try narrowing set IVS by removing CAND. Return the cost of
4938 the new set and store the differences in DELTA. */
static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;
              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates other than
   EXCEPT_CAND from it.  Return cost of the new set, and store the
   differences in DELTA.  */
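/* Removing one candidate may make further candidates removable, so the
   pruning recurses on the narrowed set until no single removal improves
   the cost any more.  */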
static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */
static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
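  /* For instance, in a loop such as

	 for (i = 0; i < n; i++)
	   {
	     a[i] = ...;
	     b[i] = ...;
	     c[i] = ...;
	   }

     a single counter "i" (a generic biv) usually serves all three address
     uses better than three separate pointer ivs, one per array, would.  */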
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important && cand->iv->base_object == NULL_TREE)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;
              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
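/* In outline, the search below proceeds roughly as follows (a sketch, not
   the exact control flow):

       set = initial solution (one candidate chosen per use);
       while (some single extension or pruning of SET lowers its cost)
	 apply the best such change;

   so the result is locally optimal with respect to single-candidate
   changes, not necessarily globally optimal.  */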
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */
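/* The position of the increment depends on CAND->pos: just before the exit
   test for IP_NORMAL, at the end of the latch block for IP_END, next to the
   statement that uses it for IP_BEFORE_USE/IP_AFTER_USE; for IP_ORIGINAL
   the pre-existing biv is kept and only marked as preserved.  */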
static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */
static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
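  /* E.g. when the original biv is "i" with increment "i.1 = i + 1" and the
     chosen candidate is that very biv, the increment statement can usually
     be left exactly as it is.  */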
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
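      /* For instance "i.1 = i + 1" may stay as it is, whereas "i.1 = i + j"
         must be rewritten if "j" is itself derived from another iv that
         could be removed later.  */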
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
                                 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
      TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
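  /* For example, when the reference is "a[i]" and the candidate iterates
     directly over the address of "a", the candidate's variable is passed as
     BASE_HINT, so that create_mem_ref can prefer it as the base of the
     resulting TARGET_MEM_REF rather than treating it as a plain offset.  */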
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
                        data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
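/* When the candidate can eliminate the original iv from the exit test, the
   comparison is rewritten to check the candidate against a precomputed
   bound; e.g. a test like "i < 100" may become "p != base + 400" when "p"
   advances by 4 each iteration (an illustration only -- the bound actually
   used comes from the cost pair computed earlier).  */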
static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     iv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;
    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;
    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;
    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes the data structures used by the induction variable
   optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}