1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
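/* Editor's note -- an illustrative sketch (not part of the pass itself) of
   the kind of transformation described above.  The function and variable
   names below are hypothetical.  */

#if 0
/* Before the optimization: the counter I is one induction variable, and the
   address of a[i], recomputed on every iteration, is another (derived) one.  */
void
example_before (int *a, int n)
{
  int i;

  for (i = 0; i < n; i++)
    a[i] = 0;
}

/* After strength reduction and induction variable elimination: a single
   pointer induction variable stepped by the element size, compared against
   a bound computed once before the loop (assuming N is non-negative).  */
void
example_after (int *a, int n)
{
  int *p = a;
  int *end = a + n;

  for (; p != end; p++)
    *p = 0;
}
#endif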
67 #include "coretypes.h"
71 #include "basic-block.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
79 #include "tree-pass.h"
81 #include "insn-config.h"
83 #include "pointer-set.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
92 #include "tree-ssa-propagate.h"
94 /* FIXME: Expressions are expanded to RTL in this pass to determine the
95 cost of different addressing modes. This should be moved to a TBD
96 interface between the GIMPLE and RTL worlds. */
99 /* The infinite cost. */
100 #define INFTY 10000000
102 /* The expected number of loop iterations. TODO -- use profiling instead of
104 #define AVG_LOOP_NITER(LOOP) 5
107 /* Representation of the induction variable. */
110 tree base; /* Initial value of the iv. */
111 tree base_object; /* A memory object to which the induction variable points. */
112 tree step; /* Step of the iv (constant only). */
113 tree ssa_name; /* The ssa name with the value. */
114 bool biv_p; /* Is it a biv? */
115 bool have_use_for; /* Do we already have a use for it? */
116 unsigned use_id; /* The identifier of that use, if have_use_for is true. */
119 /* Per-ssa version information (induction variable descriptions, etc.). */
122 tree name; /* The ssa name. */
123 struct iv *iv; /* Induction variable description. */
124 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
125 an expression that is not an induction variable. */
126 bool preserve_biv; /* For the original biv, whether to preserve it. */
127 unsigned inv_id; /* Id of an invariant. */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
138 /* Cost of a computation. */
141 int cost; /* The runtime cost. */
142 unsigned complexity; /* The estimate of the complexity of the code for
143 the computation (in no concrete units --
144 complexity field should be larger for more
145 complex expressions and addressing modes). */
148 static const comp_cost zero_cost = {0, 0};
149 static const comp_cost infinite_cost = {INFTY, INFTY};
151 /* The candidate - cost pair. */
154 struct iv_cand *cand; /* The candidate. */
155 comp_cost cost; /* The cost. */
156 bitmap depends_on; /* The list of invariants that have to be
158 tree value; /* For final value elimination, the expression for
159 the final value of the iv. For iv elimination,
160 the new bound to compare with. */
166 unsigned id; /* The id of the use. */
167 enum use_type type; /* Type of the use. */
168 struct iv *iv; /* The induction variable it is based on. */
169 gimple stmt; /* Statement in which it occurs. */
170 tree *op_p; /* The place where it occurs. */
171 bitmap related_cands; /* The set of "related" iv candidates, plus the common
174 unsigned n_map_members; /* Number of candidates in the cost_map list. */
175 struct cost_pair *cost_map;
176 /* The costs with respect to the iv candidates. */
178 struct iv_cand *selected;
179 /* The selected candidate. */
182 /* The position where the iv is computed. */
185 IP_NORMAL, /* At the end, just before the exit condition. */
186 IP_END, /* At the end of the latch block. */
187 IP_BEFORE_USE, /* Immediately before a specific use. */
188 IP_AFTER_USE, /* Immediately after a specific use. */
189 IP_ORIGINAL /* The original biv. */
192 /* The induction variable candidate. */
195 unsigned id; /* The number of the candidate. */
196 bool important; /* Whether this is an "important" candidate, i.e. such
197 that it should be considered by all uses. */
198 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
199 gimple incremented_at; /* For original biv, the statement where it is
201 tree var_before; /* The variable used for it before increment. */
202 tree var_after; /* The variable used for it after increment. */
203 struct iv *iv; /* The value of the candidate. NULL for
204 "pseudocandidate" used to indicate the possibility
205 to replace the final value of an iv by direct
206 computation of the value. */
207 unsigned cost; /* Cost of the candidate. */
208 unsigned cost_step; /* Cost of the candidate's increment operation. */
209 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
210 where it is incremented. */
211 bitmap depends_on; /* The list of invariants that are used in step of the
215 /* The data used by the induction variable optimizations. */
217 typedef struct iv_use *iv_use_p;
219 DEF_VEC_ALLOC_P(iv_use_p,heap);
221 typedef struct iv_cand *iv_cand_p;
222 DEF_VEC_P(iv_cand_p);
223 DEF_VEC_ALLOC_P(iv_cand_p,heap);
227 /* The currently optimized loop. */
228 struct loop *current_loop;
230 /* Numbers of iterations for all exits of the current loop. */
231 struct pointer_map_t *niters;
233 /* Number of registers used in it. */
236 /* The size of version_info array allocated. */
237 unsigned version_info_size;
239 /* The array of information for the ssa names. */
240 struct version_info *version_info;
242 /* The bitmap of indices in version_info whose value was changed. */
245 /* The uses of induction variables. */
246 VEC(iv_use_p,heap) *iv_uses;
248 /* The candidates. */
249 VEC(iv_cand_p,heap) *iv_candidates;
251 /* A bitmap of important candidates. */
252 bitmap important_candidates;
254 /* The maximum invariant id. */
257 /* Whether to consider just related and important candidates when replacing a
259 bool consider_all_candidates;
261 /* Are we optimizing for speed? */
265 /* An assignment of iv candidates to uses. */
269 /* The number of uses covered by the assignment. */
272 /* Number of uses that cannot be expressed by the candidates in the set. */
275 /* Candidate assigned to a use, together with the related costs. */
276 struct cost_pair **cand_for_use;
278 /* Number of times each candidate is used. */
279 unsigned *n_cand_uses;
281 /* The candidates used. */
284 /* The number of candidates in the set. */
287 /* Total number of registers needed. */
290 /* Total cost of expressing uses. */
291 comp_cost cand_use_cost;
293 /* Total cost of candidates. */
296 /* Number of times each invariant is used. */
297 unsigned *n_invariant_uses;
299 /* Total cost of the assignment. */
303 /* Difference of two iv candidate assignments. */
310 /* An old assignment (for rollback purposes). */
311 struct cost_pair *old_cp;
313 /* A new assignment. */
314 struct cost_pair *new_cp;
316 /* Next change in the list. */
317 struct iv_ca_delta *next_change;
320 /* Bound on the number of candidates below which all candidates are considered. */
322 #define CONSIDER_ALL_CANDIDATES_BOUND \
323 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
325 /* If there are more iv occurrences, we just give up (it is quite unlikely that
326 optimizing such a loop would help, and it would take ages). */
328 #define MAX_CONSIDERED_USES \
329 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
331 /* If there are at most this number of ivs in the set, always try removing
332 unnecessary ivs from the set. */
334 #define ALWAYS_PRUNE_CAND_SET_BOUND \
335 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
337 /* The list of trees for which the decl_rtl field must be reset is stored
340 static VEC(tree,heap) *decl_rtl_to_reset;
342 /* Number of uses recorded in DATA. */
344 static inline unsigned
345 n_iv_uses (struct ivopts_data *data)
347 return VEC_length (iv_use_p, data->iv_uses);
350 /* Ith use recorded in DATA. */
352 static inline struct iv_use *
353 iv_use (struct ivopts_data *data, unsigned i)
355 return VEC_index (iv_use_p, data->iv_uses, i);
358 /* Number of candidates recorded in DATA. */
360 static inline unsigned
361 n_iv_cands (struct ivopts_data *data)
363 return VEC_length (iv_cand_p, data->iv_candidates);
366 /* Ith candidate recorded in DATA. */
368 static inline struct iv_cand *
369 iv_cand (struct ivopts_data *data, unsigned i)
371 return VEC_index (iv_cand_p, data->iv_candidates, i);
374 /* The single loop exit if it dominates the latch, NULL otherwise. */
377 single_dom_exit (struct loop *loop)
379 edge exit = single_exit (loop);
384 if (!just_once_each_iteration_p (loop, exit->src))
390 /* Dumps information about the induction variable IV to FILE. */
392 extern void dump_iv (FILE *, struct iv *);
394 dump_iv (FILE *file, struct iv *iv)
398 fprintf (file, "ssa name ");
399 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
400 fprintf (file, "\n");
403 fprintf (file, " type ");
404 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
405 fprintf (file, "\n");
409 fprintf (file, " base ");
410 print_generic_expr (file, iv->base, TDF_SLIM);
411 fprintf (file, "\n");
413 fprintf (file, " step ");
414 print_generic_expr (file, iv->step, TDF_SLIM);
415 fprintf (file, "\n");
419 fprintf (file, " invariant ");
420 print_generic_expr (file, iv->base, TDF_SLIM);
421 fprintf (file, "\n");
426 fprintf (file, " base object ");
427 print_generic_expr (file, iv->base_object, TDF_SLIM);
428 fprintf (file, "\n");
432 fprintf (file, " is a biv\n");
435 /* Dumps information about the USE to FILE. */
437 extern void dump_use (FILE *, struct iv_use *);
439 dump_use (FILE *file, struct iv_use *use)
441 fprintf (file, "use %d\n", use->id);
445 case USE_NONLINEAR_EXPR:
446 fprintf (file, " generic\n");
450 fprintf (file, " address\n");
454 fprintf (file, " compare\n");
461 fprintf (file, " in statement ");
462 print_gimple_stmt (file, use->stmt, 0, 0);
463 fprintf (file, "\n");
465 fprintf (file, " at position ");
467 print_generic_expr (file, *use->op_p, TDF_SLIM);
468 fprintf (file, "\n");
470 dump_iv (file, use->iv);
472 if (use->related_cands)
474 fprintf (file, " related candidates ");
475 dump_bitmap (file, use->related_cands);
479 /* Dumps information about the uses to FILE. */
481 extern void dump_uses (FILE *, struct ivopts_data *);
483 dump_uses (FILE *file, struct ivopts_data *data)
488 for (i = 0; i < n_iv_uses (data); i++)
490 use = iv_use (data, i);
492 dump_use (file, use);
493 fprintf (file, "\n");
497 /* Dumps information about induction variable candidate CAND to FILE. */
499 extern void dump_cand (FILE *, struct iv_cand *);
501 dump_cand (FILE *file, struct iv_cand *cand)
503 struct iv *iv = cand->iv;
505 fprintf (file, "candidate %d%s\n",
506 cand->id, cand->important ? " (important)" : "");
508 if (cand->depends_on)
510 fprintf (file, " depends on ");
511 dump_bitmap (file, cand->depends_on);
516 fprintf (file, " final value replacement\n");
523 fprintf (file, " incremented before exit test\n");
527 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
531 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
535 fprintf (file, " incremented at end\n");
539 fprintf (file, " original biv\n");
546 /* Returns the info for ssa version VER. */
548 static inline struct version_info *
549 ver_info (struct ivopts_data *data, unsigned ver)
551 return data->version_info + ver;
554 /* Returns the info for ssa name NAME. */
556 static inline struct version_info *
557 name_info (struct ivopts_data *data, tree name)
559 return ver_info (data, SSA_NAME_VERSION (name));
562 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
566 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
568 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
572 if (sbb == loop->latch)
578 return stmt == last_stmt (bb);
581 /* Returns true if STMT is after the place where the original induction
582 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
583 if the positions are identical. */
586 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
588 basic_block cand_bb = gimple_bb (cand->incremented_at);
589 basic_block stmt_bb = gimple_bb (stmt);
591 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
594 if (stmt_bb != cand_bb)
598 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
600 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
603 /* Returns true if STMT is after the place where the induction variable
604 CAND is incremented in LOOP. */
607 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
615 return stmt_after_ip_normal_pos (loop, stmt);
619 return stmt_after_inc_pos (cand, stmt, false);
622 return stmt_after_inc_pos (cand, stmt, true);
629 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
632 abnormal_ssa_name_p (tree exp)
637 if (TREE_CODE (exp) != SSA_NAME)
640 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
643 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
644 abnormal phi node. Callback for for_each_index. */
647 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
648 void *data ATTRIBUTE_UNUSED)
650 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
652 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
654 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
658 return !abnormal_ssa_name_p (*index);
661 /* Returns true if EXPR contains an ssa name that occurs in an
662 abnormal phi node. */
665 contains_abnormal_ssa_name_p (tree expr)
668 enum tree_code_class codeclass;
673 code = TREE_CODE (expr);
674 codeclass = TREE_CODE_CLASS (code);
676 if (code == SSA_NAME)
677 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
679 if (code == INTEGER_CST
680 || is_gimple_min_invariant (expr))
683 if (code == ADDR_EXPR)
684 return !for_each_index (&TREE_OPERAND (expr, 0),
685 idx_contains_abnormal_ssa_name_p,
688 if (code == COND_EXPR)
689 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
690 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
691 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
697 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
702 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
714 /* Returns tree describing number of iterations determined from
715 EXIT of DATA->current_loop, or NULL if something goes wrong. */
718 niter_for_exit (struct ivopts_data *data, edge exit)
720 struct tree_niter_desc desc;
726 data->niters = pointer_map_create ();
730 slot = pointer_map_contains (data->niters, exit);
734 /* Try to determine number of iterations. We must know it
735 unconditionally (i.e., without possibility of # of iterations
736 being zero). Also, we cannot safely work with ssa names that
737 appear in phi nodes on abnormal edges, so that we do not create
738 overlapping life ranges for them (PR 27283). */
739 if (number_of_iterations_exit (data->current_loop,
741 && integer_zerop (desc.may_be_zero)
742 && !contains_abnormal_ssa_name_p (desc.niter))
747 *pointer_map_insert (data->niters, exit) = niter;
750 niter = (tree) *slot;
755 /* Returns tree describing number of iterations determined from
756 single dominating exit of DATA->current_loop, or NULL if something
760 niter_for_single_dom_exit (struct ivopts_data *data)
762 edge exit = single_dom_exit (data->current_loop);
767 return niter_for_exit (data, exit);
770 /* Initializes data structures used by the iv optimization pass, stored
774 tree_ssa_iv_optimize_init (struct ivopts_data *data)
776 data->version_info_size = 2 * num_ssa_names;
777 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
778 data->relevant = BITMAP_ALLOC (NULL);
779 data->important_candidates = BITMAP_ALLOC (NULL);
780 data->max_inv_id = 0;
782 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
783 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
784 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
787 /* Returns a memory object to which EXPR points. In case we are able to
788 determine that it does not point to any such object, NULL is returned. */
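/* For instance (editor's note, illustrative only): for EXPR == &a[i] the
   returned base object is the address of the array A converted to
   ptr_type_node; for an EXPR of non-pointer type there is no base object
   and NULL is returned.  */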
791 determine_base_object (tree expr)
793 enum tree_code code = TREE_CODE (expr);
796 /* If this is a pointer cast to any type, we need to determine
797 the base object for the pointer; so handle conversions before
798 throwing away non-pointer expressions. */
799 if (CONVERT_EXPR_P (expr))
800 return determine_base_object (TREE_OPERAND (expr, 0));
802 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
811 obj = TREE_OPERAND (expr, 0);
812 base = get_base_address (obj);
817 if (TREE_CODE (base) == MEM_REF)
818 return determine_base_object (TREE_OPERAND (base, 0));
820 return fold_convert (ptr_type_node,
821 build_fold_addr_expr (base));
823 case POINTER_PLUS_EXPR:
824 return determine_base_object (TREE_OPERAND (expr, 0));
828 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
832 return fold_convert (ptr_type_node, expr);
836 /* Allocates an induction variable with given initial value BASE and step STEP
840 alloc_iv (tree base, tree step)
842 struct iv *iv = XCNEW (struct iv);
843 gcc_assert (step != NULL_TREE);
846 iv->base_object = determine_base_object (base);
849 iv->have_use_for = false;
851 iv->ssa_name = NULL_TREE;
856 /* Sets STEP and BASE for induction variable IV. */
859 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
861 struct version_info *info = name_info (data, iv);
863 gcc_assert (!info->iv);
865 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
866 info->iv = alloc_iv (base, step);
867 info->iv->ssa_name = iv;
870 /* Finds induction variable declaration for VAR. */
873 get_iv (struct ivopts_data *data, tree var)
876 tree type = TREE_TYPE (var);
878 if (!POINTER_TYPE_P (type)
879 && !INTEGRAL_TYPE_P (type))
882 if (!name_info (data, var)->iv)
884 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
887 || !flow_bb_inside_loop_p (data->current_loop, bb))
888 set_iv (data, var, var, build_int_cst (type, 0));
891 return name_info (data, var)->iv;
894 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
895 not define a simple affine biv with nonzero step. */
898 determine_biv_step (gimple phi)
900 struct loop *loop = gimple_bb (phi)->loop_father;
901 tree name = PHI_RESULT (phi);
904 if (!is_gimple_reg (name))
907 if (!simple_iv (loop, loop, name, &iv, true))
910 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
913 /* Finds basic ivs. */
916 find_bivs (struct ivopts_data *data)
919 tree step, type, base;
921 struct loop *loop = data->current_loop;
922 gimple_stmt_iterator psi;
924 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
926 phi = gsi_stmt (psi);
928 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
931 step = determine_biv_step (phi);
935 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
936 base = expand_simple_operations (base);
937 if (contains_abnormal_ssa_name_p (base)
938 || contains_abnormal_ssa_name_p (step))
941 type = TREE_TYPE (PHI_RESULT (phi));
942 base = fold_convert (type, base);
945 if (POINTER_TYPE_P (type))
946 step = fold_convert (sizetype, step);
948 step = fold_convert (type, step);
951 set_iv (data, PHI_RESULT (phi), base, step);
958 /* Marks basic ivs. */
961 mark_bivs (struct ivopts_data *data)
965 struct iv *iv, *incr_iv;
966 struct loop *loop = data->current_loop;
968 gimple_stmt_iterator psi;
970 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
972 phi = gsi_stmt (psi);
974 iv = get_iv (data, PHI_RESULT (phi));
978 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
979 incr_iv = get_iv (data, var);
983 /* If the increment is in the subloop, ignore it. */
984 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
985 if (incr_bb->loop_father != data->current_loop
986 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
990 incr_iv->biv_p = true;
994 /* Checks whether STMT defines a linear induction variable and stores its
998 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1001 struct loop *loop = data->current_loop;
1003 iv->base = NULL_TREE;
1004 iv->step = NULL_TREE;
1006 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1009 lhs = gimple_assign_lhs (stmt);
1010 if (TREE_CODE (lhs) != SSA_NAME)
1013 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1015 iv->base = expand_simple_operations (iv->base);
1017 if (contains_abnormal_ssa_name_p (iv->base)
1018 || contains_abnormal_ssa_name_p (iv->step))
1024 /* Finds general ivs in statement STMT. */
1027 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1031 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1034 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1037 /* Finds general ivs in basic block BB. */
1040 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1042 gimple_stmt_iterator bsi;
1044 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1045 find_givs_in_stmt (data, gsi_stmt (bsi));
1048 /* Finds general ivs. */
1051 find_givs (struct ivopts_data *data)
1053 struct loop *loop = data->current_loop;
1054 basic_block *body = get_loop_body_in_dom_order (loop);
1057 for (i = 0; i < loop->num_nodes; i++)
1058 find_givs_in_bb (data, body[i]);
1062 /* For each ssa name defined in LOOP, determines whether it is an induction
1063 variable and if so, its initial value and step. */
1066 find_induction_variables (struct ivopts_data *data)
1071 if (!find_bivs (data))
1077 if (dump_file && (dump_flags & TDF_DETAILS))
1079 tree niter = niter_for_single_dom_exit (data);
1083 fprintf (dump_file, " number of iterations ");
1084 print_generic_expr (dump_file, niter, TDF_SLIM);
1085 fprintf (dump_file, "\n\n");
1088 fprintf (dump_file, "Induction variables:\n\n");
1090 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1092 if (ver_info (data, i)->iv)
1093 dump_iv (dump_file, ver_info (data, i)->iv);
1100 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1102 static struct iv_use *
1103 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1104 gimple stmt, enum use_type use_type)
1106 struct iv_use *use = XCNEW (struct iv_use);
1108 use->id = n_iv_uses (data);
1109 use->type = use_type;
1113 use->related_cands = BITMAP_ALLOC (NULL);
1115 /* To avoid showing ssa name in the dumps, if it was not reset by the
1117 iv->ssa_name = NULL_TREE;
1119 if (dump_file && (dump_flags & TDF_DETAILS))
1120 dump_use (dump_file, use);
1122 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1127 /* Checks whether OP is a loop-level invariant and if so, records it.
1128 NONLINEAR_USE is true if the invariant is used in a way we do not
1129 handle specially. */
1132 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1135 struct version_info *info;
1137 if (TREE_CODE (op) != SSA_NAME
1138 || !is_gimple_reg (op))
1141 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1143 && flow_bb_inside_loop_p (data->current_loop, bb))
1146 info = name_info (data, op);
1148 info->has_nonlin_use |= nonlinear_use;
1150 info->inv_id = ++data->max_inv_id;
1151 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1154 /* Checks whether the use OP is interesting and if so, records it. */
1156 static struct iv_use *
1157 find_interesting_uses_op (struct ivopts_data *data, tree op)
1164 if (TREE_CODE (op) != SSA_NAME)
1167 iv = get_iv (data, op);
1171 if (iv->have_use_for)
1173 use = iv_use (data, iv->use_id);
1175 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1179 if (integer_zerop (iv->step))
1181 record_invariant (data, op, true);
1184 iv->have_use_for = true;
1186 civ = XNEW (struct iv);
1189 stmt = SSA_NAME_DEF_STMT (op);
1190 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1191 || is_gimple_assign (stmt));
1193 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1194 iv->use_id = use->id;
1199 /* Given a condition in statement STMT, checks whether it is a compare
1200 of an induction variable and an invariant. If this is the case,
1201 CONTROL_VAR is set to location of the iv, BOUND to the location of
1202 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1203 induction variable descriptions, and true is returned. If this is not
1204 the case, CONTROL_VAR and BOUND are set to the arguments of the
1205 condition and false is returned. */
1208 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1209 tree **control_var, tree **bound,
1210 struct iv **iv_var, struct iv **iv_bound)
1212 /* The objects returned when COND has constant operands. */
1213 static struct iv const_iv;
1215 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1216 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1219 if (gimple_code (stmt) == GIMPLE_COND)
1221 op0 = gimple_cond_lhs_ptr (stmt);
1222 op1 = gimple_cond_rhs_ptr (stmt);
1226 op0 = gimple_assign_rhs1_ptr (stmt);
1227 op1 = gimple_assign_rhs2_ptr (stmt);
1230 zero = integer_zero_node;
1231 const_iv.step = integer_zero_node;
1233 if (TREE_CODE (*op0) == SSA_NAME)
1234 iv0 = get_iv (data, *op0);
1235 if (TREE_CODE (*op1) == SSA_NAME)
1236 iv1 = get_iv (data, *op1);
1238 /* Exactly one of the compared values must be an iv, and the other one must
1243 if (integer_zerop (iv0->step))
1245 /* Control variable may be on the other side. */
1246 tmp_op = op0; op0 = op1; op1 = tmp_op;
1247 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1249 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1253 *control_var = op0;
1264 /* Checks whether the condition in STMT is interesting and if so,
1268 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1270 tree *var_p, *bound_p;
1271 struct iv *var_iv, *civ;
1273 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1275 find_interesting_uses_op (data, *var_p);
1276 find_interesting_uses_op (data, *bound_p);
1280 civ = XNEW (struct iv);
1282 record_use (data, NULL, civ, stmt, USE_COMPARE);
1285 /* Returns true if expression EXPR is obviously invariant in LOOP,
1286 i.e. if all its operands are defined outside of the LOOP. LOOP
1287 should not be the function body. */
1290 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1295 gcc_assert (loop_depth (loop) > 0);
1297 if (is_gimple_min_invariant (expr))
1300 if (TREE_CODE (expr) == SSA_NAME)
1302 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1304 && flow_bb_inside_loop_p (loop, def_bb))
1313 len = TREE_OPERAND_LENGTH (expr);
1314 for (i = 0; i < len; i++)
1315 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1321 /* Returns true if statement STMT is obviously invariant in LOOP,
1322 i.e. if all its operands on the RHS are defined outside of the LOOP.
1323 LOOP should not be the function body. */
1326 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1331 gcc_assert (loop_depth (loop) > 0);
1333 lhs = gimple_get_lhs (stmt);
1334 for (i = 0; i < gimple_num_ops (stmt); i++)
1336 tree op = gimple_op (stmt, i);
1337 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1344 /* Cumulates the steps of indices into DATA and replaces their values with the
1345 initial ones. Returns false when the value of the index cannot be determined.
1346 Callback for for_each_index. */
1348 struct ifs_ivopts_data
1350 struct ivopts_data *ivopts_data;
1356 idx_find_step (tree base, tree *idx, void *data)
1358 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1360 tree step, iv_base, iv_step, lbound, off;
1361 struct loop *loop = dta->ivopts_data->current_loop;
1363 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1364 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1367 /* If base is a component ref, require that the offset of the reference
1369 if (TREE_CODE (base) == COMPONENT_REF)
1371 off = component_ref_field_offset (base);
1372 return expr_invariant_in_loop_p (loop, off);
1375 /* If base is array, first check whether we will be able to move the
1376 reference out of the loop (in order to take its address in strength
1377 reduction). In order for this to work we need both lower bound
1378 and step to be loop invariants. */
1379 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1381 /* Moreover, for a range, the size needs to be invariant as well. */
1382 if (TREE_CODE (base) == ARRAY_RANGE_REF
1383 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1386 step = array_ref_element_size (base);
1387 lbound = array_ref_low_bound (base);
1389 if (!expr_invariant_in_loop_p (loop, step)
1390 || !expr_invariant_in_loop_p (loop, lbound))
1394 if (TREE_CODE (*idx) != SSA_NAME)
1397 iv = get_iv (dta->ivopts_data, *idx);
1401 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1402 *&x[0], which is not folded and does not trigger the
1403 ARRAY_REF path below. */
1406 if (integer_zerop (iv->step))
1409 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1411 step = array_ref_element_size (base);
1413 /* We only handle addresses whose step is an integer constant. */
1414 if (TREE_CODE (step) != INTEGER_CST)
1418 /* The step for pointer arithmetic is already 1 byte. */
1419 step = build_int_cst (sizetype, 1);
1423 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1424 sizetype, &iv_base, &iv_step, dta->stmt,
1427 /* The index might wrap. */
1431 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1432 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1437 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1438 object is passed to it in DATA. */
1441 idx_record_use (tree base, tree *idx,
1444 struct ivopts_data *data = (struct ivopts_data *) vdata;
1445 find_interesting_uses_op (data, *idx);
1446 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1448 find_interesting_uses_op (data, array_ref_element_size (base));
1449 find_interesting_uses_op (data, array_ref_low_bound (base));
1454 /* If we can prove that TOP = cst * BOT for some constant cst,
1455 store cst to MUL and return true. Otherwise return false.
1456 The returned value is always sign-extended, regardless of the
1457 signedness of TOP and BOT. */
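/* Worked examples (editor's note, illustrative only): for TOP == n * 8 and
   BOT == n, *MUL is set to 8 and true is returned; for TOP == 24 and
   BOT == 4, *MUL is set to 6; for TOP == n + 1 and BOT == n the function
   returns false, since the ratio is not a compile-time constant.  */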
1460 constant_multiple_of (tree top, tree bot, double_int *mul)
1463 enum tree_code code;
1464 double_int res, p0, p1;
1465 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1470 if (operand_equal_p (top, bot, 0))
1472 *mul = double_int_one;
1476 code = TREE_CODE (top);
1480 mby = TREE_OPERAND (top, 1);
1481 if (TREE_CODE (mby) != INTEGER_CST)
1484 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1487 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1493 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1494 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1497 if (code == MINUS_EXPR)
1498 p1 = double_int_neg (p1);
1499 *mul = double_int_sext (double_int_add (p0, p1), precision);
1503 if (TREE_CODE (bot) != INTEGER_CST)
1506 p0 = double_int_sext (tree_to_double_int (top), precision);
1507 p1 = double_int_sext (tree_to_double_int (bot), precision);
1508 if (double_int_zero_p (p1))
1510 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1512 return double_int_zero_p (res);
1519 /* Returns true if memory reference REF with step STEP may be unaligned. */
1522 may_be_unaligned_p (tree ref, tree step)
1526 HOST_WIDE_INT bitsize;
1527 HOST_WIDE_INT bitpos;
1529 enum machine_mode mode;
1530 int unsignedp, volatilep;
1531 unsigned base_align;
1533 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1534 thus they are not misaligned. */
1535 if (TREE_CODE (ref) == TARGET_MEM_REF)
1538 /* The test below is basically copy of what expr.c:normal_inner_ref
1539 does to check whether the object must be loaded by parts when
1540 STRICT_ALIGNMENT is true. */
1541 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1542 &unsignedp, &volatilep, true);
1543 base_type = TREE_TYPE (base);
1544 base_align = TYPE_ALIGN (base_type);
1546 if (mode != BLKmode)
1548 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1550 if (base_align < mode_align
1551 || (bitpos % mode_align) != 0
1552 || (bitpos % BITS_PER_UNIT) != 0)
1556 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1559 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1566 /* Return true if EXPR may be non-addressable. */
1569 may_be_nonaddressable_p (tree expr)
1571 switch (TREE_CODE (expr))
1573 case TARGET_MEM_REF:
1574 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1575 target, thus they are always addressable. */
1579 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1580 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1582 case VIEW_CONVERT_EXPR:
1583 /* This kind of view-conversions may wrap non-addressable objects
1584 and make them look addressable. After some processing the
1585 non-addressability may be uncovered again, causing ADDR_EXPRs
1586 of inappropriate objects to be built. */
1587 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1588 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1591 /* ... fall through ... */
1594 case ARRAY_RANGE_REF:
1595 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1607 /* Finds addresses in *OP_P inside STMT. */
1610 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1612 tree base = *op_p, step = build_int_cst (sizetype, 0);
1614 struct ifs_ivopts_data ifs_ivopts_data;
1616 /* Do not play with volatile memory references. A bit too conservative,
1617 perhaps, but safe. */
1618 if (gimple_has_volatile_ops (stmt))
1621 /* Ignore bitfields for now. Not really something terribly complicated
1623 if (TREE_CODE (base) == BIT_FIELD_REF)
1626 base = unshare_expr (base);
1628 if (TREE_CODE (base) == TARGET_MEM_REF)
1630 tree type = build_pointer_type (TREE_TYPE (base));
1634 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1636 civ = get_iv (data, TMR_BASE (base));
1640 TMR_BASE (base) = civ->base;
1643 if (TMR_INDEX (base)
1644 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1646 civ = get_iv (data, TMR_INDEX (base));
1650 TMR_INDEX (base) = civ->base;
1655 if (TMR_STEP (base))
1656 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1658 step = fold_build2 (PLUS_EXPR, type, step, astep);
1662 if (integer_zerop (step))
1664 base = tree_mem_ref_addr (type, base);
1668 ifs_ivopts_data.ivopts_data = data;
1669 ifs_ivopts_data.stmt = stmt;
1670 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1671 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1672 || integer_zerop (ifs_ivopts_data.step))
1674 step = ifs_ivopts_data.step;
1676 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1677 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1679 /* Check that the base expression is addressable. This needs
1680 to be done after substituting bases of IVs into it. */
1681 if (may_be_nonaddressable_p (base))
1684 /* Moreover, on strict alignment platforms, check that it is
1685 sufficiently aligned. */
1686 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1689 base = build_fold_addr_expr (base);
1691 /* Substituting bases of IVs into the base expression might
1692 have caused folding opportunities. */
1693 if (TREE_CODE (base) == ADDR_EXPR)
1695 tree *ref = &TREE_OPERAND (base, 0);
1696 while (handled_component_p (*ref))
1697 ref = &TREE_OPERAND (*ref, 0);
1698 if (TREE_CODE (*ref) == MEM_REF)
1700 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1701 TREE_OPERAND (*ref, 0),
1702 TREE_OPERAND (*ref, 1));
1709 civ = alloc_iv (base, step);
1710 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1714 for_each_index (op_p, idx_record_use, data);
1717 /* Finds and records invariants used in STMT. */
1720 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1723 use_operand_p use_p;
1726 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1728 op = USE_FROM_PTR (use_p);
1729 record_invariant (data, op, false);
1733 /* Finds interesting uses of induction variables in the statement STMT. */
1736 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1739 tree op, *lhs, *rhs;
1741 use_operand_p use_p;
1742 enum tree_code code;
1744 find_invariants_stmt (data, stmt);
1746 if (gimple_code (stmt) == GIMPLE_COND)
1748 find_interesting_uses_cond (data, stmt);
1752 if (is_gimple_assign (stmt))
1754 lhs = gimple_assign_lhs_ptr (stmt);
1755 rhs = gimple_assign_rhs1_ptr (stmt);
1757 if (TREE_CODE (*lhs) == SSA_NAME)
1759 /* If the statement defines an induction variable, the uses are not
1760 interesting by themselves. */
1762 iv = get_iv (data, *lhs);
1764 if (iv && !integer_zerop (iv->step))
1768 code = gimple_assign_rhs_code (stmt);
1769 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1770 && (REFERENCE_CLASS_P (*rhs)
1771 || is_gimple_val (*rhs)))
1773 if (REFERENCE_CLASS_P (*rhs))
1774 find_interesting_uses_address (data, stmt, rhs);
1776 find_interesting_uses_op (data, *rhs);
1778 if (REFERENCE_CLASS_P (*lhs))
1779 find_interesting_uses_address (data, stmt, lhs);
1782 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1784 find_interesting_uses_cond (data, stmt);
1788 /* TODO -- we should also handle address uses of type
1790 memory = call (whatever);
1797 if (gimple_code (stmt) == GIMPLE_PHI
1798 && gimple_bb (stmt) == data->current_loop->header)
1800 iv = get_iv (data, PHI_RESULT (stmt));
1802 if (iv && !integer_zerop (iv->step))
1806 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1808 op = USE_FROM_PTR (use_p);
1810 if (TREE_CODE (op) != SSA_NAME)
1813 iv = get_iv (data, op);
1817 find_interesting_uses_op (data, op);
1821 /* Finds interesting uses of induction variables outside of loops
1822 on loop exit edge EXIT. */
1825 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1828 gimple_stmt_iterator psi;
1831 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1833 phi = gsi_stmt (psi);
1834 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1835 if (is_gimple_reg (def))
1836 find_interesting_uses_op (data, def);
1840 /* Finds uses of the induction variables that are interesting. */
1843 find_interesting_uses (struct ivopts_data *data)
1846 gimple_stmt_iterator bsi;
1847 basic_block *body = get_loop_body (data->current_loop);
1849 struct version_info *info;
1852 if (dump_file && (dump_flags & TDF_DETAILS))
1853 fprintf (dump_file, "Uses:\n\n");
1855 for (i = 0; i < data->current_loop->num_nodes; i++)
1860 FOR_EACH_EDGE (e, ei, bb->succs)
1861 if (e->dest != EXIT_BLOCK_PTR
1862 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1863 find_interesting_uses_outside (data, e);
1865 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1866 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1867 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1868 if (!is_gimple_debug (gsi_stmt (bsi)))
1869 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1872 if (dump_file && (dump_flags & TDF_DETAILS))
1876 fprintf (dump_file, "\n");
1878 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1880 info = ver_info (data, i);
1883 fprintf (dump_file, " ");
1884 print_generic_expr (dump_file, info->name, TDF_SLIM);
1885 fprintf (dump_file, " is invariant (%d)%s\n",
1886 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1890 fprintf (dump_file, "\n");
1896 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1897 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1898 we are at the top-level of the processed address. */
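/* Worked examples (editor's note, illustrative only): for EXPR == p + 16
   the function returns P and stores 16 to *OFFSET; for EXPR == &a[i + 3]
   with 4-byte array elements it returns &a[i] and stores 12, so that the
   constant part of the address can be handled separately.  */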
1901 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1902 unsigned HOST_WIDE_INT *offset)
1904 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1905 enum tree_code code;
1906 tree type, orig_type = TREE_TYPE (expr);
1907 unsigned HOST_WIDE_INT off0, off1, st;
1908 tree orig_expr = expr;
1912 type = TREE_TYPE (expr);
1913 code = TREE_CODE (expr);
1919 if (!cst_and_fits_in_hwi (expr)
1920 || integer_zerop (expr))
1923 *offset = int_cst_value (expr);
1924 return build_int_cst (orig_type, 0);
1926 case POINTER_PLUS_EXPR:
1929 op0 = TREE_OPERAND (expr, 0);
1930 op1 = TREE_OPERAND (expr, 1);
1932 op0 = strip_offset_1 (op0, false, false, &off0);
1933 op1 = strip_offset_1 (op1, false, false, &off1);
1935 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1936 if (op0 == TREE_OPERAND (expr, 0)
1937 && op1 == TREE_OPERAND (expr, 1))
1940 if (integer_zerop (op1))
1942 else if (integer_zerop (op0))
1944 if (code == MINUS_EXPR)
1945 expr = fold_build1 (NEGATE_EXPR, type, op1);
1950 expr = fold_build2 (code, type, op0, op1);
1952 return fold_convert (orig_type, expr);
1955 op1 = TREE_OPERAND (expr, 1);
1956 if (!cst_and_fits_in_hwi (op1))
1959 op0 = TREE_OPERAND (expr, 0);
1960 op0 = strip_offset_1 (op0, false, false, &off0);
1961 if (op0 == TREE_OPERAND (expr, 0))
1964 *offset = off0 * int_cst_value (op1);
1965 if (integer_zerop (op0))
1968 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1970 return fold_convert (orig_type, expr);
1973 case ARRAY_RANGE_REF:
1977 step = array_ref_element_size (expr);
1978 if (!cst_and_fits_in_hwi (step))
1981 st = int_cst_value (step);
1982 op1 = TREE_OPERAND (expr, 1);
1983 op1 = strip_offset_1 (op1, false, false, &off1);
1984 *offset = off1 * st;
1987 && integer_zerop (op1))
1989 /* Strip the component reference completely. */
1990 op0 = TREE_OPERAND (expr, 0);
1991 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2001 tmp = component_ref_field_offset (expr);
2003 && cst_and_fits_in_hwi (tmp))
2005 /* Strip the component reference completely. */
2006 op0 = TREE_OPERAND (expr, 0);
2007 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2008 *offset = off0 + int_cst_value (tmp);
2014 op0 = TREE_OPERAND (expr, 0);
2015 op0 = strip_offset_1 (op0, true, true, &off0);
2018 if (op0 == TREE_OPERAND (expr, 0))
2021 expr = build_fold_addr_expr (op0);
2022 return fold_convert (orig_type, expr);
2025 /* ??? Offset operand? */
2026 inside_addr = false;
2033 /* Default handling of expressions for which we want to recurse into
2034 the first operand. */
2035 op0 = TREE_OPERAND (expr, 0);
2036 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2039 if (op0 == TREE_OPERAND (expr, 0)
2040 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2043 expr = copy_node (expr);
2044 TREE_OPERAND (expr, 0) = op0;
2046 TREE_OPERAND (expr, 1) = op1;
2048 /* Inside address, we might strip the top level component references,
2049 thus changing type of the expression. Handling of ADDR_EXPR
2051 expr = fold_convert (orig_type, expr);
2056 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2059 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2061 return strip_offset_1 (expr, false, false, offset);
2064 /* Returns variant of TYPE that can be used as base for different uses.
2065 We return unsigned type with the same precision, which avoids problems
2069 generic_type_for (tree type)
2071 if (POINTER_TYPE_P (type))
2072 return unsigned_type_for (type);
2074 if (TYPE_UNSIGNED (type))
2077 return unsigned_type_for (type);
2080 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2081 the bitmap in which we should store them. */
2083 static struct ivopts_data *fd_ivopts_data;
2085 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2087 bitmap *depends_on = (bitmap *) data;
2088 struct version_info *info;
2090 if (TREE_CODE (*expr_p) != SSA_NAME)
2092 info = name_info (fd_ivopts_data, *expr_p);
2094 if (!info->inv_id || info->has_nonlin_use)
2098 *depends_on = BITMAP_ALLOC (NULL);
2099 bitmap_set_bit (*depends_on, info->inv_id);
2104 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2105 position to POS. If USE is not NULL, the candidate is set as related to
2106 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2107 replacement of the final value of the iv by a direct computation. */
2109 static struct iv_cand *
2110 add_candidate_1 (struct ivopts_data *data,
2111 tree base, tree step, bool important, enum iv_position pos,
2112 struct iv_use *use, gimple incremented_at)
2115 struct iv_cand *cand = NULL;
2116 tree type, orig_type;
2120 orig_type = TREE_TYPE (base);
2121 type = generic_type_for (orig_type);
2122 if (type != orig_type)
2124 base = fold_convert (type, base);
2125 step = fold_convert (type, step);
2129 for (i = 0; i < n_iv_cands (data); i++)
2131 cand = iv_cand (data, i);
2133 if (cand->pos != pos)
2136 if (cand->incremented_at != incremented_at
2137 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2138 && cand->ainc_use != use))
2152 if (operand_equal_p (base, cand->iv->base, 0)
2153 && operand_equal_p (step, cand->iv->step, 0))
2157 if (i == n_iv_cands (data))
2159 cand = XCNEW (struct iv_cand);
2165 cand->iv = alloc_iv (base, step);
2168 if (pos != IP_ORIGINAL && cand->iv)
2170 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2171 cand->var_after = cand->var_before;
2173 cand->important = important;
2174 cand->incremented_at = incremented_at;
2175 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2178 && TREE_CODE (step) != INTEGER_CST)
2180 fd_ivopts_data = data;
2181 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2184 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2185 cand->ainc_use = use;
2187 cand->ainc_use = NULL;
2189 if (dump_file && (dump_flags & TDF_DETAILS))
2190 dump_cand (dump_file, cand);
2193 if (important && !cand->important)
2195 cand->important = true;
2196 if (dump_file && (dump_flags & TDF_DETAILS))
2197 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2202 bitmap_set_bit (use->related_cands, i);
2203 if (dump_file && (dump_flags & TDF_DETAILS))
2204 fprintf (dump_file, "Candidate %d is related to use %d\n",
2211 /* Returns true if incrementing the induction variable at the end of the LOOP
2214 The purpose is to avoid splitting the latch edge with a biv increment, thus
2215 creating a jump, possibly confusing other optimization passes and leaving
2216 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2217 is not available (so we do not have a better alternative), or if the latch
2218 edge is already nonempty. */
2221 allow_ip_end_pos_p (struct loop *loop)
2223 if (!ip_normal_pos (loop))
2226 if (!empty_block_p (ip_end_pos (loop)))
2232 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2233 Important field is set to IMPORTANT. */
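/* For example (editor's note, illustrative only): on a target with
   HAVE_POST_INCREMENT, a memory use *p whose iv step equals the size of the
   accessed mode gets an IP_AFTER_USE candidate, so that the increment can
   later be expressed as a post-increment addressing mode.  */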
2236 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2237 bool important, struct iv_use *use)
2239 basic_block use_bb = gimple_bb (use->stmt);
2240 enum machine_mode mem_mode;
2241 unsigned HOST_WIDE_INT cstepi;
2243 /* If we insert the increment in any position other than the standard
2244 ones, we must ensure that it is incremented once per iteration.
2245 It must not be in an inner nested loop, or one side of an if
2247 if (use_bb->loop_father != data->current_loop
2248 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2249 || stmt_could_throw_p (use->stmt)
2250 || !cst_and_fits_in_hwi (step))
2253 cstepi = int_cst_value (step);
2255 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2256 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2257 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2259 enum tree_code code = MINUS_EXPR;
2261 tree new_step = step;
2263 if (POINTER_TYPE_P (TREE_TYPE (base)))
2265 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2266 code = POINTER_PLUS_EXPR;
2269 new_step = fold_convert (TREE_TYPE (base), new_step);
2270 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2271 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2274 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2275 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2277 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2282 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2283 position to POS. If USE is not NULL, the candidate is set as related to
2284 it. The candidate computation is scheduled on all available positions. */
2287 add_candidate (struct ivopts_data *data,
2288 tree base, tree step, bool important, struct iv_use *use)
2290 if (ip_normal_pos (data->current_loop))
2291 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2292 if (ip_end_pos (data->current_loop)
2293 && allow_ip_end_pos_p (data->current_loop))
2294 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2296 if (use != NULL && use->type == USE_ADDRESS)
2297 add_autoinc_candidates (data, base, step, important, use);
2300 /* Add a standard "0 + 1 * iteration" iv candidate for a
2301 type with SIZE bits. */
2304 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2307 tree type = lang_hooks.types.type_for_size (size, true);
2308 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2312 /* Adds standard iv candidates. */
2315 add_standard_iv_candidates (struct ivopts_data *data)
2317 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2319 /* The same for a double-integer type if it is still fast enough. */
2320 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2321 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2325 /* Adds candidates based on the old induction variable IV. */
2328 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2332 struct iv_cand *cand;
2334 add_candidate (data, iv->base, iv->step, true, NULL);
2336 /* The same, but with initial value zero. */
2337 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2338 add_candidate (data, size_int (0), iv->step, true, NULL);
2340 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2341 iv->step, true, NULL);
2343 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2344 if (gimple_code (phi) == GIMPLE_PHI)
2346 /* Additionally record the possibility of leaving the original iv
2348 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2349 cand = add_candidate_1 (data,
2350 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2351 SSA_NAME_DEF_STMT (def));
2352 cand->var_before = iv->ssa_name;
2353 cand->var_after = def;
2357 /* Adds candidates based on the old induction variables. */
2360 add_old_ivs_candidates (struct ivopts_data *data)
2366 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2368 iv = ver_info (data, i)->iv;
2369 if (iv && iv->biv_p && !integer_zerop (iv->step))
2370 add_old_iv_candidates (data, iv);
2374 /* Adds candidates based on the value of the induction variable IV and USE. */
2377 add_iv_value_candidates (struct ivopts_data *data,
2378 struct iv *iv, struct iv_use *use)
2380 unsigned HOST_WIDE_INT offset;
2384 add_candidate (data, iv->base, iv->step, false, use);
2386 /* The same, but with initial value zero. Make such variable important,
2387 since it is generic enough so that possibly many uses may be based
2389 basetype = TREE_TYPE (iv->base);
2390 if (POINTER_TYPE_P (basetype))
2391 basetype = sizetype;
2392 add_candidate (data, build_int_cst (basetype, 0),
2393 iv->step, true, use);
2395 /* Third, try removing the constant offset. Make sure to even
2396 add a candidate for &a[0] vs. (T *)&a. */
2397 base = strip_offset (iv->base, &offset);
2399 || base != iv->base)
2400 add_candidate (data, base, iv->step, false, use);
2403 /* Adds candidates based on the uses. */
2406 add_derived_ivs_candidates (struct ivopts_data *data)
2410 for (i = 0; i < n_iv_uses (data); i++)
2412 struct iv_use *use = iv_use (data, i);
2419 case USE_NONLINEAR_EXPR:
2422 /* Just add the ivs based on the value of the iv used here. */
2423 add_iv_value_candidates (data, use->iv, use);
2432 /* Record important candidates and add them to related_cands bitmaps
2436 record_important_candidates (struct ivopts_data *data)
2441 for (i = 0; i < n_iv_cands (data); i++)
2443 struct iv_cand *cand = iv_cand (data, i);
2445 if (cand->important)
2446 bitmap_set_bit (data->important_candidates, i);
2449 data->consider_all_candidates = (n_iv_cands (data)
2450 <= CONSIDER_ALL_CANDIDATES_BOUND);
2452 if (data->consider_all_candidates)
2454 /* We will not need "related_cands" bitmaps in this case,
2455 so release them to decrease peak memory consumption. */
2456 for (i = 0; i < n_iv_uses (data); i++)
2458 use = iv_use (data, i);
2459 BITMAP_FREE (use->related_cands);
2464 /* Add important candidates to the related_cands bitmaps. */
2465 for (i = 0; i < n_iv_uses (data); i++)
2466 bitmap_ior_into (iv_use (data, i)->related_cands,
2467 data->important_candidates);
2471 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2472 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2473 we allocate a simple list for every use. */
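/* Editor's note, illustrative only: a use with three related candidates gets
   a cost_map of size four (the next power of two), and set_use_iv_cost /
   get_use_iv_cost below probe it linearly starting at
   cand->id & (n_map_members - 1).  */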
2476 alloc_use_cost_map (struct ivopts_data *data)
2478 unsigned i, size, s, j;
2480 for (i = 0; i < n_iv_uses (data); i++)
2482 struct iv_use *use = iv_use (data, i);
2485 if (data->consider_all_candidates)
2486 size = n_iv_cands (data);
2490 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2495 /* Round up to a power of two, so that taking the modulus by it is fast. */
2496 for (size = 1; size < s; size <<= 1)
2500 use->n_map_members = size;
2501 use->cost_map = XCNEWVEC (struct cost_pair, size);
2505 /* Returns a description of the computation cost of an expression whose
2506 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2509 new_cost (unsigned runtime, unsigned complexity)
2513 cost.cost = runtime;
2514 cost.complexity = complexity;
2519 /* Adds costs COST1 and COST2. */
2522 add_costs (comp_cost cost1, comp_cost cost2)
2524 cost1.cost += cost2.cost;
2525 cost1.complexity += cost2.complexity;
2529 /* Subtracts costs COST1 and COST2. */
2532 sub_costs (comp_cost cost1, comp_cost cost2)
2534 cost1.cost -= cost2.cost;
2535 cost1.complexity -= cost2.complexity;
2540 /* Returns a negative number if COST1 < COST2, a positive number if
2541 COST1 > COST2, and 0 if COST1 = COST2. */
2544 compare_costs (comp_cost cost1, comp_cost cost2)
2546 if (cost1.cost == cost2.cost)
2547 return cost1.complexity - cost2.complexity;
2549 return cost1.cost - cost2.cost;
2552 /* Returns true if COST is infinite. */
2555 infinite_cost_p (comp_cost cost)
2557 return cost.cost == INFTY;
2560 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2561 on invariants DEPENDS_ON and that the value used in expressing it
2565 set_use_iv_cost (struct ivopts_data *data,
2566 struct iv_use *use, struct iv_cand *cand,
2567 comp_cost cost, bitmap depends_on, tree value)
2571 if (infinite_cost_p (cost))
2573 BITMAP_FREE (depends_on);
2577 if (data->consider_all_candidates)
2579 use->cost_map[cand->id].cand = cand;
2580 use->cost_map[cand->id].cost = cost;
2581 use->cost_map[cand->id].depends_on = depends_on;
2582 use->cost_map[cand->id].value = value;
2586 /* n_map_members is a power of two, so this computes modulo. */
2587 s = cand->id & (use->n_map_members - 1);
2588 for (i = s; i < use->n_map_members; i++)
2589 if (!use->cost_map[i].cand)
2591 for (i = 0; i < s; i++)
2592 if (!use->cost_map[i].cand)
2598 use->cost_map[i].cand = cand;
2599 use->cost_map[i].cost = cost;
2600 use->cost_map[i].depends_on = depends_on;
2601 use->cost_map[i].value = value;
2604 /* Gets cost of (USE, CANDIDATE) pair. */
2606 static struct cost_pair *
2607 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2608 struct iv_cand *cand)
2611 struct cost_pair *ret;
2616 if (data->consider_all_candidates)
2618 ret = use->cost_map + cand->id;
2625 /* n_map_members is a power of two, so this computes modulo. */
2626 s = cand->id & (use->n_map_members - 1);
2627 for (i = s; i < use->n_map_members; i++)
2628 if (use->cost_map[i].cand == cand)
2629 return use->cost_map + i;
2631 for (i = 0; i < s; i++)
2632 if (use->cost_map[i].cand == cand)
2633 return use->cost_map + i;
2638 /* Returns an estimate of the cost of computing SEQ. */
2641 seq_cost (rtx seq, bool speed)
2646 for (; seq; seq = NEXT_INSN (seq))
2648 set = single_set (seq);
2650 cost += rtx_cost (set, SET, speed);
2658 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2660 produce_memory_decl_rtl (tree obj, int *regno)
2662 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2663 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2667 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2669 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2670 x = gen_rtx_SYMBOL_REF (address_mode, name);
2671 SET_SYMBOL_REF_DECL (x, obj);
2672 x = gen_rtx_MEM (DECL_MODE (obj), x);
2673 set_mem_addr_space (x, as);
2674 targetm.encode_section_info (obj, x, true);
2678 x = gen_raw_REG (address_mode, (*regno)++);
2679 x = gen_rtx_MEM (DECL_MODE (obj), x);
2680 set_mem_addr_space (x, as);
2686 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2687 walk_tree. DATA contains the actual fake register number. */
2690 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2692 tree obj = NULL_TREE;
2694 int *regno = (int *) data;
2696 switch (TREE_CODE (*expr_p))
2699 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2700 handled_component_p (*expr_p);
2701 expr_p = &TREE_OPERAND (*expr_p, 0))
2704 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2705 x = produce_memory_decl_rtl (obj, regno);
2710 obj = SSA_NAME_VAR (*expr_p);
2711 if (!DECL_RTL_SET_P (obj))
2712 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2721 if (DECL_RTL_SET_P (obj))
2724 if (DECL_MODE (obj) == BLKmode)
2725 x = produce_memory_decl_rtl (obj, regno);
2727 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2737 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2738 SET_DECL_RTL (obj, x);
2744 /* Determines cost of the computation of EXPR. */
2747 computation_cost (tree expr, bool speed)
2750 tree type = TREE_TYPE (expr);
2752 /* Avoid using hard regs in ways which may be unsupported. */
2753 int regno = LAST_VIRTUAL_REGISTER + 1;
2754 struct cgraph_node *node = cgraph_node (current_function_decl);
2755 enum node_frequency real_frequency = node->frequency;
2757 node->frequency = NODE_FREQUENCY_NORMAL;
2758 crtl->maybe_hot_insn_p = speed;
2759 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2761 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2764 default_rtl_profile ();
2765 node->frequency = real_frequency;
2767 cost = seq_cost (seq, speed);
2769 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2770 TYPE_ADDR_SPACE (type), speed);
2775 /* Returns variable containing the value of candidate CAND at statement AT. */
2778 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2780 if (stmt_after_increment (loop, cand, stmt))
2781 return cand->var_after;
2783 return cand->var_before;
2786 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2787 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2790 tree_int_cst_sign_bit (const_tree t)
2792 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2793 unsigned HOST_WIDE_INT w;
2795 if (bitno < HOST_BITS_PER_WIDE_INT)
2796 w = TREE_INT_CST_LOW (t);
2799 w = TREE_INT_CST_HIGH (t);
2800 bitno -= HOST_BITS_PER_WIDE_INT;
2803 return (w >> bitno) & 1;
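/* E.g. for a 16-bit constant 0x8000 the sign bit is bit 15 of the low word,
   so the function returns 1 even though the host wide int is wider.  */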
2806 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2807 same precision that is at least as wide as the precision of TYPE, stores
2808 BA to A and BB to B, and returns the type of BA. Otherwise, returns the type of A and B. */
2812 determine_common_wider_type (tree *a, tree *b)
2814 tree wider_type = NULL;
2816 tree atype = TREE_TYPE (*a);
2818 if (CONVERT_EXPR_P (*a))
2820 suba = TREE_OPERAND (*a, 0);
2821 wider_type = TREE_TYPE (suba);
2822 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2828 if (CONVERT_EXPR_P (*b))
2830 subb = TREE_OPERAND (*b, 0);
2831 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2842 /* Determines the expression by which USE is expressed from induction variable
2843 CAND at statement AT in LOOP. The expression is stored in a decomposed
2844 form into AFF. Returns false if USE cannot be expressed using CAND. */
2847 get_computation_aff (struct loop *loop,
2848 struct iv_use *use, struct iv_cand *cand, gimple at,
2849 struct affine_tree_combination *aff)
2851 tree ubase = use->iv->base;
2852 tree ustep = use->iv->step;
2853 tree cbase = cand->iv->base;
2854 tree cstep = cand->iv->step, cstep_common;
2855 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2856 tree common_type, var;
2858 aff_tree cbase_aff, var_aff;
2861 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2863 /* We do not have enough precision to express the values of the use. */
2867 var = var_at_stmt (loop, cand, at);
2868 uutype = unsigned_type_for (utype);
2870 /* If the conversion is not noop, perform it. */
2871 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2873 cstep = fold_convert (uutype, cstep);
2874 cbase = fold_convert (uutype, cbase);
2875 var = fold_convert (uutype, var);
2878 if (!constant_multiple_of (ustep, cstep, &rat))
2881 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2882 type, we achieve better folding by computing their difference in this
2883 wider type and casting the result to UUTYPE. We do not need to worry about
2884 overflows, as all the arithmetic will in the end be performed in UUTYPE anyway. */
2886 common_type = determine_common_wider_type (&ubase, &cbase);
2888 /* use = ubase - ratio * cbase + ratio * var. */
2889 tree_to_aff_combination (ubase, common_type, aff);
2890 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2891 tree_to_aff_combination (var, uutype, &var_aff);
2893 /* We need to shift the value if we are after the increment. */
2894 if (stmt_after_increment (loop, cand, at))
2898 if (common_type != uutype)
2899 cstep_common = fold_convert (common_type, cstep);
2901 cstep_common = cstep;
2903 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2904 aff_combination_add (&cbase_aff, &cstep_aff);
2907 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2908 aff_combination_add (aff, &cbase_aff);
2909 if (common_type != uutype)
2910 aff_combination_convert (aff, uutype);
2912 aff_combination_scale (&var_aff, rat);
2913 aff_combination_add (aff, &var_aff);
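/* A worked instance of the above: for a use with base 40 and step 4 and a
   candidate with base 0 and step 2, constant_multiple_of yields ratio 2, so
   use = 40 - 2 * 0 + 2 * var, i.e. the use is rebuilt purely as an affine
   combination of the candidate, which is what AFF holds on return.  */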
2918 /* Determines the expression by which USE is expressed from induction variable
2919 CAND at statement AT in LOOP. The computation is unshared. */
2922 get_computation_at (struct loop *loop,
2923 struct iv_use *use, struct iv_cand *cand, gimple at)
2926 tree type = TREE_TYPE (use->iv->base);
2928 if (!get_computation_aff (loop, use, cand, at, &aff))
2930 unshare_aff_combination (&aff);
2931 return fold_convert (type, aff_combination_to_tree (&aff));
2934 /* Determines the expression by which USE is expressed from induction variable
2935 CAND in LOOP. The computation is unshared. */
2938 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2940 return get_computation_at (loop, use, cand, use->stmt);
2943 /* Adjust the cost COST for being in loop setup rather than loop body.
2944 If we're optimizing for space, the loop setup overhead is constant;
2945 if we're optimizing for speed, amortize it over the per-iteration cost. */
2947 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2951 else if (optimize_loop_for_speed_p (data->current_loop))
2952 return cost / AVG_LOOP_NITER (data->current_loop);
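/* E.g. with AVG_LOOP_NITER being 5, a setup cost of 10 is accounted as 2
   when optimizing for speed, since the one-off cost is amortized over the
   expected iterations; when optimizing for size it is charged in full.  */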
2957 /* Returns cost of addition in MODE. */
2960 add_cost (enum machine_mode mode, bool speed)
2962 static unsigned costs[NUM_MACHINE_MODES];
2970 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2971 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2972 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2977 cost = seq_cost (seq, speed);
2983 if (dump_file && (dump_flags & TDF_DETAILS))
2984 fprintf (dump_file, "Addition in %s costs %d\n",
2985 GET_MODE_NAME (mode), cost);
2989 /* Entry in a hashtable of already known costs for multiplication. */
2992 HOST_WIDE_INT cst; /* The constant to multiply by. */
2993 enum machine_mode mode; /* In mode. */
2994 unsigned cost; /* The cost. */
2997 /* Computes the hash value for ENTRY. */
3000 mbc_entry_hash (const void *entry)
3002 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3004 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3007 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3010 mbc_entry_eq (const void *entry1, const void *entry2)
3012 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3013 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3015 return (e1->mode == e2->mode
3016 && e1->cst == e2->cst);
3019 /* Returns cost of multiplication by constant CST in MODE. */
3022 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3024 static htab_t costs;
3025 struct mbc_entry **cached, act;
3030 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3034 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3036 return (*cached)->cost;
3038 *cached = XNEW (struct mbc_entry);
3039 (*cached)->mode = mode;
3040 (*cached)->cst = cst;
3043 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3044 gen_int_mode (cst, mode), NULL_RTX, 0);
3048 cost = seq_cost (seq, speed);
3050 if (dump_file && (dump_flags & TDF_DETAILS))
3051 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3052 (int) cst, GET_MODE_NAME (mode), cost);
3054 (*cached)->cost = cost;
3059 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3060 validity for a memory reference accessing memory of mode MODE in
3061 address space AS. */
3063 DEF_VEC_P (sbitmap);
3064 DEF_VEC_ALLOC_P (sbitmap, heap);
3067 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3070 #define MAX_RATIO 128
3071 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3072 static VEC (sbitmap, heap) *valid_mult_list;
3075 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3076 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3078 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3081 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3082 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3086 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3087 sbitmap_zero (valid_mult);
3088 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3089 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3091 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3092 if (memory_address_addr_space_p (mode, addr, as))
3093 SET_BIT (valid_mult, i + MAX_RATIO);
3096 if (dump_file && (dump_flags & TDF_DETAILS))
3098 fprintf (dump_file, " allowed multipliers:");
3099 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3100 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3101 fprintf (dump_file, " %d", (int) i);
3102 fprintf (dump_file, "\n");
3103 fprintf (dump_file, "\n");
3106 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3109 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3112 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3115 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3116 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3117 variable is omitted. Compute the cost for a memory reference that accesses
3118 a memory location of mode MEM_MODE in address space AS.
3120 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3121 size of MEM_MODE / RATIO) is available. To make this determination, we
3122 look at the size of the increment to be made, which is given in CSTEP.
3123 CSTEP may be zero if the step is unknown.
3124 STMT_AFTER_INC is true iff the statement we're looking at is after the
3125 increment of the original biv.
3127 TODO -- there must be some better way. This all is quite crude. */
3131 HOST_WIDE_INT min_offset, max_offset;
3132 unsigned costs[2][2][2][2];
3133 } *address_cost_data;
3135 DEF_VEC_P (address_cost_data);
3136 DEF_VEC_ALLOC_P (address_cost_data, heap);
3139 get_address_cost (bool symbol_present, bool var_present,
3140 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3141 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3142 addr_space_t as, bool speed,
3143 bool stmt_after_inc, bool *may_autoinc)
3145 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3146 static VEC(address_cost_data, heap) *address_cost_data_list;
3147 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3148 address_cost_data data;
3149 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3150 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3151 unsigned cost, acost, complexity;
3152 bool offset_p, ratio_p, autoinc;
3153 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3154 unsigned HOST_WIDE_INT mask;
3157 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3158 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3161 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3165 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3166 HOST_WIDE_INT rat, off;
3167 int old_cse_not_expected;
3168 unsigned sym_p, var_p, off_p, rat_p, add_c;
3169 rtx seq, addr, base;
3172 data = (address_cost_data) xcalloc (1, sizeof (*data));
3174 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3176 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3177 for (i = start; i <= 1 << 20; i <<= 1)
3179 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3180 if (!memory_address_addr_space_p (mem_mode, addr, as))
3183 data->max_offset = i == start ? 0 : i >> 1;
3184 off = data->max_offset;
3186 for (i = start; i <= 1 << 20; i <<= 1)
3188 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3189 if (!memory_address_addr_space_p (mem_mode, addr, as))
3192 data->min_offset = i == start ? 0 : -(i >> 1);
3194 if (dump_file && (dump_flags & TDF_DETAILS))
3196 fprintf (dump_file, "get_address_cost:\n");
3197 fprintf (dump_file, " min offset %s %d\n",
3198 GET_MODE_NAME (mem_mode),
3199 (int) data->min_offset);
3200 fprintf (dump_file, " max offset %s %d\n",
3201 GET_MODE_NAME (mem_mode),
3202 (int) data->max_offset);
3206 for (i = 2; i <= MAX_RATIO; i++)
3207 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3213 /* Compute the cost of various addressing modes. */
3215 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3216 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3218 if (HAVE_PRE_DECREMENT)
3220 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3221 has_predec[mem_mode]
3222 = memory_address_addr_space_p (mem_mode, addr, as);
3224 if (HAVE_POST_DECREMENT)
3226 addr = gen_rtx_POST_DEC (address_mode, reg0);
3227 has_postdec[mem_mode]
3228 = memory_address_addr_space_p (mem_mode, addr, as);
3230 if (HAVE_PRE_INCREMENT)
3232 addr = gen_rtx_PRE_INC (address_mode, reg0);
3233 has_preinc[mem_mode]
3234 = memory_address_addr_space_p (mem_mode, addr, as);
3236 if (HAVE_POST_INCREMENT)
3238 addr = gen_rtx_POST_INC (address_mode, reg0);
3239 has_postinc[mem_mode]
3240 = memory_address_addr_space_p (mem_mode, addr, as);
3242 for (i = 0; i < 16; i++)
3245 var_p = (i >> 1) & 1;
3246 off_p = (i >> 2) & 1;
3247 rat_p = (i >> 3) & 1;
3251 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3252 gen_int_mode (rat, address_mode));
3255 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3259 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3260 /* ??? We can run into trouble with some backends by presenting
3261 it with symbols which haven't been properly passed through
3262 targetm.encode_section_info. By setting the local bit, we
3263 enhance the probability of things working. */
3264 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3267 base = gen_rtx_fmt_e (CONST, address_mode,
3269 (PLUS, address_mode, base,
3270 gen_int_mode (off, address_mode)));
3273 base = gen_int_mode (off, address_mode);
3278 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3281 /* To avoid splitting addressing modes, pretend that no cse will follow. */
3283 old_cse_not_expected = cse_not_expected;
3284 cse_not_expected = true;
3285 addr = memory_address_addr_space (mem_mode, addr, as);
3286 cse_not_expected = old_cse_not_expected;
3290 acost = seq_cost (seq, speed);
3291 acost += address_cost (addr, mem_mode, as, speed);
3295 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3298 /* On some targets, it is quite expensive to load a symbol into a register,
3299 which makes addresses that contain symbols look much more expensive.
3300 However, the symbol will have to be loaded in any case before the
3301 loop (and quite likely we have it in a register already), so it does not
3302 make much sense to penalize them too heavily. So make some final
3303 tweaks for the SYMBOL_PRESENT modes:
3305 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3306 var is cheaper, use this mode with small penalty.
3307 If VAR_PRESENT is true, try whether the mode with
3308 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3309 if this is the case, use it. */
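/* E.g. if the sym + cst form costs 6 while var + cst costs only 4, the
   sym + cst entry is lowered to 4 + 1 = 5, reflecting that the symbol can be
   loaded into a register once before the loop.  */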
3310 add_c = add_cost (address_mode, speed);
3311 for (i = 0; i < 8; i++)
3314 off_p = (i >> 1) & 1;
3315 rat_p = (i >> 2) & 1;
3317 acost = data->costs[0][1][off_p][rat_p] + 1;
3321 if (acost < data->costs[1][var_p][off_p][rat_p])
3322 data->costs[1][var_p][off_p][rat_p] = acost;
3325 if (dump_file && (dump_flags & TDF_DETAILS))
3327 fprintf (dump_file, "Address costs:\n");
3329 for (i = 0; i < 16; i++)
3332 var_p = (i >> 1) & 1;
3333 off_p = (i >> 2) & 1;
3334 rat_p = (i >> 3) & 1;
3336 fprintf (dump_file, " ");
3338 fprintf (dump_file, "sym + ");
3340 fprintf (dump_file, "var + ");
3342 fprintf (dump_file, "cst + ");
3344 fprintf (dump_file, "rat * ");
3346 acost = data->costs[sym_p][var_p][off_p][rat_p];
3347 fprintf (dump_file, "index costs %d\n", acost);
3349 if (has_predec[mem_mode] || has_postdec[mem_mode]
3350 || has_preinc[mem_mode] || has_postinc[mem_mode])
3351 fprintf (dump_file, " May include autoinc/dec\n");
3352 fprintf (dump_file, "\n");
3355 VEC_replace (address_cost_data, address_cost_data_list,
3359 bits = GET_MODE_BITSIZE (address_mode);
3360 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3362 if ((offset >> (bits - 1) & 1))
3367 msize = GET_MODE_SIZE (mem_mode);
3368 autoinc_offset = offset;
3370 autoinc_offset += ratio * cstep;
3371 if (symbol_present || var_present || ratio != 1)
3373 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3375 || (has_postdec[mem_mode] && autoinc_offset == 0
3377 || (has_preinc[mem_mode] && autoinc_offset == msize
3379 || (has_predec[mem_mode] && autoinc_offset == -msize
3380 && msize == -cstep))
3384 offset_p = (s_offset != 0
3385 && data->min_offset <= s_offset
3386 && s_offset <= data->max_offset);
3387 ratio_p = (ratio != 1
3388 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3390 if (ratio != 1 && !ratio_p)
3391 cost += multiply_by_cost (ratio, address_mode, speed);
3393 if (s_offset && !offset_p && !symbol_present)
3394 cost += add_cost (address_mode, speed);
3397 *may_autoinc = autoinc;
3398 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3399 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3400 return new_cost (cost + acost, complexity);
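/* For illustration: an access to a global int array element a[i] typically
   has symbol_present set (the symbol a), no variable part, a zero offset and
   ratio 4; if the target allows that multiplier in addresses, the base cost
   is read from data->costs[1][0][0][1], with the fixups above added on top.  */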
3403 /* Estimates cost of forcing expression EXPR into a variable. */
3406 force_expr_to_var_cost (tree expr, bool speed)
3408 static bool costs_initialized = false;
3409 static unsigned integer_cost [2];
3410 static unsigned symbol_cost [2];
3411 static unsigned address_cost [2];
3413 comp_cost cost0, cost1, cost;
3414 enum machine_mode mode;
3416 if (!costs_initialized)
3418 tree type = build_pointer_type (integer_type_node);
3423 var = create_tmp_var_raw (integer_type_node, "test_var");
3424 TREE_STATIC (var) = 1;
3425 x = produce_memory_decl_rtl (var, NULL);
3426 SET_DECL_RTL (var, x);
3428 addr = build1 (ADDR_EXPR, type, var);
3431 for (i = 0; i < 2; i++)
3433 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3436 symbol_cost[i] = computation_cost (addr, i) + 1;
3439 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3441 build_int_cst (sizetype, 2000)), i) + 1;
3442 if (dump_file && (dump_flags & TDF_DETAILS))
3444 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3445 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3446 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3447 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3448 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3449 fprintf (dump_file, "\n");
3453 costs_initialized = true;
3458 if (SSA_VAR_P (expr))
3461 if (is_gimple_min_invariant (expr))
3463 if (TREE_CODE (expr) == INTEGER_CST)
3464 return new_cost (integer_cost [speed], 0);
3466 if (TREE_CODE (expr) == ADDR_EXPR)
3468 tree obj = TREE_OPERAND (expr, 0);
3470 if (TREE_CODE (obj) == VAR_DECL
3471 || TREE_CODE (obj) == PARM_DECL
3472 || TREE_CODE (obj) == RESULT_DECL)
3473 return new_cost (symbol_cost [speed], 0);
3476 return new_cost (address_cost [speed], 0);
3479 switch (TREE_CODE (expr))
3481 case POINTER_PLUS_EXPR:
3485 op0 = TREE_OPERAND (expr, 0);
3486 op1 = TREE_OPERAND (expr, 1);
3490 if (is_gimple_val (op0))
3493 cost0 = force_expr_to_var_cost (op0, speed);
3495 if (is_gimple_val (op1))
3498 cost1 = force_expr_to_var_cost (op1, speed);
3503 op0 = TREE_OPERAND (expr, 0);
3507 if (is_gimple_val (op0))
3510 cost0 = force_expr_to_var_cost (op0, speed);
3516 /* Just an arbitrary value, FIXME. */
3517 return new_cost (target_spill_cost[speed], 0);
3520 mode = TYPE_MODE (TREE_TYPE (expr));
3521 switch (TREE_CODE (expr))
3523 case POINTER_PLUS_EXPR:
3527 cost = new_cost (add_cost (mode, speed), 0);
3531 if (cst_and_fits_in_hwi (op0))
3532 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3533 else if (cst_and_fits_in_hwi (op1))
3534 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3536 return new_cost (target_spill_cost [speed], 0);
3543 cost = add_costs (cost, cost0);
3544 cost = add_costs (cost, cost1);
3546 /* Bound the cost by target_spill_cost. The parts of complicated
3547 computations often are either loop invariant or at least can
3548 be shared between several iv uses, so letting this grow without
3549 limits would not give reasonable results. */
3550 if (cost.cost > (int) target_spill_cost [speed])
3551 cost.cost = target_spill_cost [speed];
3556 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3557 invariants the computation depends on. */
3560 force_var_cost (struct ivopts_data *data,
3561 tree expr, bitmap *depends_on)
3565 fd_ivopts_data = data;
3566 walk_tree (&expr, find_depends, depends_on, NULL);
3569 return force_expr_to_var_cost (expr, data->speed);
3572 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3573 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3574 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3575 invariants the computation depends on. */
3578 split_address_cost (struct ivopts_data *data,
3579 tree addr, bool *symbol_present, bool *var_present,
3580 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3583 HOST_WIDE_INT bitsize;
3584 HOST_WIDE_INT bitpos;
3586 enum machine_mode mode;
3587 int unsignedp, volatilep;
3589 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3590 &unsignedp, &volatilep, false);
3593 || bitpos % BITS_PER_UNIT != 0
3594 || TREE_CODE (core) != VAR_DECL)
3596 *symbol_present = false;
3597 *var_present = true;
3598 fd_ivopts_data = data;
3599 walk_tree (&addr, find_depends, depends_on, NULL);
3600 return new_cost (target_spill_cost[data->speed], 0);
3603 *offset += bitpos / BITS_PER_UNIT;
3604 if (TREE_STATIC (core)
3605 || DECL_EXTERNAL (core))
3607 *symbol_present = true;
3608 *var_present = false;
3612 *symbol_present = false;
3613 *var_present = true;
3617 /* Estimates cost of expressing difference of addresses E1 - E2 as
3618 var + symbol + offset. The value of offset is added to OFFSET,
3619 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3620 part is missing. DEPENDS_ON is a set of the invariants the computation depends on. */
3624 ptr_difference_cost (struct ivopts_data *data,
3625 tree e1, tree e2, bool *symbol_present, bool *var_present,
3626 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3628 HOST_WIDE_INT diff = 0;
3629 aff_tree aff_e1, aff_e2;
3632 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3634 if (ptr_difference_const (e1, e2, &diff))
3637 *symbol_present = false;
3638 *var_present = false;
3642 if (integer_zerop (e2))
3643 return split_address_cost (data, TREE_OPERAND (e1, 0),
3644 symbol_present, var_present, offset, depends_on);
3646 *symbol_present = false;
3647 *var_present = true;
3649 type = signed_type_for (TREE_TYPE (e1));
3650 tree_to_aff_combination (e1, type, &aff_e1);
3651 tree_to_aff_combination (e2, type, &aff_e2);
3652 aff_combination_scale (&aff_e2, double_int_minus_one);
3653 aff_combination_add (&aff_e1, &aff_e2);
3655 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3658 /* Estimates cost of expressing difference E1 - E2 as
3659 var + symbol + offset. The value of offset is added to OFFSET,
3660 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3661 part is missing. DEPENDS_ON is a set of the invariants the computation depends on. */
3665 difference_cost (struct ivopts_data *data,
3666 tree e1, tree e2, bool *symbol_present, bool *var_present,
3667 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3669 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3670 unsigned HOST_WIDE_INT off1, off2;
3671 aff_tree aff_e1, aff_e2;
3674 e1 = strip_offset (e1, &off1);
3675 e2 = strip_offset (e2, &off2);
3676 *offset += off1 - off2;
3681 if (TREE_CODE (e1) == ADDR_EXPR)
3682 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3683 offset, depends_on);
3684 *symbol_present = false;
3686 if (operand_equal_p (e1, e2, 0))
3688 *var_present = false;
3692 *var_present = true;
3694 if (integer_zerop (e2))
3695 return force_var_cost (data, e1, depends_on);
3697 if (integer_zerop (e1))
3699 comp_cost cost = force_var_cost (data, e2, depends_on);
3700 cost.cost += multiply_by_cost (-1, mode, data->speed);
3704 type = signed_type_for (TREE_TYPE (e1));
3705 tree_to_aff_combination (e1, type, &aff_e1);
3706 tree_to_aff_combination (e2, type, &aff_e2);
3707 aff_combination_scale (&aff_e2, double_int_minus_one);
3708 aff_combination_add (&aff_e1, &aff_e2);
3710 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3713 /* Determines the cost of the computation by which USE is expressed
3714 from induction variable CAND. If ADDRESS_P is true, we just need
3715 to create an address from it, otherwise we want to get it into a
3716 register. A set of invariants we depend on is stored in
3717 DEPENDS_ON. AT is the statement at which the value is computed.
3718 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3719 addressing is likely. */
3722 get_computation_cost_at (struct ivopts_data *data,
3723 struct iv_use *use, struct iv_cand *cand,
3724 bool address_p, bitmap *depends_on, gimple at,
3727 tree ubase = use->iv->base, ustep = use->iv->step;
3729 tree utype = TREE_TYPE (ubase), ctype;
3730 unsigned HOST_WIDE_INT cstepi, offset = 0;
3731 HOST_WIDE_INT ratio, aratio;
3732 bool var_present, symbol_present, stmt_is_after_inc;
3735 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3739 /* Only consider real candidates. */
3741 return infinite_cost;
3743 cbase = cand->iv->base;
3744 cstep = cand->iv->step;
3745 ctype = TREE_TYPE (cbase);
3747 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3749 /* We do not have enough precision to express the values of the use. */
3750 return infinite_cost;
3755 /* Do not try to express the address of an object with a computation based
3756 on the address of a different object. This may cause problems in RTL-level
3757 alias analysis (which does not expect this to be happening, as it is
3758 illegal in C), and would be unlikely to be useful anyway. */
3760 if (use->iv->base_object
3761 && cand->iv->base_object
3762 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3763 return infinite_cost;
3766 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3768 /* TODO -- add direct handling of this case. */
3772 /* CSTEPI is removed from the offset in case the statement is after the
3773 increment. If the step is not constant, we use zero instead.
3774 This is a bit imprecise (there is the extra addition), but
3775 redundancy elimination is likely to transform the code so that
3776 it uses the value of the variable before the increment anyway,
3777 so this is not all that unrealistic. */
3778 if (cst_and_fits_in_hwi (cstep))
3779 cstepi = int_cst_value (cstep);
3783 if (!constant_multiple_of (ustep, cstep, &rat))
3784 return infinite_cost;
3786 if (double_int_fits_in_shwi_p (rat))
3787 ratio = double_int_to_shwi (rat);
3789 return infinite_cost;
3792 ctype = TREE_TYPE (cbase);
3794 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3795 or ratio == 1, it is better to handle this like
3797 ubase - ratio * cbase + ratio * var
3799 (this also holds in the case ratio == -1, TODO). */
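/* For instance, with cbase = 16 and ratio = 2, the constant part
   -2 * 16 = -32 is folded into OFFSET below and only UBASE itself remains
   to be costed; the multiplication by the constant base disappears.  */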
3801 if (cst_and_fits_in_hwi (cbase))
3803 offset = - ratio * int_cst_value (cbase);
3804 cost = difference_cost (data,
3805 ubase, build_int_cst (utype, 0),
3806 &symbol_present, &var_present, &offset,
3809 else if (ratio == 1)
3811 cost = difference_cost (data,
3813 &symbol_present, &var_present, &offset,
3817 && !POINTER_TYPE_P (ctype)
3818 && multiplier_allowed_in_address_p
3819 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3820 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3823 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3824 cost = difference_cost (data,
3826 &symbol_present, &var_present, &offset,
3831 cost = force_var_cost (data, cbase, depends_on);
3832 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3833 cost = add_costs (cost,
3834 difference_cost (data,
3835 ubase, build_int_cst (utype, 0),
3836 &symbol_present, &var_present,
3837 &offset, depends_on));
3840 /* If we are after the increment, the value of the candidate is higher by
3842 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3843 if (stmt_is_after_inc)
3844 offset -= ratio * cstepi;
3846 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3847 (symbol/var1/const parts may be omitted). If we are looking for an
3848 address, find the cost of addressing this. */
3850 return add_costs (cost,
3851 get_address_cost (symbol_present, var_present,
3852 offset, ratio, cstepi,
3853 TYPE_MODE (TREE_TYPE (utype)),
3854 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3855 speed, stmt_is_after_inc,
3858 /* Otherwise estimate the costs for computing the expression. */
3859 if (!symbol_present && !var_present && !offset)
3862 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3866 /* Symbol + offset should be compile-time computable so consider that they
3867 are added once to the variable, if present. */
3868 if (var_present && (symbol_present || offset))
3869 cost.cost += adjust_setup_cost (data,
3870 add_cost (TYPE_MODE (ctype), speed));
3872 /* Having offset does not affect runtime cost in case it is added to
3873 symbol, but it increases complexity. */
3877 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3879 aratio = ratio > 0 ? ratio : -ratio;
3881 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3886 *can_autoinc = false;
3889 /* Just get the expression, expand it and measure the cost. */
3890 tree comp = get_computation_at (data->current_loop, use, cand, at);
3893 return infinite_cost;
3896 comp = build_simple_mem_ref (comp);
3898 return new_cost (computation_cost (comp, speed), 0);
3902 /* Determines the cost of the computation by which USE is expressed
3903 from induction variable CAND. If ADDRESS_P is true, we just need
3904 to create an address from it, otherwise we want to get it into a
3905 register. A set of invariants we depend on is stored in
3906 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3907 autoinc addressing is likely. */
3910 get_computation_cost (struct ivopts_data *data,
3911 struct iv_use *use, struct iv_cand *cand,
3912 bool address_p, bitmap *depends_on, bool *can_autoinc)
3914 return get_computation_cost_at (data,
3915 use, cand, address_p, depends_on, use->stmt,
3919 /* Determines cost of basing replacement of USE on CAND in a generic
3923 determine_use_iv_cost_generic (struct ivopts_data *data,
3924 struct iv_use *use, struct iv_cand *cand)
3929 /* The simple case first -- if we need to express the value of the preserved
3930 original biv, the cost is 0. This also prevents us from counting the
3931 cost of the increment twice -- once at this use and once in the cost of the candidate. */
3933 if (cand->pos == IP_ORIGINAL
3934 && cand->incremented_at == use->stmt)
3936 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3940 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3941 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3943 return !infinite_cost_p (cost);
3946 /* Determines cost of basing replacement of USE on CAND in an address. */
3949 determine_use_iv_cost_address (struct ivopts_data *data,
3950 struct iv_use *use, struct iv_cand *cand)
3954 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3957 if (cand->ainc_use == use)
3960 cost.cost -= cand->cost_step;
3961 /* If we generated the candidate solely for exploiting autoincrement
3962 opportunities, and it turns out it can't be used, set the cost to
3963 infinity to make sure we ignore it. */
3964 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3965 cost = infinite_cost;
3967 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3969 return !infinite_cost_p (cost);
3972 /* Computes value of candidate CAND at position AT in iteration NITER, and
3973 stores it to VAL. */
3976 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3979 aff_tree step, delta, nit;
3980 struct iv *iv = cand->iv;
3981 tree type = TREE_TYPE (iv->base);
3982 tree steptype = type;
3983 if (POINTER_TYPE_P (type))
3984 steptype = sizetype;
3986 tree_to_aff_combination (iv->step, steptype, &step);
3987 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3988 aff_combination_convert (&nit, steptype);
3989 aff_combination_mult (&nit, &step, &delta);
3990 if (stmt_after_increment (loop, cand, at))
3991 aff_combination_add (&delta, &step);
3993 tree_to_aff_combination (iv->base, type, val);
3994 aff_combination_add (val, &delta);
3997 /* Returns period of induction variable iv. */
4000 iv_period (struct iv *iv)
4002 tree step = iv->step, period, type;
4005 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4007 /* Period of the iv is gcd (step, type range). Since the type range is a power
4008 of two, it suffices to determine the maximum power of two that divides step. */
4010 pow2div = num_ending_zeros (step);
4011 type = unsigned_type_for (TREE_TYPE (step));
4013 period = build_low_bits_mask (type,
4014 (TYPE_PRECISION (type)
4015 - tree_low_cst (pow2div, 1)));
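/* E.g. for a 32-bit iv with step 4, num_ending_zeros yields 2 and the mask
   has 32 - 2 = 30 low bits set, i.e. the candidate can be incremented
   0x3fffffff times before a value repeats.  */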
4020 /* Returns the comparison operator used when eliminating the iv USE. */
4022 static enum tree_code
4023 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4025 struct loop *loop = data->current_loop;
4029 ex_bb = gimple_bb (use->stmt);
4030 exit = EDGE_SUCC (ex_bb, 0);
4031 if (flow_bb_inside_loop_p (loop, exit->dest))
4032 exit = EDGE_SUCC (ex_bb, 1);
4034 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4037 /* Check whether it is possible to express the condition in USE by comparison
4038 of candidate CAND. If so, store the value compared with to BOUND. */
4041 may_eliminate_iv (struct ivopts_data *data,
4042 struct iv_use *use, struct iv_cand *cand, tree *bound)
4047 struct loop *loop = data->current_loop;
4050 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4053 /* For now, this works only for exits that dominate the loop latch.
4054 TODO: extend to other conditions inside loop body. */
4055 ex_bb = gimple_bb (use->stmt);
4056 if (use->stmt != last_stmt (ex_bb)
4057 || gimple_code (use->stmt) != GIMPLE_COND
4058 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4061 exit = EDGE_SUCC (ex_bb, 0);
4062 if (flow_bb_inside_loop_p (loop, exit->dest))
4063 exit = EDGE_SUCC (ex_bb, 1);
4064 if (flow_bb_inside_loop_p (loop, exit->dest))
4067 nit = niter_for_exit (data, exit);
4071 /* Determine whether we can use the variable to test the exit condition.
4072 This is the case iff the period of the induction variable is greater
4073 than the number of iterations for which the exit condition is true. */
4074 period = iv_period (cand->iv);
4076 /* If the number of iterations is constant, compare against it directly. */
4077 if (TREE_CODE (nit) == INTEGER_CST)
4079 if (!tree_int_cst_lt (nit, period))
4083 /* If not, and if this is the only possible exit of the loop, see whether
4084 we can get a conservative estimate on the number of iterations of the
4085 entire loop and compare against that instead. */
4086 else if (loop_only_exit_p (loop, exit))
4088 double_int period_value, max_niter;
4089 if (!estimated_loop_iterations (loop, true, &max_niter))
4091 period_value = tree_to_double_int (period);
4092 if (double_int_ucmp (max_niter, period_value) >= 0)
4096 /* Otherwise, punt. */
4100 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4102 *bound = aff_combination_to_tree (&bnd);
4103 /* It is unlikely that computing the number of iterations using division
4104 would be more profitable than keeping the original induction variable. */
4105 if (expression_expensive_p (*bound))
4110 /* Determines cost of basing replacement of USE on CAND in a condition. */
4113 determine_use_iv_cost_condition (struct ivopts_data *data,
4114 struct iv_use *use, struct iv_cand *cand)
4116 tree bound = NULL_TREE;
4118 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4119 comp_cost elim_cost, express_cost, cost;
4121 tree *control_var, *bound_cst;
4123 /* Only consider real candidates. */
4126 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4130 /* Try iv elimination. */
4131 if (may_eliminate_iv (data, use, cand, &bound))
4133 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4134 /* The bound is a loop invariant, so it will only be computed once. */
4136 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4139 elim_cost = infinite_cost;
4141 /* Try expressing the original giv. If it is compared with an invariant,
4142 note that we cannot get rid of it. */
4143 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4147 /* When the condition is a comparison of the candidate IV against
4148 zero, prefer this IV.
4150 TODO: The constant that we're subtracting from the cost should
4151 be target-dependent. This information should be added to the
4152 target costs for each backend. */
4153 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4154 && integer_zerop (*bound_cst)
4155 && (operand_equal_p (*control_var, cand->var_after, 0)
4156 || operand_equal_p (*control_var, cand->var_before, 0)))
4157 elim_cost.cost -= 1;
4159 express_cost = get_computation_cost (data, use, cand, false,
4160 &depends_on_express, NULL);
4161 fd_ivopts_data = data;
4162 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4164 /* Choose the better approach, preferring the eliminated IV. */
4165 if (compare_costs (elim_cost, express_cost) <= 0)
4168 depends_on = depends_on_elim;
4169 depends_on_elim = NULL;
4173 cost = express_cost;
4174 depends_on = depends_on_express;
4175 depends_on_express = NULL;
4179 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4181 if (depends_on_elim)
4182 BITMAP_FREE (depends_on_elim);
4183 if (depends_on_express)
4184 BITMAP_FREE (depends_on_express);
4186 return !infinite_cost_p (cost);
4189 /* Determines cost of basing replacement of USE on CAND. Returns false
4190 if USE cannot be based on CAND. */
4193 determine_use_iv_cost (struct ivopts_data *data,
4194 struct iv_use *use, struct iv_cand *cand)
4198 case USE_NONLINEAR_EXPR:
4199 return determine_use_iv_cost_generic (data, use, cand);
4202 return determine_use_iv_cost_address (data, use, cand);
4205 return determine_use_iv_cost_condition (data, use, cand);
4212 /* Return true if get_computation_cost indicates that autoincrement is
4213 a possibility for the pair of USE and CAND, false otherwise. */
4216 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4217 struct iv_cand *cand)
4223 if (use->type != USE_ADDRESS)
4226 cost = get_computation_cost (data, use, cand, true, &depends_on,
4229 BITMAP_FREE (depends_on);
4231 return !infinite_cost_p (cost) && can_autoinc;
4234 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4235 use that allows autoincrement, and set their AINC_USE if possible. */
4238 set_autoinc_for_original_candidates (struct ivopts_data *data)
4242 for (i = 0; i < n_iv_cands (data); i++)
4244 struct iv_cand *cand = iv_cand (data, i);
4245 struct iv_use *closest = NULL;
4246 if (cand->pos != IP_ORIGINAL)
4248 for (j = 0; j < n_iv_uses (data); j++)
4250 struct iv_use *use = iv_use (data, j);
4251 unsigned uid = gimple_uid (use->stmt);
4252 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4253 || uid > gimple_uid (cand->incremented_at))
4255 if (closest == NULL || uid > gimple_uid (closest->stmt))
4258 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4260 cand->ainc_use = closest;
4264 /* Finds the candidates for the induction variables. */
4267 find_iv_candidates (struct ivopts_data *data)
4269 /* Add commonly used ivs. */
4270 add_standard_iv_candidates (data);
4272 /* Add old induction variables. */
4273 add_old_ivs_candidates (data);
4275 /* Add induction variables derived from uses. */
4276 add_derived_ivs_candidates (data);
4278 set_autoinc_for_original_candidates (data);
4280 /* Record the important candidates. */
4281 record_important_candidates (data);
4284 /* Determines costs of basing the use of the iv on an iv candidate. */
4287 determine_use_iv_costs (struct ivopts_data *data)
4291 struct iv_cand *cand;
4292 bitmap to_clear = BITMAP_ALLOC (NULL);
4294 alloc_use_cost_map (data);
4296 for (i = 0; i < n_iv_uses (data); i++)
4298 use = iv_use (data, i);
4300 if (data->consider_all_candidates)
4302 for (j = 0; j < n_iv_cands (data); j++)
4304 cand = iv_cand (data, j);
4305 determine_use_iv_cost (data, use, cand);
4312 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4314 cand = iv_cand (data, j);
4315 if (!determine_use_iv_cost (data, use, cand))
4316 bitmap_set_bit (to_clear, j);
4319 /* Remove the candidates for which the cost is infinite from
4320 the list of related candidates. */
4321 bitmap_and_compl_into (use->related_cands, to_clear);
4322 bitmap_clear (to_clear);
4326 BITMAP_FREE (to_clear);
4328 if (dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, "Use-candidate costs:\n");
4332 for (i = 0; i < n_iv_uses (data); i++)
4334 use = iv_use (data, i);
4336 fprintf (dump_file, "Use %d:\n", i);
4337 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4338 for (j = 0; j < use->n_map_members; j++)
4340 if (!use->cost_map[j].cand
4341 || infinite_cost_p (use->cost_map[j].cost))
4344 fprintf (dump_file, " %d\t%d\t%d\t",
4345 use->cost_map[j].cand->id,
4346 use->cost_map[j].cost.cost,
4347 use->cost_map[j].cost.complexity);
4348 if (use->cost_map[j].depends_on)
4349 bitmap_print (dump_file,
4350 use->cost_map[j].depends_on, "","");
4351 fprintf (dump_file, "\n");
4354 fprintf (dump_file, "\n");
4356 fprintf (dump_file, "\n");
4360 /* Determines cost of the candidate CAND. */
4363 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4365 comp_cost cost_base;
4366 unsigned cost, cost_step;
4375 /* There are two costs associated with the candidate -- its increment
4376 and its initialization. The second is almost negligible for any loop
4377 that rolls enough, so we give it only a small weight. */
4379 base = cand->iv->base;
4380 cost_base = force_var_cost (data, base, NULL);
4381 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4383 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4385 /* Prefer the original ivs unless we may gain something by replacing them.
4386 The reason is to make debugging simpler; this is not relevant for
4387 artificial ivs created by other optimization passes. */
4388 if (cand->pos != IP_ORIGINAL
4389 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4392 /* Prefer not to insert statements into latch unless there are some
4393 already (so that we do not create unnecessary jumps). */
4394 if (cand->pos == IP_END
4395 && empty_block_p (ip_end_pos (data->current_loop)))
4399 cand->cost_step = cost_step;
4402 /* Determines costs of computation of the candidates. */
4405 determine_iv_costs (struct ivopts_data *data)
4409 if (dump_file && (dump_flags & TDF_DETAILS))
4411 fprintf (dump_file, "Candidate costs:\n");
4412 fprintf (dump_file, " cand\tcost\n");
4415 for (i = 0; i < n_iv_cands (data); i++)
4417 struct iv_cand *cand = iv_cand (data, i);
4419 determine_iv_cost (data, cand);
4421 if (dump_file && (dump_flags & TDF_DETAILS))
4422 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4425 if (dump_file && (dump_flags & TDF_DETAILS))
4426 fprintf (dump_file, "\n");
4429 /* Calculates cost for having SIZE induction variables. */
4432 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4434 /* We add size to the cost, so that we prefer eliminating ivs if possible. */
4436 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4439 /* For each size of the induction variable set determine the penalty. */
4442 determine_set_costs (struct ivopts_data *data)
4446 gimple_stmt_iterator psi;
4448 struct loop *loop = data->current_loop;
4451 /* We use the following model (definitely improvable, especially the
4452 cost function -- TODO):
4454 We estimate the number of registers available (using MD data), name it A.
4456 We estimate the number of registers used by the loop, name it U. This
4457 number is obtained as the number of loop phi nodes (not counting virtual
4458 registers and bivs) + the number of variables from outside of the loop.
4460 We set a reserve R (free regs that are used for temporary computations,
4461 etc.). For now the reserve is a constant 3.
4463 Let I be the number of induction variables.
4465 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4466 making a lot of ivs without a reason).
4467 -- if A - R < U + I <= A, the cost is I * PRES_COST
4468 -- if U + I > A, the cost is I * PRES_COST and
4469 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
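/* A worked instance of the model: with A = 8 available registers, the
   reserve R = 3, U = 4 registers used by the loop and I = 2 ivs, we have
   U + I + R = 9 > A but A - R = 5 < U + I = 6 <= A, so the middle case
   applies and the set is charged 2 * PRES_COST; only once U + I exceeded A
   would spill costs be added as well.  */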
4471 if (dump_file && (dump_flags & TDF_DETAILS))
4473 fprintf (dump_file, "Global costs:\n");
4474 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4475 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4476 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4480 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4482 phi = gsi_stmt (psi);
4483 op = PHI_RESULT (phi);
4485 if (!is_gimple_reg (op))
4488 if (get_iv (data, op))
4494 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4496 struct version_info *info = ver_info (data, j);
4498 if (info->inv_id && info->has_nonlin_use)
4502 data->regs_used = n;
4503 if (dump_file && (dump_flags & TDF_DETAILS))
4504 fprintf (dump_file, " regs_used %d\n", n);
4506 if (dump_file && (dump_flags & TDF_DETAILS))
4508 fprintf (dump_file, " cost for size:\n");
4509 fprintf (dump_file, " ivs\tcost\n");
4510 for (j = 0; j <= 2 * target_avail_regs; j++)
4511 fprintf (dump_file, " %d\t%d\n", j,
4512 ivopts_global_cost_for_size (data, j));
4513 fprintf (dump_file, "\n");
4517 /* Returns true if A is a cheaper cost pair than B. */
4520 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4530 cmp = compare_costs (a->cost, b->cost);
4537 /* In case the costs are the same, prefer the cheaper candidate. */
4538 if (a->cand->cost < b->cand->cost)
4544 /* Computes the cost field of IVS structure. */
4547 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4549 comp_cost cost = ivs->cand_use_cost;
4550 cost.cost += ivs->cand_cost;
4551 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4556 /* Remove the invariants in set INVS from set IVS. */
4559 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4567 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4569 ivs->n_invariant_uses[iid]--;
4570 if (ivs->n_invariant_uses[iid] == 0)
4575 /* Set USE not to be expressed by any candidate in IVS. */
4578 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4581 unsigned uid = use->id, cid;
4582 struct cost_pair *cp;
4584 cp = ivs->cand_for_use[uid];
4590 ivs->cand_for_use[uid] = NULL;
4591 ivs->n_cand_uses[cid]--;
4593 if (ivs->n_cand_uses[cid] == 0)
4595 bitmap_clear_bit (ivs->cands, cid);
4596 /* Do not count the pseudocandidates. */
4600 ivs->cand_cost -= cp->cand->cost;
4602 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4605 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4607 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4608 iv_ca_recount_cost (data, ivs);
4611 /* Add invariants in set INVS to set IVS. */
4614 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4622 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4624 ivs->n_invariant_uses[iid]++;
4625 if (ivs->n_invariant_uses[iid] == 1)
4630 /* Set cost pair for USE in set IVS to CP. */
4633 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4634 struct iv_use *use, struct cost_pair *cp)
4636 unsigned uid = use->id, cid;
4638 if (ivs->cand_for_use[uid] == cp)
4641 if (ivs->cand_for_use[uid])
4642 iv_ca_set_no_cp (data, ivs, use);
4649 ivs->cand_for_use[uid] = cp;
4650 ivs->n_cand_uses[cid]++;
4651 if (ivs->n_cand_uses[cid] == 1)
4653 bitmap_set_bit (ivs->cands, cid);
4654 /* Do not count the pseudocandidates. */
4658 ivs->cand_cost += cp->cand->cost;
4660 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4663 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4664 iv_ca_set_add_invariants (ivs, cp->depends_on);
4665 iv_ca_recount_cost (data, ivs);
4669 /* Extend set IVS by expressing USE by some of the candidates in it if possible. */
4673 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4676 struct cost_pair *best_cp = NULL, *cp;
4680 gcc_assert (ivs->upto >= use->id);
4682 if (ivs->upto == use->id)
4688 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4690 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4692 if (cheaper_cost_pair (cp, best_cp))
4696 iv_ca_set_cp (data, ivs, use, best_cp);
4699 /* Get cost for assignment IVS. */
4702 iv_ca_cost (struct iv_ca *ivs)
4704 /* This was a conditional expression but it triggered a bug in
4707 return infinite_cost;
4712 /* Returns true if all dependences of CP are among invariants in IVS. */
4715 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4720 if (!cp->depends_on)
4723 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4725 if (ivs->n_invariant_uses[i] == 0)
4732 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
4733 it before NEXT_CHANGE. */
4735 static struct iv_ca_delta *
4736 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4737 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4739 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4742 change->old_cp = old_cp;
4743 change->new_cp = new_cp;
4744 change->next_change = next_change;
4749 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4752 static struct iv_ca_delta *
4753 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4755 struct iv_ca_delta *last;
4763 for (last = l1; last->next_change; last = last->next_change)
4765 last->next_change = l2;
4770 /* Returns the candidate by which USE is expressed in IVS. */
4772 static struct cost_pair *
4773 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4775 return ivs->cand_for_use[use->id];
4778 /* Reverse the list of changes DELTA, forming the inverse to it. */
4780 static struct iv_ca_delta *
4781 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4783 struct iv_ca_delta *act, *next, *prev = NULL;
4784 struct cost_pair *tmp;
4786 for (act = delta; act; act = next)
4788 next = act->next_change;
4789 act->next_change = prev;
4793 act->old_cp = act->new_cp;
4800 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4801 reverted instead. */
4804 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4805 struct iv_ca_delta *delta, bool forward)
4807 struct cost_pair *from, *to;
4808 struct iv_ca_delta *act;
4811 delta = iv_ca_delta_reverse (delta);
4813 for (act = delta; act; act = act->next_change)
4817 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4818 iv_ca_set_cp (data, ivs, act->use, to);
4822 iv_ca_delta_reverse (delta);
4825 /* Returns true if CAND is used in IVS. */
4828 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4830 return ivs->n_cand_uses[cand->id] > 0;
4833 /* Returns number of induction variable candidates in the set IVS. */
4836 iv_ca_n_cands (struct iv_ca *ivs)
4838 return ivs->n_cands;
4841 /* Free the list of changes DELTA. */
4844 iv_ca_delta_free (struct iv_ca_delta **delta)
4846 struct iv_ca_delta *act, *next;
4848 for (act = *delta; act; act = next)
4850 next = act->next_change;
4857 /* Allocates a new iv candidate assignment. */
4859 static struct iv_ca *
4860 iv_ca_new (struct ivopts_data *data)
4862 struct iv_ca *nw = XNEW (struct iv_ca);
4866 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4867 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4868 nw->cands = BITMAP_ALLOC (NULL);
4871 nw->cand_use_cost = zero_cost;
4873 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4874 nw->cost = zero_cost;
4879 /* Free memory occupied by the set IVS. */
4882 iv_ca_free (struct iv_ca **ivs)
4884 free ((*ivs)->cand_for_use);
4885 free ((*ivs)->n_cand_uses);
4886 BITMAP_FREE ((*ivs)->cands);
4887 free ((*ivs)->n_invariant_uses);
4892 /* Dumps IVS to FILE. */
4895 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4897 const char *pref = " invariants ";
4899 comp_cost cost = iv_ca_cost (ivs);
4901 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4902 bitmap_print (file, ivs->cands, " candidates ","\n");
4904 for (i = 1; i <= data->max_inv_id; i++)
4905 if (ivs->n_invariant_uses[i])
4907 fprintf (file, "%s%d", pref, i);
4910 fprintf (file, "\n");
4913 /* Try changing the candidate in IVS to CAND for each use. Return the cost of the
4914 new set, and store the differences in DELTA. The number of induction variables
4915 in the new set is stored in N_IVS. */
4918 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4919 struct iv_cand *cand, struct iv_ca_delta **delta,
4925 struct cost_pair *old_cp, *new_cp;
4928 for (i = 0; i < ivs->upto; i++)
4930 use = iv_use (data, i);
4931 old_cp = iv_ca_cand_for_use (ivs, use);
4934 && old_cp->cand == cand)
4937 new_cp = get_use_iv_cost (data, use, cand);
4941 if (!iv_ca_has_deps (ivs, new_cp))
4944 if (!cheaper_cost_pair (new_cp, old_cp))
4947 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4950 iv_ca_delta_commit (data, ivs, *delta, true);
4951 cost = iv_ca_cost (ivs);
4953 *n_ivs = iv_ca_n_cands (ivs);
4954 iv_ca_delta_commit (data, ivs, *delta, false);
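/* How a caller typically consumes the result (compare try_improve_iv_set
   below); a fragment under #if 0, with the variables as in that caller:
   keep the delta only when it lowers the cost, then free it.  */
#if 0
  acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
  if (compare_costs (acost, iv_ca_cost (ivs)) < 0)
    iv_ca_delta_commit (data, ivs, act_delta, true);
  iv_ca_delta_free (&act_delta);
#endif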
4959 /* Try narrowing set IVS by removing CAND. Return the cost of
4960 the new set and store the differences in DELTA. */
4963 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4964 struct iv_cand *cand, struct iv_ca_delta **delta)
4968 struct cost_pair *old_cp, *new_cp, *cp;
4970 struct iv_cand *cnd;
4974 for (i = 0; i < n_iv_uses (data); i++)
4976 use = iv_use (data, i);
4978 old_cp = iv_ca_cand_for_use (ivs, use);
4979 if (old_cp->cand != cand)
4984 if (data->consider_all_candidates)
4986 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4991 cnd = iv_cand (data, ci);
4993 cp = get_use_iv_cost (data, use, cnd);
4996 if (!iv_ca_has_deps (ivs, cp))
4999 if (!cheaper_cost_pair (cp, new_cp))
5007 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5012 cnd = iv_cand (data, ci);
5014 cp = get_use_iv_cost (data, use, cnd);
5017 if (!iv_ca_has_deps (ivs, cp))
5020 if (!cheaper_cost_pair (cp, new_cp))
5029 iv_ca_delta_free (delta);
5030 return infinite_cost;
5033 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5036 iv_ca_delta_commit (data, ivs, *delta, true);
5037 cost = iv_ca_cost (ivs);
5038 iv_ca_delta_commit (data, ivs, *delta, false);
5043 /* Try optimizing the set of candidates IVS by removing candidates other
5044 than EXCEPT_CAND from it.  Return the cost of the new set, and store
5045 differences in DELTA. */
5048 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5049 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5052 struct iv_ca_delta *act_delta, *best_delta;
5054 comp_cost best_cost, acost;
5055 struct iv_cand *cand;
5058 best_cost = iv_ca_cost (ivs);
5060 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5062 cand = iv_cand (data, i);
5064 if (cand == except_cand)
5067 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5069 if (compare_costs (acost, best_cost) < 0)
5072 iv_ca_delta_free (&best_delta);
5073 best_delta = act_delta;
5076 iv_ca_delta_free (&act_delta);
5085 /* Recurse to possibly remove other unnecessary ivs. */
5086 iv_ca_delta_commit (data, ivs, best_delta, true);
5087 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5088 iv_ca_delta_commit (data, ivs, best_delta, false);
5089 *delta = iv_ca_delta_join (best_delta, *delta);
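/* Note: the recursion above first commits BEST_DELTA so that the recursive
   call searches the already-narrowed set; BEST_DELTA is then joined in front
   of the recursive delta, i.e. in the order in which iv_ca_delta_commit has
   to apply the changes.  */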
5093 /* Tries to extend the set IVS in the best possible way in order
5094 to express USE.  */
5097 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5100 comp_cost best_cost, act_cost;
5103 struct iv_cand *cand;
5104 struct iv_ca_delta *best_delta = NULL, *act_delta;
5105 struct cost_pair *cp;
5107 iv_ca_add_use (data, ivs, use);
5108 best_cost = iv_ca_cost (ivs);
5110 cp = iv_ca_cand_for_use (ivs, use);
5113 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5114 iv_ca_set_no_cp (data, ivs, use);
5117 /* First try important candidates not based on any memory object. Only if
5118 this fails, try the specific ones. Rationale -- in loops with many
5119 variables the best choice is often to use just one generic biv.  If we
5120 added many ivs specific to the uses here, the optimization algorithm later
5121 would be likely to get stuck in a local minimum, thus causing us to create
5122 too many ivs. The approach from few ivs to more seems more likely to be
5123 successful -- starting from few ivs, replacing an expensive use by a
5124 specific iv should always be a win. */
5125 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5127 cand = iv_cand (data, i);
5129 if (cand->iv->base_object != NULL_TREE)
5132 if (iv_ca_cand_used_p (ivs, cand))
5135 cp = get_use_iv_cost (data, use, cand);
5139 iv_ca_set_cp (data, ivs, use, cp);
5140 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5141 iv_ca_set_no_cp (data, ivs, use);
5142 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5144 if (compare_costs (act_cost, best_cost) < 0)
5146 best_cost = act_cost;
5148 iv_ca_delta_free (&best_delta);
5149 best_delta = act_delta;
5152 iv_ca_delta_free (&act_delta);
5155 if (infinite_cost_p (best_cost))
5157 for (i = 0; i < use->n_map_members; i++)
5159 cp = use->cost_map + i;
5164 /* Already tried this. */
5165 if (cand->important && cand->iv->base_object == NULL_TREE)
5168 if (iv_ca_cand_used_p (ivs, cand))
5172 iv_ca_set_cp (data, ivs, use, cp);
5173 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5174 iv_ca_set_no_cp (data, ivs, use);
5175 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5178 if (compare_costs (act_cost, best_cost) < 0)
5180 best_cost = act_cost;
5183 iv_ca_delta_free (&best_delta);
5184 best_delta = act_delta;
5187 iv_ca_delta_free (&act_delta);
5191 iv_ca_delta_commit (data, ivs, best_delta, true);
5192 iv_ca_delta_free (&best_delta);
5194 return !infinite_cost_p (best_cost);
5197 /* Finds an initial assignment of candidates to uses. */
5199 static struct iv_ca *
5200 get_initial_solution (struct ivopts_data *data)
5202 struct iv_ca *ivs = iv_ca_new (data);
5205 for (i = 0; i < n_iv_uses (data); i++)
5206 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5215 /* Tries to improve the set of induction variables IVS.  */
5218 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5221 comp_cost acost, best_cost = iv_ca_cost (ivs);
5222 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5223 struct iv_cand *cand;
5225 /* Try extending the set of induction variables by one. */
5226 for (i = 0; i < n_iv_cands (data); i++)
5228 cand = iv_cand (data, i);
5230 if (iv_ca_cand_used_p (ivs, cand))
5233 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5237 /* If we successfully added the candidate and the set is small enough,
5238 try optimizing it by removing other candidates. */
5239 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5241 iv_ca_delta_commit (data, ivs, act_delta, true);
5242 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5243 iv_ca_delta_commit (data, ivs, act_delta, false);
5244 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5247 if (compare_costs (acost, best_cost) < 0)
5250 iv_ca_delta_free (&best_delta);
5251 best_delta = act_delta;
5254 iv_ca_delta_free (&act_delta);
5259 /* Try removing the candidates from the set instead. */
5260 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5262 /* Nothing more we can do. */
5267 iv_ca_delta_commit (data, ivs, best_delta, true);
5268 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5269 iv_ca_delta_free (&best_delta);
5273 /* Attempts to find the optimal set of induction variables.  We use a simple
5274 greedy heuristic -- we try to replace at most one candidate in the selected
5275 solution and remove the unused ivs while this improves the cost. */
5277 static struct iv_ca *
5278 find_optimal_iv_set (struct ivopts_data *data)
5284 /* Get the initial solution. */
5285 set = get_initial_solution (data);
5288 if (dump_file && (dump_flags & TDF_DETAILS))
5289 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5293 if (dump_file && (dump_flags & TDF_DETAILS))
5295 fprintf (dump_file, "Initial set of candidates:\n");
5296 iv_ca_dump (data, dump_file, set);
5299 while (try_improve_iv_set (data, set))
5301 if (dump_file && (dump_flags & TDF_DETAILS))
5303 fprintf (dump_file, "Improved to:\n");
5304 iv_ca_dump (data, dump_file, set);
5308 if (dump_file && (dump_flags & TDF_DETAILS))
5310 comp_cost cost = iv_ca_cost (set);
5311 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5314 for (i = 0; i < n_iv_uses (data); i++)
5316 use = iv_use (data, i);
5317 use->selected = iv_ca_cand_for_use (set, use)->cand;
5323 /* Creates a new induction variable corresponding to CAND. */
5326 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5328 gimple_stmt_iterator incr_pos;
5338 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5342 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5350 incr_pos = gsi_for_stmt (cand->incremented_at);
5354 /* Mark that the iv is preserved. */
5355 name_info (data, cand->var_before)->preserve_biv = true;
5356 name_info (data, cand->var_after)->preserve_biv = true;
5358 /* Rewrite the increment so that it uses var_before directly. */
5359 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5364 gimple_add_tmp_var (cand->var_before);
5365 add_referenced_var (cand->var_before);
5367 base = unshare_expr (cand->iv->base);
5369 create_iv (base, unshare_expr (cand->iv->step),
5370 cand->var_before, data->current_loop,
5371 &incr_pos, after, &cand->var_before, &cand->var_after);
5374 /* Creates new induction variables described in SET. */
5377 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5380 struct iv_cand *cand;
5383 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5385 cand = iv_cand (data, i);
5386 create_new_iv (data, cand);
5391 /* Rewrites USE (definition of iv used in a nonlinear expression)
5392 using candidate CAND. */
5395 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5396 struct iv_use *use, struct iv_cand *cand)
5401 gimple_stmt_iterator bsi;
5403 /* An important special case -- if we are asked to express the value of
5404 the original iv by itself, just exit; there is no need to
5405 introduce a new computation (that might also need casting the
5406 variable to unsigned and back). */
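/* For illustration (hypothetical SSA names): with an original biv whose
   increment is

       i_7 = i_4 + 1;

   and a candidate positioned at IP_ORIGINAL with var_before = i_4 and
   var_after = i_7, the statement is kept (possibly with its right-hand side
   rebuilt from VAR_BEFORE and the step below) instead of being re-expressed
   through get_computation.  */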
5407 if (cand->pos == IP_ORIGINAL
5408 && cand->incremented_at == use->stmt)
5410 tree step, ctype, utype;
5411 enum tree_code incr_code = PLUS_EXPR, old_code;
5413 gcc_assert (is_gimple_assign (use->stmt));
5414 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5416 step = cand->iv->step;
5417 ctype = TREE_TYPE (step);
5418 utype = TREE_TYPE (cand->var_after);
5419 if (TREE_CODE (step) == NEGATE_EXPR)
5421 incr_code = MINUS_EXPR;
5422 step = TREE_OPERAND (step, 0);
5425 /* Check whether we may leave the computation unchanged.
5426 This is the case only if it does not rely on other
5427 computations in the loop -- otherwise, the computation
5428 we rely upon may be removed in remove_unused_ivs,
5429 thus leading to an ICE. */
5430 old_code = gimple_assign_rhs_code (use->stmt);
5431 if (old_code == PLUS_EXPR
5432 || old_code == MINUS_EXPR
5433 || old_code == POINTER_PLUS_EXPR)
5435 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5436 op = gimple_assign_rhs2 (use->stmt);
5437 else if (old_code != MINUS_EXPR
5438 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5439 op = gimple_assign_rhs1 (use->stmt);
5447 && (TREE_CODE (op) == INTEGER_CST
5448 || operand_equal_p (op, step, 0)))
5451 /* Otherwise, add the necessary computations to express the iv.  */
5453 op = fold_convert (ctype, cand->var_before);
5454 comp = fold_convert (utype,
5455 build2 (incr_code, ctype, op,
5456 unshare_expr (step)));
5460 comp = get_computation (data->current_loop, use, cand);
5461 gcc_assert (comp != NULL_TREE);
5464 switch (gimple_code (use->stmt))
5467 tgt = PHI_RESULT (use->stmt);
5469 /* If we should keep the biv, do not replace it. */
5470 if (name_info (data, tgt)->preserve_biv)
5473 bsi = gsi_after_labels (gimple_bb (use->stmt));
5477 tgt = gimple_assign_lhs (use->stmt);
5478 bsi = gsi_for_stmt (use->stmt);
5485 if (!valid_gimple_rhs_p (comp)
5486 || (gimple_code (use->stmt) != GIMPLE_PHI
5487 /* We can't allow re-allocating the stmt as it might be pointed to still.  */
5489 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5490 >= gimple_num_ops (gsi_stmt (bsi)))))
5491 comp = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5492 true, GSI_SAME_STMT);
5494 if (gimple_code (use->stmt) == GIMPLE_PHI)
5496 ass = gimple_build_assign (tgt, comp);
5497 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5499 bsi = gsi_for_stmt (use->stmt);
5500 remove_phi_node (&bsi, false);
5504 gimple_assign_set_rhs_from_tree (&bsi, comp);
5505 use->stmt = gsi_stmt (bsi);
5509 /* Replaces ssa name in index IDX by its basic variable.  Callback for for_each_index.  */
5513 idx_remove_ssa_names (tree base, tree *idx,
5514 void *data ATTRIBUTE_UNUSED)
5518 if (TREE_CODE (*idx) == SSA_NAME)
5519 *idx = SSA_NAME_VAR (*idx);
5521 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5523 op = &TREE_OPERAND (base, 2);
5525 && TREE_CODE (*op) == SSA_NAME)
5526 *op = SSA_NAME_VAR (*op);
5527 op = &TREE_OPERAND (base, 3);
5529 && TREE_CODE (*op) == SSA_NAME)
5530 *op = SSA_NAME_VAR (*op);
5536 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5539 unshare_and_remove_ssa_names (tree ref)
5541 ref = unshare_expr (ref);
5542 for_each_index (&ref, idx_remove_ssa_names, NULL);
5547 /* Copies the reference information from OLD_REF to NEW_REF. */
5550 copy_ref_info (tree new_ref, tree old_ref)
5552 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5553 copy_mem_ref_info (new_ref, old_ref);
5556 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5557 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5558 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5559 /* We can transfer points-to information from an old pointer
5560 or decl base to the new one. */
5561 if (TMR_BASE (new_ref)
5562 && TREE_CODE (TMR_BASE (new_ref)) == SSA_NAME
5563 && POINTER_TYPE_P (TREE_TYPE (TMR_BASE (new_ref)))
5564 && !SSA_NAME_PTR_INFO (TMR_BASE (new_ref)))
5566 tree base = get_base_address (old_ref);
5567 if ((INDIRECT_REF_P (base)
5568 || TREE_CODE (base) == MEM_REF)
5569 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5570 duplicate_ssa_name_ptr_info
5571 (TMR_BASE (new_ref), SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5572 else if (TREE_CODE (base) == VAR_DECL
5573 || TREE_CODE (base) == PARM_DECL
5574 || TREE_CODE (base) == RESULT_DECL)
5576 struct ptr_info_def *pi = get_ptr_info (TMR_BASE (new_ref));
5577 pt_solution_set_var (&pi->pt, base);
5583 /* Rewrites USE (address that is an iv) using candidate CAND. */
5586 rewrite_use_address (struct ivopts_data *data,
5587 struct iv_use *use, struct iv_cand *cand)
5590 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5591 tree base_hint = NULL_TREE;
5595 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5597 unshare_aff_combination (&aff);
5599 /* To avoid undefined overflow problems, all IV candidates use unsigned
5600 integer types. The drawback is that this makes it impossible for
5601 create_mem_ref to distinguish an IV that is based on a memory object
5602 from one that represents simply an offset.
5604 To work around this problem, we pass a hint to create_mem_ref that
5605 indicates which variable (if any) in aff is an IV based on a memory
5606 object. Note that we only consider the candidate. If this is not
5607 based on an object, the base of the reference is in some subexpression
5608 of the use -- but these will use pointer types, so they are recognized
5609 by the create_mem_ref heuristics anyway. */
5610 if (cand->iv->base_object)
5611 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5613 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5615 copy_ref_info (ref, *use->op_p);
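/* A worked illustration (made-up SSA names): for an address use whose
   selected candidate has base_object "a", the candidate's value at
   USE->stmt, say ivtmp_9 obtained from var_at_stmt above, is passed as
   BASE_HINT, so create_mem_ref keeps that name as the base of the resulting
   TARGET_MEM_REF instead of folding it into a plain offset.  */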
5619 /* Rewrites USE (the condition such that one of the arguments is an iv) using candidate CAND.  */
5623 rewrite_use_compare (struct ivopts_data *data,
5624 struct iv_use *use, struct iv_cand *cand)
5626 tree comp, *var_p, op, bound;
5627 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5628 enum tree_code compare;
5629 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5635 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5636 tree var_type = TREE_TYPE (var);
5639 compare = iv_elimination_compare (data, use);
5640 bound = unshare_expr (fold_convert (var_type, bound));
5641 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5643 gsi_insert_seq_on_edge_immediate (
5644 loop_preheader_edge (data->current_loop),
5647 gimple_cond_set_lhs (use->stmt, var);
5648 gimple_cond_set_code (use->stmt, compare);
5649 gimple_cond_set_rhs (use->stmt, op);
5653 /* The induction variable elimination failed; just express the original iv.  */
5655 comp = get_computation (data->current_loop, use, cand);
5656 gcc_assert (comp != NULL_TREE);
5658 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5661 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5662 true, GSI_SAME_STMT);
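/* A worked illustration of the successful-elimination branch above
   (hypothetical names): an exit test

       if (i_4 < n_6)

   can be rewritten against the chosen candidate, e.g. a pointer iv p_7, as

       if (p_7 != bound_9)

   where bound_9 is the candidate's value at the moment the loop exits,
   computed once on the preheader edge by the code above.  */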
5665 /* Rewrites USE using candidate CAND. */
5668 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5672 case USE_NONLINEAR_EXPR:
5673 rewrite_use_nonlinear_expr (data, use, cand);
5677 rewrite_use_address (data, use, cand);
5681 rewrite_use_compare (data, use, cand);
5688 update_stmt (use->stmt);
5691 /* Rewrite the uses using the selected induction variables. */
5694 rewrite_uses (struct ivopts_data *data)
5697 struct iv_cand *cand;
5700 for (i = 0; i < n_iv_uses (data); i++)
5702 use = iv_use (data, i);
5703 cand = use->selected;
5706 rewrite_use (data, use, cand);
5710 /* Removes the ivs that are not used after rewriting. */
5713 remove_unused_ivs (struct ivopts_data *data)
5717 bitmap toremove = BITMAP_ALLOC (NULL);
5719 /* Figure out an order in which to release SSA DEFs so that we don't
5720 release something that we'd have to propagate into a debug stmt afterwards.  */
5722 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5724 struct version_info *info;
5726 info = ver_info (data, j);
5728 && !integer_zerop (info->iv->step)
5730 && !info->iv->have_use_for
5731 && !info->preserve_biv)
5732 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5735 release_defs_bitset (toremove);
5737 BITMAP_FREE (toremove);
5740 /* Frees data allocated by the optimization of a single loop. */
5743 free_loop_data (struct ivopts_data *data)
5751 pointer_map_destroy (data->niters);
5752 data->niters = NULL;
5755 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5757 struct version_info *info;
5759 info = ver_info (data, i);
5763 info->has_nonlin_use = false;
5764 info->preserve_biv = false;
5767 bitmap_clear (data->relevant);
5768 bitmap_clear (data->important_candidates);
5770 for (i = 0; i < n_iv_uses (data); i++)
5772 struct iv_use *use = iv_use (data, i);
5775 BITMAP_FREE (use->related_cands);
5776 for (j = 0; j < use->n_map_members; j++)
5777 if (use->cost_map[j].depends_on)
5778 BITMAP_FREE (use->cost_map[j].depends_on);
5779 free (use->cost_map);
5782 VEC_truncate (iv_use_p, data->iv_uses, 0);
5784 for (i = 0; i < n_iv_cands (data); i++)
5786 struct iv_cand *cand = iv_cand (data, i);
5790 if (cand->depends_on)
5791 BITMAP_FREE (cand->depends_on);
5794 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5796 if (data->version_info_size < num_ssa_names)
5798 data->version_info_size = 2 * num_ssa_names;
5799 free (data->version_info);
5800 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5803 data->max_inv_id = 0;
5805 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5806 SET_DECL_RTL (obj, NULL_RTX);
5808 VEC_truncate (tree, decl_rtl_to_reset, 0);
5811 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the loop tree.  */
5815 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5817 free_loop_data (data);
5818 free (data->version_info);
5819 BITMAP_FREE (data->relevant);
5820 BITMAP_FREE (data->important_candidates);
5822 VEC_free (tree, heap, decl_rtl_to_reset);
5823 VEC_free (iv_use_p, heap, data->iv_uses);
5824 VEC_free (iv_cand_p, heap, data->iv_candidates);
5827 /* Optimizes the LOOP. Returns true if anything changed. */
5830 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5832 bool changed = false;
5833 struct iv_ca *iv_ca;
5837 gcc_assert (!data->niters);
5838 data->current_loop = loop;
5839 data->speed = optimize_loop_for_speed_p (loop);
5841 if (dump_file && (dump_flags & TDF_DETAILS))
5843 fprintf (dump_file, "Processing loop %d\n", loop->num);
5845 exit = single_dom_exit (loop);
5848 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5849 exit->src->index, exit->dest->index);
5850 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5851 fprintf (dump_file, "\n");
5854 fprintf (dump_file, "\n");
5857 body = get_loop_body (loop);
5858 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5861 /* For each ssa name determines whether it behaves as an induction variable in some loop.  */
5863 if (!find_induction_variables (data))
5866 /* Finds interesting uses (item 1). */
5867 find_interesting_uses (data);
5868 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5871 /* Finds candidates for the induction variables (item 2). */
5872 find_iv_candidates (data);
5874 /* Calculates the costs (item 3, part 1). */
5875 determine_iv_costs (data);
5876 determine_use_iv_costs (data);
5877 determine_set_costs (data);
5879 /* Find the optimal set of induction variables (item 3, part 2). */
5880 iv_ca = find_optimal_iv_set (data);
5885 /* Create the new induction variables (item 4, part 1). */
5886 create_new_ivs (data, iv_ca);
5887 iv_ca_free (&iv_ca);
5889 /* Rewrite the uses (item 4, part 2). */
5890 rewrite_uses (data);
5892 /* Remove the ivs that are unused after rewriting. */
5893 remove_unused_ivs (data);
5895 /* We have changed the structure of induction variables; it might happen
5896 that definitions in the scev database refer to some of them that were eliminated.  */
5901 free_loop_data (data);
5906 /* Main entry point. Optimizes induction variables in loops. */
5909 tree_ssa_iv_optimize (void)
5912 struct ivopts_data data;
5915 tree_ssa_iv_optimize_init (&data);
5917 /* Optimize the loops starting with the innermost ones. */
5918 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5920 if (dump_file && (dump_flags & TDF_DETAILS))
5921 flow_loop_dump (loop, dump_file, NULL, 1);
5923 tree_ssa_iv_optimize_loop (&data, loop);
5926 tree_ssa_iv_optimize_finalize (&data);