/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
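
/* An illustrative sketch of the steps above (not part of the pass itself;
   the variable names are made up):  for a loop such as

       for (i = 0; i < n; i++)
	 a[i] = b[i];

   step 1 finds the address uses &a[i] and &b[i] and the compare i < n;
   step 2 proposes candidates such as i itself or a pointer advancing by
   the element size; steps 3 and 4 may then produce the equivalent of

       for (p = a, q = b; p < a + n; p++, q++)
	 *p = *q;

   replacing the scaled indexing by cheap pointer increments (strength
   reduction) and eliminating the counter i.  */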
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name to avoid showing it in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
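
/* An illustrative example (not from the sources): for TOP = x * 8 + x * 4
   and BOT = x, the recursion above yields MUL = 12; for the constant case
   TOP = 12, BOT = 4 it yields MUL = 3, and TOP = 13, BOT = 4 fails because
   the division leaves a remainder.  */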
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
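
/* An illustrative example (not from the sources): for EXPR = &a[i + 3]
   with 4-byte elements, strip_offset returns &a[i] and stores 12 to
   OFFSET; for EXPR = p + 16 it returns p and stores 16.  */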
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
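
/* An illustrative note (not from the sources): on a target with
   post-increment addressing, a use such as

       *p = x;  p = p + 4;

   can become a single "*p++ = x" style access when the step equals
   GET_MODE_SIZE of the accessed mode, which is exactly what the
   IP_AFTER_USE candidate added above makes possible.  */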
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
2435 record_important_candidates (struct ivopts_data *data)
2440 for (i = 0; i < n_iv_cands (data); i++)
2442 struct iv_cand *cand = iv_cand (data, i);
2444 if (cand->important)
2445 bitmap_set_bit (data->important_candidates, i);
2448 data->consider_all_candidates = (n_iv_cands (data)
2449 <= CONSIDER_ALL_CANDIDATES_BOUND);
2451 if (data->consider_all_candidates)
2453 /* We will not need "related_cands" bitmaps in this case,
2454 so release them to decrease peak memory consumption. */
2455 for (i = 0; i < n_iv_uses (data); i++)
2457 use = iv_use (data, i);
2458 BITMAP_FREE (use->related_cands);
2463 /* Add important candidates to the related_cands bitmaps. */
2464 for (i = 0; i < n_iv_uses (data); i++)
2465 bitmap_ior_into (iv_use (data, i)->related_cands,
2466 data->important_candidates);
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the nearest power of two, so that taking the
	     modulo by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
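
/* A sketch of how the map above is used (illustrative numbers only):
   with, say, 5 related candidates the size is rounded up to 8, and a
   candidate with id 13 hashes to slot 13 & (8 - 1) = 5; on a collision,
   the probing in set_use_iv_cost and get_use_iv_cost below tries slots
   6 and 7 and then wraps around to 0.  */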
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2637 /* Returns an estimate of the cost of computing SEQ. */
2640 seq_cost (rtx seq, bool speed)
2645 for (; seq; seq = NEXT_INSN (seq))
2647 set = single_set (seq);
2649 cost += rtx_cost (set, SET, speed);
2657 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2659 produce_memory_decl_rtl (tree obj, int *regno)
2661 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2662 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2666 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2668 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2669 x = gen_rtx_SYMBOL_REF (address_mode, name);
2670 SET_SYMBOL_REF_DECL (x, obj);
2671 x = gen_rtx_MEM (DECL_MODE (obj), x);
2672 set_mem_addr_space (x, as);
2673 targetm.encode_section_info (obj, x, true);
2677 x = gen_raw_REG (address_mode, (*regno)++);
2678 x = gen_rtx_MEM (DECL_MODE (obj), x);
2679 set_mem_addr_space (x, as);
2685 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2686 walk_tree. DATA contains the actual fake register number. */
2689 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2691 tree obj = NULL_TREE;
2693 int *regno = (int *) data;
2695 switch (TREE_CODE (*expr_p))
2698 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2699 handled_component_p (*expr_p);
2700 expr_p = &TREE_OPERAND (*expr_p, 0))
2703 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2704 x = produce_memory_decl_rtl (obj, regno);
2709 obj = SSA_NAME_VAR (*expr_p);
2710 if (!DECL_RTL_SET_P (obj))
2711 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2720 if (DECL_RTL_SET_P (obj))
2723 if (DECL_MODE (obj) == BLKmode)
2724 x = produce_memory_decl_rtl (obj, regno);
2726 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2736 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2737 SET_DECL_RTL (obj, x);
2743 /* Determines cost of the computation of EXPR. */
2746 computation_cost (tree expr, bool speed)
2749 tree type = TREE_TYPE (expr);
2751 /* Avoid using hard regs in ways which may be unsupported. */
2752 int regno = LAST_VIRTUAL_REGISTER + 1;
2753 struct cgraph_node *node = cgraph_node (current_function_decl);
2754 enum node_frequency real_frequency = node->frequency;
2756 node->frequency = NODE_FREQUENCY_NORMAL;
2757 crtl->maybe_hot_insn_p = speed;
2758 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2760 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2763 default_rtl_profile ();
2764 node->frequency = real_frequency;
2766 cost = seq_cost (seq, speed);
2768 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2769 TYPE_ADDR_SPACE (type), speed);
2774 /* Returns the variable containing the value of candidate CAND at statement AT. */
2777 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2779 if (stmt_after_increment (loop, cand, stmt))
2780 return cand->var_after;
2782 return cand->var_before;
2785 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2786 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2789 tree_int_cst_sign_bit (const_tree t)
2791 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2792 unsigned HOST_WIDE_INT w;
2794 if (bitno < HOST_BITS_PER_WIDE_INT)
2795 w = TREE_INT_CST_LOW (t);
2798 w = TREE_INT_CST_HIGH (t);
2799 bitno -= HOST_BITS_PER_WIDE_INT;
2802 return (w >> bitno) & 1;
2805 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2806 same precision that is at least as wide as the precision of TYPE, stores
2807 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2811 determine_common_wider_type (tree *a, tree *b)
2813 tree wider_type = NULL;
2815 tree atype = TREE_TYPE (*a);
2817 if (CONVERT_EXPR_P (*a))
2819 suba = TREE_OPERAND (*a, 0);
2820 wider_type = TREE_TYPE (suba);
2821 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2827 if (CONVERT_EXPR_P (*b))
2829 subb = TREE_OPERAND (*b, 0);
2830 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2841 /* Determines the expression by which USE is expressed from induction variable
2842 CAND at statement AT in LOOP. The expression is stored in a decomposed
2843 form into AFF. Returns false if USE cannot be expressed using CAND. */
2846 get_computation_aff (struct loop *loop,
2847 struct iv_use *use, struct iv_cand *cand, gimple at,
2848 struct affine_tree_combination *aff)
2850 tree ubase = use->iv->base;
2851 tree ustep = use->iv->step;
2852 tree cbase = cand->iv->base;
2853 tree cstep = cand->iv->step, cstep_common;
2854 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2855 tree common_type, var;
2857 aff_tree cbase_aff, var_aff;
2860 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2862 /* We do not have enough precision to express the values of the use. */
2866 var = var_at_stmt (loop, cand, at);
2867 uutype = unsigned_type_for (utype);
2869 /* If the conversion is not a noop, perform it. */
2870 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2872 cstep = fold_convert (uutype, cstep);
2873 cbase = fold_convert (uutype, cbase);
2874 var = fold_convert (uutype, var);
2877 if (!constant_multiple_of (ustep, cstep, &rat))
2880 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2881 type, we achieve better folding by computing their difference in this
2882 wider type, and casting the result to UUTYPE. We do not need to worry about
2883 overflows, as all the arithmetic will in the end be performed in UUTYPE
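/* For instance, if UBASE is (unsigned int) x and CBASE is
   (unsigned int) y, with x and y of a common 64-bit type, the
   difference x - y is first formed in the 64-bit type, giving fold a
   chance to cancel common subexpressions before the single conversion
   to UUTYPE.  This is an illustrative scenario, not a transcript of a
   particular test case.  */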
2885 common_type = determine_common_wider_type (&ubase, &cbase);
2887 /* use = ubase - ratio * cbase + ratio * var. */
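/* A concrete example: for a use with iv {base 0, step 4} and a
   candidate with iv {base 0, step 1}, RAT is 4 and the combination
   built below works out to 0 - 4 * 0 + 4 * var, i.e. simply
   4 * var.  */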
2888 tree_to_aff_combination (ubase, common_type, aff);
2889 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2890 tree_to_aff_combination (var, uutype, &var_aff);
2892 /* We need to shift the value if we are after the increment. */
2893 if (stmt_after_increment (loop, cand, at))
2897 if (common_type != uutype)
2898 cstep_common = fold_convert (common_type, cstep);
2900 cstep_common = cstep;
2902 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2903 aff_combination_add (&cbase_aff, &cstep_aff);
2906 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2907 aff_combination_add (aff, &cbase_aff);
2908 if (common_type != uutype)
2909 aff_combination_convert (aff, uutype);
2911 aff_combination_scale (&var_aff, rat);
2912 aff_combination_add (aff, &var_aff);
2917 /* Determines the expression by which USE is expressed from induction variable
2918 CAND at statement AT in LOOP. The computation is unshared. */
2921 get_computation_at (struct loop *loop,
2922 struct iv_use *use, struct iv_cand *cand, gimple at)
2925 tree type = TREE_TYPE (use->iv->base);
2927 if (!get_computation_aff (loop, use, cand, at, &aff))
2929 unshare_aff_combination (&aff);
2930 return fold_convert (type, aff_combination_to_tree (&aff));
2933 /* Determines the expression by which USE is expressed from induction variable
2934 CAND in LOOP. The computation is unshared. */
2937 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2939 return get_computation_at (loop, use, cand, use->stmt);
2942 /* Adjust the cost COST for being in loop setup rather than loop body.
2943 If we're optimizing for space, the loop setup overhead is constant;
2944 if we're optimizing for speed, amortize it over the per-iteration cost. */
2946 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2950 else if (optimize_loop_for_speed_p (data->current_loop))
2951 return cost / AVG_LOOP_NITER (data->current_loop);
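/* E.g. with AVG_LOOP_NITER currently estimating 5 iterations, a setup
   cost of 10 is amortized to 10 / 5 == 2 when the loop is optimized
   for speed; when optimizing for size the constant overhead is kept
   as-is.  */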
2956 /* Returns cost of addition in MODE. */
2959 add_cost (enum machine_mode mode, bool speed)
2961 static unsigned costs[NUM_MACHINE_MODES];
2969 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2970 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2971 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2976 cost = seq_cost (seq, speed);
2982 if (dump_file && (dump_flags & TDF_DETAILS))
2983 fprintf (dump_file, "Addition in %s costs %d\n",
2984 GET_MODE_NAME (mode), cost);
2988 /* Entry in a hashtable of already known costs for multiplication. */
2991 HOST_WIDE_INT cst; /* The constant to multiply by. */
2992 enum machine_mode mode; /* In mode. */
2993 unsigned cost; /* The cost. */
2996 /* Computes the hash value for ENTRY. */
2999 mbc_entry_hash (const void *entry)
3001 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3003 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3006 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3009 mbc_entry_eq (const void *entry1, const void *entry2)
3011 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3012 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3014 return (e1->mode == e2->mode
3015 && e1->cst == e2->cst);
3018 /* Returns cost of multiplication by constant CST in MODE. */
3021 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3023 static htab_t costs;
3024 struct mbc_entry **cached, act;
3029 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3033 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3035 return (*cached)->cost;
3037 *cached = XNEW (struct mbc_entry);
3038 (*cached)->mode = mode;
3039 (*cached)->cst = cst;
3042 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3043 gen_int_mode (cst, mode), NULL_RTX, 0);
3047 cost = seq_cost (seq, speed);
3049 if (dump_file && (dump_flags & TDF_DETAILS))
3050 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3051 (int) cst, GET_MODE_NAME (mode), cost);
3053 (*cached)->cost = cost;
3058 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3059 validity for a memory reference accessing memory of mode MODE in
3060 address space AS. */
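/* For illustration: on a target with scaled index addressing such as
   x86, typically only the ratios 1, 2, 4 and 8 are accepted, so the
   cached bitmap computed below ends up with just a handful of its
   2 * MAX_RATIO + 1 bits set.  */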
3062 DEF_VEC_P (sbitmap);
3063 DEF_VEC_ALLOC_P (sbitmap, heap);
3066 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3069 #define MAX_RATIO 128
3070 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3071 static VEC (sbitmap, heap) *valid_mult_list;
3074 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3075 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3077 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3080 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3081 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3085 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3086 sbitmap_zero (valid_mult);
3087 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3088 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3090 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3091 if (memory_address_addr_space_p (mode, addr, as))
3092 SET_BIT (valid_mult, i + MAX_RATIO);
3095 if (dump_file && (dump_flags & TDF_DETAILS))
3097 fprintf (dump_file, " allowed multipliers:");
3098 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3099 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3100 fprintf (dump_file, " %d", (int) i);
3101 fprintf (dump_file, "\n");
3102 fprintf (dump_file, "\n");
3105 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3108 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3111 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3114 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3115 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3116 variable is omitted. Compute the cost for a memory reference that accesses
3117 a memory location of mode MEM_MODE in address space AS.
3119 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3120 size of MEM_MODE / RATIO) is available. To make this determination, we
3121 look at the size of the increment to be made, which is given in CSTEP.
3122 CSTEP may be zero if the step is unknown.
3123 STMT_AFTER_INC is true iff the statement we're looking at is after the
3124 increment of the original biv.
3126 TODO -- there must be a better way. All of this is quite crude. */
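/* As an illustrative example: a reference a[i] to a global array of
   4-byte integers has the shape symbol + RATIO * index with
   symbol == &a, RATIO == 4 and index == i, while an access through a
   pointer living in a register would use the var part instead.  */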
3130 HOST_WIDE_INT min_offset, max_offset;
3131 unsigned costs[2][2][2][2];
3132 } *address_cost_data;
3134 DEF_VEC_P (address_cost_data);
3135 DEF_VEC_ALLOC_P (address_cost_data, heap);
3138 get_address_cost (bool symbol_present, bool var_present,
3139 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3140 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3141 addr_space_t as, bool speed,
3142 bool stmt_after_inc, bool *may_autoinc)
3144 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3145 static VEC(address_cost_data, heap) *address_cost_data_list;
3146 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3147 address_cost_data data;
3148 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3149 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3150 unsigned cost, acost, complexity;
3151 bool offset_p, ratio_p, autoinc;
3152 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3153 unsigned HOST_WIDE_INT mask;
3156 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3157 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3160 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3164 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3165 HOST_WIDE_INT rat, off;
3166 int old_cse_not_expected;
3167 unsigned sym_p, var_p, off_p, rat_p, add_c;
3168 rtx seq, addr, base;
3171 data = (address_cost_data) xcalloc (1, sizeof (*data));
3173 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3175 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3176 for (i = start; i <= 1 << 20; i <<= 1)
3178 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3179 if (!memory_address_addr_space_p (mem_mode, addr, as))
3182 data->max_offset = i == start ? 0 : i >> 1;
3183 off = data->max_offset;
3185 for (i = start; i <= 1 << 20; i <<= 1)
3187 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3188 if (!memory_address_addr_space_p (mem_mode, addr, as))
3191 data->min_offset = i == start ? 0 : -(i >> 1);
3193 if (dump_file && (dump_flags & TDF_DETAILS))
3195 fprintf (dump_file, "get_address_cost:\n");
3196 fprintf (dump_file, " min offset %s %d\n",
3197 GET_MODE_NAME (mem_mode),
3198 (int) data->min_offset);
3199 fprintf (dump_file, " max offset %s %d\n",
3200 GET_MODE_NAME (mem_mode),
3201 (int) data->max_offset);
3205 for (i = 2; i <= MAX_RATIO; i++)
3206 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3212 /* Compute the cost of various addressing modes. */
3214 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3215 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3217 if (HAVE_PRE_DECREMENT)
3219 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3220 has_predec[mem_mode]
3221 = memory_address_addr_space_p (mem_mode, addr, as);
3223 if (HAVE_POST_DECREMENT)
3225 addr = gen_rtx_POST_DEC (address_mode, reg0);
3226 has_postdec[mem_mode]
3227 = memory_address_addr_space_p (mem_mode, addr, as);
3229 if (HAVE_PRE_INCREMENT)
3231 addr = gen_rtx_PRE_INC (address_mode, reg0);
3232 has_preinc[mem_mode]
3233 = memory_address_addr_space_p (mem_mode, addr, as);
3235 if (HAVE_POST_INCREMENT)
3237 addr = gen_rtx_POST_INC (address_mode, reg0);
3238 has_postinc[mem_mode]
3239 = memory_address_addr_space_p (mem_mode, addr, as);
3241 for (i = 0; i < 16; i++)
3244 var_p = (i >> 1) & 1;
3245 off_p = (i >> 2) & 1;
3246 rat_p = (i >> 3) & 1;
3250 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3251 gen_int_mode (rat, address_mode));
3254 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3258 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3259 /* ??? We can run into trouble with some backends by presenting
3260 them with symbols that haven't been properly passed through
3261 targetm.encode_section_info. By setting the local bit, we
3262 enhance the probability of things working. */
3263 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3266 base = gen_rtx_fmt_e (CONST, address_mode,
3268 (PLUS, address_mode, base,
3269 gen_int_mode (off, address_mode)));
3272 base = gen_int_mode (off, address_mode);
3277 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3280 /* To avoid splitting addressing modes, pretend that no cse will
3282 old_cse_not_expected = cse_not_expected;
3283 cse_not_expected = true;
3284 addr = memory_address_addr_space (mem_mode, addr, as);
3285 cse_not_expected = old_cse_not_expected;
3289 acost = seq_cost (seq, speed);
3290 acost += address_cost (addr, mem_mode, as, speed);
3294 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3297 /* On some targets, it is quite expensive to load a symbol into a register,
3298 which makes addresses that contain symbols look much more expensive.
3299 However, the symbol will have to be loaded in any case before the
3300 loop (and quite likely we have it in register already), so it does not
3301 make much sense to penalize them too heavily. So make some final
3302 tweaks for the SYMBOL_PRESENT modes:
3304 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3305 var is cheaper, use this mode with small penalty.
3306 If VAR_PRESENT is true, try whether the mode with
3307 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3308 if this is the case, use it. */
3309 add_c = add_cost (address_mode, speed);
3310 for (i = 0; i < 8; i++)
3313 off_p = (i >> 1) & 1;
3314 rat_p = (i >> 2) & 1;
3316 acost = data->costs[0][1][off_p][rat_p] + 1;
3320 if (acost < data->costs[1][var_p][off_p][rat_p])
3321 data->costs[1][var_p][off_p][rat_p] = acost;
3324 if (dump_file && (dump_flags & TDF_DETAILS))
3326 fprintf (dump_file, "Address costs:\n");
3328 for (i = 0; i < 16; i++)
3331 var_p = (i >> 1) & 1;
3332 off_p = (i >> 2) & 1;
3333 rat_p = (i >> 3) & 1;
3335 fprintf (dump_file, " ");
3337 fprintf (dump_file, "sym + ");
3339 fprintf (dump_file, "var + ");
3341 fprintf (dump_file, "cst + ");
3343 fprintf (dump_file, "rat * ");
3345 acost = data->costs[sym_p][var_p][off_p][rat_p];
3346 fprintf (dump_file, "index costs %d\n", acost);
3348 if (has_predec[mem_mode] || has_postdec[mem_mode]
3349 || has_preinc[mem_mode] || has_postinc[mem_mode])
3350 fprintf (dump_file, " May include autoinc/dec\n");
3351 fprintf (dump_file, "\n");
3354 VEC_replace (address_cost_data, address_cost_data_list,
3358 bits = GET_MODE_BITSIZE (address_mode);
3359 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
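  /* Note that the mask is built with two smaller shifts: when BITS
     equals HOST_BITS_PER_WIDE_INT, a single shift by BITS would
     invoke undefined behavior in C.  */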
3361 if ((offset >> (bits - 1) & 1))
3366 msize = GET_MODE_SIZE (mem_mode);
3367 autoinc_offset = offset;
3369 autoinc_offset += ratio * cstep;
3370 if (symbol_present || var_present || ratio != 1)
3372 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3374 || (has_postdec[mem_mode] && autoinc_offset == 0
3376 || (has_preinc[mem_mode] && autoinc_offset == msize
3378 || (has_predec[mem_mode] && autoinc_offset == -msize
3379 && msize == -cstep))
3383 offset_p = (s_offset != 0
3384 && data->min_offset <= s_offset
3385 && s_offset <= data->max_offset);
3386 ratio_p = (ratio != 1
3387 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3389 if (ratio != 1 && !ratio_p)
3390 cost += multiply_by_cost (ratio, address_mode, speed);
3392 if (s_offset && !offset_p && !symbol_present)
3393 cost += add_cost (address_mode, speed);
3396 *may_autoinc = autoinc;
3397 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3398 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3399 return new_cost (cost + acost, complexity);
3402 /* Estimates cost of forcing expression EXPR into a variable. */
3405 force_expr_to_var_cost (tree expr, bool speed)
3407 static bool costs_initialized = false;
3408 static unsigned integer_cost [2];
3409 static unsigned symbol_cost [2];
3410 static unsigned address_cost [2];
3412 comp_cost cost0, cost1, cost;
3413 enum machine_mode mode;
3415 if (!costs_initialized)
3417 tree type = build_pointer_type (integer_type_node);
3422 var = create_tmp_var_raw (integer_type_node, "test_var");
3423 TREE_STATIC (var) = 1;
3424 x = produce_memory_decl_rtl (var, NULL);
3425 SET_DECL_RTL (var, x);
3427 addr = build1 (ADDR_EXPR, type, var);
3430 for (i = 0; i < 2; i++)
3432 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3435 symbol_cost[i] = computation_cost (addr, i) + 1;
3438 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3440 build_int_cst (sizetype, 2000)), i) + 1;
3441 if (dump_file && (dump_flags & TDF_DETAILS))
3443 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3444 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3445 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3446 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3447 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3448 fprintf (dump_file, "\n");
3452 costs_initialized = true;
3457 if (SSA_VAR_P (expr))
3460 if (is_gimple_min_invariant (expr))
3462 if (TREE_CODE (expr) == INTEGER_CST)
3463 return new_cost (integer_cost [speed], 0);
3465 if (TREE_CODE (expr) == ADDR_EXPR)
3467 tree obj = TREE_OPERAND (expr, 0);
3469 if (TREE_CODE (obj) == VAR_DECL
3470 || TREE_CODE (obj) == PARM_DECL
3471 || TREE_CODE (obj) == RESULT_DECL)
3472 return new_cost (symbol_cost [speed], 0);
3475 return new_cost (address_cost [speed], 0);
3478 switch (TREE_CODE (expr))
3480 case POINTER_PLUS_EXPR:
3484 op0 = TREE_OPERAND (expr, 0);
3485 op1 = TREE_OPERAND (expr, 1);
3489 if (is_gimple_val (op0))
3492 cost0 = force_expr_to_var_cost (op0, speed);
3494 if (is_gimple_val (op1))
3497 cost1 = force_expr_to_var_cost (op1, speed);
3502 op0 = TREE_OPERAND (expr, 0);
3506 if (is_gimple_val (op0))
3509 cost0 = force_expr_to_var_cost (op0, speed);
3515 /* Just an arbitrary value, FIXME. */
3516 return new_cost (target_spill_cost[speed], 0);
3519 mode = TYPE_MODE (TREE_TYPE (expr));
3520 switch (TREE_CODE (expr))
3522 case POINTER_PLUS_EXPR:
3526 cost = new_cost (add_cost (mode, speed), 0);
3530 if (cst_and_fits_in_hwi (op0))
3531 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3532 else if (cst_and_fits_in_hwi (op1))
3533 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3535 return new_cost (target_spill_cost [speed], 0);
3542 cost = add_costs (cost, cost0);
3543 cost = add_costs (cost, cost1);
3545 /* Bound the cost by target_spill_cost. The parts of complicated
3546 computations often are either loop invariant or at least can
3547 be shared between several iv uses, so letting this grow without
3548 limits would not give reasonable results. */
3549 if (cost.cost > (int) target_spill_cost [speed])
3550 cost.cost = target_spill_cost [speed];
3555 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3556 invariants the computation depends on. */
3559 force_var_cost (struct ivopts_data *data,
3560 tree expr, bitmap *depends_on)
3564 fd_ivopts_data = data;
3565 walk_tree (&expr, find_depends, depends_on, NULL);
3568 return force_expr_to_var_cost (expr, data->speed);
3571 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3572 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3573 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3574 invariants the computation depends on. */
3577 split_address_cost (struct ivopts_data *data,
3578 tree addr, bool *symbol_present, bool *var_present,
3579 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3582 HOST_WIDE_INT bitsize;
3583 HOST_WIDE_INT bitpos;
3585 enum machine_mode mode;
3586 int unsignedp, volatilep;
3588 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3589 &unsignedp, &volatilep, false);
3592 || bitpos % BITS_PER_UNIT != 0
3593 || TREE_CODE (core) != VAR_DECL)
3595 *symbol_present = false;
3596 *var_present = true;
3597 fd_ivopts_data = data;
3598 walk_tree (&addr, find_depends, depends_on, NULL);
3599 return new_cost (target_spill_cost[data->speed], 0);
3602 *offset += bitpos / BITS_PER_UNIT;
3603 if (TREE_STATIC (core)
3604 || DECL_EXTERNAL (core))
3606 *symbol_present = true;
3607 *var_present = false;
3611 *symbol_present = false;
3612 *var_present = true;
3616 /* Estimates cost of expressing difference of addresses E1 - E2 as
3617 var + symbol + offset. The value of offset is added to OFFSET,
3618 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3619 part is missing. DEPENDS_ON is a set of the invariants the computation
3623 ptr_difference_cost (struct ivopts_data *data,
3624 tree e1, tree e2, bool *symbol_present, bool *var_present,
3625 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3627 HOST_WIDE_INT diff = 0;
3628 aff_tree aff_e1, aff_e2;
3631 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3633 if (ptr_difference_const (e1, e2, &diff))
3636 *symbol_present = false;
3637 *var_present = false;
3641 if (integer_zerop (e2))
3642 return split_address_cost (data, TREE_OPERAND (e1, 0),
3643 symbol_present, var_present, offset, depends_on);
3645 *symbol_present = false;
3646 *var_present = true;
3648 type = signed_type_for (TREE_TYPE (e1));
3649 tree_to_aff_combination (e1, type, &aff_e1);
3650 tree_to_aff_combination (e2, type, &aff_e2);
3651 aff_combination_scale (&aff_e2, double_int_minus_one);
3652 aff_combination_add (&aff_e1, &aff_e2);
3654 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3657 /* Estimates cost of expressing difference E1 - E2 as
3658 var + symbol + offset. The value of offset is added to OFFSET,
3659 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3660 part is missing. DEPENDS_ON is a set of the invariants the computation
3664 difference_cost (struct ivopts_data *data,
3665 tree e1, tree e2, bool *symbol_present, bool *var_present,
3666 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3668 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3669 unsigned HOST_WIDE_INT off1, off2;
3670 aff_tree aff_e1, aff_e2;
3673 e1 = strip_offset (e1, &off1);
3674 e2 = strip_offset (e2, &off2);
3675 *offset += off1 - off2;
3680 if (TREE_CODE (e1) == ADDR_EXPR)
3681 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3682 offset, depends_on);
3683 *symbol_present = false;
3685 if (operand_equal_p (e1, e2, 0))
3687 *var_present = false;
3691 *var_present = true;
3693 if (integer_zerop (e2))
3694 return force_var_cost (data, e1, depends_on);
3696 if (integer_zerop (e1))
3698 comp_cost cost = force_var_cost (data, e2, depends_on);
3699 cost.cost += multiply_by_cost (-1, mode, data->speed);
3703 type = signed_type_for (TREE_TYPE (e1));
3704 tree_to_aff_combination (e1, type, &aff_e1);
3705 tree_to_aff_combination (e2, type, &aff_e2);
3706 aff_combination_scale (&aff_e2, double_int_minus_one);
3707 aff_combination_add (&aff_e1, &aff_e2);
3709 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3712 /* Determines the cost of the computation by which USE is expressed
3713 from induction variable CAND. If ADDRESS_P is true, we just need
3714 to create an address from it, otherwise we want to get it into a
3715 register. A set of invariants we depend on is stored in
3716 DEPENDS_ON. AT is the statement at which the value is computed.
3717 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3718 addressing is likely. */
3721 get_computation_cost_at (struct ivopts_data *data,
3722 struct iv_use *use, struct iv_cand *cand,
3723 bool address_p, bitmap *depends_on, gimple at,
3726 tree ubase = use->iv->base, ustep = use->iv->step;
3728 tree utype = TREE_TYPE (ubase), ctype;
3729 unsigned HOST_WIDE_INT cstepi, offset = 0;
3730 HOST_WIDE_INT ratio, aratio;
3731 bool var_present, symbol_present, stmt_is_after_inc;
3734 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3738 /* Only consider real candidates. */
3740 return infinite_cost;
3742 cbase = cand->iv->base;
3743 cstep = cand->iv->step;
3744 ctype = TREE_TYPE (cbase);
3746 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3748 /* We do not have enough precision to express the values of the use. */
3749 return infinite_cost;
3754 /* Do not try to express the address of an object with a computation based
3755 on the address of a different object. This may cause problems in rtl
3756 level alias analysis (which does not expect this to be happening,
3757 as this is illegal in C), and would be unlikely to be useful
3759 if (use->iv->base_object
3760 && cand->iv->base_object
3761 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3762 return infinite_cost;
3765 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3767 /* TODO -- add direct handling of this case. */
3771 /* CSTEPI is removed from the offset in case the statement is after the
3772 increment. If the step is not constant, we use zero instead.
3773 This is a bit imprecise (there is an extra addition), but
3774 redundancy elimination is likely to transform the code so that
3775 it uses the value of the variable before the increment anyway,
3776 so this is not all that unrealistic. */
3777 if (cst_and_fits_in_hwi (cstep))
3778 cstepi = int_cst_value (cstep);
3782 if (!constant_multiple_of (ustep, cstep, &rat))
3783 return infinite_cost;
3785 if (double_int_fits_in_shwi_p (rat))
3786 ratio = double_int_to_shwi (rat);
3788 return infinite_cost;
3791 ctype = TREE_TYPE (cbase);
3793 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3794 or ratio == 1, it is better to handle this like
3796 ubase - ratio * cbase + ratio * var
3798 (this also holds in the case ratio == -1, TODO). */
3800 if (cst_and_fits_in_hwi (cbase))
3802 offset = - ratio * int_cst_value (cbase);
3803 cost = difference_cost (data,
3804 ubase, build_int_cst (utype, 0),
3805 &symbol_present, &var_present, &offset,
3808 else if (ratio == 1)
3810 cost = difference_cost (data,
3812 &symbol_present, &var_present, &offset,
3816 && !POINTER_TYPE_P (ctype)
3817 && multiplier_allowed_in_address_p
3818 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3819 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3822 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3823 cost = difference_cost (data,
3825 &symbol_present, &var_present, &offset,
3830 cost = force_var_cost (data, cbase, depends_on);
3831 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3832 cost = add_costs (cost,
3833 difference_cost (data,
3834 ubase, build_int_cst (utype, 0),
3835 &symbol_present, &var_present,
3836 &offset, depends_on));
3839 /* If we are after the increment, the value of the candidate is higher by
3841 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3842 if (stmt_is_after_inc)
3843 offset -= ratio * cstepi;
3845 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
3846 (symbol/var1/const parts may be omitted). If we are looking for an
3847 address, find the cost of addressing this. */
3849 return add_costs (cost,
3850 get_address_cost (symbol_present, var_present,
3851 offset, ratio, cstepi,
3852 TYPE_MODE (TREE_TYPE (utype)),
3853 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3854 speed, stmt_is_after_inc,
3857 /* Otherwise estimate the costs for computing the expression. */
3858 if (!symbol_present && !var_present && !offset)
3861 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3865 /* Symbol + offset should be compile-time computable, so consider that they
3866 are added once to the variable, if present. */
3867 if (var_present && (symbol_present || offset))
3868 cost.cost += adjust_setup_cost (data,
3869 add_cost (TYPE_MODE (ctype), speed));
3871 /* Having an offset does not affect the runtime cost if it is added to the
3872 symbol, but it increases complexity. */
3876 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3878 aratio = ratio > 0 ? ratio : -ratio;
3880 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3885 *can_autoinc = false;
3888 /* Just get the expression, expand it and measure the cost. */
3889 tree comp = get_computation_at (data->current_loop, use, cand, at);
3892 return infinite_cost;
3895 comp = build_simple_mem_ref (comp);
3897 return new_cost (computation_cost (comp, speed), 0);
3901 /* Determines the cost of the computation by which USE is expressed
3902 from induction variable CAND. If ADDRESS_P is true, we just need
3903 to create an address from it, otherwise we want to get it into a
3904 register. A set of invariants we depend on is stored in
3905 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3906 autoinc addressing is likely. */
3909 get_computation_cost (struct ivopts_data *data,
3910 struct iv_use *use, struct iv_cand *cand,
3911 bool address_p, bitmap *depends_on, bool *can_autoinc)
3913 return get_computation_cost_at (data,
3914 use, cand, address_p, depends_on, use->stmt,
3918 /* Determines cost of basing replacement of USE on CAND in a generic
3922 determine_use_iv_cost_generic (struct ivopts_data *data,
3923 struct iv_use *use, struct iv_cand *cand)
3928 /* The simple case first -- if we need to express the value of the preserved
3929 original biv, the cost is 0. This also prevents us from counting the
3930 cost of the increment twice -- once at this use and once in the cost of
3932 if (cand->pos == IP_ORIGINAL
3933 && cand->incremented_at == use->stmt)
3935 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3939 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3940 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3942 return !infinite_cost_p (cost);
3945 /* Determines cost of basing replacement of USE on CAND in an address. */
3948 determine_use_iv_cost_address (struct ivopts_data *data,
3949 struct iv_use *use, struct iv_cand *cand)
3953 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3956 if (cand->ainc_use == use)
3959 cost.cost -= cand->cost_step;
3960 /* If we generated the candidate solely for exploiting autoincrement
3961 opportunities, and it turns out it can't be used, set the cost to
3962 infinity to make sure we ignore it. */
3963 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3964 cost = infinite_cost;
3966 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3968 return !infinite_cost_p (cost);
3971 /* Computes the value of candidate CAND at position AT in iteration NITER, and
3972 stores it in VAL. */
3975 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3978 aff_tree step, delta, nit;
3979 struct iv *iv = cand->iv;
3980 tree type = TREE_TYPE (iv->base);
3981 tree steptype = type;
3982 if (POINTER_TYPE_P (type))
3983 steptype = sizetype;
3985 tree_to_aff_combination (iv->step, steptype, &step);
3986 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3987 aff_combination_convert (&nit, steptype);
3988 aff_combination_mult (&nit, &step, &delta);
3989 if (stmt_after_increment (loop, cand, at))
3990 aff_combination_add (&delta, &step);
3992 tree_to_aff_combination (iv->base, type, val);
3993 aff_combination_add (val, &delta);
3996 /* Returns the period of induction variable IV. */
3999 iv_period (struct iv *iv)
4001 tree step = iv->step, period, type;
4004 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4006 /* The period of the iv is gcd (step, type range). Since the type range is a power
4007 of two, it suffices to determine the maximum power of two that divides
4009 pow2div = num_ending_zeros (step);
4010 type = unsigned_type_for (TREE_TYPE (step));
4012 period = build_low_bits_mask (type,
4013 (TYPE_PRECISION (type)
4014 - tree_low_cst (pow2div, 1)));
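  /* Worked example: for an 8-bit unsigned iv with step 4, POW2DIV is
     2 and the mask covers 8 - 2 == 6 bits, so the period is 63: the
     iv runs through 64 distinct values before wrapping around.  */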
4019 /* Returns the comparison operator used when eliminating the iv USE. */
4021 static enum tree_code
4022 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4024 struct loop *loop = data->current_loop;
4028 ex_bb = gimple_bb (use->stmt);
4029 exit = EDGE_SUCC (ex_bb, 0);
4030 if (flow_bb_inside_loop_p (loop, exit->dest))
4031 exit = EDGE_SUCC (ex_bb, 1);
4033 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4036 /* Check whether it is possible to express the condition in USE by a comparison
4037 of candidate CAND. If so, store the value it is compared with in BOUND. */
4040 may_eliminate_iv (struct ivopts_data *data,
4041 struct iv_use *use, struct iv_cand *cand, tree *bound)
4046 struct loop *loop = data->current_loop;
4049 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4052 /* For now this works only for exits that dominate the loop latch.
4053 TODO: extend to other conditions inside loop body. */
4054 ex_bb = gimple_bb (use->stmt);
4055 if (use->stmt != last_stmt (ex_bb)
4056 || gimple_code (use->stmt) != GIMPLE_COND
4057 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4060 exit = EDGE_SUCC (ex_bb, 0);
4061 if (flow_bb_inside_loop_p (loop, exit->dest))
4062 exit = EDGE_SUCC (ex_bb, 1);
4063 if (flow_bb_inside_loop_p (loop, exit->dest))
4066 nit = niter_for_exit (data, exit);
4070 /* Determine whether we can use the variable to test the exit condition.
4071 This is the case iff the period of the induction variable is greater
4072 than the number of iterations for which the exit condition is true. */
4073 period = iv_period (cand->iv);
4075 /* If the number of iterations is constant, compare against it directly. */
4076 if (TREE_CODE (nit) == INTEGER_CST)
4078 if (!tree_int_cst_lt (nit, period))
4082 /* If not, and if this is the only possible exit of the loop, see whether
4083 we can get a conservative estimate on the number of iterations of the
4084 entire loop and compare against that instead. */
4085 else if (loop_only_exit_p (loop, exit))
4087 double_int period_value, max_niter;
4088 if (!estimated_loop_iterations (loop, true, &max_niter))
4090 period_value = tree_to_double_int (period);
4091 if (double_int_ucmp (max_niter, period_value) >= 0)
4095 /* Otherwise, punt. */
4099 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4101 *bound = aff_combination_to_tree (&bnd);
4102 /* It is unlikely that computing the number of iterations using division
4103 would be more profitable than keeping the original induction variable. */
4104 if (expression_expensive_p (*bound))
4109 /* Determines cost of basing replacement of USE on CAND in a condition. */
4112 determine_use_iv_cost_condition (struct ivopts_data *data,
4113 struct iv_use *use, struct iv_cand *cand)
4115 tree bound = NULL_TREE;
4117 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4118 comp_cost elim_cost, express_cost, cost;
4120 tree *control_var, *bound_cst;
4122 /* Only consider real candidates. */
4125 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4129 /* Try iv elimination. */
4130 if (may_eliminate_iv (data, use, cand, &bound))
4132 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4133 /* The bound is a loop invariant, so it will only be computed
4135 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4138 elim_cost = infinite_cost;
4140 /* Try expressing the original giv. If it is compared with an invariant,
4141 note that we cannot get rid of it. */
4142 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4146 /* When the condition is a comparison of the candidate IV against
4147 zero, prefer this IV.
4149 TODO: The constant that we're subtracting from the cost should
4150 be target-dependent. This information should be added to the
4151 target costs for each backend. */
4152 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4153 && integer_zerop (*bound_cst)
4154 && (operand_equal_p (*control_var, cand->var_after, 0)
4155 || operand_equal_p (*control_var, cand->var_before, 0)))
4156 elim_cost.cost -= 1;
4158 express_cost = get_computation_cost (data, use, cand, false,
4159 &depends_on_express, NULL);
4160 fd_ivopts_data = data;
4161 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4163 /* Choose the better approach, preferring the eliminated IV. */
4164 if (compare_costs (elim_cost, express_cost) <= 0)
4167 depends_on = depends_on_elim;
4168 depends_on_elim = NULL;
4172 cost = express_cost;
4173 depends_on = depends_on_express;
4174 depends_on_express = NULL;
4178 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4180 if (depends_on_elim)
4181 BITMAP_FREE (depends_on_elim);
4182 if (depends_on_express)
4183 BITMAP_FREE (depends_on_express);
4185 return !infinite_cost_p (cost);
4188 /* Determines cost of basing replacement of USE on CAND. Returns false
4189 if USE cannot be based on CAND. */
4192 determine_use_iv_cost (struct ivopts_data *data,
4193 struct iv_use *use, struct iv_cand *cand)
4197 case USE_NONLINEAR_EXPR:
4198 return determine_use_iv_cost_generic (data, use, cand);
4201 return determine_use_iv_cost_address (data, use, cand);
4204 return determine_use_iv_cost_condition (data, use, cand);
4211 /* Return true if get_computation_cost indicates that autoincrement is
4212 a possibility for the pair of USE and CAND, false otherwise. */
4215 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4216 struct iv_cand *cand)
4222 if (use->type != USE_ADDRESS)
4225 cost = get_computation_cost (data, use, cand, true, &depends_on,
4228 BITMAP_FREE (depends_on);
4230 return !infinite_cost_p (cost) && can_autoinc;
4233 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4234 use that allows autoincrement, and set their AINC_USE if possible. */
4237 set_autoinc_for_original_candidates (struct ivopts_data *data)
4241 for (i = 0; i < n_iv_cands (data); i++)
4243 struct iv_cand *cand = iv_cand (data, i);
4244 struct iv_use *closest = NULL;
4245 if (cand->pos != IP_ORIGINAL)
4247 for (j = 0; j < n_iv_uses (data); j++)
4249 struct iv_use *use = iv_use (data, j);
4250 unsigned uid = gimple_uid (use->stmt);
4251 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4252 || uid > gimple_uid (cand->incremented_at))
4254 if (closest == NULL || uid > gimple_uid (closest->stmt))
4257 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4259 cand->ainc_use = closest;
4263 /* Finds the candidates for the induction variables. */
4266 find_iv_candidates (struct ivopts_data *data)
4268 /* Add commonly used ivs. */
4269 add_standard_iv_candidates (data);
4271 /* Add old induction variables. */
4272 add_old_ivs_candidates (data);
4274 /* Add induction variables derived from uses. */
4275 add_derived_ivs_candidates (data);
4277 set_autoinc_for_original_candidates (data);
4279 /* Record the important candidates. */
4280 record_important_candidates (data);
4283 /* Determines costs of basing the use of the iv on an iv candidate. */
4286 determine_use_iv_costs (struct ivopts_data *data)
4290 struct iv_cand *cand;
4291 bitmap to_clear = BITMAP_ALLOC (NULL);
4293 alloc_use_cost_map (data);
4295 for (i = 0; i < n_iv_uses (data); i++)
4297 use = iv_use (data, i);
4299 if (data->consider_all_candidates)
4301 for (j = 0; j < n_iv_cands (data); j++)
4303 cand = iv_cand (data, j);
4304 determine_use_iv_cost (data, use, cand);
4311 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4313 cand = iv_cand (data, j);
4314 if (!determine_use_iv_cost (data, use, cand))
4315 bitmap_set_bit (to_clear, j);
4318 /* Remove the candidates for which the cost is infinite from
4319 the list of related candidates. */
4320 bitmap_and_compl_into (use->related_cands, to_clear);
4321 bitmap_clear (to_clear);
4325 BITMAP_FREE (to_clear);
4327 if (dump_file && (dump_flags & TDF_DETAILS))
4329 fprintf (dump_file, "Use-candidate costs:\n");
4331 for (i = 0; i < n_iv_uses (data); i++)
4333 use = iv_use (data, i);
4335 fprintf (dump_file, "Use %d:\n", i);
4336 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4337 for (j = 0; j < use->n_map_members; j++)
4339 if (!use->cost_map[j].cand
4340 || infinite_cost_p (use->cost_map[j].cost))
4343 fprintf (dump_file, " %d\t%d\t%d\t",
4344 use->cost_map[j].cand->id,
4345 use->cost_map[j].cost.cost,
4346 use->cost_map[j].cost.complexity);
4347 if (use->cost_map[j].depends_on)
4348 bitmap_print (dump_file,
4349 use->cost_map[j].depends_on, "","");
4350 fprintf (dump_file, "\n");
4353 fprintf (dump_file, "\n");
4355 fprintf (dump_file, "\n");
4359 /* Determines cost of the candidate CAND. */
4362 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4364 comp_cost cost_base;
4365 unsigned cost, cost_step;
4374 /* There are two costs associated with the candidate -- its increment
4375 and its initialization. The second is almost negligible for any loop
4376 that rolls enough, so we take it into account only very little. */
4378 base = cand->iv->base;
4379 cost_base = force_var_cost (data, base, NULL);
4380 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4382 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4384 /* Prefer the original ivs unless we may gain something by replacing them.
4385 The reason is to make debugging simpler; so this is not relevant for
4386 artificial ivs created by other optimization passes. */
4387 if (cand->pos != IP_ORIGINAL
4388 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4391 /* Prefer not to insert statements into the latch unless there are some
4392 already (so that we do not create unnecessary jumps). */
4393 if (cand->pos == IP_END
4394 && empty_block_p (ip_end_pos (data->current_loop)))
4398 cand->cost_step = cost_step;
4401 /* Determines costs of computation of the candidates. */
4404 determine_iv_costs (struct ivopts_data *data)
4408 if (dump_file && (dump_flags & TDF_DETAILS))
4410 fprintf (dump_file, "Candidate costs:\n");
4411 fprintf (dump_file, " cand\tcost\n");
4414 for (i = 0; i < n_iv_cands (data); i++)
4416 struct iv_cand *cand = iv_cand (data, i);
4418 determine_iv_cost (data, cand);
4420 if (dump_file && (dump_flags & TDF_DETAILS))
4421 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4424 if (dump_file && (dump_flags & TDF_DETAILS))
4425 fprintf (dump_file, "\n");
4428 /* Calculates cost for having SIZE induction variables. */
4431 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4433 /* We add size to the cost, so that we prefer eliminating ivs
4435 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4438 /* For each size of the induction variable set determine the penalty. */
4441 determine_set_costs (struct ivopts_data *data)
4445 gimple_stmt_iterator psi;
4447 struct loop *loop = data->current_loop;
4450 /* We use the following model (definitely improvable, especially the
4451 cost function -- TODO):
4453 We estimate the number of registers available (using MD data), name it A.
4455 We estimate the number of registers used by the loop, name it U. This
4456 number is obtained as the number of loop phi nodes (not counting virtual
4457 registers and bivs) + the number of variables from outside of the loop.
4459 We set a reserve R (free regs that are used for temporary computations,
4460 etc.). For now the reserve is a constant 3.
4462 Let I be the number of induction variables.
4464 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
4465 encourage making a lot of ivs without a reason).
4466 -- if A - R < U + I <= A, the cost is I * PRES_COST
4467 -- if U + I > A, the cost is I * PRES_COST and
4468 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
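/* A small worked example of the above model (assumed figures, for
   illustration only): with A == 8, reserve R == 3 and U == 4, a
   single iv (I == 1) satisfies U + I + R <= A and is charged only
   SMALL_COST, while I == 2 gives U + I == 6 > A - R == 5, so each iv
   is charged PRES_COST.  */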
4470 if (dump_file && (dump_flags & TDF_DETAILS))
4472 fprintf (dump_file, "Global costs:\n");
4473 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4474 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4475 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4479 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4481 phi = gsi_stmt (psi);
4482 op = PHI_RESULT (phi);
4484 if (!is_gimple_reg (op))
4487 if (get_iv (data, op))
4493 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4495 struct version_info *info = ver_info (data, j);
4497 if (info->inv_id && info->has_nonlin_use)
4501 data->regs_used = n;
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4503 fprintf (dump_file, " regs_used %d\n", n);
4505 if (dump_file && (dump_flags & TDF_DETAILS))
4507 fprintf (dump_file, " cost for size:\n");
4508 fprintf (dump_file, " ivs\tcost\n");
4509 for (j = 0; j <= 2 * target_avail_regs; j++)
4510 fprintf (dump_file, " %d\t%d\n", j,
4511 ivopts_global_cost_for_size (data, j));
4512 fprintf (dump_file, "\n");
4516 /* Returns true if A is a cheaper cost pair than B. */
4519 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4529 cmp = compare_costs (a->cost, b->cost);
4536 /* In case the costs are the same, prefer the cheaper candidate. */
4537 if (a->cand->cost < b->cand->cost)
4543 /* Computes the cost field of the IVS structure. */
4546 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4548 comp_cost cost = ivs->cand_use_cost;
4549 cost.cost += ivs->cand_cost;
4550 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4555 /* Remove the invariants in set INVS from set IVS. */
4558 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4566 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4568 ivs->n_invariant_uses[iid]--;
4569 if (ivs->n_invariant_uses[iid] == 0)
4574 /* Set USE not to be expressed by any candidate in IVS. */
4577 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4580 unsigned uid = use->id, cid;
4581 struct cost_pair *cp;
4583 cp = ivs->cand_for_use[uid];
4589 ivs->cand_for_use[uid] = NULL;
4590 ivs->n_cand_uses[cid]--;
4592 if (ivs->n_cand_uses[cid] == 0)
4594 bitmap_clear_bit (ivs->cands, cid);
4595 /* Do not count the pseudocandidates. */
4599 ivs->cand_cost -= cp->cand->cost;
4601 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4604 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4606 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4607 iv_ca_recount_cost (data, ivs);
4610 /* Add invariants in set INVS to set IVS. */
4613 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4621 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4623 ivs->n_invariant_uses[iid]++;
4624 if (ivs->n_invariant_uses[iid] == 1)
4629 /* Set cost pair for USE in set IVS to CP. */
4632 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4633 struct iv_use *use, struct cost_pair *cp)
4635 unsigned uid = use->id, cid;
4637 if (ivs->cand_for_use[uid] == cp)
4640 if (ivs->cand_for_use[uid])
4641 iv_ca_set_no_cp (data, ivs, use);
4648 ivs->cand_for_use[uid] = cp;
4649 ivs->n_cand_uses[cid]++;
4650 if (ivs->n_cand_uses[cid] == 1)
4652 bitmap_set_bit (ivs->cands, cid);
4653 /* Do not count the pseudocandidates. */
4657 ivs->cand_cost += cp->cand->cost;
4659 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4662 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4663 iv_ca_set_add_invariants (ivs, cp->depends_on);
4664 iv_ca_recount_cost (data, ivs);
4668 /* Extend set IVS by expressing USE by some of the candidates in it
4672 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4675 struct cost_pair *best_cp = NULL, *cp;
4679 gcc_assert (ivs->upto >= use->id);
4681 if (ivs->upto == use->id)
4687 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4689 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4691 if (cheaper_cost_pair (cp, best_cp))
4695 iv_ca_set_cp (data, ivs, use, best_cp);
4698 /* Get cost for assignment IVS. */
4701 iv_ca_cost (struct iv_ca *ivs)
4703 /* This was a conditional expression but it triggered a bug in
4706 return infinite_cost;
4711 /* Returns true if all dependences of CP are among invariants in IVS. */
4714 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4719 if (!cp->depends_on)
4722 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4724 if (ivs->n_invariant_uses[i] == 0)
4731 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
4732 it before NEXT_CHANGE. */
4734 static struct iv_ca_delta *
4735 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4736 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4738 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4741 change->old_cp = old_cp;
4742 change->new_cp = new_cp;
4743 change->next_change = next_change;
4748 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4751 static struct iv_ca_delta *
4752 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4754 struct iv_ca_delta *last;
4762 for (last = l1; last->next_change; last = last->next_change)
4764 last->next_change = l2;
4769 /* Returns the candidate by which USE is expressed in IVS. */
4771 static struct cost_pair *
4772 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4774 return ivs->cand_for_use[use->id];
4777 /* Reverse the list of changes DELTA, forming the inverse to it. */
4779 static struct iv_ca_delta *
4780 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4782 struct iv_ca_delta *act, *next, *prev = NULL;
4783 struct cost_pair *tmp;
4785 for (act = delta; act; act = next)
4787 next = act->next_change;
4788 act->next_change = prev;
4792 act->old_cp = act->new_cp;
4799 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4800 reverted instead. */
4803 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4804 struct iv_ca_delta *delta, bool forward)
4806 struct cost_pair *from, *to;
4807 struct iv_ca_delta *act;
4810 delta = iv_ca_delta_reverse (delta);
4812 for (act = delta; act; act = act->next_change)
4816 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4817 iv_ca_set_cp (data, ivs, act->use, to);
4821 iv_ca_delta_reverse (delta);
4824 /* Returns true if CAND is used in IVS. */
4827 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4829 return ivs->n_cand_uses[cand->id] > 0;
4832 /* Returns number of induction variable candidates in the set IVS. */
4835 iv_ca_n_cands (struct iv_ca *ivs)
4837 return ivs->n_cands;
4840 /* Free the list of changes DELTA. */
4843 iv_ca_delta_free (struct iv_ca_delta **delta)
4845 struct iv_ca_delta *act, *next;
4847 for (act = *delta; act; act = next)
4849 next = act->next_change;
4856 /* Allocates a new iv candidate assignment. */
4858 static struct iv_ca *
4859 iv_ca_new (struct ivopts_data *data)
4861 struct iv_ca *nw = XNEW (struct iv_ca);
4865 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4866 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4867 nw->cands = BITMAP_ALLOC (NULL);
4870 nw->cand_use_cost = zero_cost;
4872 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4873 nw->cost = zero_cost;
4878 /* Free memory occupied by the set IVS. */
4881 iv_ca_free (struct iv_ca **ivs)
4883 free ((*ivs)->cand_for_use);
4884 free ((*ivs)->n_cand_uses);
4885 BITMAP_FREE ((*ivs)->cands);
4886 free ((*ivs)->n_invariant_uses);
4891 /* Dumps IVS to FILE. */
4894 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4896 const char *pref = " invariants ";
4898 comp_cost cost = iv_ca_cost (ivs);
4900 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4901 bitmap_print (file, ivs->cands, " candidates ","\n");
4903 for (i = 1; i <= data->max_inv_id; i++)
4904 if (ivs->n_invariant_uses[i])
4906 fprintf (file, "%s%d", pref, i);
4909 fprintf (file, "\n");
4912 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4913 of the new set, and store the differences in DELTA. The number of
4914 induction variables in the new set is stored in N_IVS. */
4917 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4918 struct iv_cand *cand, struct iv_ca_delta **delta,
4924 struct cost_pair *old_cp, *new_cp;
4927 for (i = 0; i < ivs->upto; i++)
4929 use = iv_use (data, i);
4930 old_cp = iv_ca_cand_for_use (ivs, use);
4933 && old_cp->cand == cand)
4936 new_cp = get_use_iv_cost (data, use, cand);
4940 if (!iv_ca_has_deps (ivs, new_cp))
4943 if (!cheaper_cost_pair (new_cp, old_cp))
4946 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4949 iv_ca_delta_commit (data, ivs, *delta, true);
4950 cost = iv_ca_cost (ivs);
4952 *n_ivs = iv_ca_n_cands (ivs);
4953 iv_ca_delta_commit (data, ivs, *delta, false);
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      /* Find the cheapest replacement for CAND among the candidates
	 that remain in the set.  */
      new_cp = NULL;
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;
	      cnd = iv_cand (data, ci);
	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;
	      if (!cheaper_cost_pair (cp, new_cp))
		continue;
	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;
	      cnd = iv_cand (data, ci);
	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;
	      if (!cheaper_cost_pair (cp, new_cp))
		continue;
	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  (A schematic example follows this
     function.)  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important && cand->iv->base_object == NULL_TREE)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;
	      iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
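
/* To illustrate the strategy above with a made-up example: for a loop
   with the uses a[i], b[i] and the exit test i < n, the first pass above
   tries to express all three uses by the single generic candidate
   corresponding to i; only if some use cannot be expressed that way with
   finite cost does the second pass consider the per-use candidates based
   on the addresses of a and b.  */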
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve the set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
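
/* A hypothetical trace of the search above, with made-up candidates
   and costs:

     initial set:              {cand 1}        cost 20
     extend by cand 3, prune:  {cand 3}        cost 14  (improved, commit)
     extend by cand 5, prune:  {cand 3, 5}     cost 15  (worse, revert)
     no improving delta left   -> final cost 14

   Each successful try_improve_iv_set step changes the set by at most one
   extension plus the pruning that follows it, which is what keeps the
   search greedy and cheap.  */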
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates the new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
	  || old_code == MINUS_EXPR
	  || old_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (old_code != MINUS_EXPR
		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
				 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
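
/* For instance, if the original biv increment is

     i_7 = i_3 + 1;

   and CAND is that original iv positioned at this very statement, the
   special case above leaves the statement untouched.  Otherwise the
   right-hand side is recomputed from CAND, yielding something like

     i_7 = (int) ((unsigned int) cand_2 + 1);

   The SSA names here are made up for illustration.  */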
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
      TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
      /* We can transfer points-to information from an old pointer
	 or decl base to the new one.  */
      if (TMR_BASE (new_ref)
	  && TREE_CODE (TMR_BASE (new_ref)) == SSA_NAME
	  && POINTER_TYPE_P (TREE_TYPE (TMR_BASE (new_ref)))
	  && !SSA_NAME_PTR_INFO (TMR_BASE (new_ref)))
	{
	  tree base = get_base_address (old_ref);
	  if ((INDIRECT_REF_P (base)
	       || TREE_CODE (base) == MEM_REF)
	      && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
	    duplicate_ssa_name_ptr_info
	      (TMR_BASE (new_ref), SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
	  else if (TREE_CODE (base) == VAR_DECL
		   || TREE_CODE (base) == PARM_DECL
		   || TREE_CODE (base) == RESULT_DECL)
	    {
	      struct ptr_info_def *pi = get_ptr_info (TMR_BASE (new_ref));
	      pt_solution_set_var (&pi->pt, base);
	    }
	}
    }
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
			data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
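
/* For instance, for an array access a[i] expressed by an unsigned
   candidate ivtmp, create_mem_ref may build a TARGET_MEM_REF along the
   lines of

     MEM[symbol: a, index: ivtmp, step: 4]

   with BASE_HINT steering the choice of the base when the candidate is
   based on a memory object.  The exact shape depends on the addressing
   modes of the target; the above is only illustrative.  */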
/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
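
/* For example, when the elimination succeeds, an exit test such as

     if (i_4 < n_8)

   can be rewritten in terms of the candidate variable into

     if (ivtmp_6 != bound_9)

   where bound_9 was computed on the preheader edge above.  The names and
   the particular comparison code here are illustrative; the code is
   chosen by iv_elimination_compare.  */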
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->inv_id = 0;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name, determine whether it behaves as an induction
     variable in LOOP.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}