1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
127 unsigned regs_used; /* Number of registers used. */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
225 /* The maximum invariant id. */
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
246 /* The number of uses covered by the assignment. */
249 /* Number of uses that cannot be expressed by the candidates in the set. */
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
261 /* The number of candidates in the set. */
264 /* Total number of registers needed. */
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
280 /* Difference of two iv candidate assignments. */
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
369 if (!just_once_each_iteration_p (loop, exit->src))
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
379 dump_iv (FILE *file, struct iv *iv)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
435 fprintf (file, " outside\n");
439 fprintf (file, " address\n");
443 fprintf (file, " compare\n");
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
472 dump_uses (FILE *file, struct ivopts_data *data)
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
505 fprintf (file, " final value replacement\n");
512 fprintf (file, " incremented before exit test\n");
516 fprintf (file, " incremented at end\n");
520 fprintf (file, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
604 if (sbb == loop->latch)
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
626 if (stmt_bb != cand_bb)
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
635 if (bsi_stmt (bsi) == stmt)
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
652 return stmt_after_ip_normal_pos (loop, stmt);
655 return stmt_after_ip_original_pos (cand, stmt);
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
667 /* The edge for that the number of iterations is cached. */
670 /* True if the # of iterations was successfully determined. */
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
721 if (!nfe_desc->valid_p)
724 return &nfe_desc->niter;
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
734 edge exit = single_dom_exit (data->current_loop);
739 return niter_for_exit (data, exit);
742 /* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
755 data->max_inv_id = 0;
756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
791 if (TREE_CODE (base) == INDIRECT_REF)
792 return determine_base_object (TREE_OPERAND (base, 0));
794 return fold_convert (ptr_type_node,
795 build_fold_addr_expr (base));
799 op0 = determine_base_object (TREE_OPERAND (expr, 0));
800 op1 = determine_base_object (TREE_OPERAND (expr, 1));
806 return (code == PLUS_EXPR
808 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
810 return fold_build2 (code, ptr_type_node, op0, op1);
814 return determine_base_object (TREE_OPERAND (expr, 0));
817 return fold_convert (ptr_type_node, expr);
821 /* Allocates an induction variable with given initial value BASE and step STEP
825 alloc_iv (tree base, tree step)
827 struct iv *iv = xcalloc (1, sizeof (struct iv));
829 if (step && integer_zerop (step))
833 iv->base_object = determine_base_object (base);
836 iv->have_use_for = false;
838 iv->ssa_name = NULL_TREE;
843 /* Sets STEP and BASE for induction variable IV. */
846 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
848 struct version_info *info = name_info (data, iv);
850 gcc_assert (!info->iv);
852 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
853 info->iv = alloc_iv (base, step);
854 info->iv->ssa_name = iv;
857 /* Finds induction variable declaration for VAR. */
860 get_iv (struct ivopts_data *data, tree var)
864 if (!name_info (data, var)->iv)
866 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
869 || !flow_bb_inside_loop_p (data->current_loop, bb))
870 set_iv (data, var, var, NULL_TREE);
873 return name_info (data, var)->iv;
876 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
877 not define a simple affine biv with nonzero step. */
880 determine_biv_step (tree phi)
882 struct loop *loop = bb_for_stmt (phi)->loop_father;
883 tree name = PHI_RESULT (phi), base, step;
885 if (!is_gimple_reg (name))
888 if (!simple_iv (loop, phi, name, &base, &step, true))
897 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
900 abnormal_ssa_name_p (tree exp)
905 if (TREE_CODE (exp) != SSA_NAME)
908 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
911 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
912 abnormal phi node. Callback for for_each_index. */
915 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
916 void *data ATTRIBUTE_UNUSED)
918 if (TREE_CODE (base) == ARRAY_REF)
920 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
922 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
926 return !abnormal_ssa_name_p (*index);
929 /* Returns true if EXPR contains a ssa name that occurs in an
930 abnormal phi node. */
933 contains_abnormal_ssa_name_p (tree expr)
936 enum tree_code_class class;
941 code = TREE_CODE (expr);
942 class = TREE_CODE_CLASS (code);
944 if (code == SSA_NAME)
945 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
947 if (code == INTEGER_CST
948 || is_gimple_min_invariant (expr))
951 if (code == ADDR_EXPR)
952 return !for_each_index (&TREE_OPERAND (expr, 0),
953 idx_contains_abnormal_ssa_name_p,
960 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
965 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
977 /* Finds basic ivs. */
980 find_bivs (struct ivopts_data *data)
982 tree phi, step, type, base;
984 struct loop *loop = data->current_loop;
986 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
988 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
991 step = determine_biv_step (phi);
995 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
996 base = expand_simple_operations (base);
997 if (contains_abnormal_ssa_name_p (base)
998 || contains_abnormal_ssa_name_p (step))
1001 type = TREE_TYPE (PHI_RESULT (phi));
1002 base = fold_convert (type, base);
1004 step = fold_convert (type, step);
1006 set_iv (data, PHI_RESULT (phi), base, step);
1013 /* Marks basic ivs. */
1016 mark_bivs (struct ivopts_data *data)
1019 struct iv *iv, *incr_iv;
1020 struct loop *loop = data->current_loop;
1021 basic_block incr_bb;
1023 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1025 iv = get_iv (data, PHI_RESULT (phi));
1029 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1030 incr_iv = get_iv (data, var);
1034 /* If the increment is in the subloop, ignore it. */
1035 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1036 if (incr_bb->loop_father != data->current_loop
1037 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1041 incr_iv->biv_p = true;
1045 /* Checks whether STMT defines a linear induction variable and stores its
1046 parameters to BASE and STEP. */
1049 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1050 tree *base, tree *step)
1053 struct loop *loop = data->current_loop;
1058 if (TREE_CODE (stmt) != MODIFY_EXPR)
1061 lhs = TREE_OPERAND (stmt, 0);
1062 if (TREE_CODE (lhs) != SSA_NAME)
1065 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1067 *base = expand_simple_operations (*base);
1069 if (contains_abnormal_ssa_name_p (*base)
1070 || contains_abnormal_ssa_name_p (*step))
1076 /* Finds general ivs in statement STMT. */
1079 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1083 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1086 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1089 /* Finds general ivs in basic block BB. */
1092 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1094 block_stmt_iterator bsi;
1096 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1097 find_givs_in_stmt (data, bsi_stmt (bsi));
1100 /* Finds general ivs. */
1103 find_givs (struct ivopts_data *data)
1105 struct loop *loop = data->current_loop;
1106 basic_block *body = get_loop_body_in_dom_order (loop);
1109 for (i = 0; i < loop->num_nodes; i++)
1110 find_givs_in_bb (data, body[i]);
1114 /* For each ssa name defined in LOOP determines whether it is an induction
1115 variable and if so, its initial value and step. */
1118 find_induction_variables (struct ivopts_data *data)
1123 if (!find_bivs (data))
1129 if (dump_file && (dump_flags & TDF_DETAILS))
1131 struct tree_niter_desc *niter;
1133 niter = niter_for_single_dom_exit (data);
1137 fprintf (dump_file, " number of iterations ");
1138 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1139 fprintf (dump_file, "\n");
1141 fprintf (dump_file, " may be zero if ");
1142 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1143 fprintf (dump_file, "\n");
1144 fprintf (dump_file, "\n");
1147 fprintf (dump_file, "Induction variables:\n\n");
1149 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1151 if (ver_info (data, i)->iv)
1152 dump_iv (dump_file, ver_info (data, i)->iv);
1159 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1161 static struct iv_use *
1162 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1163 tree stmt, enum use_type use_type)
1165 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1167 use->id = n_iv_uses (data);
1168 use->type = use_type;
1172 use->related_cands = BITMAP_ALLOC (NULL);
1174 /* To avoid showing ssa name in the dumps, if it was not reset by the
1176 iv->ssa_name = NULL_TREE;
1178 if (dump_file && (dump_flags & TDF_DETAILS))
1179 dump_use (dump_file, use);
1181 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1186 /* Checks whether OP is a loop-level invariant and if so, records it.
1187 NONLINEAR_USE is true if the invariant is used in a way we do not
1188 handle specially. */
1191 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1194 struct version_info *info;
1196 if (TREE_CODE (op) != SSA_NAME
1197 || !is_gimple_reg (op))
1200 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1202 && flow_bb_inside_loop_p (data->current_loop, bb))
1205 info = name_info (data, op);
1207 info->has_nonlin_use |= nonlinear_use;
1209 info->inv_id = ++data->max_inv_id;
1210 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1213 /* Checks whether the use OP is interesting and if so, records it
1216 static struct iv_use *
1217 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1225 if (TREE_CODE (op) != SSA_NAME)
1228 iv = get_iv (data, op);
1232 if (iv->have_use_for)
1234 use = iv_use (data, iv->use_id);
1236 gcc_assert (use->type == USE_NONLINEAR_EXPR
1237 || use->type == USE_OUTER);
1239 if (type == USE_NONLINEAR_EXPR)
1240 use->type = USE_NONLINEAR_EXPR;
1244 if (zero_p (iv->step))
1246 record_invariant (data, op, true);
1249 iv->have_use_for = true;
1251 civ = xmalloc (sizeof (struct iv));
1254 stmt = SSA_NAME_DEF_STMT (op);
1255 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1256 || TREE_CODE (stmt) == MODIFY_EXPR);
1258 use = record_use (data, NULL, civ, stmt, type);
1259 iv->use_id = use->id;
1264 /* Checks whether the use OP is interesting and if so, records it. */
1266 static struct iv_use *
1267 find_interesting_uses_op (struct ivopts_data *data, tree op)
1269 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1272 /* Records a definition of induction variable OP that is used outside of the
1275 static struct iv_use *
1276 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1278 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1281 /* Checks whether the condition *COND_P in STMT is interesting
1282 and if so, records it. */
1285 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1289 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1291 tree zero = integer_zero_node;
1293 const_iv.step = NULL_TREE;
1295 if (TREE_CODE (*cond_p) != SSA_NAME
1296 && !COMPARISON_CLASS_P (*cond_p))
1299 if (TREE_CODE (*cond_p) == SSA_NAME)
1306 op0_p = &TREE_OPERAND (*cond_p, 0);
1307 op1_p = &TREE_OPERAND (*cond_p, 1);
1310 if (TREE_CODE (*op0_p) == SSA_NAME)
1311 iv0 = get_iv (data, *op0_p);
1315 if (TREE_CODE (*op1_p) == SSA_NAME)
1316 iv1 = get_iv (data, *op1_p);
1320 if (/* When comparing with non-invariant value, we may not do any senseful
1321 induction variable elimination. */
1323 /* Eliminating condition based on two ivs would be nontrivial.
1324 ??? TODO -- it is not really important to handle this case. */
1325 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1327 find_interesting_uses_op (data, *op0_p);
1328 find_interesting_uses_op (data, *op1_p);
1332 if (zero_p (iv0->step) && zero_p (iv1->step))
1334 /* If both are invariants, this is a work for unswitching. */
1338 civ = xmalloc (sizeof (struct iv));
1339 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1340 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1343 /* Returns true if expression EXPR is obviously invariant in LOOP,
1344 i.e. if all its operands are defined outside of the LOOP. */
1347 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1352 if (is_gimple_min_invariant (expr))
1355 if (TREE_CODE (expr) == SSA_NAME)
1357 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1359 && flow_bb_inside_loop_p (loop, def_bb))
1368 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1369 for (i = 0; i < len; i++)
1370 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1376 /* Cumulates the steps of indices into DATA and replaces their values with the
1377 initial ones. Returns false when the value of the index cannot be determined.
1378 Callback for for_each_index. */
1380 struct ifs_ivopts_data
1382 struct ivopts_data *ivopts_data;
1388 idx_find_step (tree base, tree *idx, void *data)
1390 struct ifs_ivopts_data *dta = data;
1392 tree step, iv_step, lbound, off;
1393 struct loop *loop = dta->ivopts_data->current_loop;
1395 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1396 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1399 /* If base is a component ref, require that the offset of the reference
1401 if (TREE_CODE (base) == COMPONENT_REF)
1403 off = component_ref_field_offset (base);
1404 return expr_invariant_in_loop_p (loop, off);
1407 /* If base is array, first check whether we will be able to move the
1408 reference out of the loop (in order to take its address in strength
1409 reduction). In order for this to work we need both lower bound
1410 and step to be loop invariants. */
1411 if (TREE_CODE (base) == ARRAY_REF)
1413 step = array_ref_element_size (base);
1414 lbound = array_ref_low_bound (base);
1416 if (!expr_invariant_in_loop_p (loop, step)
1417 || !expr_invariant_in_loop_p (loop, lbound))
1421 if (TREE_CODE (*idx) != SSA_NAME)
1424 iv = get_iv (dta->ivopts_data, *idx);
1433 if (TREE_CODE (base) == ARRAY_REF)
1435 step = array_ref_element_size (base);
1437 /* We only handle addresses whose step is an integer constant. */
1438 if (TREE_CODE (step) != INTEGER_CST)
1442 /* The step for pointer arithmetics already is 1 byte. */
1443 step = build_int_cst (sizetype, 1);
1445 if (TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
1446 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1447 sizetype, iv->base, iv->step, dta->stmt);
1449 iv_step = fold_convert (sizetype, iv->step);
1453 /* The index might wrap. */
1457 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1460 *dta->step_p = step;
1462 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1467 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1468 object is passed to it in DATA. */
1471 idx_record_use (tree base, tree *idx,
1474 find_interesting_uses_op (data, *idx);
1475 if (TREE_CODE (base) == ARRAY_REF)
1477 find_interesting_uses_op (data, array_ref_element_size (base));
1478 find_interesting_uses_op (data, array_ref_low_bound (base));
1483 /* Returns true if memory reference REF may be unaligned. */
1486 may_be_unaligned_p (tree ref)
1490 HOST_WIDE_INT bitsize;
1491 HOST_WIDE_INT bitpos;
1493 enum machine_mode mode;
1494 int unsignedp, volatilep;
1495 unsigned base_align;
1497 /* The test below is basically copy of what expr.c:normal_inner_ref
1498 does to check whether the object must be loaded by parts when
1499 STRICT_ALIGNMENT is true. */
1500 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1501 &unsignedp, &volatilep, true);
1502 base_type = TREE_TYPE (base);
1503 base_align = TYPE_ALIGN (base_type);
1506 && (base_align < GET_MODE_ALIGNMENT (mode)
1507 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1508 || bitpos % BITS_PER_UNIT != 0))
1514 /* Finds addresses in *OP_P inside STMT. */
1517 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1519 tree base = unshare_expr (*op_p), step = NULL;
1521 struct ifs_ivopts_data ifs_ivopts_data;
1523 /* Do not play with volatile memory references. A bit too conservative,
1524 perhaps, but safe. */
1525 if (stmt_ann (stmt)->has_volatile_ops)
1528 /* Ignore bitfields for now. Not really something terribly complicated
1530 if (TREE_CODE (base) == COMPONENT_REF
1531 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1534 if (STRICT_ALIGNMENT
1535 && may_be_unaligned_p (base))
1538 ifs_ivopts_data.ivopts_data = data;
1539 ifs_ivopts_data.stmt = stmt;
1540 ifs_ivopts_data.step_p = &step;
1541 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1545 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1546 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1548 base = build_fold_addr_expr (base);
1550 civ = alloc_iv (base, step);
1551 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1555 for_each_index (op_p, idx_record_use, data);
1558 /* Finds and records invariants used in STMT. */
1561 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1564 use_operand_p use_p;
1567 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1569 op = USE_FROM_PTR (use_p);
1570 record_invariant (data, op, false);
1574 /* Finds interesting uses of induction variables in the statement STMT. */
1577 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1582 use_operand_p use_p;
1584 find_invariants_stmt (data, stmt);
1586 if (TREE_CODE (stmt) == COND_EXPR)
1588 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1592 if (TREE_CODE (stmt) == MODIFY_EXPR)
1594 lhs = TREE_OPERAND (stmt, 0);
1595 rhs = TREE_OPERAND (stmt, 1);
1597 if (TREE_CODE (lhs) == SSA_NAME)
1599 /* If the statement defines an induction variable, the uses are not
1600 interesting by themselves. */
1602 iv = get_iv (data, lhs);
1604 if (iv && !zero_p (iv->step))
1608 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1610 case tcc_comparison:
1611 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1615 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1616 if (REFERENCE_CLASS_P (lhs))
1617 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1623 if (REFERENCE_CLASS_P (lhs)
1624 && is_gimple_val (rhs))
1626 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1627 find_interesting_uses_op (data, rhs);
1631 /* TODO -- we should also handle address uses of type
1633 memory = call (whatever);
1640 if (TREE_CODE (stmt) == PHI_NODE
1641 && bb_for_stmt (stmt) == data->current_loop->header)
1643 lhs = PHI_RESULT (stmt);
1644 iv = get_iv (data, lhs);
1646 if (iv && !zero_p (iv->step))
1650 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1652 op = USE_FROM_PTR (use_p);
1654 if (TREE_CODE (op) != SSA_NAME)
1657 iv = get_iv (data, op);
1661 find_interesting_uses_op (data, op);
1665 /* Finds interesting uses of induction variables outside of loops
1666 on loop exit edge EXIT. */
1669 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1673 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1675 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1676 find_interesting_uses_outer (data, def);
1680 /* Finds uses of the induction variables that are interesting. */
1683 find_interesting_uses (struct ivopts_data *data)
1686 block_stmt_iterator bsi;
1688 basic_block *body = get_loop_body (data->current_loop);
1690 struct version_info *info;
1693 if (dump_file && (dump_flags & TDF_DETAILS))
1694 fprintf (dump_file, "Uses:\n\n");
1696 for (i = 0; i < data->current_loop->num_nodes; i++)
1701 FOR_EACH_EDGE (e, ei, bb->succs)
1702 if (e->dest != EXIT_BLOCK_PTR
1703 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1704 find_interesting_uses_outside (data, e);
1706 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1707 find_interesting_uses_stmt (data, phi);
1708 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1709 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1716 fprintf (dump_file, "\n");
1718 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1720 info = ver_info (data, i);
1723 fprintf (dump_file, " ");
1724 print_generic_expr (dump_file, info->name, TDF_SLIM);
1725 fprintf (dump_file, " is invariant (%d)%s\n",
1726 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1730 fprintf (dump_file, "\n");
1736 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1737 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1738 we are at the top-level of the processed address. */
1741 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1742 unsigned HOST_WIDE_INT *offset)
1744 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1745 enum tree_code code;
1746 tree type, orig_type = TREE_TYPE (expr);
1747 unsigned HOST_WIDE_INT off0, off1, st;
1748 tree orig_expr = expr;
1752 type = TREE_TYPE (expr);
1753 code = TREE_CODE (expr);
1759 if (!cst_and_fits_in_hwi (expr)
1763 *offset = int_cst_value (expr);
1764 return build_int_cst_type (orig_type, 0);
1768 op0 = TREE_OPERAND (expr, 0);
1769 op1 = TREE_OPERAND (expr, 1);
1771 op0 = strip_offset_1 (op0, false, false, &off0);
1772 op1 = strip_offset_1 (op1, false, false, &off1);
1774 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1775 if (op0 == TREE_OPERAND (expr, 0)
1776 && op1 == TREE_OPERAND (expr, 1))
1781 else if (zero_p (op0))
1783 if (code == PLUS_EXPR)
1786 expr = fold_build1 (NEGATE_EXPR, type, op1);
1789 expr = fold_build2 (code, type, op0, op1);
1791 return fold_convert (orig_type, expr);
1797 step = array_ref_element_size (expr);
1798 if (!cst_and_fits_in_hwi (step))
1801 st = int_cst_value (step);
1802 op1 = TREE_OPERAND (expr, 1);
1803 op1 = strip_offset_1 (op1, false, false, &off1);
1804 *offset = off1 * st;
1809 /* Strip the component reference completely. */
1810 op0 = TREE_OPERAND (expr, 0);
1811 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1821 tmp = component_ref_field_offset (expr);
1823 && cst_and_fits_in_hwi (tmp))
1825 /* Strip the component reference completely. */
1826 op0 = TREE_OPERAND (expr, 0);
1827 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1828 *offset = off0 + int_cst_value (tmp);
1834 op0 = TREE_OPERAND (expr, 0);
1835 op0 = strip_offset_1 (op0, true, true, &off0);
1838 if (op0 == TREE_OPERAND (expr, 0))
1841 expr = build_fold_addr_expr (op0);
1842 return fold_convert (orig_type, expr);
1845 inside_addr = false;
1852 /* Default handling of expressions for that we want to recurse into
1853 the first operand. */
1854 op0 = TREE_OPERAND (expr, 0);
1855 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1858 if (op0 == TREE_OPERAND (expr, 0)
1859 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1862 expr = copy_node (expr);
1863 TREE_OPERAND (expr, 0) = op0;
1865 TREE_OPERAND (expr, 1) = op1;
1867 /* Inside address, we might strip the top level component references,
1868 thus changing type of the expression. Handling of ADDR_EXPR
1870 expr = fold_convert (orig_type, expr);
1875 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1878 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1880 return strip_offset_1 (expr, false, false, offset);
1883 /* Returns variant of TYPE that can be used as base for different uses.
1884 For integer types, we return unsigned variant of the type, which
1885 avoids problems with overflows. For pointer types, we return void *. */
1888 generic_type_for (tree type)
1890 if (POINTER_TYPE_P (type))
1891 return ptr_type_node;
1893 if (TYPE_UNSIGNED (type))
1896 return unsigned_type_for (type);
1899 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1900 the bitmap to that we should store it. */
1902 static struct ivopts_data *fd_ivopts_data;
1904 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1906 bitmap *depends_on = data;
1907 struct version_info *info;
1909 if (TREE_CODE (*expr_p) != SSA_NAME)
1911 info = name_info (fd_ivopts_data, *expr_p);
1913 if (!info->inv_id || info->has_nonlin_use)
1917 *depends_on = BITMAP_ALLOC (NULL);
1918 bitmap_set_bit (*depends_on, info->inv_id);
1923 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1924 position to POS. If USE is not NULL, the candidate is set as related to
1925 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1926 replacement of the final value of the iv by a direct computation. */
1928 static struct iv_cand *
1929 add_candidate_1 (struct ivopts_data *data,
1930 tree base, tree step, bool important, enum iv_position pos,
1931 struct iv_use *use, tree incremented_at)
1934 struct iv_cand *cand = NULL;
1935 tree type, orig_type;
1939 orig_type = TREE_TYPE (base);
1940 type = generic_type_for (orig_type);
1941 if (type != orig_type)
1943 base = fold_convert (type, base);
1945 step = fold_convert (type, step);
1949 for (i = 0; i < n_iv_cands (data); i++)
1951 cand = iv_cand (data, i);
1953 if (cand->pos != pos)
1956 if (cand->incremented_at != incremented_at)
1970 if (!operand_equal_p (base, cand->iv->base, 0))
1973 if (zero_p (cand->iv->step))
1980 if (step && operand_equal_p (step, cand->iv->step, 0))
1985 if (i == n_iv_cands (data))
1987 cand = xcalloc (1, sizeof (struct iv_cand));
1993 cand->iv = alloc_iv (base, step);
1996 if (pos != IP_ORIGINAL && cand->iv)
1998 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1999 cand->var_after = cand->var_before;
2001 cand->important = important;
2002 cand->incremented_at = incremented_at;
2003 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2006 && TREE_CODE (step) != INTEGER_CST)
2008 fd_ivopts_data = data;
2009 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2012 if (dump_file && (dump_flags & TDF_DETAILS))
2013 dump_cand (dump_file, cand);
2016 if (important && !cand->important)
2018 cand->important = true;
2019 if (dump_file && (dump_flags & TDF_DETAILS))
2020 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2025 bitmap_set_bit (use->related_cands, i);
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 fprintf (dump_file, "Candidate %d is related to use %d\n",
2034 /* Returns true if incrementing the induction variable at the end of the LOOP
2037 The purpose is to avoid splitting latch edge with a biv increment, thus
2038 creating a jump, possibly confusing other optimization passes and leaving
2039 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2040 is not available (so we do not have a better alternative), or if the latch
2041 edge is already nonempty. */
2044 allow_ip_end_pos_p (struct loop *loop)
2046 if (!ip_normal_pos (loop))
2049 if (!empty_block_p (ip_end_pos (loop)))
2055 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2056 position to POS. If USE is not NULL, the candidate is set as related to
2057 it. The candidate computation is scheduled on all available positions. */
2060 add_candidate (struct ivopts_data *data,
2061 tree base, tree step, bool important, struct iv_use *use)
2063 if (ip_normal_pos (data->current_loop))
2064 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2065 if (ip_end_pos (data->current_loop)
2066 && allow_ip_end_pos_p (data->current_loop))
2067 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2070 /* Add a standard "0 + 1 * iteration" iv candidate for a
2071 type with SIZE bits. */
2074 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2077 tree type = lang_hooks.types.type_for_size (size, true);
2078 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2082 /* Adds standard iv candidates. */
2085 add_standard_iv_candidates (struct ivopts_data *data)
2087 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2089 /* The same for a double-integer type if it is still fast enough. */
2090 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2091 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2095 /* Adds candidates bases on the old induction variable IV. */
2098 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2101 struct iv_cand *cand;
2103 add_candidate (data, iv->base, iv->step, true, NULL);
2105 /* The same, but with initial value zero. */
2106 add_candidate (data,
2107 build_int_cst (TREE_TYPE (iv->base), 0),
2108 iv->step, true, NULL);
2110 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2111 if (TREE_CODE (phi) == PHI_NODE)
2113 /* Additionally record the possibility of leaving the original iv
2115 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2116 cand = add_candidate_1 (data,
2117 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2118 SSA_NAME_DEF_STMT (def));
2119 cand->var_before = iv->ssa_name;
2120 cand->var_after = def;
2124 /* Adds candidates based on the old induction variables. */
2127 add_old_ivs_candidates (struct ivopts_data *data)
2133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2135 iv = ver_info (data, i)->iv;
2136 if (iv && iv->biv_p && !zero_p (iv->step))
2137 add_old_iv_candidates (data, iv);
2141 /* Adds candidates based on the value of the induction variable IV and USE. */
2144 add_iv_value_candidates (struct ivopts_data *data,
2145 struct iv *iv, struct iv_use *use)
2147 unsigned HOST_WIDE_INT offset;
2150 add_candidate (data, iv->base, iv->step, false, use);
2152 /* The same, but with initial value zero. Make such variable important,
2153 since it is generic enough so that possibly many uses may be based
2155 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2156 iv->step, true, use);
2158 /* Third, try removing the constant offset. */
2159 base = strip_offset (iv->base, &offset);
2161 add_candidate (data, base, iv->step, false, use);
2164 /* Possibly adds pseudocandidate for replacing the final value of USE by
2165 a direct computation. */
2168 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2170 struct tree_niter_desc *niter;
2172 /* We must know where we exit the loop and how many times does it roll. */
2173 niter = niter_for_single_dom_exit (data);
2175 || !zero_p (niter->may_be_zero))
2178 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2181 /* Adds candidates based on the uses. */
2184 add_derived_ivs_candidates (struct ivopts_data *data)
2188 for (i = 0; i < n_iv_uses (data); i++)
2190 struct iv_use *use = iv_use (data, i);
2197 case USE_NONLINEAR_EXPR:
2200 /* Just add the ivs based on the value of the iv used here. */
2201 add_iv_value_candidates (data, use->iv, use);
2205 add_iv_value_candidates (data, use->iv, use);
2207 /* Additionally, add the pseudocandidate for the possibility to
2208 replace the final value by a direct computation. */
2209 add_iv_outer_candidates (data, use);
2218 /* Record important candidates and add them to related_cands bitmaps
2222 record_important_candidates (struct ivopts_data *data)
2227 for (i = 0; i < n_iv_cands (data); i++)
2229 struct iv_cand *cand = iv_cand (data, i);
2231 if (cand->important)
2232 bitmap_set_bit (data->important_candidates, i);
2235 data->consider_all_candidates = (n_iv_cands (data)
2236 <= CONSIDER_ALL_CANDIDATES_BOUND);
2238 if (data->consider_all_candidates)
2240 /* We will not need "related_cands" bitmaps in this case,
2241 so release them to decrease peak memory consumption. */
2242 for (i = 0; i < n_iv_uses (data); i++)
2244 use = iv_use (data, i);
2245 BITMAP_FREE (use->related_cands);
2250 /* Add important candidates to the related_cands bitmaps. */
2251 for (i = 0; i < n_iv_uses (data); i++)
2252 bitmap_ior_into (iv_use (data, i)->related_cands,
2253 data->important_candidates);
2257 /* Finds the candidates for the induction variables. */
2260 find_iv_candidates (struct ivopts_data *data)
2262 /* Add commonly used ivs. */
2263 add_standard_iv_candidates (data);
2265 /* Add old induction variables. */
2266 add_old_ivs_candidates (data);
2268 /* Add induction variables derived from uses. */
2269 add_derived_ivs_candidates (data);
2271 /* Record the important candidates. */
2272 record_important_candidates (data);
2275 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2276 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2277 we allocate a simple list to every use. */
2280 alloc_use_cost_map (struct ivopts_data *data)
2282 unsigned i, size, s, j;
2284 for (i = 0; i < n_iv_uses (data); i++)
2286 struct iv_use *use = iv_use (data, i);
2289 if (data->consider_all_candidates)
2290 size = n_iv_cands (data);
2294 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2299 /* Round up to the power of two, so that moduling by it is fast. */
2300 for (size = 1; size < s; size <<= 1)
2304 use->n_map_members = size;
2305 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2309 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2310 on invariants DEPENDS_ON and that the value used in expressing it
2314 set_use_iv_cost (struct ivopts_data *data,
2315 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2316 bitmap depends_on, tree value)
2322 BITMAP_FREE (depends_on);
2326 if (data->consider_all_candidates)
2328 use->cost_map[cand->id].cand = cand;
2329 use->cost_map[cand->id].cost = cost;
2330 use->cost_map[cand->id].depends_on = depends_on;
2331 use->cost_map[cand->id].value = value;
2335 /* n_map_members is a power of two, so this computes modulo. */
2336 s = cand->id & (use->n_map_members - 1);
2337 for (i = s; i < use->n_map_members; i++)
2338 if (!use->cost_map[i].cand)
2340 for (i = 0; i < s; i++)
2341 if (!use->cost_map[i].cand)
2347 use->cost_map[i].cand = cand;
2348 use->cost_map[i].cost = cost;
2349 use->cost_map[i].depends_on = depends_on;
2350 use->cost_map[i].value = value;
2353 /* Gets cost of (USE, CANDIDATE) pair. */
2355 static struct cost_pair *
2356 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2357 struct iv_cand *cand)
2360 struct cost_pair *ret;
2365 if (data->consider_all_candidates)
2367 ret = use->cost_map + cand->id;
2374 /* n_map_members is a power of two, so this computes modulo. */
2375 s = cand->id & (use->n_map_members - 1);
2376 for (i = s; i < use->n_map_members; i++)
2377 if (use->cost_map[i].cand == cand)
2378 return use->cost_map + i;
2380 for (i = 0; i < s; i++)
2381 if (use->cost_map[i].cand == cand)
2382 return use->cost_map + i;
2387 /* Returns estimate on cost of computing SEQ. */
2395 for (; seq; seq = NEXT_INSN (seq))
2397 set = single_set (seq);
2399 cost += rtx_cost (set, SET);
2407 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2409 produce_memory_decl_rtl (tree obj, int *regno)
2414 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2416 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2417 x = gen_rtx_SYMBOL_REF (Pmode, name);
2420 x = gen_raw_REG (Pmode, (*regno)++);
2422 return gen_rtx_MEM (DECL_MODE (obj), x);
2425 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2426 walk_tree. DATA contains the actual fake register number. */
2429 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2431 tree obj = NULL_TREE;
2435 switch (TREE_CODE (*expr_p))
2438 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2439 handled_component_p (*expr_p);
2440 expr_p = &TREE_OPERAND (*expr_p, 0))
2444 x = produce_memory_decl_rtl (obj, regno);
2449 obj = SSA_NAME_VAR (*expr_p);
2450 if (!DECL_RTL_SET_P (obj))
2451 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2460 if (DECL_RTL_SET_P (obj))
2463 if (DECL_MODE (obj) == BLKmode)
2464 x = produce_memory_decl_rtl (obj, regno);
2466 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2476 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2477 SET_DECL_RTL (obj, x);
2483 /* Determines cost of the computation of EXPR. */
2486 computation_cost (tree expr)
2489 tree type = TREE_TYPE (expr);
2491 /* Avoid using hard regs in ways which may be unsupported. */
2492 int regno = LAST_VIRTUAL_REGISTER + 1;
2494 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2496 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2500 cost = seq_cost (seq);
2502 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2507 /* Returns variable containing the value of candidate CAND at statement AT. */
2510 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2512 if (stmt_after_increment (loop, cand, stmt))
2513 return cand->var_after;
2515 return cand->var_before;
2518 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2519 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2522 tree_int_cst_sign_bit (tree t)
2524 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2525 unsigned HOST_WIDE_INT w;
2527 if (bitno < HOST_BITS_PER_WIDE_INT)
2528 w = TREE_INT_CST_LOW (t);
2531 w = TREE_INT_CST_HIGH (t);
2532 bitno -= HOST_BITS_PER_WIDE_INT;
2535 return (w >> bitno) & 1;
2538 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2539 return cst. Otherwise return NULL_TREE. */
2542 constant_multiple_of (tree type, tree top, tree bot)
2544 tree res, mby, p0, p1;
2545 enum tree_code code;
2551 if (operand_equal_p (top, bot, 0))
2552 return build_int_cst (type, 1);
2554 code = TREE_CODE (top);
2558 mby = TREE_OPERAND (top, 1);
2559 if (TREE_CODE (mby) != INTEGER_CST)
2562 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2566 return fold_binary_to_constant (MULT_EXPR, type, res,
2567 fold_convert (type, mby));
2571 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2574 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2578 return fold_binary_to_constant (code, type, p0, p1);
2581 if (TREE_CODE (bot) != INTEGER_CST)
2584 bot = fold_convert (type, bot);
2585 top = fold_convert (type, top);
2587 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2588 the result afterwards. */
2589 if (tree_int_cst_sign_bit (bot))
2592 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2597 /* Ditto for TOP. */
2598 if (tree_int_cst_sign_bit (top))
2601 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2604 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2607 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2609 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2617 /* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
2618 to make things simpler; this is sufficient in most cases. */
2620 #define MAX_AFF_ELTS 8
2622 struct affine_tree_combination
2624 /* Type of the result of the combination. */
2627 /* Mask modulo that the operations are performed. */
2628 unsigned HOST_WIDE_INT mask;
2630 /* Constant offset. */
2631 unsigned HOST_WIDE_INT offset;
2633 /* Number of elements of the combination. */
2636 /* Elements and their coefficients. */
2637 tree elts[MAX_AFF_ELTS];
2638 unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2640 /* Remainder of the expression. */
2644 /* Sets COMB to CST. */
2647 aff_combination_const (struct affine_tree_combination *comb, tree type,
2648 unsigned HOST_WIDE_INT cst)
2650 unsigned prec = TYPE_PRECISION (type);
2653 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2656 comb->rest = NULL_TREE;
2657 comb->offset = cst & comb->mask;
2660 /* Sets COMB to single element ELT. */
2663 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2665 unsigned prec = TYPE_PRECISION (type);
2668 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2671 comb->elts[0] = elt;
2673 comb->rest = NULL_TREE;
2677 /* Scales COMB by SCALE. */
2680 aff_combination_scale (struct affine_tree_combination *comb,
2681 unsigned HOST_WIDE_INT scale)
2690 aff_combination_const (comb, comb->type, 0);
2694 comb->offset = (scale * comb->offset) & comb->mask;
2695 for (i = 0, j = 0; i < comb->n; i++)
2697 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2698 comb->elts[j] = comb->elts[i];
2699 if (comb->coefs[j] != 0)
2706 if (comb->n < MAX_AFF_ELTS)
2708 comb->coefs[comb->n] = scale;
2709 comb->elts[comb->n] = comb->rest;
2710 comb->rest = NULL_TREE;
2714 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2715 build_int_cst_type (comb->type, scale));
2719 /* Adds ELT * SCALE to COMB. */
2722 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2723 unsigned HOST_WIDE_INT scale)
2730 for (i = 0; i < comb->n; i++)
2731 if (operand_equal_p (comb->elts[i], elt, 0))
2733 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2738 comb->coefs[i] = comb->coefs[comb->n];
2739 comb->elts[i] = comb->elts[comb->n];
2742 if (comb->n < MAX_AFF_ELTS)
2744 comb->coefs[comb->n] = scale;
2745 comb->elts[comb->n] = elt;
2751 elt = fold_convert (comb->type, elt);
2753 elt = fold_build2 (MULT_EXPR, comb->type,
2754 fold_convert (comb->type, elt),
2755 build_int_cst_type (comb->type, scale));
2758 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2763 /* Adds COMB2 to COMB1. */
2766 aff_combination_add (struct affine_tree_combination *comb1,
2767 struct affine_tree_combination *comb2)
2771 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2772 for (i = 0; i < comb2-> n; i++)
2773 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2775 aff_combination_add_elt (comb1, comb2->rest, 1);
2778 /* Splits EXPR into an affine combination of parts. */
2781 tree_to_aff_combination (tree expr, tree type,
2782 struct affine_tree_combination *comb)
2784 struct affine_tree_combination tmp;
2785 enum tree_code code;
2786 tree cst, core, toffset;
2787 HOST_WIDE_INT bitpos, bitsize;
2788 enum machine_mode mode;
2789 int unsignedp, volatilep;
2793 code = TREE_CODE (expr);
2797 aff_combination_const (comb, type, int_cst_value (expr));
2802 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2803 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2804 if (code == MINUS_EXPR)
2805 aff_combination_scale (&tmp, -1);
2806 aff_combination_add (comb, &tmp);
2810 cst = TREE_OPERAND (expr, 1);
2811 if (TREE_CODE (cst) != INTEGER_CST)
2813 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2814 aff_combination_scale (comb, int_cst_value (cst));
2818 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2819 aff_combination_scale (comb, -1);
2823 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2824 &toffset, &mode, &unsignedp, &volatilep,
2826 if (bitpos % BITS_PER_UNIT != 0)
2828 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2829 core = build_fold_addr_expr (core);
2830 if (TREE_CODE (core) == ADDR_EXPR)
2831 aff_combination_add_elt (comb, core, 1);
2834 tree_to_aff_combination (core, type, &tmp);
2835 aff_combination_add (comb, &tmp);
2839 tree_to_aff_combination (toffset, type, &tmp);
2840 aff_combination_add (comb, &tmp);
2848 aff_combination_elt (comb, type, expr);
2851 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2854 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2855 unsigned HOST_WIDE_INT mask)
2857 enum tree_code code;
2860 elt = fold_convert (type, elt);
2867 return fold_build2 (PLUS_EXPR, type, expr, elt);
2873 return fold_build1 (NEGATE_EXPR, type, elt);
2875 return fold_build2 (MINUS_EXPR, type, expr, elt);
2879 return fold_build2 (MULT_EXPR, type, elt,
2880 build_int_cst_type (type, scale));
2882 if ((scale | (mask >> 1)) == mask)
2884 /* Scale is negative. */
2886 scale = (-scale) & mask;
2891 elt = fold_build2 (MULT_EXPR, type, elt,
2892 build_int_cst_type (type, scale));
2893 return fold_build2 (code, type, expr, elt);
2896 /* Makes tree from the affine combination COMB. */
2899 aff_combination_to_tree (struct affine_tree_combination *comb)
2901 tree type = comb->type;
2902 tree expr = comb->rest;
2904 unsigned HOST_WIDE_INT off, sgn;
2906 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2908 for (i = 0; i < comb->n; i++)
2909 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2912 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2914 /* Offset is negative. */
2915 off = (-comb->offset) & comb->mask;
2923 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2927 /* Folds X + RATIO * Y in TYPE. */
2930 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2932 enum tree_code code;
2934 struct affine_tree_combination cx, cy;
2936 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2939 return fold_build2 (PLUS_EXPR, type, x, y);
2941 return fold_build2 (MINUS_EXPR, type, x, y);
2951 cst = build_int_cst_type (type, ratio);
2952 y = fold_build2 (MULT_EXPR, type, y, cst);
2953 return fold_build2 (code, type, x, y);
2956 tree_to_aff_combination (x, type, &cx);
2957 tree_to_aff_combination (y, type, &cy);
2958 aff_combination_scale (&cy, ratio);
2959 aff_combination_add (&cx, &cy);
2961 return aff_combination_to_tree (&cx);
2964 /* Determines the expression by that USE is expressed from induction variable
2965 CAND at statement AT in LOOP. */
2968 get_computation_at (struct loop *loop,
2969 struct iv_use *use, struct iv_cand *cand, tree at)
2971 tree ubase = use->iv->base;
2972 tree ustep = use->iv->step;
2973 tree cbase = cand->iv->base;
2974 tree cstep = cand->iv->step;
2975 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2979 unsigned HOST_WIDE_INT ustepi, cstepi;
2980 HOST_WIDE_INT ratioi;
2982 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2984 /* We do not have a precision to express the values of use. */
2988 expr = var_at_stmt (loop, cand, at);
2990 if (TREE_TYPE (expr) != ctype)
2992 /* This may happen with the original ivs. */
2993 expr = fold_convert (ctype, expr);
2996 if (TYPE_UNSIGNED (utype))
3000 uutype = unsigned_type_for (utype);
3001 ubase = fold_convert (uutype, ubase);
3002 ustep = fold_convert (uutype, ustep);
3005 if (uutype != ctype)
3007 expr = fold_convert (uutype, expr);
3008 cbase = fold_convert (uutype, cbase);
3009 cstep = fold_convert (uutype, cstep);
3012 if (cst_and_fits_in_hwi (cstep)
3013 && cst_and_fits_in_hwi (ustep))
3015 ustepi = int_cst_value (ustep);
3016 cstepi = int_cst_value (cstep);
3018 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3020 /* TODO maybe consider case when ustep divides cstep and the ratio is
3021 a power of 2 (so that the division is fast to execute)? We would
3022 need to be much more careful with overflows etc. then. */
3026 ratio = build_int_cst_type (uutype, ratioi);
3030 ratio = constant_multiple_of (uutype, ustep, cstep);
3034 /* Ratioi is only used to detect special cases when the multiplicative
3035 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3036 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3037 to integer_onep/integer_all_onesp, since the former ignores
3039 if (cst_and_fits_in_hwi (ratio))
3040 ratioi = int_cst_value (ratio);
3041 else if (integer_onep (ratio))
3043 else if (integer_all_onesp (ratio))
3049 /* We may need to shift the value if we are after the increment. */
3050 if (stmt_after_increment (loop, cand, at))
3051 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3053 /* use = ubase - ratio * cbase + ratio * var.
3055 In general case ubase + ratio * (var - cbase) could be better (one less
3056 multiplication), but often it is possible to eliminate redundant parts
3057 of computations from (ubase - ratio * cbase) term, and if it does not
3058 happen, fold is able to apply the distributive law to obtain this form
3063 delta = fold_affine_sum (uutype, ubase, cbase, -1);
3064 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3066 else if (ratioi == -1)
3068 delta = fold_affine_sum (uutype, ubase, cbase, 1);
3069 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3074 delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3077 delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3078 delta = fold_affine_sum (uutype, ubase, delta, -1);
3080 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3081 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3084 return fold_convert (utype, expr);
3087 /* Determines the expression by that USE is expressed from induction variable
3091 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3093 return get_computation_at (loop, use, cand, use->stmt);
3096 /* Returns cost of addition in MODE. */
3099 add_cost (enum machine_mode mode)
3101 static unsigned costs[NUM_MACHINE_MODES];
3109 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3110 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3111 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3116 cost = seq_cost (seq);
3122 if (dump_file && (dump_flags & TDF_DETAILS))
3123 fprintf (dump_file, "Addition in %s costs %d\n",
3124 GET_MODE_NAME (mode), cost);
3128 /* Entry in a hashtable of already known costs for multiplication. */
3131 HOST_WIDE_INT cst; /* The constant to multiply by. */
3132 enum machine_mode mode; /* In mode. */
3133 unsigned cost; /* The cost. */
3136 /* Counts hash value for the ENTRY. */
3139 mbc_entry_hash (const void *entry)
3141 const struct mbc_entry *e = entry;
3143 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3146 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3149 mbc_entry_eq (const void *entry1, const void *entry2)
3151 const struct mbc_entry *e1 = entry1;
3152 const struct mbc_entry *e2 = entry2;
3154 return (e1->mode == e2->mode
3155 && e1->cst == e2->cst);
3158 /* Returns cost of multiplication by constant CST in MODE. */
3161 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3163 static htab_t costs;
3164 struct mbc_entry **cached, act;
3169 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3173 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3175 return (*cached)->cost;
3177 *cached = xmalloc (sizeof (struct mbc_entry));
3178 (*cached)->mode = mode;
3179 (*cached)->cst = cst;
3182 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3187 cost = seq_cost (seq);
3189 if (dump_file && (dump_flags & TDF_DETAILS))
3190 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3191 (int) cst, GET_MODE_NAME (mode), cost);
3193 (*cached)->cost = cost;
3198 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3199 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3200 variable is omitted. The created memory accesses MODE.
3202 TODO -- there must be some better way. This all is quite crude. */
3205 get_address_cost (bool symbol_present, bool var_present,
3206 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3208 #define MAX_RATIO 128
3209 static sbitmap valid_mult;
3210 static HOST_WIDE_INT rat, off;
3211 static HOST_WIDE_INT min_offset, max_offset;
3212 static unsigned costs[2][2][2][2];
3213 unsigned cost, acost;
3214 rtx seq, addr, base;
3215 bool offset_p, ratio_p;
3217 HOST_WIDE_INT s_offset;
3218 unsigned HOST_WIDE_INT mask;
3225 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3227 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3228 for (i = 1; i <= 1 << 20; i <<= 1)
3230 XEXP (addr, 1) = GEN_INT (i);
3231 if (!memory_address_p (Pmode, addr))
3234 max_offset = i >> 1;
3237 for (i = 1; i <= 1 << 20; i <<= 1)
3239 XEXP (addr, 1) = GEN_INT (-i);
3240 if (!memory_address_p (Pmode, addr))
3243 min_offset = -(i >> 1);
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (dump_file, "get_address_cost:\n");
3248 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3249 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3252 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3253 sbitmap_zero (valid_mult);
3255 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3256 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3258 XEXP (addr, 1) = GEN_INT (i);
3259 if (memory_address_p (Pmode, addr))
3261 SET_BIT (valid_mult, i + MAX_RATIO);
3266 if (dump_file && (dump_flags & TDF_DETAILS))
3268 fprintf (dump_file, " allowed multipliers:");
3269 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3270 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3271 fprintf (dump_file, " %d", (int) i);
3272 fprintf (dump_file, "\n");
3273 fprintf (dump_file, "\n");
3277 bits = GET_MODE_BITSIZE (Pmode);
3278 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3280 if ((offset >> (bits - 1) & 1))
3285 offset_p = (s_offset != 0
3286 && min_offset <= s_offset && s_offset <= max_offset);
3287 ratio_p = (ratio != 1
3288 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3289 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3291 if (ratio != 1 && !ratio_p)
3292 cost += multiply_by_cost (ratio, Pmode);
3294 if (s_offset && !offset_p && !symbol_present)
3296 cost += add_cost (Pmode);
3300 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3305 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3306 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3308 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3311 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3315 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3317 base = gen_rtx_fmt_e (CONST, Pmode,
3318 gen_rtx_fmt_ee (PLUS, Pmode,
3323 base = GEN_INT (off);
3328 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3331 addr = memory_address (Pmode, addr);
3335 acost = seq_cost (seq);
3336 acost += address_cost (addr, Pmode);
3340 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3343 return cost + acost;
3345 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3346 invariants the computation depends on. */
3349 force_var_cost (struct ivopts_data *data,
3350 tree expr, bitmap *depends_on)
3352 static bool costs_initialized = false;
3353 static unsigned integer_cost;
3354 static unsigned symbol_cost;
3355 static unsigned address_cost;
3357 unsigned cost0, cost1, cost;
3358 enum machine_mode mode;
3360 if (!costs_initialized)
3362 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3363 rtx x = gen_rtx_MEM (DECL_MODE (var),
3364 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3366 tree type = build_pointer_type (integer_type_node);
3368 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3371 SET_DECL_RTL (var, x);
3372 TREE_STATIC (var) = 1;
3373 addr = build1 (ADDR_EXPR, type, var);
3374 symbol_cost = computation_cost (addr) + 1;
3377 = computation_cost (build2 (PLUS_EXPR, type,
3379 build_int_cst_type (type, 2000))) + 1;
3380 if (dump_file && (dump_flags & TDF_DETAILS))
3382 fprintf (dump_file, "force_var_cost:\n");
3383 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3384 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3385 fprintf (dump_file, " address %d\n", (int) address_cost);
3386 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3387 fprintf (dump_file, "\n");
3390 costs_initialized = true;
3397 fd_ivopts_data = data;
3398 walk_tree (&expr, find_depends, depends_on, NULL);
3401 if (SSA_VAR_P (expr))
3404 if (TREE_INVARIANT (expr))
3406 if (TREE_CODE (expr) == INTEGER_CST)
3407 return integer_cost;
3409 if (TREE_CODE (expr) == ADDR_EXPR)
3411 tree obj = TREE_OPERAND (expr, 0);
3413 if (TREE_CODE (obj) == VAR_DECL
3414 || TREE_CODE (obj) == PARM_DECL
3415 || TREE_CODE (obj) == RESULT_DECL)
3419 return address_cost;
3422 switch (TREE_CODE (expr))
3427 op0 = TREE_OPERAND (expr, 0);
3428 op1 = TREE_OPERAND (expr, 1);
3432 if (is_gimple_val (op0))
3435 cost0 = force_var_cost (data, op0, NULL);
3437 if (is_gimple_val (op1))
3440 cost1 = force_var_cost (data, op1, NULL);
3445 /* Just an arbitrary value, FIXME. */
3446 return target_spill_cost;
3449 mode = TYPE_MODE (TREE_TYPE (expr));
3450 switch (TREE_CODE (expr))
3454 cost = add_cost (mode);
3458 if (cst_and_fits_in_hwi (op0))
3459 cost = multiply_by_cost (int_cst_value (op0), mode);
3460 else if (cst_and_fits_in_hwi (op1))
3461 cost = multiply_by_cost (int_cst_value (op1), mode);
3463 return target_spill_cost;
3473 /* Bound the cost by target_spill_cost. The parts of complicated
3474 computations often are either loop invariant or at least can
3475 be shared between several iv uses, so letting this grow without
3476 limits would not give reasonable results. */
3477 return cost < target_spill_cost ? cost : target_spill_cost;
3480 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3481 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3482 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3483 invariants the computation depends on. */
3486 split_address_cost (struct ivopts_data *data,
3487 tree addr, bool *symbol_present, bool *var_present,
3488 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3491 HOST_WIDE_INT bitsize;
3492 HOST_WIDE_INT bitpos;
3494 enum machine_mode mode;
3495 int unsignedp, volatilep;
3497 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3498 &unsignedp, &volatilep, false);
3501 || bitpos % BITS_PER_UNIT != 0
3502 || TREE_CODE (core) != VAR_DECL)
3504 *symbol_present = false;
3505 *var_present = true;
3506 fd_ivopts_data = data;
3507 walk_tree (&addr, find_depends, depends_on, NULL);
3508 return target_spill_cost;
3511 *offset += bitpos / BITS_PER_UNIT;
3512 if (TREE_STATIC (core)
3513 || DECL_EXTERNAL (core))
3515 *symbol_present = true;
3516 *var_present = false;
3520 *symbol_present = false;
3521 *var_present = true;
3525 /* Estimates cost of expressing difference of addresses E1 - E2 as
3526 var + symbol + offset. The value of offset is added to OFFSET,
3527 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3528 part is missing. DEPENDS_ON is a set of the invariants the computation
3532 ptr_difference_cost (struct ivopts_data *data,
3533 tree e1, tree e2, bool *symbol_present, bool *var_present,
3534 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3536 HOST_WIDE_INT diff = 0;
3539 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3541 if (ptr_difference_const (e1, e2, &diff))
3544 *symbol_present = false;
3545 *var_present = false;
3549 if (e2 == integer_zero_node)
3550 return split_address_cost (data, TREE_OPERAND (e1, 0),
3551 symbol_present, var_present, offset, depends_on);
3553 *symbol_present = false;
3554 *var_present = true;
3556 cost = force_var_cost (data, e1, depends_on);
3557 cost += force_var_cost (data, e2, depends_on);
3558 cost += add_cost (Pmode);
3563 /* Estimates cost of expressing difference E1 - E2 as
3564 var + symbol + offset. The value of offset is added to OFFSET,
3565 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3566 part is missing. DEPENDS_ON is a set of the invariants the computation
3570 difference_cost (struct ivopts_data *data,
3571 tree e1, tree e2, bool *symbol_present, bool *var_present,
3572 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3575 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3576 unsigned HOST_WIDE_INT off1, off2;
3578 e1 = strip_offset (e1, &off1);
3579 e2 = strip_offset (e2, &off2);
3580 *offset += off1 - off2;
3585 if (TREE_CODE (e1) == ADDR_EXPR)
3586 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3588 *symbol_present = false;
3590 if (operand_equal_p (e1, e2, 0))
3592 *var_present = false;
3595 *var_present = true;
3597 return force_var_cost (data, e1, depends_on);
3601 cost = force_var_cost (data, e2, depends_on);
3602 cost += multiply_by_cost (-1, mode);
3607 cost = force_var_cost (data, e1, depends_on);
3608 cost += force_var_cost (data, e2, depends_on);
3609 cost += add_cost (mode);
3614 /* Determines the cost of the computation by that USE is expressed
3615 from induction variable CAND. If ADDRESS_P is true, we just need
3616 to create an address from it, otherwise we want to get it into
3617 register. A set of invariants we depend on is stored in
3618 DEPENDS_ON. AT is the statement at that the value is computed. */
3621 get_computation_cost_at (struct ivopts_data *data,
3622 struct iv_use *use, struct iv_cand *cand,
3623 bool address_p, bitmap *depends_on, tree at)
3625 tree ubase = use->iv->base, ustep = use->iv->step;
3627 tree utype = TREE_TYPE (ubase), ctype;
3628 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3629 HOST_WIDE_INT ratio, aratio;
3630 bool var_present, symbol_present;
3631 unsigned cost = 0, n_sums;
3635 /* Only consider real candidates. */
3639 cbase = cand->iv->base;
3640 cstep = cand->iv->step;
3641 ctype = TREE_TYPE (cbase);
3643 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3645 /* We do not have a precision to express the values of use. */
3651 /* Do not try to express address of an object with computation based
3652 on address of a different object. This may cause problems in rtl
3653 level alias analysis (that does not expect this to be happening,
3654 as this is illegal in C), and would be unlikely to be useful
3656 if (use->iv->base_object
3657 && cand->iv->base_object
3658 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3662 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3664 /* TODO -- add direct handling of this case. */
3668 /* CSTEPI is removed from the offset in case statement is after the
3669 increment. If the step is not constant, we use zero instead.
3670 This is a bit imprecise (there is the extra addition), but
3671 redundancy elimination is likely to transform the code so that
3672 it uses value of the variable before increment anyway,
3673 so it is not that much unrealistic. */
3674 if (cst_and_fits_in_hwi (cstep))
3675 cstepi = int_cst_value (cstep);
3679 if (cst_and_fits_in_hwi (ustep)
3680 && cst_and_fits_in_hwi (cstep))
3682 ustepi = int_cst_value (ustep);
3684 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3691 rat = constant_multiple_of (utype, ustep, cstep);
3696 if (cst_and_fits_in_hwi (rat))
3697 ratio = int_cst_value (rat);
3698 else if (integer_onep (rat))
3700 else if (integer_all_onesp (rat))
3706 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3707 or ratio == 1, it is better to handle this like
3709 ubase - ratio * cbase + ratio * var
3711 (also holds in the case ratio == -1, TODO. */
3713 if (cst_and_fits_in_hwi (cbase))
3715 offset = - ratio * int_cst_value (cbase);
3716 cost += difference_cost (data,
3717 ubase, integer_zero_node,
3718 &symbol_present, &var_present, &offset,
3721 else if (ratio == 1)
3723 cost += difference_cost (data,
3725 &symbol_present, &var_present, &offset,
3730 cost += force_var_cost (data, cbase, depends_on);
3731 cost += add_cost (TYPE_MODE (ctype));
3732 cost += difference_cost (data,
3733 ubase, integer_zero_node,
3734 &symbol_present, &var_present, &offset,
3738 /* If we are after the increment, the value of the candidate is higher by
3740 if (stmt_after_increment (data->current_loop, cand, at))
3741 offset -= ratio * cstepi;
3743 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3744 (symbol/var/const parts may be omitted). If we are looking for an address,
3745 find the cost of addressing this. */
3747 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3749 /* Otherwise estimate the costs for computing the expression. */
3750 aratio = ratio > 0 ? ratio : -ratio;
3751 if (!symbol_present && !var_present && !offset)
3754 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3760 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3764 /* Symbol + offset should be compile-time computable. */
3765 && (symbol_present || offset))
3768 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3772 /* Just get the expression, expand it and measure the cost. */
3773 tree comp = get_computation_at (data->current_loop, use, cand, at);
3779 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3781 return computation_cost (comp);
3785 /* Determines the cost of the computation by that USE is expressed
3786 from induction variable CAND. If ADDRESS_P is true, we just need
3787 to create an address from it, otherwise we want to get it into
3788 register. A set of invariants we depend on is stored in
3792 get_computation_cost (struct ivopts_data *data,
3793 struct iv_use *use, struct iv_cand *cand,
3794 bool address_p, bitmap *depends_on)
3796 return get_computation_cost_at (data,
3797 use, cand, address_p, depends_on, use->stmt);
3800 /* Determines cost of basing replacement of USE on CAND in a generic
3804 determine_use_iv_cost_generic (struct ivopts_data *data,
3805 struct iv_use *use, struct iv_cand *cand)
3810 /* The simple case first -- if we need to express value of the preserved
3811 original biv, the cost is 0. This also prevents us from counting the
3812 cost of increment twice -- once at this use and once in the cost of
3814 if (cand->pos == IP_ORIGINAL
3815 && cand->incremented_at == use->stmt)
3817 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3821 cost = get_computation_cost (data, use, cand, false, &depends_on);
3822 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3824 return cost != INFTY;
3827 /* Determines cost of basing replacement of USE on CAND in an address. */
3830 determine_use_iv_cost_address (struct ivopts_data *data,
3831 struct iv_use *use, struct iv_cand *cand)
3834 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3836 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3838 return cost != INFTY;
3841 /* Computes value of induction variable IV in iteration NITER. */
3844 iv_value (struct iv *iv, tree niter)
3847 tree type = TREE_TYPE (iv->base);
3849 niter = fold_convert (type, niter);
3850 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3852 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3855 /* Computes value of candidate CAND at position AT in iteration NITER. */
3858 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3860 tree val = iv_value (cand->iv, niter);
3861 tree type = TREE_TYPE (cand->iv->base);
3863 if (stmt_after_increment (loop, cand, at))
3864 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3869 /* Returns period of induction variable iv. */
3872 iv_period (struct iv *iv)
3874 tree step = iv->step, period, type;
3877 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3879 /* Period of the iv is gcd (step, type range). Since type range is power
3880 of two, it suffices to determine the maximum power of two that divides
3882 pow2div = num_ending_zeros (step);
3883 type = unsigned_type_for (TREE_TYPE (step));
3885 period = build_low_bits_mask (type,
3886 (TYPE_PRECISION (type)
3887 - tree_low_cst (pow2div, 1)));
3892 /* Returns the comparison operator used when eliminating the iv USE. */
3894 static enum tree_code
3895 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3897 struct loop *loop = data->current_loop;
3901 ex_bb = bb_for_stmt (use->stmt);
3902 exit = EDGE_SUCC (ex_bb, 0);
3903 if (flow_bb_inside_loop_p (loop, exit->dest))
3904 exit = EDGE_SUCC (ex_bb, 1);
3906 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3909 /* Check whether it is possible to express the condition in USE by comparison
3910 of candidate CAND. If so, store the value compared with to BOUND. */
3913 may_eliminate_iv (struct ivopts_data *data,
3914 struct iv_use *use, struct iv_cand *cand, tree *bound)
3918 struct tree_niter_desc *niter;
3920 tree wider_type, period, per_type;
3921 struct loop *loop = data->current_loop;
3923 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3926 /* For now works only for exits that dominate the loop latch. TODO -- extend
3927 for other conditions inside loop body. */
3928 ex_bb = bb_for_stmt (use->stmt);
3929 if (use->stmt != last_stmt (ex_bb)
3930 || TREE_CODE (use->stmt) != COND_EXPR)
3932 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3935 exit = EDGE_SUCC (ex_bb, 0);
3936 if (flow_bb_inside_loop_p (loop, exit->dest))
3937 exit = EDGE_SUCC (ex_bb, 1);
3938 if (flow_bb_inside_loop_p (loop, exit->dest))
3941 niter = niter_for_exit (data, exit);
3943 || !zero_p (niter->may_be_zero))
3947 nit_type = TREE_TYPE (nit);
3949 /* Determine whether we may use the variable to test whether niter iterations
3950 elapsed. This is the case iff the period of the induction variable is
3951 greater than the number of iterations. */
3952 period = iv_period (cand->iv);
3955 per_type = TREE_TYPE (period);
3957 wider_type = TREE_TYPE (period);
3958 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3959 wider_type = per_type;
3961 wider_type = nit_type;
3963 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3964 fold_convert (wider_type, period),
3965 fold_convert (wider_type, nit)))))
3968 *bound = cand_value_at (loop, cand, use->stmt, nit);
3972 /* Determines cost of basing replacement of USE on CAND in a condition. */
3975 determine_use_iv_cost_condition (struct ivopts_data *data,
3976 struct iv_use *use, struct iv_cand *cand)
3978 tree bound = NULL_TREE, op, cond;
3979 bitmap depends_on = NULL;
3982 /* Only consider real candidates. */
3985 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3989 if (may_eliminate_iv (data, use, cand, &bound))
3991 cost = force_var_cost (data, bound, &depends_on);
3993 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3994 return cost != INFTY;
3997 /* The induction variable elimination failed; just express the original
3998 giv. If it is compared with an invariant, note that we cannot get
4000 cost = get_computation_cost (data, use, cand, false, &depends_on);
4003 if (TREE_CODE (cond) != SSA_NAME)
4005 op = TREE_OPERAND (cond, 0);
4006 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4007 op = TREE_OPERAND (cond, 1);
4008 if (TREE_CODE (op) == SSA_NAME)
4010 op = get_iv (data, op)->base;
4011 fd_ivopts_data = data;
4012 walk_tree (&op, find_depends, &depends_on, NULL);
4016 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4017 return cost != INFTY;
4020 /* Checks whether it is possible to replace the final value of USE by
4021 a direct computation. If so, the formula is stored to *VALUE. */
4024 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4027 struct loop *loop = data->current_loop;
4029 struct tree_niter_desc *niter;
4031 exit = single_dom_exit (loop);
4035 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4036 bb_for_stmt (use->stmt)));
4038 niter = niter_for_single_dom_exit (data);
4040 || !zero_p (niter->may_be_zero))
4043 *value = iv_value (use->iv, niter->niter);
4048 /* Determines cost of replacing final value of USE using CAND. */
4051 determine_use_iv_cost_outer (struct ivopts_data *data,
4052 struct iv_use *use, struct iv_cand *cand)
4057 tree value = NULL_TREE;
4058 struct loop *loop = data->current_loop;
4060 /* The simple case first -- if we need to express value of the preserved
4061 original biv, the cost is 0. This also prevents us from counting the
4062 cost of increment twice -- once at this use and once in the cost of
4064 if (cand->pos == IP_ORIGINAL
4065 && cand->incremented_at == use->stmt)
4067 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4073 if (!may_replace_final_value (data, use, &value))
4075 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4080 cost = force_var_cost (data, value, &depends_on);
4082 cost /= AVG_LOOP_NITER (loop);
4084 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4085 return cost != INFTY;
4088 exit = single_dom_exit (loop);
4091 /* If there is just a single exit, we may use value of the candidate
4092 after we take it to determine the value of use. */
4093 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4094 last_stmt (exit->src));
4096 cost /= AVG_LOOP_NITER (loop);
4100 /* Otherwise we just need to compute the iv. */
4101 cost = get_computation_cost (data, use, cand, false, &depends_on);
4104 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4106 return cost != INFTY;
4109 /* Determines cost of basing replacement of USE on CAND. Returns false
4110 if USE cannot be based on CAND. */
4113 determine_use_iv_cost (struct ivopts_data *data,
4114 struct iv_use *use, struct iv_cand *cand)
4118 case USE_NONLINEAR_EXPR:
4119 return determine_use_iv_cost_generic (data, use, cand);
4122 return determine_use_iv_cost_outer (data, use, cand);
4125 return determine_use_iv_cost_address (data, use, cand);
4128 return determine_use_iv_cost_condition (data, use, cand);
4135 /* Determines costs of basing the use of the iv on an iv candidate. */
4138 determine_use_iv_costs (struct ivopts_data *data)
4142 struct iv_cand *cand;
4143 bitmap to_clear = BITMAP_ALLOC (NULL);
4145 alloc_use_cost_map (data);
4147 for (i = 0; i < n_iv_uses (data); i++)
4149 use = iv_use (data, i);
4151 if (data->consider_all_candidates)
4153 for (j = 0; j < n_iv_cands (data); j++)
4155 cand = iv_cand (data, j);
4156 determine_use_iv_cost (data, use, cand);
4163 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4165 cand = iv_cand (data, j);
4166 if (!determine_use_iv_cost (data, use, cand))
4167 bitmap_set_bit (to_clear, j);
4170 /* Remove the candidates for that the cost is infinite from
4171 the list of related candidates. */
4172 bitmap_and_compl_into (use->related_cands, to_clear);
4173 bitmap_clear (to_clear);
4177 BITMAP_FREE (to_clear);
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 fprintf (dump_file, "Use-candidate costs:\n");
4183 for (i = 0; i < n_iv_uses (data); i++)
4185 use = iv_use (data, i);
4187 fprintf (dump_file, "Use %d:\n", i);
4188 fprintf (dump_file, " cand\tcost\tdepends on\n");
4189 for (j = 0; j < use->n_map_members; j++)
4191 if (!use->cost_map[j].cand
4192 || use->cost_map[j].cost == INFTY)
4195 fprintf (dump_file, " %d\t%d\t",
4196 use->cost_map[j].cand->id,
4197 use->cost_map[j].cost);
4198 if (use->cost_map[j].depends_on)
4199 bitmap_print (dump_file,
4200 use->cost_map[j].depends_on, "","");
4201 fprintf (dump_file, "\n");
4204 fprintf (dump_file, "\n");
4206 fprintf (dump_file, "\n");
4210 /* Determines cost of the candidate CAND. */
4213 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4215 unsigned cost_base, cost_step;
4224 /* There are two costs associated with the candidate -- its increment
4225 and its initialization. The second is almost negligible for any loop
4226 that rolls enough, so we take it just very little into account. */
4228 base = cand->iv->base;
4229 cost_base = force_var_cost (data, base, NULL);
4230 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4232 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4234 /* Prefer the original iv unless we may gain something by replacing it;
4235 this is not really relevant for artificial ivs created by other
4237 if (cand->pos == IP_ORIGINAL
4238 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4241 /* Prefer not to insert statements into latch unless there are some
4242 already (so that we do not create unnecessary jumps). */
4243 if (cand->pos == IP_END
4244 && empty_block_p (ip_end_pos (data->current_loop)))
4248 /* Determines costs of computation of the candidates. */
4251 determine_iv_costs (struct ivopts_data *data)
4255 if (dump_file && (dump_flags & TDF_DETAILS))
4257 fprintf (dump_file, "Candidate costs:\n");
4258 fprintf (dump_file, " cand\tcost\n");
4261 for (i = 0; i < n_iv_cands (data); i++)
4263 struct iv_cand *cand = iv_cand (data, i);
4265 determine_iv_cost (data, cand);
4267 if (dump_file && (dump_flags & TDF_DETAILS))
4268 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4271 if (dump_file && (dump_flags & TDF_DETAILS))
4272 fprintf (dump_file, "\n");
4275 /* Calculates cost for having SIZE induction variables. */
4278 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4280 return global_cost_for_size (size,
4281 loop_data (data->current_loop)->regs_used,
4285 /* For each size of the induction variable set determine the penalty. */
4288 determine_set_costs (struct ivopts_data *data)
4292 struct loop *loop = data->current_loop;
4295 /* We use the following model (definitely improvable, especially the
4296 cost function -- TODO):
4298 We estimate the number of registers available (using MD data), name it A.
4300 We estimate the number of registers used by the loop, name it U. This
4301 number is obtained as the number of loop phi nodes (not counting virtual
4302 registers and bivs) + the number of variables from outside of the loop.
4304 We set a reserve R (free regs that are used for temporary computations,
4305 etc.). For now the reserve is a constant 3.
4307 Let I be the number of induction variables.
4309 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4310 make a lot of ivs without a reason).
4311 -- if A - R < U + I <= A, the cost is I * PRES_COST
4312 -- if U + I > A, the cost is I * PRES_COST and
4313 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4315 if (dump_file && (dump_flags & TDF_DETAILS))
4317 fprintf (dump_file, "Global costs:\n");
4318 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4319 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4320 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4321 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4325 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4327 op = PHI_RESULT (phi);
4329 if (!is_gimple_reg (op))
4332 if (get_iv (data, op))
4338 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4340 struct version_info *info = ver_info (data, j);
4342 if (info->inv_id && info->has_nonlin_use)
4346 loop_data (loop)->regs_used = n;
4347 if (dump_file && (dump_flags & TDF_DETAILS))
4348 fprintf (dump_file, " regs_used %d\n", n);
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, " cost for size:\n");
4353 fprintf (dump_file, " ivs\tcost\n");
4354 for (j = 0; j <= 2 * target_avail_regs; j++)
4355 fprintf (dump_file, " %d\t%d\n", j,
4356 ivopts_global_cost_for_size (data, j));
4357 fprintf (dump_file, "\n");
4361 /* Returns true if A is a cheaper cost pair than B. */
4364 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4372 if (a->cost < b->cost)
4375 if (a->cost > b->cost)
4378 /* In case the costs are the same, prefer the cheaper candidate. */
4379 if (a->cand->cost < b->cand->cost)
4385 /* Computes the cost field of IVS structure. */
4388 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4392 cost += ivs->cand_use_cost;
4393 cost += ivs->cand_cost;
4394 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4399 /* Remove invariants in set INVS to set IVS. */
4402 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4410 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4412 ivs->n_invariant_uses[iid]--;
4413 if (ivs->n_invariant_uses[iid] == 0)
4418 /* Set USE not to be expressed by any candidate in IVS. */
4421 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4424 unsigned uid = use->id, cid;
4425 struct cost_pair *cp;
4427 cp = ivs->cand_for_use[uid];
4433 ivs->cand_for_use[uid] = NULL;
4434 ivs->n_cand_uses[cid]--;
4436 if (ivs->n_cand_uses[cid] == 0)
4438 bitmap_clear_bit (ivs->cands, cid);
4439 /* Do not count the pseudocandidates. */
4443 ivs->cand_cost -= cp->cand->cost;
4445 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4448 ivs->cand_use_cost -= cp->cost;
4450 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4451 iv_ca_recount_cost (data, ivs);
4454 /* Add invariants in set INVS to set IVS. */
4457 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4465 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4467 ivs->n_invariant_uses[iid]++;
4468 if (ivs->n_invariant_uses[iid] == 1)
4473 /* Set cost pair for USE in set IVS to CP. */
4476 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4477 struct iv_use *use, struct cost_pair *cp)
4479 unsigned uid = use->id, cid;
4481 if (ivs->cand_for_use[uid] == cp)
4484 if (ivs->cand_for_use[uid])
4485 iv_ca_set_no_cp (data, ivs, use);
4492 ivs->cand_for_use[uid] = cp;
4493 ivs->n_cand_uses[cid]++;
4494 if (ivs->n_cand_uses[cid] == 1)
4496 bitmap_set_bit (ivs->cands, cid);
4497 /* Do not count the pseudocandidates. */
4501 ivs->cand_cost += cp->cand->cost;
4503 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4506 ivs->cand_use_cost += cp->cost;
4507 iv_ca_set_add_invariants (ivs, cp->depends_on);
4508 iv_ca_recount_cost (data, ivs);
4512 /* Extend set IVS by expressing USE by some of the candidates in it
4516 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4519 struct cost_pair *best_cp = NULL, *cp;
4523 gcc_assert (ivs->upto >= use->id);
4525 if (ivs->upto == use->id)
4531 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4533 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4535 if (cheaper_cost_pair (cp, best_cp))
4539 iv_ca_set_cp (data, ivs, use, best_cp);
4542 /* Get cost for assignment IVS. */
4545 iv_ca_cost (struct iv_ca *ivs)
4547 return (ivs->bad_uses ? INFTY : ivs->cost);
4550 /* Returns true if all dependences of CP are among invariants in IVS. */
4553 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4558 if (!cp->depends_on)
4561 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4563 if (ivs->n_invariant_uses[i] == 0)
4570 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4571 it before NEXT_CHANGE. */
4573 static struct iv_ca_delta *
4574 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4575 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4577 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4580 change->old_cp = old_cp;
4581 change->new_cp = new_cp;
4582 change->next_change = next_change;
4587 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4590 static struct iv_ca_delta *
4591 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4593 struct iv_ca_delta *last;
4601 for (last = l1; last->next_change; last = last->next_change)
4603 last->next_change = l2;
4608 /* Returns candidate by that USE is expressed in IVS. */
4610 static struct cost_pair *
4611 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4613 return ivs->cand_for_use[use->id];
4616 /* Reverse the list of changes DELTA, forming the inverse to it. */
4618 static struct iv_ca_delta *
4619 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4621 struct iv_ca_delta *act, *next, *prev = NULL;
4622 struct cost_pair *tmp;
4624 for (act = delta; act; act = next)
4626 next = act->next_change;
4627 act->next_change = prev;
4631 act->old_cp = act->new_cp;
4638 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4639 reverted instead. */
4642 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4643 struct iv_ca_delta *delta, bool forward)
4645 struct cost_pair *from, *to;
4646 struct iv_ca_delta *act;
4649 delta = iv_ca_delta_reverse (delta);
4651 for (act = delta; act; act = act->next_change)
4655 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4656 iv_ca_set_cp (data, ivs, act->use, to);
4660 iv_ca_delta_reverse (delta);
4663 /* Returns true if CAND is used in IVS. */
4666 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4668 return ivs->n_cand_uses[cand->id] > 0;
4671 /* Returns number of induction variable candidates in the set IVS. */
4674 iv_ca_n_cands (struct iv_ca *ivs)
4676 return ivs->n_cands;
4679 /* Free the list of changes DELTA. */
4682 iv_ca_delta_free (struct iv_ca_delta **delta)
4684 struct iv_ca_delta *act, *next;
4686 for (act = *delta; act; act = next)
4688 next = act->next_change;
4695 /* Allocates new iv candidates assignment. */
4697 static struct iv_ca *
4698 iv_ca_new (struct ivopts_data *data)
4700 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4704 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4705 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4706 nw->cands = BITMAP_ALLOC (NULL);
4709 nw->cand_use_cost = 0;
4711 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4717 /* Free memory occupied by the set IVS. */
4720 iv_ca_free (struct iv_ca **ivs)
4722 free ((*ivs)->cand_for_use);
4723 free ((*ivs)->n_cand_uses);
4724 BITMAP_FREE ((*ivs)->cands);
4725 free ((*ivs)->n_invariant_uses);
4730 /* Dumps IVS to FILE. */
4733 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4735 const char *pref = " invariants ";
4738 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4739 bitmap_print (file, ivs->cands, " candidates ","\n");
4741 for (i = 1; i <= data->max_inv_id; i++)
4742 if (ivs->n_invariant_uses[i])
4744 fprintf (file, "%s%d", pref, i);
4747 fprintf (file, "\n");
4750 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4751 new set, and store differences in DELTA. Number of induction variables
4752 in the new set is stored to N_IVS. */
4755 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4756 struct iv_cand *cand, struct iv_ca_delta **delta,
4761 struct cost_pair *old_cp, *new_cp;
4764 for (i = 0; i < ivs->upto; i++)
4766 use = iv_use (data, i);
4767 old_cp = iv_ca_cand_for_use (ivs, use);
4770 && old_cp->cand == cand)
4773 new_cp = get_use_iv_cost (data, use, cand);
4777 if (!iv_ca_has_deps (ivs, new_cp))
4780 if (!cheaper_cost_pair (new_cp, old_cp))
4783 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4786 iv_ca_delta_commit (data, ivs, *delta, true);
4787 cost = iv_ca_cost (ivs);
4789 *n_ivs = iv_ca_n_cands (ivs);
4790 iv_ca_delta_commit (data, ivs, *delta, false);
4795 /* Try narrowing set IVS by removing CAND. Return the cost of
4796 the new set and store the differences in DELTA. */
4799 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4800 struct iv_cand *cand, struct iv_ca_delta **delta)
4804 struct cost_pair *old_cp, *new_cp, *cp;
4806 struct iv_cand *cnd;
4810 for (i = 0; i < n_iv_uses (data); i++)
4812 use = iv_use (data, i);
4814 old_cp = iv_ca_cand_for_use (ivs, use);
4815 if (old_cp->cand != cand)
4820 if (data->consider_all_candidates)
4822 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4827 cnd = iv_cand (data, ci);
4829 cp = get_use_iv_cost (data, use, cnd);
4832 if (!iv_ca_has_deps (ivs, cp))
4835 if (!cheaper_cost_pair (cp, new_cp))
4843 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4848 cnd = iv_cand (data, ci);
4850 cp = get_use_iv_cost (data, use, cnd);
4853 if (!iv_ca_has_deps (ivs, cp))
4856 if (!cheaper_cost_pair (cp, new_cp))
4865 iv_ca_delta_free (delta);
4869 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4872 iv_ca_delta_commit (data, ivs, *delta, true);
4873 cost = iv_ca_cost (ivs);
4874 iv_ca_delta_commit (data, ivs, *delta, false);
4879 /* Try optimizing the set of candidates IVS by removing candidates different
4880 from to EXCEPT_CAND from it. Return cost of the new set, and store
4881 differences in DELTA. */
4884 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4885 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4888 struct iv_ca_delta *act_delta, *best_delta;
4889 unsigned i, best_cost, acost;
4890 struct iv_cand *cand;
4893 best_cost = iv_ca_cost (ivs);
4895 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4897 cand = iv_cand (data, i);
4899 if (cand == except_cand)
4902 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4904 if (acost < best_cost)
4907 iv_ca_delta_free (&best_delta);
4908 best_delta = act_delta;
4911 iv_ca_delta_free (&act_delta);
4920 /* Recurse to possibly remove other unnecessary ivs. */
4921 iv_ca_delta_commit (data, ivs, best_delta, true);
4922 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4923 iv_ca_delta_commit (data, ivs, best_delta, false);
4924 *delta = iv_ca_delta_join (best_delta, *delta);
4928 /* Tries to extend the sets IVS in the best possible way in order
4929 to express the USE. */
4932 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4935 unsigned best_cost, act_cost;
4938 struct iv_cand *cand;
4939 struct iv_ca_delta *best_delta = NULL, *act_delta;
4940 struct cost_pair *cp;
4942 iv_ca_add_use (data, ivs, use);
4943 best_cost = iv_ca_cost (ivs);
4945 cp = iv_ca_cand_for_use (ivs, use);
4948 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4949 iv_ca_set_no_cp (data, ivs, use);
4952 /* First try important candidates. Only if it fails, try the specific ones.
4953 Rationale -- in loops with many variables the best choice often is to use
4954 just one generic biv. If we added here many ivs specific to the uses,
4955 the optimization algorithm later would be likely to get stuck in a local
4956 minimum, thus causing us to create too many ivs. The approach from
4957 few ivs to more seems more likely to be successful -- starting from few
4958 ivs, replacing an expensive use by a specific iv should always be a
4960 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4962 cand = iv_cand (data, i);
4964 if (iv_ca_cand_used_p (ivs, cand))
4967 cp = get_use_iv_cost (data, use, cand);
4971 iv_ca_set_cp (data, ivs, use, cp);
4972 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4973 iv_ca_set_no_cp (data, ivs, use);
4974 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4976 if (act_cost < best_cost)
4978 best_cost = act_cost;
4980 iv_ca_delta_free (&best_delta);
4981 best_delta = act_delta;
4984 iv_ca_delta_free (&act_delta);
4987 if (best_cost == INFTY)
4989 for (i = 0; i < use->n_map_members; i++)
4991 cp = use->cost_map + i;
4996 /* Already tried this. */
4997 if (cand->important)
5000 if (iv_ca_cand_used_p (ivs, cand))
5004 iv_ca_set_cp (data, ivs, use, cp);
5005 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5006 iv_ca_set_no_cp (data, ivs, use);
5007 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5010 if (act_cost < best_cost)
5012 best_cost = act_cost;
5015 iv_ca_delta_free (&best_delta);
5016 best_delta = act_delta;
5019 iv_ca_delta_free (&act_delta);
5023 iv_ca_delta_commit (data, ivs, best_delta, true);
5024 iv_ca_delta_free (&best_delta);
5026 return (best_cost != INFTY);
5029 /* Finds an initial assignment of candidates to uses. */
5031 static struct iv_ca *
5032 get_initial_solution (struct ivopts_data *data)
5034 struct iv_ca *ivs = iv_ca_new (data);
5037 for (i = 0; i < n_iv_uses (data); i++)
5038 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5047 /* Tries to improve set of induction variables IVS. */
5050 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5052 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5053 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5054 struct iv_cand *cand;
5056 /* Try extending the set of induction variables by one. */
5057 for (i = 0; i < n_iv_cands (data); i++)
5059 cand = iv_cand (data, i);
5061 if (iv_ca_cand_used_p (ivs, cand))
5064 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5068 /* If we successfully added the candidate and the set is small enough,
5069 try optimizing it by removing other candidates. */
5070 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5072 iv_ca_delta_commit (data, ivs, act_delta, true);
5073 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5074 iv_ca_delta_commit (data, ivs, act_delta, false);
5075 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5078 if (acost < best_cost)
5081 iv_ca_delta_free (&best_delta);
5082 best_delta = act_delta;
5085 iv_ca_delta_free (&act_delta);
5090 /* Try removing the candidates from the set instead. */
5091 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5093 /* Nothing more we can do. */
5098 iv_ca_delta_commit (data, ivs, best_delta, true);
5099 gcc_assert (best_cost == iv_ca_cost (ivs));
5100 iv_ca_delta_free (&best_delta);
5104 /* Attempts to find the optimal set of induction variables. We do simple
5105 greedy heuristic -- we try to replace at most one candidate in the selected
5106 solution and remove the unused ivs while this improves the cost. */
5108 static struct iv_ca *
5109 find_optimal_iv_set (struct ivopts_data *data)
5115 /* Get the initial solution. */
5116 set = get_initial_solution (data);
5119 if (dump_file && (dump_flags & TDF_DETAILS))
5120 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5124 if (dump_file && (dump_flags & TDF_DETAILS))
5126 fprintf (dump_file, "Initial set of candidates:\n");
5127 iv_ca_dump (data, dump_file, set);
5130 while (try_improve_iv_set (data, set))
5132 if (dump_file && (dump_flags & TDF_DETAILS))
5134 fprintf (dump_file, "Improved to:\n");
5135 iv_ca_dump (data, dump_file, set);
5139 if (dump_file && (dump_flags & TDF_DETAILS))
5140 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5142 for (i = 0; i < n_iv_uses (data); i++)
5144 use = iv_use (data, i);
5145 use->selected = iv_ca_cand_for_use (set, use)->cand;
5151 /* Creates a new induction variable corresponding to CAND. */
5154 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5156 block_stmt_iterator incr_pos;
5166 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5170 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5175 /* Mark that the iv is preserved. */
5176 name_info (data, cand->var_before)->preserve_biv = true;
5177 name_info (data, cand->var_after)->preserve_biv = true;
5179 /* Rewrite the increment so that it uses var_before directly. */
5180 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5185 gimple_add_tmp_var (cand->var_before);
5186 add_referenced_tmp_var (cand->var_before);
5188 base = unshare_expr (cand->iv->base);
5190 create_iv (base, unshare_expr (cand->iv->step),
5191 cand->var_before, data->current_loop,
5192 &incr_pos, after, &cand->var_before, &cand->var_after);
5195 /* Creates new induction variables described in SET. */
5198 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5201 struct iv_cand *cand;
5204 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5206 cand = iv_cand (data, i);
5207 create_new_iv (data, cand);
5211 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5212 is true, remove also the ssa name defined by the statement. */
5215 remove_statement (tree stmt, bool including_defined_name)
5217 if (TREE_CODE (stmt) == PHI_NODE)
5219 if (!including_defined_name)
5221 /* Prevent the ssa name defined by the statement from being removed. */
5222 SET_PHI_RESULT (stmt, NULL);
5224 remove_phi_node (stmt, NULL_TREE);
5228 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5234 /* Rewrites USE (definition of iv used in a nonlinear expression)
5235 using candidate CAND. */
5238 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5239 struct iv_use *use, struct iv_cand *cand)
5242 tree op, stmts, tgt, ass;
5243 block_stmt_iterator bsi, pbsi;
5245 /* An important special case -- if we are asked to express value of
5246 the original iv by itself, just exit; there is no need to
5247 introduce a new computation (that might also need casting the
5248 variable to unsigned and back). */
5249 if (cand->pos == IP_ORIGINAL
5250 && TREE_CODE (use->stmt) == MODIFY_EXPR
5251 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5253 op = TREE_OPERAND (use->stmt, 1);
5255 /* Be a bit careful. In case variable is expressed in some
5256 complicated way, rewrite it so that we may get rid of this
5257 complicated expression. */
5258 if ((TREE_CODE (op) == PLUS_EXPR
5259 || TREE_CODE (op) == MINUS_EXPR)
5260 && TREE_OPERAND (op, 0) == cand->var_before
5261 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5265 comp = unshare_expr (get_computation (data->current_loop,
5267 switch (TREE_CODE (use->stmt))
5270 tgt = PHI_RESULT (use->stmt);
5272 /* If we should keep the biv, do not replace it. */
5273 if (name_info (data, tgt)->preserve_biv)
5276 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5277 while (!bsi_end_p (pbsi)
5278 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5286 tgt = TREE_OPERAND (use->stmt, 0);
5287 bsi = bsi_for_stmt (use->stmt);
5294 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5296 if (TREE_CODE (use->stmt) == PHI_NODE)
5299 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5300 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5301 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5302 remove_statement (use->stmt, false);
5303 SSA_NAME_DEF_STMT (tgt) = ass;
5308 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5309 TREE_OPERAND (use->stmt, 1) = op;
5313 /* Replaces ssa name in index IDX by its basic variable. Callback for
5317 idx_remove_ssa_names (tree base, tree *idx,
5318 void *data ATTRIBUTE_UNUSED)
5322 if (TREE_CODE (*idx) == SSA_NAME)
5323 *idx = SSA_NAME_VAR (*idx);
5325 if (TREE_CODE (base) == ARRAY_REF)
5327 op = &TREE_OPERAND (base, 2);
5329 && TREE_CODE (*op) == SSA_NAME)
5330 *op = SSA_NAME_VAR (*op);
5331 op = &TREE_OPERAND (base, 3);
5333 && TREE_CODE (*op) == SSA_NAME)
5334 *op = SSA_NAME_VAR (*op);
5340 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5343 unshare_and_remove_ssa_names (tree ref)
5345 ref = unshare_expr (ref);
5346 for_each_index (&ref, idx_remove_ssa_names, NULL);
5351 /* Rewrites base of memory access OP with expression WITH in statement
5352 pointed to by BSI. */
5355 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5357 tree bvar, var, new_name, copy, name;
5360 var = bvar = get_base_address (*op);
5362 if (!var || TREE_CODE (with) != SSA_NAME)
5365 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5366 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5367 if (TREE_CODE (var) == INDIRECT_REF)
5368 var = TREE_OPERAND (var, 0);
5369 if (TREE_CODE (var) == SSA_NAME)
5372 var = SSA_NAME_VAR (var);
5374 else if (DECL_P (var))
5379 /* We need to add a memory tag for the variable. But we do not want
5380 to add it to the temporary used for the computations, since this leads
5381 to problems in redundancy elimination when there are common parts
5382 in two computations referring to the different arrays. So we copy
5383 the variable to a new temporary. */
5384 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
5387 new_name = duplicate_ssa_name (name, copy);
5390 tree tag = var_ann (var)->type_mem_tag;
5391 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5392 add_referenced_tmp_var (new_ptr);
5394 var_ann (new_ptr)->type_mem_tag = tag;
5396 add_type_alias (new_ptr, var);
5397 new_name = make_ssa_name (new_ptr, copy);
5400 TREE_OPERAND (copy, 0) = new_name;
5401 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5407 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5408 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5410 if (TREE_CODE (*op) == INDIRECT_REF)
5411 orig = REF_ORIGINAL (*op);
5413 orig = unshare_and_remove_ssa_names (*op);
5415 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5417 /* Record the original reference, for purposes of alias analysis. */
5418 REF_ORIGINAL (*op) = orig;
5420 /* Virtual operands in the original statement may have to be renamed
5421 because of the replacement. */
5422 mark_new_vars_to_rename (bsi_stmt (*bsi));
5425 /* Rewrites USE (address that is an iv) using candidate CAND. */
5428 rewrite_use_address (struct ivopts_data *data,
5429 struct iv_use *use, struct iv_cand *cand)
5431 tree comp = unshare_expr (get_computation (data->current_loop,
5433 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5435 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5438 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5440 rewrite_address_base (&bsi, use->op_p, op);
5443 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5447 rewrite_use_compare (struct ivopts_data *data,
5448 struct iv_use *use, struct iv_cand *cand)
5451 tree *op_p, cond, op, stmts, bound;
5452 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5453 enum tree_code compare;
5454 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5459 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5460 tree var_type = TREE_TYPE (var);
5462 compare = iv_elimination_compare (data, use);
5463 bound = fold_convert (var_type, bound);
5464 op = force_gimple_operand (unshare_expr (bound), &stmts,
5468 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5470 *use->op_p = build2 (compare, boolean_type_node, var, op);
5471 update_stmt (use->stmt);
5475 /* The induction variable elimination failed; just express the original
5477 comp = unshare_expr (get_computation (data->current_loop, use, cand));
5480 op_p = &TREE_OPERAND (cond, 0);
5481 if (TREE_CODE (*op_p) != SSA_NAME
5482 || zero_p (get_iv (data, *op_p)->step))
5483 op_p = &TREE_OPERAND (cond, 1);
5485 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5487 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5492 /* Ensure that operand *OP_P may be used at the end of EXIT without
5493 violating loop closed ssa form. */
5496 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5499 struct loop *def_loop;
5502 use = USE_FROM_PTR (op_p);
5503 if (TREE_CODE (use) != SSA_NAME)
5506 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5510 def_loop = def_bb->loop_father;
5511 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5514 /* Try finding a phi node that copies the value out of the loop. */
5515 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5516 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5521 /* Create such a phi node. */
5522 tree new_name = duplicate_ssa_name (use, NULL);
5524 phi = create_phi_node (new_name, exit->dest);
5525 SSA_NAME_DEF_STMT (new_name) = phi;
5526 add_phi_arg (phi, use, exit);
5529 SET_USE (op_p, PHI_RESULT (phi));
5532 /* Ensure that operands of STMT may be used at the end of EXIT without
5533 violating loop closed ssa form. */
5536 protect_loop_closed_ssa_form (edge exit, tree stmt)
5539 use_operand_p use_p;
5541 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5542 protect_loop_closed_ssa_form_use (exit, use_p);
5545 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5546 so that they are emitted on the correct place, and so that the loop closed
5547 ssa form is preserved. */
5550 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5552 tree_stmt_iterator tsi;
5553 block_stmt_iterator bsi;
5554 tree phi, stmt, def, next;
5556 if (!single_pred_p (exit->dest))
5557 split_loop_exit_edge (exit);
5559 /* Ensure there is label in exit->dest, so that we can
5561 tree_block_label (exit->dest);
5562 bsi = bsi_after_labels (exit->dest);
5564 if (TREE_CODE (stmts) == STATEMENT_LIST)
5566 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5568 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5569 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5574 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5575 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5581 for (phi = phi_nodes (exit->dest); phi; phi = next)
5583 next = PHI_CHAIN (phi);
5585 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5587 def = PHI_RESULT (phi);
5588 remove_statement (phi, false);
5589 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5591 SSA_NAME_DEF_STMT (def) = stmt;
5592 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5597 /* Rewrites the final value of USE (that is only needed outside of the loop)
5598 using candidate CAND. */
5601 rewrite_use_outer (struct ivopts_data *data,
5602 struct iv_use *use, struct iv_cand *cand)
5605 tree value, op, stmts, tgt;
5608 switch (TREE_CODE (use->stmt))
5611 tgt = PHI_RESULT (use->stmt);
5614 tgt = TREE_OPERAND (use->stmt, 0);
5620 exit = single_dom_exit (data->current_loop);
5626 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5630 value = get_computation_at (data->current_loop,
5631 use, cand, last_stmt (exit->src));
5633 value = unshare_expr (value);
5634 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5636 /* If we will preserve the iv anyway and we would need to perform
5637 some computation to replace the final value, do nothing. */
5638 if (stmts && name_info (data, tgt)->preserve_biv)
5641 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5643 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5645 if (USE_FROM_PTR (use_p) == tgt)
5646 SET_USE (use_p, op);
5650 compute_phi_arg_on_exit (exit, stmts, op);
5652 /* Enable removal of the statement. We cannot remove it directly,
5653 since we may still need the aliasing information attached to the
5654 ssa name defined by it. */
5655 name_info (data, tgt)->iv->have_use_for = false;
5659 /* If the variable is going to be preserved anyway, there is nothing to
5661 if (name_info (data, tgt)->preserve_biv)
5664 /* Otherwise we just need to compute the iv. */
5665 rewrite_use_nonlinear_expr (data, use, cand);
5668 /* Rewrites USE using candidate CAND. */
5671 rewrite_use (struct ivopts_data *data,
5672 struct iv_use *use, struct iv_cand *cand)
5676 case USE_NONLINEAR_EXPR:
5677 rewrite_use_nonlinear_expr (data, use, cand);
5681 rewrite_use_outer (data, use, cand);
5685 rewrite_use_address (data, use, cand);
5689 rewrite_use_compare (data, use, cand);
5695 update_stmt (use->stmt);
5698 /* Rewrite the uses using the selected induction variables. */
5701 rewrite_uses (struct ivopts_data *data)
5704 struct iv_cand *cand;
5707 for (i = 0; i < n_iv_uses (data); i++)
5709 use = iv_use (data, i);
5710 cand = use->selected;
5713 rewrite_use (data, use, cand);
5717 /* Removes the ivs that are not used after rewriting. */
5720 remove_unused_ivs (struct ivopts_data *data)
5725 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5727 struct version_info *info;
5729 info = ver_info (data, j);
5731 && !zero_p (info->iv->step)
5733 && !info->iv->have_use_for
5734 && !info->preserve_biv)
5735 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5739 /* Frees data allocated by the optimization of a single loop. */
5742 free_loop_data (struct ivopts_data *data)
5748 htab_empty (data->niters);
5750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5752 struct version_info *info;
5754 info = ver_info (data, i);
5758 info->has_nonlin_use = false;
5759 info->preserve_biv = false;
5762 bitmap_clear (data->relevant);
5763 bitmap_clear (data->important_candidates);
5765 for (i = 0; i < n_iv_uses (data); i++)
5767 struct iv_use *use = iv_use (data, i);
5770 BITMAP_FREE (use->related_cands);
5771 for (j = 0; j < use->n_map_members; j++)
5772 if (use->cost_map[j].depends_on)
5773 BITMAP_FREE (use->cost_map[j].depends_on);
5774 free (use->cost_map);
5777 VEC_truncate (iv_use_p, data->iv_uses, 0);
5779 for (i = 0; i < n_iv_cands (data); i++)
5781 struct iv_cand *cand = iv_cand (data, i);
5785 if (cand->depends_on)
5786 BITMAP_FREE (cand->depends_on);
5789 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5791 if (data->version_info_size < num_ssa_names)
5793 data->version_info_size = 2 * num_ssa_names;
5794 free (data->version_info);
5795 data->version_info = xcalloc (data->version_info_size,
5796 sizeof (struct version_info));
5799 data->max_inv_id = 0;
5801 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5802 SET_DECL_RTL (obj, NULL_RTX);
5804 VEC_truncate (tree, decl_rtl_to_reset, 0);
5807 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5811 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5815 for (i = 1; i < loops->num; i++)
5816 if (loops->parray[i])
5818 free (loops->parray[i]->aux);
5819 loops->parray[i]->aux = NULL;
5822 free_loop_data (data);
5823 free (data->version_info);
5824 BITMAP_FREE (data->relevant);
5825 BITMAP_FREE (data->important_candidates);
5826 htab_delete (data->niters);
5828 VEC_free (tree, heap, decl_rtl_to_reset);
5829 VEC_free (iv_use_p, heap, data->iv_uses);
5830 VEC_free (iv_cand_p, heap, data->iv_candidates);
5833 /* Optimizes the LOOP. Returns true if anything changed. */
5836 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5838 bool changed = false;
5839 struct iv_ca *iv_ca;
5842 data->current_loop = loop;
5844 if (dump_file && (dump_flags & TDF_DETAILS))
5846 fprintf (dump_file, "Processing loop %d\n", loop->num);
5848 exit = single_dom_exit (loop);
5851 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5852 exit->src->index, exit->dest->index);
5853 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5854 fprintf (dump_file, "\n");
5857 fprintf (dump_file, "\n");
5860 /* For each ssa name determines whether it behaves as an induction variable
5862 if (!find_induction_variables (data))
5865 /* Finds interesting uses (item 1). */
5866 find_interesting_uses (data);
5867 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5870 /* Finds candidates for the induction variables (item 2). */
5871 find_iv_candidates (data);
5873 /* Calculates the costs (item 3, part 1). */
5874 determine_use_iv_costs (data);
5875 determine_iv_costs (data);
5876 determine_set_costs (data);
5878 /* Find the optimal set of induction variables (item 3, part 2). */
5879 iv_ca = find_optimal_iv_set (data);
5884 /* Create the new induction variables (item 4, part 1). */
5885 create_new_ivs (data, iv_ca);
5886 iv_ca_free (&iv_ca);
5888 /* Rewrite the uses (item 4, part 2). */
5889 rewrite_uses (data);
5891 /* Remove the ivs that are unused after rewriting. */
5892 remove_unused_ivs (data);
5894 /* We have changed the structure of induction variables; it might happen
5895 that definitions in the scev database refer to some of them that were
5900 free_loop_data (data);
5905 /* Main entry point. Optimizes induction variables in LOOPS. */
5908 tree_ssa_iv_optimize (struct loops *loops)
5911 struct ivopts_data data;
5913 tree_ssa_iv_optimize_init (loops, &data);
5915 /* Optimize the loops starting with the innermost ones. */
5916 loop = loops->tree_root;
5920 /* Scan the loops, inner ones first. */
5921 while (loop != loops->tree_root)
5923 if (dump_file && (dump_flags & TDF_DETAILS))
5924 flow_loop_dump (loop, dump_file, NULL, 1);
5926 tree_ssa_iv_optimize_loop (&data, loop);
5938 /* FIXME. IV opts introduces new aliases and call-clobbered
5939 variables, which need to be renamed. However, when we call the
5940 renamer, not all statements will be scanned for operands. In
5941 particular, the newly introduced aliases may appear in statements
5942 that are considered "unmodified", so the renamer will not get a
5943 chance to rename those operands.
5945 Work around this problem by forcing an operand re-scan on every
5946 statement. This will not be necessary once the new operand
5947 scanner is implemented. */
5948 if (need_ssa_update_p ())
5951 block_stmt_iterator si;
5953 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5954 update_stmt (bsi_stmt (si));
5957 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5958 tree_ssa_iv_optimize_finalize (loops, &data);