1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
127 unsigned regs_used; /* Number of registers used. */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
225 /* The maximum invariant id. */
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
246 /* The number of uses covered by the assignment. */
249 /* Number of uses that cannot be expressed by the candidates in the set. */
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
261 /* The number of candidates in the set. */
264 /* Total number of registers needed. */
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
280 /* Difference of two iv candidate assignments. */
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
369 if (!just_once_each_iteration_p (loop, exit->src))
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
379 dump_iv (FILE *file, struct iv *iv)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
435 fprintf (file, " outside\n");
439 fprintf (file, " address\n");
443 fprintf (file, " compare\n");
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
472 dump_uses (FILE *file, struct ivopts_data *data)
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
505 fprintf (file, " final value replacement\n");
512 fprintf (file, " incremented before exit test\n");
516 fprintf (file, " incremented at end\n");
520 fprintf (file, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
604 if (sbb == loop->latch)
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
626 if (stmt_bb != cand_bb)
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
635 if (bsi_stmt (bsi) == stmt)
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
652 return stmt_after_ip_normal_pos (loop, stmt);
655 return stmt_after_ip_original_pos (cand, stmt);
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
667 /* The edge for that the number of iterations is cached. */
670 /* True if the # of iterations was successfully determined. */
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
721 if (!nfe_desc->valid_p)
724 return &nfe_desc->niter;
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
734 edge exit = single_dom_exit (data->current_loop);
739 return niter_for_exit (data, exit);
742 /* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
755 data->max_inv_id = 0;
756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
791 if (TREE_CODE (base) == INDIRECT_REF)
792 return determine_base_object (TREE_OPERAND (base, 0));
794 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
798 op0 = determine_base_object (TREE_OPERAND (expr, 0));
799 op1 = determine_base_object (TREE_OPERAND (expr, 1));
805 return (code == PLUS_EXPR
807 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
809 return fold (build (code, ptr_type_node, op0, op1));
813 return determine_base_object (TREE_OPERAND (expr, 0));
816 return fold_convert (ptr_type_node, expr);
820 /* Allocates an induction variable with given initial value BASE and step STEP
824 alloc_iv (tree base, tree step)
826 struct iv *iv = xcalloc (1, sizeof (struct iv));
828 if (step && integer_zerop (step))
832 iv->base_object = determine_base_object (base);
835 iv->have_use_for = false;
837 iv->ssa_name = NULL_TREE;
842 /* Sets STEP and BASE for induction variable IV. */
845 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
847 struct version_info *info = name_info (data, iv);
849 gcc_assert (!info->iv);
851 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
852 info->iv = alloc_iv (base, step);
853 info->iv->ssa_name = iv;
856 /* Finds induction variable declaration for VAR. */
859 get_iv (struct ivopts_data *data, tree var)
863 if (!name_info (data, var)->iv)
865 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
868 || !flow_bb_inside_loop_p (data->current_loop, bb))
869 set_iv (data, var, var, NULL_TREE);
872 return name_info (data, var)->iv;
875 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
876 not define a simple affine biv with nonzero step. */
879 determine_biv_step (tree phi)
881 struct loop *loop = bb_for_stmt (phi)->loop_father;
882 tree name = PHI_RESULT (phi), base, step;
884 if (!is_gimple_reg (name))
887 if (!simple_iv (loop, phi, name, &base, &step, true))
896 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
899 abnormal_ssa_name_p (tree exp)
904 if (TREE_CODE (exp) != SSA_NAME)
907 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
910 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
911 abnormal phi node. Callback for for_each_index. */
914 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
915 void *data ATTRIBUTE_UNUSED)
917 if (TREE_CODE (base) == ARRAY_REF)
919 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
925 return !abnormal_ssa_name_p (*index);
928 /* Returns true if EXPR contains a ssa name that occurs in an
929 abnormal phi node. */
932 contains_abnormal_ssa_name_p (tree expr)
935 enum tree_code_class class;
940 code = TREE_CODE (expr);
941 class = TREE_CODE_CLASS (code);
943 if (code == SSA_NAME)
944 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
946 if (code == INTEGER_CST
947 || is_gimple_min_invariant (expr))
950 if (code == ADDR_EXPR)
951 return !for_each_index (&TREE_OPERAND (expr, 0),
952 idx_contains_abnormal_ssa_name_p,
959 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
964 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
976 /* Finds basic ivs. */
979 find_bivs (struct ivopts_data *data)
981 tree phi, step, type, base;
983 struct loop *loop = data->current_loop;
985 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
987 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
990 step = determine_biv_step (phi);
994 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
995 if (contains_abnormal_ssa_name_p (base)
996 || contains_abnormal_ssa_name_p (step))
999 type = TREE_TYPE (PHI_RESULT (phi));
1000 base = fold_convert (type, base);
1002 step = fold_convert (type, step);
1004 set_iv (data, PHI_RESULT (phi), base, step);
1011 /* Marks basic ivs. */
1014 mark_bivs (struct ivopts_data *data)
1017 struct iv *iv, *incr_iv;
1018 struct loop *loop = data->current_loop;
1019 basic_block incr_bb;
1021 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1023 iv = get_iv (data, PHI_RESULT (phi));
1027 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1028 incr_iv = get_iv (data, var);
1032 /* If the increment is in the subloop, ignore it. */
1033 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1034 if (incr_bb->loop_father != data->current_loop
1035 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1039 incr_iv->biv_p = true;
1043 /* Checks whether STMT defines a linear induction variable and stores its
1044 parameters to BASE and STEP. */
1047 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1048 tree *base, tree *step)
1051 struct loop *loop = data->current_loop;
1056 if (TREE_CODE (stmt) != MODIFY_EXPR)
1059 lhs = TREE_OPERAND (stmt, 0);
1060 if (TREE_CODE (lhs) != SSA_NAME)
1063 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066 if (contains_abnormal_ssa_name_p (*base)
1067 || contains_abnormal_ssa_name_p (*step))
1073 /* Finds general ivs in statement STMT. */
1076 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1080 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1083 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1086 /* Finds general ivs in basic block BB. */
1089 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1091 block_stmt_iterator bsi;
1093 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1094 find_givs_in_stmt (data, bsi_stmt (bsi));
1097 /* Finds general ivs. */
1100 find_givs (struct ivopts_data *data)
1102 struct loop *loop = data->current_loop;
1103 basic_block *body = get_loop_body_in_dom_order (loop);
1106 for (i = 0; i < loop->num_nodes; i++)
1107 find_givs_in_bb (data, body[i]);
1111 /* For each ssa name defined in LOOP determines whether it is an induction
1112 variable and if so, its initial value and step. */
1115 find_induction_variables (struct ivopts_data *data)
1120 if (!find_bivs (data))
1126 if (dump_file && (dump_flags & TDF_DETAILS))
1128 struct tree_niter_desc *niter;
1130 niter = niter_for_single_dom_exit (data);
1134 fprintf (dump_file, " number of iterations ");
1135 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1136 fprintf (dump_file, "\n");
1138 fprintf (dump_file, " may be zero if ");
1139 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1140 fprintf (dump_file, "\n");
1141 fprintf (dump_file, "\n");
1144 fprintf (dump_file, "Induction variables:\n\n");
1146 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1148 if (ver_info (data, i)->iv)
1149 dump_iv (dump_file, ver_info (data, i)->iv);
1156 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1158 static struct iv_use *
1159 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1160 tree stmt, enum use_type use_type)
1162 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1164 use->id = n_iv_uses (data);
1165 use->type = use_type;
1169 use->related_cands = BITMAP_ALLOC (NULL);
1171 /* To avoid showing ssa name in the dumps, if it was not reset by the
1173 iv->ssa_name = NULL_TREE;
1175 if (dump_file && (dump_flags & TDF_DETAILS))
1176 dump_use (dump_file, use);
1178 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1183 /* Checks whether OP is a loop-level invariant and if so, records it.
1184 NONLINEAR_USE is true if the invariant is used in a way we do not
1185 handle specially. */
1188 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1191 struct version_info *info;
1193 if (TREE_CODE (op) != SSA_NAME
1194 || !is_gimple_reg (op))
1197 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1199 && flow_bb_inside_loop_p (data->current_loop, bb))
1202 info = name_info (data, op);
1204 info->has_nonlin_use |= nonlinear_use;
1206 info->inv_id = ++data->max_inv_id;
1207 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1210 /* Checks whether the use OP is interesting and if so, records it
1213 static struct iv_use *
1214 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1222 if (TREE_CODE (op) != SSA_NAME)
1225 iv = get_iv (data, op);
1229 if (iv->have_use_for)
1231 use = iv_use (data, iv->use_id);
1233 gcc_assert (use->type == USE_NONLINEAR_EXPR
1234 || use->type == USE_OUTER);
1236 if (type == USE_NONLINEAR_EXPR)
1237 use->type = USE_NONLINEAR_EXPR;
1241 if (zero_p (iv->step))
1243 record_invariant (data, op, true);
1246 iv->have_use_for = true;
1248 civ = xmalloc (sizeof (struct iv));
1251 stmt = SSA_NAME_DEF_STMT (op);
1252 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1253 || TREE_CODE (stmt) == MODIFY_EXPR);
1255 use = record_use (data, NULL, civ, stmt, type);
1256 iv->use_id = use->id;
1261 /* Checks whether the use OP is interesting and if so, records it. */
1263 static struct iv_use *
1264 find_interesting_uses_op (struct ivopts_data *data, tree op)
1266 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1269 /* Records a definition of induction variable OP that is used outside of the
1272 static struct iv_use *
1273 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1275 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1278 /* Checks whether the condition *COND_P in STMT is interesting
1279 and if so, records it. */
1282 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1286 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1288 tree zero = integer_zero_node;
1290 const_iv.step = NULL_TREE;
1292 if (TREE_CODE (*cond_p) != SSA_NAME
1293 && !COMPARISON_CLASS_P (*cond_p))
1296 if (TREE_CODE (*cond_p) == SSA_NAME)
1303 op0_p = &TREE_OPERAND (*cond_p, 0);
1304 op1_p = &TREE_OPERAND (*cond_p, 1);
1307 if (TREE_CODE (*op0_p) == SSA_NAME)
1308 iv0 = get_iv (data, *op0_p);
1312 if (TREE_CODE (*op1_p) == SSA_NAME)
1313 iv1 = get_iv (data, *op1_p);
1317 if (/* When comparing with non-invariant value, we may not do any senseful
1318 induction variable elimination. */
1320 /* Eliminating condition based on two ivs would be nontrivial.
1321 ??? TODO -- it is not really important to handle this case. */
1322 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1324 find_interesting_uses_op (data, *op0_p);
1325 find_interesting_uses_op (data, *op1_p);
1329 if (zero_p (iv0->step) && zero_p (iv1->step))
1331 /* If both are invariants, this is a work for unswitching. */
1335 civ = xmalloc (sizeof (struct iv));
1336 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1337 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1340 /* Returns true if expression EXPR is obviously invariant in LOOP,
1341 i.e. if all its operands are defined outside of the LOOP. */
1344 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1349 if (is_gimple_min_invariant (expr))
1352 if (TREE_CODE (expr) == SSA_NAME)
1354 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1356 && flow_bb_inside_loop_p (loop, def_bb))
1365 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1366 for (i = 0; i < len; i++)
1367 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1373 /* Cumulates the steps of indices into DATA and replaces their values with the
1374 initial ones. Returns false when the value of the index cannot be determined.
1375 Callback for for_each_index. */
1377 struct ifs_ivopts_data
1379 struct ivopts_data *ivopts_data;
1385 idx_find_step (tree base, tree *idx, void *data)
1387 struct ifs_ivopts_data *dta = data;
1389 tree step, type, iv_type, iv_step, lbound, off;
1390 struct loop *loop = dta->ivopts_data->current_loop;
1392 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1393 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1396 /* If base is a component ref, require that the offset of the reference
1398 if (TREE_CODE (base) == COMPONENT_REF)
1400 off = component_ref_field_offset (base);
1401 return expr_invariant_in_loop_p (loop, off);
1404 /* If base is array, first check whether we will be able to move the
1405 reference out of the loop (in order to take its address in strength
1406 reduction). In order for this to work we need both lower bound
1407 and step to be loop invariants. */
1408 if (TREE_CODE (base) == ARRAY_REF)
1410 step = array_ref_element_size (base);
1411 lbound = array_ref_low_bound (base);
1413 if (!expr_invariant_in_loop_p (loop, step)
1414 || !expr_invariant_in_loop_p (loop, lbound))
1418 if (TREE_CODE (*idx) != SSA_NAME)
1421 iv = get_iv (dta->ivopts_data, *idx);
1430 iv_type = TREE_TYPE (iv->base);
1431 type = build_pointer_type (TREE_TYPE (base));
1432 if (TREE_CODE (base) == ARRAY_REF)
1434 step = array_ref_element_size (base);
1436 /* We only handle addresses whose step is an integer constant. */
1437 if (TREE_CODE (step) != INTEGER_CST)
1441 /* The step for pointer arithmetics already is 1 byte. */
1442 step = build_int_cst (type, 1);
1444 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1445 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1446 type, iv->base, iv->step, dta->stmt);
1448 iv_step = fold_convert (iv_type, iv->step);
1452 /* The index might wrap. */
1456 step = fold_build2 (MULT_EXPR, type, step, iv_step);
1459 *dta->step_p = step;
1461 *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);
1466 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1467 object is passed to it in DATA. */
1470 idx_record_use (tree base, tree *idx,
1473 find_interesting_uses_op (data, *idx);
1474 if (TREE_CODE (base) == ARRAY_REF)
1476 find_interesting_uses_op (data, array_ref_element_size (base));
1477 find_interesting_uses_op (data, array_ref_low_bound (base));
1482 /* Returns true if memory reference REF may be unaligned. */
1485 may_be_unaligned_p (tree ref)
1489 HOST_WIDE_INT bitsize;
1490 HOST_WIDE_INT bitpos;
1492 enum machine_mode mode;
1493 int unsignedp, volatilep;
1494 unsigned base_align;
1496 /* The test below is basically copy of what expr.c:normal_inner_ref
1497 does to check whether the object must be loaded by parts when
1498 STRICT_ALIGNMENT is true. */
1499 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1500 &unsignedp, &volatilep, true);
1501 base_type = TREE_TYPE (base);
1502 base_align = TYPE_ALIGN (base_type);
1505 && (base_align < GET_MODE_ALIGNMENT (mode)
1506 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1507 || bitpos % BITS_PER_UNIT != 0))
1513 /* Builds ADDR_EXPR of object OBJ. If OBJ is an INDIRECT_REF, the indirect_ref
1514 is stripped instead. */
1517 build_addr_strip_iref (tree obj)
1521 if (TREE_CODE (obj) == INDIRECT_REF)
1523 type = build_pointer_type (TREE_TYPE (obj));
1524 obj = fold_convert (type, TREE_OPERAND (obj, 0));
1527 obj = build_addr (obj);
1532 /* Finds addresses in *OP_P inside STMT. */
1535 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1537 tree base = unshare_expr (*op_p), step = NULL;
1539 struct ifs_ivopts_data ifs_ivopts_data;
1541 /* Do not play with volatile memory references. A bit too conservative,
1542 perhaps, but safe. */
1543 if (stmt_ann (stmt)->has_volatile_ops)
1546 /* Ignore bitfields for now. Not really something terribly complicated
1548 if (TREE_CODE (base) == COMPONENT_REF
1549 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1552 if (STRICT_ALIGNMENT
1553 && may_be_unaligned_p (base))
1556 ifs_ivopts_data.ivopts_data = data;
1557 ifs_ivopts_data.stmt = stmt;
1558 ifs_ivopts_data.step_p = &step;
1559 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1563 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1564 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1566 base = build_addr_strip_iref (base);
1568 civ = alloc_iv (base, step);
1569 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1573 for_each_index (op_p, idx_record_use, data);
1576 /* Finds and records invariants used in STMT. */
1579 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1581 use_optype uses = NULL;
1585 if (TREE_CODE (stmt) == PHI_NODE)
1586 n = PHI_NUM_ARGS (stmt);
1589 uses = STMT_USE_OPS (stmt);
1590 n = NUM_USES (uses);
1593 for (i = 0; i < n; i++)
1595 if (TREE_CODE (stmt) == PHI_NODE)
1596 op = PHI_ARG_DEF (stmt, i);
1598 op = USE_OP (uses, i);
1600 record_invariant (data, op, false);
1604 /* Finds interesting uses of induction variables in the statement STMT. */
1607 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1611 use_optype uses = NULL;
1614 find_invariants_stmt (data, stmt);
1616 if (TREE_CODE (stmt) == COND_EXPR)
1618 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1622 if (TREE_CODE (stmt) == MODIFY_EXPR)
1624 lhs = TREE_OPERAND (stmt, 0);
1625 rhs = TREE_OPERAND (stmt, 1);
1627 if (TREE_CODE (lhs) == SSA_NAME)
1629 /* If the statement defines an induction variable, the uses are not
1630 interesting by themselves. */
1632 iv = get_iv (data, lhs);
1634 if (iv && !zero_p (iv->step))
1638 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1640 case tcc_comparison:
1641 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1645 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1646 if (REFERENCE_CLASS_P (lhs))
1647 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1653 if (REFERENCE_CLASS_P (lhs)
1654 && is_gimple_val (rhs))
1656 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1657 find_interesting_uses_op (data, rhs);
1661 /* TODO -- we should also handle address uses of type
1663 memory = call (whatever);
1670 if (TREE_CODE (stmt) == PHI_NODE
1671 && bb_for_stmt (stmt) == data->current_loop->header)
1673 lhs = PHI_RESULT (stmt);
1674 iv = get_iv (data, lhs);
1676 if (iv && !zero_p (iv->step))
1680 if (TREE_CODE (stmt) == PHI_NODE)
1681 n = PHI_NUM_ARGS (stmt);
1684 uses = STMT_USE_OPS (stmt);
1685 n = NUM_USES (uses);
1688 for (i = 0; i < n; i++)
1690 if (TREE_CODE (stmt) == PHI_NODE)
1691 op = PHI_ARG_DEF (stmt, i);
1693 op = USE_OP (uses, i);
1695 if (TREE_CODE (op) != SSA_NAME)
1698 iv = get_iv (data, op);
1702 find_interesting_uses_op (data, op);
1706 /* Finds interesting uses of induction variables outside of loops
1707 on loop exit edge EXIT. */
1710 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1714 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1716 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1717 find_interesting_uses_outer (data, def);
1721 /* Finds uses of the induction variables that are interesting. */
1724 find_interesting_uses (struct ivopts_data *data)
1727 block_stmt_iterator bsi;
1729 basic_block *body = get_loop_body (data->current_loop);
1731 struct version_info *info;
1734 if (dump_file && (dump_flags & TDF_DETAILS))
1735 fprintf (dump_file, "Uses:\n\n");
1737 for (i = 0; i < data->current_loop->num_nodes; i++)
1742 FOR_EACH_EDGE (e, ei, bb->succs)
1743 if (e->dest != EXIT_BLOCK_PTR
1744 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1745 find_interesting_uses_outside (data, e);
1747 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1748 find_interesting_uses_stmt (data, phi);
1749 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1750 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1753 if (dump_file && (dump_flags & TDF_DETAILS))
1757 fprintf (dump_file, "\n");
1759 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1761 info = ver_info (data, i);
1764 fprintf (dump_file, " ");
1765 print_generic_expr (dump_file, info->name, TDF_SLIM);
1766 fprintf (dump_file, " is invariant (%d)%s\n",
1767 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1771 fprintf (dump_file, "\n");
1777 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1778 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1779 we are at the top-level of the processed address. */
1782 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1783 unsigned HOST_WIDE_INT *offset)
1785 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1786 enum tree_code code;
1787 tree type, orig_type = TREE_TYPE (expr);
1788 unsigned HOST_WIDE_INT off0, off1, st;
1789 tree orig_expr = expr;
1793 type = TREE_TYPE (expr);
1794 code = TREE_CODE (expr);
1800 if (!cst_and_fits_in_hwi (expr)
1804 *offset = int_cst_value (expr);
1805 return build_int_cst_type (orig_type, 0);
1809 op0 = TREE_OPERAND (expr, 0);
1810 op1 = TREE_OPERAND (expr, 1);
1812 op0 = strip_offset_1 (op0, false, false, &off0);
1813 op1 = strip_offset_1 (op1, false, false, &off1);
1815 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1816 if (op0 == TREE_OPERAND (expr, 0)
1817 && op1 == TREE_OPERAND (expr, 1))
1822 else if (zero_p (op0))
1824 if (code == PLUS_EXPR)
1827 expr = fold_build1 (NEGATE_EXPR, type, op1);
1830 expr = fold_build2 (code, type, op0, op1);
1832 return fold_convert (orig_type, expr);
1838 step = array_ref_element_size (expr);
1839 if (!cst_and_fits_in_hwi (step))
1842 st = int_cst_value (step);
1843 op1 = TREE_OPERAND (expr, 1);
1844 op1 = strip_offset_1 (op1, false, false, &off1);
1845 *offset = off1 * st;
1850 /* Strip the component reference completely. */
1851 op0 = TREE_OPERAND (expr, 0);
1852 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1862 tmp = component_ref_field_offset (expr);
1864 && cst_and_fits_in_hwi (tmp))
1866 /* Strip the component reference completely. */
1867 op0 = TREE_OPERAND (expr, 0);
1868 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1869 *offset = off0 + int_cst_value (tmp);
1875 op0 = TREE_OPERAND (expr, 0);
1876 op0 = strip_offset_1 (op0, true, true, &off0);
1879 if (op0 == TREE_OPERAND (expr, 0))
1882 expr = build_addr_strip_iref (op0);
1883 return fold_convert (orig_type, expr);
1886 inside_addr = false;
1893 /* Default handling of expressions for that we want to recurse into
1894 the first operand. */
1895 op0 = TREE_OPERAND (expr, 0);
1896 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1899 if (op0 == TREE_OPERAND (expr, 0)
1900 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1903 expr = copy_node (expr);
1904 TREE_OPERAND (expr, 0) = op0;
1906 TREE_OPERAND (expr, 1) = op1;
1908 /* Inside address, we might strip the top level component references,
1909 thus changing type of the expresion. Handling of ADDR_EXPR
1911 expr = fold_convert (orig_type, expr);
1916 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1919 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1921 return strip_offset_1 (expr, false, false, offset);
1924 /* Returns variant of TYPE that can be used as base for different uses.
1925 For integer types, we return unsigned variant of the type, which
1926 avoids problems with overflows. For pointer types, we return void *. */
1929 generic_type_for (tree type)
1931 if (POINTER_TYPE_P (type))
1932 return ptr_type_node;
1934 if (TYPE_UNSIGNED (type))
1937 return unsigned_type_for (type);
1940 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1941 the bitmap to that we should store it. */
1943 static struct ivopts_data *fd_ivopts_data;
1945 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1947 bitmap *depends_on = data;
1948 struct version_info *info;
1950 if (TREE_CODE (*expr_p) != SSA_NAME)
1952 info = name_info (fd_ivopts_data, *expr_p);
1954 if (!info->inv_id || info->has_nonlin_use)
1958 *depends_on = BITMAP_ALLOC (NULL);
1959 bitmap_set_bit (*depends_on, info->inv_id);
1964 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1965 position to POS. If USE is not NULL, the candidate is set as related to
1966 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1967 replacement of the final value of the iv by a direct computation. */
1969 static struct iv_cand *
1970 add_candidate_1 (struct ivopts_data *data,
1971 tree base, tree step, bool important, enum iv_position pos,
1972 struct iv_use *use, tree incremented_at)
1975 struct iv_cand *cand = NULL;
1976 tree type, orig_type;
1980 orig_type = TREE_TYPE (base);
1981 type = generic_type_for (orig_type);
1982 if (type != orig_type)
1984 base = fold_convert (type, base);
1986 step = fold_convert (type, step);
1990 for (i = 0; i < n_iv_cands (data); i++)
1992 cand = iv_cand (data, i);
1994 if (cand->pos != pos)
1997 if (cand->incremented_at != incremented_at)
2011 if (!operand_equal_p (base, cand->iv->base, 0))
2014 if (zero_p (cand->iv->step))
2021 if (step && operand_equal_p (step, cand->iv->step, 0))
2026 if (i == n_iv_cands (data))
2028 cand = xcalloc (1, sizeof (struct iv_cand));
2034 cand->iv = alloc_iv (base, step);
2037 if (pos != IP_ORIGINAL && cand->iv)
2039 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2040 cand->var_after = cand->var_before;
2042 cand->important = important;
2043 cand->incremented_at = incremented_at;
2044 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2047 && TREE_CODE (step) != INTEGER_CST)
2049 fd_ivopts_data = data;
2050 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2053 if (dump_file && (dump_flags & TDF_DETAILS))
2054 dump_cand (dump_file, cand);
2057 if (important && !cand->important)
2059 cand->important = true;
2060 if (dump_file && (dump_flags & TDF_DETAILS))
2061 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2066 bitmap_set_bit (use->related_cands, i);
2067 if (dump_file && (dump_flags & TDF_DETAILS))
2068 fprintf (dump_file, "Candidate %d is related to use %d\n",
2075 /* Returns true if incrementing the induction variable at the end of the LOOP
2078 The purpose is to avoid splitting latch edge with a biv increment, thus
2079 creating a jump, possibly confusing other optimization passes and leaving
2080 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2081 is not available (so we do not have a better alternative), or if the latch
2082 edge is already nonempty. */
2085 allow_ip_end_pos_p (struct loop *loop)
2087 if (!ip_normal_pos (loop))
2090 if (!empty_block_p (ip_end_pos (loop)))
2096 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2097 position to POS. If USE is not NULL, the candidate is set as related to
2098 it. The candidate computation is scheduled on all available positions. */
2101 add_candidate (struct ivopts_data *data,
2102 tree base, tree step, bool important, struct iv_use *use)
2104 if (ip_normal_pos (data->current_loop))
2105 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2106 if (ip_end_pos (data->current_loop)
2107 && allow_ip_end_pos_p (data->current_loop))
2108 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2111 /* Add a standard "0 + 1 * iteration" iv candidate for a
2112 type with SIZE bits. */
2115 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2118 tree type = lang_hooks.types.type_for_size (size, true);
2119 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2123 /* Adds standard iv candidates. */
2126 add_standard_iv_candidates (struct ivopts_data *data)
2128 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2130 /* The same for a double-integer type if it is still fast enough. */
2131 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2132 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2136 /* Adds candidates bases on the old induction variable IV. */
2139 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2142 struct iv_cand *cand;
2144 add_candidate (data, iv->base, iv->step, true, NULL);
2146 /* The same, but with initial value zero. */
2147 add_candidate (data,
2148 build_int_cst (TREE_TYPE (iv->base), 0),
2149 iv->step, true, NULL);
2151 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2152 if (TREE_CODE (phi) == PHI_NODE)
2154 /* Additionally record the possibility of leaving the original iv
2156 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2157 cand = add_candidate_1 (data,
2158 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2159 SSA_NAME_DEF_STMT (def));
2160 cand->var_before = iv->ssa_name;
2161 cand->var_after = def;
2165 /* Adds candidates based on the old induction variables. */
2168 add_old_ivs_candidates (struct ivopts_data *data)
2174 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2176 iv = ver_info (data, i)->iv;
2177 if (iv && iv->biv_p && !zero_p (iv->step))
2178 add_old_iv_candidates (data, iv);
2182 /* Adds candidates based on the value of the induction variable IV and USE. */
2185 add_iv_value_candidates (struct ivopts_data *data,
2186 struct iv *iv, struct iv_use *use)
2188 unsigned HOST_WIDE_INT offset;
2191 add_candidate (data, iv->base, iv->step, false, use);
2193 /* The same, but with initial value zero. Make such variable important,
2194 since it is generic enough so that possibly many uses may be based
2196 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2197 iv->step, true, use);
2199 /* Third, try removing the constant offset. */
2200 base = strip_offset (iv->base, &offset);
2202 add_candidate (data, base, iv->step, false, use);
2205 /* Possibly adds pseudocandidate for replacing the final value of USE by
2206 a direct computation. */
2209 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2211 struct tree_niter_desc *niter;
2213 /* We must know where we exit the loop and how many times does it roll. */
2214 niter = niter_for_single_dom_exit (data);
2216 || !zero_p (niter->may_be_zero))
2219 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2222 /* Adds candidates based on the uses. */
2225 add_derived_ivs_candidates (struct ivopts_data *data)
2229 for (i = 0; i < n_iv_uses (data); i++)
2231 struct iv_use *use = iv_use (data, i);
2238 case USE_NONLINEAR_EXPR:
2241 /* Just add the ivs based on the value of the iv used here. */
2242 add_iv_value_candidates (data, use->iv, use);
2246 add_iv_value_candidates (data, use->iv, use);
2248 /* Additionally, add the pseudocandidate for the possibility to
2249 replace the final value by a direct computation. */
2250 add_iv_outer_candidates (data, use);
2259 /* Record important candidates and add them to related_cands bitmaps
2263 record_important_candidates (struct ivopts_data *data)
2268 for (i = 0; i < n_iv_cands (data); i++)
2270 struct iv_cand *cand = iv_cand (data, i);
2272 if (cand->important)
2273 bitmap_set_bit (data->important_candidates, i);
2276 data->consider_all_candidates = (n_iv_cands (data)
2277 <= CONSIDER_ALL_CANDIDATES_BOUND);
2279 if (data->consider_all_candidates)
2281 /* We will not need "related_cands" bitmaps in this case,
2282 so release them to decrease peak memory consumption. */
2283 for (i = 0; i < n_iv_uses (data); i++)
2285 use = iv_use (data, i);
2286 BITMAP_FREE (use->related_cands);
2291 /* Add important candidates to the related_cands bitmaps. */
2292 for (i = 0; i < n_iv_uses (data); i++)
2293 bitmap_ior_into (iv_use (data, i)->related_cands,
2294 data->important_candidates);
2298 /* Finds the candidates for the induction variables. */
2301 find_iv_candidates (struct ivopts_data *data)
2303 /* Add commonly used ivs. */
2304 add_standard_iv_candidates (data);
2306 /* Add old induction variables. */
2307 add_old_ivs_candidates (data);
2309 /* Add induction variables derived from uses. */
2310 add_derived_ivs_candidates (data);
2312 /* Record the important candidates. */
2313 record_important_candidates (data);
2316 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2317 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2318 we allocate a simple list to every use. */
2321 alloc_use_cost_map (struct ivopts_data *data)
2323 unsigned i, size, s, j;
2325 for (i = 0; i < n_iv_uses (data); i++)
2327 struct iv_use *use = iv_use (data, i);
2330 if (data->consider_all_candidates)
2331 size = n_iv_cands (data);
2335 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2340 /* Round up to the power of two, so that moduling by it is fast. */
2341 for (size = 1; size < s; size <<= 1)
2345 use->n_map_members = size;
2346 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2350 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2351 on invariants DEPENDS_ON and that the value used in expressing it
2355 set_use_iv_cost (struct ivopts_data *data,
2356 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2357 bitmap depends_on, tree value)
2363 BITMAP_FREE (depends_on);
2367 if (data->consider_all_candidates)
2369 use->cost_map[cand->id].cand = cand;
2370 use->cost_map[cand->id].cost = cost;
2371 use->cost_map[cand->id].depends_on = depends_on;
2372 use->cost_map[cand->id].value = value;
2376 /* n_map_members is a power of two, so this computes modulo. */
2377 s = cand->id & (use->n_map_members - 1);
2378 for (i = s; i < use->n_map_members; i++)
2379 if (!use->cost_map[i].cand)
2381 for (i = 0; i < s; i++)
2382 if (!use->cost_map[i].cand)
2388 use->cost_map[i].cand = cand;
2389 use->cost_map[i].cost = cost;
2390 use->cost_map[i].depends_on = depends_on;
2391 use->cost_map[i].value = value;
2394 /* Gets cost of (USE, CANDIDATE) pair. */
2396 static struct cost_pair *
2397 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2398 struct iv_cand *cand)
2401 struct cost_pair *ret;
2406 if (data->consider_all_candidates)
2408 ret = use->cost_map + cand->id;
2415 /* n_map_members is a power of two, so this computes modulo. */
2416 s = cand->id & (use->n_map_members - 1);
2417 for (i = s; i < use->n_map_members; i++)
2418 if (use->cost_map[i].cand == cand)
2419 return use->cost_map + i;
2421 for (i = 0; i < s; i++)
2422 if (use->cost_map[i].cand == cand)
2423 return use->cost_map + i;
2428 /* Returns estimate on cost of computing SEQ. */
2436 for (; seq; seq = NEXT_INSN (seq))
2438 set = single_set (seq);
2440 cost += rtx_cost (set, SET);
2448 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2450 produce_memory_decl_rtl (tree obj, int *regno)
2455 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2457 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2458 x = gen_rtx_SYMBOL_REF (Pmode, name);
2461 x = gen_raw_REG (Pmode, (*regno)++);
2463 return gen_rtx_MEM (DECL_MODE (obj), x);
2466 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2467 walk_tree. DATA contains the actual fake register number. */
2470 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2472 tree obj = NULL_TREE;
2476 switch (TREE_CODE (*expr_p))
2479 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2480 handled_component_p (*expr_p);
2481 expr_p = &TREE_OPERAND (*expr_p, 0))
2485 x = produce_memory_decl_rtl (obj, regno);
2490 obj = SSA_NAME_VAR (*expr_p);
2491 if (!DECL_RTL_SET_P (obj))
2492 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2501 if (DECL_RTL_SET_P (obj))
2504 if (DECL_MODE (obj) == BLKmode)
2505 x = produce_memory_decl_rtl (obj, regno);
2507 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2517 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2518 SET_DECL_RTL (obj, x);
2524 /* Determines cost of the computation of EXPR. */
2527 computation_cost (tree expr)
2530 tree type = TREE_TYPE (expr);
2532 /* Avoid using hard regs in ways which may be unsupported. */
2533 int regno = LAST_VIRTUAL_REGISTER + 1;
2535 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2537 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2541 cost = seq_cost (seq);
2543 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2548 /* Returns variable containing the value of candidate CAND at statement AT. */
2551 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2553 if (stmt_after_increment (loop, cand, stmt))
2554 return cand->var_after;
2556 return cand->var_before;
2559 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2560 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2563 tree_int_cst_sign_bit (tree t)
2565 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2566 unsigned HOST_WIDE_INT w;
2568 if (bitno < HOST_BITS_PER_WIDE_INT)
2569 w = TREE_INT_CST_LOW (t);
2572 w = TREE_INT_CST_HIGH (t);
2573 bitno -= HOST_BITS_PER_WIDE_INT;
2576 return (w >> bitno) & 1;
2579 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2580 return cst. Otherwise return NULL_TREE. */
2583 constant_multiple_of (tree type, tree top, tree bot)
2585 tree res, mby, p0, p1;
2586 enum tree_code code;
2592 if (operand_equal_p (top, bot, 0))
2593 return build_int_cst (type, 1);
2595 code = TREE_CODE (top);
2599 mby = TREE_OPERAND (top, 1);
2600 if (TREE_CODE (mby) != INTEGER_CST)
2603 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2607 return fold_binary_to_constant (MULT_EXPR, type, res,
2608 fold_convert (type, mby));
2612 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2615 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2619 return fold_binary_to_constant (code, type, p0, p1);
2622 if (TREE_CODE (bot) != INTEGER_CST)
2625 bot = fold_convert (type, bot);
2626 top = fold_convert (type, top);
2628 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2629 the result afterwards. */
2630 if (tree_int_cst_sign_bit (bot))
2633 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2638 /* Ditto for TOP. */
2639 if (tree_int_cst_sign_bit (top))
2642 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2645 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2648 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2650 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2658 /* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
2659 to make things simpler; this is sufficient in most cases. */
2661 #define MAX_AFF_ELTS 8
2663 struct affine_tree_combination
2665 /* Type of the result of the combination. */
2668 /* Mask modulo that the operations are performed. */
2669 unsigned HOST_WIDE_INT mask;
2671 /* Constant offset. */
2672 unsigned HOST_WIDE_INT offset;
2674 /* Number of elements of the combination. */
2677 /* Elements and their coefficients. */
2678 tree elts[MAX_AFF_ELTS];
2679 unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2681 /* Remainder of the expression. */
2685 /* Sets COMB to CST. */
2688 aff_combination_const (struct affine_tree_combination *comb, tree type,
2689 unsigned HOST_WIDE_INT cst)
2691 unsigned prec = TYPE_PRECISION (type);
2694 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2697 comb->rest = NULL_TREE;
2698 comb->offset = cst & comb->mask;
2701 /* Sets COMB to single element ELT. */
2704 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2706 unsigned prec = TYPE_PRECISION (type);
2709 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2712 comb->elts[0] = elt;
2714 comb->rest = NULL_TREE;
2718 /* Scales COMB by SCALE. */
2721 aff_combination_scale (struct affine_tree_combination *comb,
2722 unsigned HOST_WIDE_INT scale)
2731 aff_combination_const (comb, comb->type, 0);
2735 comb->offset = (scale * comb->offset) & comb->mask;
2736 for (i = 0, j = 0; i < comb->n; i++)
2738 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2739 comb->elts[j] = comb->elts[i];
2740 if (comb->coefs[j] != 0)
2747 if (comb->n < MAX_AFF_ELTS)
2749 comb->coefs[comb->n] = scale;
2750 comb->elts[comb->n] = comb->rest;
2751 comb->rest = NULL_TREE;
2755 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2756 build_int_cst_type (comb->type, scale));
2760 /* Adds ELT * SCALE to COMB. */
2763 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2764 unsigned HOST_WIDE_INT scale)
2771 for (i = 0; i < comb->n; i++)
2772 if (operand_equal_p (comb->elts[i], elt, 0))
2774 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2779 comb->coefs[i] = comb->coefs[comb->n];
2780 comb->elts[i] = comb->elts[comb->n];
2783 if (comb->n < MAX_AFF_ELTS)
2785 comb->coefs[comb->n] = scale;
2786 comb->elts[comb->n] = elt;
2792 elt = fold_convert (comb->type, elt);
2794 elt = fold_build2 (MULT_EXPR, comb->type,
2795 fold_convert (comb->type, elt),
2796 build_int_cst_type (comb->type, scale));
2799 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2804 /* Adds COMB2 to COMB1. */
2807 aff_combination_add (struct affine_tree_combination *comb1,
2808 struct affine_tree_combination *comb2)
2812 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2813 for (i = 0; i < comb2-> n; i++)
2814 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2816 aff_combination_add_elt (comb1, comb2->rest, 1);
2819 /* Splits EXPR into an affine combination of parts. */
2822 tree_to_aff_combination (tree expr, tree type,
2823 struct affine_tree_combination *comb)
2825 struct affine_tree_combination tmp;
2826 enum tree_code code;
2827 tree cst, core, toffset;
2828 HOST_WIDE_INT bitpos, bitsize;
2829 enum machine_mode mode;
2830 int unsignedp, volatilep;
2834 code = TREE_CODE (expr);
2838 aff_combination_const (comb, type, int_cst_value (expr));
2843 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2844 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2845 if (code == MINUS_EXPR)
2846 aff_combination_scale (&tmp, -1);
2847 aff_combination_add (comb, &tmp);
2851 cst = TREE_OPERAND (expr, 1);
2852 if (TREE_CODE (cst) != INTEGER_CST)
2854 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2855 aff_combination_scale (comb, int_cst_value (cst));
2859 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2860 aff_combination_scale (comb, -1);
2864 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2865 &toffset, &mode, &unsignedp, &volatilep,
2867 if (bitpos % BITS_PER_UNIT != 0)
2869 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2870 core = build_addr_strip_iref (core);
2871 if (TREE_CODE (core) == ADDR_EXPR)
2872 aff_combination_add_elt (comb, core, 1);
2875 tree_to_aff_combination (core, type, &tmp);
2876 aff_combination_add (comb, &tmp);
2880 tree_to_aff_combination (toffset, type, &tmp);
2881 aff_combination_add (comb, &tmp);
2889 aff_combination_elt (comb, type, expr);
2892 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2895 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2896 unsigned HOST_WIDE_INT mask)
2898 enum tree_code code;
2901 elt = fold_convert (type, elt);
2908 return fold_build2 (PLUS_EXPR, type, expr, elt);
2914 return fold_build1 (NEGATE_EXPR, type, elt);
2916 return fold_build2 (MINUS_EXPR, type, expr, elt);
2920 return fold_build2 (MULT_EXPR, type, elt,
2921 build_int_cst_type (type, scale));
2923 if ((scale | (mask >> 1)) == mask)
2925 /* Scale is negative. */
2927 scale = (-scale) & mask;
2932 elt = fold_build2 (MULT_EXPR, type, elt,
2933 build_int_cst_type (type, scale));
2934 return fold_build2 (code, type, expr, elt);
2937 /* Makes tree from the affine combination COMB. */
2940 aff_combination_to_tree (struct affine_tree_combination *comb)
2942 tree type = comb->type;
2943 tree expr = comb->rest;
2945 unsigned HOST_WIDE_INT off, sgn;
2947 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2949 for (i = 0; i < comb->n; i++)
2950 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2953 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2955 /* Offset is negative. */
2956 off = (-comb->offset) & comb->mask;
2964 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2968 /* Folds X + RATIO * Y in TYPE. */
2971 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2973 enum tree_code code;
2975 struct affine_tree_combination cx, cy;
2977 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2980 return fold_build2 (PLUS_EXPR, type, x, y);
2982 return fold_build2 (MINUS_EXPR, type, x, y);
2992 cst = build_int_cst_type (type, ratio);
2993 y = fold_build2 (MULT_EXPR, type, y, cst);
2994 return fold_build2 (code, type, x, y);
2997 tree_to_aff_combination (x, type, &cx);
2998 tree_to_aff_combination (y, type, &cy);
2999 aff_combination_scale (&cy, ratio);
3000 aff_combination_add (&cx, &cy);
3002 return aff_combination_to_tree (&cx);
3005 /* Determines the expression by that USE is expressed from induction variable
3006 CAND at statement AT in LOOP. */
3009 get_computation_at (struct loop *loop,
3010 struct iv_use *use, struct iv_cand *cand, tree at)
3012 tree ubase = use->iv->base;
3013 tree ustep = use->iv->step;
3014 tree cbase = cand->iv->base;
3015 tree cstep = cand->iv->step;
3016 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3020 unsigned HOST_WIDE_INT ustepi, cstepi;
3021 HOST_WIDE_INT ratioi;
3023 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3025 /* We do not have a precision to express the values of use. */
3029 expr = var_at_stmt (loop, cand, at);
3031 if (TREE_TYPE (expr) != ctype)
3033 /* This may happen with the original ivs. */
3034 expr = fold_convert (ctype, expr);
3037 if (TYPE_UNSIGNED (utype))
3041 uutype = unsigned_type_for (utype);
3042 ubase = fold_convert (uutype, ubase);
3043 ustep = fold_convert (uutype, ustep);
3046 if (uutype != ctype)
3048 expr = fold_convert (uutype, expr);
3049 cbase = fold_convert (uutype, cbase);
3050 cstep = fold_convert (uutype, cstep);
3053 if (cst_and_fits_in_hwi (cstep)
3054 && cst_and_fits_in_hwi (ustep))
3056 ustepi = int_cst_value (ustep);
3057 cstepi = int_cst_value (cstep);
3059 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3061 /* TODO maybe consider case when ustep divides cstep and the ratio is
3062 a power of 2 (so that the division is fast to execute)? We would
3063 need to be much more careful with overflows etc. then. */
3067 ratio = build_int_cst_type (uutype, ratioi);
3071 ratio = constant_multiple_of (uutype, ustep, cstep);
3075 /* Ratioi is only used to detect special cases when the multiplicative
3076 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3077 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3078 to integer_onep/integer_all_onesp, since the former ignores
3080 if (cst_and_fits_in_hwi (ratio))
3081 ratioi = int_cst_value (ratio);
3082 else if (integer_onep (ratio))
3084 else if (integer_all_onesp (ratio))
3090 /* We may need to shift the value if we are after the increment. */
3091 if (stmt_after_increment (loop, cand, at))
3092 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3094 /* use = ubase - ratio * cbase + ratio * var.
3096 In general case ubase + ratio * (var - cbase) could be better (one less
3097 multiplication), but often it is possible to eliminate redundant parts
3098 of computations from (ubase - ratio * cbase) term, and if it does not
3099 happen, fold is able to apply the distributive law to obtain this form
3104 delta = fold_affine_sum (uutype, ubase, cbase, -1);
3105 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3107 else if (ratioi == -1)
3109 delta = fold_affine_sum (uutype, ubase, cbase, 1);
3110 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3115 delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3118 delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3119 delta = fold_affine_sum (uutype, ubase, delta, -1);
3121 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3122 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3125 return fold_convert (utype, expr);
3128 /* Determines the expression by that USE is expressed from induction variable
3132 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3134 return get_computation_at (loop, use, cand, use->stmt);
3137 /* Returns cost of addition in MODE. */
3140 add_cost (enum machine_mode mode)
3142 static unsigned costs[NUM_MACHINE_MODES];
3150 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3151 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3152 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3157 cost = seq_cost (seq);
3163 if (dump_file && (dump_flags & TDF_DETAILS))
3164 fprintf (dump_file, "Addition in %s costs %d\n",
3165 GET_MODE_NAME (mode), cost);
3169 /* Entry in a hashtable of already known costs for multiplication. */
3172 HOST_WIDE_INT cst; /* The constant to multiply by. */
3173 enum machine_mode mode; /* In mode. */
3174 unsigned cost; /* The cost. */
3177 /* Counts hash value for the ENTRY. */
3180 mbc_entry_hash (const void *entry)
3182 const struct mbc_entry *e = entry;
3184 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3187 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3190 mbc_entry_eq (const void *entry1, const void *entry2)
3192 const struct mbc_entry *e1 = entry1;
3193 const struct mbc_entry *e2 = entry2;
3195 return (e1->mode == e2->mode
3196 && e1->cst == e2->cst);
3199 /* Returns cost of multiplication by constant CST in MODE. */
3202 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3204 static htab_t costs;
3205 struct mbc_entry **cached, act;
3210 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3214 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3216 return (*cached)->cost;
3218 *cached = xmalloc (sizeof (struct mbc_entry));
3219 (*cached)->mode = mode;
3220 (*cached)->cst = cst;
3223 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3228 cost = seq_cost (seq);
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3231 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3232 (int) cst, GET_MODE_NAME (mode), cost);
3234 (*cached)->cost = cost;
3239 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3240 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3241 variable is omitted. The created memory accesses MODE.
3243 TODO -- there must be some better way. This all is quite crude. */
3246 get_address_cost (bool symbol_present, bool var_present,
3247 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3249 #define MAX_RATIO 128
3250 static sbitmap valid_mult;
3251 static HOST_WIDE_INT rat, off;
3252 static HOST_WIDE_INT min_offset, max_offset;
3253 static unsigned costs[2][2][2][2];
3254 unsigned cost, acost;
3255 rtx seq, addr, base;
3256 bool offset_p, ratio_p;
3258 HOST_WIDE_INT s_offset;
3259 unsigned HOST_WIDE_INT mask;
3266 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3268 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3269 for (i = 1; i <= 1 << 20; i <<= 1)
3271 XEXP (addr, 1) = GEN_INT (i);
3272 if (!memory_address_p (Pmode, addr))
3275 max_offset = i >> 1;
3278 for (i = 1; i <= 1 << 20; i <<= 1)
3280 XEXP (addr, 1) = GEN_INT (-i);
3281 if (!memory_address_p (Pmode, addr))
3284 min_offset = -(i >> 1);
3286 if (dump_file && (dump_flags & TDF_DETAILS))
3288 fprintf (dump_file, "get_address_cost:\n");
3289 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3290 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3293 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3294 sbitmap_zero (valid_mult);
3296 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3297 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3299 XEXP (addr, 1) = GEN_INT (i);
3300 if (memory_address_p (Pmode, addr))
3302 SET_BIT (valid_mult, i + MAX_RATIO);
3307 if (dump_file && (dump_flags & TDF_DETAILS))
3309 fprintf (dump_file, " allowed multipliers:");
3310 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3311 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3312 fprintf (dump_file, " %d", (int) i);
3313 fprintf (dump_file, "\n");
3314 fprintf (dump_file, "\n");
3318 bits = GET_MODE_BITSIZE (Pmode);
3319 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3321 if ((offset >> (bits - 1) & 1))
3326 offset_p = (s_offset != 0
3327 && min_offset <= s_offset && s_offset <= max_offset);
3328 ratio_p = (ratio != 1
3329 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3330 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
3332 if (ratio != 1 && !ratio_p)
3333 cost += multiply_by_cost (ratio, Pmode);
3335 if (s_offset && !offset_p && !symbol_present)
3337 cost += add_cost (Pmode);
3341 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3346 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3347 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3349 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3352 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3356 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3358 base = gen_rtx_fmt_e (CONST, Pmode,
3359 gen_rtx_fmt_ee (PLUS, Pmode,
3364 base = GEN_INT (off);
3369 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3372 addr = memory_address (Pmode, addr);
3376 acost = seq_cost (seq);
3377 acost += address_cost (addr, Pmode);
3381 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3384 return cost + acost;
3386 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3387 invariants the computation depends on. */
3390 force_var_cost (struct ivopts_data *data,
3391 tree expr, bitmap *depends_on)
3393 static bool costs_initialized = false;
3394 static unsigned integer_cost;
3395 static unsigned symbol_cost;
3396 static unsigned address_cost;
3398 unsigned cost0, cost1, cost;
3399 enum machine_mode mode;
3401 if (!costs_initialized)
3403 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3404 rtx x = gen_rtx_MEM (DECL_MODE (var),
3405 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3407 tree type = build_pointer_type (integer_type_node);
3409 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3412 SET_DECL_RTL (var, x);
3413 TREE_STATIC (var) = 1;
3414 addr = build1 (ADDR_EXPR, type, var);
3415 symbol_cost = computation_cost (addr) + 1;
3418 = computation_cost (build2 (PLUS_EXPR, type,
3420 build_int_cst_type (type, 2000))) + 1;
3421 if (dump_file && (dump_flags & TDF_DETAILS))
3423 fprintf (dump_file, "force_var_cost:\n");
3424 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3425 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3426 fprintf (dump_file, " address %d\n", (int) address_cost);
3427 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3428 fprintf (dump_file, "\n");
3431 costs_initialized = true;
3438 fd_ivopts_data = data;
3439 walk_tree (&expr, find_depends, depends_on, NULL);
3442 if (SSA_VAR_P (expr))
3445 if (TREE_INVARIANT (expr))
3447 if (TREE_CODE (expr) == INTEGER_CST)
3448 return integer_cost;
3450 if (TREE_CODE (expr) == ADDR_EXPR)
3452 tree obj = TREE_OPERAND (expr, 0);
3454 if (TREE_CODE (obj) == VAR_DECL
3455 || TREE_CODE (obj) == PARM_DECL
3456 || TREE_CODE (obj) == RESULT_DECL)
3460 return address_cost;
3463 switch (TREE_CODE (expr))
3468 op0 = TREE_OPERAND (expr, 0);
3469 op1 = TREE_OPERAND (expr, 1);
3473 if (is_gimple_val (op0))
3476 cost0 = force_var_cost (data, op0, NULL);
3478 if (is_gimple_val (op1))
3481 cost1 = force_var_cost (data, op1, NULL);
3486 /* Just an arbitrary value, FIXME. */
3487 return target_spill_cost;
3490 mode = TYPE_MODE (TREE_TYPE (expr));
3491 switch (TREE_CODE (expr))
3495 cost = add_cost (mode);
3499 if (cst_and_fits_in_hwi (op0))
3500 cost = multiply_by_cost (int_cst_value (op0), mode);
3501 else if (cst_and_fits_in_hwi (op1))
3502 cost = multiply_by_cost (int_cst_value (op1), mode);
3504 return target_spill_cost;
3514 /* Bound the cost by target_spill_cost. The parts of complicated
3515 computations often are either loop invariant or at least can
3516 be shared between several iv uses, so letting this grow without
3517 limits would not give reasonable results. */
3518 return cost < target_spill_cost ? cost : target_spill_cost;
3521 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3522 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3523 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3524 invariants the computation depends on. */
3527 split_address_cost (struct ivopts_data *data,
3528 tree addr, bool *symbol_present, bool *var_present,
3529 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3532 HOST_WIDE_INT bitsize;
3533 HOST_WIDE_INT bitpos;
3535 enum machine_mode mode;
3536 int unsignedp, volatilep;
3538 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3539 &unsignedp, &volatilep, false);
3542 || bitpos % BITS_PER_UNIT != 0
3543 || TREE_CODE (core) != VAR_DECL)
3545 *symbol_present = false;
3546 *var_present = true;
3547 fd_ivopts_data = data;
3548 walk_tree (&addr, find_depends, depends_on, NULL);
3549 return target_spill_cost;
3552 *offset += bitpos / BITS_PER_UNIT;
3553 if (TREE_STATIC (core)
3554 || DECL_EXTERNAL (core))
3556 *symbol_present = true;
3557 *var_present = false;
3561 *symbol_present = false;
3562 *var_present = true;
3566 /* Estimates cost of expressing difference of addresses E1 - E2 as
3567 var + symbol + offset. The value of offset is added to OFFSET,
3568 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3569 part is missing. DEPENDS_ON is a set of the invariants the computation
3573 ptr_difference_cost (struct ivopts_data *data,
3574 tree e1, tree e2, bool *symbol_present, bool *var_present,
3575 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3577 HOST_WIDE_INT diff = 0;
3580 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3582 if (ptr_difference_const (e1, e2, &diff))
3585 *symbol_present = false;
3586 *var_present = false;
3590 if (e2 == integer_zero_node)
3591 return split_address_cost (data, TREE_OPERAND (e1, 0),
3592 symbol_present, var_present, offset, depends_on);
3594 *symbol_present = false;
3595 *var_present = true;
3597 cost = force_var_cost (data, e1, depends_on);
3598 cost += force_var_cost (data, e2, depends_on);
3599 cost += add_cost (Pmode);
3604 /* Estimates cost of expressing difference E1 - E2 as
3605 var + symbol + offset. The value of offset is added to OFFSET,
3606 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3607 part is missing. DEPENDS_ON is a set of the invariants the computation
3611 difference_cost (struct ivopts_data *data,
3612 tree e1, tree e2, bool *symbol_present, bool *var_present,
3613 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3616 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3617 unsigned HOST_WIDE_INT off1, off2;
3619 e1 = strip_offset (e1, &off1);
3620 e2 = strip_offset (e2, &off2);
3621 *offset += off1 - off2;
3626 if (TREE_CODE (e1) == ADDR_EXPR)
3627 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3629 *symbol_present = false;
3631 if (operand_equal_p (e1, e2, 0))
3633 *var_present = false;
3636 *var_present = true;
3638 return force_var_cost (data, e1, depends_on);
3642 cost = force_var_cost (data, e2, depends_on);
3643 cost += multiply_by_cost (-1, mode);
3648 cost = force_var_cost (data, e1, depends_on);
3649 cost += force_var_cost (data, e2, depends_on);
3650 cost += add_cost (mode);
3655 /* Determines the cost of the computation by that USE is expressed
3656 from induction variable CAND. If ADDRESS_P is true, we just need
3657 to create an address from it, otherwise we want to get it into
3658 register. A set of invariants we depend on is stored in
3659 DEPENDS_ON. AT is the statement at that the value is computed. */
3662 get_computation_cost_at (struct ivopts_data *data,
3663 struct iv_use *use, struct iv_cand *cand,
3664 bool address_p, bitmap *depends_on, tree at)
3666 tree ubase = use->iv->base, ustep = use->iv->step;
3668 tree utype = TREE_TYPE (ubase), ctype;
3669 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3670 HOST_WIDE_INT ratio, aratio;
3671 bool var_present, symbol_present;
3672 unsigned cost = 0, n_sums;
3676 /* Only consider real candidates. */
3680 cbase = cand->iv->base;
3681 cstep = cand->iv->step;
3682 ctype = TREE_TYPE (cbase);
3684 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3686 /* We do not have a precision to express the values of use. */
3692 /* Do not try to express address of an object with computation based
3693 on address of a different object. This may cause problems in rtl
3694 level alias analysis (that does not expect this to be happening,
3695 as this is illegal in C), and would be unlikely to be useful
3697 if (use->iv->base_object
3698 && cand->iv->base_object
3699 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3703 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3705 /* TODO -- add direct handling of this case. */
3709 /* CSTEPI is removed from the offset in case statement is after the
3710 increment. If the step is not constant, we use zero instead.
3711 This is a bit imprecise (there is the extra addition), but
3712 redundancy elimination is likely to transform the code so that
3713 it uses value of the variable before increment anyway,
3714 so it is not that much unrealistic. */
3715 if (cst_and_fits_in_hwi (cstep))
3716 cstepi = int_cst_value (cstep);
3720 if (cst_and_fits_in_hwi (ustep)
3721 && cst_and_fits_in_hwi (cstep))
3723 ustepi = int_cst_value (ustep);
3725 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3732 rat = constant_multiple_of (utype, ustep, cstep);
3737 if (cst_and_fits_in_hwi (rat))
3738 ratio = int_cst_value (rat);
3739 else if (integer_onep (rat))
3741 else if (integer_all_onesp (rat))
3747 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3748 or ratio == 1, it is better to handle this like
3750 ubase - ratio * cbase + ratio * var
3752 (also holds in the case ratio == -1, TODO. */
3754 if (cst_and_fits_in_hwi (cbase))
3756 offset = - ratio * int_cst_value (cbase);
3757 cost += difference_cost (data,
3758 ubase, integer_zero_node,
3759 &symbol_present, &var_present, &offset,
3762 else if (ratio == 1)
3764 cost += difference_cost (data,
3766 &symbol_present, &var_present, &offset,
3771 cost += force_var_cost (data, cbase, depends_on);
3772 cost += add_cost (TYPE_MODE (ctype));
3773 cost += difference_cost (data,
3774 ubase, integer_zero_node,
3775 &symbol_present, &var_present, &offset,
3779 /* If we are after the increment, the value of the candidate is higher by
3781 if (stmt_after_increment (data->current_loop, cand, at))
3782 offset -= ratio * cstepi;
3784 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3785 (symbol/var/const parts may be omitted). If we are looking for an address,
3786 find the cost of addressing this. */
3788 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3790 /* Otherwise estimate the costs for computing the expression. */
3791 aratio = ratio > 0 ? ratio : -ratio;
3792 if (!symbol_present && !var_present && !offset)
3795 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3801 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3805 /* Symbol + offset should be compile-time computable. */
3806 && (symbol_present || offset))
3809 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3813 /* Just get the expression, expand it and measure the cost. */
3814 tree comp = get_computation_at (data->current_loop, use, cand, at);
3820 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3822 return computation_cost (comp);
3826 /* Determines the cost of the computation by that USE is expressed
3827 from induction variable CAND. If ADDRESS_P is true, we just need
3828 to create an address from it, otherwise we want to get it into
3829 register. A set of invariants we depend on is stored in
3833 get_computation_cost (struct ivopts_data *data,
3834 struct iv_use *use, struct iv_cand *cand,
3835 bool address_p, bitmap *depends_on)
3837 return get_computation_cost_at (data,
3838 use, cand, address_p, depends_on, use->stmt);
3841 /* Determines cost of basing replacement of USE on CAND in a generic
3845 determine_use_iv_cost_generic (struct ivopts_data *data,
3846 struct iv_use *use, struct iv_cand *cand)
3851 /* The simple case first -- if we need to express value of the preserved
3852 original biv, the cost is 0. This also prevents us from counting the
3853 cost of increment twice -- once at this use and once in the cost of
3855 if (cand->pos == IP_ORIGINAL
3856 && cand->incremented_at == use->stmt)
3858 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3862 cost = get_computation_cost (data, use, cand, false, &depends_on);
3863 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3865 return cost != INFTY;
3868 /* Determines cost of basing replacement of USE on CAND in an address. */
3871 determine_use_iv_cost_address (struct ivopts_data *data,
3872 struct iv_use *use, struct iv_cand *cand)
3875 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3877 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3879 return cost != INFTY;
3882 /* Computes value of induction variable IV in iteration NITER. */
3885 iv_value (struct iv *iv, tree niter)
3888 tree type = TREE_TYPE (iv->base);
3890 niter = fold_convert (type, niter);
3891 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3893 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3896 /* Computes value of candidate CAND at position AT in iteration NITER. */
3899 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3901 tree val = iv_value (cand->iv, niter);
3902 tree type = TREE_TYPE (cand->iv->base);
3904 if (stmt_after_increment (loop, cand, at))
3905 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3910 /* Returns period of induction variable iv. */
3913 iv_period (struct iv *iv)
3915 tree step = iv->step, period, type;
3918 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3920 /* Period of the iv is gcd (step, type range). Since type range is power
3921 of two, it suffices to determine the maximum power of two that divides
3923 pow2div = num_ending_zeros (step);
3924 type = unsigned_type_for (TREE_TYPE (step));
3926 period = build_low_bits_mask (type,
3927 (TYPE_PRECISION (type)
3928 - tree_low_cst (pow2div, 1)));
3933 /* Returns the comparison operator used when eliminating the iv USE. */
3935 static enum tree_code
3936 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3938 struct loop *loop = data->current_loop;
3942 ex_bb = bb_for_stmt (use->stmt);
3943 exit = EDGE_SUCC (ex_bb, 0);
3944 if (flow_bb_inside_loop_p (loop, exit->dest))
3945 exit = EDGE_SUCC (ex_bb, 1);
3947 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3950 /* Check whether it is possible to express the condition in USE by comparison
3951 of candidate CAND. If so, store the value compared with to BOUND. */
3954 may_eliminate_iv (struct ivopts_data *data,
3955 struct iv_use *use, struct iv_cand *cand, tree *bound)
3959 struct tree_niter_desc *niter;
3961 tree wider_type, period, per_type;
3962 struct loop *loop = data->current_loop;
3964 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3967 /* For now works only for exits that dominate the loop latch. TODO -- extend
3968 for other conditions inside loop body. */
3969 ex_bb = bb_for_stmt (use->stmt);
3970 if (use->stmt != last_stmt (ex_bb)
3971 || TREE_CODE (use->stmt) != COND_EXPR)
3973 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3976 exit = EDGE_SUCC (ex_bb, 0);
3977 if (flow_bb_inside_loop_p (loop, exit->dest))
3978 exit = EDGE_SUCC (ex_bb, 1);
3979 if (flow_bb_inside_loop_p (loop, exit->dest))
3982 niter = niter_for_exit (data, exit);
3984 || !zero_p (niter->may_be_zero))
3988 nit_type = TREE_TYPE (nit);
3990 /* Determine whether we may use the variable to test whether niter iterations
3991 elapsed. This is the case iff the period of the induction variable is
3992 greater than the number of iterations. */
3993 period = iv_period (cand->iv);
3996 per_type = TREE_TYPE (period);
3998 wider_type = TREE_TYPE (period);
3999 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4000 wider_type = per_type;
4002 wider_type = nit_type;
4004 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
4005 fold_convert (wider_type, period),
4006 fold_convert (wider_type, nit)))))
4009 *bound = cand_value_at (loop, cand, use->stmt, nit);
4013 /* Determines cost of basing replacement of USE on CAND in a condition. */
4016 determine_use_iv_cost_condition (struct ivopts_data *data,
4017 struct iv_use *use, struct iv_cand *cand)
4019 tree bound = NULL_TREE, op, cond;
4020 bitmap depends_on = NULL;
4023 /* Only consider real candidates. */
4026 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4030 if (may_eliminate_iv (data, use, cand, &bound))
4032 cost = force_var_cost (data, bound, &depends_on);
4034 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4035 return cost != INFTY;
4038 /* The induction variable elimination failed; just express the original
4039 giv. If it is compared with an invariant, note that we cannot get
4041 cost = get_computation_cost (data, use, cand, false, &depends_on);
4044 if (TREE_CODE (cond) != SSA_NAME)
4046 op = TREE_OPERAND (cond, 0);
4047 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4048 op = TREE_OPERAND (cond, 1);
4049 if (TREE_CODE (op) == SSA_NAME)
4051 op = get_iv (data, op)->base;
4052 fd_ivopts_data = data;
4053 walk_tree (&op, find_depends, &depends_on, NULL);
4057 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4058 return cost != INFTY;
4061 /* Checks whether it is possible to replace the final value of USE by
4062 a direct computation. If so, the formula is stored to *VALUE. */
4065 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4068 struct loop *loop = data->current_loop;
4070 struct tree_niter_desc *niter;
4072 exit = single_dom_exit (loop);
4076 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4077 bb_for_stmt (use->stmt)));
4079 niter = niter_for_single_dom_exit (data);
4081 || !zero_p (niter->may_be_zero))
4084 *value = iv_value (use->iv, niter->niter);
4089 /* Determines cost of replacing final value of USE using CAND. */
4092 determine_use_iv_cost_outer (struct ivopts_data *data,
4093 struct iv_use *use, struct iv_cand *cand)
4098 tree value = NULL_TREE;
4099 struct loop *loop = data->current_loop;
4101 /* The simple case first -- if we need to express value of the preserved
4102 original biv, the cost is 0. This also prevents us from counting the
4103 cost of increment twice -- once at this use and once in the cost of
4105 if (cand->pos == IP_ORIGINAL
4106 && cand->incremented_at == use->stmt)
4108 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4114 if (!may_replace_final_value (data, use, &value))
4116 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4121 cost = force_var_cost (data, value, &depends_on);
4123 cost /= AVG_LOOP_NITER (loop);
4125 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4126 return cost != INFTY;
4129 exit = single_dom_exit (loop);
4132 /* If there is just a single exit, we may use value of the candidate
4133 after we take it to determine the value of use. */
4134 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4135 last_stmt (exit->src));
4137 cost /= AVG_LOOP_NITER (loop);
4141 /* Otherwise we just need to compute the iv. */
4142 cost = get_computation_cost (data, use, cand, false, &depends_on);
4145 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4147 return cost != INFTY;
4150 /* Determines cost of basing replacement of USE on CAND. Returns false
4151 if USE cannot be based on CAND. */
4154 determine_use_iv_cost (struct ivopts_data *data,
4155 struct iv_use *use, struct iv_cand *cand)
4159 case USE_NONLINEAR_EXPR:
4160 return determine_use_iv_cost_generic (data, use, cand);
4163 return determine_use_iv_cost_outer (data, use, cand);
4166 return determine_use_iv_cost_address (data, use, cand);
4169 return determine_use_iv_cost_condition (data, use, cand);
4176 /* Determines costs of basing the use of the iv on an iv candidate. */
4179 determine_use_iv_costs (struct ivopts_data *data)
4183 struct iv_cand *cand;
4184 bitmap to_clear = BITMAP_ALLOC (NULL);
4186 alloc_use_cost_map (data);
4188 for (i = 0; i < n_iv_uses (data); i++)
4190 use = iv_use (data, i);
4192 if (data->consider_all_candidates)
4194 for (j = 0; j < n_iv_cands (data); j++)
4196 cand = iv_cand (data, j);
4197 determine_use_iv_cost (data, use, cand);
4204 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4206 cand = iv_cand (data, j);
4207 if (!determine_use_iv_cost (data, use, cand))
4208 bitmap_set_bit (to_clear, j);
4211 /* Remove the candidates for that the cost is infinite from
4212 the list of related candidates. */
4213 bitmap_and_compl_into (use->related_cands, to_clear);
4214 bitmap_clear (to_clear);
4218 BITMAP_FREE (to_clear);
4220 if (dump_file && (dump_flags & TDF_DETAILS))
4222 fprintf (dump_file, "Use-candidate costs:\n");
4224 for (i = 0; i < n_iv_uses (data); i++)
4226 use = iv_use (data, i);
4228 fprintf (dump_file, "Use %d:\n", i);
4229 fprintf (dump_file, " cand\tcost\tdepends on\n");
4230 for (j = 0; j < use->n_map_members; j++)
4232 if (!use->cost_map[j].cand
4233 || use->cost_map[j].cost == INFTY)
4236 fprintf (dump_file, " %d\t%d\t",
4237 use->cost_map[j].cand->id,
4238 use->cost_map[j].cost);
4239 if (use->cost_map[j].depends_on)
4240 bitmap_print (dump_file,
4241 use->cost_map[j].depends_on, "","");
4242 fprintf (dump_file, "\n");
4245 fprintf (dump_file, "\n");
4247 fprintf (dump_file, "\n");
4251 /* Determines cost of the candidate CAND. */
4254 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4256 unsigned cost_base, cost_step;
4265 /* There are two costs associated with the candidate -- its increment
4266 and its initialization. The second is almost negligible for any loop
4267 that rolls enough, so we take it just very little into account. */
4269 base = cand->iv->base;
4270 cost_base = force_var_cost (data, base, NULL);
4271 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4273 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4275 /* Prefer the original iv unless we may gain something by replacing it;
4276 this is not really relevant for artificial ivs created by other
4278 if (cand->pos == IP_ORIGINAL
4279 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4282 /* Prefer not to insert statements into latch unless there are some
4283 already (so that we do not create unnecessary jumps). */
4284 if (cand->pos == IP_END
4285 && empty_block_p (ip_end_pos (data->current_loop)))
4289 /* Determines costs of computation of the candidates. */
4292 determine_iv_costs (struct ivopts_data *data)
4296 if (dump_file && (dump_flags & TDF_DETAILS))
4298 fprintf (dump_file, "Candidate costs:\n");
4299 fprintf (dump_file, " cand\tcost\n");
4302 for (i = 0; i < n_iv_cands (data); i++)
4304 struct iv_cand *cand = iv_cand (data, i);
4306 determine_iv_cost (data, cand);
4308 if (dump_file && (dump_flags & TDF_DETAILS))
4309 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4312 if (dump_file && (dump_flags & TDF_DETAILS))
4313 fprintf (dump_file, "\n");
4316 /* Calculates cost for having SIZE induction variables. */
4319 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4321 return global_cost_for_size (size,
4322 loop_data (data->current_loop)->regs_used,
4326 /* For each size of the induction variable set determine the penalty. */
4329 determine_set_costs (struct ivopts_data *data)
4333 struct loop *loop = data->current_loop;
4336 /* We use the following model (definitely improvable, especially the
4337 cost function -- TODO):
4339 We estimate the number of registers available (using MD data), name it A.
4341 We estimate the number of registers used by the loop, name it U. This
4342 number is obtained as the number of loop phi nodes (not counting virtual
4343 registers and bivs) + the number of variables from outside of the loop.
4345 We set a reserve R (free regs that are used for temporary computations,
4346 etc.). For now the reserve is a constant 3.
4348 Let I be the number of induction variables.
4350 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4351 make a lot of ivs without a reason).
4352 -- if A - R < U + I <= A, the cost is I * PRES_COST
4353 -- if U + I > A, the cost is I * PRES_COST and
4354 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4356 if (dump_file && (dump_flags & TDF_DETAILS))
4358 fprintf (dump_file, "Global costs:\n");
4359 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4360 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4361 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4362 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4366 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4368 op = PHI_RESULT (phi);
4370 if (!is_gimple_reg (op))
4373 if (get_iv (data, op))
4379 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4381 struct version_info *info = ver_info (data, j);
4383 if (info->inv_id && info->has_nonlin_use)
4387 loop_data (loop)->regs_used = n;
4388 if (dump_file && (dump_flags & TDF_DETAILS))
4389 fprintf (dump_file, " regs_used %d\n", n);
4391 if (dump_file && (dump_flags & TDF_DETAILS))
4393 fprintf (dump_file, " cost for size:\n");
4394 fprintf (dump_file, " ivs\tcost\n");
4395 for (j = 0; j <= 2 * target_avail_regs; j++)
4396 fprintf (dump_file, " %d\t%d\n", j,
4397 ivopts_global_cost_for_size (data, j));
4398 fprintf (dump_file, "\n");
4402 /* Returns true if A is a cheaper cost pair than B. */
4405 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4413 if (a->cost < b->cost)
4416 if (a->cost > b->cost)
4419 /* In case the costs are the same, prefer the cheaper candidate. */
4420 if (a->cand->cost < b->cand->cost)
4426 /* Computes the cost field of IVS structure. */
4429 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4433 cost += ivs->cand_use_cost;
4434 cost += ivs->cand_cost;
4435 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4440 /* Remove invariants in set INVS to set IVS. */
4443 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4451 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4453 ivs->n_invariant_uses[iid]--;
4454 if (ivs->n_invariant_uses[iid] == 0)
4459 /* Set USE not to be expressed by any candidate in IVS. */
4462 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4465 unsigned uid = use->id, cid;
4466 struct cost_pair *cp;
4468 cp = ivs->cand_for_use[uid];
4474 ivs->cand_for_use[uid] = NULL;
4475 ivs->n_cand_uses[cid]--;
4477 if (ivs->n_cand_uses[cid] == 0)
4479 bitmap_clear_bit (ivs->cands, cid);
4480 /* Do not count the pseudocandidates. */
4484 ivs->cand_cost -= cp->cand->cost;
4486 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4489 ivs->cand_use_cost -= cp->cost;
4491 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4492 iv_ca_recount_cost (data, ivs);
4495 /* Add invariants in set INVS to set IVS. */
4498 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4506 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4508 ivs->n_invariant_uses[iid]++;
4509 if (ivs->n_invariant_uses[iid] == 1)
4514 /* Set cost pair for USE in set IVS to CP. */
4517 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4518 struct iv_use *use, struct cost_pair *cp)
4520 unsigned uid = use->id, cid;
4522 if (ivs->cand_for_use[uid] == cp)
4525 if (ivs->cand_for_use[uid])
4526 iv_ca_set_no_cp (data, ivs, use);
4533 ivs->cand_for_use[uid] = cp;
4534 ivs->n_cand_uses[cid]++;
4535 if (ivs->n_cand_uses[cid] == 1)
4537 bitmap_set_bit (ivs->cands, cid);
4538 /* Do not count the pseudocandidates. */
4542 ivs->cand_cost += cp->cand->cost;
4544 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4547 ivs->cand_use_cost += cp->cost;
4548 iv_ca_set_add_invariants (ivs, cp->depends_on);
4549 iv_ca_recount_cost (data, ivs);
4553 /* Extend set IVS by expressing USE by some of the candidates in it
4557 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4560 struct cost_pair *best_cp = NULL, *cp;
4564 gcc_assert (ivs->upto >= use->id);
4566 if (ivs->upto == use->id)
4572 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4574 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4576 if (cheaper_cost_pair (cp, best_cp))
4580 iv_ca_set_cp (data, ivs, use, best_cp);
4583 /* Get cost for assignment IVS. */
4586 iv_ca_cost (struct iv_ca *ivs)
4588 return (ivs->bad_uses ? INFTY : ivs->cost);
4591 /* Returns true if all dependences of CP are among invariants in IVS. */
4594 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4599 if (!cp->depends_on)
4602 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4604 if (ivs->n_invariant_uses[i] == 0)
4611 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4612 it before NEXT_CHANGE. */
4614 static struct iv_ca_delta *
4615 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4616 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4618 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4621 change->old_cp = old_cp;
4622 change->new_cp = new_cp;
4623 change->next_change = next_change;
4628 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4631 static struct iv_ca_delta *
4632 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4634 struct iv_ca_delta *last;
4642 for (last = l1; last->next_change; last = last->next_change)
4644 last->next_change = l2;
4649 /* Returns candidate by that USE is expressed in IVS. */
4651 static struct cost_pair *
4652 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4654 return ivs->cand_for_use[use->id];
4657 /* Reverse the list of changes DELTA, forming the inverse to it. */
4659 static struct iv_ca_delta *
4660 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4662 struct iv_ca_delta *act, *next, *prev = NULL;
4663 struct cost_pair *tmp;
4665 for (act = delta; act; act = next)
4667 next = act->next_change;
4668 act->next_change = prev;
4672 act->old_cp = act->new_cp;
4679 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4680 reverted instead. */
4683 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4684 struct iv_ca_delta *delta, bool forward)
4686 struct cost_pair *from, *to;
4687 struct iv_ca_delta *act;
4690 delta = iv_ca_delta_reverse (delta);
4692 for (act = delta; act; act = act->next_change)
4696 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4697 iv_ca_set_cp (data, ivs, act->use, to);
4701 iv_ca_delta_reverse (delta);
4704 /* Returns true if CAND is used in IVS. */
4707 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4709 return ivs->n_cand_uses[cand->id] > 0;
4712 /* Returns number of induction variable candidates in the set IVS. */
4715 iv_ca_n_cands (struct iv_ca *ivs)
4717 return ivs->n_cands;
4720 /* Free the list of changes DELTA. */
4723 iv_ca_delta_free (struct iv_ca_delta **delta)
4725 struct iv_ca_delta *act, *next;
4727 for (act = *delta; act; act = next)
4729 next = act->next_change;
4736 /* Allocates new iv candidates assignment. */
4738 static struct iv_ca *
4739 iv_ca_new (struct ivopts_data *data)
4741 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4745 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4746 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4747 nw->cands = BITMAP_ALLOC (NULL);
4750 nw->cand_use_cost = 0;
4752 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4758 /* Free memory occupied by the set IVS. */
4761 iv_ca_free (struct iv_ca **ivs)
4763 free ((*ivs)->cand_for_use);
4764 free ((*ivs)->n_cand_uses);
4765 BITMAP_FREE ((*ivs)->cands);
4766 free ((*ivs)->n_invariant_uses);
4771 /* Dumps IVS to FILE. */
4774 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4776 const char *pref = " invariants ";
4779 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4780 bitmap_print (file, ivs->cands, " candidates ","\n");
4782 for (i = 1; i <= data->max_inv_id; i++)
4783 if (ivs->n_invariant_uses[i])
4785 fprintf (file, "%s%d", pref, i);
4788 fprintf (file, "\n");
4791 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4792 new set, and store differences in DELTA. Number of induction variables
4793 in the new set is stored to N_IVS. */
4796 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4797 struct iv_cand *cand, struct iv_ca_delta **delta,
4802 struct cost_pair *old_cp, *new_cp;
4805 for (i = 0; i < ivs->upto; i++)
4807 use = iv_use (data, i);
4808 old_cp = iv_ca_cand_for_use (ivs, use);
4811 && old_cp->cand == cand)
4814 new_cp = get_use_iv_cost (data, use, cand);
4818 if (!iv_ca_has_deps (ivs, new_cp))
4821 if (!cheaper_cost_pair (new_cp, old_cp))
4824 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4827 iv_ca_delta_commit (data, ivs, *delta, true);
4828 cost = iv_ca_cost (ivs);
4830 *n_ivs = iv_ca_n_cands (ivs);
4831 iv_ca_delta_commit (data, ivs, *delta, false);
4836 /* Try narrowing set IVS by removing CAND. Return the cost of
4837 the new set and store the differences in DELTA. */
4840 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4841 struct iv_cand *cand, struct iv_ca_delta **delta)
4845 struct cost_pair *old_cp, *new_cp, *cp;
4847 struct iv_cand *cnd;
4851 for (i = 0; i < n_iv_uses (data); i++)
4853 use = iv_use (data, i);
4855 old_cp = iv_ca_cand_for_use (ivs, use);
4856 if (old_cp->cand != cand)
4861 if (data->consider_all_candidates)
4863 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4868 cnd = iv_cand (data, ci);
4870 cp = get_use_iv_cost (data, use, cnd);
4873 if (!iv_ca_has_deps (ivs, cp))
4876 if (!cheaper_cost_pair (cp, new_cp))
4884 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4889 cnd = iv_cand (data, ci);
4891 cp = get_use_iv_cost (data, use, cnd);
4894 if (!iv_ca_has_deps (ivs, cp))
4897 if (!cheaper_cost_pair (cp, new_cp))
4906 iv_ca_delta_free (delta);
4910 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4913 iv_ca_delta_commit (data, ivs, *delta, true);
4914 cost = iv_ca_cost (ivs);
4915 iv_ca_delta_commit (data, ivs, *delta, false);
4920 /* Try optimizing the set of candidates IVS by removing candidates different
4921 from to EXCEPT_CAND from it. Return cost of the new set, and store
4922 differences in DELTA. */
4925 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4926 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4929 struct iv_ca_delta *act_delta, *best_delta;
4930 unsigned i, best_cost, acost;
4931 struct iv_cand *cand;
4934 best_cost = iv_ca_cost (ivs);
4936 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4938 cand = iv_cand (data, i);
4940 if (cand == except_cand)
4943 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4945 if (acost < best_cost)
4948 iv_ca_delta_free (&best_delta);
4949 best_delta = act_delta;
4952 iv_ca_delta_free (&act_delta);
4961 /* Recurse to possibly remove other unnecessary ivs. */
4962 iv_ca_delta_commit (data, ivs, best_delta, true);
4963 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4964 iv_ca_delta_commit (data, ivs, best_delta, false);
4965 *delta = iv_ca_delta_join (best_delta, *delta);
4969 /* Tries to extend the sets IVS in the best possible way in order
4970 to express the USE. */
4973 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4976 unsigned best_cost, act_cost;
4979 struct iv_cand *cand;
4980 struct iv_ca_delta *best_delta = NULL, *act_delta;
4981 struct cost_pair *cp;
4983 iv_ca_add_use (data, ivs, use);
4984 best_cost = iv_ca_cost (ivs);
4986 cp = iv_ca_cand_for_use (ivs, use);
4989 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4990 iv_ca_set_no_cp (data, ivs, use);
4993 /* First try important candidates. Only if it fails, try the specific ones.
4994 Rationale -- in loops with many variables the best choice often is to use
4995 just one generic biv. If we added here many ivs specific to the uses,
4996 the optimization algorithm later would be likely to get stuck in a local
4997 minimum, thus causing us to create too many ivs. The approach from
4998 few ivs to more seems more likely to be successful -- starting from few
4999 ivs, replacing an expensive use by a specific iv should always be a
5001 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5003 cand = iv_cand (data, i);
5005 if (iv_ca_cand_used_p (ivs, cand))
5008 cp = get_use_iv_cost (data, use, cand);
5012 iv_ca_set_cp (data, ivs, use, cp);
5013 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5014 iv_ca_set_no_cp (data, ivs, use);
5015 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5017 if (act_cost < best_cost)
5019 best_cost = act_cost;
5021 iv_ca_delta_free (&best_delta);
5022 best_delta = act_delta;
5025 iv_ca_delta_free (&act_delta);
5028 if (best_cost == INFTY)
5030 for (i = 0; i < use->n_map_members; i++)
5032 cp = use->cost_map + i;
5037 /* Already tried this. */
5038 if (cand->important)
5041 if (iv_ca_cand_used_p (ivs, cand))
5045 iv_ca_set_cp (data, ivs, use, cp);
5046 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5047 iv_ca_set_no_cp (data, ivs, use);
5048 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5051 if (act_cost < best_cost)
5053 best_cost = act_cost;
5056 iv_ca_delta_free (&best_delta);
5057 best_delta = act_delta;
5060 iv_ca_delta_free (&act_delta);
5064 iv_ca_delta_commit (data, ivs, best_delta, true);
5065 iv_ca_delta_free (&best_delta);
5067 return (best_cost != INFTY);
5070 /* Finds an initial assignment of candidates to uses. */
5072 static struct iv_ca *
5073 get_initial_solution (struct ivopts_data *data)
5075 struct iv_ca *ivs = iv_ca_new (data);
5078 for (i = 0; i < n_iv_uses (data); i++)
5079 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5088 /* Tries to improve set of induction variables IVS. */
5091 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5093 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5094 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5095 struct iv_cand *cand;
5097 /* Try extending the set of induction variables by one. */
5098 for (i = 0; i < n_iv_cands (data); i++)
5100 cand = iv_cand (data, i);
5102 if (iv_ca_cand_used_p (ivs, cand))
5105 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5109 /* If we successfully added the candidate and the set is small enough,
5110 try optimizing it by removing other candidates. */
5111 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5113 iv_ca_delta_commit (data, ivs, act_delta, true);
5114 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5115 iv_ca_delta_commit (data, ivs, act_delta, false);
5116 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5119 if (acost < best_cost)
5122 iv_ca_delta_free (&best_delta);
5123 best_delta = act_delta;
5126 iv_ca_delta_free (&act_delta);
5131 /* Try removing the candidates from the set instead. */
5132 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5134 /* Nothing more we can do. */
5139 iv_ca_delta_commit (data, ivs, best_delta, true);
5140 gcc_assert (best_cost == iv_ca_cost (ivs));
5141 iv_ca_delta_free (&best_delta);
5145 /* Attempts to find the optimal set of induction variables. We do simple
5146 greedy heuristic -- we try to replace at most one candidate in the selected
5147 solution and remove the unused ivs while this improves the cost. */
5149 static struct iv_ca *
5150 find_optimal_iv_set (struct ivopts_data *data)
5156 /* Get the initial solution. */
5157 set = get_initial_solution (data);
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5161 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5165 if (dump_file && (dump_flags & TDF_DETAILS))
5167 fprintf (dump_file, "Initial set of candidates:\n");
5168 iv_ca_dump (data, dump_file, set);
5171 while (try_improve_iv_set (data, set))
5173 if (dump_file && (dump_flags & TDF_DETAILS))
5175 fprintf (dump_file, "Improved to:\n");
5176 iv_ca_dump (data, dump_file, set);
5180 if (dump_file && (dump_flags & TDF_DETAILS))
5181 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5183 for (i = 0; i < n_iv_uses (data); i++)
5185 use = iv_use (data, i);
5186 use->selected = iv_ca_cand_for_use (set, use)->cand;
5192 /* Creates a new induction variable corresponding to CAND. */
5195 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5197 block_stmt_iterator incr_pos;
5207 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5211 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5216 /* Mark that the iv is preserved. */
5217 name_info (data, cand->var_before)->preserve_biv = true;
5218 name_info (data, cand->var_after)->preserve_biv = true;
5220 /* Rewrite the increment so that it uses var_before directly. */
5221 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5226 gimple_add_tmp_var (cand->var_before);
5227 add_referenced_tmp_var (cand->var_before);
5229 base = unshare_expr (cand->iv->base);
5231 create_iv (base, unshare_expr (cand->iv->step),
5232 cand->var_before, data->current_loop,
5233 &incr_pos, after, &cand->var_before, &cand->var_after);
5236 /* Creates new induction variables described in SET. */
5239 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5242 struct iv_cand *cand;
5245 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5247 cand = iv_cand (data, i);
5248 create_new_iv (data, cand);
5252 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5253 is true, remove also the ssa name defined by the statement. */
5256 remove_statement (tree stmt, bool including_defined_name)
5258 if (TREE_CODE (stmt) == PHI_NODE)
5260 if (!including_defined_name)
5262 /* Prevent the ssa name defined by the statement from being removed. */
5263 SET_PHI_RESULT (stmt, NULL);
5265 remove_phi_node (stmt, NULL_TREE);
5269 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5275 /* Rewrites USE (definition of iv used in a nonlinear expression)
5276 using candidate CAND. */
5279 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5280 struct iv_use *use, struct iv_cand *cand)
5283 tree op, stmts, tgt, ass;
5284 block_stmt_iterator bsi, pbsi;
5286 /* An important special case -- if we are asked to express value of
5287 the original iv by itself, just exit; there is no need to
5288 introduce a new computation (that might also need casting the
5289 variable to unsigned and back). */
5290 if (cand->pos == IP_ORIGINAL
5291 && TREE_CODE (use->stmt) == MODIFY_EXPR
5292 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5294 op = TREE_OPERAND (use->stmt, 1);
5296 /* Be a bit careful. In case variable is expressed in some
5297 complicated way, rewrite it so that we may get rid of this
5298 complicated expression. */
5299 if ((TREE_CODE (op) == PLUS_EXPR
5300 || TREE_CODE (op) == MINUS_EXPR)
5301 && TREE_OPERAND (op, 0) == cand->var_before
5302 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5306 comp = unshare_expr (get_computation (data->current_loop,
5308 switch (TREE_CODE (use->stmt))
5311 tgt = PHI_RESULT (use->stmt);
5313 /* If we should keep the biv, do not replace it. */
5314 if (name_info (data, tgt)->preserve_biv)
5317 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5318 while (!bsi_end_p (pbsi)
5319 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5327 tgt = TREE_OPERAND (use->stmt, 0);
5328 bsi = bsi_for_stmt (use->stmt);
5335 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5337 if (TREE_CODE (use->stmt) == PHI_NODE)
5340 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5341 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5342 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5343 remove_statement (use->stmt, false);
5344 SSA_NAME_DEF_STMT (tgt) = ass;
5349 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5350 TREE_OPERAND (use->stmt, 1) = op;
5354 /* Replaces ssa name in index IDX by its basic variable. Callback for
5358 idx_remove_ssa_names (tree base, tree *idx,
5359 void *data ATTRIBUTE_UNUSED)
5363 if (TREE_CODE (*idx) == SSA_NAME)
5364 *idx = SSA_NAME_VAR (*idx);
5366 if (TREE_CODE (base) == ARRAY_REF)
5368 op = &TREE_OPERAND (base, 2);
5370 && TREE_CODE (*op) == SSA_NAME)
5371 *op = SSA_NAME_VAR (*op);
5372 op = &TREE_OPERAND (base, 3);
5374 && TREE_CODE (*op) == SSA_NAME)
5375 *op = SSA_NAME_VAR (*op);
5381 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5384 unshare_and_remove_ssa_names (tree ref)
5386 ref = unshare_expr (ref);
5387 for_each_index (&ref, idx_remove_ssa_names, NULL);
5392 /* Rewrites base of memory access OP with expression WITH in statement
5393 pointed to by BSI. */
5396 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5398 tree bvar, var, new_name, copy, name;
5401 var = bvar = get_base_address (*op);
5403 if (!var || TREE_CODE (with) != SSA_NAME)
5406 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5407 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5408 if (TREE_CODE (var) == INDIRECT_REF)
5409 var = TREE_OPERAND (var, 0);
5410 if (TREE_CODE (var) == SSA_NAME)
5413 var = SSA_NAME_VAR (var);
5415 else if (DECL_P (var))
5420 /* We need to add a memory tag for the variable. But we do not want
5421 to add it to the temporary used for the computations, since this leads
5422 to problems in redundancy elimination when there are common parts
5423 in two computations referring to the different arrays. So we copy
5424 the variable to a new temporary. */
5425 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
5428 new_name = duplicate_ssa_name (name, copy);
5431 tree tag = var_ann (var)->type_mem_tag;
5432 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5433 add_referenced_tmp_var (new_ptr);
5435 var_ann (new_ptr)->type_mem_tag = tag;
5437 add_type_alias (new_ptr, var);
5438 new_name = make_ssa_name (new_ptr, copy);
5441 TREE_OPERAND (copy, 0) = new_name;
5443 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5449 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5450 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
5452 if (TREE_CODE (*op) == INDIRECT_REF)
5453 orig = REF_ORIGINAL (*op);
5455 orig = unshare_and_remove_ssa_names (*op);
5457 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5459 /* Record the original reference, for purposes of alias analysis. */
5460 REF_ORIGINAL (*op) = orig;
5462 /* Virtual operands in the original statement may have to be renamed
5463 because of the replacement. */
5464 mark_new_vars_to_rename (bsi_stmt (*bsi));
5467 /* Rewrites USE (address that is an iv) using candidate CAND. */
5470 rewrite_use_address (struct ivopts_data *data,
5471 struct iv_use *use, struct iv_cand *cand)
5473 tree comp = unshare_expr (get_computation (data->current_loop,
5475 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5477 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5480 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5482 rewrite_address_base (&bsi, use->op_p, op);
5485 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5489 rewrite_use_compare (struct ivopts_data *data,
5490 struct iv_use *use, struct iv_cand *cand)
5493 tree *op_p, cond, op, stmts, bound;
5494 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5495 enum tree_code compare;
5496 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5501 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5502 tree var_type = TREE_TYPE (var);
5504 compare = iv_elimination_compare (data, use);
5505 bound = fold_convert (var_type, bound);
5506 op = force_gimple_operand (unshare_expr (bound), &stmts,
5510 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5512 *use->op_p = build2 (compare, boolean_type_node, var, op);
5513 update_stmt (use->stmt);
5517 /* The induction variable elimination failed; just express the original
5519 comp = unshare_expr (get_computation (data->current_loop, use, cand));
5522 op_p = &TREE_OPERAND (cond, 0);
5523 if (TREE_CODE (*op_p) != SSA_NAME
5524 || zero_p (get_iv (data, *op_p)->step))
5525 op_p = &TREE_OPERAND (cond, 1);
5527 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5529 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5534 /* Ensure that operand *OP_P may be used at the end of EXIT without
5535 violating loop closed ssa form. */
5538 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5541 struct loop *def_loop;
5544 use = USE_FROM_PTR (op_p);
5545 if (TREE_CODE (use) != SSA_NAME)
5548 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5552 def_loop = def_bb->loop_father;
5553 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5556 /* Try finding a phi node that copies the value out of the loop. */
5557 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5558 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5563 /* Create such a phi node. */
5564 tree new_name = duplicate_ssa_name (use, NULL);
5566 phi = create_phi_node (new_name, exit->dest);
5567 SSA_NAME_DEF_STMT (new_name) = phi;
5568 add_phi_arg (phi, use, exit);
5571 SET_USE (op_p, PHI_RESULT (phi));
5574 /* Ensure that operands of STMT may be used at the end of EXIT without
5575 violating loop closed ssa form. */
5578 protect_loop_closed_ssa_form (edge exit, tree stmt)
5582 v_may_def_optype v_may_defs;
5585 uses = STMT_USE_OPS (stmt);
5586 for (i = 0; i < NUM_USES (uses); i++)
5587 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
5589 vuses = STMT_VUSE_OPS (stmt);
5590 for (i = 0; i < NUM_VUSES (vuses); i++)
5591 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5593 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5594 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5595 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5598 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5599 so that they are emitted on the correct place, and so that the loop closed
5600 ssa form is preserved. */
5603 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5605 tree_stmt_iterator tsi;
5606 block_stmt_iterator bsi;
5607 tree phi, stmt, def, next;
5609 if (!single_pred_p (exit->dest))
5610 split_loop_exit_edge (exit);
5612 /* Ensure there is label in exit->dest, so that we can
5614 tree_block_label (exit->dest);
5615 bsi = bsi_after_labels (exit->dest);
5617 if (TREE_CODE (stmts) == STATEMENT_LIST)
5619 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5621 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5622 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5627 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5628 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5634 for (phi = phi_nodes (exit->dest); phi; phi = next)
5636 next = PHI_CHAIN (phi);
5638 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5640 def = PHI_RESULT (phi);
5641 remove_statement (phi, false);
5642 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5644 SSA_NAME_DEF_STMT (def) = stmt;
5645 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5650 /* Rewrites the final value of USE (that is only needed outside of the loop)
5651 using candidate CAND. */
5654 rewrite_use_outer (struct ivopts_data *data,
5655 struct iv_use *use, struct iv_cand *cand)
5658 tree value, op, stmts, tgt;
5661 switch (TREE_CODE (use->stmt))
5664 tgt = PHI_RESULT (use->stmt);
5667 tgt = TREE_OPERAND (use->stmt, 0);
5673 exit = single_dom_exit (data->current_loop);
5679 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5683 value = get_computation_at (data->current_loop,
5684 use, cand, last_stmt (exit->src));
5686 value = unshare_expr (value);
5687 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5689 /* If we will preserve the iv anyway and we would need to perform
5690 some computation to replace the final value, do nothing. */
5691 if (stmts && name_info (data, tgt)->preserve_biv)
5694 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5696 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5698 if (USE_FROM_PTR (use_p) == tgt)
5699 SET_USE (use_p, op);
5703 compute_phi_arg_on_exit (exit, stmts, op);
5705 /* Enable removal of the statement. We cannot remove it directly,
5706 since we may still need the aliasing information attached to the
5707 ssa name defined by it. */
5708 name_info (data, tgt)->iv->have_use_for = false;
5712 /* If the variable is going to be preserved anyway, there is nothing to
5714 if (name_info (data, tgt)->preserve_biv)
5717 /* Otherwise we just need to compute the iv. */
5718 rewrite_use_nonlinear_expr (data, use, cand);
5721 /* Rewrites USE using candidate CAND. */
5724 rewrite_use (struct ivopts_data *data,
5725 struct iv_use *use, struct iv_cand *cand)
5729 case USE_NONLINEAR_EXPR:
5730 rewrite_use_nonlinear_expr (data, use, cand);
5734 rewrite_use_outer (data, use, cand);
5738 rewrite_use_address (data, use, cand);
5742 rewrite_use_compare (data, use, cand);
5748 update_stmt (use->stmt);
5751 /* Rewrite the uses using the selected induction variables. */
5754 rewrite_uses (struct ivopts_data *data)
5757 struct iv_cand *cand;
5760 for (i = 0; i < n_iv_uses (data); i++)
5762 use = iv_use (data, i);
5763 cand = use->selected;
5766 rewrite_use (data, use, cand);
5770 /* Removes the ivs that are not used after rewriting. */
5773 remove_unused_ivs (struct ivopts_data *data)
5778 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5780 struct version_info *info;
5782 info = ver_info (data, j);
5784 && !zero_p (info->iv->step)
5786 && !info->iv->have_use_for
5787 && !info->preserve_biv)
5788 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5792 /* Frees data allocated by the optimization of a single loop. */
5795 free_loop_data (struct ivopts_data *data)
5801 htab_empty (data->niters);
5803 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5805 struct version_info *info;
5807 info = ver_info (data, i);
5811 info->has_nonlin_use = false;
5812 info->preserve_biv = false;
5815 bitmap_clear (data->relevant);
5816 bitmap_clear (data->important_candidates);
5818 for (i = 0; i < n_iv_uses (data); i++)
5820 struct iv_use *use = iv_use (data, i);
5823 BITMAP_FREE (use->related_cands);
5824 for (j = 0; j < use->n_map_members; j++)
5825 if (use->cost_map[j].depends_on)
5826 BITMAP_FREE (use->cost_map[j].depends_on);
5827 free (use->cost_map);
5830 VEC_truncate (iv_use_p, data->iv_uses, 0);
5832 for (i = 0; i < n_iv_cands (data); i++)
5834 struct iv_cand *cand = iv_cand (data, i);
5838 if (cand->depends_on)
5839 BITMAP_FREE (cand->depends_on);
5842 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5844 if (data->version_info_size < num_ssa_names)
5846 data->version_info_size = 2 * num_ssa_names;
5847 free (data->version_info);
5848 data->version_info = xcalloc (data->version_info_size,
5849 sizeof (struct version_info));
5852 data->max_inv_id = 0;
5854 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5855 SET_DECL_RTL (obj, NULL_RTX);
5857 VEC_truncate (tree, decl_rtl_to_reset, 0);
5860 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5864 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5868 for (i = 1; i < loops->num; i++)
5869 if (loops->parray[i])
5871 free (loops->parray[i]->aux);
5872 loops->parray[i]->aux = NULL;
5875 free_loop_data (data);
5876 free (data->version_info);
5877 BITMAP_FREE (data->relevant);
5878 BITMAP_FREE (data->important_candidates);
5879 htab_delete (data->niters);
5881 VEC_free (tree, heap, decl_rtl_to_reset);
5882 VEC_free (iv_use_p, heap, data->iv_uses);
5883 VEC_free (iv_cand_p, heap, data->iv_candidates);
5886 /* Optimizes the LOOP. Returns true if anything changed. */
5889 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5891 bool changed = false;
5892 struct iv_ca *iv_ca;
5895 data->current_loop = loop;
5897 if (dump_file && (dump_flags & TDF_DETAILS))
5899 fprintf (dump_file, "Processing loop %d\n", loop->num);
5901 exit = single_dom_exit (loop);
5904 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5905 exit->src->index, exit->dest->index);
5906 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5907 fprintf (dump_file, "\n");
5910 fprintf (dump_file, "\n");
5913 /* For each ssa name determines whether it behaves as an induction variable
5915 if (!find_induction_variables (data))
5918 /* Finds interesting uses (item 1). */
5919 find_interesting_uses (data);
5920 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5923 /* Finds candidates for the induction variables (item 2). */
5924 find_iv_candidates (data);
5926 /* Calculates the costs (item 3, part 1). */
5927 determine_use_iv_costs (data);
5928 determine_iv_costs (data);
5929 determine_set_costs (data);
5931 /* Find the optimal set of induction variables (item 3, part 2). */
5932 iv_ca = find_optimal_iv_set (data);
5937 /* Create the new induction variables (item 4, part 1). */
5938 create_new_ivs (data, iv_ca);
5939 iv_ca_free (&iv_ca);
5941 /* Rewrite the uses (item 4, part 2). */
5942 rewrite_uses (data);
5944 /* Remove the ivs that are unused after rewriting. */
5945 remove_unused_ivs (data);
5947 /* We have changed the structure of induction variables; it might happen
5948 that definitions in the scev database refer to some of them that were
5953 free_loop_data (data);
5958 /* Main entry point. Optimizes induction variables in LOOPS. */
5961 tree_ssa_iv_optimize (struct loops *loops)
5964 struct ivopts_data data;
5966 tree_ssa_iv_optimize_init (loops, &data);
5968 /* Optimize the loops starting with the innermost ones. */
5969 loop = loops->tree_root;
5973 /* Scan the loops, inner ones first. */
5974 while (loop != loops->tree_root)
5976 if (dump_file && (dump_flags & TDF_DETAILS))
5977 flow_loop_dump (loop, dump_file, NULL, 1);
5979 tree_ssa_iv_optimize_loop (&data, loop);
5991 /* FIXME. IV opts introduces new aliases and call-clobbered
5992 variables, which need to be renamed. However, when we call the
5993 renamer, not all statements will be scanned for operands. In
5994 particular, the newly introduced aliases may appear in statements
5995 that are considered "unmodified", so the renamer will not get a
5996 chance to rename those operands.
5998 Work around this problem by forcing an operand re-scan on every
5999 statement. This will not be necessary once the new operand
6000 scanner is implemented. */
6001 if (need_ssa_update_p ())
6004 block_stmt_iterator si;
6006 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
6007 update_stmt (bsi_stmt (si));
6010 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
6011 tree_ssa_iv_optimize_finalize (loops, &data);