1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
127 unsigned regs_used; /* Number of registers used. */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
225 /* The maximum invariant id. */
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
246 /* The number of uses covered by the assignment. */
249 /* Number of uses that cannot be expressed by the candidates in the set. */
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
261 /* The number of candidates in the set. */
264 /* Total number of registers needed. */
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
280 /* Difference of two iv candidate assignments. */
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
369 if (!just_once_each_iteration_p (loop, exit->src))
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
379 dump_iv (FILE *file, struct iv *iv)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
435 fprintf (file, " outside\n");
439 fprintf (file, " address\n");
443 fprintf (file, " compare\n");
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
472 dump_uses (FILE *file, struct ivopts_data *data)
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
505 fprintf (file, " final value replacement\n");
512 fprintf (file, " incremented before exit test\n");
516 fprintf (file, " incremented at end\n");
520 fprintf (file, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
604 if (sbb == loop->latch)
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
626 if (stmt_bb != cand_bb)
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
635 if (bsi_stmt (bsi) == stmt)
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
652 return stmt_after_ip_normal_pos (loop, stmt);
655 return stmt_after_ip_original_pos (cand, stmt);
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
667 /* The edge for that the number of iterations is cached. */
670 /* True if the # of iterations was successfully determined. */
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter,
722 if (!nfe_desc->valid_p)
725 return &nfe_desc->niter;
728 /* Returns structure describing number of iterations determined from
729 single dominating exit of DATA->current_loop, or NULL if something
732 static struct tree_niter_desc *
733 niter_for_single_dom_exit (struct ivopts_data *data)
735 edge exit = single_dom_exit (data->current_loop);
740 return niter_for_exit (data, exit);
743 /* Initializes data structures used by the iv optimization pass, stored
744 in DATA. LOOPS is the loop tree. */
747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
751 data->version_info_size = 2 * num_ssa_names;
752 data->version_info = xcalloc (data->version_info_size,
753 sizeof (struct version_info));
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
759 for (i = 1; i < loops->num; i++)
760 if (loops->parray[i])
761 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
763 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
764 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
765 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
768 /* Returns a memory object to that EXPR points. In case we are able to
769 determine that it does not point to any such object, NULL is returned. */
772 determine_base_object (tree expr)
774 enum tree_code code = TREE_CODE (expr);
775 tree base, obj, op0, op1;
777 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
786 obj = TREE_OPERAND (expr, 0);
787 base = get_base_address (obj);
792 if (TREE_CODE (base) == INDIRECT_REF)
793 return determine_base_object (TREE_OPERAND (base, 0));
795 return fold_convert (ptr_type_node,
796 build_fold_addr_expr (base));
800 op0 = determine_base_object (TREE_OPERAND (expr, 0));
801 op1 = determine_base_object (TREE_OPERAND (expr, 1));
807 return (code == PLUS_EXPR
809 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
811 return fold_build2 (code, ptr_type_node, op0, op1);
815 return determine_base_object (TREE_OPERAND (expr, 0));
818 return fold_convert (ptr_type_node, expr);
822 /* Allocates an induction variable with given initial value BASE and step STEP
826 alloc_iv (tree base, tree step)
828 struct iv *iv = xcalloc (1, sizeof (struct iv));
830 if (step && integer_zerop (step))
834 iv->base_object = determine_base_object (base);
837 iv->have_use_for = false;
839 iv->ssa_name = NULL_TREE;
844 /* Sets STEP and BASE for induction variable IV. */
847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
849 struct version_info *info = name_info (data, iv);
851 gcc_assert (!info->iv);
853 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
854 info->iv = alloc_iv (base, step);
855 info->iv->ssa_name = iv;
858 /* Finds induction variable declaration for VAR. */
861 get_iv (struct ivopts_data *data, tree var)
865 if (!name_info (data, var)->iv)
867 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
870 || !flow_bb_inside_loop_p (data->current_loop, bb))
871 set_iv (data, var, var, NULL_TREE);
874 return name_info (data, var)->iv;
877 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
878 not define a simple affine biv with nonzero step. */
881 determine_biv_step (tree phi)
883 struct loop *loop = bb_for_stmt (phi)->loop_father;
884 tree name = PHI_RESULT (phi), base, step;
886 if (!is_gimple_reg (name))
889 if (!simple_iv (loop, phi, name, &base, &step, true))
898 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
901 abnormal_ssa_name_p (tree exp)
906 if (TREE_CODE (exp) != SSA_NAME)
909 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
912 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
913 abnormal phi node. Callback for for_each_index. */
916 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
917 void *data ATTRIBUTE_UNUSED)
919 if (TREE_CODE (base) == ARRAY_REF)
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
923 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
927 return !abnormal_ssa_name_p (*index);
930 /* Returns true if EXPR contains a ssa name that occurs in an
931 abnormal phi node. */
934 contains_abnormal_ssa_name_p (tree expr)
937 enum tree_code_class class;
942 code = TREE_CODE (expr);
943 class = TREE_CODE_CLASS (code);
945 if (code == SSA_NAME)
946 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
948 if (code == INTEGER_CST
949 || is_gimple_min_invariant (expr))
952 if (code == ADDR_EXPR)
953 return !for_each_index (&TREE_OPERAND (expr, 0),
954 idx_contains_abnormal_ssa_name_p,
961 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
966 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
978 /* Finds basic ivs. */
981 find_bivs (struct ivopts_data *data)
983 tree phi, step, type, base;
985 struct loop *loop = data->current_loop;
987 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
989 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
992 step = determine_biv_step (phi);
996 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
997 base = expand_simple_operations (base);
998 if (contains_abnormal_ssa_name_p (base)
999 || contains_abnormal_ssa_name_p (step))
1002 type = TREE_TYPE (PHI_RESULT (phi));
1003 base = fold_convert (type, base);
1005 step = fold_convert (type, step);
1007 set_iv (data, PHI_RESULT (phi), base, step);
1014 /* Marks basic ivs. */
1017 mark_bivs (struct ivopts_data *data)
1020 struct iv *iv, *incr_iv;
1021 struct loop *loop = data->current_loop;
1022 basic_block incr_bb;
1024 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1026 iv = get_iv (data, PHI_RESULT (phi));
1030 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1031 incr_iv = get_iv (data, var);
1035 /* If the increment is in the subloop, ignore it. */
1036 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1037 if (incr_bb->loop_father != data->current_loop
1038 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1042 incr_iv->biv_p = true;
1046 /* Checks whether STMT defines a linear induction variable and stores its
1047 parameters to BASE and STEP. */
1050 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1051 tree *base, tree *step)
1054 struct loop *loop = data->current_loop;
1059 if (TREE_CODE (stmt) != MODIFY_EXPR)
1062 lhs = TREE_OPERAND (stmt, 0);
1063 if (TREE_CODE (lhs) != SSA_NAME)
1066 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1068 *base = expand_simple_operations (*base);
1070 if (contains_abnormal_ssa_name_p (*base)
1071 || contains_abnormal_ssa_name_p (*step))
1077 /* Finds general ivs in statement STMT. */
1080 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1084 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1087 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1090 /* Finds general ivs in basic block BB. */
1093 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1095 block_stmt_iterator bsi;
1097 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1098 find_givs_in_stmt (data, bsi_stmt (bsi));
1101 /* Finds general ivs. */
1104 find_givs (struct ivopts_data *data)
1106 struct loop *loop = data->current_loop;
1107 basic_block *body = get_loop_body_in_dom_order (loop);
1110 for (i = 0; i < loop->num_nodes; i++)
1111 find_givs_in_bb (data, body[i]);
1115 /* For each ssa name defined in LOOP determines whether it is an induction
1116 variable and if so, its initial value and step. */
1119 find_induction_variables (struct ivopts_data *data)
1124 if (!find_bivs (data))
1130 if (dump_file && (dump_flags & TDF_DETAILS))
1132 struct tree_niter_desc *niter;
1134 niter = niter_for_single_dom_exit (data);
1138 fprintf (dump_file, " number of iterations ");
1139 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1140 fprintf (dump_file, "\n");
1142 fprintf (dump_file, " may be zero if ");
1143 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1144 fprintf (dump_file, "\n");
1145 fprintf (dump_file, "\n");
1148 fprintf (dump_file, "Induction variables:\n\n");
1150 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1152 if (ver_info (data, i)->iv)
1153 dump_iv (dump_file, ver_info (data, i)->iv);
1160 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1162 static struct iv_use *
1163 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1164 tree stmt, enum use_type use_type)
1166 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1168 use->id = n_iv_uses (data);
1169 use->type = use_type;
1173 use->related_cands = BITMAP_ALLOC (NULL);
1175 /* To avoid showing ssa name in the dumps, if it was not reset by the
1177 iv->ssa_name = NULL_TREE;
1179 if (dump_file && (dump_flags & TDF_DETAILS))
1180 dump_use (dump_file, use);
1182 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1187 /* Checks whether OP is a loop-level invariant and if so, records it.
1188 NONLINEAR_USE is true if the invariant is used in a way we do not
1189 handle specially. */
1192 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1195 struct version_info *info;
1197 if (TREE_CODE (op) != SSA_NAME
1198 || !is_gimple_reg (op))
1201 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1203 && flow_bb_inside_loop_p (data->current_loop, bb))
1206 info = name_info (data, op);
1208 info->has_nonlin_use |= nonlinear_use;
1210 info->inv_id = ++data->max_inv_id;
1211 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1214 /* Checks whether the use OP is interesting and if so, records it
1217 static struct iv_use *
1218 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1226 if (TREE_CODE (op) != SSA_NAME)
1229 iv = get_iv (data, op);
1233 if (iv->have_use_for)
1235 use = iv_use (data, iv->use_id);
1237 gcc_assert (use->type == USE_NONLINEAR_EXPR
1238 || use->type == USE_OUTER);
1240 if (type == USE_NONLINEAR_EXPR)
1241 use->type = USE_NONLINEAR_EXPR;
1245 if (zero_p (iv->step))
1247 record_invariant (data, op, true);
1250 iv->have_use_for = true;
1252 civ = xmalloc (sizeof (struct iv));
1255 stmt = SSA_NAME_DEF_STMT (op);
1256 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1257 || TREE_CODE (stmt) == MODIFY_EXPR);
1259 use = record_use (data, NULL, civ, stmt, type);
1260 iv->use_id = use->id;
1265 /* Checks whether the use OP is interesting and if so, records it. */
1267 static struct iv_use *
1268 find_interesting_uses_op (struct ivopts_data *data, tree op)
1270 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1273 /* Records a definition of induction variable OP that is used outside of the
1276 static struct iv_use *
1277 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1279 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1282 /* Checks whether the condition *COND_P in STMT is interesting
1283 and if so, records it. */
1286 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1290 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1292 tree zero = integer_zero_node;
1294 const_iv.step = NULL_TREE;
1296 if (TREE_CODE (*cond_p) != SSA_NAME
1297 && !COMPARISON_CLASS_P (*cond_p))
1300 if (TREE_CODE (*cond_p) == SSA_NAME)
1307 op0_p = &TREE_OPERAND (*cond_p, 0);
1308 op1_p = &TREE_OPERAND (*cond_p, 1);
1311 if (TREE_CODE (*op0_p) == SSA_NAME)
1312 iv0 = get_iv (data, *op0_p);
1316 if (TREE_CODE (*op1_p) == SSA_NAME)
1317 iv1 = get_iv (data, *op1_p);
1321 if (/* When comparing with non-invariant value, we may not do any senseful
1322 induction variable elimination. */
1324 /* Eliminating condition based on two ivs would be nontrivial.
1325 ??? TODO -- it is not really important to handle this case. */
1326 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1328 find_interesting_uses_op (data, *op0_p);
1329 find_interesting_uses_op (data, *op1_p);
1333 if (zero_p (iv0->step) && zero_p (iv1->step))
1335 /* If both are invariants, this is a work for unswitching. */
1339 civ = xmalloc (sizeof (struct iv));
1340 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1341 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1344 /* Returns true if expression EXPR is obviously invariant in LOOP,
1345 i.e. if all its operands are defined outside of the LOOP. */
1348 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1353 if (is_gimple_min_invariant (expr))
1356 if (TREE_CODE (expr) == SSA_NAME)
1358 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1360 && flow_bb_inside_loop_p (loop, def_bb))
1369 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1370 for (i = 0; i < len; i++)
1371 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1377 /* Cumulates the steps of indices into DATA and replaces their values with the
1378 initial ones. Returns false when the value of the index cannot be determined.
1379 Callback for for_each_index. */
1381 struct ifs_ivopts_data
1383 struct ivopts_data *ivopts_data;
1389 idx_find_step (tree base, tree *idx, void *data)
1391 struct ifs_ivopts_data *dta = data;
1393 tree step, iv_step, lbound, off;
1394 struct loop *loop = dta->ivopts_data->current_loop;
1396 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1397 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1400 /* If base is a component ref, require that the offset of the reference
1402 if (TREE_CODE (base) == COMPONENT_REF)
1404 off = component_ref_field_offset (base);
1405 return expr_invariant_in_loop_p (loop, off);
1408 /* If base is array, first check whether we will be able to move the
1409 reference out of the loop (in order to take its address in strength
1410 reduction). In order for this to work we need both lower bound
1411 and step to be loop invariants. */
1412 if (TREE_CODE (base) == ARRAY_REF)
1414 step = array_ref_element_size (base);
1415 lbound = array_ref_low_bound (base);
1417 if (!expr_invariant_in_loop_p (loop, step)
1418 || !expr_invariant_in_loop_p (loop, lbound))
1422 if (TREE_CODE (*idx) != SSA_NAME)
1425 iv = get_iv (dta->ivopts_data, *idx);
1434 if (TREE_CODE (base) == ARRAY_REF)
1436 step = array_ref_element_size (base);
1438 /* We only handle addresses whose step is an integer constant. */
1439 if (TREE_CODE (step) != INTEGER_CST)
1443 /* The step for pointer arithmetics already is 1 byte. */
1444 step = build_int_cst (sizetype, 1);
1446 iv_step = convert_step (dta->ivopts_data->current_loop,
1447 sizetype, iv->base, iv->step, dta->stmt);
1451 /* The index might wrap. */
1455 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1458 *dta->step_p = step;
1460 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1465 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1466 object is passed to it in DATA. */
1469 idx_record_use (tree base, tree *idx,
1472 find_interesting_uses_op (data, *idx);
1473 if (TREE_CODE (base) == ARRAY_REF)
1475 find_interesting_uses_op (data, array_ref_element_size (base));
1476 find_interesting_uses_op (data, array_ref_low_bound (base));
1481 /* Returns true if memory reference REF may be unaligned. */
1484 may_be_unaligned_p (tree ref)
1488 HOST_WIDE_INT bitsize;
1489 HOST_WIDE_INT bitpos;
1491 enum machine_mode mode;
1492 int unsignedp, volatilep;
1493 unsigned base_align;
1495 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1496 thus they are not misaligned. */
1497 if (TREE_CODE (ref) == TARGET_MEM_REF)
1500 /* The test below is basically copy of what expr.c:normal_inner_ref
1501 does to check whether the object must be loaded by parts when
1502 STRICT_ALIGNMENT is true. */
1503 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1504 &unsignedp, &volatilep, true);
1505 base_type = TREE_TYPE (base);
1506 base_align = TYPE_ALIGN (base_type);
1509 && (base_align < GET_MODE_ALIGNMENT (mode)
1510 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1511 || bitpos % BITS_PER_UNIT != 0))
1517 /* Finds addresses in *OP_P inside STMT. */
1520 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1522 tree base = *op_p, step = NULL;
1524 struct ifs_ivopts_data ifs_ivopts_data;
1526 /* Do not play with volatile memory references. A bit too conservative,
1527 perhaps, but safe. */
1528 if (stmt_ann (stmt)->has_volatile_ops)
1531 /* Ignore bitfields for now. Not really something terribly complicated
1533 if (TREE_CODE (base) == COMPONENT_REF
1534 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1537 if (STRICT_ALIGNMENT
1538 && may_be_unaligned_p (base))
1541 base = unshare_expr (base);
1543 if (TREE_CODE (base) == TARGET_MEM_REF)
1545 tree type = build_pointer_type (TREE_TYPE (base));
1549 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1551 civ = get_iv (data, TMR_BASE (base));
1555 TMR_BASE (base) = civ->base;
1558 if (TMR_INDEX (base)
1559 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1561 civ = get_iv (data, TMR_INDEX (base));
1565 TMR_INDEX (base) = civ->base;
1570 if (TMR_STEP (base))
1571 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1574 step = fold_build2 (PLUS_EXPR, type, step, astep);
1582 base = tree_mem_ref_addr (type, base);
1586 ifs_ivopts_data.ivopts_data = data;
1587 ifs_ivopts_data.stmt = stmt;
1588 ifs_ivopts_data.step_p = &step;
1589 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1593 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1594 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1596 base = build_fold_addr_expr (base);
1599 civ = alloc_iv (base, step);
1600 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1604 for_each_index (op_p, idx_record_use, data);
1607 /* Finds and records invariants used in STMT. */
1610 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1613 use_operand_p use_p;
1616 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1618 op = USE_FROM_PTR (use_p);
1619 record_invariant (data, op, false);
1623 /* Finds interesting uses of induction variables in the statement STMT. */
1626 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1631 use_operand_p use_p;
1633 find_invariants_stmt (data, stmt);
1635 if (TREE_CODE (stmt) == COND_EXPR)
1637 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1641 if (TREE_CODE (stmt) == MODIFY_EXPR)
1643 lhs = TREE_OPERAND (stmt, 0);
1644 rhs = TREE_OPERAND (stmt, 1);
1646 if (TREE_CODE (lhs) == SSA_NAME)
1648 /* If the statement defines an induction variable, the uses are not
1649 interesting by themselves. */
1651 iv = get_iv (data, lhs);
1653 if (iv && !zero_p (iv->step))
1657 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1659 case tcc_comparison:
1660 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1664 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1665 if (REFERENCE_CLASS_P (lhs))
1666 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1672 if (REFERENCE_CLASS_P (lhs)
1673 && is_gimple_val (rhs))
1675 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1676 find_interesting_uses_op (data, rhs);
1680 /* TODO -- we should also handle address uses of type
1682 memory = call (whatever);
1689 if (TREE_CODE (stmt) == PHI_NODE
1690 && bb_for_stmt (stmt) == data->current_loop->header)
1692 lhs = PHI_RESULT (stmt);
1693 iv = get_iv (data, lhs);
1695 if (iv && !zero_p (iv->step))
1699 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1701 op = USE_FROM_PTR (use_p);
1703 if (TREE_CODE (op) != SSA_NAME)
1706 iv = get_iv (data, op);
1710 find_interesting_uses_op (data, op);
1714 /* Finds interesting uses of induction variables outside of loops
1715 on loop exit edge EXIT. */
1718 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1722 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1724 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1725 find_interesting_uses_outer (data, def);
1729 /* Finds uses of the induction variables that are interesting. */
1732 find_interesting_uses (struct ivopts_data *data)
1735 block_stmt_iterator bsi;
1737 basic_block *body = get_loop_body (data->current_loop);
1739 struct version_info *info;
1742 if (dump_file && (dump_flags & TDF_DETAILS))
1743 fprintf (dump_file, "Uses:\n\n");
1745 for (i = 0; i < data->current_loop->num_nodes; i++)
1750 FOR_EACH_EDGE (e, ei, bb->succs)
1751 if (e->dest != EXIT_BLOCK_PTR
1752 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1753 find_interesting_uses_outside (data, e);
1755 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1756 find_interesting_uses_stmt (data, phi);
1757 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1758 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1761 if (dump_file && (dump_flags & TDF_DETAILS))
1765 fprintf (dump_file, "\n");
1767 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1769 info = ver_info (data, i);
1772 fprintf (dump_file, " ");
1773 print_generic_expr (dump_file, info->name, TDF_SLIM);
1774 fprintf (dump_file, " is invariant (%d)%s\n",
1775 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1779 fprintf (dump_file, "\n");
1785 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1786 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1787 we are at the top-level of the processed address. */
1790 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1791 unsigned HOST_WIDE_INT *offset)
1793 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1794 enum tree_code code;
1795 tree type, orig_type = TREE_TYPE (expr);
1796 unsigned HOST_WIDE_INT off0, off1, st;
1797 tree orig_expr = expr;
1801 type = TREE_TYPE (expr);
1802 code = TREE_CODE (expr);
1808 if (!cst_and_fits_in_hwi (expr)
1812 *offset = int_cst_value (expr);
1813 return build_int_cst_type (orig_type, 0);
1817 op0 = TREE_OPERAND (expr, 0);
1818 op1 = TREE_OPERAND (expr, 1);
1820 op0 = strip_offset_1 (op0, false, false, &off0);
1821 op1 = strip_offset_1 (op1, false, false, &off1);
1823 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1824 if (op0 == TREE_OPERAND (expr, 0)
1825 && op1 == TREE_OPERAND (expr, 1))
1830 else if (zero_p (op0))
1832 if (code == PLUS_EXPR)
1835 expr = fold_build1 (NEGATE_EXPR, type, op1);
1838 expr = fold_build2 (code, type, op0, op1);
1840 return fold_convert (orig_type, expr);
1846 step = array_ref_element_size (expr);
1847 if (!cst_and_fits_in_hwi (step))
1850 st = int_cst_value (step);
1851 op1 = TREE_OPERAND (expr, 1);
1852 op1 = strip_offset_1 (op1, false, false, &off1);
1853 *offset = off1 * st;
1858 /* Strip the component reference completely. */
1859 op0 = TREE_OPERAND (expr, 0);
1860 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1870 tmp = component_ref_field_offset (expr);
1872 && cst_and_fits_in_hwi (tmp))
1874 /* Strip the component reference completely. */
1875 op0 = TREE_OPERAND (expr, 0);
1876 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1877 *offset = off0 + int_cst_value (tmp);
1883 op0 = TREE_OPERAND (expr, 0);
1884 op0 = strip_offset_1 (op0, true, true, &off0);
1887 if (op0 == TREE_OPERAND (expr, 0))
1890 expr = build_fold_addr_expr (op0);
1891 return fold_convert (orig_type, expr);
1894 inside_addr = false;
1901 /* Default handling of expressions for that we want to recurse into
1902 the first operand. */
1903 op0 = TREE_OPERAND (expr, 0);
1904 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1907 if (op0 == TREE_OPERAND (expr, 0)
1908 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1911 expr = copy_node (expr);
1912 TREE_OPERAND (expr, 0) = op0;
1914 TREE_OPERAND (expr, 1) = op1;
1916 /* Inside address, we might strip the top level component references,
1917 thus changing type of the expression. Handling of ADDR_EXPR
1919 expr = fold_convert (orig_type, expr);
1924 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1927 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1929 return strip_offset_1 (expr, false, false, offset);
1932 /* Returns variant of TYPE that can be used as base for different uses.
1933 For integer types, we return unsigned variant of the type, which
1934 avoids problems with overflows. For pointer types, we return void *. */
1937 generic_type_for (tree type)
1939 if (POINTER_TYPE_P (type))
1940 return ptr_type_node;
1942 if (TYPE_UNSIGNED (type))
1945 return unsigned_type_for (type);
1948 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1949 the bitmap to that we should store it. */
1951 static struct ivopts_data *fd_ivopts_data;
1953 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1955 bitmap *depends_on = data;
1956 struct version_info *info;
1958 if (TREE_CODE (*expr_p) != SSA_NAME)
1960 info = name_info (fd_ivopts_data, *expr_p);
1962 if (!info->inv_id || info->has_nonlin_use)
1966 *depends_on = BITMAP_ALLOC (NULL);
1967 bitmap_set_bit (*depends_on, info->inv_id);
1972 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1973 position to POS. If USE is not NULL, the candidate is set as related to
1974 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1975 replacement of the final value of the iv by a direct computation. */
1977 static struct iv_cand *
1978 add_candidate_1 (struct ivopts_data *data,
1979 tree base, tree step, bool important, enum iv_position pos,
1980 struct iv_use *use, tree incremented_at)
1983 struct iv_cand *cand = NULL;
1984 tree type, orig_type;
1988 orig_type = TREE_TYPE (base);
1989 type = generic_type_for (orig_type);
1990 if (type != orig_type)
1992 base = fold_convert (type, base);
1994 step = fold_convert (type, step);
1998 for (i = 0; i < n_iv_cands (data); i++)
2000 cand = iv_cand (data, i);
2002 if (cand->pos != pos)
2005 if (cand->incremented_at != incremented_at)
2019 if (!operand_equal_p (base, cand->iv->base, 0))
2022 if (zero_p (cand->iv->step))
2029 if (step && operand_equal_p (step, cand->iv->step, 0))
2034 if (i == n_iv_cands (data))
2036 cand = xcalloc (1, sizeof (struct iv_cand));
2042 cand->iv = alloc_iv (base, step);
2045 if (pos != IP_ORIGINAL && cand->iv)
2047 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2048 cand->var_after = cand->var_before;
2050 cand->important = important;
2051 cand->incremented_at = incremented_at;
2052 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2055 && TREE_CODE (step) != INTEGER_CST)
2057 fd_ivopts_data = data;
2058 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2061 if (dump_file && (dump_flags & TDF_DETAILS))
2062 dump_cand (dump_file, cand);
2065 if (important && !cand->important)
2067 cand->important = true;
2068 if (dump_file && (dump_flags & TDF_DETAILS))
2069 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2074 bitmap_set_bit (use->related_cands, i);
2075 if (dump_file && (dump_flags & TDF_DETAILS))
2076 fprintf (dump_file, "Candidate %d is related to use %d\n",
2083 /* Returns true if incrementing the induction variable at the end of the LOOP
2086 The purpose is to avoid splitting latch edge with a biv increment, thus
2087 creating a jump, possibly confusing other optimization passes and leaving
2088 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2089 is not available (so we do not have a better alternative), or if the latch
2090 edge is already nonempty. */
2093 allow_ip_end_pos_p (struct loop *loop)
2095 if (!ip_normal_pos (loop))
2098 if (!empty_block_p (ip_end_pos (loop)))
2104 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2105 position to POS. If USE is not NULL, the candidate is set as related to
2106 it. The candidate computation is scheduled on all available positions. */
2109 add_candidate (struct ivopts_data *data,
2110 tree base, tree step, bool important, struct iv_use *use)
2112 if (ip_normal_pos (data->current_loop))
2113 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2114 if (ip_end_pos (data->current_loop)
2115 && allow_ip_end_pos_p (data->current_loop))
2116 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2119 /* Add a standard "0 + 1 * iteration" iv candidate for a
2120 type with SIZE bits. */
2123 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2126 tree type = lang_hooks.types.type_for_size (size, true);
2127 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2131 /* Adds standard iv candidates. */
2134 add_standard_iv_candidates (struct ivopts_data *data)
2136 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2138 /* The same for a double-integer type if it is still fast enough. */
2139 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2140 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2144 /* Adds candidates bases on the old induction variable IV. */
2147 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2150 struct iv_cand *cand;
2152 add_candidate (data, iv->base, iv->step, true, NULL);
2154 /* The same, but with initial value zero. */
2155 add_candidate (data,
2156 build_int_cst (TREE_TYPE (iv->base), 0),
2157 iv->step, true, NULL);
2159 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2160 if (TREE_CODE (phi) == PHI_NODE)
2162 /* Additionally record the possibility of leaving the original iv
2164 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2165 cand = add_candidate_1 (data,
2166 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2167 SSA_NAME_DEF_STMT (def));
2168 cand->var_before = iv->ssa_name;
2169 cand->var_after = def;
2173 /* Adds candidates based on the old induction variables. */
2176 add_old_ivs_candidates (struct ivopts_data *data)
2182 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2184 iv = ver_info (data, i)->iv;
2185 if (iv && iv->biv_p && !zero_p (iv->step))
2186 add_old_iv_candidates (data, iv);
2190 /* Adds candidates based on the value of the induction variable IV and USE. */
2193 add_iv_value_candidates (struct ivopts_data *data,
2194 struct iv *iv, struct iv_use *use)
2196 unsigned HOST_WIDE_INT offset;
2199 add_candidate (data, iv->base, iv->step, false, use);
2201 /* The same, but with initial value zero. Make such variable important,
2202 since it is generic enough so that possibly many uses may be based
2204 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2205 iv->step, true, use);
2207 /* Third, try removing the constant offset. */
2208 base = strip_offset (iv->base, &offset);
2210 add_candidate (data, base, iv->step, false, use);
2213 /* Possibly adds pseudocandidate for replacing the final value of USE by
2214 a direct computation. */
2217 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2219 struct tree_niter_desc *niter;
2221 /* We must know where we exit the loop and how many times does it roll. */
2222 niter = niter_for_single_dom_exit (data);
2224 || !zero_p (niter->may_be_zero))
2227 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2230 /* Adds candidates based on the uses. */
2233 add_derived_ivs_candidates (struct ivopts_data *data)
2237 for (i = 0; i < n_iv_uses (data); i++)
2239 struct iv_use *use = iv_use (data, i);
2246 case USE_NONLINEAR_EXPR:
2249 /* Just add the ivs based on the value of the iv used here. */
2250 add_iv_value_candidates (data, use->iv, use);
2254 add_iv_value_candidates (data, use->iv, use);
2256 /* Additionally, add the pseudocandidate for the possibility to
2257 replace the final value by a direct computation. */
2258 add_iv_outer_candidates (data, use);
2267 /* Record important candidates and add them to related_cands bitmaps
2271 record_important_candidates (struct ivopts_data *data)
2276 for (i = 0; i < n_iv_cands (data); i++)
2278 struct iv_cand *cand = iv_cand (data, i);
2280 if (cand->important)
2281 bitmap_set_bit (data->important_candidates, i);
2284 data->consider_all_candidates = (n_iv_cands (data)
2285 <= CONSIDER_ALL_CANDIDATES_BOUND);
2287 if (data->consider_all_candidates)
2289 /* We will not need "related_cands" bitmaps in this case,
2290 so release them to decrease peak memory consumption. */
2291 for (i = 0; i < n_iv_uses (data); i++)
2293 use = iv_use (data, i);
2294 BITMAP_FREE (use->related_cands);
2299 /* Add important candidates to the related_cands bitmaps. */
2300 for (i = 0; i < n_iv_uses (data); i++)
2301 bitmap_ior_into (iv_use (data, i)->related_cands,
2302 data->important_candidates);
2306 /* Finds the candidates for the induction variables. */
2309 find_iv_candidates (struct ivopts_data *data)
2311 /* Add commonly used ivs. */
2312 add_standard_iv_candidates (data);
2314 /* Add old induction variables. */
2315 add_old_ivs_candidates (data);
2317 /* Add induction variables derived from uses. */
2318 add_derived_ivs_candidates (data);
2320 /* Record the important candidates. */
2321 record_important_candidates (data);
2324 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2325 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2326 we allocate a simple list to every use. */
2329 alloc_use_cost_map (struct ivopts_data *data)
2331 unsigned i, size, s, j;
2333 for (i = 0; i < n_iv_uses (data); i++)
2335 struct iv_use *use = iv_use (data, i);
2338 if (data->consider_all_candidates)
2339 size = n_iv_cands (data);
2343 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2348 /* Round up to the power of two, so that moduling by it is fast. */
2349 for (size = 1; size < s; size <<= 1)
2353 use->n_map_members = size;
2354 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2358 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2359 on invariants DEPENDS_ON and that the value used in expressing it
2363 set_use_iv_cost (struct ivopts_data *data,
2364 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2365 bitmap depends_on, tree value)
2371 BITMAP_FREE (depends_on);
2375 if (data->consider_all_candidates)
2377 use->cost_map[cand->id].cand = cand;
2378 use->cost_map[cand->id].cost = cost;
2379 use->cost_map[cand->id].depends_on = depends_on;
2380 use->cost_map[cand->id].value = value;
2384 /* n_map_members is a power of two, so this computes modulo. */
2385 s = cand->id & (use->n_map_members - 1);
2386 for (i = s; i < use->n_map_members; i++)
2387 if (!use->cost_map[i].cand)
2389 for (i = 0; i < s; i++)
2390 if (!use->cost_map[i].cand)
2396 use->cost_map[i].cand = cand;
2397 use->cost_map[i].cost = cost;
2398 use->cost_map[i].depends_on = depends_on;
2399 use->cost_map[i].value = value;
2402 /* Gets cost of (USE, CANDIDATE) pair. */
2404 static struct cost_pair *
2405 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2406 struct iv_cand *cand)
2409 struct cost_pair *ret;
2414 if (data->consider_all_candidates)
2416 ret = use->cost_map + cand->id;
2423 /* n_map_members is a power of two, so this computes modulo. */
2424 s = cand->id & (use->n_map_members - 1);
2425 for (i = s; i < use->n_map_members; i++)
2426 if (use->cost_map[i].cand == cand)
2427 return use->cost_map + i;
2429 for (i = 0; i < s; i++)
2430 if (use->cost_map[i].cand == cand)
2431 return use->cost_map + i;
2436 /* Returns estimate on cost of computing SEQ. */
2444 for (; seq; seq = NEXT_INSN (seq))
2446 set = single_set (seq);
2448 cost += rtx_cost (set, SET);
2456 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2458 produce_memory_decl_rtl (tree obj, int *regno)
2463 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2465 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2466 x = gen_rtx_SYMBOL_REF (Pmode, name);
2469 x = gen_raw_REG (Pmode, (*regno)++);
2471 return gen_rtx_MEM (DECL_MODE (obj), x);
2474 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2475 walk_tree. DATA contains the actual fake register number. */
2478 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2480 tree obj = NULL_TREE;
2484 switch (TREE_CODE (*expr_p))
2487 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2488 handled_component_p (*expr_p);
2489 expr_p = &TREE_OPERAND (*expr_p, 0))
2493 x = produce_memory_decl_rtl (obj, regno);
2498 obj = SSA_NAME_VAR (*expr_p);
2499 if (!DECL_RTL_SET_P (obj))
2500 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2509 if (DECL_RTL_SET_P (obj))
2512 if (DECL_MODE (obj) == BLKmode)
2513 x = produce_memory_decl_rtl (obj, regno);
2515 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2525 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2526 SET_DECL_RTL (obj, x);
2532 /* Determines cost of the computation of EXPR. */
2535 computation_cost (tree expr)
2538 tree type = TREE_TYPE (expr);
2540 /* Avoid using hard regs in ways which may be unsupported. */
2541 int regno = LAST_VIRTUAL_REGISTER + 1;
2543 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2545 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2549 cost = seq_cost (seq);
2551 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2556 /* Returns variable containing the value of candidate CAND at statement AT. */
2559 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2561 if (stmt_after_increment (loop, cand, stmt))
2562 return cand->var_after;
2564 return cand->var_before;
2567 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2568 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2571 tree_int_cst_sign_bit (tree t)
2573 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2574 unsigned HOST_WIDE_INT w;
2576 if (bitno < HOST_BITS_PER_WIDE_INT)
2577 w = TREE_INT_CST_LOW (t);
2580 w = TREE_INT_CST_HIGH (t);
2581 bitno -= HOST_BITS_PER_WIDE_INT;
2584 return (w >> bitno) & 1;
2587 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2588 return cst. Otherwise return NULL_TREE. */
2591 constant_multiple_of (tree type, tree top, tree bot)
2593 tree res, mby, p0, p1;
2594 enum tree_code code;
2600 if (operand_equal_p (top, bot, 0))
2601 return build_int_cst (type, 1);
2603 code = TREE_CODE (top);
2607 mby = TREE_OPERAND (top, 1);
2608 if (TREE_CODE (mby) != INTEGER_CST)
2611 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2615 return fold_binary_to_constant (MULT_EXPR, type, res,
2616 fold_convert (type, mby));
2620 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2623 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2627 return fold_binary_to_constant (code, type, p0, p1);
2630 if (TREE_CODE (bot) != INTEGER_CST)
2633 bot = fold_convert (type, bot);
2634 top = fold_convert (type, top);
2636 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2637 the result afterwards. */
2638 if (tree_int_cst_sign_bit (bot))
2641 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2646 /* Ditto for TOP. */
2647 if (tree_int_cst_sign_bit (top))
2650 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2653 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2656 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2658 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2666 /* Sets COMB to CST. */
2669 aff_combination_const (struct affine_tree_combination *comb, tree type,
2670 unsigned HOST_WIDE_INT cst)
2672 unsigned prec = TYPE_PRECISION (type);
2675 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2678 comb->rest = NULL_TREE;
2679 comb->offset = cst & comb->mask;
2682 /* Sets COMB to single element ELT. */
2685 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2687 unsigned prec = TYPE_PRECISION (type);
2690 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2693 comb->elts[0] = elt;
2695 comb->rest = NULL_TREE;
2699 /* Scales COMB by SCALE. */
2702 aff_combination_scale (struct affine_tree_combination *comb,
2703 unsigned HOST_WIDE_INT scale)
2712 aff_combination_const (comb, comb->type, 0);
2716 comb->offset = (scale * comb->offset) & comb->mask;
2717 for (i = 0, j = 0; i < comb->n; i++)
2719 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2720 comb->elts[j] = comb->elts[i];
2721 if (comb->coefs[j] != 0)
2728 if (comb->n < MAX_AFF_ELTS)
2730 comb->coefs[comb->n] = scale;
2731 comb->elts[comb->n] = comb->rest;
2732 comb->rest = NULL_TREE;
2736 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2737 build_int_cst_type (comb->type, scale));
2741 /* Adds ELT * SCALE to COMB. */
2744 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2745 unsigned HOST_WIDE_INT scale)
2752 for (i = 0; i < comb->n; i++)
2753 if (operand_equal_p (comb->elts[i], elt, 0))
2755 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2760 comb->coefs[i] = comb->coefs[comb->n];
2761 comb->elts[i] = comb->elts[comb->n];
2764 if (comb->n < MAX_AFF_ELTS)
2766 comb->coefs[comb->n] = scale;
2767 comb->elts[comb->n] = elt;
2773 elt = fold_convert (comb->type, elt);
2775 elt = fold_build2 (MULT_EXPR, comb->type,
2776 fold_convert (comb->type, elt),
2777 build_int_cst_type (comb->type, scale));
2780 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2785 /* Adds COMB2 to COMB1. */
2788 aff_combination_add (struct affine_tree_combination *comb1,
2789 struct affine_tree_combination *comb2)
2793 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2794 for (i = 0; i < comb2-> n; i++)
2795 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2797 aff_combination_add_elt (comb1, comb2->rest, 1);
2800 /* Splits EXPR into an affine combination of parts. */
2803 tree_to_aff_combination (tree expr, tree type,
2804 struct affine_tree_combination *comb)
2806 struct affine_tree_combination tmp;
2807 enum tree_code code;
2808 tree cst, core, toffset;
2809 HOST_WIDE_INT bitpos, bitsize;
2810 enum machine_mode mode;
2811 int unsignedp, volatilep;
2815 code = TREE_CODE (expr);
2819 aff_combination_const (comb, type, int_cst_value (expr));
2824 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2825 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2826 if (code == MINUS_EXPR)
2827 aff_combination_scale (&tmp, -1);
2828 aff_combination_add (comb, &tmp);
2832 cst = TREE_OPERAND (expr, 1);
2833 if (TREE_CODE (cst) != INTEGER_CST)
2835 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2836 aff_combination_scale (comb, int_cst_value (cst));
2840 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2841 aff_combination_scale (comb, -1);
2845 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2846 &toffset, &mode, &unsignedp, &volatilep,
2848 if (bitpos % BITS_PER_UNIT != 0)
2850 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2851 core = build_fold_addr_expr (core);
2852 if (TREE_CODE (core) == ADDR_EXPR)
2853 aff_combination_add_elt (comb, core, 1);
2856 tree_to_aff_combination (core, type, &tmp);
2857 aff_combination_add (comb, &tmp);
2861 tree_to_aff_combination (toffset, type, &tmp);
2862 aff_combination_add (comb, &tmp);
2870 aff_combination_elt (comb, type, expr);
2873 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2876 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2877 unsigned HOST_WIDE_INT mask)
2879 enum tree_code code;
2882 elt = fold_convert (type, elt);
2889 return fold_build2 (PLUS_EXPR, type, expr, elt);
2895 return fold_build1 (NEGATE_EXPR, type, elt);
2897 return fold_build2 (MINUS_EXPR, type, expr, elt);
2901 return fold_build2 (MULT_EXPR, type, elt,
2902 build_int_cst_type (type, scale));
2904 if ((scale | (mask >> 1)) == mask)
2906 /* Scale is negative. */
2908 scale = (-scale) & mask;
2913 elt = fold_build2 (MULT_EXPR, type, elt,
2914 build_int_cst_type (type, scale));
2915 return fold_build2 (code, type, expr, elt);
2918 /* Copies the tree elements of COMB to ensure that they are not shared. */
2921 unshare_aff_combination (struct affine_tree_combination *comb)
2925 for (i = 0; i < comb->n; i++)
2926 comb->elts[i] = unshare_expr (comb->elts[i]);
2928 comb->rest = unshare_expr (comb->rest);
2931 /* Makes tree from the affine combination COMB. */
2934 aff_combination_to_tree (struct affine_tree_combination *comb)
2936 tree type = comb->type;
2937 tree expr = comb->rest;
2939 unsigned HOST_WIDE_INT off, sgn;
2941 /* Handle the special case produced by get_computation_aff when
2942 the type does not fit in HOST_WIDE_INT. */
2943 if (comb->n == 0 && comb->offset == 0)
2944 return fold_convert (type, expr);
2946 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2948 for (i = 0; i < comb->n; i++)
2949 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2952 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2954 /* Offset is negative. */
2955 off = (-comb->offset) & comb->mask;
2963 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2967 /* Determines the expression by that USE is expressed from induction variable
2968 CAND at statement AT in LOOP. The expression is stored in a decomposed
2969 form into AFF. Returns false if USE cannot be expressed using CAND. */
2972 get_computation_aff (struct loop *loop,
2973 struct iv_use *use, struct iv_cand *cand, tree at,
2974 struct affine_tree_combination *aff)
2976 tree ubase = use->iv->base;
2977 tree ustep = use->iv->step;
2978 tree cbase = cand->iv->base;
2979 tree cstep = cand->iv->step;
2980 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2984 unsigned HOST_WIDE_INT ustepi, cstepi;
2985 HOST_WIDE_INT ratioi;
2986 struct affine_tree_combination cbase_aff, expr_aff;
2987 tree cstep_orig = cstep, ustep_orig = ustep;
2989 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2991 /* We do not have a precision to express the values of use. */
2995 expr = var_at_stmt (loop, cand, at);
2997 if (TREE_TYPE (expr) != ctype)
2999 /* This may happen with the original ivs. */
3000 expr = fold_convert (ctype, expr);
3003 if (TYPE_UNSIGNED (utype))
3007 uutype = unsigned_type_for (utype);
3008 ubase = fold_convert (uutype, ubase);
3009 ustep = fold_convert (uutype, ustep);
3012 if (uutype != ctype)
3014 expr = fold_convert (uutype, expr);
3015 cbase = fold_convert (uutype, cbase);
3016 cstep = fold_convert (uutype, cstep);
3018 /* If the conversion is not noop, we must take it into account when
3019 considering the value of the step. */
3020 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3024 if (cst_and_fits_in_hwi (cstep_orig)
3025 && cst_and_fits_in_hwi (ustep_orig))
3027 ustepi = int_cst_value (ustep_orig);
3028 cstepi = int_cst_value (cstep_orig);
3030 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3032 /* TODO maybe consider case when ustep divides cstep and the ratio is
3033 a power of 2 (so that the division is fast to execute)? We would
3034 need to be much more careful with overflows etc. then. */
3038 ratio = build_int_cst_type (uutype, ratioi);
3042 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3046 /* Ratioi is only used to detect special cases when the multiplicative
3047 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3048 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3049 to integer_onep/integer_all_onesp, since the former ignores
3051 if (cst_and_fits_in_hwi (ratio))
3052 ratioi = int_cst_value (ratio);
3053 else if (integer_onep (ratio))
3055 else if (integer_all_onesp (ratio))
3061 /* We may need to shift the value if we are after the increment. */
3062 if (stmt_after_increment (loop, cand, at))
3063 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3065 /* use = ubase - ratio * cbase + ratio * var.
3067 In general case ubase + ratio * (var - cbase) could be better (one less
3068 multiplication), but often it is possible to eliminate redundant parts
3069 of computations from (ubase - ratio * cbase) term, and if it does not
3070 happen, fold is able to apply the distributive law to obtain this form
3073 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3075 /* Let's compute in trees and just return the result in AFF. This case
3076 should not be very common, and fold itself is not that bad either,
3077 so making the aff. functions more complicated to handle this case
3078 is not that urgent. */
3081 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3082 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3084 else if (ratioi == -1)
3086 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3087 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3091 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3092 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3093 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3094 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3105 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3106 possible to compute ratioi. */
3107 gcc_assert (ratioi);
3109 tree_to_aff_combination (ubase, uutype, aff);
3110 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3111 tree_to_aff_combination (expr, uutype, &expr_aff);
3112 aff_combination_scale (&cbase_aff, -ratioi);
3113 aff_combination_scale (&expr_aff, ratioi);
3114 aff_combination_add (aff, &cbase_aff);
3115 aff_combination_add (aff, &expr_aff);
3120 /* Determines the expression by that USE is expressed from induction variable
3121 CAND at statement AT in LOOP. The computation is unshared. */
3124 get_computation_at (struct loop *loop,
3125 struct iv_use *use, struct iv_cand *cand, tree at)
3127 struct affine_tree_combination aff;
3128 tree type = TREE_TYPE (use->iv->base);
3130 if (!get_computation_aff (loop, use, cand, at, &aff))
3132 unshare_aff_combination (&aff);
3133 return fold_convert (type, aff_combination_to_tree (&aff));
3136 /* Determines the expression by that USE is expressed from induction variable
3137 CAND in LOOP. The computation is unshared. */
3140 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3142 return get_computation_at (loop, use, cand, use->stmt);
3145 /* Returns cost of addition in MODE. */
3148 add_cost (enum machine_mode mode)
3150 static unsigned costs[NUM_MACHINE_MODES];
3158 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3159 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3160 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3165 cost = seq_cost (seq);
3171 if (dump_file && (dump_flags & TDF_DETAILS))
3172 fprintf (dump_file, "Addition in %s costs %d\n",
3173 GET_MODE_NAME (mode), cost);
3177 /* Entry in a hashtable of already known costs for multiplication. */
3180 HOST_WIDE_INT cst; /* The constant to multiply by. */
3181 enum machine_mode mode; /* In mode. */
3182 unsigned cost; /* The cost. */
3185 /* Counts hash value for the ENTRY. */
3188 mbc_entry_hash (const void *entry)
3190 const struct mbc_entry *e = entry;
3192 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3195 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3198 mbc_entry_eq (const void *entry1, const void *entry2)
3200 const struct mbc_entry *e1 = entry1;
3201 const struct mbc_entry *e2 = entry2;
3203 return (e1->mode == e2->mode
3204 && e1->cst == e2->cst);
3207 /* Returns cost of multiplication by constant CST in MODE. */
3210 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3212 static htab_t costs;
3213 struct mbc_entry **cached, act;
3218 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3222 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3224 return (*cached)->cost;
3226 *cached = xmalloc (sizeof (struct mbc_entry));
3227 (*cached)->mode = mode;
3228 (*cached)->cst = cst;
3231 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3232 gen_int_mode (cst, mode), NULL_RTX, 0);
3236 cost = seq_cost (seq);
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3239 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3240 (int) cst, GET_MODE_NAME (mode), cost);
3242 (*cached)->cost = cost;
3247 /* Returns true if multiplying by RATIO is allowed in address. */
3250 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3252 #define MAX_RATIO 128
3253 static sbitmap valid_mult;
3257 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3261 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3262 sbitmap_zero (valid_mult);
3263 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3264 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3266 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3267 if (memory_address_p (Pmode, addr))
3268 SET_BIT (valid_mult, i + MAX_RATIO);
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3273 fprintf (dump_file, " allowed multipliers:");
3274 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3275 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3276 fprintf (dump_file, " %d", (int) i);
3277 fprintf (dump_file, "\n");
3278 fprintf (dump_file, "\n");
3282 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3285 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3288 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3289 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3290 variable is omitted. The created memory accesses MODE.
3292 TODO -- there must be some better way. This all is quite crude. */
3295 get_address_cost (bool symbol_present, bool var_present,
3296 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3298 static bool initialized = false;
3299 static HOST_WIDE_INT rat, off;
3300 static HOST_WIDE_INT min_offset, max_offset;
3301 static unsigned costs[2][2][2][2];
3302 unsigned cost, acost;
3303 rtx seq, addr, base;
3304 bool offset_p, ratio_p;
3306 HOST_WIDE_INT s_offset;
3307 unsigned HOST_WIDE_INT mask;
3315 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3317 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3318 for (i = 1; i <= 1 << 20; i <<= 1)
3320 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3321 if (!memory_address_p (Pmode, addr))
3324 max_offset = i >> 1;
3327 for (i = 1; i <= 1 << 20; i <<= 1)
3329 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3330 if (!memory_address_p (Pmode, addr))
3333 min_offset = -(i >> 1);
3335 if (dump_file && (dump_flags & TDF_DETAILS))
3337 fprintf (dump_file, "get_address_cost:\n");
3338 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3339 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3343 for (i = 2; i <= MAX_RATIO; i++)
3344 if (multiplier_allowed_in_address_p (i))
3351 bits = GET_MODE_BITSIZE (Pmode);
3352 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3354 if ((offset >> (bits - 1) & 1))
3359 offset_p = (s_offset != 0
3360 && min_offset <= s_offset && s_offset <= max_offset);
3361 ratio_p = (ratio != 1
3362 && multiplier_allowed_in_address_p (ratio));
3364 if (ratio != 1 && !ratio_p)
3365 cost += multiply_by_cost (ratio, Pmode);
3367 if (s_offset && !offset_p && !symbol_present)
3369 cost += add_cost (Pmode);
3373 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3378 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3379 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3381 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3384 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3388 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3390 base = gen_rtx_fmt_e (CONST, Pmode,
3391 gen_rtx_fmt_ee (PLUS, Pmode,
3393 gen_int_mode (off, Pmode)));
3396 base = gen_int_mode (off, Pmode);
3401 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3404 addr = memory_address (Pmode, addr);
3408 acost = seq_cost (seq);
3409 acost += address_cost (addr, Pmode);
3413 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3416 return cost + acost;
3419 /* Estimates cost of forcing expression EXPR into a variable. */
3422 force_expr_to_var_cost (tree expr)
3424 static bool costs_initialized = false;
3425 static unsigned integer_cost;
3426 static unsigned symbol_cost;
3427 static unsigned address_cost;
3429 unsigned cost0, cost1, cost;
3430 enum machine_mode mode;
3432 if (!costs_initialized)
3434 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3435 rtx x = gen_rtx_MEM (DECL_MODE (var),
3436 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3438 tree type = build_pointer_type (integer_type_node);
3440 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3443 SET_DECL_RTL (var, x);
3444 TREE_STATIC (var) = 1;
3445 addr = build1 (ADDR_EXPR, type, var);
3446 symbol_cost = computation_cost (addr) + 1;
3449 = computation_cost (build2 (PLUS_EXPR, type,
3451 build_int_cst_type (type, 2000))) + 1;
3452 if (dump_file && (dump_flags & TDF_DETAILS))
3454 fprintf (dump_file, "force_expr_to_var_cost:\n");
3455 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3456 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3457 fprintf (dump_file, " address %d\n", (int) address_cost);
3458 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3459 fprintf (dump_file, "\n");
3462 costs_initialized = true;
3467 if (SSA_VAR_P (expr))
3470 if (TREE_INVARIANT (expr))
3472 if (TREE_CODE (expr) == INTEGER_CST)
3473 return integer_cost;
3475 if (TREE_CODE (expr) == ADDR_EXPR)
3477 tree obj = TREE_OPERAND (expr, 0);
3479 if (TREE_CODE (obj) == VAR_DECL
3480 || TREE_CODE (obj) == PARM_DECL
3481 || TREE_CODE (obj) == RESULT_DECL)
3485 return address_cost;
3488 switch (TREE_CODE (expr))
3493 op0 = TREE_OPERAND (expr, 0);
3494 op1 = TREE_OPERAND (expr, 1);
3498 if (is_gimple_val (op0))
3501 cost0 = force_expr_to_var_cost (op0);
3503 if (is_gimple_val (op1))
3506 cost1 = force_expr_to_var_cost (op1);
3511 /* Just an arbitrary value, FIXME. */
3512 return target_spill_cost;
3515 mode = TYPE_MODE (TREE_TYPE (expr));
3516 switch (TREE_CODE (expr))
3520 cost = add_cost (mode);
3524 if (cst_and_fits_in_hwi (op0))
3525 cost = multiply_by_cost (int_cst_value (op0), mode);
3526 else if (cst_and_fits_in_hwi (op1))
3527 cost = multiply_by_cost (int_cst_value (op1), mode);
3529 return target_spill_cost;
3539 /* Bound the cost by target_spill_cost. The parts of complicated
3540 computations often are either loop invariant or at least can
3541 be shared between several iv uses, so letting this grow without
3542 limits would not give reasonable results. */
3543 return cost < target_spill_cost ? cost : target_spill_cost;
3546 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3547 invariants the computation depends on. */
3550 force_var_cost (struct ivopts_data *data,
3551 tree expr, bitmap *depends_on)
3555 fd_ivopts_data = data;
3556 walk_tree (&expr, find_depends, depends_on, NULL);
3559 return force_expr_to_var_cost (expr);
3562 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3563 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3564 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3565 invariants the computation depends on. */
3568 split_address_cost (struct ivopts_data *data,
3569 tree addr, bool *symbol_present, bool *var_present,
3570 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3573 HOST_WIDE_INT bitsize;
3574 HOST_WIDE_INT bitpos;
3576 enum machine_mode mode;
3577 int unsignedp, volatilep;
3579 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3580 &unsignedp, &volatilep, false);
3583 || bitpos % BITS_PER_UNIT != 0
3584 || TREE_CODE (core) != VAR_DECL)
3586 *symbol_present = false;
3587 *var_present = true;
3588 fd_ivopts_data = data;
3589 walk_tree (&addr, find_depends, depends_on, NULL);
3590 return target_spill_cost;
3593 *offset += bitpos / BITS_PER_UNIT;
3594 if (TREE_STATIC (core)
3595 || DECL_EXTERNAL (core))
3597 *symbol_present = true;
3598 *var_present = false;
3602 *symbol_present = false;
3603 *var_present = true;
3607 /* Estimates cost of expressing difference of addresses E1 - E2 as
3608 var + symbol + offset. The value of offset is added to OFFSET,
3609 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3610 part is missing. DEPENDS_ON is a set of the invariants the computation
3614 ptr_difference_cost (struct ivopts_data *data,
3615 tree e1, tree e2, bool *symbol_present, bool *var_present,
3616 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3618 HOST_WIDE_INT diff = 0;
3621 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3623 if (ptr_difference_const (e1, e2, &diff))
3626 *symbol_present = false;
3627 *var_present = false;
3631 if (e2 == integer_zero_node)
3632 return split_address_cost (data, TREE_OPERAND (e1, 0),
3633 symbol_present, var_present, offset, depends_on);
3635 *symbol_present = false;
3636 *var_present = true;
3638 cost = force_var_cost (data, e1, depends_on);
3639 cost += force_var_cost (data, e2, depends_on);
3640 cost += add_cost (Pmode);
3645 /* Estimates cost of expressing difference E1 - E2 as
3646 var + symbol + offset. The value of offset is added to OFFSET,
3647 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3648 part is missing. DEPENDS_ON is a set of the invariants the computation
3652 difference_cost (struct ivopts_data *data,
3653 tree e1, tree e2, bool *symbol_present, bool *var_present,
3654 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3657 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3658 unsigned HOST_WIDE_INT off1, off2;
3660 e1 = strip_offset (e1, &off1);
3661 e2 = strip_offset (e2, &off2);
3662 *offset += off1 - off2;
3667 if (TREE_CODE (e1) == ADDR_EXPR)
3668 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3670 *symbol_present = false;
3672 if (operand_equal_p (e1, e2, 0))
3674 *var_present = false;
3677 *var_present = true;
3679 return force_var_cost (data, e1, depends_on);
3683 cost = force_var_cost (data, e2, depends_on);
3684 cost += multiply_by_cost (-1, mode);
3689 cost = force_var_cost (data, e1, depends_on);
3690 cost += force_var_cost (data, e2, depends_on);
3691 cost += add_cost (mode);
3696 /* Determines the cost of the computation by that USE is expressed
3697 from induction variable CAND. If ADDRESS_P is true, we just need
3698 to create an address from it, otherwise we want to get it into
3699 register. A set of invariants we depend on is stored in
3700 DEPENDS_ON. AT is the statement at that the value is computed. */
3703 get_computation_cost_at (struct ivopts_data *data,
3704 struct iv_use *use, struct iv_cand *cand,
3705 bool address_p, bitmap *depends_on, tree at)
3707 tree ubase = use->iv->base, ustep = use->iv->step;
3709 tree utype = TREE_TYPE (ubase), ctype;
3710 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3711 HOST_WIDE_INT ratio, aratio;
3712 bool var_present, symbol_present;
3713 unsigned cost = 0, n_sums;
3717 /* Only consider real candidates. */
3721 cbase = cand->iv->base;
3722 cstep = cand->iv->step;
3723 ctype = TREE_TYPE (cbase);
3725 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3727 /* We do not have a precision to express the values of use. */
3733 /* Do not try to express address of an object with computation based
3734 on address of a different object. This may cause problems in rtl
3735 level alias analysis (that does not expect this to be happening,
3736 as this is illegal in C), and would be unlikely to be useful
3738 if (use->iv->base_object
3739 && cand->iv->base_object
3740 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3744 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3746 /* TODO -- add direct handling of this case. */
3750 /* CSTEPI is removed from the offset in case statement is after the
3751 increment. If the step is not constant, we use zero instead.
3752 This is a bit imprecise (there is the extra addition), but
3753 redundancy elimination is likely to transform the code so that
3754 it uses value of the variable before increment anyway,
3755 so it is not that much unrealistic. */
3756 if (cst_and_fits_in_hwi (cstep))
3757 cstepi = int_cst_value (cstep);
3761 if (cst_and_fits_in_hwi (ustep)
3762 && cst_and_fits_in_hwi (cstep))
3764 ustepi = int_cst_value (ustep);
3766 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3773 rat = constant_multiple_of (utype, ustep, cstep);
3778 if (cst_and_fits_in_hwi (rat))
3779 ratio = int_cst_value (rat);
3780 else if (integer_onep (rat))
3782 else if (integer_all_onesp (rat))
3788 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3789 or ratio == 1, it is better to handle this like
3791 ubase - ratio * cbase + ratio * var
3793 (also holds in the case ratio == -1, TODO. */
3795 if (cst_and_fits_in_hwi (cbase))
3797 offset = - ratio * int_cst_value (cbase);
3798 cost += difference_cost (data,
3799 ubase, integer_zero_node,
3800 &symbol_present, &var_present, &offset,
3803 else if (ratio == 1)
3805 cost += difference_cost (data,
3807 &symbol_present, &var_present, &offset,
3812 cost += force_var_cost (data, cbase, depends_on);
3813 cost += add_cost (TYPE_MODE (ctype));
3814 cost += difference_cost (data,
3815 ubase, integer_zero_node,
3816 &symbol_present, &var_present, &offset,
3820 /* If we are after the increment, the value of the candidate is higher by
3822 if (stmt_after_increment (data->current_loop, cand, at))
3823 offset -= ratio * cstepi;
3825 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3826 (symbol/var/const parts may be omitted). If we are looking for an address,
3827 find the cost of addressing this. */
3829 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3831 /* Otherwise estimate the costs for computing the expression. */
3832 aratio = ratio > 0 ? ratio : -ratio;
3833 if (!symbol_present && !var_present && !offset)
3836 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3842 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3846 /* Symbol + offset should be compile-time computable. */
3847 && (symbol_present || offset))
3850 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3854 /* Just get the expression, expand it and measure the cost. */
3855 tree comp = get_computation_at (data->current_loop, use, cand, at);
3861 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3863 return computation_cost (comp);
3867 /* Determines the cost of the computation by that USE is expressed
3868 from induction variable CAND. If ADDRESS_P is true, we just need
3869 to create an address from it, otherwise we want to get it into
3870 register. A set of invariants we depend on is stored in
3874 get_computation_cost (struct ivopts_data *data,
3875 struct iv_use *use, struct iv_cand *cand,
3876 bool address_p, bitmap *depends_on)
3878 return get_computation_cost_at (data,
3879 use, cand, address_p, depends_on, use->stmt);
3882 /* Determines cost of basing replacement of USE on CAND in a generic
3886 determine_use_iv_cost_generic (struct ivopts_data *data,
3887 struct iv_use *use, struct iv_cand *cand)
3892 /* The simple case first -- if we need to express value of the preserved
3893 original biv, the cost is 0. This also prevents us from counting the
3894 cost of increment twice -- once at this use and once in the cost of
3896 if (cand->pos == IP_ORIGINAL
3897 && cand->incremented_at == use->stmt)
3899 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3903 cost = get_computation_cost (data, use, cand, false, &depends_on);
3904 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3906 return cost != INFTY;
3909 /* Determines cost of basing replacement of USE on CAND in an address. */
3912 determine_use_iv_cost_address (struct ivopts_data *data,
3913 struct iv_use *use, struct iv_cand *cand)
3916 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3918 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3920 return cost != INFTY;
3923 /* Computes value of induction variable IV in iteration NITER. */
3926 iv_value (struct iv *iv, tree niter)
3929 tree type = TREE_TYPE (iv->base);
3931 niter = fold_convert (type, niter);
3932 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3934 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3937 /* Computes value of candidate CAND at position AT in iteration NITER. */
3940 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3942 tree val = iv_value (cand->iv, niter);
3943 tree type = TREE_TYPE (cand->iv->base);
3945 if (stmt_after_increment (loop, cand, at))
3946 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3951 /* Returns period of induction variable iv. */
3954 iv_period (struct iv *iv)
3956 tree step = iv->step, period, type;
3959 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3961 /* Period of the iv is gcd (step, type range). Since type range is power
3962 of two, it suffices to determine the maximum power of two that divides
3964 pow2div = num_ending_zeros (step);
3965 type = unsigned_type_for (TREE_TYPE (step));
3967 period = build_low_bits_mask (type,
3968 (TYPE_PRECISION (type)
3969 - tree_low_cst (pow2div, 1)));
3974 /* Returns the comparison operator used when eliminating the iv USE. */
3976 static enum tree_code
3977 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3979 struct loop *loop = data->current_loop;
3983 ex_bb = bb_for_stmt (use->stmt);
3984 exit = EDGE_SUCC (ex_bb, 0);
3985 if (flow_bb_inside_loop_p (loop, exit->dest))
3986 exit = EDGE_SUCC (ex_bb, 1);
3988 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3991 /* Check whether it is possible to express the condition in USE by comparison
3992 of candidate CAND. If so, store the value compared with to BOUND. */
3995 may_eliminate_iv (struct ivopts_data *data,
3996 struct iv_use *use, struct iv_cand *cand, tree *bound)
4000 struct tree_niter_desc *niter;
4002 tree wider_type, period, per_type;
4003 struct loop *loop = data->current_loop;
4005 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4008 /* For now works only for exits that dominate the loop latch. TODO -- extend
4009 for other conditions inside loop body. */
4010 ex_bb = bb_for_stmt (use->stmt);
4011 if (use->stmt != last_stmt (ex_bb)
4012 || TREE_CODE (use->stmt) != COND_EXPR)
4014 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4017 exit = EDGE_SUCC (ex_bb, 0);
4018 if (flow_bb_inside_loop_p (loop, exit->dest))
4019 exit = EDGE_SUCC (ex_bb, 1);
4020 if (flow_bb_inside_loop_p (loop, exit->dest))
4023 niter = niter_for_exit (data, exit);
4025 || !zero_p (niter->may_be_zero))
4029 nit_type = TREE_TYPE (nit);
4031 /* Determine whether we may use the variable to test whether niter iterations
4032 elapsed. This is the case iff the period of the induction variable is
4033 greater than the number of iterations. */
4034 period = iv_period (cand->iv);
4037 per_type = TREE_TYPE (period);
4039 wider_type = TREE_TYPE (period);
4040 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4041 wider_type = per_type;
4043 wider_type = nit_type;
4045 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4046 fold_convert (wider_type, period),
4047 fold_convert (wider_type, nit))))
4050 *bound = cand_value_at (loop, cand, use->stmt, nit);
4054 /* Determines cost of basing replacement of USE on CAND in a condition. */
4057 determine_use_iv_cost_condition (struct ivopts_data *data,
4058 struct iv_use *use, struct iv_cand *cand)
4060 tree bound = NULL_TREE, op, cond;
4061 bitmap depends_on = NULL;
4064 /* Only consider real candidates. */
4067 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4071 if (may_eliminate_iv (data, use, cand, &bound))
4073 cost = force_var_cost (data, bound, &depends_on);
4075 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4076 return cost != INFTY;
4079 /* The induction variable elimination failed; just express the original
4080 giv. If it is compared with an invariant, note that we cannot get
4082 cost = get_computation_cost (data, use, cand, false, &depends_on);
4085 if (TREE_CODE (cond) != SSA_NAME)
4087 op = TREE_OPERAND (cond, 0);
4088 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4089 op = TREE_OPERAND (cond, 1);
4090 if (TREE_CODE (op) == SSA_NAME)
4092 op = get_iv (data, op)->base;
4093 fd_ivopts_data = data;
4094 walk_tree (&op, find_depends, &depends_on, NULL);
4098 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4099 return cost != INFTY;
4102 /* Checks whether it is possible to replace the final value of USE by
4103 a direct computation. If so, the formula is stored to *VALUE. */
4106 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4109 struct loop *loop = data->current_loop;
4111 struct tree_niter_desc *niter;
4113 exit = single_dom_exit (loop);
4117 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4118 bb_for_stmt (use->stmt)));
4120 niter = niter_for_single_dom_exit (data);
4122 || !zero_p (niter->may_be_zero))
4125 *value = iv_value (use->iv, niter->niter);
4130 /* Determines cost of replacing final value of USE using CAND. */
4133 determine_use_iv_cost_outer (struct ivopts_data *data,
4134 struct iv_use *use, struct iv_cand *cand)
4139 tree value = NULL_TREE;
4140 struct loop *loop = data->current_loop;
4142 /* The simple case first -- if we need to express value of the preserved
4143 original biv, the cost is 0. This also prevents us from counting the
4144 cost of increment twice -- once at this use and once in the cost of
4146 if (cand->pos == IP_ORIGINAL
4147 && cand->incremented_at == use->stmt)
4149 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4155 if (!may_replace_final_value (data, use, &value))
4157 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4162 cost = force_var_cost (data, value, &depends_on);
4164 cost /= AVG_LOOP_NITER (loop);
4166 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4167 return cost != INFTY;
4170 exit = single_dom_exit (loop);
4173 /* If there is just a single exit, we may use value of the candidate
4174 after we take it to determine the value of use. */
4175 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4176 last_stmt (exit->src));
4178 cost /= AVG_LOOP_NITER (loop);
4182 /* Otherwise we just need to compute the iv. */
4183 cost = get_computation_cost (data, use, cand, false, &depends_on);
4186 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4188 return cost != INFTY;
4191 /* Determines cost of basing replacement of USE on CAND. Returns false
4192 if USE cannot be based on CAND. */
4195 determine_use_iv_cost (struct ivopts_data *data,
4196 struct iv_use *use, struct iv_cand *cand)
4200 case USE_NONLINEAR_EXPR:
4201 return determine_use_iv_cost_generic (data, use, cand);
4204 return determine_use_iv_cost_outer (data, use, cand);
4207 return determine_use_iv_cost_address (data, use, cand);
4210 return determine_use_iv_cost_condition (data, use, cand);
4217 /* Determines costs of basing the use of the iv on an iv candidate. */
4220 determine_use_iv_costs (struct ivopts_data *data)
4224 struct iv_cand *cand;
4225 bitmap to_clear = BITMAP_ALLOC (NULL);
4227 alloc_use_cost_map (data);
4229 for (i = 0; i < n_iv_uses (data); i++)
4231 use = iv_use (data, i);
4233 if (data->consider_all_candidates)
4235 for (j = 0; j < n_iv_cands (data); j++)
4237 cand = iv_cand (data, j);
4238 determine_use_iv_cost (data, use, cand);
4245 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4247 cand = iv_cand (data, j);
4248 if (!determine_use_iv_cost (data, use, cand))
4249 bitmap_set_bit (to_clear, j);
4252 /* Remove the candidates for that the cost is infinite from
4253 the list of related candidates. */
4254 bitmap_and_compl_into (use->related_cands, to_clear);
4255 bitmap_clear (to_clear);
4259 BITMAP_FREE (to_clear);
4261 if (dump_file && (dump_flags & TDF_DETAILS))
4263 fprintf (dump_file, "Use-candidate costs:\n");
4265 for (i = 0; i < n_iv_uses (data); i++)
4267 use = iv_use (data, i);
4269 fprintf (dump_file, "Use %d:\n", i);
4270 fprintf (dump_file, " cand\tcost\tdepends on\n");
4271 for (j = 0; j < use->n_map_members; j++)
4273 if (!use->cost_map[j].cand
4274 || use->cost_map[j].cost == INFTY)
4277 fprintf (dump_file, " %d\t%d\t",
4278 use->cost_map[j].cand->id,
4279 use->cost_map[j].cost);
4280 if (use->cost_map[j].depends_on)
4281 bitmap_print (dump_file,
4282 use->cost_map[j].depends_on, "","");
4283 fprintf (dump_file, "\n");
4286 fprintf (dump_file, "\n");
4288 fprintf (dump_file, "\n");
4292 /* Determines cost of the candidate CAND. */
4295 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4297 unsigned cost_base, cost_step;
4306 /* There are two costs associated with the candidate -- its increment
4307 and its initialization. The second is almost negligible for any loop
4308 that rolls enough, so we take it just very little into account. */
4310 base = cand->iv->base;
4311 cost_base = force_var_cost (data, base, NULL);
4312 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4314 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4316 /* Prefer the original iv unless we may gain something by replacing it;
4317 this is not really relevant for artificial ivs created by other
4319 if (cand->pos == IP_ORIGINAL
4320 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4323 /* Prefer not to insert statements into latch unless there are some
4324 already (so that we do not create unnecessary jumps). */
4325 if (cand->pos == IP_END
4326 && empty_block_p (ip_end_pos (data->current_loop)))
4330 /* Determines costs of computation of the candidates. */
4333 determine_iv_costs (struct ivopts_data *data)
4337 if (dump_file && (dump_flags & TDF_DETAILS))
4339 fprintf (dump_file, "Candidate costs:\n");
4340 fprintf (dump_file, " cand\tcost\n");
4343 for (i = 0; i < n_iv_cands (data); i++)
4345 struct iv_cand *cand = iv_cand (data, i);
4347 determine_iv_cost (data, cand);
4349 if (dump_file && (dump_flags & TDF_DETAILS))
4350 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4353 if (dump_file && (dump_flags & TDF_DETAILS))
4354 fprintf (dump_file, "\n");
4357 /* Calculates cost for having SIZE induction variables. */
4360 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4362 return global_cost_for_size (size,
4363 loop_data (data->current_loop)->regs_used,
4367 /* For each size of the induction variable set determine the penalty. */
4370 determine_set_costs (struct ivopts_data *data)
4374 struct loop *loop = data->current_loop;
4377 /* We use the following model (definitely improvable, especially the
4378 cost function -- TODO):
4380 We estimate the number of registers available (using MD data), name it A.
4382 We estimate the number of registers used by the loop, name it U. This
4383 number is obtained as the number of loop phi nodes (not counting virtual
4384 registers and bivs) + the number of variables from outside of the loop.
4386 We set a reserve R (free regs that are used for temporary computations,
4387 etc.). For now the reserve is a constant 3.
4389 Let I be the number of induction variables.
4391 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4392 make a lot of ivs without a reason).
4393 -- if A - R < U + I <= A, the cost is I * PRES_COST
4394 -- if U + I > A, the cost is I * PRES_COST and
4395 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4397 if (dump_file && (dump_flags & TDF_DETAILS))
4399 fprintf (dump_file, "Global costs:\n");
4400 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4401 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4402 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4403 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4407 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4409 op = PHI_RESULT (phi);
4411 if (!is_gimple_reg (op))
4414 if (get_iv (data, op))
4420 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4422 struct version_info *info = ver_info (data, j);
4424 if (info->inv_id && info->has_nonlin_use)
4428 loop_data (loop)->regs_used = n;
4429 if (dump_file && (dump_flags & TDF_DETAILS))
4430 fprintf (dump_file, " regs_used %d\n", n);
4432 if (dump_file && (dump_flags & TDF_DETAILS))
4434 fprintf (dump_file, " cost for size:\n");
4435 fprintf (dump_file, " ivs\tcost\n");
4436 for (j = 0; j <= 2 * target_avail_regs; j++)
4437 fprintf (dump_file, " %d\t%d\n", j,
4438 ivopts_global_cost_for_size (data, j));
4439 fprintf (dump_file, "\n");
4443 /* Returns true if A is a cheaper cost pair than B. */
4446 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4454 if (a->cost < b->cost)
4457 if (a->cost > b->cost)
4460 /* In case the costs are the same, prefer the cheaper candidate. */
4461 if (a->cand->cost < b->cand->cost)
4467 /* Computes the cost field of IVS structure. */
4470 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4474 cost += ivs->cand_use_cost;
4475 cost += ivs->cand_cost;
4476 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4481 /* Remove invariants in set INVS to set IVS. */
4484 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4492 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4494 ivs->n_invariant_uses[iid]--;
4495 if (ivs->n_invariant_uses[iid] == 0)
4500 /* Set USE not to be expressed by any candidate in IVS. */
4503 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4506 unsigned uid = use->id, cid;
4507 struct cost_pair *cp;
4509 cp = ivs->cand_for_use[uid];
4515 ivs->cand_for_use[uid] = NULL;
4516 ivs->n_cand_uses[cid]--;
4518 if (ivs->n_cand_uses[cid] == 0)
4520 bitmap_clear_bit (ivs->cands, cid);
4521 /* Do not count the pseudocandidates. */
4525 ivs->cand_cost -= cp->cand->cost;
4527 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4530 ivs->cand_use_cost -= cp->cost;
4532 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4533 iv_ca_recount_cost (data, ivs);
4536 /* Add invariants in set INVS to set IVS. */
4539 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4547 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4549 ivs->n_invariant_uses[iid]++;
4550 if (ivs->n_invariant_uses[iid] == 1)
4555 /* Set cost pair for USE in set IVS to CP. */
4558 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4559 struct iv_use *use, struct cost_pair *cp)
4561 unsigned uid = use->id, cid;
4563 if (ivs->cand_for_use[uid] == cp)
4566 if (ivs->cand_for_use[uid])
4567 iv_ca_set_no_cp (data, ivs, use);
4574 ivs->cand_for_use[uid] = cp;
4575 ivs->n_cand_uses[cid]++;
4576 if (ivs->n_cand_uses[cid] == 1)
4578 bitmap_set_bit (ivs->cands, cid);
4579 /* Do not count the pseudocandidates. */
4583 ivs->cand_cost += cp->cand->cost;
4585 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4588 ivs->cand_use_cost += cp->cost;
4589 iv_ca_set_add_invariants (ivs, cp->depends_on);
4590 iv_ca_recount_cost (data, ivs);
4594 /* Extend set IVS by expressing USE by some of the candidates in it
4598 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4601 struct cost_pair *best_cp = NULL, *cp;
4605 gcc_assert (ivs->upto >= use->id);
4607 if (ivs->upto == use->id)
4613 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4615 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4617 if (cheaper_cost_pair (cp, best_cp))
4621 iv_ca_set_cp (data, ivs, use, best_cp);
4624 /* Get cost for assignment IVS. */
4627 iv_ca_cost (struct iv_ca *ivs)
4629 return (ivs->bad_uses ? INFTY : ivs->cost);
4632 /* Returns true if all dependences of CP are among invariants in IVS. */
4635 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4640 if (!cp->depends_on)
4643 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4645 if (ivs->n_invariant_uses[i] == 0)
4652 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4653 it before NEXT_CHANGE. */
4655 static struct iv_ca_delta *
4656 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4657 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4659 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4662 change->old_cp = old_cp;
4663 change->new_cp = new_cp;
4664 change->next_change = next_change;
4669 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4672 static struct iv_ca_delta *
4673 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4675 struct iv_ca_delta *last;
4683 for (last = l1; last->next_change; last = last->next_change)
4685 last->next_change = l2;
4690 /* Returns candidate by that USE is expressed in IVS. */
4692 static struct cost_pair *
4693 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4695 return ivs->cand_for_use[use->id];
4698 /* Reverse the list of changes DELTA, forming the inverse to it. */
4700 static struct iv_ca_delta *
4701 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4703 struct iv_ca_delta *act, *next, *prev = NULL;
4704 struct cost_pair *tmp;
4706 for (act = delta; act; act = next)
4708 next = act->next_change;
4709 act->next_change = prev;
4713 act->old_cp = act->new_cp;
4720 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4721 reverted instead. */
4724 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4725 struct iv_ca_delta *delta, bool forward)
4727 struct cost_pair *from, *to;
4728 struct iv_ca_delta *act;
4731 delta = iv_ca_delta_reverse (delta);
4733 for (act = delta; act; act = act->next_change)
4737 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4738 iv_ca_set_cp (data, ivs, act->use, to);
4742 iv_ca_delta_reverse (delta);
4745 /* Returns true if CAND is used in IVS. */
4748 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4750 return ivs->n_cand_uses[cand->id] > 0;
4753 /* Returns number of induction variable candidates in the set IVS. */
4756 iv_ca_n_cands (struct iv_ca *ivs)
4758 return ivs->n_cands;
4761 /* Free the list of changes DELTA. */
4764 iv_ca_delta_free (struct iv_ca_delta **delta)
4766 struct iv_ca_delta *act, *next;
4768 for (act = *delta; act; act = next)
4770 next = act->next_change;
4777 /* Allocates new iv candidates assignment. */
4779 static struct iv_ca *
4780 iv_ca_new (struct ivopts_data *data)
4782 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4786 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4787 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4788 nw->cands = BITMAP_ALLOC (NULL);
4791 nw->cand_use_cost = 0;
4793 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4799 /* Free memory occupied by the set IVS. */
4802 iv_ca_free (struct iv_ca **ivs)
4804 free ((*ivs)->cand_for_use);
4805 free ((*ivs)->n_cand_uses);
4806 BITMAP_FREE ((*ivs)->cands);
4807 free ((*ivs)->n_invariant_uses);
4812 /* Dumps IVS to FILE. */
4815 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4817 const char *pref = " invariants ";
4820 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4821 bitmap_print (file, ivs->cands, " candidates ","\n");
4823 for (i = 1; i <= data->max_inv_id; i++)
4824 if (ivs->n_invariant_uses[i])
4826 fprintf (file, "%s%d", pref, i);
4829 fprintf (file, "\n");
4832 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4833 new set, and store differences in DELTA. Number of induction variables
4834 in the new set is stored to N_IVS. */
4837 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4838 struct iv_cand *cand, struct iv_ca_delta **delta,
4843 struct cost_pair *old_cp, *new_cp;
4846 for (i = 0; i < ivs->upto; i++)
4848 use = iv_use (data, i);
4849 old_cp = iv_ca_cand_for_use (ivs, use);
4852 && old_cp->cand == cand)
4855 new_cp = get_use_iv_cost (data, use, cand);
4859 if (!iv_ca_has_deps (ivs, new_cp))
4862 if (!cheaper_cost_pair (new_cp, old_cp))
4865 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4868 iv_ca_delta_commit (data, ivs, *delta, true);
4869 cost = iv_ca_cost (ivs);
4871 *n_ivs = iv_ca_n_cands (ivs);
4872 iv_ca_delta_commit (data, ivs, *delta, false);
4877 /* Try narrowing set IVS by removing CAND. Return the cost of
4878 the new set and store the differences in DELTA. */
4881 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4882 struct iv_cand *cand, struct iv_ca_delta **delta)
4886 struct cost_pair *old_cp, *new_cp, *cp;
4888 struct iv_cand *cnd;
4892 for (i = 0; i < n_iv_uses (data); i++)
4894 use = iv_use (data, i);
4896 old_cp = iv_ca_cand_for_use (ivs, use);
4897 if (old_cp->cand != cand)
4902 if (data->consider_all_candidates)
4904 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4909 cnd = iv_cand (data, ci);
4911 cp = get_use_iv_cost (data, use, cnd);
4914 if (!iv_ca_has_deps (ivs, cp))
4917 if (!cheaper_cost_pair (cp, new_cp))
4925 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4930 cnd = iv_cand (data, ci);
4932 cp = get_use_iv_cost (data, use, cnd);
4935 if (!iv_ca_has_deps (ivs, cp))
4938 if (!cheaper_cost_pair (cp, new_cp))
4947 iv_ca_delta_free (delta);
4951 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4954 iv_ca_delta_commit (data, ivs, *delta, true);
4955 cost = iv_ca_cost (ivs);
4956 iv_ca_delta_commit (data, ivs, *delta, false);
4961 /* Try optimizing the set of candidates IVS by removing candidates different
4962 from to EXCEPT_CAND from it. Return cost of the new set, and store
4963 differences in DELTA. */
4966 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4967 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4970 struct iv_ca_delta *act_delta, *best_delta;
4971 unsigned i, best_cost, acost;
4972 struct iv_cand *cand;
4975 best_cost = iv_ca_cost (ivs);
4977 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4979 cand = iv_cand (data, i);
4981 if (cand == except_cand)
4984 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4986 if (acost < best_cost)
4989 iv_ca_delta_free (&best_delta);
4990 best_delta = act_delta;
4993 iv_ca_delta_free (&act_delta);
5002 /* Recurse to possibly remove other unnecessary ivs. */
5003 iv_ca_delta_commit (data, ivs, best_delta, true);
5004 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5005 iv_ca_delta_commit (data, ivs, best_delta, false);
5006 *delta = iv_ca_delta_join (best_delta, *delta);
5010 /* Tries to extend the sets IVS in the best possible way in order
5011 to express the USE. */
5014 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5017 unsigned best_cost, act_cost;
5020 struct iv_cand *cand;
5021 struct iv_ca_delta *best_delta = NULL, *act_delta;
5022 struct cost_pair *cp;
5024 iv_ca_add_use (data, ivs, use);
5025 best_cost = iv_ca_cost (ivs);
5027 cp = iv_ca_cand_for_use (ivs, use);
5030 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5031 iv_ca_set_no_cp (data, ivs, use);
5034 /* First try important candidates. Only if it fails, try the specific ones.
5035 Rationale -- in loops with many variables the best choice often is to use
5036 just one generic biv. If we added here many ivs specific to the uses,
5037 the optimization algorithm later would be likely to get stuck in a local
5038 minimum, thus causing us to create too many ivs. The approach from
5039 few ivs to more seems more likely to be successful -- starting from few
5040 ivs, replacing an expensive use by a specific iv should always be a
5042 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5044 cand = iv_cand (data, i);
5046 if (iv_ca_cand_used_p (ivs, cand))
5049 cp = get_use_iv_cost (data, use, cand);
5053 iv_ca_set_cp (data, ivs, use, cp);
5054 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5055 iv_ca_set_no_cp (data, ivs, use);
5056 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5058 if (act_cost < best_cost)
5060 best_cost = act_cost;
5062 iv_ca_delta_free (&best_delta);
5063 best_delta = act_delta;
5066 iv_ca_delta_free (&act_delta);
5069 if (best_cost == INFTY)
5071 for (i = 0; i < use->n_map_members; i++)
5073 cp = use->cost_map + i;
5078 /* Already tried this. */
5079 if (cand->important)
5082 if (iv_ca_cand_used_p (ivs, cand))
5086 iv_ca_set_cp (data, ivs, use, cp);
5087 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5088 iv_ca_set_no_cp (data, ivs, use);
5089 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5092 if (act_cost < best_cost)
5094 best_cost = act_cost;
5097 iv_ca_delta_free (&best_delta);
5098 best_delta = act_delta;
5101 iv_ca_delta_free (&act_delta);
5105 iv_ca_delta_commit (data, ivs, best_delta, true);
5106 iv_ca_delta_free (&best_delta);
5108 return (best_cost != INFTY);
5111 /* Finds an initial assignment of candidates to uses. */
5113 static struct iv_ca *
5114 get_initial_solution (struct ivopts_data *data)
5116 struct iv_ca *ivs = iv_ca_new (data);
5119 for (i = 0; i < n_iv_uses (data); i++)
5120 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5129 /* Tries to improve set of induction variables IVS. */
5132 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5134 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5135 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5136 struct iv_cand *cand;
5138 /* Try extending the set of induction variables by one. */
5139 for (i = 0; i < n_iv_cands (data); i++)
5141 cand = iv_cand (data, i);
5143 if (iv_ca_cand_used_p (ivs, cand))
5146 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5150 /* If we successfully added the candidate and the set is small enough,
5151 try optimizing it by removing other candidates. */
5152 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5154 iv_ca_delta_commit (data, ivs, act_delta, true);
5155 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5156 iv_ca_delta_commit (data, ivs, act_delta, false);
5157 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5160 if (acost < best_cost)
5163 iv_ca_delta_free (&best_delta);
5164 best_delta = act_delta;
5167 iv_ca_delta_free (&act_delta);
5172 /* Try removing the candidates from the set instead. */
5173 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5175 /* Nothing more we can do. */
5180 iv_ca_delta_commit (data, ivs, best_delta, true);
5181 gcc_assert (best_cost == iv_ca_cost (ivs));
5182 iv_ca_delta_free (&best_delta);
5186 /* Attempts to find the optimal set of induction variables. We do simple
5187 greedy heuristic -- we try to replace at most one candidate in the selected
5188 solution and remove the unused ivs while this improves the cost. */
5190 static struct iv_ca *
5191 find_optimal_iv_set (struct ivopts_data *data)
5197 /* Get the initial solution. */
5198 set = get_initial_solution (data);
5201 if (dump_file && (dump_flags & TDF_DETAILS))
5202 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5206 if (dump_file && (dump_flags & TDF_DETAILS))
5208 fprintf (dump_file, "Initial set of candidates:\n");
5209 iv_ca_dump (data, dump_file, set);
5212 while (try_improve_iv_set (data, set))
5214 if (dump_file && (dump_flags & TDF_DETAILS))
5216 fprintf (dump_file, "Improved to:\n");
5217 iv_ca_dump (data, dump_file, set);
5221 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5224 for (i = 0; i < n_iv_uses (data); i++)
5226 use = iv_use (data, i);
5227 use->selected = iv_ca_cand_for_use (set, use)->cand;
5233 /* Creates a new induction variable corresponding to CAND. */
5236 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5238 block_stmt_iterator incr_pos;
5248 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5252 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5257 /* Mark that the iv is preserved. */
5258 name_info (data, cand->var_before)->preserve_biv = true;
5259 name_info (data, cand->var_after)->preserve_biv = true;
5261 /* Rewrite the increment so that it uses var_before directly. */
5262 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5267 gimple_add_tmp_var (cand->var_before);
5268 add_referenced_tmp_var (cand->var_before);
5270 base = unshare_expr (cand->iv->base);
5272 create_iv (base, unshare_expr (cand->iv->step),
5273 cand->var_before, data->current_loop,
5274 &incr_pos, after, &cand->var_before, &cand->var_after);
5277 /* Creates new induction variables described in SET. */
5280 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5283 struct iv_cand *cand;
5286 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5288 cand = iv_cand (data, i);
5289 create_new_iv (data, cand);
5293 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5294 is true, remove also the ssa name defined by the statement. */
5297 remove_statement (tree stmt, bool including_defined_name)
5299 if (TREE_CODE (stmt) == PHI_NODE)
5301 if (!including_defined_name)
5303 /* Prevent the ssa name defined by the statement from being removed. */
5304 SET_PHI_RESULT (stmt, NULL);
5306 remove_phi_node (stmt, NULL_TREE);
5310 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5316 /* Rewrites USE (definition of iv used in a nonlinear expression)
5317 using candidate CAND. */
5320 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5321 struct iv_use *use, struct iv_cand *cand)
5324 tree op, stmts, tgt, ass;
5325 block_stmt_iterator bsi, pbsi;
5327 /* An important special case -- if we are asked to express value of
5328 the original iv by itself, just exit; there is no need to
5329 introduce a new computation (that might also need casting the
5330 variable to unsigned and back). */
5331 if (cand->pos == IP_ORIGINAL
5332 && TREE_CODE (use->stmt) == MODIFY_EXPR
5333 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5335 op = TREE_OPERAND (use->stmt, 1);
5337 /* Be a bit careful. In case variable is expressed in some
5338 complicated way, rewrite it so that we may get rid of this
5339 complicated expression. */
5340 if ((TREE_CODE (op) == PLUS_EXPR
5341 || TREE_CODE (op) == MINUS_EXPR)
5342 && TREE_OPERAND (op, 0) == cand->var_before
5343 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
5347 comp = get_computation (data->current_loop, use, cand);
5348 switch (TREE_CODE (use->stmt))
5351 tgt = PHI_RESULT (use->stmt);
5353 /* If we should keep the biv, do not replace it. */
5354 if (name_info (data, tgt)->preserve_biv)
5357 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5358 while (!bsi_end_p (pbsi)
5359 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5367 tgt = TREE_OPERAND (use->stmt, 0);
5368 bsi = bsi_for_stmt (use->stmt);
5375 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5377 if (TREE_CODE (use->stmt) == PHI_NODE)
5380 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5381 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5382 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5383 remove_statement (use->stmt, false);
5384 SSA_NAME_DEF_STMT (tgt) = ass;
5389 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5390 TREE_OPERAND (use->stmt, 1) = op;
5394 /* Replaces ssa name in index IDX by its basic variable. Callback for
5398 idx_remove_ssa_names (tree base, tree *idx,
5399 void *data ATTRIBUTE_UNUSED)
5403 if (TREE_CODE (*idx) == SSA_NAME)
5404 *idx = SSA_NAME_VAR (*idx);
5406 if (TREE_CODE (base) == ARRAY_REF)
5408 op = &TREE_OPERAND (base, 2);
5410 && TREE_CODE (*op) == SSA_NAME)
5411 *op = SSA_NAME_VAR (*op);
5412 op = &TREE_OPERAND (base, 3);
5414 && TREE_CODE (*op) == SSA_NAME)
5415 *op = SSA_NAME_VAR (*op);
5421 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5424 unshare_and_remove_ssa_names (tree ref)
5426 ref = unshare_expr (ref);
5427 for_each_index (&ref, idx_remove_ssa_names, NULL);
5432 /* Extract the alias analysis info for the memory reference REF. There are
5433 several ways how this information may be stored and what precisely is
5434 its semantics depending on the type of the reference, but there always is
5435 somewhere hidden one _DECL node that is used to determine the set of
5436 virtual operands for the reference. The code below deciphers this jungle
5437 and extracts this single useful piece of information. */
5440 get_ref_tag (tree ref)
5442 tree var = get_base_address (ref);
5448 if (TREE_CODE (var) == INDIRECT_REF)
5449 var = TREE_OPERAND (var, 0);
5450 if (TREE_CODE (var) == SSA_NAME)
5452 if (SSA_NAME_PTR_INFO (var))
5454 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5459 var = SSA_NAME_VAR (var);
5464 tag = var_ann (var)->type_mem_tag;
5474 /* Copies the reference information from OLD_REF to NEW_REF. */
5477 copy_ref_info (tree new_ref, tree old_ref)
5479 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5480 copy_mem_ref_info (new_ref, old_ref);
5483 TMR_TAG (new_ref) = get_ref_tag (old_ref);
5484 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5488 /* Rewrites USE (address that is an iv) using candidate CAND. */
5491 rewrite_use_address (struct ivopts_data *data,
5492 struct iv_use *use, struct iv_cand *cand)
5494 struct affine_tree_combination aff;
5495 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5498 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5499 unshare_aff_combination (&aff);
5501 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5502 copy_ref_info (ref, *use->op_p);
5506 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5510 rewrite_use_compare (struct ivopts_data *data,
5511 struct iv_use *use, struct iv_cand *cand)
5514 tree *op_p, cond, op, stmts, bound;
5515 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5516 enum tree_code compare;
5517 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5522 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5523 tree var_type = TREE_TYPE (var);
5525 compare = iv_elimination_compare (data, use);
5526 bound = fold_convert (var_type, bound);
5527 op = force_gimple_operand (unshare_expr (bound), &stmts,
5531 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5533 *use->op_p = build2 (compare, boolean_type_node, var, op);
5534 update_stmt (use->stmt);
5538 /* The induction variable elimination failed; just express the original
5540 comp = get_computation (data->current_loop, use, cand);
5543 op_p = &TREE_OPERAND (cond, 0);
5544 if (TREE_CODE (*op_p) != SSA_NAME
5545 || zero_p (get_iv (data, *op_p)->step))
5546 op_p = &TREE_OPERAND (cond, 1);
5548 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5550 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5555 /* Ensure that operand *OP_P may be used at the end of EXIT without
5556 violating loop closed ssa form. */
5559 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5562 struct loop *def_loop;
5565 use = USE_FROM_PTR (op_p);
5566 if (TREE_CODE (use) != SSA_NAME)
5569 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5573 def_loop = def_bb->loop_father;
5574 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5577 /* Try finding a phi node that copies the value out of the loop. */
5578 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5579 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5584 /* Create such a phi node. */
5585 tree new_name = duplicate_ssa_name (use, NULL);
5587 phi = create_phi_node (new_name, exit->dest);
5588 SSA_NAME_DEF_STMT (new_name) = phi;
5589 add_phi_arg (phi, use, exit);
5592 SET_USE (op_p, PHI_RESULT (phi));
5595 /* Ensure that operands of STMT may be used at the end of EXIT without
5596 violating loop closed ssa form. */
5599 protect_loop_closed_ssa_form (edge exit, tree stmt)
5602 use_operand_p use_p;
5604 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5605 protect_loop_closed_ssa_form_use (exit, use_p);
5608 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5609 so that they are emitted on the correct place, and so that the loop closed
5610 ssa form is preserved. */
5613 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5615 tree_stmt_iterator tsi;
5616 block_stmt_iterator bsi;
5617 tree phi, stmt, def, next;
5619 if (!single_pred_p (exit->dest))
5620 split_loop_exit_edge (exit);
5622 /* Ensure there is label in exit->dest, so that we can
5624 tree_block_label (exit->dest);
5625 bsi = bsi_after_labels (exit->dest);
5627 if (TREE_CODE (stmts) == STATEMENT_LIST)
5629 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5631 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5632 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5637 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5638 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5644 for (phi = phi_nodes (exit->dest); phi; phi = next)
5646 next = PHI_CHAIN (phi);
5648 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5650 def = PHI_RESULT (phi);
5651 remove_statement (phi, false);
5652 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5654 SSA_NAME_DEF_STMT (def) = stmt;
5655 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5660 /* Rewrites the final value of USE (that is only needed outside of the loop)
5661 using candidate CAND. */
5664 rewrite_use_outer (struct ivopts_data *data,
5665 struct iv_use *use, struct iv_cand *cand)
5668 tree value, op, stmts, tgt;
5671 switch (TREE_CODE (use->stmt))
5674 tgt = PHI_RESULT (use->stmt);
5677 tgt = TREE_OPERAND (use->stmt, 0);
5683 exit = single_dom_exit (data->current_loop);
5689 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5690 value = unshare_expr (cp->value);
5693 value = get_computation_at (data->current_loop,
5694 use, cand, last_stmt (exit->src));
5696 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5698 /* If we will preserve the iv anyway and we would need to perform
5699 some computation to replace the final value, do nothing. */
5700 if (stmts && name_info (data, tgt)->preserve_biv)
5703 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5705 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5707 if (USE_FROM_PTR (use_p) == tgt)
5708 SET_USE (use_p, op);
5712 compute_phi_arg_on_exit (exit, stmts, op);
5714 /* Enable removal of the statement. We cannot remove it directly,
5715 since we may still need the aliasing information attached to the
5716 ssa name defined by it. */
5717 name_info (data, tgt)->iv->have_use_for = false;
5721 /* If the variable is going to be preserved anyway, there is nothing to
5723 if (name_info (data, tgt)->preserve_biv)
5726 /* Otherwise we just need to compute the iv. */
5727 rewrite_use_nonlinear_expr (data, use, cand);
5730 /* Rewrites USE using candidate CAND. */
5733 rewrite_use (struct ivopts_data *data,
5734 struct iv_use *use, struct iv_cand *cand)
5738 case USE_NONLINEAR_EXPR:
5739 rewrite_use_nonlinear_expr (data, use, cand);
5743 rewrite_use_outer (data, use, cand);
5747 rewrite_use_address (data, use, cand);
5751 rewrite_use_compare (data, use, cand);
5757 update_stmt (use->stmt);
5760 /* Rewrite the uses using the selected induction variables. */
5763 rewrite_uses (struct ivopts_data *data)
5766 struct iv_cand *cand;
5769 for (i = 0; i < n_iv_uses (data); i++)
5771 use = iv_use (data, i);
5772 cand = use->selected;
5775 rewrite_use (data, use, cand);
5779 /* Removes the ivs that are not used after rewriting. */
5782 remove_unused_ivs (struct ivopts_data *data)
5787 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5789 struct version_info *info;
5791 info = ver_info (data, j);
5793 && !zero_p (info->iv->step)
5795 && !info->iv->have_use_for
5796 && !info->preserve_biv)
5797 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5801 /* Frees data allocated by the optimization of a single loop. */
5804 free_loop_data (struct ivopts_data *data)
5810 htab_empty (data->niters);
5812 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5814 struct version_info *info;
5816 info = ver_info (data, i);
5820 info->has_nonlin_use = false;
5821 info->preserve_biv = false;
5824 bitmap_clear (data->relevant);
5825 bitmap_clear (data->important_candidates);
5827 for (i = 0; i < n_iv_uses (data); i++)
5829 struct iv_use *use = iv_use (data, i);
5832 BITMAP_FREE (use->related_cands);
5833 for (j = 0; j < use->n_map_members; j++)
5834 if (use->cost_map[j].depends_on)
5835 BITMAP_FREE (use->cost_map[j].depends_on);
5836 free (use->cost_map);
5839 VEC_truncate (iv_use_p, data->iv_uses, 0);
5841 for (i = 0; i < n_iv_cands (data); i++)
5843 struct iv_cand *cand = iv_cand (data, i);
5847 if (cand->depends_on)
5848 BITMAP_FREE (cand->depends_on);
5851 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5853 if (data->version_info_size < num_ssa_names)
5855 data->version_info_size = 2 * num_ssa_names;
5856 free (data->version_info);
5857 data->version_info = xcalloc (data->version_info_size,
5858 sizeof (struct version_info));
5861 data->max_inv_id = 0;
5863 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5864 SET_DECL_RTL (obj, NULL_RTX);
5866 VEC_truncate (tree, decl_rtl_to_reset, 0);
5869 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5873 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5877 for (i = 1; i < loops->num; i++)
5878 if (loops->parray[i])
5880 free (loops->parray[i]->aux);
5881 loops->parray[i]->aux = NULL;
5884 free_loop_data (data);
5885 free (data->version_info);
5886 BITMAP_FREE (data->relevant);
5887 BITMAP_FREE (data->important_candidates);
5888 htab_delete (data->niters);
5890 VEC_free (tree, heap, decl_rtl_to_reset);
5891 VEC_free (iv_use_p, heap, data->iv_uses);
5892 VEC_free (iv_cand_p, heap, data->iv_candidates);
5895 /* Optimizes the LOOP. Returns true if anything changed. */
5898 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5900 bool changed = false;
5901 struct iv_ca *iv_ca;
5904 data->current_loop = loop;
5906 if (dump_file && (dump_flags & TDF_DETAILS))
5908 fprintf (dump_file, "Processing loop %d\n", loop->num);
5910 exit = single_dom_exit (loop);
5913 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5914 exit->src->index, exit->dest->index);
5915 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5916 fprintf (dump_file, "\n");
5919 fprintf (dump_file, "\n");
5922 /* For each ssa name determines whether it behaves as an induction variable
5924 if (!find_induction_variables (data))
5927 /* Finds interesting uses (item 1). */
5928 find_interesting_uses (data);
5929 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5932 /* Finds candidates for the induction variables (item 2). */
5933 find_iv_candidates (data);
5935 /* Calculates the costs (item 3, part 1). */
5936 determine_use_iv_costs (data);
5937 determine_iv_costs (data);
5938 determine_set_costs (data);
5940 /* Find the optimal set of induction variables (item 3, part 2). */
5941 iv_ca = find_optimal_iv_set (data);
5946 /* Create the new induction variables (item 4, part 1). */
5947 create_new_ivs (data, iv_ca);
5948 iv_ca_free (&iv_ca);
5950 /* Rewrite the uses (item 4, part 2). */
5951 rewrite_uses (data);
5953 /* Remove the ivs that are unused after rewriting. */
5954 remove_unused_ivs (data);
5956 /* We have changed the structure of induction variables; it might happen
5957 that definitions in the scev database refer to some of them that were
5962 free_loop_data (data);
5967 /* Main entry point. Optimizes induction variables in LOOPS. */
5970 tree_ssa_iv_optimize (struct loops *loops)
5973 struct ivopts_data data;
5975 tree_ssa_iv_optimize_init (loops, &data);
5977 /* Optimize the loops starting with the innermost ones. */
5978 loop = loops->tree_root;
5982 /* Scan the loops, inner ones first. */
5983 while (loop != loops->tree_root)
5985 if (dump_file && (dump_flags & TDF_DETAILS))
5986 flow_loop_dump (loop, dump_file, NULL, 1);
5988 tree_ssa_iv_optimize_loop (&data, loop);
6000 tree_ssa_iv_optimize_finalize (loops, &data);