1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
41 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
/* NOTE(review): truncated fragment -- the embedded original line numbers are
   non-contiguous, so the "struct iv {" header and closing "};" are missing
   from this dump.  Restore the full text before editing.  */
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to which the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
/* NOTE(review): truncated fragment -- the struct header/closer lines are
   missing from this dump (non-contiguous original line numbers).  */
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
/* NOTE(review): truncated fragment -- struct header/closer missing.  */
127 unsigned regs_used; /* Number of registers used. */
/* NOTE(review): truncated fragment -- the "enum use_type {" header and
   closing "};" are missing from this dump.  */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
/* NOTE(review): truncated fragment -- struct header/closer and parts of the
   field comments are missing (non-contiguous original line numbers).  */
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
/* NOTE(review): truncated fragment -- the "struct iv_use {" header and
   closing "};" are missing from this dump.  */
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
/* NOTE(review): truncated fragment -- the "enum iv_position {" header and
   closing "};" are missing from this dump.  */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
/* NOTE(review): truncated fragment -- struct header/closer missing from this
   dump (non-contiguous original line numbers).  */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
/* NOTE(review): truncated fragment -- the "struct ivopts_data {" header and
   closing "};" are missing from this dump, as are several field lines.  */
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
225 /* The maximum invariant id. */
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
/* NOTE(review): truncated fragment -- struct header/closer and several field
   declarations are missing from this dump.  */
246 /* The number of uses covered by the assignment. */
249 /* Number of uses that cannot be expressed by the candidates in the set. */
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
261 /* The number of candidates in the set. */
264 /* Total number of registers needed. */
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
280 /* Difference of two iv candidate assignments. */
/* NOTE(review): truncated fragment -- struct header/closer and the "use"
   field are missing from this dump.  */
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
369 if (!just_once_each_iteration_p (loop, exit->src))
375 /* Dumps information about the induction variable IV to FILE. */
/* NOTE(review): truncated fragment -- the function header, braces and the
   conditional structure around these fprintf groups are missing from this
   dump (non-contiguous original line numbers); the printed sections are
   presumably guarded by null checks on iv->step, iv->base_object, etc. --
   confirm against the full source before editing.  */
377 extern void dump_iv (FILE *, struct iv *);
379 dump_iv (FILE *file, struct iv *iv)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
/* NOTE(review): truncated fragment -- the function header, braces, the
   switch statement around the use->type cases, and the default branch are
   missing from this dump.  Restore the full text before editing.  */
422 extern void dump_use (FILE *, struct iv_use *);
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
435 fprintf (file, " outside\n");
439 fprintf (file, " address\n");
443 fprintf (file, " compare\n");
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
472 dump_uses (FILE *file, struct ivopts_data *data)
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
/* NOTE(review): truncated fragment -- the function header, braces and the
   switch over cand->pos are missing from this dump (non-contiguous original
   line numbers).  Restore the full text before editing.  */
488 extern void dump_cand (FILE *, struct iv_cand *);
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
505 fprintf (file, " final value replacement\n");
512 fprintf (file, " incremented before exit test\n");
516 fprintf (file, " incremented at end\n");
520 fprintf (file, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
/* NOTE(review): truncated fragment -- the function header, braces, the loop
   body that halves A and B, the even-B early return, and the final return
   statements are missing from this dump.  The visible algorithm: once B is
   odd it is invertible mod 2^BITS; the inverse is computed by repeated
   squaring (apparently as b^(2^(bits-1) - 1)), then X = A * B^-1 masked to
   BITS bits.  Restore the full text before editing.  */
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
/* NOTE(review): truncated fragment -- the function header, braces and the
   intermediate null/latch checks are missing from this dump.  */
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
604 if (sbb == loop->latch)
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
/* NOTE(review): truncated fragment -- the function header, braces, the early
   returns and the loop body's return statements are missing from this dump
   (non-contiguous original line numbers).  The backward scan decides which
   of STMT / cand->incremented_at comes first within the same block.  */
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
626 if (stmt_bb != cand_bb)
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
635 if (bsi_stmt (bsi) == stmt)
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
/* NOTE(review): truncated fragment -- the function header, braces and the
   switch over cand->pos (presumably IP_END / IP_NORMAL / IP_ORIGINAL cases)
   are missing from this dump.  Restore the full text before editing.  */
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
652 return stmt_after_ip_normal_pos (loop, stmt);
655 return stmt_after_ip_original_pos (cand, stmt);
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
/* NOTE(review): truncated fragment -- the "struct nfe_cache_elt {" header,
   the "edge exit;"/"bool valid_p;" declarations and the closing "};" are
   missing from this dump.  */
667 /* The edge for that the number of iterations is cached. */
670 /* True if the # of iterations was successfully determined. */
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
/* NOTE(review): truncated fragment -- the function braces, the "slot"
   declaration, the empty-slot check around the xmalloc path, and the slot
   store are missing from this dump.  The visible logic caches the
   number_of_iterations_exit result in data->niters keyed by EXIT, and
   returns NULL when the computation failed (valid_p false).  */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter);
721 if (!nfe_desc->valid_p)
724 return &nfe_desc->niter;
727 /* Returns structure describing number of iterations determined from
728 single dominating exit of DATA->current_loop, or NULL if something
731 static struct tree_niter_desc *
732 niter_for_single_dom_exit (struct ivopts_data *data)
734 edge exit = single_dom_exit (data->current_loop);
739 return niter_for_exit (data, exit);
742 /* Initializes data structures used by the iv optimization pass, stored
743 in DATA. LOOPS is the loop tree. */
/* NOTE(review): truncated fragment -- the function header line, braces and
   the "unsigned i;" declaration are missing from this dump.  The visible
   logic allocates the version_info array (2x num_ssa_names for headroom --
   TODO confirm rationale), the relevant/important bitmaps, the niters hash
   table, per-loop aux data for every loop in the tree, and the use/candidate
   vectors plus the decl_rtl_to_reset list.  */
746 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
750 data->version_info_size = 2 * num_ssa_names;
751 data->version_info = xcalloc (data->version_info_size,
752 sizeof (struct version_info));
753 data->relevant = BITMAP_ALLOC (NULL);
754 data->important_candidates = BITMAP_ALLOC (NULL);
755 data->max_inv_id = 0;
756 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
758 for (i = 1; i < loops->num; i++)
759 if (loops->parray[i])
760 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
/* NOTE(review): truncated fragment -- the function braces, the switch over
   CODE, the case labels (presumably ADDR_EXPR, PLUS_EXPR/MINUS_EXPR, and a
   conversion case -- confirm against full source) and several return
   statements are missing from this dump.  Restore before editing.  */
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
774 tree base, obj, op0, op1;
776 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
785 obj = TREE_OPERAND (expr, 0);
786 base = get_base_address (obj);
791 if (TREE_CODE (base) == INDIRECT_REF)
792 return determine_base_object (TREE_OPERAND (base, 0));
794 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
798 op0 = determine_base_object (TREE_OPERAND (expr, 0));
799 op1 = determine_base_object (TREE_OPERAND (expr, 1));
805 return (code == PLUS_EXPR
807 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
809 return fold (build (code, ptr_type_node, op0, op1));
813 return determine_base_object (TREE_OPERAND (expr, 0));
816 return fold_convert (ptr_type_node, expr);
820 /* Allocates an induction variable with given initial value BASE and step STEP
/* NOTE(review): truncated fragment -- the function header line, braces, the
   assignments of iv->base / iv->step / iv->biv_p / iv->use_id and the final
   return are missing from this dump.  The integer_zerop check presumably
   normalizes a zero STEP to NULL -- confirm against the full source.  */
824 alloc_iv (tree base, tree step)
826 struct iv *iv = xcalloc (1, sizeof (struct iv));
828 if (step && integer_zerop (step))
832 iv->base_object = determine_base_object (base);
835 iv->have_use_for = false;
837 iv->ssa_name = NULL_TREE;
842 /* Sets STEP and BASE for induction variable IV. */
/* NOTE(review): truncated fragment -- the function header line and braces
   are missing from this dump.  Records IV as relevant, asserts it has no
   prior description, and attaches a freshly allocated struct iv.  */
845 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
847 struct version_info *info = name_info (data, iv);
849 gcc_assert (!info->iv);
851 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
852 info->iv = alloc_iv (base, step);
853 info->iv->ssa_name = iv;
856 /* Finds induction variable declaration for VAR. */
/* NOTE(review): truncated fragment -- the function header line, braces and
   the "basic_block bb;" declaration are missing from this dump.  A name
   defined outside the current loop is lazily recorded as an invariant iv
   (base == the name itself, NULL step) before its description is returned.  */
859 get_iv (struct ivopts_data *data, tree var)
863 if (!name_info (data, var)->iv)
865 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
868 || !flow_bb_inside_loop_p (data->current_loop, bb))
869 set_iv (data, var, var, NULL_TREE);
872 return name_info (data, var)->iv;
875 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
876 not define a simple affine biv with nonzero step. */
/* NOTE(review): truncated fragment -- the function header line, braces and
   the final return (presumably returning STEP, with a zero-step check) are
   missing from this dump.  Restore the full text before editing.  */
879 determine_biv_step (tree phi)
881 struct loop *loop = bb_for_stmt (phi)->loop_father;
882 tree name = PHI_RESULT (phi), base, step;
884 if (!is_gimple_reg (name))
887 if (!simple_iv (loop, phi, name, &base, &step, true))
896 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
899 abnormal_ssa_name_p (tree exp)
904 if (TREE_CODE (exp) != SSA_NAME)
907 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
910 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
911 abnormal phi node. Callback for for_each_index. */
/* NOTE(review): truncated fragment -- the function braces and the "return
   false;" lines inside the ARRAY_REF checks are missing from this dump.
   Operands 2 and 3 of an ARRAY_REF are the low bound and element size.  */
914 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
915 void *data ATTRIBUTE_UNUSED)
917 if (TREE_CODE (base) == ARRAY_REF)
919 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
925 return !abnormal_ssa_name_p (*index);
928 /* Returns true if EXPR contains a ssa name that occurs in an
929 abnormal phi node. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   switch over the tree code class and several return statements are missing
   from this dump (non-contiguous original line numbers).  The visible logic
   recurses into operands, handles ADDR_EXPR via for_each_index, and treats
   invariants as abnormal-free.  Restore the full text before editing.  */
932 contains_abnormal_ssa_name_p (tree expr)
935 enum tree_code_class class;
940 code = TREE_CODE (expr);
941 class = TREE_CODE_CLASS (code);
943 if (code == SSA_NAME)
944 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
946 if (code == INTEGER_CST
947 || is_gimple_min_invariant (expr))
950 if (code == ADDR_EXPR)
951 return !for_each_index (&TREE_OPERAND (expr, 0),
952 idx_contains_abnormal_ssa_name_p,
959 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
964 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
976 /* Finds basic ivs. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   "found" flag handling, several "continue" statements and the final return
   are missing from this dump.  The visible logic walks the loop-header phis,
   skips abnormal names, derives base/step via determine_biv_step, rejects
   phis whose base or step mention abnormal ssa names, and records each biv.  */
979 find_bivs (struct ivopts_data *data)
981 tree phi, step, type, base;
983 struct loop *loop = data->current_loop;
985 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
987 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
990 step = determine_biv_step (phi);
994 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
995 base = expand_simple_operations (base);
996 if (contains_abnormal_ssa_name_p (base)
997 || contains_abnormal_ssa_name_p (step))
1000 type = TREE_TYPE (PHI_RESULT (phi));
1001 base = fold_convert (type, base);
1003 step = fold_convert (type, step);
1005 set_iv (data, PHI_RESULT (phi), base, step);
1012 /* Marks basic ivs. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   "tree var;" declaration, null-checks on iv/incr_iv, several "continue"
   statements and the line marking iv->biv_p are missing from this dump.
   The visible logic pairs each header-phi iv with the iv of its latch
   argument and flags both as bivs unless the increment lives in a subloop
   or an irreducible region.  */
1015 mark_bivs (struct ivopts_data *data)
1018 struct iv *iv, *incr_iv;
1019 struct loop *loop = data->current_loop;
1020 basic_block incr_bb;
1022 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1024 iv = get_iv (data, PHI_RESULT (phi));
1028 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1029 incr_iv = get_iv (data, var);
1033 /* If the increment is in the subloop, ignore it. */
1034 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1035 if (incr_bb->loop_father != data->current_loop
1036 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1040 incr_iv->biv_p = true;
1044 /* Checks whether STMT defines a linear induction variable and stores its
1045 parameters to BASE and STEP. */
/* NOTE(review): truncated fragment -- the function braces, the "tree lhs;"
   declaration, several "return false;" lines and the final "return true;"
   are missing from this dump.  Operand 0/1 of MODIFY_EXPR are the lhs/rhs;
   simple_iv does the scalar-evolution analysis.  */
1048 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1049 tree *base, tree *step)
1052 struct loop *loop = data->current_loop;
1057 if (TREE_CODE (stmt) != MODIFY_EXPR)
1060 lhs = TREE_OPERAND (stmt, 0);
1061 if (TREE_CODE (lhs) != SSA_NAME)
1064 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step, true))
1066 *base = expand_simple_operations (*base);
1068 if (contains_abnormal_ssa_name_p (*base)
1069 || contains_abnormal_ssa_name_p (*step))
1075 /* Finds general ivs in statement STMT. */
1078 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1082 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1085 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1088 /* Finds general ivs in basic block BB. */
1091 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1093 block_stmt_iterator bsi;
1095 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1096 find_givs_in_stmt (data, bsi_stmt (bsi));
1099 /* Finds general ivs. */
1102 find_givs (struct ivopts_data *data)
1104 struct loop *loop = data->current_loop;
1105 basic_block *body = get_loop_body_in_dom_order (loop);
1108 for (i = 0; i < loop->num_nodes; i++)
1109 find_givs_in_bb (data, body[i]);
1113 /* For each ssa name defined in LOOP determines whether it is an induction
1114 variable and if so, its initial value and step. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   calls sequencing find_bivs/find_givs/mark_bivs, the niter null-check and
   the final return are missing from this dump.  The visible remainder is
   the TDF_DETAILS dump of the iteration count and all discovered ivs.  */
1117 find_induction_variables (struct ivopts_data *data)
1122 if (!find_bivs (data))
1128 if (dump_file && (dump_flags & TDF_DETAILS))
1130 struct tree_niter_desc *niter;
1132 niter = niter_for_single_dom_exit (data);
1136 fprintf (dump_file, " number of iterations ");
1137 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1138 fprintf (dump_file, "\n");
1140 fprintf (dump_file, " may be zero if ");
1141 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1142 fprintf (dump_file, "\n");
1143 fprintf (dump_file, "\n");
1146 fprintf (dump_file, "Induction variables:\n\n");
1148 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1150 if (ver_info (data, i)->iv)
1151 dump_iv (dump_file, ver_info (data, i)->iv);
1158 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
/* NOTE(review): truncated fragment -- the function braces, the assignments
   of use->iv / use->stmt / use->op_p and the final "return use;" are missing
   from this dump.  The ssa_name field is cleared purely to keep it out of
   the dumps, per the visible comment.  */
1160 static struct iv_use *
1161 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1162 tree stmt, enum use_type use_type)
1164 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1166 use->id = n_iv_uses (data);
1167 use->type = use_type;
1171 use->related_cands = BITMAP_ALLOC (NULL);
1173 /* To avoid showing ssa name in the dumps, if it was not reset by the
1175 iv->ssa_name = NULL_TREE;
1177 if (dump_file && (dump_flags & TDF_DETAILS))
1178 dump_use (dump_file, use);
1180 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1185 /* Checks whether OP is a loop-level invariant and if so, records it.
1186 NONLINEAR_USE is true if the invariant is used in a way we do not
1187 handle specially. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   early returns, and the guard around assigning a fresh inv_id are missing
   from this dump.  Names defined inside the current loop are rejected as
   invariants by the bb check at original line 1201.  */
1190 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1193 struct version_info *info;
1195 if (TREE_CODE (op) != SSA_NAME
1196 || !is_gimple_reg (op))
1199 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1201 && flow_bb_inside_loop_p (data->current_loop, bb))
1204 info = name_info (data, op);
1206 info->has_nonlin_use |= nonlinear_use;
1208 info->inv_id = ++data->max_inv_id;
1209 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1212 /* Checks whether the use OP is interesting and if so, records it
/* NOTE(review): truncated fragment -- the function braces, local
   declarations (civ, stmt, use), several early returns, the civ
   initialization from *iv, and the final "return use;" are missing from
   this dump.  The have_use_for/use_id pair de-duplicates repeated uses of
   the same iv, upgrading a USE_OUTER record to USE_NONLINEAR_EXPR when
   needed; a zero-step iv is demoted to a recorded invariant instead.  */
1215 static struct iv_use *
1216 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1224 if (TREE_CODE (op) != SSA_NAME)
1227 iv = get_iv (data, op);
1231 if (iv->have_use_for)
1233 use = iv_use (data, iv->use_id);
1235 gcc_assert (use->type == USE_NONLINEAR_EXPR
1236 || use->type == USE_OUTER);
1238 if (type == USE_NONLINEAR_EXPR)
1239 use->type = USE_NONLINEAR_EXPR;
1243 if (zero_p (iv->step))
1245 record_invariant (data, op, true);
1248 iv->have_use_for = true;
1250 civ = xmalloc (sizeof (struct iv));
1253 stmt = SSA_NAME_DEF_STMT (op);
1254 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1255 || TREE_CODE (stmt) == MODIFY_EXPR);
1257 use = record_use (data, NULL, civ, stmt, type);
1258 iv->use_id = use->id;
1263 /* Checks whether the use OP is interesting and if so, records it. */
1265 static struct iv_use *
1266 find_interesting_uses_op (struct ivopts_data *data, tree op)
1268 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1271 /* Records a definition of induction variable OP that is used outside of the
1274 static struct iv_use *
1275 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1277 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1280 /* Checks whether the condition *COND_P in STMT is interesting
1281 and if so, records it. */
/* NOTE(review): truncated fragment -- the function braces, the op0_p/op1_p
   declarations, the const_iv fallbacks assigned when an operand is not an
   SSA name, the SSA_NAME condition path, and several returns are missing
   from this dump.  The visible logic: bail to plain operand uses when both
   sides vary or when elimination is impossible, skip the both-invariant
   (unswitching) case, and otherwise record the single varying iv as a
   USE_COMPARE use.  */
1284 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1288 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1290 tree zero = integer_zero_node;
1292 const_iv.step = NULL_TREE;
1294 if (TREE_CODE (*cond_p) != SSA_NAME
1295 && !COMPARISON_CLASS_P (*cond_p))
1298 if (TREE_CODE (*cond_p) == SSA_NAME)
1305 op0_p = &TREE_OPERAND (*cond_p, 0);
1306 op1_p = &TREE_OPERAND (*cond_p, 1);
1309 if (TREE_CODE (*op0_p) == SSA_NAME)
1310 iv0 = get_iv (data, *op0_p);
1314 if (TREE_CODE (*op1_p) == SSA_NAME)
1315 iv1 = get_iv (data, *op1_p);
1319 if (/* When comparing with non-invariant value, we may not do any senseful
1320 induction variable elimination. */
1322 /* Eliminating condition based on two ivs would be nontrivial.
1323 ??? TODO -- it is not really important to handle this case. */
1324 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1326 find_interesting_uses_op (data, *op0_p);
1327 find_interesting_uses_op (data, *op1_p);
1331 if (zero_p (iv0->step) && zero_p (iv1->step))
1333 /* If both are invariants, this is a work for unswitching. */
1337 civ = xmalloc (sizeof (struct iv));
1338 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1339 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1342 /* Returns true if expression EXPR is obviously invariant in LOOP,
1343 i.e. if all its operands are defined outside of the LOOP. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   def_bb/len/i declarations, a non-EXPR_P early return and the "return
   true;" lines are missing from this dump.  An SSA name is invariant when
   its defining block is outside the loop; other expressions recurse over
   their operands.  */
1346 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1351 if (is_gimple_min_invariant (expr))
1354 if (TREE_CODE (expr) == SSA_NAME)
1356 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1358 && flow_bb_inside_loop_p (loop, def_bb))
1367 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1368 for (i = 0; i < len; i++)
1369 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1375 /* Cumulates the steps of indices into DATA and replaces their values with the
1376 initial ones. Returns false when the value of the index cannot be determined.
1377 Callback for for_each_index. */
1379 struct ifs_ivopts_data
/* NOTE(review): truncated fragment -- the struct's stmt/step_p fields and
   closing "};", the function's braces, several early returns, the zero-step
   and wrapping-iv handling, and the *idx replacement with iv->base are
   missing from this dump (non-contiguous original line numbers).  Restore
   the full text before editing.  */
1381 struct ivopts_data *ivopts_data;
1387 idx_find_step (tree base, tree *idx, void *data)
1389 struct ifs_ivopts_data *dta = data;
1391 tree step, type, iv_type, iv_step, lbound, off;
1392 struct loop *loop = dta->ivopts_data->current_loop;
1394 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1395 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1398 /* If base is a component ref, require that the offset of the reference
1400 if (TREE_CODE (base) == COMPONENT_REF)
1402 off = component_ref_field_offset (base);
1403 return expr_invariant_in_loop_p (loop, off);
1406 /* If base is array, first check whether we will be able to move the
1407 reference out of the loop (in order to take its address in strength
1408 reduction). In order for this to work we need both lower bound
1409 and step to be loop invariants. */
1410 if (TREE_CODE (base) == ARRAY_REF)
1412 step = array_ref_element_size (base);
1413 lbound = array_ref_low_bound (base);
1415 if (!expr_invariant_in_loop_p (loop, step)
1416 || !expr_invariant_in_loop_p (loop, lbound))
1420 if (TREE_CODE (*idx) != SSA_NAME)
1423 iv = get_iv (dta->ivopts_data, *idx);
1432 iv_type = TREE_TYPE (iv->base);
1433 type = build_pointer_type (TREE_TYPE (base));
1434 if (TREE_CODE (base) == ARRAY_REF)
1436 step = array_ref_element_size (base);
1438 /* We only handle addresses whose step is an integer constant. */
1439 if (TREE_CODE (step) != INTEGER_CST)
1443 /* The step for pointer arithmetics already is 1 byte. */
1444 step = build_int_cst (type, 1)
1446 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1447 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1448 type, iv->base, iv->step, dta->stmt);
1450 iv_step = fold_convert (iv_type, iv->step);
1454 /* The index might wrap. */
1458 step = fold_build2 (MULT_EXPR, type, step, iv_step);
1461 *dta->step_p = step;
1463 *dta->step_p = fold_build2 (PLUS_EXPR, type, *dta->step_p, step);
1468 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1469 object is passed to it in DATA. */
1472 idx_record_use (tree base, tree *idx,
1475 find_interesting_uses_op (data, *idx);
1476 if (TREE_CODE (base) == ARRAY_REF)
1478 find_interesting_uses_op (data, array_ref_element_size (base));
1479 find_interesting_uses_op (data, array_ref_low_bound (base));
1484 /* Returns true if memory reference REF may be unaligned. */
/* NOTE(review): truncated fragment -- the function header line, braces, the
   base/base_type/toffset declarations, the condition opening the visible
   "&&" clause (presumably mode != BLKmode -- confirm against full source),
   and the return statements are missing from this dump.  */
1487 may_be_unaligned_p (tree ref)
1491 HOST_WIDE_INT bitsize;
1492 HOST_WIDE_INT bitpos;
1494 enum machine_mode mode;
1495 int unsignedp, volatilep;
1496 unsigned base_align;
1498 /* The test below is basically copy of what expr.c:normal_inner_ref
1499 does to check whether the object must be loaded by parts when
1500 STRICT_ALIGNMENT is true. */
1501 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1502 &unsignedp, &volatilep, true);
1503 base_type = TREE_TYPE (base);
1504 base_align = TYPE_ALIGN (base_type);
1507 && (base_align < GET_MODE_ALIGNMENT (mode)
1508 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1509 || bitpos % BITS_PER_UNIT != 0))
1515 /* Builds ADDR_EXPR of object OBJ. If OBJ is an INDIRECT_REF, the indirect_ref
1516 is stripped instead. */
1519 build_addr_strip_iref (tree obj)
1523 if (TREE_CODE (obj) == INDIRECT_REF)
/* &*p is just p; reuse the operand of the INDIRECT_REF, converted to the
   appropriate pointer type, instead of building an ADDR_EXPR.  */
1525 type = build_pointer_type (TREE_TYPE (obj));
1526 obj = fold_convert (type, TREE_OPERAND (obj, 0));
1529 obj = build_addr (obj);
1534 /* Finds addresses in *OP_P inside STMT. */
1537 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1539 tree base = unshare_expr (*op_p), step = NULL;
1541 struct ifs_ivopts_data ifs_ivopts_data;
1543 /* Do not play with volatile memory references. A bit too conservative,
1544 perhaps, but safe. */
1545 if (stmt_ann (stmt)->has_volatile_ops)
1548 /* Ignore bitfields for now. Not really something terribly complicated
1550 if (TREE_CODE (base) == COMPONENT_REF
1551 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1554 if (STRICT_ALIGNMENT
1555 && may_be_unaligned_p (base))
/* Walk all indices of the reference, accumulating the overall step of the
   address into STEP via the idx_find_step callback.  */
1558 ifs_ivopts_data.ivopts_data = data;
1559 ifs_ivopts_data.stmt = stmt;
1560 ifs_ivopts_data.step_p = &step;
1561 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1565 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1566 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1568 base = build_addr_strip_iref (base);
/* Record the address as a USE_ADDRESS use of the iv BASE + i * STEP.  */
1570 civ = alloc_iv (base, step);
1571 record_use (data, op_p, civ, stmt, USE_ADDRESS);
/* Fallback path (reached via a label whose lines are not visible here):
   the address is not an affine function of an iv, so just record the
   uses of the individual indices.  */
1575 for_each_index (op_p, idx_record_use, data);
1578 /* Finds and records invariants used in STMT. */
1581 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1584 use_operand_p use_p;
/* Every SSA use operand of STMT is a potential loop invariant;
   record_invariant decides (the FALSE argument presumably marks the use
   as not a nonlinear one -- confirm against record_invariant).  */
1587 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1589 op = USE_FROM_PTR (use_p);
1590 record_invariant (data, op, false);
1594 /* Finds interesting uses of induction variables in the statement STMT. */
1597 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1602 use_operand_p use_p;
1604 find_invariants_stmt (data, stmt);
/* Conditions are handled specially, as comparisons of ivs.  */
1606 if (TREE_CODE (stmt) == COND_EXPR)
1608 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1612 if (TREE_CODE (stmt) == MODIFY_EXPR)
1614 lhs = TREE_OPERAND (stmt, 0);
1615 rhs = TREE_OPERAND (stmt, 1);
1617 if (TREE_CODE (lhs) == SSA_NAME)
1619 /* If the statement defines an induction variable, the uses are not
1620 interesting by themselves. */
1622 iv = get_iv (data, lhs);
1624 if (iv && !zero_p (iv->step))
/* Dispatch on the class of the RHS: comparisons become condition uses,
   references become address uses.  */
1628 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1630 case tcc_comparison:
1631 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1635 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1636 if (REFERENCE_CLASS_P (lhs))
1637 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
/* Store of a gimple value into memory: the store address is an address
   use and the stored value an operand use.  */
1643 if (REFERENCE_CLASS_P (lhs)
1644 && is_gimple_val (rhs))
1646 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1647 find_interesting_uses_op (data, rhs);
1651 /* TODO -- we should also handle address uses of type
1653 memory = call (whatever);
/* PHI nodes in the loop header that define a nonzero-step iv are iv
   definitions, not interesting uses.  */
1660 if (TREE_CODE (stmt) == PHI_NODE
1661 && bb_for_stmt (stmt) == data->current_loop->header)
1663 lhs = PHI_RESULT (stmt);
1664 iv = get_iv (data, lhs);
1666 if (iv && !zero_p (iv->step))
/* Remaining SSA uses: each iv used here in a non-linear fashion is an
   interesting operand use.  */
1670 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1672 op = USE_FROM_PTR (use_p);
1674 if (TREE_CODE (op) != SSA_NAME)
1677 iv = get_iv (data, op);
1681 find_interesting_uses_op (data, op);
1685 /* Finds interesting uses of induction variables outside of loops
1686 on loop exit edge EXIT. */
1689 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
/* Values live across EXIT appear as PHI arguments in the exit
   destination block; record each as an "outer" use.  */
1693 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1695 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1696 find_interesting_uses_outer (data, def);
1700 /* Finds uses of the induction variables that are interesting. */
1703 find_interesting_uses (struct ivopts_data *data)
1706 block_stmt_iterator bsi;
1708 basic_block *body = get_loop_body (data->current_loop);
1710 struct version_info *info;
1713 if (dump_file && (dump_flags & TDF_DETAILS))
1714 fprintf (dump_file, "Uses:\n\n");
/* Scan every basic block of the loop.  */
1716 for (i = 0; i < data->current_loop->num_nodes; i++)
/* Successor edges leaving the loop expose values used outside.  */
1721 FOR_EACH_EDGE (e, ei, bb->succs)
1722 if (e->dest != EXIT_BLOCK_PTR
1723 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1724 find_interesting_uses_outside (data, e);
/* Then every PHI node and every statement of the block.  */
1726 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1727 find_interesting_uses_stmt (data, phi);
1728 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1729 find_interesting_uses_stmt (data, bsi_stmt (bsi));
/* Optionally dump the invariants that were discovered.  */
1732 if (dump_file && (dump_flags & TDF_DETAILS))
1736 fprintf (dump_file, "\n");
1738 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1740 info = ver_info (data, i);
1743 fprintf (dump_file, " ");
1744 print_generic_expr (dump_file, info->name, TDF_SLIM);
1745 fprintf (dump_file, " is invariant (%d)%s\n",
1746 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1750 fprintf (dump_file, "\n");
1756 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1757 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1758 we are at the top-level of the processed address. */
1761 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1762 unsigned HOST_WIDE_INT *offset)
1764 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1765 enum tree_code code;
1766 tree type, orig_type = TREE_TYPE (expr);
1767 unsigned HOST_WIDE_INT off0, off1, st;
1768 tree orig_expr = expr;
1772 type = TREE_TYPE (expr);
1773 code = TREE_CODE (expr);
/* An integer constant that fits in a HOST_WIDE_INT becomes pure offset;
   it is replaced by zero in the returned expression.  */
1779 if (!cst_and_fits_in_hwi (expr)
1783 *offset = int_cst_value (expr);
1784 return build_int_cst_type (orig_type, 0);
/* PLUS_EXPR / MINUS_EXPR: strip offsets from both operands and combine
   them according to the operation.  */
1788 op0 = TREE_OPERAND (expr, 0);
1789 op1 = TREE_OPERAND (expr, 1);
1791 op0 = strip_offset_1 (op0, false, false, &off0);
1792 op1 = strip_offset_1 (op1, false, false, &off1);
1794 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1795 if (op0 == TREE_OPERAND (expr, 0)
1796 && op1 == TREE_OPERAND (expr, 1))
1801 else if (zero_p (op0))
1803 if (code == PLUS_EXPR)
1806 expr = fold_build1 (NEGATE_EXPR, type, op1);
1809 expr = fold_build2 (code, type, op0, op1);
1811 return fold_convert (orig_type, expr);
/* ARRAY_REF: a constant part of the index scales by the element size
   and contributes to the offset.  */
1817 step = array_ref_element_size (expr);
1818 if (!cst_and_fits_in_hwi (step))
1821 st = int_cst_value (step);
1822 op1 = TREE_OPERAND (expr, 1);
1823 op1 = strip_offset_1 (op1, false, false, &off1);
1824 *offset = off1 * st;
1829 /* Strip the component reference completely. */
1830 op0 = TREE_OPERAND (expr, 0);
1831 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
/* COMPONENT_REF: a field with constant offset folds into the running
   offset and the reference is stripped.  */
1841 tmp = component_ref_field_offset (expr);
1843 && cst_and_fits_in_hwi (tmp))
1845 /* Strip the component reference completely. */
1846 op0 = TREE_OPERAND (expr, 0);
1847 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1848 *offset = off0 + int_cst_value (tmp);
/* ADDR_EXPR: recurse into the addressed object, now inside an address.  */
1854 op0 = TREE_OPERAND (expr, 0);
1855 op0 = strip_offset_1 (op0, true, true, &off0);
1858 if (op0 == TREE_OPERAND (expr, 0))
1861 expr = build_addr_strip_iref (op0);
1862 return fold_convert (orig_type, expr);
1865 inside_addr = false;
1872 /* Default handling of expressions for that we want to recurse into
1873 the first operand. */
1874 op0 = TREE_OPERAND (expr, 0);
1875 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1878 if (op0 == TREE_OPERAND (expr, 0)
1879 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
/* Something changed; rebuild the node with the stripped operand(s).  */
1882 expr = copy_node (expr);
1883 TREE_OPERAND (expr, 0) = op0;
1885 TREE_OPERAND (expr, 1) = op1;
1887 /* Inside address, we might strip the top level component references,
1888 thus changing type of the expresion. Handling of ADDR_EXPR
1890 expr = fold_convert (orig_type, expr);
1895 /* Strips constant offsets from EXPR and stores them to OFFSET. */
/* Convenience wrapper for strip_offset_1 starting outside an address.  */
1898 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1900 return strip_offset_1 (expr, false, false, offset);
1903 /* Returns variant of TYPE that can be used as base for different uses.
1904 For integer types, we return unsigned variant of the type, which
1905 avoids problems with overflows. For pointer types, we return void *. */
1908 generic_type_for (tree type)
1910 if (POINTER_TYPE_P (type))
1911 return ptr_type_node;
/* Already-unsigned types are returned unchanged (the return for this
   case is on a line not visible here); otherwise use the unsigned
   variant.  */
1913 if (TYPE_UNSIGNED (type))
1916 return unsigned_type_for (type);
1919 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1920 the bitmap to that we should store it. */
/* Communicates the ivopts data to the walk_tree callback below, since
   walk_tree's DATA slot already carries the bitmap pointer.  */
1922 static struct ivopts_data *fd_ivopts_data;
1924 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1926 bitmap *depends_on = data;
1927 struct version_info *info;
/* Only SSA names registered as invariants (inv_id set, no nonlinear
   use) are recorded.  */
1929 if (TREE_CODE (*expr_p) != SSA_NAME)
1931 info = name_info (fd_ivopts_data, *expr_p);
1933 if (!info->inv_id || info->has_nonlin_use)
/* Allocate the dependence bitmap lazily on first recorded invariant.  */
1937 *depends_on = BITMAP_ALLOC (NULL);
1938 bitmap_set_bit (*depends_on, info->inv_id);
1943 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1944 position to POS. If USE is not NULL, the candidate is set as related to
1945 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1946 replacement of the final value of the iv by a direct computation. */
1948 static struct iv_cand *
1949 add_candidate_1 (struct ivopts_data *data,
1950 tree base, tree step, bool important, enum iv_position pos,
1951 struct iv_use *use, tree incremented_at)
1954 struct iv_cand *cand = NULL;
1955 tree type, orig_type;
/* Canonicalize BASE and STEP to the generic (unsigned / void *) type to
   avoid overflow issues and increase sharing between candidates.  */
1959 orig_type = TREE_TYPE (base);
1960 type = generic_type_for (orig_type);
1961 if (type != orig_type)
1963 base = fold_convert (type, base);
1965 step = fold_convert (type, step);
/* Look for an existing candidate with the same position, increment
   statement, base and step, to avoid duplicates.  */
1969 for (i = 0; i < n_iv_cands (data); i++)
1971 cand = iv_cand (data, i);
1973 if (cand->pos != pos)
1976 if (cand->incremented_at != incremented_at)
1990 if (!operand_equal_p (base, cand->iv->base, 0))
1993 if (zero_p (cand->iv->step))
2000 if (step && operand_equal_p (step, cand->iv->step, 0))
/* No match: create and register a fresh candidate.  */
2005 if (i == n_iv_cands (data))
2007 cand = xcalloc (1, sizeof (struct iv_cand));
2013 cand->iv = alloc_iv (base, step);
2016 if (pos != IP_ORIGINAL && cand->iv)
2018 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2019 cand->var_after = cand->var_before;
2021 cand->important = important;
2022 cand->incremented_at = incremented_at;
2023 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
/* A non-constant step may depend on loop invariants; record them.  */
2026 && TREE_CODE (step) != INTEGER_CST)
2028 fd_ivopts_data = data;
2029 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2032 if (dump_file && (dump_flags & TDF_DETAILS))
2033 dump_cand (dump_file, cand);
/* An existing candidate may be promoted to important.  */
2036 if (important && !cand->important)
2038 cand->important = true;
2039 if (dump_file && (dump_flags & TDF_DETAILS))
2040 fprintf (dump_file, "Candidate %d is important\n", cand->id);
/* Relate the candidate to USE if requested.  */
2045 bitmap_set_bit (use->related_cands, i);
2046 if (dump_file && (dump_flags & TDF_DETAILS))
2047 fprintf (dump_file, "Candidate %d is related to use %d\n",
2054 /* Returns true if incrementing the induction variable at the end of the LOOP
2057 The purpose is to avoid splitting latch edge with a biv increment, thus
2058 creating a jump, possibly confusing other optimization passes and leaving
2059 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2060 is not available (so we do not have a better alternative), or if the latch
2061 edge is already nonempty. */
2064 allow_ip_end_pos_p (struct loop *loop)
/* No normal position available -- IP_END is the only choice.  */
2066 if (!ip_normal_pos (loop))
/* Latch block already contains code, so adding the increment there
   costs nothing extra.  */
2069 if (!empty_block_p (ip_end_pos (loop)))
2075 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2076 position to POS. If USE is not NULL, the candidate is set as related to
2077 it. The candidate computation is scheduled on all available positions. */
2080 add_candidate (struct ivopts_data *data,
2081 tree base, tree step, bool important, struct iv_use *use)
/* Schedule the increment at the normal position when it exists, and at
   the loop end when that is available and allowed.  */
2083 if (ip_normal_pos (data->current_loop))
2084 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2085 if (ip_end_pos (data->current_loop)
2086 && allow_ip_end_pos_p (data->current_loop))
2087 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2090 /* Add a standard "0 + 1 * iteration" iv candidate for a
2091 type with SIZE bits. */
2094 add_standard_iv_candidates_for_size (struct ivopts_data *data,
/* Use the unsigned integer type of the requested width.  */
2097 tree type = lang_hooks.types.type_for_size (size, true);
2098 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2102 /* Adds standard iv candidates. */
2105 add_standard_iv_candidates (struct ivopts_data *data)
/* A plain counter in the target's natural int width.  */
2107 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2109 /* The same for a double-integer type if it is still fast enough. */
2110 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2111 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2115 /* Adds candidates bases on the old induction variable IV. */
2118 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2121 struct iv_cand *cand;
/* The iv exactly as it appears in the program.  */
2123 add_candidate (data, iv->base, iv->step, true, NULL);
2125 /* The same, but with initial value zero. */
2126 add_candidate (data,
2127 build_int_cst (TREE_TYPE (iv->base), 0),
2128 iv->step, true, NULL);
2130 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2131 if (TREE_CODE (phi) == PHI_NODE)
2133 /* Additionally record the possibility of leaving the original iv
2135 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2136 cand = add_candidate_1 (data,
2137 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2138 SSA_NAME_DEF_STMT (def));
/* Keep the original SSA names for the before/after values so the
   original iv can be preserved untouched.  */
2139 cand->var_before = iv->ssa_name;
2140 cand->var_after = def;
2144 /* Adds candidates based on the old induction variables. */
2147 add_old_ivs_candidates (struct ivopts_data *data)
/* Walk all relevant SSA versions; only genuine bivs with nonzero step
   contribute candidates.  */
2153 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2155 iv = ver_info (data, i)->iv;
2156 if (iv && iv->biv_p && !zero_p (iv->step))
2157 add_old_iv_candidates (data, iv);
2161 /* Adds candidates based on the value of the induction variable IV and USE. */
2164 add_iv_value_candidates (struct ivopts_data *data,
2165 struct iv *iv, struct iv_use *use)
2167 unsigned HOST_WIDE_INT offset;
/* First, the iv itself.  */
2170 add_candidate (data, iv->base, iv->step, false, use);
2172 /* The same, but with initial value zero. Make such variable important,
2173 since it is generic enough so that possibly many uses may be based
2175 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2176 iv->step, true, use);
2178 /* Third, try removing the constant offset. */
2179 base = strip_offset (iv->base, &offset);
2181 add_candidate (data, base, iv->step, false, use);
2184 /* Possibly adds pseudocandidate for replacing the final value of USE by
2185 a direct computation. */
2188 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2190 struct tree_niter_desc *niter;
2192 /* We must know where we exit the loop and how many times does it roll. */
2193 niter = niter_for_single_dom_exit (data);
2195 || !zero_p (niter->may_be_zero))
/* NULL base and step mark this as the final-value pseudocandidate.  */
2198 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2201 /* Adds candidates based on the uses. */
2204 add_derived_ivs_candidates (struct ivopts_data *data)
2208 for (i = 0; i < n_iv_uses (data); i++)
2210 struct iv_use *use = iv_use (data, i);
/* Dispatch on the kind of use (switch header not visible here).  */
2217 case USE_NONLINEAR_EXPR:
2220 /* Just add the ivs based on the value of the iv used here. */
2221 add_iv_value_candidates (data, use->iv, use);
2225 add_iv_value_candidates (data, use->iv, use);
2227 /* Additionally, add the pseudocandidate for the possibility to
2228 replace the final value by a direct computation. */
2229 add_iv_outer_candidates (data, use);
2238 /* Record important candidates and add them to related_cands bitmaps
2242 record_important_candidates (struct ivopts_data *data)
/* Collect the ids of all important candidates into one bitmap.  */
2247 for (i = 0; i < n_iv_cands (data); i++)
2249 struct iv_cand *cand = iv_cand (data, i);
2251 if (cand->important)
2252 bitmap_set_bit (data->important_candidates, i);
/* With few enough candidates we simply consider all of them for every
   use; the per-use related_cands bitmaps then become unnecessary.  */
2255 data->consider_all_candidates = (n_iv_cands (data)
2256 <= CONSIDER_ALL_CANDIDATES_BOUND);
2258 if (data->consider_all_candidates)
2260 /* We will not need "related_cands" bitmaps in this case,
2261 so release them to decrease peak memory consumption. */
2262 for (i = 0; i < n_iv_uses (data); i++)
2264 use = iv_use (data, i);
2265 BITMAP_FREE (use->related_cands);
2270 /* Add important candidates to the related_cands bitmaps. */
2271 for (i = 0; i < n_iv_uses (data); i++)
2272 bitmap_ior_into (iv_use (data, i)->related_cands,
2273 data->important_candidates);
2277 /* Finds the candidates for the induction variables. */
2280 find_iv_candidates (struct ivopts_data *data)
2282 /* Add commonly used ivs. */
2283 add_standard_iv_candidates (data);
2285 /* Add old induction variables. */
2286 add_old_ivs_candidates (data);
2288 /* Add induction variables derived from uses. */
2289 add_derived_ivs_candidates (data);
2291 /* Record the important candidates. */
2292 record_important_candidates (data);
2295 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2296 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2297 we allocate a simple list to every use. */
2300 alloc_use_cost_map (struct ivopts_data *data)
2302 unsigned i, size, s, j;
2304 for (i = 0; i < n_iv_uses (data); i++)
2306 struct iv_use *use = iv_use (data, i);
/* Full table: one slot per candidate.  */
2309 if (data->consider_all_candidates)
2310 size = n_iv_cands (data);
/* Otherwise size the table from the number of related candidates
   (count accumulated in S; the increment line is not visible here).  */
2314 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2319 /* Round up to the power of two, so that moduling by it is fast. */
2320 for (size = 1; size < s; size <<= 1)
2324 use->n_map_members = size;
2325 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2329 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2330 on invariants DEPENDS_ON and that the value used in expressing it
2334 set_use_iv_cost (struct ivopts_data *data,
2335 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2336 bitmap depends_on, tree value)
/* An infinite/unusable cost is not recorded; release the dependence
   bitmap (guarding condition not visible here).  */
2342 BITMAP_FREE (depends_on);
/* Direct indexing when the full two-dimensional table is in use.  */
2346 if (data->consider_all_candidates)
2348 use->cost_map[cand->id].cand = cand;
2349 use->cost_map[cand->id].cost = cost;
2350 use->cost_map[cand->id].depends_on = depends_on;
2351 use->cost_map[cand->id].value = value;
/* Otherwise insert into the open-addressed hash table, probing linearly
   from the candidate id modulo the table size.  */
2355 /* n_map_members is a power of two, so this computes modulo. */
2356 s = cand->id & (use->n_map_members - 1);
2357 for (i = s; i < use->n_map_members; i++)
2358 if (!use->cost_map[i].cand)
2360 for (i = 0; i < s; i++)
2361 if (!use->cost_map[i].cand)
2367 use->cost_map[i].cand = cand;
2368 use->cost_map[i].cost = cost;
2369 use->cost_map[i].depends_on = depends_on;
2370 use->cost_map[i].value = value;
2373 /* Gets cost of (USE, CANDIDATE) pair. */
2375 static struct cost_pair *
2376 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2377 struct iv_cand *cand)
2380 struct cost_pair *ret;
/* Mirror of the lookup scheme in set_use_iv_cost: direct index into the
   full table, or linear probing in the hash table.  */
2385 if (data->consider_all_candidates)
2387 ret = use->cost_map + cand->id;
2394 /* n_map_members is a power of two, so this computes modulo. */
2395 s = cand->id & (use->n_map_members - 1);
2396 for (i = s; i < use->n_map_members; i++)
2397 if (use->cost_map[i].cand == cand)
2398 return use->cost_map + i;
2400 for (i = 0; i < s; i++)
2401 if (use->cost_map[i].cand == cand)
2402 return use->cost_map + i;
2407 /* Returns estimate on cost of computing SEQ. */
/* Sum rtx_cost over the single-set insns of the sequence (the enclosing
   function header and cost accumulator declaration are on lines not
   visible here).  */
2415 for (; seq; seq = NEXT_INSN (seq))
2417 set = single_set (seq);
2419 cost += rtx_cost (set, SET);
2427 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2429 produce_memory_decl_rtl (tree obj, int *regno)
/* Static/external objects get a symbolic address; everything else gets
   a fresh fake pseudo register as its address.  */
2434 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2436 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2437 x = gen_rtx_SYMBOL_REF (Pmode, name);
2440 x = gen_raw_REG (Pmode, (*regno)++);
2442 return gen_rtx_MEM (DECL_MODE (obj), x);
2445 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2446 walk_tree. DATA contains the actual fake register number. */
2449 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2451 tree obj = NULL_TREE;
2455 switch (TREE_CODE (*expr_p))
/* For an address expression, dive to the base object through any
   handled component references (case label not visible here).  */
2458 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2459 handled_component_p (*expr_p);
2460 expr_p = &TREE_OPERAND (*expr_p, 0))
2464 x = produce_memory_decl_rtl (obj, regno);
/* SSA names map to the rtl of their underlying variable; give it a
   fresh fake register if it has none yet.  */
2469 obj = SSA_NAME_VAR (*expr_p);
2470 if (!DECL_RTL_SET_P (obj))
2471 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2480 if (DECL_RTL_SET_P (obj))
/* BLKmode objects must live in memory; others get a fake register.  */
2483 if (DECL_MODE (obj) == BLKmode)
2484 x = produce_memory_decl_rtl (obj, regno);
2486 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Remember the decl so its fake DECL_RTL can be reset afterwards.  */
2496 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2497 SET_DECL_RTL (obj, x);
2503 /* Determines cost of the computation of EXPR. */
2506 computation_cost (tree expr)
2509 tree type = TREE_TYPE (expr);
2511 /* Avoid using hard regs in ways which may be unsupported. */
2512 int regno = LAST_VIRTUAL_REGISTER + 1;
/* Give every referenced decl a fake rtl home, then expand EXPR to an
   insn sequence and estimate its cost.  */
2514 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2516 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2520 cost = seq_cost (seq);
/* If the result ended up in memory, account for the address cost too
   (guard on MEM_P presumably on a line not visible here).  */
2522 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2527 /* Returns variable containing the value of candidate CAND at statement AT. */
2530 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
/* After the increment the candidate's value lives in var_after,
   before it in var_before.  */
2532 if (stmt_after_increment (loop, cand, stmt))
2533 return cand->var_after;
2535 return cand->var_before;
2538 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2539 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2542 tree_int_cst_sign_bit (tree t)
2544 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2545 unsigned HOST_WIDE_INT w;
/* Pick the low or high word of the constant depending on where the sign
   bit falls.  */
2547 if (bitno < HOST_BITS_PER_WIDE_INT)
2548 w = TREE_INT_CST_LOW (t);
2551 w = TREE_INT_CST_HIGH (t);
2552 bitno -= HOST_BITS_PER_WIDE_INT;
2555 return (w >> bitno) & 1;
2558 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2559 return cst. Otherwise return NULL_TREE. */
2562 constant_multiple_of (tree type, tree top, tree bot)
2564 tree res, mby, p0, p1;
2565 enum tree_code code;
/* Trivial case: TOP == BOT, the multiple is 1.  */
2571 if (operand_equal_p (top, bot, 0))
2572 return build_int_cst (type, 1);
2574 code = TREE_CODE (top);
/* MULT_EXPR by a constant: recurse on the other operand and scale.  */
2578 mby = TREE_OPERAND (top, 1);
2579 if (TREE_CODE (mby) != INTEGER_CST)
2582 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2586 return fold_binary_to_constant (MULT_EXPR, type, res,
2587 fold_convert (type, mby));
/* PLUS/MINUS: both operands must be constant multiples of BOT.  */
2591 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2594 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2598 return fold_binary_to_constant (code, type, p0, p1);
/* Base case: both must be integer constants and divide exactly.  */
2601 if (TREE_CODE (bot) != INTEGER_CST)
2604 bot = fold_convert (type, bot);
2605 top = fold_convert (type, top);
2607 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2608 the result afterwards. */
2609 if (tree_int_cst_sign_bit (bot))
2612 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2617 /* Ditto for TOP. */
2618 if (tree_int_cst_sign_bit (top))
2621 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2624 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2627 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2629 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2637 /* Affine combination of trees. We keep track of at most MAX_AFF_ELTS elements
2638 to make things simpler; this is sufficient in most cases. */
2640 #define MAX_AFF_ELTS 8
2642 struct affine_tree_combination
2644 /* Type of the result of the combination. */
2647 /* Mask modulo that the operations are performed. */
2648 unsigned HOST_WIDE_INT mask;
2650 /* Constant offset. */
2651 unsigned HOST_WIDE_INT offset;
2653 /* Number of elements of the combination. */
2656 /* Elements and their coefficients. */
2657 tree elts[MAX_AFF_ELTS];
2658 unsigned HOST_WIDE_INT coefs[MAX_AFF_ELTS];
2660 /* Remainder of the expression. */
/* I.e., the combination represents
   offset + sum (coefs[i] * elts[i]) + rest, computed modulo mask + 1.  */
2664 /* Sets COMB to CST. */
2667 aff_combination_const (struct affine_tree_combination *comb, tree type,
2668 unsigned HOST_WIDE_INT cst)
2670 unsigned prec = TYPE_PRECISION (type)
/* Mask of PREC low bits; written as 2 << (prec-1) to avoid an undefined
   shift by the full word width when prec == HOST_BITS_PER_WIDE_INT.  */
2673 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2676 comb->rest = NULL_TREE;
2677 comb->offset = cst & comb->mask;
2680 /* Sets COMB to single element ELT. */
2683 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2685 unsigned prec = TYPE_PRECISION (type);
/* Same mask construction as in aff_combination_const.  */
2688 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
/* One element with coefficient 1 (the coefficient assignment is on a
   line not visible here), no offset, no rest.  */
2691 comb->elts[0] = elt;
2693 comb->rest = NULL_TREE;
2697 /* Scales COMB by SCALE. */
2700 aff_combination_scale (struct affine_tree_combination *comb,
2701 unsigned HOST_WIDE_INT scale)
/* Scaling by zero collapses the whole combination to the constant 0
   (the guard for this case is on a line not visible here).  */
2710 aff_combination_const (comb, comb->type, 0);
/* Scale the offset and every coefficient, compacting away elements
   whose coefficient becomes zero.  */
2714 comb->offset = (scale * comb->offset) & comb->mask;
2715 for (i = 0, j = 0; i < comb->n; i++)
2717 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2718 comb->elts[j] = comb->elts[i];
2719 if (comb->coefs[j] != 0)
/* The rest cannot simply be scaled in place; either absorb it as a new
   element with coefficient SCALE, or multiply it explicitly.  */
2726 if (comb->n < MAX_AFF_ELTS)
2728 comb->coefs[comb->n] = scale;
2729 comb->elts[comb->n] = comb->rest;
2730 comb->rest = NULL_TREE;
2734 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2735 build_int_cst_type (comb->type, scale));
2739 /* Adds ELT * SCALE to COMB. */
2742 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2743 unsigned HOST_WIDE_INT scale)
/* If ELT is already present, just adjust its coefficient; remove the
   element when the coefficient cancels to zero.  */
2750 for (i = 0; i < comb->n; i++)
2751 if (operand_equal_p (comb->elts[i], elt, 0))
2753 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2758 comb->coefs[i] = comb->coefs[comb->n];
2759 comb->elts[i] = comb->elts[comb->n];
/* Room left: append as a new element.  */
2762 if (comb->n < MAX_AFF_ELTS)
2764 comb->coefs[comb->n] = scale;
2765 comb->elts[comb->n] = elt;
/* Table full: fold ELT * SCALE into the symbolic rest.  */
2771 elt = fold_convert (comb->type, elt);
2773 elt = fold_build2 (MULT_EXPR, comb->type,
2774 fold_convert (comb->type, elt),
2775 build_int_cst_type (comb->type, scale));
2778 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2783 /* Adds COMB2 to COMB1. */
2786 aff_combination_add (struct affine_tree_combination *comb1,
2787 struct affine_tree_combination *comb2)
/* Sum the offsets modulo the mask, then merge every element of COMB2
   (and its rest, with coefficient 1) into COMB1.  */
2791 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2792 for (i = 0; i < comb2-> n; i++)
2793 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2795 aff_combination_add_elt (comb1, comb2->rest, 1);
2798 /* Splits EXPR into an affine combination of parts. */
2801 tree_to_aff_combination (tree expr, tree type,
2802 struct affine_tree_combination *comb)
2804 struct affine_tree_combination tmp;
2805 enum tree_code code;
2806 tree cst, core, toffset;
2807 HOST_WIDE_INT bitpos, bitsize;
2808 enum machine_mode mode;
2809 int unsignedp, volatilep;
2813 code = TREE_CODE (expr);
/* Integer constants become a pure offset.  */
2817 aff_combination_const (comb, type, int_cst_value (expr));
/* PLUS/MINUS: combine the affine forms of the operands, negating the
   second operand for MINUS.  */
2822 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2823 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2824 if (code == MINUS_EXPR)
2825 aff_combination_scale (&tmp, -1);
2826 aff_combination_add (comb, &tmp);
/* MULT by an integer constant: scale the affine form of the operand.  */
2830 cst = TREE_OPERAND (expr, 1);
2831 if (TREE_CODE (cst) != INTEGER_CST)
2833 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2834 aff_combination_scale (comb, int_cst_value (cst));
/* NEGATE: scale by -1.  */
2838 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2839 aff_combination_scale (comb, -1);
/* ADDR_EXPR: decompose the addressed reference into a core object, a
   constant byte offset (from bitpos) and a symbolic offset.  */
2843 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2844 &toffset, &mode, &unsignedp, &volatilep,
2846 if (bitpos % BITS_PER_UNIT != 0)
2848 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2849 core = build_addr_strip_iref (core);
2850 if (TREE_CODE (core) == ADDR_EXPR)
2851 aff_combination_add_elt (comb, core, 1);
2854 tree_to_aff_combination (core, type, &tmp);
2855 aff_combination_add (comb, &tmp);
2859 tree_to_aff_combination (toffset, type, &tmp);
2860 aff_combination_add (comb, &tmp);
/* Anything else is treated as an opaque single element.  */
2868 aff_combination_elt (comb, type, expr);
2871 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2874 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2875 unsigned HOST_WIDE_INT mask)
2877 enum tree_code code;
2880 elt = fold_convert (type, elt);
/* scale == 1: plain addition (or just ELT when EXPR is empty).  */
2887 return fold_build2 (PLUS_EXPR, type, expr, elt);
/* scale == -1 (modulo mask): subtraction, or negation of ELT.  */
2893 return fold_build1 (NEGATE_EXPR, type, elt);
2895 return fold_build2 (MINUS_EXPR, type, expr, elt);
/* EXPR empty with a general scale: just the product.  */
2899 return fold_build2 (MULT_EXPR, type, elt,
2900 build_int_cst_type (type, scale));
/* If the scale's sign bit is set, treat it as negative: subtract
   ELT * (-scale) instead of adding ELT * scale.  */
2902 if ((scale | (mask >> 1)) == mask)
2904 /* Scale is negative. */
2906 scale = (-scale) & mask;
2911 elt = fold_build2 (MULT_EXPR, type, elt,
2912 build_int_cst_type (type, scale));
2913 return fold_build2 (code, type, expr, elt);
2916 /* Makes tree from the affine combination COMB. */
2919 aff_combination_to_tree (struct affine_tree_combination *comb)
2921 tree type = comb->type;
2922 tree expr = comb->rest;
2924 unsigned HOST_WIDE_INT off, sgn;
/* REST may only be nonempty if the element table overflowed.  */
2926 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2928 for (i = 0; i < comb->n; i++)
2929 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
/* Emit the constant offset last, as +off or -off depending on its sign
   in the masked representation.  */
2932 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2934 /* Offset is negative. */
2935 off = (-comb->offset) & comb->mask;
2943 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2947 /* Folds X + RATIO * Y in TYPE. */
2950 fold_affine_sum (tree type, tree x, tree y, HOST_WIDE_INT ratio)
2952 enum tree_code code;
2954 struct affine_tree_combination cx, cy;
/* The affine machinery works modulo a HOST_WIDE_INT mask; for wider
   types fall back to building the expression directly.  */
2956 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2959 return fold_build2 (PLUS_EXPR, type, x, y);
2961 return fold_build2 (MINUS_EXPR, type, x, y);
2971 cst = build_int_cst_type (type, ratio);
2972 y = fold_build2 (MULT_EXPR, type, y, cst);
2973 return fold_build2 (code, type, x, y);
/* Otherwise decompose both operands to affine form, scale and add, and
   rebuild a (usually better simplified) tree.  */
2976 tree_to_aff_combination (x, type, &cx);
2977 tree_to_aff_combination (y, type, &cy);
2978 aff_combination_scale (&cy, ratio);
2979 aff_combination_add (&cx, &cy);
2981 return aff_combination_to_tree (&cx);
2984 /* Determines the expression by that USE is expressed from induction variable
2985 CAND at statement AT in LOOP. */
2988 get_computation_at (struct loop *loop,
2989 struct iv_use *use, struct iv_cand *cand, tree at)
2991 tree ubase = use->iv->base;
2992 tree ustep = use->iv->step;
2993 tree cbase = cand->iv->base;
2994 tree cstep = cand->iv->step;
2995 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2999 unsigned HOST_WIDE_INT ustepi, cstepi;
3000 HOST_WIDE_INT ratioi;
3002 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3004 /* We do not have a precision to express the values of use. */
/* Start from the candidate's value at AT.  */
3008 expr = var_at_stmt (loop, cand, at);
3010 if (TREE_TYPE (expr) != ctype)
3012 /* This may happen with the original ivs. */
3013 expr = fold_convert (ctype, expr);
/* Perform the arithmetic in an unsigned variant of UTYPE, so that
   overflow is well defined.  */
3016 if (TYPE_UNSIGNED (utype))
3020 uutype = unsigned_type_for (utype);
3021 ubase = fold_convert (uutype, ubase);
3022 ustep = fold_convert (uutype, ustep);
3025 if (uutype != ctype)
3027 expr = fold_convert (uutype, expr);
3028 cbase = fold_convert (uutype, cbase);
3029 cstep = fold_convert (uutype, cstep);
/* Compute RATIO = ustep / cstep; use machine integers when both steps
   are host-representable constants, symbolic division otherwise.  */
3032 if (cst_and_fits_in_hwi (cstep)
3033 && cst_and_fits_in_hwi (ustep))
3035 ustepi = int_cst_value (ustep);
3036 cstepi = int_cst_value (cstep);
3038 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3040 /* TODO maybe consider case when ustep divides cstep and the ratio is
3041 a power of 2 (so that the division is fast to execute)? We would
3042 need to be much more careful with overflows etc. then. */
3046 ratio = build_int_cst_type (uutype, ratioi);
3050 ratio = constant_multiple_of (uutype, ustep, cstep);
3054 /* Ratioi is only used to detect special cases when the multiplicative
3055 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3056 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3057 to integer_onep/integer_all_onesp, since the former ignores
3059 if (cst_and_fits_in_hwi (ratio))
3060 ratioi = int_cst_value (ratio);
3061 else if (integer_onep (ratio))
3063 else if (integer_all_onesp (ratio))
3069 /* We may need to shift the value if we are after the increment. */
3070 if (stmt_after_increment (loop, cand, at))
3071 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
3073 /* use = ubase - ratio * cbase + ratio * var.
3075 In general case ubase + ratio * (var - cbase) could be better (one less
3076 multiplication), but often it is possible to eliminate redundant parts
3077 of computations from (ubase - ratio * cbase) term, and if it does not
3078 happen, fold is able to apply the distributive law to obtain this form
/* ratio == 1: use = (ubase - cbase) + var.  */
3083 delta = fold_affine_sum (uutype, ubase, cbase, -1);
3084 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3086 else if (ratioi == -1)
/* ratio == -1: use = (ubase + cbase) - var.  */
3088 delta = fold_affine_sum (uutype, ubase, cbase, 1);
3089 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
/* General ratio: delta = ubase - ratio * cbase, then
   use = delta + ratio * var.  */
3094 delta = fold_affine_sum (uutype, ubase, cbase, -ratioi);
3097 delta = fold_build2 (MULT_EXPR, uutype, ratio, cbase);
3098 delta = fold_affine_sum (uutype, ubase, delta, -1);
3100 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3101 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
/* Convert back to the type of the use.  */
3104 return fold_convert (utype, expr);
3107 /* Determines the expression by that USE is expressed from induction variable
/* Convenience wrapper: same as get_computation_at, evaluated at the
   use's own statement (use->stmt).  */
3111 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3113 return get_computation_at (loop, use, cand, use->stmt);
3116 /* Returns cost of addition in MODE. */
3119 add_cost (enum machine_mode mode)
/* Per-mode memoization table; computed lazily the first time a mode is
   asked for (elided guard presumably checks costs[mode] != 0 -- verify).  */
3121 static unsigned costs[NUM_MACHINE_MODES];
/* Measure the cost by expanding a PLUS of two fresh pseudo registers
   and summing the rtx costs of the resulting insn sequence.  */
3129 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3130 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
3131 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
3136 cost = seq_cost (seq);
3142 if (dump_file && (dump_flags & TDF_DETAILS))
3143 fprintf (dump_file, "Addition in %s costs %d\n",
3144 GET_MODE_NAME (mode), cost);
3148 /* Entry in a hashtable of already known costs for multiplication. */
3151 HOST_WIDE_INT cst; /* The constant to multiply by. */
3152 enum machine_mode mode; /* In mode. */
3153 unsigned cost; /* The cost. */
3156 /* Counts hash value for the ENTRY. */
3159 mbc_entry_hash (const void *entry)
3161 const struct mbc_entry *e = entry;
/* Combine mode and constant; 57 and 877 are arbitrary mixing constants.  */
3163 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3166 /* Compares the hash table entries ENTRY1 and ENTRY2.  Equal iff both the
     mode and the multiplier constant match.  */
3169 mbc_entry_eq (const void *entry1, const void *entry2)
3171 const struct mbc_entry *e1 = entry1;
3172 const struct mbc_entry *e2 = entry2;
3174 return (e1->mode == e2->mode
3175 && e1->cst == e2->cst);
3178 /* Returns cost of multiplication by constant CST in MODE. */
3181 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
/* Lazily-created cache keyed by (mode, cst); entries are freed by the
   hash table's free function.  */
3183 static htab_t costs;
3184 struct mbc_entry **cached, act;
3189 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
/* INSERT returns a slot; non-NULL *cached means the cost was already
   computed for this (mode, cst) pair.  */
3193 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3195 return (*cached)->cost;
3197 *cached = xmalloc (sizeof (struct mbc_entry))
3198 (*cached)->mode = mode;
3199 (*cached)->cst = cst;
/* Measure by expanding pseudo-reg * CST and costing the sequence.  */
3202 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
3207 cost = seq_cost (seq);
3209 if (dump_file && (dump_flags & TDF_DETAILS))
3210 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3211 (int) cst, GET_MODE_NAME (mode), cost);
3213 (*cached)->cost = cost;
3218 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3219 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3220 variable is omitted. The created memory accesses MODE.
3222 TODO -- there must be some better way. This all is quite crude. */
3225 get_address_cost (bool symbol_present, bool var_present,
3226 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3228 #define MAX_RATIO 128
/* All of the following statics are one-time-computed target properties:
   the valid index multipliers, the representable offset range, and a
   cache of address costs per (symbol, var, offset, ratio) presence.  */
3229 static sbitmap valid_mult;
3230 static HOST_WIDE_INT rat, off;
3231 static HOST_WIDE_INT min_offset, max_offset;
3232 static unsigned costs[2][2][2][2];
3233 unsigned cost, acost;
3234 rtx seq, addr, base;
3235 bool offset_p, ratio_p;
3237 HOST_WIDE_INT s_offset;
3238 unsigned HOST_WIDE_INT mask;
/* Probe increasing powers of two to find the largest offset the target
   accepts in an address; likewise below for the most negative one.  */
3245 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3247 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3248 for (i = 1; i <= 1 << 20; i <<= 1)
3250 XEXP (addr, 1) = GEN_INT (i);
3251 if (!memory_address_p (Pmode, addr))
3254 max_offset = i >> 1;
3257 for (i = 1; i <= 1 << 20; i <<= 1)
3259 XEXP (addr, 1) = GEN_INT (-i);
3260 if (!memory_address_p (Pmode, addr))
3263 min_offset = -(i >> 1);
3265 if (dump_file && (dump_flags & TDF_DETAILS))
3267 fprintf (dump_file, "get_address_cost:\n");
3268 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3269 fprintf (dump_file, " max offset %d\n", (int) max_offset);
/* Record which index multipliers in [-MAX_RATIO, MAX_RATIO] form valid
   addresses on this target.  Bit i+MAX_RATIO corresponds to ratio i.  */
3272 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3273 sbitmap_zero (valid_mult);
3275 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3276 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3278 XEXP (addr, 1) = GEN_INT (i);
3279 if (memory_address_p (Pmode, addr))
3281 SET_BIT (valid_mult, i + MAX_RATIO);
3286 if (dump_file && (dump_flags & TDF_DETAILS))
3288 fprintf (dump_file, " allowed multipliers:");
3289 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3290 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3291 fprintf (dump_file, " %d", (int) i);
3292 fprintf (dump_file, "\n");
3293 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from the width of Pmode into s_offset.  */
3297 bits = GET_MODE_BITSIZE (Pmode);
3298 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3300 if ((offset >> (bits - 1) & 1))
3305 offset_p = (s_offset != 0
3306 && min_offset <= s_offset && s_offset <= max_offset);
3307 ratio_p = (ratio != 1
3308 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
3309 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
/* Parts not expressible inside the address itself must be computed
   separately; charge a multiply and/or an add for them.  */
3311 if (ratio != 1 && !ratio_p)
3312 cost += multiply_by_cost (ratio, Pmode);
3314 if (s_offset && !offset_p && !symbol_present)
3316 cost += add_cost (Pmode);
/* Cache lookup; on a miss, synthesize a representative address with the
   requested parts and measure its cost.  */
3320 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3325 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
3326 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
3328 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
3331 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3335 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3337 base = gen_rtx_fmt_e (CONST, Pmode,
3338 gen_rtx_fmt_ee (PLUS, Pmode,
3343 base = GEN_INT (off);
3348 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3351 addr = memory_address (Pmode, addr);
3355 acost = seq_cost (seq);
3356 acost += address_cost (addr, Pmode);
3360 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3363 return cost + acost;
3365 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3366 invariants the computation depends on. */
3369 force_var_cost (struct ivopts_data *data,
3370 tree expr, bitmap *depends_on)
/* One-time measurement of baseline costs: materializing an integer
   constant, a symbol address, and symbol + offset.  */
3372 static bool costs_initialized = false;
3373 static unsigned integer_cost;
3374 static unsigned symbol_cost;
3375 static unsigned address_cost;
3377 unsigned cost0, cost1, cost;
3378 enum machine_mode mode;
3380 if (!costs_initialized)
3382 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3383 rtx x = gen_rtx_MEM (DECL_MODE (var),
3384 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3386 tree type = build_pointer_type (integer_type_node);
3388 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3391 SET_DECL_RTL (var, x);
3392 TREE_STATIC (var) = 1;
3393 addr = build1 (ADDR_EXPR, type, var);
/* The +1 biases symbol/address costs slightly above plain values.  */
3394 symbol_cost = computation_cost (addr) + 1;
3397 = computation_cost (build2 (PLUS_EXPR, type,
3399 build_int_cst_type (type, 2000))) + 1;
3400 if (dump_file && (dump_flags & TDF_DETAILS))
3402 fprintf (dump_file, "force_var_cost:\n");
3403 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3404 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3405 fprintf (dump_file, " address %d\n", (int) address_cost);
3406 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3407 fprintf (dump_file, "\n");
3410 costs_initialized = true;
/* Record the loop invariants EXPR depends on, if the caller asked.  */
3417 fd_ivopts_data = data;
3418 walk_tree (&expr, find_depends, depends_on, NULL);
3421 if (SSA_VAR_P (expr))
3424 if (TREE_INVARIANT (expr))
3426 if (TREE_CODE (expr) == INTEGER_CST)
3427 return integer_cost;
3429 if (TREE_CODE (expr) == ADDR_EXPR)
3431 tree obj = TREE_OPERAND (expr, 0);
3433 if (TREE_CODE (obj) == VAR_DECL
3434 || TREE_CODE (obj) == PARM_DECL
3435 || TREE_CODE (obj) == RESULT_DECL)
3439 return address_cost;
/* Composite expressions: recurse on the operands, then add the cost of
   the operation itself below.  */
3442 switch (TREE_CODE (expr))
3447 op0 = TREE_OPERAND (expr, 0);
3448 op1 = TREE_OPERAND (expr, 1);
3452 if (is_gimple_val (op0))
3455 cost0 = force_var_cost (data, op0, NULL);
3457 if (is_gimple_val (op1))
3460 cost1 = force_var_cost (data, op1, NULL);
3465 /* Just an arbitrary value, FIXME. */
3466 return target_spill_cost;
3469 mode = TYPE_MODE (TREE_TYPE (expr));
3470 switch (TREE_CODE (expr))
3474 cost = add_cost (mode);
3478 if (cst_and_fits_in_hwi (op0))
3479 cost = multiply_by_cost (int_cst_value (op0), mode);
3480 else if (cst_and_fits_in_hwi (op1))
3481 cost = multiply_by_cost (int_cst_value (op1), mode);
/* Multiplication by a non-constant: punt to the spill cost.  */
3483 return target_spill_cost;
3493 /* Bound the cost by target_spill_cost. The parts of complicated
3494 computations often are either loop invariant or at least can
3495 be shared between several iv uses, so letting this grow without
3496 limits would not give reasonable results. */
3497 return cost < target_spill_cost ? cost : target_spill_cost;
3500 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3501 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3502 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3503 invariants the computation depends on. */
3506 split_address_cost (struct ivopts_data *data,
3507 tree addr, bool *symbol_present, bool *var_present,
3508 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3511 HOST_WIDE_INT bitsize;
3512 HOST_WIDE_INT bitpos;
3514 enum machine_mode mode;
3515 int unsignedp, volatilep;
3517 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3518 &unsignedp, &volatilep, false);
/* Addresses we cannot decompose into VAR_DECL + byte offset are treated
   as opaque: charge the spill cost and record their dependences.  */
3521 || bitpos % BITS_PER_UNIT != 0
3522 || TREE_CODE (core) != VAR_DECL)
3524 *symbol_present = false;
3525 *var_present = true;
3526 fd_ivopts_data = data;
3527 walk_tree (&addr, find_depends, depends_on, NULL);
3528 return target_spill_cost;
3531 *offset += bitpos / BITS_PER_UNIT;
/* Static/external decls have a link-time symbol; locals are "var"s.  */
3532 if (TREE_STATIC (core)
3533 || DECL_EXTERNAL (core))
3535 *symbol_present = true;
3536 *var_present = false;
3540 *symbol_present = false;
3541 *var_present = true;
3545 /* Estimates cost of expressing difference of addresses E1 - E2 as
3546 var + symbol + offset. The value of offset is added to OFFSET,
3547 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3548 part is missing. DEPENDS_ON is a set of the invariants the computation
3552 ptr_difference_cost (struct ivopts_data *data,
3553 tree e1, tree e2, bool *symbol_present, bool *var_present,
3554 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3556 HOST_WIDE_INT diff = 0;
3559 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* Best case: the difference folds to a compile-time constant.  */
3561 if (ptr_difference_const (e1, e2, &diff))
3564 *symbol_present = false;
3565 *var_present = false;
/* E2 == 0 means we really want the decomposition of the address E1.  */
3569 if (e2 == integer_zero_node)
3570 return split_address_cost (data, TREE_OPERAND (e1, 0),
3571 symbol_present, var_present, offset, depends_on);
/* General case: force both addresses into registers and subtract.  */
3573 *symbol_present = false;
3574 *var_present = true;
3576 cost = force_var_cost (data, e1, depends_on);
3577 cost += force_var_cost (data, e2, depends_on);
3578 cost += add_cost (Pmode);
3583 /* Estimates cost of expressing difference E1 - E2 as
3584 var + symbol + offset. The value of offset is added to OFFSET,
3585 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3586 part is missing. DEPENDS_ON is a set of the invariants the computation
3590 difference_cost (struct ivopts_data *data,
3591 tree e1, tree e2, bool *symbol_present, bool *var_present,
3592 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3595 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3596 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands first; they go straight into
   *OFFSET and never cost anything.  */
3598 e1 = strip_offset (e1, &off1);
3599 e2 = strip_offset (e2, &off2);
3600 *offset += off1 - off2;
3605 if (TREE_CODE (e1) == ADDR_EXPR)
3606 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3608 *symbol_present = false;
/* Identical operands cancel: the difference is just the offsets.  */
3610 if (operand_equal_p (e1, e2, 0))
3612 *var_present = false;
3615 *var_present = true;
3617 return force_var_cost (data, e1, depends_on);
/* E1 absent (presumably zero_p -- elided): cost of computing -E2.  */
3621 cost = force_var_cost (data, e2, depends_on);
3622 cost += multiply_by_cost (-1, mode);
/* General case: both operands plus a subtraction.  */
3627 cost = force_var_cost (data, e1, depends_on);
3628 cost += force_var_cost (data, e2, depends_on);
3629 cost += add_cost (mode);
3634 /* Determines the cost of the computation by that USE is expressed
3635 from induction variable CAND. If ADDRESS_P is true, we just need
3636 to create an address from it, otherwise we want to get it into
3637 register. A set of invariants we depend on is stored in
3638 DEPENDS_ON. AT is the statement at that the value is computed. */
3641 get_computation_cost_at (struct ivopts_data *data,
3642 struct iv_use *use, struct iv_cand *cand,
3643 bool address_p, bitmap *depends_on, tree at)
3645 tree ubase = use->iv->base, ustep = use->iv->step;
3647 tree utype = TREE_TYPE (ubase), ctype;
3648 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3649 HOST_WIDE_INT ratio, aratio;
3650 bool var_present, symbol_present;
3651 unsigned cost = 0, n_sums;
3655 /* Only consider real candidates. */
3659 cbase = cand->iv->base;
3660 cstep = cand->iv->step;
3661 ctype = TREE_TYPE (cbase);
3663 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3665 /* We do not have a precision to express the values of use. */
3671 /* Do not try to express address of an object with computation based
3672 on address of a different object. This may cause problems in rtl
3673 level alias analysis (that does not expect this to be happening,
3674 as this is illegal in C), and would be unlikely to be useful
3676 if (use->iv->base_object
3677 && cand->iv->base_object
3678 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3682 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3684 /* TODO -- add direct handling of this case. */
3688 /* CSTEPI is removed from the offset in case statement is after the
3689 increment. If the step is not constant, we use zero instead.
3690 This is a bit imprecise (there is the extra addition), but
3691 redundancy elimination is likely to transform the code so that
3692 it uses value of the variable before increment anyway,
3693 so it is not that much unrealistic. */
3694 if (cst_and_fits_in_hwi (cstep))
3695 cstepi = int_cst_value (cstep);
/* RATIO = ustep / cstep; computed exactly for constant steps, or via
   constant_multiple_of otherwise (only 1/-1 recognized in that path).  */
3699 if (cst_and_fits_in_hwi (ustep)
3700 && cst_and_fits_in_hwi (cstep))
3702 ustepi = int_cst_value (ustep);
3704 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3711 rat = constant_multiple_of (utype, ustep, cstep);
3716 if (cst_and_fits_in_hwi (rat))
3717 ratio = int_cst_value (rat);
3718 else if (integer_onep (rat))
3720 else if (integer_all_onesp (rat))
3726 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3727 or ratio == 1, it is better to handle this like
3729 ubase - ratio * cbase + ratio * var
3731 (also holds in the case ratio == -1, TODO. */
3733 if (cst_and_fits_in_hwi (cbase))
3735 offset = - ratio * int_cst_value (cbase);
3736 cost += difference_cost (data,
3737 ubase, integer_zero_node,
3738 &symbol_present, &var_present, &offset,
3741 else if (ratio == 1)
3743 cost += difference_cost (data,
3745 &symbol_present, &var_present, &offset,
3750 cost += force_var_cost (data, cbase, depends_on);
3751 cost += add_cost (TYPE_MODE (ctype));
3752 cost += difference_cost (data,
3753 ubase, integer_zero_node,
3754 &symbol_present, &var_present, &offset,
3758 /* If we are after the increment, the value of the candidate is higher by
3760 if (stmt_after_increment (data->current_loop, cand, at))
3761 offset -= ratio * cstepi;
3763 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3764 (symbol/var/const parts may be omitted). If we are looking for an address,
3765 find the cost of addressing this. */
3767 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3769 /* Otherwise estimate the costs for computing the expression. */
3770 aratio = ratio > 0 ? ratio : -ratio;
3771 if (!symbol_present && !var_present && !offset)
3774 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3780 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3784 /* Symbol + offset should be compile-time computable. */
3785 && (symbol_present || offset))
/* N_SUMS additions remain to combine the surviving parts.  */
3788 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3792 /* Just get the expression, expand it and measure the cost. */
3793 tree comp = get_computation_at (data->current_loop, use, cand, at);
3799 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3801 return computation_cost (comp);
3805 /* Determines the cost of the computation by that USE is expressed
3806 from induction variable CAND. If ADDRESS_P is true, we just need
3807 to create an address from it, otherwise we want to get it into
3808 register. A set of invariants we depend on is stored in
/* Convenience wrapper: cost evaluated at the use's own statement.  */
3812 get_computation_cost (struct ivopts_data *data,
3813 struct iv_use *use, struct iv_cand *cand,
3814 bool address_p, bitmap *depends_on)
3816 return get_computation_cost_at (data,
3817 use, cand, address_p, depends_on, use->stmt);
3820 /* Determines cost of basing replacement of USE on CAND in a generic
3824 determine_use_iv_cost_generic (struct ivopts_data *data,
3825 struct iv_use *use, struct iv_cand *cand)
3830 /* The simple case first -- if we need to express value of the preserved
3831 original biv, the cost is 0. This also prevents us from counting the
3832 cost of increment twice -- once at this use and once in the cost of
3834 if (cand->pos == IP_ORIGINAL
3835 && cand->incremented_at == use->stmt)
3837 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3841 cost = get_computation_cost (data, use, cand, false, &depends_on);
3842 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
/* Returns whether USE can be based on CAND at all (finite cost).  */
3844 return cost != INFTY;
3847 /* Determines cost of basing replacement of USE on CAND in an address. */
3850 determine_use_iv_cost_address (struct ivopts_data *data,
3851 struct iv_use *use, struct iv_cand *cand)
/* Same as the generic case, but costed as an address (address_p = true).  */
3854 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3856 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3858 return cost != INFTY;
3861 /* Computes value of induction variable IV in iteration NITER,
     i.e. IV->base + IV->step * NITER, folded in IV's base type.  */
3864 iv_value (struct iv *iv, tree niter)
3867 tree type = TREE_TYPE (iv->base);
3869 niter = fold_convert (type, niter);
3870 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3872 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3875 /* Computes value of candidate CAND at position AT in iteration NITER. */
3878 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3880 tree val = iv_value (cand->iv, niter);
3881 tree type = TREE_TYPE (cand->iv->base);
/* After the increment statement the candidate is one step further.  */
3883 if (stmt_after_increment (loop, cand, at))
3884 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3889 /* Returns period of induction variable iv. */
3892 iv_period (struct iv *iv)
3894 tree step = iv->step, period, type;
3897 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3899 /* Period of the iv is gcd (step, type range). Since type range is power
3900 of two, it suffices to determine the maximum power of two that divides
3902 pow2div = num_ending_zeros (step);
3903 type = unsigned_type_for (TREE_TYPE (step));
/* Period = 2^(precision - pow2div) - 1, expressed as a low-bits mask.  */
3905 period = build_low_bits_mask (type,
3906 (TYPE_PRECISION (type)
3907 - tree_low_cst (pow2div, 1)));
3912 /* Returns the comparison operator used when eliminating the iv USE. */
3914 static enum tree_code
3915 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3917 struct loop *loop = data->current_loop;
/* Pick the successor edge of the condition block that leaves the loop.  */
3921 ex_bb = bb_for_stmt (use->stmt);
3922 exit = EDGE_SUCC (ex_bb, 0);
3923 if (flow_bb_inside_loop_p (loop, exit->dest))
3924 exit = EDGE_SUCC (ex_bb, 1);
/* Exit on "true" -> compare with EQ; exit on "false" -> NE.  */
3926 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3929 /* Check whether it is possible to express the condition in USE by comparison
3930 of candidate CAND. If so, store the value compared with to BOUND. */
3933 may_eliminate_iv (struct ivopts_data *data,
3934 struct iv_use *use, struct iv_cand *cand, tree *bound)
3938 struct tree_niter_desc *niter;
3940 tree wider_type, period, per_type;
3941 struct loop *loop = data->current_loop;
/* Candidate must have a compile-time-constant step, else its period
   cannot be computed.  */
3943 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3946 /* For now works only for exits that dominate the loop latch. TODO -- extend
3947 for other conditions inside loop body. */
3948 ex_bb = bb_for_stmt (use->stmt);
3949 if (use->stmt != last_stmt (ex_bb)
3950 || TREE_CODE (use->stmt) != COND_EXPR)
3952 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Exactly one of the two successors must leave the loop.  */
3955 exit = EDGE_SUCC (ex_bb, 0);
3956 if (flow_bb_inside_loop_p (loop, exit->dest))
3957 exit = EDGE_SUCC (ex_bb, 1);
3958 if (flow_bb_inside_loop_p (loop, exit->dest))
/* Need a known iteration count that cannot be zero.  */
3961 niter = niter_for_exit (data, exit);
3963 || !zero_p (niter->may_be_zero))
3967 nit_type = TREE_TYPE (nit);
3969 /* Determine whether we may use the variable to test whether niter iterations
3970 elapsed. This is the case iff the period of the induction variable is
3971 greater than the number of iterations. */
3972 period = iv_period (cand->iv);
3975 per_type = TREE_TYPE (period);
/* Compare period and niter in the wider of their two types.  */
3977 wider_type = TREE_TYPE (period);
3978 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3979 wider_type = per_type;
3981 wider_type = nit_type;
3983 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3984 fold_convert (wider_type, period),
3985 fold_convert (wider_type, nit)))))
/* The candidate's value after NIT iterations is the bound to compare
   against when the exit test is rewritten.  */
3988 *bound = cand_value_at (loop, cand, use->stmt, nit);
3992 /* Determines cost of basing replacement of USE on CAND in a condition. */
3995 determine_use_iv_cost_condition (struct ivopts_data *data,
3996 struct iv_use *use, struct iv_cand *cand)
3998 tree bound = NULL_TREE, op, cond;
3999 bitmap depends_on = NULL;
4002 /* Only consider real candidates. */
4005 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
/* Preferred path: eliminate the iv entirely by comparing CAND against
   a precomputed BOUND; cost is just materializing the bound.  */
4009 if (may_eliminate_iv (data, use, cand, &bound))
4011 cost = force_var_cost (data, bound, &depends_on);
4013 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4014 return cost != INFTY;
4017 /* The induction variable elimination failed; just express the original
4018 giv. If it is compared with an invariant, note that we cannot get
4020 cost = get_computation_cost (data, use, cand, false, &depends_on);
/* Find the invariant operand of the comparison (the one whose iv step
   is zero) and record its dependences.  */
4023 if (TREE_CODE (cond) != SSA_NAME)
4025 op = TREE_OPERAND (cond, 0);
4026 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4027 op = TREE_OPERAND (cond, 1);
4028 if (TREE_CODE (op) == SSA_NAME)
4030 op = get_iv (data, op)->base;
4031 fd_ivopts_data = data;
4032 walk_tree (&op, find_depends, &depends_on, NULL);
4036 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4037 return cost != INFTY;
4040 /* Checks whether it is possible to replace the final value of USE by
4041 a direct computation. If so, the formula is stored to *VALUE. */
4044 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4047 struct loop *loop = data->current_loop;
4049 struct tree_niter_desc *niter;
/* Requires a single exit dominating the latch, dominated-by the use,
   and a known, provably-nonzero iteration count.  */
4051 exit = single_dom_exit (loop);
4055 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4056 bb_for_stmt (use->stmt)));
4058 niter = niter_for_single_dom_exit (data);
4060 || !zero_p (niter->may_be_zero))
/* Final value = iv evaluated at the iteration count.  */
4063 *value = iv_value (use->iv, niter->niter);
4068 /* Determines cost of replacing final value of USE using CAND. */
4071 determine_use_iv_cost_outer (struct ivopts_data *data,
4072 struct iv_use *use, struct iv_cand *cand)
4077 tree value = NULL_TREE;
4078 struct loop *loop = data->current_loop;
4080 /* The simple case first -- if we need to express value of the preserved
4081 original biv, the cost is 0. This also prevents us from counting the
4082 cost of increment twice -- once at this use and once in the cost of
4084 if (cand->pos == IP_ORIGINAL
4085 && cand->incremented_at == use->stmt)
4087 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
/* (Elided branch, presumably for a NULL candidate): replace the final
   value by a direct computation when possible.  */
4093 if (!may_replace_final_value (data, use, &value))
4095 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4100 cost = force_var_cost (data, value, &depends_on);
/* Amortize: the value is computed once after the loop, so divide by
   the expected number of iterations.  */
4102 cost /= AVG_LOOP_NITER (loop);
4104 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4105 return cost != INFTY;
4108 exit = single_dom_exit (loop);
4111 /* If there is just a single exit, we may use value of the candidate
4112 after we take it to determine the value of use. */
4113 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4114 last_stmt (exit->src));
4116 cost /= AVG_LOOP_NITER (loop);
4120 /* Otherwise we just need to compute the iv. */
4121 cost = get_computation_cost (data, use, cand, false, &depends_on);
4124 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4126 return cost != INFTY;
4129 /* Determines cost of basing replacement of USE on CAND. Returns false
4130 if USE cannot be based on CAND. */
4133 determine_use_iv_cost (struct ivopts_data *data,
4134 struct iv_use *use, struct iv_cand *cand)
/* Dispatch on the kind of use (switch on use->type -- elided).  */
4138 case USE_NONLINEAR_EXPR:
4139 return determine_use_iv_cost_generic (data, use, cand);
4142 return determine_use_iv_cost_outer (data, use, cand);
4145 return determine_use_iv_cost_address (data, use, cand);
4148 return determine_use_iv_cost_condition (data, use, cand);
4155 /* Determines costs of basing the use of the iv on an iv candidate. */
4158 determine_use_iv_costs (struct ivopts_data *data)
4162 struct iv_cand *cand;
4163 bitmap to_clear = BITMAP_ALLOC (NULL);
4165 alloc_use_cost_map (data);
4167 for (i = 0; i < n_iv_uses (data); i++)
4169 use = iv_use (data, i);
/* Either try every candidate for every use, or only the candidates
   recorded as related to the use.  */
4171 if (data->consider_all_candidates)
4173 for (j = 0; j < n_iv_cands (data); j++)
4175 cand = iv_cand (data, j);
4176 determine_use_iv_cost (data, use, cand);
4183 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4185 cand = iv_cand (data, j);
4186 if (!determine_use_iv_cost (data, use, cand))
4187 bitmap_set_bit (to_clear, j);
4190 /* Remove the candidates for that the cost is infinite from
4191 the list of related candidates. */
4192 bitmap_and_compl_into (use->related_cands, to_clear);
4193 bitmap_clear (to_clear);
4197 BITMAP_FREE (to_clear);
/* Dump the complete use/candidate cost matrix for -fdump-...-details.  */
4199 if (dump_file && (dump_flags & TDF_DETAILS))
4201 fprintf (dump_file, "Use-candidate costs:\n");
4203 for (i = 0; i < n_iv_uses (data); i++)
4205 use = iv_use (data, i);
4207 fprintf (dump_file, "Use %d:\n", i);
4208 fprintf (dump_file, " cand\tcost\tdepends on\n");
4209 for (j = 0; j < use->n_map_members; j++)
4211 if (!use->cost_map[j].cand
4212 || use->cost_map[j].cost == INFTY)
4215 fprintf (dump_file, " %d\t%d\t",
4216 use->cost_map[j].cand->id,
4217 use->cost_map[j].cost);
4218 if (use->cost_map[j].depends_on)
4219 bitmap_print (dump_file,
4220 use->cost_map[j].depends_on, "","");
4221 fprintf (dump_file, "\n");
4224 fprintf (dump_file, "\n");
4226 fprintf (dump_file, "\n");
4230 /* Determines cost of the candidate CAND. */
4233 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4235 unsigned cost_base, cost_step;
4244 /* There are two costs associated with the candidate -- its increment
4245 and its initialization. The second is almost negligible for any loop
4246 that rolls enough, so we take it just very little into account. */
4248 base = cand->iv->base;
4249 cost_base = force_var_cost (data, base, NULL);
4250 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
/* Amortize the one-time initialization over the expected trip count.  */
4252 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4254 /* Prefer the original iv unless we may gain something by replacing it;
4255 this is not really relevant for artificial ivs created by other
4257 if (cand->pos == IP_ORIGINAL
4258 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4261 /* Prefer not to insert statements into latch unless there are some
4262 already (so that we do not create unnecessary jumps). */
4263 if (cand->pos == IP_END
4264 && empty_block_p (ip_end_pos (data->current_loop)))
4268 /* Determines costs of computation of the candidates.  Iterates over all
     candidates, delegating to determine_iv_cost, and dumps the results.  */
4271 determine_iv_costs (struct ivopts_data *data)
4275 if (dump_file && (dump_flags & TDF_DETAILS))
4277 fprintf (dump_file, "Candidate costs:\n");
4278 fprintf (dump_file, " cand\tcost\n");
4281 for (i = 0; i < n_iv_cands (data); i++)
4283 struct iv_cand *cand = iv_cand (data, i);
4285 determine_iv_cost (data, cand);
4287 if (dump_file && (dump_flags & TDF_DETAILS))
4288 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4291 if (dump_file && (dump_flags & TDF_DETAILS))
4292 fprintf (dump_file, "\n");
4295 /* Calculates cost for having SIZE induction variables, given the
     register-pressure estimate recorded for the current loop.  */
4298 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4300 return global_cost_for_size (size,
4301 loop_data (data->current_loop)->regs_used,
4305 /* For each size of the induction variable set determine the penalty. */
4308 determine_set_costs (struct ivopts_data *data)
4312 struct loop *loop = data->current_loop;
4315 /* We use the following model (definitely improvable, especially the
4316 cost function -- TODO):
4318 We estimate the number of registers available (using MD data), name it A.
4320 We estimate the number of registers used by the loop, name it U. This
4321 number is obtained as the number of loop phi nodes (not counting virtual
4322 registers and bivs) + the number of variables from outside of the loop.
4324 We set a reserve R (free regs that are used for temporary computations,
4325 etc.). For now the reserve is a constant 3.
4327 Let I be the number of induction variables.
4329 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4330 make a lot of ivs without a reason).
4331 -- if A - R < U + I <= A, the cost is I * PRES_COST
4332 -- if U + I > A, the cost is I * PRES_COST and
4333 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4335 if (dump_file && (dump_flags & TDF_DETAILS))
4337 fprintf (dump_file, "Global costs:\n");
4338 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4339 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4340 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4341 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
/* Count loop-header phi results that are real (non-virtual) registers
   and are not recognized induction variables.  */
4345 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4347 op = PHI_RESULT (phi);
4349 if (!is_gimple_reg (op))
4352 if (get_iv (data, op))
/* Also count loop invariants that have non-linear uses.  */
4358 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4360 struct version_info *info = ver_info (data, j);
4362 if (info->inv_id && info->has_nonlin_use)
4366 loop_data (loop)->regs_used = n;
4367 if (dump_file && (dump_flags & TDF_DETAILS))
4368 fprintf (dump_file, " regs_used %d\n", n);
4370 if (dump_file && (dump_flags & TDF_DETAILS))
4372 fprintf (dump_file, " cost for size:\n");
4373 fprintf (dump_file, " ivs\tcost\n");
4374 for (j = 0; j <= 2 * target_avail_regs; j++)
4375 fprintf (dump_file, " %d\t%d\n", j,
4376 ivopts_global_cost_for_size (data, j));
4377 fprintf (dump_file, "\n");
4381 /* Returns true if A is a cheaper cost pair than B. */
4384 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4392 if (a->cost < b->cost)
4395 if (a->cost > b->cost)
4398 /* In case the costs are the same, prefer the cheaper candidate. */
4399 if (a->cand->cost < b->cand->cost)
4405 /* Computes the cost field of IVS structure: sum of the per-use costs,
     the per-candidate costs, and the global register-pressure cost.  */
4408 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4412 cost += ivs->cand_use_cost;
4413 cost += ivs->cand_cost;
4414 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4419 /* Remove invariants in set INVS to set IVS.  Decrements the reference
     count of each invariant; elided code presumably adjusts n_regs when
     a count drops to zero -- verify against the add counterpart.  */
4422 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4430 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4432 ivs->n_invariant_uses[iid]--;
4433 if (ivs->n_invariant_uses[iid] == 0)
4438 /* Set USE not to be expressed by any candidate in IVS. */
4441 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4444 unsigned uid = use->id, cid;
4445 struct cost_pair *cp;
4447 cp = ivs->cand_for_use[uid];
4453 ivs->cand_for_use[uid] = NULL;
4454 ivs->n_cand_uses[cid]--;
/* Last use of this candidate: drop it from the set and un-charge its
   per-candidate cost and dependences.  */
4456 if (ivs->n_cand_uses[cid] == 0)
4458 bitmap_clear_bit (ivs->cands, cid);
4459 /* Do not count the pseudocandidates. */
4463 ivs->cand_cost -= cp->cand->cost;
4465 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4468 ivs->cand_use_cost -= cp->cost;
4470 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4471 iv_ca_recount_cost (data, ivs);
4474 /* Add invariants in set INVS to set IVS.  Increments the reference
     count of each invariant; mirror image of iv_ca_set_remove_invariants.  */
4477 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4485 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4487 ivs->n_invariant_uses[iid]++;
4488 if (ivs->n_invariant_uses[iid] == 1)
4493 /* Set cost pair for USE in set IVS to CP.  A previously chosen pair for
     USE (if any) is removed first; all incremental bookkeeping mirrors
     iv_ca_set_no_cp. */
4496 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4497 	      struct iv_use *use, struct cost_pair *cp)
4499   unsigned uid = use->id, cid;
/* Already using this exact pair -- nothing to do.  */
4501   if (ivs->cand_for_use[uid] == cp)
4504   if (ivs->cand_for_use[uid])
4505     iv_ca_set_no_cp (data, ivs, use);
4512       ivs->cand_for_use[uid] = cp;
4513       ivs->n_cand_uses[cid]++;
/* First use of this candidate -- add it to the set.  */
4514       if (ivs->n_cand_uses[cid] == 1)
4516 	  bitmap_set_bit (ivs->cands, cid);
4517 	  /* Do not count the pseudocandidates. */
4521 	    ivs->cand_cost += cp->cand->cost;
4523 	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4526       ivs->cand_use_cost += cp->cost;
4527       iv_ca_set_add_invariants (ivs, cp->depends_on);
4528       iv_ca_recount_cost (data, ivs);
4532 /* Extend set IVS by expressing USE by some of the candidates in it
     -- scans the candidates already in IVS and picks the cheapest cost
     pair for USE. */
4536 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4539   struct cost_pair *best_cp = NULL, *cp;
/* Uses must be added in order of their ids.  */
4543   gcc_assert (ivs->upto >= use->id);
4545   if (ivs->upto == use->id)
4551   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4553       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4555       if (cheaper_cost_pair (cp, best_cp))
/* best_cp may still be NULL here; iv_ca_set_cp records that choice
   (or the lack of one) for USE.  */
4559   iv_ca_set_cp (data, ivs, use, best_cp);
4562 /* Get cost for assignment IVS.  Returns INFTY if some use could not be
     expressed by any candidate (bad_uses nonzero). */
4565 iv_ca_cost (struct iv_ca *ivs)
4567   return (ivs->bad_uses ? INFTY : ivs->cost);
4570 /* Returns true if all dependences of CP are among invariants in IVS.
     A cost pair with no dependences trivially qualifies. */
4573 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4578   if (!cp->depends_on)
4581   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
/* Any required invariant not used by the set disqualifies CP.  */
4583       if (ivs->n_invariant_uses[i] == 0)
4590 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4591    it before NEXT_CHANGE.  Returns the newly allocated list head; the
     caller owns the memory (freed via iv_ca_delta_free). */
4593 static struct iv_ca_delta *
4594 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4595 		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4597   struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4600   change->old_cp = old_cp;
4601   change->new_cp = new_cp;
4602   change->next_change = next_change;
4607 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
     become unusable: L2 is spliced onto the tail of L1. */
4610 static struct iv_ca_delta *
4611 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4613   struct iv_ca_delta *last;
/* Walk to the end of L1, then append L2.  */
4621   for (last = l1; last->next_change; last = last->next_change)
4623   last->next_change = l2;
4628 /* Returns candidate by that USE is expressed in IVS (the chosen cost
     pair, or NULL if none was assigned). */
4630 static struct cost_pair *
4631 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4633   return ivs->cand_for_use[use->id];
4636 /* Reverse the list of changes DELTA, forming the inverse to it.  Both
     the list order and each old_cp/new_cp pair are swapped, so committing
     the result undoes the original delta. */
4638 static struct iv_ca_delta *
4639 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4641   struct iv_ca_delta *act, *next, *prev = NULL;
4642   struct cost_pair *tmp;
4644   for (act = delta; act; act = next)
4646       next = act->next_change;
4647       act->next_change = prev;
/* Swap old_cp and new_cp (the other half of the swap is elided in
   this excerpt).  */
4651       act->old_cp = act->new_cp;
4658 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4659    reverted instead (the list is temporarily reversed, applied, and
     reversed back so DELTA is left unchanged for the caller). */
4662 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4663 		    struct iv_ca_delta *delta, bool forward)
4665   struct cost_pair *from, *to;
4666   struct iv_ca_delta *act;
4669     delta = iv_ca_delta_reverse (delta);
4671   for (act = delta; act; act = act->next_change)
/* Sanity check: the set must currently express the use by FROM.  */
4675       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4676       iv_ca_set_cp (data, ivs, act->use, to);
/* Restore the original order of DELTA.  */
4680     iv_ca_delta_reverse (delta);
4683 /* Returns true if CAND is used in IVS (expresses at least one use). */
4686 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4688   return ivs->n_cand_uses[cand->id] > 0;
4691 /* Returns number of induction variable candidates in the set IVS. */
4694 iv_ca_n_cands (struct iv_ca *ivs)
4696   return ivs->n_cands;
4699 /* Free the list of changes DELTA and set *DELTA to NULL (lines doing
     the free and the reset are elided in this excerpt). */
4702 iv_ca_delta_free (struct iv_ca_delta **delta)
4704   struct iv_ca_delta *act, *next;
4706   for (act = *delta; act; act = next)
4708       next = act->next_change;
4715 /* Allocates new iv candidates assignment, with all per-use and
     per-candidate counters zeroed (xcalloc) and an empty cands bitmap. */
4717 static struct iv_ca *
4718 iv_ca_new (struct ivopts_data *data)
4720   struct iv_ca *nw = xmalloc (sizeof (struct iv_ca))
4724   nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4725   nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4726   nw->cands = BITMAP_ALLOC (NULL);
4729   nw->cand_use_cost = 0;
4731   nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4737 /* Free memory occupied by the set IVS (all the arrays allocated by
     iv_ca_new plus the bitmap of candidates). */
4740 iv_ca_free (struct iv_ca **ivs)
4742   free ((*ivs)->cand_for_use);
4743   free ((*ivs)->n_cand_uses);
4744   BITMAP_FREE ((*ivs)->cands);
4745   free ((*ivs)->n_invariant_uses);
4750 /* Dumps IVS to FILE: total cost, the candidate bitmap, and the list of
     invariants the set depends on. */
4753 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4755   const char *pref = "  invariants ";
4758   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4759   bitmap_print (file, ivs->cands, "  candidates ","\n");
4761   for (i = 1; i <= data->max_inv_id; i++)
4762     if (ivs->n_invariant_uses[i])
4764 	fprintf (file, "%s%d", pref, i);
4767   fprintf (file, "\n");
4770 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4771    new set, and store differences in DELTA. Number of induction variables
4772    in the new set is stored to N_IVS. */
4775 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4776 	      struct iv_cand *cand, struct iv_ca_delta **delta,
4781   struct cost_pair *old_cp, *new_cp;
4784   for (i = 0; i < ivs->upto; i++)
4786       use = iv_use (data, i);
4787       old_cp = iv_ca_cand_for_use (ivs, use);
/* Skip uses already expressed by CAND.  */
4790 	  && old_cp->cand == cand)
4793       new_cp = get_use_iv_cost (data, use, cand);
/* Only switch when the dependences are satisfied and the new pair is
   strictly cheaper.  */
4797       if (!iv_ca_has_deps (ivs, new_cp))
4800       if (!cheaper_cost_pair (new_cp, old_cp))
4803       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Commit tentatively to measure the cost, then roll back; the caller
   decides whether to keep DELTA.  */
4806   iv_ca_delta_commit (data, ivs, *delta, true);
4807   cost = iv_ca_cost (ivs);
4809     *n_ivs = iv_ca_n_cands (ivs);
4810   iv_ca_delta_commit (data, ivs, *delta, false);
4815 /* Try narrowing set IVS by removing CAND. Return the cost of
4816    the new set and store the differences in DELTA.  For every use
     currently expressed by CAND a replacement candidate from the
     remaining set must be found; if none exists, the narrowing fails. */
4819 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4820 	      struct iv_cand *cand, struct iv_ca_delta **delta)
4824   struct cost_pair *old_cp, *new_cp, *cp;
4826   struct iv_cand *cnd;
4830   for (i = 0; i < n_iv_uses (data); i++)
4832       use = iv_use (data, i);
4834       old_cp = iv_ca_cand_for_use (ivs, use);
4835       if (old_cp->cand != cand)
4840       if (data->consider_all_candidates)
/* Search every candidate in the set for the cheapest valid
   replacement.  */
4842 	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4847 	      cnd = iv_cand (data, ci);
4849 	      cp = get_use_iv_cost (data, use, cnd);
4852 	      if (!iv_ca_has_deps (ivs, cp))
4855 	      if (!cheaper_cost_pair (cp, new_cp))
/* Otherwise restrict the search to candidates related to this use.  */
4863 	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4868 	      cnd = iv_cand (data, ci);
4870 	      cp = get_use_iv_cost (data, use, cnd);
4873 	      if (!iv_ca_has_deps (ivs, cp))
4876 	      if (!cheaper_cost_pair (cp, new_cp))
/* No replacement found -- abandon the narrowing attempt.  */
4885 	  iv_ca_delta_free (delta);
4889       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Measure the cost of the narrowed set, then roll back.  */
4892   iv_ca_delta_commit (data, ivs, *delta, true);
4893   cost = iv_ca_cost (ivs);
4894   iv_ca_delta_commit (data, ivs, *delta, false);
4899 /* Try optimizing the set of candidates IVS by removing candidates different
4900    from to EXCEPT_CAND from it. Return cost of the new set, and store
4901    differences in DELTA.  Works greedily: picks the single removal with
     the best cost, commits it, and recurses to remove more. */
4904 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4905 	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
4908   struct iv_ca_delta *act_delta, *best_delta;
4909   unsigned i, best_cost, acost;
4910   struct iv_cand *cand;
4913   best_cost = iv_ca_cost (ivs);
4915   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4917       cand = iv_cand (data, i);
/* EXCEPT_CAND (typically a just-added candidate) is never removed.  */
4919       if (cand == except_cand)
4922       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4924       if (acost < best_cost)
4927 	  iv_ca_delta_free (&best_delta);
4928 	  best_delta = act_delta;
4931 	iv_ca_delta_free (&act_delta);
4940   /* Recurse to possibly remove other unnecessary ivs. */
4941   iv_ca_delta_commit (data, ivs, best_delta, true);
4942   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4943   iv_ca_delta_commit (data, ivs, best_delta, false);
/* Combine the best removal with whatever the recursion found.  */
4944   *delta = iv_ca_delta_join (best_delta, *delta);
4948 /* Tries to extend the sets IVS in the best possible way in order
4949    to express the USE.  Returns false when USE cannot be expressed by
     any candidate (best cost stays INFTY). */
4952 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4955   unsigned best_cost, act_cost;
4958   struct iv_cand *cand;
4959   struct iv_ca_delta *best_delta = NULL, *act_delta;
4960   struct cost_pair *cp;
/* Baseline: express USE with the candidates already in IVS.  */
4962   iv_ca_add_use (data, ivs, use);
4963   best_cost = iv_ca_cost (ivs);
4965   cp = iv_ca_cand_for_use (ivs, use);
4968       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4969       iv_ca_set_no_cp (data, ivs, use);
4972   /* First try important candidates. Only if it fails, try the specific ones.
4973      Rationale -- in loops with many variables the best choice often is to use
4974      just one generic biv. If we added here many ivs specific to the uses,
4975      the optimization algorithm later would be likely to get stuck in a local
4976      minimum, thus causing us to create too many ivs. The approach from
4977      few ivs to more seems more likely to be successful -- starting from few
4978      ivs, replacing an expensive use by a specific iv should always be a
4980   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4982       cand = iv_cand (data, i);
4984       if (iv_ca_cand_used_p (ivs, cand))
4987       cp = get_use_iv_cost (data, use, cand);
/* Tentatively adopt CAND for USE, measure how the rest of the set
   would adapt to it, then undo.  */
4991       iv_ca_set_cp (data, ivs, use, cp);
4992       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4993       iv_ca_set_no_cp (data, ivs, use);
4994       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4996       if (act_cost < best_cost)
4998 	  best_cost = act_cost;
5000 	  iv_ca_delta_free (&best_delta);
5001 	  best_delta = act_delta;
5004 	iv_ca_delta_free (&act_delta);
/* Important candidates did not work -- fall back to the use-specific
   candidates recorded in the cost map.  */
5007   if (best_cost == INFTY)
5009       for (i = 0; i < use->n_map_members; i++)
5011 	  cp = use->cost_map + i;
5016 	  /* Already tried this. */
5017 	  if (cand->important)
5020 	  if (iv_ca_cand_used_p (ivs, cand))
5024 	  iv_ca_set_cp (data, ivs, use, cp);
5025 	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5026 	  iv_ca_set_no_cp (data, ivs, use);
5027 	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5030 	  if (act_cost < best_cost)
5032 	      best_cost = act_cost;
5035 	      iv_ca_delta_free (&best_delta);
5036 	      best_delta = act_delta;
5039 	    iv_ca_delta_free (&act_delta);
/* Apply the winning change for real.  */
5043   iv_ca_delta_commit (data, ivs, best_delta, true);
5044   iv_ca_delta_free (&best_delta);
5046   return (best_cost != INFTY);
5049 /* Finds an initial assignment of candidates to uses.  Returns NULL on
     failure (lines handling the failure path are elided in this
     excerpt). */
5051 static struct iv_ca *
5052 get_initial_solution (struct ivopts_data *data)
5054   struct iv_ca *ivs = iv_ca_new (data);
5057   for (i = 0; i < n_iv_uses (data); i++)
5058     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5067 /* Tries to improve set of induction variables IVS.  One improvement
     step: try adding each unused candidate (optionally pruning
     afterwards), or failing that, try pruning alone.  Returns whether
     the cost strictly improved. */
5070 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5072   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5073   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5074   struct iv_cand *cand;
5076   /* Try extending the set of induction variables by one. */
5077   for (i = 0; i < n_iv_cands (data); i++)
5079       cand = iv_cand (data, i);
5081       if (iv_ca_cand_used_p (ivs, cand))
5084       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5088       /* If we successfully added the candidate and the set is small enough,
5089 	 try optimizing it by removing other candidates. */
5090       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5092 	  iv_ca_delta_commit (data, ivs, act_delta, true);
5093 	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5094 	  iv_ca_delta_commit (data, ivs, act_delta, false);
5095 	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5098       if (acost < best_cost)
5101 	  iv_ca_delta_free (&best_delta);
5102 	  best_delta = act_delta;
5105 	iv_ca_delta_free (&act_delta);
5110       /* Try removing the candidates from the set instead. */
5111       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5113       /* Nothing more we can do. */
/* Commit the best change found; the recomputed cost must match what
   the tentative evaluation predicted.  */
5118   iv_ca_delta_commit (data, ivs, best_delta, true);
5119   gcc_assert (best_cost == iv_ca_cost (ivs));
5120   iv_ca_delta_free (&best_delta);
5124 /* Attempts to find the optimal set of induction variables. We do simple
5125    greedy heuristic -- we try to replace at most one candidate in the selected
5126    solution and remove the unused ivs while this improves the cost. */
5128 static struct iv_ca *
5129 find_optimal_iv_set (struct ivopts_data *data)
5135   /* Get the initial solution. */
5136   set = get_initial_solution (data);
5139       if (dump_file && (dump_flags & TDF_DETAILS))
5140 	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5144   if (dump_file && (dump_flags & TDF_DETAILS))
5146       fprintf (dump_file, "Initial set of candidates:\n");
5147       iv_ca_dump (data, dump_file, set);
/* Iterate single-step improvements until a fixed point.  */
5150   while (try_improve_iv_set (data, set))
5152       if (dump_file && (dump_flags & TDF_DETAILS))
5154 	  fprintf (dump_file, "Improved to:\n");
5155 	  iv_ca_dump (data, dump_file, set);
5159   if (dump_file && (dump_flags & TDF_DETAILS))
5160     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
/* Record the winning candidate on each use for the rewrite phase.  */
5162   for (i = 0; i < n_iv_uses (data); i++)
5164       use = iv_use (data, i);
5165       use->selected = iv_ca_cand_for_use (set, use)->cand;
5171 /* Creates a new induction variable corresponding to CAND, inserting
     the increment either at the normal position or at the loop end
     depending on CAND's position (switch lines elided in this
     excerpt). */
5174 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5176   block_stmt_iterator incr_pos;
5186       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5190       incr_pos = bsi_last (ip_end_pos (data->current_loop));
/* IP_ORIGINAL case: reuse the original biv instead of creating one.  */
5195       /* Mark that the iv is preserved. */
5196       name_info (data, cand->var_before)->preserve_biv = true;
5197       name_info (data, cand->var_after)->preserve_biv = true;
5199       /* Rewrite the increment so that it uses var_before directly. */
5200       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5205   gimple_add_tmp_var (cand->var_before);
5206   add_referenced_tmp_var (cand->var_before);
5208   base = unshare_expr (cand->iv->base);
5210   create_iv (base, unshare_expr (cand->iv->step),
5211 	     cand->var_before, data->current_loop,
5212 	     &incr_pos, after, &cand->var_before, &cand->var_after);
5215 /* Creates new induction variables described in SET: one create_new_iv
     call per candidate in the set's bitmap. */
5218 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5221   struct iv_cand *cand;
5224   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5226       cand = iv_cand (data, i);
5227       create_new_iv (data, cand);
5231 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5232    is true, remove also the ssa name defined by the statement. */
5235 remove_statement (tree stmt, bool including_defined_name)
5237   if (TREE_CODE (stmt) == PHI_NODE)
5239       if (!including_defined_name)
5241 	  /* Prevent the ssa name defined by the statement from being removed. */
5242 	  SET_PHI_RESULT (stmt, NULL);
5244       remove_phi_node (stmt, NULL_TREE);
/* Ordinary statement: remove it via a block statement iterator.  */
5248       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5254 /* Rewrites USE (definition of iv used in a nonlinear expression)
5255    using candidate CAND. */
5258 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5259 			    struct iv_use *use, struct iv_cand *cand)
5262   tree op, stmts, tgt, ass;
5263   block_stmt_iterator bsi, pbsi;
5265   /* An important special case -- if we are asked to express value of
5266      the original iv by itself, just exit; there is no need to
5267      introduce a new computation (that might also need casting the
5268      variable to unsigned and back). */
5269   if (cand->pos == IP_ORIGINAL
5270       && TREE_CODE (use->stmt) == MODIFY_EXPR
5271       && TREE_OPERAND (use->stmt, 0) == cand->var_after)
5273       op = TREE_OPERAND (use->stmt, 1);
5275       /* Be a bit careful. In case variable is expressed in some
5276 	 complicated way, rewrite it so that we may get rid of this
5277 	 complicated expression. */
5278       if ((TREE_CODE (op) == PLUS_EXPR
5279 	   || TREE_CODE (op) == MINUS_EXPR)
5280 	  && TREE_OPERAND (op, 0) == cand->var_before
5281 	  && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
/* Build the expression of USE in terms of CAND.  */
5285   comp = unshare_expr (get_computation (data->current_loop,
5287   switch (TREE_CODE (use->stmt))
5290       tgt = PHI_RESULT (use->stmt);
5292       /* If we should keep the biv, do not replace it. */
5293       if (name_info (data, tgt)->preserve_biv)
/* Skip past leading labels to find the insertion point.  */
5296       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5297       while (!bsi_end_p (pbsi)
5298 	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5306       tgt = TREE_OPERAND (use->stmt, 0);
5307       bsi = bsi_for_stmt (use->stmt);
5314   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5316   if (TREE_CODE (use->stmt) == PHI_NODE)
/* Replace the phi node with an explicit assignment of the computed
   value.  */
5319 	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5320       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5321       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5322       remove_statement (use->stmt, false);
5323       SSA_NAME_DEF_STMT (tgt) = ass;
/* MODIFY_EXPR: just replace the right-hand side in place.  */
5328 	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5329       TREE_OPERAND (use->stmt, 1) = op;
5333 /* Replaces ssa name in index IDX by its basic variable. Callback for
     for_each_index; also rewrites the extra operands (2 and 3) of an
     ARRAY_REF the same way.  Always returns success (return line elided
     in this excerpt). */
5337 idx_remove_ssa_names (tree base, tree *idx,
5338 		      void *data ATTRIBUTE_UNUSED)
5342   if (TREE_CODE (*idx) == SSA_NAME)
5343     *idx = SSA_NAME_VAR (*idx);
5345   if (TREE_CODE (base) == ARRAY_REF)
5347       op = &TREE_OPERAND (base, 2);
5349 	  && TREE_CODE (*op) == SSA_NAME)
5350 	*op = SSA_NAME_VAR (*op);
5351       op = &TREE_OPERAND (base, 3);
5353 	  && TREE_CODE (*op) == SSA_NAME)
5354 	*op = SSA_NAME_VAR (*op);
5360 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5363 unshare_and_remove_ssa_names (tree ref)
5365   ref = unshare_expr (ref);
5366   for_each_index (&ref, idx_remove_ssa_names, NULL);
5371 /* Rewrites base of memory access OP with expression WITH in statement
5372    pointed to by BSI. */
5375 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
5377   tree bvar, var, new_name, copy, name;
5380   var = bvar = get_base_address (*op);
5382   if (!var || TREE_CODE (with) != SSA_NAME)
5385   gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
5386   gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
5387   if (TREE_CODE (var) == INDIRECT_REF)
5388     var = TREE_OPERAND (var, 0);
5389   if (TREE_CODE (var) == SSA_NAME)
5392       var = SSA_NAME_VAR (var);
5394   else if (DECL_P (var))
5399   /* We need to add a memory tag for the variable. But we do not want
5400      to add it to the temporary used for the computations, since this leads
5401      to problems in redundancy elimination when there are common parts
5402      in two computations referring to the different arrays. So we copy
5403      the variable to a new temporary. */
5404   copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
/* If we have an ssa name for the base, duplicate it so the copy keeps
   its aliasing information.  */
5407     new_name = duplicate_ssa_name (name, copy);
5410       tree tag = var_ann (var)->type_mem_tag;
5411       tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
5412       add_referenced_tmp_var (new_ptr);
5414 	var_ann (new_ptr)->type_mem_tag = tag;
5416 	add_type_alias (new_ptr, var);
5417       new_name = make_ssa_name (new_ptr, copy);
5420   TREE_OPERAND (copy, 0) = new_name;
5421   bsi_insert_before (bsi, copy, BSI_SAME_STMT);
5427   gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
5428   gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
/* Preserve the original reference for alias analysis; build a fresh
   INDIRECT_REF through the new base.  */
5430   if (TREE_CODE (*op) == INDIRECT_REF)
5431     orig = REF_ORIGINAL (*op);
5433     orig = unshare_and_remove_ssa_names (*op);
5435   *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
5437   /* Record the original reference, for purposes of alias analysis. */
5438   REF_ORIGINAL (*op) = orig;
5440   /* Virtual operands in the original statement may have to be renamed
5441      because of the replacement. */
5442   mark_new_vars_to_rename (bsi_stmt (*bsi));
5445 /* Rewrites USE (address that is an iv) using candidate CAND: computes
     the address from CAND, gimplifies it before the use statement, and
     rewrites the memory reference base. */
5448 rewrite_use_address (struct ivopts_data *data,
5449 		     struct iv_use *use, struct iv_cand *cand)
5451   tree comp = unshare_expr (get_computation (data->current_loop,
5453   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5455   tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
5458     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5460   rewrite_address_base (&bsi, use->op_p, op);
5463 /* Rewrites USE (the condition such that one of the arguments is an iv)
     using candidate CAND.  Prefers eliminating the iv from the compare
     entirely (comparing against a precomputed bound); falls back to
     expressing the original iv in terms of CAND. */
5467 rewrite_use_compare (struct ivopts_data *data,
5468 		     struct iv_use *use, struct iv_cand *cand)
5471   tree *op_p, cond, op, stmts, bound;
5472   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5473   enum tree_code compare;
5474   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
/* Elimination path: the cost pair supplies a bound to compare VAR
   against.  */
5479       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5480       tree var_type = TREE_TYPE (var);
5482       compare = iv_elimination_compare (data, use);
5483       bound = fold_convert (var_type, bound);
5484       op = force_gimple_operand (unshare_expr (bound), &stmts,
5488 	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5490       *use->op_p = build2 (compare, boolean_type_node, var, op);
5491       update_stmt (use->stmt);
5495   /* The induction variable elimination failed; just express the original
5497   comp = unshare_expr (get_computation (data->current_loop, use, cand));
/* Pick the operand of the condition that actually is the iv (nonzero
   step).  */
5500   op_p = &TREE_OPERAND (cond, 0);
5501   if (TREE_CODE (*op_p) != SSA_NAME
5502       || zero_p (get_iv (data, *op_p)->step))
5503     op_p = &TREE_OPERAND (cond, 1);
5505   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5507     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5512 /* Ensure that operand *OP_P may be used at the end of EXIT without
5513    violating loop closed ssa form.  If the value is defined inside the
     loop, reuse or create a phi node in EXIT->dest that carries it out. */
5516 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5519   struct loop *def_loop;
5522   use = USE_FROM_PTR (op_p);
5523   if (TREE_CODE (use) != SSA_NAME)
5526   def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5530   def_loop = def_bb->loop_father;
/* Nothing to do if the exit destination is inside the defining loop.  */
5531   if (flow_bb_inside_loop_p (def_loop, exit->dest))
5534   /* Try finding a phi node that copies the value out of the loop. */
5535   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5536     if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5541       /* Create such a phi node. */
5542       tree new_name = duplicate_ssa_name (use, NULL);
5544       phi = create_phi_node (new_name, exit->dest);
5545       SSA_NAME_DEF_STMT (new_name) = phi;
5546       add_phi_arg (phi, use, exit);
/* Redirect the use to the phi result, restoring loop-closed SSA.  */
5549   SET_USE (op_p, PHI_RESULT (phi));
5552 /* Ensure that operands of STMT may be used at the end of EXIT without
5553    violating loop closed ssa form -- applies
     protect_loop_closed_ssa_form_use to every SSA use of STMT. */
5556 protect_loop_closed_ssa_form (edge exit, tree stmt)
5559   use_operand_p use_p;
5561   FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5562     protect_loop_closed_ssa_form_use (exit, use_p);
5565 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5566    so that they are emitted on the correct place, and so that the loop closed
5567    ssa form is preserved. */
5570 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5572   tree_stmt_iterator tsi;
5573   block_stmt_iterator bsi;
5574   tree phi, stmt, def, next;
/* Make sure the exit block has a single predecessor so inserting there
   is safe.  */
5576   if (!single_pred_p (exit->dest))
5577     split_loop_exit_edge (exit);
5579   /* Ensure there is label in exit->dest, so that we can
5581   tree_block_label (exit->dest);
5582   bsi = bsi_after_labels (exit->dest);
5584   if (TREE_CODE (stmts) == STATEMENT_LIST)
5586       for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5588 	  bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5589 	  protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5594       bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5595       protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
/* Replace phi nodes that received OP over EXIT by direct assignments
   of OP, since the value is now computed in the exit block itself.  */
5601   for (phi = phi_nodes (exit->dest); phi; phi = next)
5603       next = PHI_CHAIN (phi);
5605       if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5607 	  def = PHI_RESULT (phi);
5608 	  remove_statement (phi, false);
5609 	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5611 	  SSA_NAME_DEF_STMT (def) = stmt;
5612 	  bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5617 /* Rewrites the final value of USE (that is only needed outside of the loop)
5618    using candidate CAND. */
5621 rewrite_use_outer (struct ivopts_data *data,
5622 		   struct iv_use *use, struct iv_cand *cand)
5625   tree value, op, stmts, tgt;
5628   switch (TREE_CODE (use->stmt))
5631       tgt = PHI_RESULT (use->stmt);
5634       tgt = TREE_OPERAND (use->stmt, 0);
5640   exit = single_dom_exit (data->current_loop);
5646 	  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5650 	value = get_computation_at (data->current_loop,
5651 				    use, cand, last_stmt (exit->src));
5653       value = unshare_expr (value);
5654       op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5656       /* If we will preserve the iv anyway and we would need to perform
5657 	 some computation to replace the final value, do nothing. */
5658       if (stmts && name_info (data, tgt)->preserve_biv)
/* Redirect phi arguments on the exit edge from TGT to the computed
   value.  */
5661       for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5663 	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5665 	  if (USE_FROM_PTR (use_p) == tgt)
5666 	    SET_USE (use_p, op);
5670 	compute_phi_arg_on_exit (exit, stmts, op);
5672       /* Enable removal of the statement. We cannot remove it directly,
5673 	 since we may still need the aliasing information attached to the
5674 	 ssa name defined by it. */
5675       name_info (data, tgt)->iv->have_use_for = false;
5679   /* If the variable is going to be preserved anyway, there is nothing to
5681   if (name_info (data, tgt)->preserve_biv)
5684   /* Otherwise we just need to compute the iv. */
5685   rewrite_use_nonlinear_expr (data, use, cand);
5688 /* Rewrites USE using candidate CAND -- dispatches on the use type and
     updates the operand caches of the rewritten statement. */
5691 rewrite_use (struct ivopts_data *data,
5692 	     struct iv_use *use, struct iv_cand *cand)
5696     case USE_NONLINEAR_EXPR:
5697       rewrite_use_nonlinear_expr (data, use, cand);
5701       rewrite_use_outer (data, use, cand);
5705       rewrite_use_address (data, use, cand);
5709       rewrite_use_compare (data, use, cand);
5715   update_stmt (use->stmt);
5718 /* Rewrite the uses using the selected induction variables (the
     candidate recorded in use->selected by find_optimal_iv_set). */
5721 rewrite_uses (struct ivopts_data *data)
5724   struct iv_cand *cand;
5727   for (i = 0; i < n_iv_uses (data); i++)
5729       use = iv_use (data, i);
5730       cand = use->selected;
5733       rewrite_use (data, use, cand);
5737 /* Removes the ivs that are not used after rewriting: relevant versions
     with a nonzero step that have no remaining use and need not be
     preserved. */
5740 remove_unused_ivs (struct ivopts_data *data)
5745   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5747       struct version_info *info;
5749       info = ver_info (data, j);
5751 	  && !zero_p (info->iv->step)
5753 	  && !info->iv->have_use_for
5754 	  && !info->preserve_biv)
5755 	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5759 /* Frees data allocated by the optimization of a single loop, resetting
     the per-loop state so the same ivopts_data can process the next
     loop. */
5762 free_loop_data (struct ivopts_data *data)
5768   htab_empty (data->niters);
/* Reset the per-version flags for every relevant ssa name.  */
5770   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5772       struct version_info *info;
5774       info = ver_info (data, i);
5778       info->has_nonlin_use = false;
5779       info->preserve_biv = false;
5782   bitmap_clear (data->relevant);
5783   bitmap_clear (data->important_candidates);
/* Release per-use cost maps and related-candidate bitmaps.  */
5785   for (i = 0; i < n_iv_uses (data); i++)
5787       struct iv_use *use = iv_use (data, i);
5790       BITMAP_FREE (use->related_cands);
5791       for (j = 0; j < use->n_map_members; j++)
5792 	if (use->cost_map[j].depends_on)
5793 	  BITMAP_FREE (use->cost_map[j].depends_on);
5794       free (use->cost_map);
5797   VEC_truncate (iv_use_p, data->iv_uses, 0);
/* Release per-candidate dependence bitmaps.  */
5799   for (i = 0; i < n_iv_cands (data); i++)
5801       struct iv_cand *cand = iv_cand (data, i);
5805       if (cand->depends_on)
5806 	BITMAP_FREE (cand->depends_on);
5809   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
/* Grow the version_info array if new ssa names were created.  */
5811   if (data->version_info_size < num_ssa_names)
5813       data->version_info_size = 2 * num_ssa_names;
5814       free (data->version_info);
5815       data->version_info = xcalloc (data->version_info_size,
5816 				    sizeof (struct version_info));
5819   data->max_inv_id = 0;
/* Clear the cached DECL_RTLs set up during cost estimation.  */
5821   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5822     SET_DECL_RTL (obj, NULL_RTX);
5824   VEC_truncate (tree, decl_rtl_to_reset, 0);
5827 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
     loop tree whose per-loop aux data is released as well. */
5831 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5835   for (i = 1; i < loops->num; i++)
5836     if (loops->parray[i])
5838 	free (loops->parray[i]->aux);
5839 	loops->parray[i]->aux = NULL;
/* Release the per-loop state, then the pass-lifetime structures.  */
5842   free_loop_data (data);
5843   free (data->version_info);
5844   BITMAP_FREE (data->relevant);
5845   BITMAP_FREE (data->important_candidates);
5846   htab_delete (data->niters);
5848   VEC_free (tree, heap, decl_rtl_to_reset);
5849   VEC_free (iv_use_p, heap, data->iv_uses);
5850   VEC_free (iv_cand_p, heap, data->iv_candidates);
5853 /* Optimizes the LOOP. Returns true if anything changed.  Drives the
     whole per-loop pipeline described in the file header: find ivs and
     uses, find candidates, compute costs, pick the optimal set, then
     rewrite. */
5856 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5858   bool changed = false;
5859   struct iv_ca *iv_ca;
5862   data->current_loop = loop;
5864   if (dump_file && (dump_flags & TDF_DETAILS))
5866       fprintf (dump_file, "Processing loop %d\n", loop->num);
5868       exit = single_dom_exit (loop);
5871 	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5872 		   exit->src->index, exit->dest->index);
5873 	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5874 	  fprintf (dump_file, "\n");
5877       fprintf (dump_file, "\n");
5880   /* For each ssa name determines whether it behaves as an induction variable
5882   if (!find_induction_variables (data))
5885   /* Finds interesting uses (item 1). */
5886   find_interesting_uses (data);
/* Give up on loops with too many uses -- the cost model would be too
   expensive.  */
5887   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5890   /* Finds candidates for the induction variables (item 2). */
5891   find_iv_candidates (data);
5893   /* Calculates the costs (item 3, part 1). */
5894   determine_use_iv_costs (data);
5895   determine_iv_costs (data);
5896   determine_set_costs (data);
5898   /* Find the optimal set of induction variables (item 3, part 2). */
5899   iv_ca = find_optimal_iv_set (data);
5904   /* Create the new induction variables (item 4, part 1). */
5905   create_new_ivs (data, iv_ca);
5906   iv_ca_free (&iv_ca);
5908   /* Rewrite the uses (item 4, part 2). */
5909   rewrite_uses (data);
5911   /* Remove the ivs that are unused after rewriting. */
5912   remove_unused_ivs (data);
5914   /* We have changed the structure of induction variables; it might happen
5915      that definitions in the scev database refer to some of them that were
5920   free_loop_data (data);
5925 /* Main entry point. Optimizes induction variables in LOOPS. */
5928 tree_ssa_iv_optimize (struct loops *loops)
5931 struct ivopts_data data;
5933 tree_ssa_iv_optimize_init (loops, &data);
5935 /* Optimize the loops starting with the innermost ones. */
5936 loop = loops->tree_root;
5940 /* Scan the loops, inner ones first. */
5941 while (loop != loops->tree_root)
5943 if (dump_file && (dump_flags & TDF_DETAILS))
5944 flow_loop_dump (loop, dump_file, NULL, 1);
5946 tree_ssa_iv_optimize_loop (&data, loop);
5958 /* FIXME. IV opts introduces new aliases and call-clobbered
5959 variables, which need to be renamed. However, when we call the
5960 renamer, not all statements will be scanned for operands. In
5961 particular, the newly introduced aliases may appear in statements
5962 that are considered "unmodified", so the renamer will not get a
5963 chance to rename those operands.
5965 Work around this problem by forcing an operand re-scan on every
5966 statement. This will not be necessary once the new operand
5967 scanner is implemented. */
5968 if (need_ssa_update_p ())
5971 block_stmt_iterator si;
5973 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5974 update_stmt (bsi_stmt (si));
5977 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
5978 tree_ssa_iv_optimize_finalize (loops, &data);