1 /* Induction variable optimizations.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
292 if (!just_once_each_iteration_p (loop, exit->src))
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
310 fprintf (file, " base ");
311 print_generic_expr (file, iv->base, TDF_SLIM);
312 fprintf (file, "\n");
314 fprintf (file, " step ");
315 print_generic_expr (file, iv->step, TDF_SLIM);
316 fprintf (file, "\n");
320 fprintf (file, " invariant ");
321 print_generic_expr (file, iv->base, TDF_SLIM);
322 fprintf (file, "\n");
326 fprintf (file, " is a biv\n");
329 /* Dumps information about the USE to FILE. */
331 extern void dump_use (FILE *, struct iv_use *);
333 dump_use (FILE *file, struct iv_use *use)
335 struct iv *iv = use->iv;
337 fprintf (file, "use %d\n", use->id);
341 case USE_NONLINEAR_EXPR:
342 fprintf (file, " generic\n");
346 fprintf (file, " outside\n");
350 fprintf (file, " address\n");
354 fprintf (file, " compare\n");
361 fprintf (file, " in statement ");
362 print_generic_expr (file, use->stmt, TDF_SLIM);
363 fprintf (file, "\n");
365 fprintf (file, " at position ");
367 print_generic_expr (file, *use->op_p, TDF_SLIM);
368 fprintf (file, "\n");
372 fprintf (file, " base ");
373 print_generic_expr (file, iv->base, TDF_SLIM);
374 fprintf (file, "\n");
376 fprintf (file, " step ");
377 print_generic_expr (file, iv->step, TDF_SLIM);
378 fprintf (file, "\n");
382 fprintf (file, " invariant ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " related candidates ");
388 dump_bitmap (file, use->related_cands);
391 /* Dumps information about the uses to FILE. */
393 extern void dump_uses (FILE *, struct ivopts_data *);
395 dump_uses (FILE *file, struct ivopts_data *data)
400 for (i = 0; i < n_iv_uses (data); i++)
402 use = iv_use (data, i);
404 dump_use (file, use);
405 fprintf (file, "\n");
409 /* Dumps information about induction variable candidate CAND to FILE. */
411 extern void dump_cand (FILE *, struct iv_cand *);
413 dump_cand (FILE *file, struct iv_cand *cand)
415 struct iv *iv = cand->iv;
417 fprintf (file, "candidate %d%s\n",
418 cand->id, cand->important ? " (important)" : "");
422 fprintf (file, " final value replacement\n");
429 fprintf (file, " incremented before exit test\n");
433 fprintf (file, " incremented at end\n");
437 fprintf (file, " original biv\n");
443 fprintf (file, " base ");
444 print_generic_expr (file, iv->base, TDF_SLIM);
445 fprintf (file, "\n");
447 fprintf (file, " step ");
448 print_generic_expr (file, iv->step, TDF_SLIM);
449 fprintf (file, "\n");
453 fprintf (file, " invariant ");
454 print_generic_expr (file, iv->base, TDF_SLIM);
455 fprintf (file, "\n");
459 /* Returns the info for ssa version VER. */
461 static inline struct version_info *
462 ver_info (struct ivopts_data *data, unsigned ver)
464 return data->version_info + ver;
467 /* Returns the info for ssa name NAME. */
469 static inline struct version_info *
470 name_info (struct ivopts_data *data, tree name)
472 return ver_info (data, SSA_NAME_VERSION (name));
475 /* Checks whether there exists number X such that X * B = A, counting modulo
479 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
482 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
483 unsigned HOST_WIDE_INT inv, ex, val;
489 /* First divide the whole equation by 2 as long as possible. */
490 while (!(a & 1) && !(b & 1))
500 /* If b is still even, a is odd and there is no such x. */
504 /* Find the inverse of b. We compute it as
505 b^(2^(bits - 1) - 1) (mod 2^bits). */
508 for (i = 0; i < bits - 1; i++)
510 inv = (inv * ex) & mask;
511 ex = (ex * ex) & mask;
514 val = (a * inv) & mask;
516 if (((val * b) & mask) != a)
519 if ((val >> (bits - 1)) & 1)
527 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
531 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
533 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
538 if (sbb == loop->latch)
544 return stmt == last_stmt (bb);
547 /* Returns true if STMT if after the place where the original induction
548 variable CAND is incremented. */
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
560 if (stmt_bb != cand_bb)
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 if (bsi_stmt (bsi) == cand->incremented_at)
569 if (bsi_stmt (bsi) == stmt)
574 /* Returns true if STMT if after the place where the induction variable
575 CAND is incremented in LOOP. */
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
586 return stmt_after_ip_normal_pos (loop, stmt);
589 return stmt_after_ip_original_pos (cand, stmt);
596 /* Initializes data structures used by the iv optimization pass, stored
597 in DATA. LOOPS is the loop tree. */
600 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
604 data->version_info_size = 2 * num_ssa_names;
605 data->version_info = xcalloc (data->version_info_size,
606 sizeof (struct version_info));
607 data->relevant = BITMAP_XMALLOC ();
608 data->max_inv_id = 0;
610 for (i = 1; i < loops->num; i++)
611 if (loops->parray[i])
612 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
614 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
615 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
616 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
619 /* Allocates an induction variable with given initial value BASE and step STEP
623 alloc_iv (tree base, tree step)
625 struct iv *iv = xcalloc (1, sizeof (struct iv));
627 if (step && integer_zerop (step))
633 iv->have_use_for = false;
635 iv->ssa_name = NULL_TREE;
640 /* Sets STEP and BASE for induction variable IV. */
643 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
645 struct version_info *info = name_info (data, iv);
650 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
651 info->iv = alloc_iv (base, step);
652 info->iv->ssa_name = iv;
655 /* Finds induction variable declaration for VAR. */
658 get_iv (struct ivopts_data *data, tree var)
662 if (!name_info (data, var)->iv)
664 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
667 || !flow_bb_inside_loop_p (data->current_loop, bb))
668 set_iv (data, var, var, NULL_TREE);
671 return name_info (data, var)->iv;
674 /* Determines the step of a biv defined in PHI. */
677 determine_biv_step (tree phi)
679 struct loop *loop = bb_for_stmt (phi)->loop_father;
680 tree name = PHI_RESULT (phi), base, step;
681 tree type = TREE_TYPE (name);
683 if (!is_gimple_reg (name))
686 if (!simple_iv (loop, phi, name, &base, &step))
690 return fold_convert (type, integer_zero_node);
695 /* Returns false if INDEX is a ssa name that occurs in an
696 abnormal phi node. Callback for for_each_index. */
699 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
700 void *data ATTRIBUTE_UNUSED)
702 if (TREE_CODE (*index) != SSA_NAME)
705 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
708 /* Returns true if EXPR contains a ssa name that occurs in an
709 abnormal phi node. */
712 contains_abnormal_ssa_name_p (tree expr)
714 enum tree_code code = TREE_CODE (expr);
715 char class = TREE_CODE_CLASS (code);
717 if (code == SSA_NAME)
718 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
720 if (code == INTEGER_CST
721 || is_gimple_min_invariant (expr))
724 if (code == ADDR_EXPR)
725 return !for_each_index (&TREE_OPERAND (expr, 1),
726 idx_contains_abnormal_ssa_name_p,
732 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
737 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
749 /* Finds basic ivs. */
752 find_bivs (struct ivopts_data *data)
754 tree phi, step, type, base;
756 struct loop *loop = data->current_loop;
758 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
760 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
763 step = determine_biv_step (phi);
767 if (cst_and_fits_in_hwi (step)
768 && int_cst_value (step) == 0)
771 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
772 if (contains_abnormal_ssa_name_p (base))
775 type = TREE_TYPE (PHI_RESULT (phi));
776 base = fold_convert (type, base);
777 step = fold_convert (type, step);
779 /* FIXME: We do not handle induction variables whose step does
780 not satisfy cst_and_fits_in_hwi. */
781 if (!cst_and_fits_in_hwi (step))
784 set_iv (data, PHI_RESULT (phi), base, step);
791 /* Marks basic ivs. */
794 mark_bivs (struct ivopts_data *data)
797 struct iv *iv, *incr_iv;
798 struct loop *loop = data->current_loop;
801 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
803 iv = get_iv (data, PHI_RESULT (phi));
807 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
808 incr_iv = get_iv (data, var);
812 /* If the increment is in the subloop, ignore it. */
813 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
814 if (incr_bb->loop_father != data->current_loop
815 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
819 incr_iv->biv_p = true;
823 /* Checks whether STMT defines a linear induction variable and stores its
824 parameters to BASE and STEP. */
827 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
828 tree *base, tree *step)
831 struct loop *loop = data->current_loop;
836 if (TREE_CODE (stmt) != MODIFY_EXPR)
839 lhs = TREE_OPERAND (stmt, 0);
840 if (TREE_CODE (lhs) != SSA_NAME)
843 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
846 /* FIXME: We do not handle induction variables whose step does
847 not satisfy cst_and_fits_in_hwi. */
849 && !cst_and_fits_in_hwi (*step))
852 if (contains_abnormal_ssa_name_p (*base))
858 /* Finds general ivs in statement STMT. */
861 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
865 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
868 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
871 /* Finds general ivs in basic block BB. */
874 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
876 block_stmt_iterator bsi;
878 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
879 find_givs_in_stmt (data, bsi_stmt (bsi));
882 /* Finds general ivs. */
885 find_givs (struct ivopts_data *data)
887 struct loop *loop = data->current_loop;
888 basic_block *body = get_loop_body_in_dom_order (loop);
891 for (i = 0; i < loop->num_nodes; i++)
892 find_givs_in_bb (data, body[i]);
896 /* Determine the number of iterations of the current loop. */
899 determine_number_of_iterations (struct ivopts_data *data)
901 struct loop *loop = data->current_loop;
902 edge exit = single_dom_exit (loop);
907 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
910 /* For each ssa name defined in LOOP determines whether it is an induction
911 variable and if so, its initial value and step. */
914 find_induction_variables (struct ivopts_data *data)
917 struct loop *loop = data->current_loop;
919 if (!find_bivs (data))
924 determine_number_of_iterations (data);
926 if (dump_file && (dump_flags & TDF_DETAILS))
928 if (loop_data (loop)->niter.niter)
930 fprintf (dump_file, " number of iterations ");
931 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
933 fprintf (dump_file, "\n");
935 fprintf (dump_file, " may be zero if ");
936 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
938 fprintf (dump_file, "\n");
940 fprintf (dump_file, " bogus unless ");
941 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
943 fprintf (dump_file, "\n");
944 fprintf (dump_file, "\n");
947 fprintf (dump_file, "Induction variables:\n\n");
949 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
951 if (ver_info (data, i)->iv)
952 dump_iv (dump_file, ver_info (data, i)->iv);
959 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
961 static struct iv_use *
962 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
963 tree stmt, enum use_type use_type)
965 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
967 use->id = n_iv_uses (data);
968 use->type = use_type;
972 use->related_cands = BITMAP_XMALLOC ();
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 dump_use (dump_file, use);
977 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
982 /* Checks whether OP is a loop-level invariant and if so, records it.
983 NONLINEAR_USE is true if the invariant is used in a way we do not
987 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
990 struct version_info *info;
992 if (TREE_CODE (op) != SSA_NAME
993 || !is_gimple_reg (op))
996 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
998 && flow_bb_inside_loop_p (data->current_loop, bb))
1001 info = name_info (data, op);
1003 info->has_nonlin_use |= nonlinear_use;
1005 info->inv_id = ++data->max_inv_id;
1006 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1009 /* Checks whether the use OP is interesting and if so, records it
1012 static struct iv_use *
1013 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1021 if (TREE_CODE (op) != SSA_NAME)
1024 iv = get_iv (data, op);
1028 if (iv->have_use_for)
1030 use = iv_use (data, iv->use_id);
1032 if (use->type != USE_NONLINEAR_EXPR
1033 && use->type != USE_OUTER)
1036 if (type == USE_NONLINEAR_EXPR)
1037 use->type = USE_NONLINEAR_EXPR;
1041 if (zero_p (iv->step))
1043 record_invariant (data, op, true);
1046 iv->have_use_for = true;
1048 civ = xmalloc (sizeof (struct iv));
1051 stmt = SSA_NAME_DEF_STMT (op);
1052 if (TREE_CODE (stmt) != PHI_NODE
1053 && TREE_CODE (stmt) != MODIFY_EXPR)
1056 use = record_use (data, NULL, civ, stmt, type);
1057 iv->use_id = use->id;
1062 /* Checks whether the use OP is interesting and if so, records it. */
1064 static struct iv_use *
1065 find_interesting_uses_op (struct ivopts_data *data, tree op)
1067 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1070 /* Records a definition of induction variable OP that is used outside of the
1073 static struct iv_use *
1074 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1076 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1079 /* Checks whether the condition *COND_P in STMT is interesting
1080 and if so, records it. */
1083 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1087 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1089 tree zero = integer_zero_node;
1091 const_iv.step = NULL_TREE;
1093 if (integer_zerop (*cond_p)
1094 || integer_nonzerop (*cond_p))
1097 if (TREE_CODE (*cond_p) == SSA_NAME)
1104 op0_p = &TREE_OPERAND (*cond_p, 0);
1105 op1_p = &TREE_OPERAND (*cond_p, 1);
1108 if (TREE_CODE (*op0_p) == SSA_NAME)
1109 iv0 = get_iv (data, *op0_p);
1113 if (TREE_CODE (*op1_p) == SSA_NAME)
1114 iv1 = get_iv (data, *op1_p);
1118 if (/* When comparing with non-invariant value, we may not do any senseful
1119 induction variable elimination. */
1121 /* Eliminating condition based on two ivs would be nontrivial.
1122 ??? TODO -- it is not really important to handle this case. */
1123 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1125 find_interesting_uses_op (data, *op0_p);
1126 find_interesting_uses_op (data, *op1_p);
1130 if (zero_p (iv0->step) && zero_p (iv1->step))
1132 /* If both are invariants, this is a work for unswitching. */
1136 civ = xmalloc (sizeof (struct iv));
1137 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1138 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1141 /* Cumulates the steps of indices into DATA and replaces their values with the
1142 initial ones. Returns false when the value of the index cannot be determined.
1143 Callback for for_each_index. */
1145 struct ifs_ivopts_data
1147 struct ivopts_data *ivopts_data;
1153 idx_find_step (tree base, tree *idx, void *data)
1155 struct ifs_ivopts_data *dta = data;
1157 tree step, type, iv_type, iv_step;
1159 if (TREE_CODE (*idx) != SSA_NAME)
1162 iv = get_iv (dta->ivopts_data, *idx);
1171 iv_type = TREE_TYPE (iv->base);
1172 type = build_pointer_type (TREE_TYPE (base));
1173 if (TREE_CODE (base) == ARRAY_REF)
1174 step = array_ref_element_size (base);
1177 /* The step for pointer arithmetics already is 1 byte. */
1178 step = fold_convert (type, integer_one_node);
1181 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1182 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1183 type, iv->base, iv->step, dta->stmt);
1185 iv_step = fold_convert (iv_type, iv->step);
1189 /* The index might wrap. */
1193 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1196 *dta->step_p = step;
1198 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1203 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1204 object is passed to it in DATA. */
1207 idx_record_use (tree base ATTRIBUTE_UNUSED, tree *idx,
1210 find_interesting_uses_op (data, *idx);
1214 /* Finds addresses in *OP_P inside STMT. */
1217 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1219 tree base = unshare_expr (*op_p), step = NULL;
1221 struct ifs_ivopts_data ifs_ivopts_data;
1223 /* Ignore bitfields for now. Not really something terribly complicated
1225 if (TREE_CODE (base) == COMPONENT_REF
1226 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1229 ifs_ivopts_data.ivopts_data = data;
1230 ifs_ivopts_data.stmt = stmt;
1231 ifs_ivopts_data.step_p = &step;
1232 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1236 if (TREE_CODE (base) == INDIRECT_REF)
1237 base = TREE_OPERAND (base, 0);
1239 base = build_addr (base);
1241 civ = alloc_iv (base, step);
1242 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1246 for_each_index (op_p, idx_record_use, data);
1249 /* Finds and records invariants used in STMT. */
1252 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1254 use_optype uses = NULL;
1258 if (TREE_CODE (stmt) == PHI_NODE)
1259 n = PHI_NUM_ARGS (stmt);
1262 get_stmt_operands (stmt);
1263 uses = STMT_USE_OPS (stmt);
1264 n = NUM_USES (uses);
1267 for (i = 0; i < n; i++)
1269 if (TREE_CODE (stmt) == PHI_NODE)
1270 op = PHI_ARG_DEF (stmt, i);
1272 op = USE_OP (uses, i);
1274 record_invariant (data, op, false);
1278 /* Finds interesting uses of induction variables in the statement STMT. */
1281 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1285 use_optype uses = NULL;
1288 find_invariants_stmt (data, stmt);
1290 if (TREE_CODE (stmt) == COND_EXPR)
1292 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1296 if (TREE_CODE (stmt) == MODIFY_EXPR)
1298 lhs = TREE_OPERAND (stmt, 0);
1299 rhs = TREE_OPERAND (stmt, 1);
1301 if (TREE_CODE (lhs) == SSA_NAME)
1303 /* If the statement defines an induction variable, the uses are not
1304 interesting by themselves. */
1306 iv = get_iv (data, lhs);
1308 if (iv && !zero_p (iv->step))
1312 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1315 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1319 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1320 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1321 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1327 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1329 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1330 find_interesting_uses_op (data, rhs);
1335 if (TREE_CODE (stmt) == PHI_NODE
1336 && bb_for_stmt (stmt) == data->current_loop->header)
1338 lhs = PHI_RESULT (stmt);
1339 iv = get_iv (data, lhs);
1341 if (iv && !zero_p (iv->step))
1345 if (TREE_CODE (stmt) == PHI_NODE)
1346 n = PHI_NUM_ARGS (stmt);
1349 uses = STMT_USE_OPS (stmt);
1350 n = NUM_USES (uses);
1353 for (i = 0; i < n; i++)
1355 if (TREE_CODE (stmt) == PHI_NODE)
1356 op = PHI_ARG_DEF (stmt, i);
1358 op = USE_OP (uses, i);
1360 if (TREE_CODE (op) != SSA_NAME)
1363 iv = get_iv (data, op);
1367 find_interesting_uses_op (data, op);
1371 /* Finds interesting uses of induction variables outside of loops
1372 on loop exit edge EXIT. */
1375 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1379 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1381 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1382 find_interesting_uses_outer (data, def);
1386 /* Finds uses of the induction variables that are interesting. */
1389 find_interesting_uses (struct ivopts_data *data)
1392 block_stmt_iterator bsi;
1394 basic_block *body = get_loop_body (data->current_loop);
1396 struct version_info *info;
1399 if (dump_file && (dump_flags & TDF_DETAILS))
1400 fprintf (dump_file, "Uses:\n\n");
1402 for (i = 0; i < data->current_loop->num_nodes; i++)
1406 for (e = bb->succ; e; e = e->succ_next)
1407 if (e->dest != EXIT_BLOCK_PTR
1408 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1409 find_interesting_uses_outside (data, e);
1411 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1412 find_interesting_uses_stmt (data, phi);
1413 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1414 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1417 if (dump_file && (dump_flags & TDF_DETAILS))
1419 fprintf (dump_file, "\n");
1421 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1423 info = ver_info (data, i);
1426 fprintf (dump_file, " ");
1427 print_generic_expr (dump_file, info->name, TDF_SLIM);
1428 fprintf (dump_file, " is invariant (%d)%s\n",
1429 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1433 fprintf (dump_file, "\n");
1439 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1440 position to POS. If USE is not NULL, the candidate is set as related to
1441 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1442 replacement of the final value of the iv by a direct computation. */
1444 static struct iv_cand *
1445 add_candidate_1 (struct ivopts_data *data,
1446 tree base, tree step, bool important, enum iv_position pos,
1447 struct iv_use *use, tree incremented_at)
1450 struct iv_cand *cand = NULL;
1455 type = TREE_TYPE (base);
1456 if (!TYPE_UNSIGNED (type))
1458 type = unsigned_type_for (type);
1459 base = fold_convert (type, base);
1461 step = fold_convert (type, step);
1465 for (i = 0; i < n_iv_cands (data); i++)
1467 cand = iv_cand (data, i);
1469 if (cand->pos != pos)
1472 if (cand->incremented_at != incremented_at)
1486 if (!operand_equal_p (base, cand->iv->base, 0))
1489 if (zero_p (cand->iv->step))
1496 if (step && operand_equal_p (step, cand->iv->step, 0))
1501 if (i == n_iv_cands (data))
1503 cand = xcalloc (1, sizeof (struct iv_cand));
1509 cand->iv = alloc_iv (base, step);
1512 if (pos != IP_ORIGINAL && cand->iv)
1514 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1515 cand->var_after = cand->var_before;
1517 cand->important = important;
1518 cand->incremented_at = incremented_at;
1519 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1521 if (dump_file && (dump_flags & TDF_DETAILS))
1522 dump_cand (dump_file, cand);
1525 if (important && !cand->important)
1527 cand->important = true;
1528 if (dump_file && (dump_flags & TDF_DETAILS))
1529 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1534 bitmap_set_bit (use->related_cands, i);
1535 if (dump_file && (dump_flags & TDF_DETAILS))
1536 fprintf (dump_file, "Candidate %d is related to use %d\n",
1543 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1544 position to POS. If USE is not NULL, the candidate is set as related to
1545 it. The candidate computation is scheduled on all available positions. */
1548 add_candidate (struct ivopts_data *data,
1549 tree base, tree step, bool important, struct iv_use *use)
1551 if (ip_normal_pos (data->current_loop))
1552 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1553 if (ip_end_pos (data->current_loop))
1554 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1557 /* Adds standard iv candidates. */
1560 add_standard_iv_candidates (struct ivopts_data *data)
1562 /* Add 0 + 1 * iteration candidate. */
1563 add_candidate (data,
1564 fold_convert (unsigned_type_node, integer_zero_node),
1565 fold_convert (unsigned_type_node, integer_one_node),
1568 /* The same for a long type. */
1569 add_candidate (data,
1570 fold_convert (long_unsigned_type_node, integer_zero_node),
1571 fold_convert (long_unsigned_type_node, integer_one_node),
1576 /* Adds candidates bases on the old induction variable IV. */
1579 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1582 struct iv_cand *cand;
1584 add_candidate (data, iv->base, iv->step, true, NULL);
1586 /* The same, but with initial value zero. */
1587 add_candidate (data,
1588 fold_convert (TREE_TYPE (iv->base), integer_zero_node),
1589 iv->step, true, NULL);
1591 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1592 if (TREE_CODE (phi) == PHI_NODE)
1594 /* Additionally record the possibility of leaving the original iv
1596 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1597 cand = add_candidate_1 (data,
1598 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1599 SSA_NAME_DEF_STMT (def));
1600 cand->var_before = iv->ssa_name;
1601 cand->var_after = def;
1605 /* Adds candidates based on the old induction variables. */
1608 add_old_ivs_candidates (struct ivopts_data *data)
1613 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1615 iv = ver_info (data, i)->iv;
1616 if (iv && iv->biv_p && !zero_p (iv->step))
1617 add_old_iv_candidates (data, iv);
1621 /* Adds candidates based on the value of the induction variable IV and USE. */
1624 add_iv_value_candidates (struct ivopts_data *data,
1625 struct iv *iv, struct iv_use *use)
1627 add_candidate (data, iv->base, iv->step, false, use);
1629 /* The same, but with initial value zero. */
1630 add_candidate (data,
1631 fold_convert (TREE_TYPE (iv->base), integer_zero_node),
1632 iv->step, false, use);
1635 /* Adds candidates based on the address IV and USE. */
1638 add_address_candidates (struct ivopts_data *data,
1639 struct iv *iv, struct iv_use *use)
1641 tree base, abase, tmp, *act;
1643 /* First, the trivial choices. */
1644 add_iv_value_candidates (data, iv, use);
1646 /* Second, try removing the COMPONENT_REFs. */
1647 if (TREE_CODE (iv->base) == ADDR_EXPR)
1649 base = TREE_OPERAND (iv->base, 0);
1650 while (TREE_CODE (base) == COMPONENT_REF
1651 || (TREE_CODE (base) == ARRAY_REF
1652 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1653 base = TREE_OPERAND (base, 0);
1655 if (base != TREE_OPERAND (iv->base, 0))
1657 if (TREE_CODE (base) == INDIRECT_REF)
1658 base = TREE_OPERAND (base, 0);
1660 base = build_addr (base);
1661 add_candidate (data, base, iv->step, false, use);
1665 /* Third, try removing the constant offset. */
1667 while (TREE_CODE (abase) == PLUS_EXPR
1668 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1669 abase = TREE_OPERAND (abase, 0);
1670 /* We found the offset, so make the copy of the non-shared part and
1672 if (TREE_CODE (abase) == PLUS_EXPR)
1677 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1679 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1680 NULL_TREE, TREE_OPERAND (tmp, 1));
1681 act = &TREE_OPERAND (*act, 0);
1683 *act = TREE_OPERAND (tmp, 0);
1685 add_candidate (data, base, iv->step, false, use);
1689 /* Possibly adds pseudocandidate for replacing the final value of USE by
1690 a direct computation. */
1693 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1695 struct tree_niter_desc *niter;
1696 struct loop *loop = data->current_loop;
1698 /* We must know where we exit the loop and how many times does it roll. */
1699 if (!single_dom_exit (loop))
1702 niter = &loop_data (loop)->niter;
1704 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1705 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1708 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1711 /* Adds candidates based on the uses. */
1714 add_derived_ivs_candidates (struct ivopts_data *data)
1718 for (i = 0; i < n_iv_uses (data); i++)
1720 struct iv_use *use = iv_use (data, i);
1727 case USE_NONLINEAR_EXPR:
1729 /* Just add the ivs based on the value of the iv used here. */
1730 add_iv_value_candidates (data, use->iv, use);
1734 add_iv_value_candidates (data, use->iv, use);
1736 /* Additionally, add the pseudocandidate for the possibility to
1737 replace the final value by a direct computation. */
1738 add_iv_outer_candidates (data, use);
1742 add_address_candidates (data, use->iv, use);
1751 /* Finds the candidates for the induction variables. */
1754 find_iv_candidates (struct ivopts_data *data)
1756 /* Add commonly used ivs. */
1757 add_standard_iv_candidates (data);
1759 /* Add old induction variables. */
1760 add_old_ivs_candidates (data);
1762 /* Add induction variables derived from uses. */
1763 add_derived_ivs_candidates (data);
1766 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1767 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1768 we allocate a simple list to every use. */
1771 alloc_use_cost_map (struct ivopts_data *data)
1773 unsigned i, n_imp = 0, size, j;
1775 if (!data->consider_all_candidates)
1777 for (i = 0; i < n_iv_cands (data); i++)
1779 struct iv_cand *cand = iv_cand (data, i);
1780 if (cand->important)
1785 for (i = 0; i < n_iv_uses (data); i++)
1787 struct iv_use *use = iv_use (data, i);
1789 if (data->consider_all_candidates)
1791 size = n_iv_cands (data);
1792 use->n_map_members = size;
1797 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1798 use->n_map_members = 0;
1801 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1805 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1806 on invariants DEPENDS_ON. */
1809 set_use_iv_cost (struct ivopts_data *data,
1810 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1816 BITMAP_XFREE (depends_on);
1820 if (data->consider_all_candidates)
1822 use->cost_map[cand->id].cand = cand;
1823 use->cost_map[cand->id].cost = cost;
1824 use->cost_map[cand->id].depends_on = depends_on;
1831 use->cost_map[use->n_map_members].cand = cand;
1832 use->cost_map[use->n_map_members].cost = cost;
1833 use->cost_map[use->n_map_members].depends_on = depends_on;
1834 use->n_map_members++;
1837 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1841 get_use_iv_cost (struct ivopts_data *data,
1842 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1849 if (data->consider_all_candidates)
1853 for (i = 0; i < use->n_map_members; i++)
1854 if (use->cost_map[i].cand == cand)
1857 if (i == use->n_map_members)
1862 *depends_on = use->cost_map[i].depends_on;
1863 return use->cost_map[i].cost;
1866 /* Returns estimate on cost of computing SEQ. */
1874 for (; seq; seq = NEXT_INSN (seq))
1876 set = single_set (seq);
1878 cost += rtx_cost (set, SET);
1886 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1887 walk_tree. DATA contains the actual fake register number. */
1890 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1892 tree obj = NULL_TREE;
1896 switch (TREE_CODE (*expr_p))
1900 obj = SSA_NAME_VAR (*expr_p);
1901 if (!DECL_RTL_SET_P (obj))
1902 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1911 if (DECL_RTL_SET_P (obj))
1914 if (DECL_MODE (obj) == BLKmode)
1916 if (TREE_STATIC (obj)
1917 || DECL_EXTERNAL (obj))
1919 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1920 x = gen_rtx_SYMBOL_REF (Pmode, name);
1923 x = gen_raw_REG (Pmode, (*regno)++);
1925 x = gen_rtx_MEM (DECL_MODE (obj), x);
1928 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1938 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
1939 SET_DECL_RTL (obj, x);
1945 /* Determines cost of the computation of EXPR. */
1948 computation_cost (tree expr)
1951 tree type = TREE_TYPE (expr);
1955 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
1957 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
1961 cost = seq_cost (seq);
1962 if (GET_CODE (rslt) == MEM)
1963 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
1968 /* Returns variable containing the value of candidate CAND at statement AT. */
1971 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
1973 if (stmt_after_increment (loop, cand, stmt))
1974 return cand->var_after;
1976 return cand->var_before;
1979 /* Determines the expression by that USE is expressed from induction variable
1980 CAND at statement AT in LOOP. */
1983 get_computation_at (struct loop *loop,
1984 struct iv_use *use, struct iv_cand *cand, tree at)
1986 tree ubase = use->iv->base, ustep = use->iv->step;
1987 tree cbase = cand->iv->base, cstep = cand->iv->step;
1988 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
1992 unsigned HOST_WIDE_INT ustepi, cstepi;
1993 HOST_WIDE_INT ratioi;
1995 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
1997 /* We do not have a precision to express the values of use. */
2001 expr = var_at_stmt (loop, cand, at);
2003 if (TREE_TYPE (expr) != ctype)
2005 /* This may happen with the original ivs. */
2006 expr = fold_convert (ctype, expr);
2009 if (TYPE_UNSIGNED (utype))
2013 uutype = unsigned_type_for (utype);
2014 ubase = fold_convert (uutype, ubase);
2015 ustep = fold_convert (uutype, ustep);
2018 if (uutype != ctype)
2020 expr = fold_convert (uutype, expr);
2021 cbase = fold_convert (uutype, cbase);
2022 cstep = fold_convert (uutype, cstep);
2025 if (!cst_and_fits_in_hwi (cstep)
2026 || !cst_and_fits_in_hwi (ustep))
2029 ustepi = int_cst_value (ustep);
2030 cstepi = int_cst_value (cstep);
2032 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2034 /* TODO maybe consider case when ustep divides cstep and the ratio is
2035 a power of 2 (so that the division is fast to execute)? We would
2036 need to be much more careful with overflows etc. then. */
2040 /* We may need to shift the value if we are after the increment. */
2041 if (stmt_after_increment (loop, cand, at))
2042 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2044 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2045 or |ratio| == 1, it is better to handle this like
2047 ubase - ratio * cbase + ratio * var. */
2051 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2052 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2054 else if (ratioi == -1)
2056 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2057 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2059 else if (TREE_CODE (cbase) == INTEGER_CST)
2061 ratio = build_int_cst_type (uutype, ratioi);
2062 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2063 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2064 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2065 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2069 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2070 ratio = build_int_cst_type (uutype, ratioi);
2071 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2072 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2075 return fold_convert (utype, expr);
2078 /* Determines the expression by that USE is expressed from induction variable
2082 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2084 return get_computation_at (loop, use, cand, use->stmt);
2087 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2090 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2093 enum tree_code code;
2097 if (cst_and_fits_in_hwi (*expr))
2099 *offset += int_cst_value (*expr);
2100 *expr = integer_zero_node;
2104 code = TREE_CODE (*expr);
2106 if (code != PLUS_EXPR && code != MINUS_EXPR)
2109 op0 = TREE_OPERAND (*expr, 0);
2110 op1 = TREE_OPERAND (*expr, 1);
2112 if (cst_and_fits_in_hwi (op1))
2114 if (code == PLUS_EXPR)
2115 *offset += int_cst_value (op1);
2117 *offset -= int_cst_value (op1);
2123 if (code != PLUS_EXPR)
2126 if (!cst_and_fits_in_hwi (op0))
2129 *offset += int_cst_value (op0);
2134 /* Returns cost of addition in MODE. */
2137 add_cost (enum machine_mode mode)
2139 static unsigned costs[NUM_MACHINE_MODES];
2147 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2148 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2149 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2154 cost = seq_cost (seq);
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2161 fprintf (dump_file, "Addition in %s costs %d\n",
2162 GET_MODE_NAME (mode), cost);
2166 /* Entry in a hashtable of already known costs for multiplication. */
2169 HOST_WIDE_INT cst; /* The constant to multiply by. */
2170 enum machine_mode mode; /* In mode. */
2171 unsigned cost; /* The cost. */
2174 /* Counts hash value for the ENTRY. */
2177 mbc_entry_hash (const void *entry)
2179 const struct mbc_entry *e = entry;
2181 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2184 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2187 mbc_entry_eq (const void *entry1, const void *entry2)
2189 const struct mbc_entry *e1 = entry1;
2190 const struct mbc_entry *e2 = entry2;
2192 return (e1->mode == e2->mode
2193 && e1->cst == e2->cst);
2196 /* Returns cost of multiplication by constant CST in MODE. */
2199 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2201 static htab_t costs;
2202 struct mbc_entry **cached, act;
2207 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2211 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2213 return (*cached)->cost;
2215 *cached = xmalloc (sizeof (struct mbc_entry));
2216 (*cached)->mode = mode;
2217 (*cached)->cst = cst;
2220 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2225 cost = seq_cost (seq);
2227 if (dump_file && (dump_flags & TDF_DETAILS))
2228 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2229 (int) cst, GET_MODE_NAME (mode), cost);
2231 (*cached)->cost = cost;
2236 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2237 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2238 variable is omitted. The created memory accesses MODE.
2240 TODO -- there must be some better way. This all is quite crude. */
2243 get_address_cost (bool symbol_present, bool var_present,
2244 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2246 #define MAX_RATIO 128
2247 static sbitmap valid_mult;
2248 static HOST_WIDE_INT rat, off;
2249 static HOST_WIDE_INT min_offset, max_offset;
2250 static unsigned costs[2][2][2][2];
2251 unsigned cost, acost;
2252 rtx seq, addr, base;
2253 bool offset_p, ratio_p;
2255 HOST_WIDE_INT s_offset;
2256 unsigned HOST_WIDE_INT mask;
2263 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2265 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2266 for (i = 1; i <= 1 << 20; i <<= 1)
2268 XEXP (addr, 1) = GEN_INT (i);
2269 if (!memory_address_p (Pmode, addr))
2272 max_offset = i >> 1;
2275 for (i = 1; i <= 1 << 20; i <<= 1)
2277 XEXP (addr, 1) = GEN_INT (-i);
2278 if (!memory_address_p (Pmode, addr))
2281 min_offset = -(i >> 1);
2283 if (dump_file && (dump_flags & TDF_DETAILS))
2285 fprintf (dump_file, "get_address_cost:\n");
2286 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2287 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2290 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2291 sbitmap_zero (valid_mult);
2293 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2294 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2296 XEXP (addr, 1) = GEN_INT (i);
2297 if (memory_address_p (Pmode, addr))
2299 SET_BIT (valid_mult, i + MAX_RATIO);
2304 if (dump_file && (dump_flags & TDF_DETAILS))
2306 fprintf (dump_file, " allowed multipliers:");
2307 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2308 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2309 fprintf (dump_file, " %d", (int) i);
2310 fprintf (dump_file, "\n");
2311 fprintf (dump_file, "\n");
2315 bits = GET_MODE_BITSIZE (Pmode);
2316 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2318 if ((offset >> (bits - 1) & 1))
2323 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2324 ratio_p = (ratio != 1
2325 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2326 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2328 if (ratio != 1 && !ratio_p)
2329 cost += multiply_by_cost (ratio, Pmode);
2331 if (s_offset && !offset_p && !symbol_present)
2333 cost += add_cost (Pmode);
2337 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2342 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2343 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2345 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2349 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2351 base = gen_rtx_fmt_e (CONST, Pmode,
2352 gen_rtx_fmt_ee (PLUS, Pmode,
2356 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2359 else if (var_present)
2363 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2366 base = GEN_INT (off);
2371 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2374 addr = memory_address (Pmode, addr);
2378 acost = seq_cost (seq);
2379 acost += address_cost (addr, Pmode);
2383 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2386 return cost + acost;
2389 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2390 the bitmap to that we should store it. */
2392 static struct ivopts_data *fd_ivopts_data;
2394 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2396 bitmap *depends_on = data;
2397 struct version_info *info;
2399 if (TREE_CODE (*expr_p) != SSA_NAME)
2401 info = name_info (fd_ivopts_data, *expr_p);
2403 if (!info->inv_id || info->has_nonlin_use)
2407 *depends_on = BITMAP_XMALLOC ();
2408 bitmap_set_bit (*depends_on, info->inv_id);
2413 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2414 invariants the computation depends on. */
2417 force_var_cost (struct ivopts_data *data,
2418 tree expr, bitmap *depends_on)
2420 static bool costs_initialized = false;
2421 static unsigned integer_cost;
2422 static unsigned symbol_cost;
2423 static unsigned address_cost;
2425 if (!costs_initialized)
2427 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2428 rtx x = gen_rtx_MEM (DECL_MODE (var),
2429 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2431 tree type = build_pointer_type (integer_type_node);
2433 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2436 SET_DECL_RTL (var, x);
2437 TREE_STATIC (var) = 1;
2438 addr = build1 (ADDR_EXPR, type, var);
2439 symbol_cost = computation_cost (addr) + 1;
2442 = computation_cost (build2 (PLUS_EXPR, type,
2444 build_int_cst_type (type, 2000))) + 1;
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2447 fprintf (dump_file, "force_var_cost:\n");
2448 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2449 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2450 fprintf (dump_file, " address %d\n", (int) address_cost);
2451 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2452 fprintf (dump_file, "\n");
2455 costs_initialized = true;
2460 fd_ivopts_data = data;
2461 walk_tree (&expr, find_depends, depends_on, NULL);
2464 if (SSA_VAR_P (expr))
2467 if (TREE_INVARIANT (expr))
2469 if (TREE_CODE (expr) == INTEGER_CST)
2470 return integer_cost;
2472 if (TREE_CODE (expr) == ADDR_EXPR)
2474 tree obj = TREE_OPERAND (expr, 0);
2476 if (TREE_CODE (obj) == VAR_DECL
2477 || TREE_CODE (obj) == PARM_DECL
2478 || TREE_CODE (obj) == RESULT_DECL)
2482 return address_cost;
2485 /* Just an arbitrary value, FIXME. */
2486 return target_spill_cost;
2489 /* Peels a single layer of ADDR. If DIFF is not NULL, do it only if the
2490 offset is constant and add the offset to DIFF. */
2493 peel_address (tree addr, unsigned HOST_WIDE_INT *diff)
2496 HOST_WIDE_INT bit_offset;
2498 switch (TREE_CODE (addr))
2512 off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (addr, 1));
2513 bit_offset = TREE_INT_CST_LOW (off);
2515 if (bit_offset % BITS_PER_UNIT)
2519 *diff += bit_offset / BITS_PER_UNIT;
2521 return TREE_OPERAND (addr, 0);
2524 off = TREE_OPERAND (addr, 1);
2528 if (!cst_and_fits_in_hwi (off))
2531 size = TYPE_SIZE_UNIT (TREE_TYPE (addr));
2532 if (!cst_and_fits_in_hwi (size))
2535 *diff += TREE_INT_CST_LOW (off) * TREE_INT_CST_LOW (size);
2538 return TREE_OPERAND (addr, 0);
2545 /* Checks whether E1 and E2 have constant difference, and if they do,
2546 store it in *DIFF. */
2549 ptr_difference_const (tree e1, tree e2, unsigned HOST_WIDE_INT *diff)
2553 unsigned HOST_WIDE_INT delta1 = 0, delta2 = 0;
2555 /* Find depths of E1 and E2. */
2556 for (x = e1; x; x = peel_address (x, NULL))
2558 for (x = e2; x; x = peel_address (x, NULL))
2561 for (; e1 && d1 > d2; e1 = peel_address (e1, &delta1))
2563 for (; e2 && d2 > d1; e2 = peel_address (e2, &delta2))
2566 while (e1 && e2 && !operand_equal_p (e1, e2, 0))
2568 e1 = peel_address (e1, &delta1);
2569 e2 = peel_address (e2, &delta1);
2575 *diff = delta1 - delta2;
2579 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2580 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2581 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2582 invariants the computation depends on. */
2585 split_address_cost (struct ivopts_data *data,
2586 tree addr, bool *symbol_present, bool *var_present,
2587 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2592 && TREE_CODE (core) != VAR_DECL)
2593 core = peel_address (core, offset);
2597 *symbol_present = false;
2598 *var_present = true;
2599 fd_ivopts_data = data;
2600 walk_tree (&addr, find_depends, depends_on, NULL);
2601 return target_spill_cost;
2604 if (TREE_STATIC (core)
2605 || DECL_EXTERNAL (core))
2607 *symbol_present = true;
2608 *var_present = false;
2612 *symbol_present = false;
2613 *var_present = true;
2617 /* Estimates cost of expressing difference of addresses E1 - E2 as
2618 var + symbol + offset. The value of offset is added to OFFSET,
2619 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2620 part is missing. DEPENDS_ON is a set of the invariants the computation
2624 ptr_difference_cost (struct ivopts_data *data,
2625 tree e1, tree e2, bool *symbol_present, bool *var_present,
2626 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2628 unsigned HOST_WIDE_INT diff = 0;
2631 if (TREE_CODE (e1) != ADDR_EXPR)
2634 if (TREE_CODE (e2) == ADDR_EXPR
2635 && ptr_difference_const (TREE_OPERAND (e1, 0),
2636 TREE_OPERAND (e2, 0), &diff))
2639 *symbol_present = false;
2640 *var_present = false;
2644 if (e2 == integer_zero_node)
2645 return split_address_cost (data, TREE_OPERAND (e1, 0),
2646 symbol_present, var_present, offset, depends_on);
2648 *symbol_present = false;
2649 *var_present = true;
2651 cost = force_var_cost (data, e1, depends_on);
2652 cost += force_var_cost (data, e2, depends_on);
2653 cost += add_cost (Pmode);
2658 /* Estimates cost of expressing difference E1 - E2 as
2659 var + symbol + offset. The value of offset is added to OFFSET,
2660 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2661 part is missing. DEPENDS_ON is a set of the invariants the computation
2665 difference_cost (struct ivopts_data *data,
2666 tree e1, tree e2, bool *symbol_present, bool *var_present,
2667 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2670 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2672 strip_offset (&e1, offset);
2674 strip_offset (&e2, offset);
2677 if (TREE_CODE (e1) == ADDR_EXPR)
2678 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2680 *symbol_present = false;
2682 if (operand_equal_p (e1, e2, 0))
2684 *var_present = false;
2687 *var_present = true;
2689 return force_var_cost (data, e1, depends_on);
2693 cost = force_var_cost (data, e2, depends_on);
2694 cost += multiply_by_cost (-1, mode);
2699 cost = force_var_cost (data, e1, depends_on);
2700 cost += force_var_cost (data, e2, depends_on);
2701 cost += add_cost (mode);
2706 /* Determines the cost of the computation by that USE is expressed
2707 from induction variable CAND. If ADDRESS_P is true, we just need
2708 to create an address from it, otherwise we want to get it into
2709 register. A set of invariants we depend on is stored in
2710 DEPENDS_ON. AT is the statement at that the value is computed. */
2713 get_computation_cost_at (struct ivopts_data *data,
2714 struct iv_use *use, struct iv_cand *cand,
2715 bool address_p, bitmap *depends_on, tree at)
2717 tree ubase = use->iv->base, ustep = use->iv->step;
2719 tree utype = TREE_TYPE (ubase), ctype;
2720 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2721 HOST_WIDE_INT ratio, aratio;
2722 bool var_present, symbol_present;
2723 unsigned cost = 0, n_sums;
2727 /* Only consider real candidates. */
2731 cbase = cand->iv->base;
2732 cstep = cand->iv->step;
2733 ctype = TREE_TYPE (cbase);
2735 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2737 /* We do not have a precision to express the values of use. */
2741 if (!cst_and_fits_in_hwi (ustep)
2742 || !cst_and_fits_in_hwi (cstep))
2745 if (TREE_CODE (ubase) == INTEGER_CST
2746 && !cst_and_fits_in_hwi (ubase))
2749 if (TREE_CODE (cbase) == INTEGER_CST
2750 && !cst_and_fits_in_hwi (cbase))
2753 ustepi = int_cst_value (ustep);
2754 cstepi = int_cst_value (cstep);
2756 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2758 /* TODO -- add direct handling of this case. */
2762 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2765 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2766 or ratio == 1, it is better to handle this like
2768 ubase - ratio * cbase + ratio * var
2770 (also holds in the case ratio == -1, TODO. */
2772 if (TREE_CODE (cbase) == INTEGER_CST)
2774 offset = - ratio * int_cst_value (cbase);
2775 cost += difference_cost (data,
2776 ubase, integer_zero_node,
2777 &symbol_present, &var_present, &offset,
2780 else if (ratio == 1)
2782 cost += difference_cost (data,
2784 &symbol_present, &var_present, &offset,
2789 cost += force_var_cost (data, cbase, depends_on);
2790 cost += add_cost (TYPE_MODE (ctype));
2791 cost += difference_cost (data,
2792 ubase, integer_zero_node,
2793 &symbol_present, &var_present, &offset,
2797 /* If we are after the increment, the value of the candidate is higher by
2799 if (stmt_after_increment (data->current_loop, cand, at))
2800 offset -= ratio * cstepi;
2802 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2803 (symbol/var/const parts may be omitted). If we are looking for an address,
2804 find the cost of addressing this. */
2806 return get_address_cost (symbol_present, var_present, offset, ratio);
2808 /* Otherwise estimate the costs for computing the expression. */
2809 aratio = ratio > 0 ? ratio : -ratio;
2810 if (!symbol_present && !var_present && !offset)
2813 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2819 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2823 /* Symbol + offset should be compile-time computable. */
2824 && (symbol_present || offset))
2827 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2831 /* Just get the expression, expand it and measure the cost. */
2832 tree comp = get_computation_at (data->current_loop, use, cand, at);
2838 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2840 return computation_cost (comp);
2844 /* Determines the cost of the computation by that USE is expressed
2845 from induction variable CAND. If ADDRESS_P is true, we just need
2846 to create an address from it, otherwise we want to get it into
2847 register. A set of invariants we depend on is stored in
2851 get_computation_cost (struct ivopts_data *data,
2852 struct iv_use *use, struct iv_cand *cand,
2853 bool address_p, bitmap *depends_on)
2855 return get_computation_cost_at (data,
2856 use, cand, address_p, depends_on, use->stmt);
2859 /* Determines cost of basing replacement of USE on CAND in a generic
2863 determine_use_iv_cost_generic (struct ivopts_data *data,
2864 struct iv_use *use, struct iv_cand *cand)
2867 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2869 set_use_iv_cost (data, use, cand, cost, depends_on);
2872 /* Determines cost of basing replacement of USE on CAND in an address. */
2875 determine_use_iv_cost_address (struct ivopts_data *data,
2876 struct iv_use *use, struct iv_cand *cand)
2879 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2881 set_use_iv_cost (data, use, cand, cost, depends_on);
2884 /* Computes value of induction variable IV in iteration NITER. */
2887 iv_value (struct iv *iv, tree niter)
2890 tree type = TREE_TYPE (iv->base);
2892 niter = fold_convert (type, niter);
2893 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2895 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2898 /* Computes value of candidate CAND at position AT in iteration NITER. */
2901 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2903 tree val = iv_value (cand->iv, niter);
2904 tree type = TREE_TYPE (cand->iv->base);
2906 if (stmt_after_increment (loop, cand, at))
2907 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2912 /* Check whether it is possible to express the condition in USE by comparison
2913 of candidate CAND. If so, store the comparison code to COMPARE and the
2914 value compared with to BOUND. */
2917 may_eliminate_iv (struct loop *loop,
2918 struct iv_use *use, struct iv_cand *cand,
2919 enum tree_code *compare, tree *bound)
2922 struct tree_niter_desc *niter, new_niter;
2923 tree wider_type, type, base;
2925 /* For now just very primitive -- we work just for the single exit condition,
2926 and are quite conservative about the possible overflows. TODO -- both of
2927 these can be improved. */
2928 exit = single_dom_exit (loop);
2931 if (use->stmt != last_stmt (exit->src))
2934 niter = &loop_data (loop)->niter;
2936 || !integer_nonzerop (niter->assumptions)
2937 || !integer_zerop (niter->may_be_zero))
2940 if (exit->flags & EDGE_TRUE_VALUE)
2945 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2947 /* Let us check there is not some problem with overflows, by checking that
2948 the number of iterations is unchanged. */
2949 base = cand->iv->base;
2950 type = TREE_TYPE (base);
2951 if (stmt_after_increment (loop, cand, use->stmt))
2952 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2954 new_niter.niter = NULL_TREE;
2955 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2956 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2958 if (!new_niter.niter
2959 || !integer_nonzerop (new_niter.assumptions)
2960 || !integer_zerop (new_niter.may_be_zero))
2963 wider_type = TREE_TYPE (new_niter.niter);
2964 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2965 wider_type = TREE_TYPE (niter->niter);
2966 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2967 fold_convert (wider_type, new_niter.niter), 0))
2973 /* Determines cost of basing replacement of USE on CAND in a condition. */
2976 determine_use_iv_cost_condition (struct ivopts_data *data,
2977 struct iv_use *use, struct iv_cand *cand)
2980 enum tree_code compare;
2982 /* Only consider real candidates. */
2985 set_use_iv_cost (data, use, cand, INFTY, NULL);
2989 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
2991 bitmap depends_on = NULL;
2992 unsigned cost = force_var_cost (data, bound, &depends_on);
2994 set_use_iv_cost (data, use, cand, cost, depends_on);
2998 /* The induction variable elimination failed; just express the original
2999 giv. If it is compared with an invariant, note that we cannot get
3001 if (TREE_CODE (*use->op_p) == SSA_NAME)
3002 record_invariant (data, *use->op_p, true);
3005 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3006 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3009 determine_use_iv_cost_generic (data, use, cand);
3012 /* Checks whether it is possible to replace the final value of USE by
3013 a direct computation. If so, the formula is stored to *VALUE. */
3016 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3019 struct tree_niter_desc *niter;
3021 exit = single_dom_exit (loop);
3025 if (!dominated_by_p (CDI_DOMINATORS, exit->src,
3026 bb_for_stmt (use->stmt)))
3029 niter = &loop_data (loop)->niter;
3031 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3032 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3035 *value = iv_value (use->iv, niter->niter);
3040 /* Determines cost of replacing final value of USE using CAND. */
3043 determine_use_iv_cost_outer (struct ivopts_data *data,
3044 struct iv_use *use, struct iv_cand *cand)
3050 struct loop *loop = data->current_loop;
3054 if (!may_replace_final_value (loop, use, &value))
3056 set_use_iv_cost (data, use, cand, INFTY, NULL);
3061 cost = force_var_cost (data, value, &depends_on);
3063 cost /= AVG_LOOP_NITER (loop);
3065 set_use_iv_cost (data, use, cand, cost, depends_on);
3069 exit = single_dom_exit (loop);
3072 /* If there is just a single exit, we may use value of the candidate
3073 after we take it to determine the value of use. */
3074 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3075 last_stmt (exit->src));
3077 cost /= AVG_LOOP_NITER (loop);
3081 /* Otherwise we just need to compute the iv. */
3082 cost = get_computation_cost (data, use, cand, false, &depends_on);
3085 set_use_iv_cost (data, use, cand, cost, depends_on);
3088 /* Determines cost of basing replacement of USE on CAND. */
3091 determine_use_iv_cost (struct ivopts_data *data,
3092 struct iv_use *use, struct iv_cand *cand)
3096 case USE_NONLINEAR_EXPR:
3097 determine_use_iv_cost_generic (data, use, cand);
3101 determine_use_iv_cost_outer (data, use, cand);
3105 determine_use_iv_cost_address (data, use, cand);
3109 determine_use_iv_cost_condition (data, use, cand);
3117 /* Determines costs of basing the use of the iv on an iv candidate. */
3120 determine_use_iv_costs (struct ivopts_data *data)
3124 struct iv_cand *cand;
3126 data->consider_all_candidates = (n_iv_cands (data)
3127 <= CONSIDER_ALL_CANDIDATES_BOUND);
3129 alloc_use_cost_map (data);
3131 if (!data->consider_all_candidates)
3133 /* Add the important candidate entries. */
3134 for (j = 0; j < n_iv_cands (data); j++)
3136 cand = iv_cand (data, j);
3137 if (!cand->important)
3139 for (i = 0; i < n_iv_uses (data); i++)
3141 use = iv_use (data, i);
3142 determine_use_iv_cost (data, use, cand);
3147 for (i = 0; i < n_iv_uses (data); i++)
3149 use = iv_use (data, i);
3151 if (data->consider_all_candidates)
3153 for (j = 0; j < n_iv_cands (data); j++)
3155 cand = iv_cand (data, j);
3156 determine_use_iv_cost (data, use, cand);
3161 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3163 cand = iv_cand (data, j);
3164 if (!cand->important)
3165 determine_use_iv_cost (data, use, cand);
3170 if (dump_file && (dump_flags & TDF_DETAILS))
3172 fprintf (dump_file, "Use-candidate costs:\n");
3174 for (i = 0; i < n_iv_uses (data); i++)
3176 use = iv_use (data, i);
3178 fprintf (dump_file, "Use %d:\n", i);
3179 fprintf (dump_file, " cand\tcost\tdepends on\n");
3180 for (j = 0; j < use->n_map_members; j++)
3182 if (!use->cost_map[j].cand
3183 || use->cost_map[j].cost == INFTY)
3186 fprintf (dump_file, " %d\t%d\t",
3187 use->cost_map[j].cand->id,
3188 use->cost_map[j].cost);
3189 if (use->cost_map[j].depends_on)
3190 bitmap_print (dump_file,
3191 use->cost_map[j].depends_on, "","");
3192 fprintf (dump_file, "\n");
3195 fprintf (dump_file, "\n");
3197 fprintf (dump_file, "\n");
3201 /* Determines cost of the candidate CAND. */
3204 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3206 unsigned cost_base, cost_step;
3216 /* There are two costs associated with the candidate -- its increment
3217 and its initialization. The second is almost negligible for any loop
3218 that rolls enough, so we take it just very little into account. */
3220 base = cand->iv->base;
3221 cost_base = force_var_cost (data, base, NULL);
3222 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3224 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3226 /* Prefer the original iv unless we may gain something by replacing it. */
3227 if (cand->pos == IP_ORIGINAL)
3230 /* Prefer not to insert statements into latch unless there are some
3231 already (so that we do not create unnecessary jumps). */
3232 if (cand->pos == IP_END)
3234 bb = ip_end_pos (data->current_loop);
3235 last = last_stmt (bb);
3238 || TREE_CODE (last) == LABEL_EXPR)
3243 /* Determines costs of computation of the candidates. */
3246 determine_iv_costs (struct ivopts_data *data)
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, "Candidate costs:\n");
3253 fprintf (dump_file, " cand\tcost\n");
3256 for (i = 0; i < n_iv_cands (data); i++)
3258 struct iv_cand *cand = iv_cand (data, i);
3260 determine_iv_cost (data, cand);
3262 if (dump_file && (dump_flags & TDF_DETAILS))
3263 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3266 if (dump_file && (dump_flags & TDF_DETAILS))
3267 fprintf (dump_file, "\n");
3270 /* Calculates cost for having SIZE induction variables. */
3273 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3275 return global_cost_for_size (size,
3276 loop_data (data->current_loop)->regs_used,
3280 /* For each size of the induction variable set determine the penalty. */
3283 determine_set_costs (struct ivopts_data *data)
3287 struct loop *loop = data->current_loop;
3289 /* We use the following model (definitely improvable, especially the
3290 cost function -- TODO):
3292 We estimate the number of registers available (using MD data), name it A.
3294 We estimate the number of registers used by the loop, name it U. This
3295 number is obtained as the number of loop phi nodes (not counting virtual
3296 registers and bivs) + the number of variables from outside of the loop.
3298 We set a reserve R (free regs that are used for temporary computations,
3299 etc.). For now the reserve is a constant 3.
3301 Let I be the number of induction variables.
3303 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3304 make a lot of ivs without a reason).
3305 -- if A - R < U + I <= A, the cost is I * PRES_COST
3306 -- if U + I > A, the cost is I * PRES_COST and
3307 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3309 if (dump_file && (dump_flags & TDF_DETAILS))
3311 fprintf (dump_file, "Global costs:\n");
3312 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3313 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3314 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3315 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3319 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3321 op = PHI_RESULT (phi);
3323 if (!is_gimple_reg (op))
3326 if (get_iv (data, op))
3332 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3334 struct version_info *info = ver_info (data, j);
3336 if (info->inv_id && info->has_nonlin_use)
3340 loop_data (loop)->regs_used = n;
3341 if (dump_file && (dump_flags & TDF_DETAILS))
3342 fprintf (dump_file, " regs_used %d\n", n);
3344 if (dump_file && (dump_flags & TDF_DETAILS))
3346 fprintf (dump_file, " cost for size:\n");
3347 fprintf (dump_file, " ivs\tcost\n");
3348 for (j = 0; j <= 2 * target_avail_regs; j++)
3349 fprintf (dump_file, " %d\t%d\n", j,
3350 ivopts_global_cost_for_size (data, j));
3351 fprintf (dump_file, "\n");
3355 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3356 taken from the set SOL and they may depend on invariants in the set INV.
3357 The really used candidate and invariants are noted to USED_IVS and
3361 find_best_candidate (struct ivopts_data *data,
3362 struct iv_use *use, bitmap sol, bitmap inv,
3363 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3366 unsigned best_cost = INFTY, cost;
3367 struct iv_cand *cnd = NULL, *acnd;
3368 bitmap depends_on = NULL, asol;
3370 if (data->consider_all_candidates)
3374 asol = BITMAP_XMALLOC ();
3375 bitmap_a_and_b (asol, sol, use->related_cands);
3378 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3380 acnd = iv_cand (data, c);
3381 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3385 if (cost > best_cost)
3387 if (cost == best_cost)
3389 /* Prefer the cheaper iv. */
3390 if (acnd->cost >= cnd->cost)
3396 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3399 bitmap_a_or_b (used_inv, used_inv, depends_on);
3407 if (cnd && used_ivs)
3408 bitmap_set_bit (used_ivs, cnd->id);
3413 if (!data->consider_all_candidates)
3414 BITMAP_XFREE (asol);
3419 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3420 induction variable candidates and invariants from the sets. Only
3421 uses 0 .. MAX_USE - 1 are taken into account. */
3424 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3428 unsigned cost = 0, size = 0, acost;
3430 struct iv_cand *cand;
3431 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3433 for (i = 0; i < max_use; i++)
3435 use = iv_use (data, i);
3436 acost = find_best_candidate (data, use, sol, inv,
3437 used_ivs, used_inv, NULL);
3440 BITMAP_XFREE (used_ivs);
3441 BITMAP_XFREE (used_inv);
3447 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3449 cand = iv_cand (data, i);
3451 /* Do not count the pseudocandidates. */
3457 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3458 cost += ivopts_global_cost_for_size (data, size);
3460 bitmap_copy (sol, used_ivs);
3461 bitmap_copy (inv, used_inv);
3463 BITMAP_XFREE (used_ivs);
3464 BITMAP_XFREE (used_inv);
3469 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3470 induction variable candidates and invariants from the sets. */
3473 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3475 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3478 /* Tries to extend the sets IVS and INV in the best possible way in order
3479 to express the USE. */
3482 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3485 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3486 bitmap best_ivs = BITMAP_XMALLOC ();
3487 bitmap best_inv = BITMAP_XMALLOC ();
3488 bitmap act_ivs = BITMAP_XMALLOC ();
3489 bitmap act_inv = BITMAP_XMALLOC ();
3491 struct cost_pair *cp;
3493 bitmap_copy (best_ivs, ivs);
3494 bitmap_copy (best_inv, inv);
3496 for (i = 0; i < use->n_map_members; i++)
3498 cp = use->cost_map + i;
3499 if (cp->cost == INFTY)
3502 bitmap_copy (act_ivs, ivs);
3503 bitmap_set_bit (act_ivs, cp->cand->id);
3505 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3507 bitmap_copy (act_inv, inv);
3508 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3510 if (act_cost < best_cost)
3512 best_cost = act_cost;
3513 bitmap_copy (best_ivs, act_ivs);
3514 bitmap_copy (best_inv, act_inv);
3518 bitmap_copy (ivs, best_ivs);
3519 bitmap_copy (inv, best_inv);
3521 BITMAP_XFREE (best_ivs);
3522 BITMAP_XFREE (best_inv);
3523 BITMAP_XFREE (act_ivs);
3524 BITMAP_XFREE (act_inv);
3526 return (best_cost != INFTY);
3529 /* Finds an initial set of IVS and invariants INV. We do this by simply
3530 choosing the best candidate for each use. */
3533 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3537 for (i = 0; i < n_iv_uses (data); i++)
3538 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3541 return set_cost (data, ivs, inv);
3544 /* Tries to improve set of induction variables IVS and invariants INV to get
3545 it better than COST. */
3548 try_improve_iv_set (struct ivopts_data *data,
3549 bitmap ivs, bitmap inv, unsigned *cost)
3552 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3553 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3555 /* Try altering the set of induction variables by one. */
3556 for (i = 0; i < n_iv_cands (data); i++)
3558 bitmap_copy (new_ivs, ivs);
3559 bitmap_copy (new_inv, inv);
3561 if (bitmap_bit_p (ivs, i))
3562 bitmap_clear_bit (new_ivs, i);
3564 bitmap_set_bit (new_ivs, i);
3566 acost = set_cost (data, new_ivs, new_inv);
3572 best_new_ivs = BITMAP_XMALLOC ();
3573 best_new_inv = BITMAP_XMALLOC ();
3577 bitmap_copy (best_new_ivs, new_ivs);
3578 bitmap_copy (best_new_inv, new_inv);
3581 /* Ditto for invariants. */
3582 for (i = 1; i <= data->max_inv_id; i++)
3584 if (ver_info (data, i)->has_nonlin_use)
3587 bitmap_copy (new_ivs, ivs);
3588 bitmap_copy (new_inv, inv);
3590 if (bitmap_bit_p (inv, i))
3591 bitmap_clear_bit (new_inv, i);
3593 bitmap_set_bit (new_inv, i);
3595 acost = set_cost (data, new_ivs, new_inv);
3601 best_new_ivs = BITMAP_XMALLOC ();
3602 best_new_inv = BITMAP_XMALLOC ();
3606 bitmap_copy (best_new_ivs, new_ivs);
3607 bitmap_copy (best_new_inv, new_inv);
3610 BITMAP_XFREE (new_ivs);
3611 BITMAP_XFREE (new_inv);
3616 bitmap_copy (ivs, best_new_ivs);
3617 bitmap_copy (inv, best_new_inv);
3618 BITMAP_XFREE (best_new_ivs);
3619 BITMAP_XFREE (best_new_inv);
3623 /* Attempts to find the optimal set of induction variables. We do simple
3624 greedy heuristic -- we try to replace at most one candidate in the selected
3625 solution and remove the unused ivs while this improves the cost. */
3628 find_optimal_iv_set (struct ivopts_data *data)
3631 bitmap set = BITMAP_XMALLOC ();
3632 bitmap inv = BITMAP_XMALLOC ();
3635 /* Set the upper bound. */
3636 cost = get_initial_solution (data, set, inv);
3639 if (dump_file && (dump_flags & TDF_DETAILS))
3640 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3646 if (dump_file && (dump_flags & TDF_DETAILS))
3648 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3649 bitmap_print (dump_file, set, "", "");
3650 fprintf (dump_file, " invariants ");
3651 bitmap_print (dump_file, inv, "", "");
3652 fprintf (dump_file, "\n");
3655 while (try_improve_iv_set (data, set, inv, &cost))
3657 if (dump_file && (dump_flags & TDF_DETAILS))
3659 fprintf (dump_file, "Improved to (cost %d): ", cost);
3660 bitmap_print (dump_file, set, "", "");
3661 fprintf (dump_file, " invariants ");
3662 bitmap_print (dump_file, inv, "", "");
3663 fprintf (dump_file, "\n");
3667 if (dump_file && (dump_flags & TDF_DETAILS))
3668 fprintf (dump_file, "Final cost %d\n\n", cost);
3670 for (i = 0; i < n_iv_uses (data); i++)
3672 use = iv_use (data, i);
3673 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3681 /* Creates a new induction variable corresponding to CAND. */
3684 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3686 block_stmt_iterator incr_pos;
3696 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3700 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3705 /* Mark that the iv is preserved. */
3706 name_info (data, cand->var_before)->preserve_biv = true;
3707 name_info (data, cand->var_after)->preserve_biv = true;
3709 /* Rewrite the increment so that it uses var_before directly. */
3710 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3715 gimple_add_tmp_var (cand->var_before);
3716 add_referenced_tmp_var (cand->var_before);
3718 base = unshare_expr (cand->iv->base);
3720 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3721 &incr_pos, after, &cand->var_before, &cand->var_after);
3724 /* Creates new induction variables described in SET. */
3727 create_new_ivs (struct ivopts_data *data, bitmap set)
3730 struct iv_cand *cand;
3732 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3734 cand = iv_cand (data, i);
3735 create_new_iv (data, cand);
3739 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3740 is true, remove also the ssa name defined by the statement. */
3743 remove_statement (tree stmt, bool including_defined_name)
3745 if (TREE_CODE (stmt) == PHI_NODE)
3747 if (!including_defined_name)
3749 /* Prevent the ssa name defined by the statement from being removed. */
3750 SET_PHI_RESULT (stmt, NULL);
3752 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3756 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3762 /* Rewrites USE (definition of iv used in a nonlinear expression)
3763 using candidate CAND. */
3766 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3767 struct iv_use *use, struct iv_cand *cand)
3769 tree comp = unshare_expr (get_computation (data->current_loop,
3771 tree op, stmts, tgt, ass;
3772 block_stmt_iterator bsi, pbsi;
3774 if (TREE_CODE (use->stmt) == PHI_NODE)
3776 tgt = PHI_RESULT (use->stmt);
3778 /* If we should keep the biv, do not replace it. */
3779 if (name_info (data, tgt)->preserve_biv)
3782 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3783 while (!bsi_end_p (pbsi)
3784 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3790 else if (TREE_CODE (use->stmt) == MODIFY_EXPR)
3792 tgt = TREE_OPERAND (use->stmt, 0);
3793 bsi = stmt_for_bsi (use->stmt);
3798 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3800 if (TREE_CODE (use->stmt) == PHI_NODE)
3803 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3804 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3805 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3806 remove_statement (use->stmt, false);
3807 SSA_NAME_DEF_STMT (tgt) = ass;
3812 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3813 TREE_OPERAND (use->stmt, 1) = op;
3817 /* Replaces ssa name in index IDX by its basic variable. Callback for
3821 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3822 void *data ATTRIBUTE_UNUSED)
3824 if (TREE_CODE (*idx) == SSA_NAME)
3825 *idx = SSA_NAME_VAR (*idx);
3829 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3832 unshare_and_remove_ssa_names (tree ref)
3834 ref = unshare_expr (ref);
3835 for_each_index (&ref, idx_remove_ssa_names, NULL);
3840 /* Rewrites base of memory access OP with expression WITH in statement
3841 pointed to by BSI. */
3844 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3846 tree var = get_base_address (*op), new_var, new_name, copy, name;
3849 if (!var || TREE_CODE (with) != SSA_NAME)
3852 if (TREE_CODE (var) == INDIRECT_REF)
3853 var = TREE_OPERAND (var, 0);
3854 if (TREE_CODE (var) == SSA_NAME)
3857 var = SSA_NAME_VAR (var);
3859 else if (DECL_P (var))
3864 if (var_ann (var)->type_mem_tag)
3865 var = var_ann (var)->type_mem_tag;
3867 /* We need to add a memory tag for the variable. But we do not want
3868 to add it to the temporary used for the computations, since this leads
3869 to problems in redundancy elimination when there are common parts
3870 in two computations referring to the different arrays. So we copy
3871 the variable to a new temporary. */
3872 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3874 new_name = duplicate_ssa_name (name, copy);
3877 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3878 add_referenced_tmp_var (new_var);
3879 var_ann (new_var)->type_mem_tag = var;
3880 new_name = make_ssa_name (new_var, copy);
3882 TREE_OPERAND (copy, 0) = new_name;
3883 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3889 if (TREE_CODE (*op) == INDIRECT_REF)
3890 orig = REF_ORIGINAL (*op);
3892 orig = unshare_and_remove_ssa_names (*op);
3894 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3895 /* Record the original reference, for purposes of alias analysis. */
3896 REF_ORIGINAL (*op) = orig;
3899 /* Rewrites USE (address that is an iv) using candidate CAND. */
3902 rewrite_use_address (struct ivopts_data *data,
3903 struct iv_use *use, struct iv_cand *cand)
3905 tree comp = unshare_expr (get_computation (data->current_loop,
3907 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3909 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3912 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3914 rewrite_address_base (&bsi, use->op_p, op);
3917 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3921 rewrite_use_compare (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand)
3925 tree *op_p, cond, op, stmts, bound;
3926 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3927 enum tree_code compare;
3929 if (may_eliminate_iv (data->current_loop,
3930 use, cand, &compare, &bound))
3932 op = force_gimple_operand (unshare_expr (bound), &stmts,
3936 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3938 *use->op_p = build2 (compare, boolean_type_node,
3939 var_at_stmt (data->current_loop,
3940 cand, use->stmt), op);
3941 modify_stmt (use->stmt);
3945 /* The induction variable elimination failed; just express the original
3947 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3950 op_p = &TREE_OPERAND (cond, 0);
3951 if (TREE_CODE (*op_p) != SSA_NAME
3952 || zero_p (get_iv (data, *op_p)->step))
3953 op_p = &TREE_OPERAND (cond, 1);
3955 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3957 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3962 /* Ensure that operand *OP_P may be used at the end of EXIT without
3963 violating loop closed ssa form. */
3966 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3969 struct loop *def_loop;
3972 use = USE_FROM_PTR (op_p);
3973 if (TREE_CODE (use) != SSA_NAME)
3976 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3980 def_loop = def_bb->loop_father;
3981 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3984 /* Try finding a phi node that copies the value out of the loop. */
3985 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
3986 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
3991 /* Create such a phi node. */
3992 tree new_name = duplicate_ssa_name (use, NULL);
3994 phi = create_phi_node (new_name, exit->dest);
3995 SSA_NAME_DEF_STMT (new_name) = phi;
3996 add_phi_arg (&phi, use, exit);
3999 SET_USE (op_p, PHI_RESULT (phi));
4002 /* Ensure that operands of STMT may be used at the end of EXIT without
4003 violating loop closed ssa form. */
4006 protect_loop_closed_ssa_form (edge exit, tree stmt)
4010 v_may_def_optype v_may_defs;
4013 get_stmt_operands (stmt);
4015 uses = STMT_USE_OPS (stmt);
4016 for (i = 0; i < NUM_USES (uses); i++)
4017 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4019 vuses = STMT_VUSE_OPS (stmt);
4020 for (i = 0; i < NUM_VUSES (vuses); i++)
4021 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4023 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4024 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4025 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4028 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4029 so that they are emitted on the correct place, and so that the loop closed
4030 ssa form is preserved. */
4033 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4035 tree_stmt_iterator tsi;
4036 block_stmt_iterator bsi;
4037 tree phi, stmt, def, next;
4039 if (exit->dest->pred->pred_next)
4040 split_loop_exit_edge (exit);
4042 if (TREE_CODE (stmts) == STATEMENT_LIST)
4044 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4045 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4048 protect_loop_closed_ssa_form (exit, stmts);
4050 /* Ensure there is label in exit->dest, so that we can
4052 tree_block_label (exit->dest);
4053 bsi = bsi_after_labels (exit->dest);
4054 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4059 for (phi = phi_nodes (exit->dest); phi; phi = next)
4061 next = TREE_CHAIN (phi);
4063 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4065 def = PHI_RESULT (phi);
4066 remove_statement (phi, false);
4067 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4069 SSA_NAME_DEF_STMT (def) = stmt;
4070 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4075 /* Rewrites the final value of USE (that is only needed outside of the loop)
4076 using candidate CAND. */
4079 rewrite_use_outer (struct ivopts_data *data,
4080 struct iv_use *use, struct iv_cand *cand)
4083 tree value, op, stmts, tgt;
4086 if (TREE_CODE (use->stmt) == PHI_NODE)
4087 tgt = PHI_RESULT (use->stmt);
4088 else if (TREE_CODE (use->stmt) == MODIFY_EXPR)
4089 tgt = TREE_OPERAND (use->stmt, 0);
4092 exit = single_dom_exit (data->current_loop);
4098 if (!may_replace_final_value (data->current_loop, use, &value))
4102 value = get_computation_at (data->current_loop,
4103 use, cand, last_stmt (exit->src));
4105 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4107 /* If we will preserve the iv anyway and we would need to perform
4108 some computation to replace the final value, do nothing. */
4109 if (stmts && name_info (data, tgt)->preserve_biv)
4112 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4114 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4116 if (USE_FROM_PTR (use_p) == tgt)
4117 SET_USE (use_p, op);
4121 compute_phi_arg_on_exit (exit, stmts, op);
4123 /* Enable removal of the statement. We cannot remove it directly,
4124 since we may still need the aliasing information attached to the
4125 ssa name defined by it. */
4126 name_info (data, tgt)->iv->have_use_for = false;
4130 /* If the variable is going to be preserved anyway, there is nothing to
4132 if (name_info (data, tgt)->preserve_biv)
4135 /* Otherwise we just need to compute the iv. */
4136 rewrite_use_nonlinear_expr (data, use, cand);
4139 /* Rewrites USE using candidate CAND. */
4142 rewrite_use (struct ivopts_data *data,
4143 struct iv_use *use, struct iv_cand *cand)
4147 case USE_NONLINEAR_EXPR:
4148 rewrite_use_nonlinear_expr (data, use, cand);
4152 rewrite_use_outer (data, use, cand);
4156 rewrite_use_address (data, use, cand);
4160 rewrite_use_compare (data, use, cand);
4166 modify_stmt (use->stmt);
4169 /* Rewrite the uses using the selected induction variables. */
4172 rewrite_uses (struct ivopts_data *data)
4175 struct iv_cand *cand;
4178 for (i = 0; i < n_iv_uses (data); i++)
4180 use = iv_use (data, i);
4181 cand = use->selected;
4185 rewrite_use (data, use, cand);
4189 /* Removes the ivs that are not used after rewriting. */
4192 remove_unused_ivs (struct ivopts_data *data)
4196 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4198 struct version_info *info;
4200 info = ver_info (data, j);
4202 && !zero_p (info->iv->step)
4204 && !info->iv->have_use_for
4205 && !info->preserve_biv)
4206 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4210 /* Frees data allocated by the optimization of a single loop. */
4213 free_loop_data (struct ivopts_data *data)
4217 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4219 struct version_info *info;
4221 info = ver_info (data, i);
4225 info->has_nonlin_use = false;
4226 info->preserve_biv = false;
4229 bitmap_clear (data->relevant);
4231 for (i = 0; i < n_iv_uses (data); i++)
4233 struct iv_use *use = iv_use (data, i);
4236 BITMAP_XFREE (use->related_cands);
4237 for (j = 0; j < use->n_map_members; j++)
4238 if (use->cost_map[j].depends_on)
4239 BITMAP_XFREE (use->cost_map[j].depends_on);
4240 free (use->cost_map);
4243 VARRAY_POP_ALL (data->iv_uses);
4245 for (i = 0; i < n_iv_cands (data); i++)
4247 struct iv_cand *cand = iv_cand (data, i);
4253 VARRAY_POP_ALL (data->iv_candidates);
4255 if (data->version_info_size < num_ssa_names)
4257 data->version_info_size = 2 * num_ssa_names;
4258 free (data->version_info);
4259 data->version_info = xcalloc (data->version_info_size,
4260 sizeof (struct version_info));
4263 data->max_inv_id = 0;
4265 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4267 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4269 SET_DECL_RTL (obj, NULL_RTX);
4271 VARRAY_POP_ALL (decl_rtl_to_reset);
4274 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4278 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4282 for (i = 1; i < loops->num; i++)
4283 if (loops->parray[i])
4285 free (loops->parray[i]->aux);
4286 loops->parray[i]->aux = NULL;
4289 free_loop_data (data);
4290 free (data->version_info);
4291 BITMAP_XFREE (data->relevant);
4293 VARRAY_FREE (decl_rtl_to_reset);
4294 VARRAY_FREE (data->iv_uses);
4295 VARRAY_FREE (data->iv_candidates);
4298 /* Optimizes the LOOP. Returns true if anything changed. */
4301 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4303 bool changed = false;
4307 data->current_loop = loop;
4309 if (dump_file && (dump_flags & TDF_DETAILS))
4311 fprintf (dump_file, "Processing loop %d\n", loop->num);
4313 exit = single_dom_exit (loop);
4316 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4317 exit->src->index, exit->dest->index);
4318 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4319 fprintf (dump_file, "\n");
4322 fprintf (dump_file, "\n");
4325 /* For each ssa name determines whether it behaves as an induction variable
4327 if (!find_induction_variables (data))
4330 /* Finds interesting uses (item 1). */
4331 find_interesting_uses (data);
4332 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4335 /* Finds candidates for the induction variables (item 2). */
4336 find_iv_candidates (data);
4338 /* Calculates the costs (item 3, part 1). */
4339 determine_use_iv_costs (data);
4340 determine_iv_costs (data);
4341 determine_set_costs (data);
4343 /* Find the optimal set of induction variables (item 3, part 2). */
4344 iv_set = find_optimal_iv_set (data);
4349 /* Create the new induction variables (item 4, part 1). */
4350 create_new_ivs (data, iv_set);
4352 /* Rewrite the uses (item 4, part 2). */
4353 rewrite_uses (data);
4355 /* Remove the ivs that are unused after rewriting. */
4356 remove_unused_ivs (data);
4358 loop_commit_inserts ();
4360 BITMAP_XFREE (iv_set);
4362 /* We have changed the structure of induction variables; it might happen
4363 that definitions in the scev database refer to some of them that were
4368 free_loop_data (data);
4373 /* Main entry point. Optimizes induction variables in LOOPS. */
4376 tree_ssa_iv_optimize (struct loops *loops)
4379 struct ivopts_data data;
4381 tree_ssa_iv_optimize_init (loops, &data);
4383 /* Optimize the loops starting with the innermost ones. */
4384 loop = loops->tree_root;
4388 #ifdef ENABLE_CHECKING
4389 verify_loop_closed_ssa ();
4392 /* Scan the loops, inner ones first. */
4393 while (loop != loops->tree_root)
4395 if (tree_ssa_iv_optimize_loop (&data, loop))
4397 #ifdef ENABLE_CHECKING
4398 verify_loop_closed_ssa ();
4412 tree_ssa_iv_optimize_finalize (loops, &data);