1 /* Induction variable optimizations.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
292 if (!just_once_each_iteration_p (loop, exit->src))
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
310 fprintf (file, " base ");
311 print_generic_expr (file, iv->base, TDF_SLIM);
312 fprintf (file, "\n");
314 fprintf (file, " step ");
315 print_generic_expr (file, iv->step, TDF_SLIM);
316 fprintf (file, "\n");
320 fprintf (file, " invariant ");
321 print_generic_expr (file, iv->base, TDF_SLIM);
322 fprintf (file, "\n");
326 fprintf (file, " is a biv\n");
329 /* Dumps information about the USE to FILE. */
331 extern void dump_use (FILE *, struct iv_use *);
333 dump_use (FILE *file, struct iv_use *use)
335 struct iv *iv = use->iv;
337 fprintf (file, "use %d\n", use->id);
341 case USE_NONLINEAR_EXPR:
342 fprintf (file, " generic\n");
346 fprintf (file, " outside\n");
350 fprintf (file, " address\n");
354 fprintf (file, " compare\n");
361 fprintf (file, " in statement ");
362 print_generic_expr (file, use->stmt, TDF_SLIM);
363 fprintf (file, "\n");
365 fprintf (file, " at position ");
367 print_generic_expr (file, *use->op_p, TDF_SLIM);
368 fprintf (file, "\n");
372 fprintf (file, " base ");
373 print_generic_expr (file, iv->base, TDF_SLIM);
374 fprintf (file, "\n");
376 fprintf (file, " step ");
377 print_generic_expr (file, iv->step, TDF_SLIM);
378 fprintf (file, "\n");
382 fprintf (file, " invariant ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " related candidates ");
388 dump_bitmap (file, use->related_cands);
391 /* Dumps information about the uses to FILE. */
393 extern void dump_uses (FILE *, struct ivopts_data *);
395 dump_uses (FILE *file, struct ivopts_data *data)
400 for (i = 0; i < n_iv_uses (data); i++)
402 use = iv_use (data, i);
404 dump_use (file, use);
405 fprintf (file, "\n");
409 /* Dumps information about induction variable candidate CAND to FILE. */
411 extern void dump_cand (FILE *, struct iv_cand *);
413 dump_cand (FILE *file, struct iv_cand *cand)
415 struct iv *iv = cand->iv;
417 fprintf (file, "candidate %d%s\n",
418 cand->id, cand->important ? " (important)" : "");
422 fprintf (file, " final value replacement\n");
429 fprintf (file, " incremented before exit test\n");
433 fprintf (file, " incremented at end\n");
437 fprintf (file, " original biv\n");
443 fprintf (file, " base ");
444 print_generic_expr (file, iv->base, TDF_SLIM);
445 fprintf (file, "\n");
447 fprintf (file, " step ");
448 print_generic_expr (file, iv->step, TDF_SLIM);
449 fprintf (file, "\n");
453 fprintf (file, " invariant ");
454 print_generic_expr (file, iv->base, TDF_SLIM);
455 fprintf (file, "\n");
459 /* Returns the info for ssa version VER. */
461 static inline struct version_info *
462 ver_info (struct ivopts_data *data, unsigned ver)
464 return data->version_info + ver;
467 /* Returns the info for ssa name NAME. */
469 static inline struct version_info *
470 name_info (struct ivopts_data *data, tree name)
472 return ver_info (data, SSA_NAME_VERSION (name));
475 /* Checks whether there exists number X such that X * B = A, counting modulo
479 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
482 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
483 unsigned HOST_WIDE_INT inv, ex, val;
489 /* First divide the whole equation by 2 as long as possible. */
490 while (!(a & 1) && !(b & 1))
500 /* If b is still even, a is odd and there is no such x. */
504 /* Find the inverse of b. We compute it as
505 b^(2^(bits - 1) - 1) (mod 2^bits). */
508 for (i = 0; i < bits - 1; i++)
510 inv = (inv * ex) & mask;
511 ex = (ex * ex) & mask;
514 val = (a * inv) & mask;
516 if (((val * b) & mask) != a)
519 if ((val >> (bits - 1)) & 1)
527 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
531 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
533 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
538 if (sbb == loop->latch)
544 return stmt == last_stmt (bb);
547 /* Returns true if STMT if after the place where the original induction
548 variable CAND is incremented. */
551 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
553 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
554 basic_block stmt_bb = bb_for_stmt (stmt);
555 block_stmt_iterator bsi;
557 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
560 if (stmt_bb != cand_bb)
563 /* Scan the block from the end, since the original ivs are usually
564 incremented at the end of the loop body. */
565 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
567 if (bsi_stmt (bsi) == cand->incremented_at)
569 if (bsi_stmt (bsi) == stmt)
574 /* Returns true if STMT if after the place where the induction variable
575 CAND is incremented in LOOP. */
578 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
586 return stmt_after_ip_normal_pos (loop, stmt);
589 return stmt_after_ip_original_pos (cand, stmt);
596 /* Initializes data structures used by the iv optimization pass, stored
597 in DATA. LOOPS is the loop tree. */
600 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
604 data->version_info_size = 2 * num_ssa_names;
605 data->version_info = xcalloc (data->version_info_size,
606 sizeof (struct version_info));
607 data->relevant = BITMAP_XMALLOC ();
608 data->max_inv_id = 0;
610 for (i = 1; i < loops->num; i++)
611 if (loops->parray[i])
612 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
614 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
615 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
616 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
619 /* Allocates an induction variable with given initial value BASE and step STEP
623 alloc_iv (tree base, tree step)
625 struct iv *iv = xcalloc (1, sizeof (struct iv));
627 if (step && integer_zerop (step))
633 iv->have_use_for = false;
635 iv->ssa_name = NULL_TREE;
640 /* Sets STEP and BASE for induction variable IV. */
643 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
645 struct version_info *info = name_info (data, iv);
650 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
651 info->iv = alloc_iv (base, step);
652 info->iv->ssa_name = iv;
655 /* Finds induction variable declaration for VAR. */
658 get_iv (struct ivopts_data *data, tree var)
662 if (!name_info (data, var)->iv)
664 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
667 || !flow_bb_inside_loop_p (data->current_loop, bb))
668 set_iv (data, var, var, NULL_TREE);
671 return name_info (data, var)->iv;
674 /* Determines the step of a biv defined in PHI. */
677 determine_biv_step (tree phi)
679 struct loop *loop = bb_for_stmt (phi)->loop_father;
680 tree name = PHI_RESULT (phi), base, step;
681 tree type = TREE_TYPE (name);
683 if (!is_gimple_reg (name))
686 if (!simple_iv (loop, phi, name, &base, &step))
690 return build_int_cst (type, 0);
695 /* Returns false if INDEX is a ssa name that occurs in an
696 abnormal phi node. Callback for for_each_index. */
699 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
700 void *data ATTRIBUTE_UNUSED)
702 if (TREE_CODE (*index) != SSA_NAME)
705 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
708 /* Returns true if EXPR contains a ssa name that occurs in an
709 abnormal phi node. */
712 contains_abnormal_ssa_name_p (tree expr)
714 enum tree_code code = TREE_CODE (expr);
715 char class = TREE_CODE_CLASS (code);
717 if (code == SSA_NAME)
718 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
720 if (code == INTEGER_CST
721 || is_gimple_min_invariant (expr))
724 if (code == ADDR_EXPR)
725 return !for_each_index (&TREE_OPERAND (expr, 1),
726 idx_contains_abnormal_ssa_name_p,
733 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
738 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
750 /* Finds basic ivs. */
753 find_bivs (struct ivopts_data *data)
755 tree phi, step, type, base;
757 struct loop *loop = data->current_loop;
759 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
761 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
764 step = determine_biv_step (phi);
768 if (cst_and_fits_in_hwi (step)
769 && int_cst_value (step) == 0)
772 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
773 if (contains_abnormal_ssa_name_p (base))
776 type = TREE_TYPE (PHI_RESULT (phi));
777 base = fold_convert (type, base);
778 step = fold_convert (type, step);
780 /* FIXME: We do not handle induction variables whose step does
781 not satisfy cst_and_fits_in_hwi. */
782 if (!cst_and_fits_in_hwi (step))
785 set_iv (data, PHI_RESULT (phi), base, step);
792 /* Marks basic ivs. */
795 mark_bivs (struct ivopts_data *data)
798 struct iv *iv, *incr_iv;
799 struct loop *loop = data->current_loop;
802 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
804 iv = get_iv (data, PHI_RESULT (phi));
808 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
809 incr_iv = get_iv (data, var);
813 /* If the increment is in the subloop, ignore it. */
814 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
815 if (incr_bb->loop_father != data->current_loop
816 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
820 incr_iv->biv_p = true;
824 /* Checks whether STMT defines a linear induction variable and stores its
825 parameters to BASE and STEP. */
828 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
829 tree *base, tree *step)
832 struct loop *loop = data->current_loop;
837 if (TREE_CODE (stmt) != MODIFY_EXPR)
840 lhs = TREE_OPERAND (stmt, 0);
841 if (TREE_CODE (lhs) != SSA_NAME)
844 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
847 /* FIXME: We do not handle induction variables whose step does
848 not satisfy cst_and_fits_in_hwi. */
850 && !cst_and_fits_in_hwi (*step))
853 if (contains_abnormal_ssa_name_p (*base))
859 /* Finds general ivs in statement STMT. */
862 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
866 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
869 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
872 /* Finds general ivs in basic block BB. */
875 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
877 block_stmt_iterator bsi;
879 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
880 find_givs_in_stmt (data, bsi_stmt (bsi));
883 /* Finds general ivs. */
886 find_givs (struct ivopts_data *data)
888 struct loop *loop = data->current_loop;
889 basic_block *body = get_loop_body_in_dom_order (loop);
892 for (i = 0; i < loop->num_nodes; i++)
893 find_givs_in_bb (data, body[i]);
897 /* Determine the number of iterations of the current loop. */
900 determine_number_of_iterations (struct ivopts_data *data)
902 struct loop *loop = data->current_loop;
903 edge exit = single_dom_exit (loop);
908 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
911 /* For each ssa name defined in LOOP determines whether it is an induction
912 variable and if so, its initial value and step. */
915 find_induction_variables (struct ivopts_data *data)
918 struct loop *loop = data->current_loop;
920 if (!find_bivs (data))
925 determine_number_of_iterations (data);
927 if (dump_file && (dump_flags & TDF_DETAILS))
929 if (loop_data (loop)->niter.niter)
931 fprintf (dump_file, " number of iterations ");
932 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
934 fprintf (dump_file, "\n");
936 fprintf (dump_file, " may be zero if ");
937 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
939 fprintf (dump_file, "\n");
941 fprintf (dump_file, " bogus unless ");
942 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
944 fprintf (dump_file, "\n");
945 fprintf (dump_file, "\n");
948 fprintf (dump_file, "Induction variables:\n\n");
950 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
952 if (ver_info (data, i)->iv)
953 dump_iv (dump_file, ver_info (data, i)->iv);
960 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
962 static struct iv_use *
963 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
964 tree stmt, enum use_type use_type)
966 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
968 use->id = n_iv_uses (data);
969 use->type = use_type;
973 use->related_cands = BITMAP_XMALLOC ();
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 dump_use (dump_file, use);
978 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
983 /* Checks whether OP is a loop-level invariant and if so, records it.
984 NONLINEAR_USE is true if the invariant is used in a way we do not
988 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
991 struct version_info *info;
993 if (TREE_CODE (op) != SSA_NAME
994 || !is_gimple_reg (op))
997 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
999 && flow_bb_inside_loop_p (data->current_loop, bb))
1002 info = name_info (data, op);
1004 info->has_nonlin_use |= nonlinear_use;
1006 info->inv_id = ++data->max_inv_id;
1007 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1010 /* Checks whether the use OP is interesting and if so, records it
1013 static struct iv_use *
1014 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1022 if (TREE_CODE (op) != SSA_NAME)
1025 iv = get_iv (data, op);
1029 if (iv->have_use_for)
1031 use = iv_use (data, iv->use_id);
1033 if (use->type != USE_NONLINEAR_EXPR
1034 && use->type != USE_OUTER)
1037 if (type == USE_NONLINEAR_EXPR)
1038 use->type = USE_NONLINEAR_EXPR;
1042 if (zero_p (iv->step))
1044 record_invariant (data, op, true);
1047 iv->have_use_for = true;
1049 civ = xmalloc (sizeof (struct iv));
1052 stmt = SSA_NAME_DEF_STMT (op);
1053 if (TREE_CODE (stmt) != PHI_NODE
1054 && TREE_CODE (stmt) != MODIFY_EXPR)
1057 use = record_use (data, NULL, civ, stmt, type);
1058 iv->use_id = use->id;
1063 /* Checks whether the use OP is interesting and if so, records it. */
1065 static struct iv_use *
1066 find_interesting_uses_op (struct ivopts_data *data, tree op)
1068 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1071 /* Records a definition of induction variable OP that is used outside of the
1074 static struct iv_use *
1075 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1077 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1080 /* Checks whether the condition *COND_P in STMT is interesting
1081 and if so, records it. */
1084 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1088 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1090 tree zero = integer_zero_node;
1092 const_iv.step = NULL_TREE;
1094 if (integer_zerop (*cond_p)
1095 || integer_nonzerop (*cond_p))
1098 if (TREE_CODE (*cond_p) == SSA_NAME)
1105 op0_p = &TREE_OPERAND (*cond_p, 0);
1106 op1_p = &TREE_OPERAND (*cond_p, 1);
1109 if (TREE_CODE (*op0_p) == SSA_NAME)
1110 iv0 = get_iv (data, *op0_p);
1114 if (TREE_CODE (*op1_p) == SSA_NAME)
1115 iv1 = get_iv (data, *op1_p);
1119 if (/* When comparing with non-invariant value, we may not do any senseful
1120 induction variable elimination. */
1122 /* Eliminating condition based on two ivs would be nontrivial.
1123 ??? TODO -- it is not really important to handle this case. */
1124 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1126 find_interesting_uses_op (data, *op0_p);
1127 find_interesting_uses_op (data, *op1_p);
1131 if (zero_p (iv0->step) && zero_p (iv1->step))
1133 /* If both are invariants, this is a work for unswitching. */
1137 civ = xmalloc (sizeof (struct iv));
1138 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1139 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1142 /* Cumulates the steps of indices into DATA and replaces their values with the
1143 initial ones. Returns false when the value of the index cannot be determined.
1144 Callback for for_each_index. */
1146 struct ifs_ivopts_data
1148 struct ivopts_data *ivopts_data;
1154 idx_find_step (tree base, tree *idx, void *data)
1156 struct ifs_ivopts_data *dta = data;
1158 tree step, type, iv_type, iv_step;
1160 if (TREE_CODE (*idx) != SSA_NAME)
1163 iv = get_iv (dta->ivopts_data, *idx);
1172 iv_type = TREE_TYPE (iv->base);
1173 type = build_pointer_type (TREE_TYPE (base));
1174 if (TREE_CODE (base) == ARRAY_REF)
1175 step = array_ref_element_size (base);
1177 /* The step for pointer arithmetics already is 1 byte. */
1178 step = build_int_cst (type, 1);
1180 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1181 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1182 type, iv->base, iv->step, dta->stmt);
1184 iv_step = fold_convert (iv_type, iv->step);
1188 /* The index might wrap. */
1192 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1195 *dta->step_p = step;
1197 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1202 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1203 object is passed to it in DATA. */
1206 idx_record_use (tree base ATTRIBUTE_UNUSED, tree *idx,
1209 find_interesting_uses_op (data, *idx);
1213 /* Finds addresses in *OP_P inside STMT. */
1216 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1218 tree base = unshare_expr (*op_p), step = NULL;
1220 struct ifs_ivopts_data ifs_ivopts_data;
1222 /* Ignore bitfields for now. Not really something terribly complicated
1224 if (TREE_CODE (base) == COMPONENT_REF
1225 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1228 ifs_ivopts_data.ivopts_data = data;
1229 ifs_ivopts_data.stmt = stmt;
1230 ifs_ivopts_data.step_p = &step;
1231 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1235 if (TREE_CODE (base) == INDIRECT_REF)
1236 base = TREE_OPERAND (base, 0);
1238 base = build_addr (base);
1240 civ = alloc_iv (base, step);
1241 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1245 for_each_index (op_p, idx_record_use, data);
1248 /* Finds and records invariants used in STMT. */
1251 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1253 use_optype uses = NULL;
1257 if (TREE_CODE (stmt) == PHI_NODE)
1258 n = PHI_NUM_ARGS (stmt);
1261 get_stmt_operands (stmt);
1262 uses = STMT_USE_OPS (stmt);
1263 n = NUM_USES (uses);
1266 for (i = 0; i < n; i++)
1268 if (TREE_CODE (stmt) == PHI_NODE)
1269 op = PHI_ARG_DEF (stmt, i);
1271 op = USE_OP (uses, i);
1273 record_invariant (data, op, false);
1277 /* Finds interesting uses of induction variables in the statement STMT. */
1280 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1284 use_optype uses = NULL;
1287 find_invariants_stmt (data, stmt);
1289 if (TREE_CODE (stmt) == COND_EXPR)
1291 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1295 if (TREE_CODE (stmt) == MODIFY_EXPR)
1297 lhs = TREE_OPERAND (stmt, 0);
1298 rhs = TREE_OPERAND (stmt, 1);
1300 if (TREE_CODE (lhs) == SSA_NAME)
1302 /* If the statement defines an induction variable, the uses are not
1303 interesting by themselves. */
1305 iv = get_iv (data, lhs);
1307 if (iv && !zero_p (iv->step))
1311 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1314 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1318 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1319 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1320 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1326 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1328 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1329 find_interesting_uses_op (data, rhs);
1334 if (TREE_CODE (stmt) == PHI_NODE
1335 && bb_for_stmt (stmt) == data->current_loop->header)
1337 lhs = PHI_RESULT (stmt);
1338 iv = get_iv (data, lhs);
1340 if (iv && !zero_p (iv->step))
1344 if (TREE_CODE (stmt) == PHI_NODE)
1345 n = PHI_NUM_ARGS (stmt);
1348 uses = STMT_USE_OPS (stmt);
1349 n = NUM_USES (uses);
1352 for (i = 0; i < n; i++)
1354 if (TREE_CODE (stmt) == PHI_NODE)
1355 op = PHI_ARG_DEF (stmt, i);
1357 op = USE_OP (uses, i);
1359 if (TREE_CODE (op) != SSA_NAME)
1362 iv = get_iv (data, op);
1366 find_interesting_uses_op (data, op);
1370 /* Finds interesting uses of induction variables outside of loops
1371 on loop exit edge EXIT. */
1374 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1378 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1380 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1381 find_interesting_uses_outer (data, def);
1385 /* Finds uses of the induction variables that are interesting. */
1388 find_interesting_uses (struct ivopts_data *data)
1391 block_stmt_iterator bsi;
1393 basic_block *body = get_loop_body (data->current_loop);
1395 struct version_info *info;
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1399 fprintf (dump_file, "Uses:\n\n");
1401 for (i = 0; i < data->current_loop->num_nodes; i++)
1405 for (e = bb->succ; e; e = e->succ_next)
1406 if (e->dest != EXIT_BLOCK_PTR
1407 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1408 find_interesting_uses_outside (data, e);
1410 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1411 find_interesting_uses_stmt (data, phi);
1412 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1413 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1416 if (dump_file && (dump_flags & TDF_DETAILS))
1418 fprintf (dump_file, "\n");
1420 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1422 info = ver_info (data, i);
1425 fprintf (dump_file, " ");
1426 print_generic_expr (dump_file, info->name, TDF_SLIM);
1427 fprintf (dump_file, " is invariant (%d)%s\n",
1428 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1432 fprintf (dump_file, "\n");
1438 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1439 position to POS. If USE is not NULL, the candidate is set as related to
1440 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1441 replacement of the final value of the iv by a direct computation. */
1443 static struct iv_cand *
1444 add_candidate_1 (struct ivopts_data *data,
1445 tree base, tree step, bool important, enum iv_position pos,
1446 struct iv_use *use, tree incremented_at)
1449 struct iv_cand *cand = NULL;
1454 type = TREE_TYPE (base);
1455 if (!TYPE_UNSIGNED (type))
1457 type = unsigned_type_for (type);
1458 base = fold_convert (type, base);
1460 step = fold_convert (type, step);
1464 for (i = 0; i < n_iv_cands (data); i++)
1466 cand = iv_cand (data, i);
1468 if (cand->pos != pos)
1471 if (cand->incremented_at != incremented_at)
1485 if (!operand_equal_p (base, cand->iv->base, 0))
1488 if (zero_p (cand->iv->step))
1495 if (step && operand_equal_p (step, cand->iv->step, 0))
1500 if (i == n_iv_cands (data))
1502 cand = xcalloc (1, sizeof (struct iv_cand));
1508 cand->iv = alloc_iv (base, step);
1511 if (pos != IP_ORIGINAL && cand->iv)
1513 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1514 cand->var_after = cand->var_before;
1516 cand->important = important;
1517 cand->incremented_at = incremented_at;
1518 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1520 if (dump_file && (dump_flags & TDF_DETAILS))
1521 dump_cand (dump_file, cand);
1524 if (important && !cand->important)
1526 cand->important = true;
1527 if (dump_file && (dump_flags & TDF_DETAILS))
1528 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1533 bitmap_set_bit (use->related_cands, i);
1534 if (dump_file && (dump_flags & TDF_DETAILS))
1535 fprintf (dump_file, "Candidate %d is related to use %d\n",
1542 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1543 position to POS. If USE is not NULL, the candidate is set as related to
1544 it. The candidate computation is scheduled on all available positions. */
1547 add_candidate (struct ivopts_data *data,
1548 tree base, tree step, bool important, struct iv_use *use)
1550 if (ip_normal_pos (data->current_loop))
1551 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1552 if (ip_end_pos (data->current_loop))
1553 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1556 /* Adds standard iv candidates. */
1559 add_standard_iv_candidates (struct ivopts_data *data)
1561 /* Add 0 + 1 * iteration candidate. */
1562 add_candidate (data,
1563 build_int_cst (unsigned_intSI_type_node, 0),
1564 build_int_cst (unsigned_intSI_type_node, 1),
1567 /* The same for a long type if it is still fast enought. */
1568 if (BITS_PER_WORD > 32)
1569 add_candidate (data,
1570 build_int_cst (unsigned_intDI_type_node, 0),
1571 build_int_cst (unsigned_intDI_type_node, 1),
1576 /* Adds candidates bases on the old induction variable IV. */
1579 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1582 struct iv_cand *cand;
1584 add_candidate (data, iv->base, iv->step, true, NULL);
1586 /* The same, but with initial value zero. */
1587 add_candidate (data,
1588 build_int_cst (TREE_TYPE (iv->base), 0),
1589 iv->step, true, NULL);
1591 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1592 if (TREE_CODE (phi) == PHI_NODE)
1594 /* Additionally record the possibility of leaving the original iv
1596 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1597 cand = add_candidate_1 (data,
1598 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1599 SSA_NAME_DEF_STMT (def));
1600 cand->var_before = iv->ssa_name;
1601 cand->var_after = def;
1605 /* Adds candidates based on the old induction variables. */
1608 add_old_ivs_candidates (struct ivopts_data *data)
1613 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1615 iv = ver_info (data, i)->iv;
1616 if (iv && iv->biv_p && !zero_p (iv->step))
1617 add_old_iv_candidates (data, iv);
1621 /* Adds candidates based on the value of the induction variable IV and USE. */
1624 add_iv_value_candidates (struct ivopts_data *data,
1625 struct iv *iv, struct iv_use *use)
1627 add_candidate (data, iv->base, iv->step, false, use);
1629 /* The same, but with initial value zero. */
1630 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1631 iv->step, false, use);
1634 /* Adds candidates based on the address IV and USE. */
1637 add_address_candidates (struct ivopts_data *data,
1638 struct iv *iv, struct iv_use *use)
1640 tree base, abase, tmp, *act;
1642 /* First, the trivial choices. */
1643 add_iv_value_candidates (data, iv, use);
1645 /* Second, try removing the COMPONENT_REFs. */
1646 if (TREE_CODE (iv->base) == ADDR_EXPR)
1648 base = TREE_OPERAND (iv->base, 0);
1649 while (TREE_CODE (base) == COMPONENT_REF
1650 || (TREE_CODE (base) == ARRAY_REF
1651 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1652 base = TREE_OPERAND (base, 0);
1654 if (base != TREE_OPERAND (iv->base, 0))
1656 if (TREE_CODE (base) == INDIRECT_REF)
1657 base = TREE_OPERAND (base, 0);
1659 base = build_addr (base);
1660 add_candidate (data, base, iv->step, false, use);
1664 /* Third, try removing the constant offset. */
1666 while (TREE_CODE (abase) == PLUS_EXPR
1667 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1668 abase = TREE_OPERAND (abase, 0);
1669 /* We found the offset, so make the copy of the non-shared part and
1671 if (TREE_CODE (abase) == PLUS_EXPR)
1676 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1678 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1679 NULL_TREE, TREE_OPERAND (tmp, 1));
1680 act = &TREE_OPERAND (*act, 0);
1682 *act = TREE_OPERAND (tmp, 0);
1684 add_candidate (data, base, iv->step, false, use);
1688 /* Possibly adds pseudocandidate for replacing the final value of USE by
1689 a direct computation. */
1692 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1694 struct tree_niter_desc *niter;
1695 struct loop *loop = data->current_loop;
1697 /* We must know where we exit the loop and how many times does it roll. */
1698 if (!single_dom_exit (loop))
1701 niter = &loop_data (loop)->niter;
1703 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1704 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1707 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1710 /* Adds candidates based on the uses. */
1713 add_derived_ivs_candidates (struct ivopts_data *data)
1717 for (i = 0; i < n_iv_uses (data); i++)
1719 struct iv_use *use = iv_use (data, i);
1726 case USE_NONLINEAR_EXPR:
1728 /* Just add the ivs based on the value of the iv used here. */
1729 add_iv_value_candidates (data, use->iv, use);
1733 add_iv_value_candidates (data, use->iv, use);
1735 /* Additionally, add the pseudocandidate for the possibility to
1736 replace the final value by a direct computation. */
1737 add_iv_outer_candidates (data, use);
1741 add_address_candidates (data, use->iv, use);
1750 /* Finds the candidates for the induction variables. */
1753 find_iv_candidates (struct ivopts_data *data)
1755 /* Add commonly used ivs. */
1756 add_standard_iv_candidates (data);
1758 /* Add old induction variables. */
1759 add_old_ivs_candidates (data);
1761 /* Add induction variables derived from uses. */
1762 add_derived_ivs_candidates (data);
1765 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1766 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1767 we allocate a simple list to every use. */
1770 alloc_use_cost_map (struct ivopts_data *data)
1772 unsigned i, n_imp = 0, size, j;
1774 if (!data->consider_all_candidates)
1776 for (i = 0; i < n_iv_cands (data); i++)
1778 struct iv_cand *cand = iv_cand (data, i);
1779 if (cand->important)
1784 for (i = 0; i < n_iv_uses (data); i++)
1786 struct iv_use *use = iv_use (data, i);
1788 if (data->consider_all_candidates)
1790 size = n_iv_cands (data);
1791 use->n_map_members = size;
1796 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1797 use->n_map_members = 0;
1800 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1804 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1805 on invariants DEPENDS_ON. */
1808 set_use_iv_cost (struct ivopts_data *data,
1809 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1815 BITMAP_XFREE (depends_on);
1819 if (data->consider_all_candidates)
1821 use->cost_map[cand->id].cand = cand;
1822 use->cost_map[cand->id].cost = cost;
1823 use->cost_map[cand->id].depends_on = depends_on;
1830 use->cost_map[use->n_map_members].cand = cand;
1831 use->cost_map[use->n_map_members].cost = cost;
1832 use->cost_map[use->n_map_members].depends_on = depends_on;
1833 use->n_map_members++;
1836 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1840 get_use_iv_cost (struct ivopts_data *data,
1841 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1848 if (data->consider_all_candidates)
1852 for (i = 0; i < use->n_map_members; i++)
1853 if (use->cost_map[i].cand == cand)
1856 if (i == use->n_map_members)
1861 *depends_on = use->cost_map[i].depends_on;
1862 return use->cost_map[i].cost;
1865 /* Returns estimate on cost of computing SEQ. */
1873 for (; seq; seq = NEXT_INSN (seq))
1875 set = single_set (seq);
1877 cost += rtx_cost (set, SET);
1885 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1887 produce_memory_decl_rtl (tree obj, int *regno)
1892 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1894 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1895 x = gen_rtx_SYMBOL_REF (Pmode, name);
1898 x = gen_raw_REG (Pmode, (*regno)++);
1900 return gen_rtx_MEM (DECL_MODE (obj), x);
1903 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1904 walk_tree. DATA contains the actual fake register number. */
1907 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1909 tree obj = NULL_TREE;
1913 switch (TREE_CODE (*expr_p))
1916 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1917 (handled_component_p (*expr_p)
1918 || TREE_CODE (*expr_p) == REALPART_EXPR
1919 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1920 expr_p = &TREE_OPERAND (*expr_p, 0));
1923 x = produce_memory_decl_rtl (obj, regno);
1928 obj = SSA_NAME_VAR (*expr_p);
1929 if (!DECL_RTL_SET_P (obj))
1930 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1939 if (DECL_RTL_SET_P (obj))
1942 if (DECL_MODE (obj) == BLKmode)
1943 x = produce_memory_decl_rtl (obj, regno);
1945 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1955 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
1956 SET_DECL_RTL (obj, x);
1962 /* Determines cost of the computation of EXPR. */
1965 computation_cost (tree expr)
1968 tree type = TREE_TYPE (expr);
1972 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
1974 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
1978 cost = seq_cost (seq);
1979 if (GET_CODE (rslt) == MEM)
1980 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
1985 /* Returns variable containing the value of candidate CAND at statement AT. */
1988 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
1990 if (stmt_after_increment (loop, cand, stmt))
1991 return cand->var_after;
1993 return cand->var_before;
1996 /* Determines the expression by that USE is expressed from induction variable
1997 CAND at statement AT in LOOP. */
2000 get_computation_at (struct loop *loop,
2001 struct iv_use *use, struct iv_cand *cand, tree at)
2003 tree ubase = unsave_expr_now (use->iv->base);
2004 tree ustep = unsave_expr_now (use->iv->step);
2005 tree cbase = unsave_expr_now (cand->iv->base);
2006 tree cstep = unsave_expr_now (cand->iv->step);
2007 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2011 unsigned HOST_WIDE_INT ustepi, cstepi;
2012 HOST_WIDE_INT ratioi;
2014 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2016 /* We do not have a precision to express the values of use. */
2020 expr = var_at_stmt (loop, cand, at);
2022 if (TREE_TYPE (expr) != ctype)
2024 /* This may happen with the original ivs. */
2025 expr = fold_convert (ctype, expr);
2028 if (TYPE_UNSIGNED (utype))
2032 uutype = unsigned_type_for (utype);
2033 ubase = fold_convert (uutype, ubase);
2034 ustep = fold_convert (uutype, ustep);
2037 if (uutype != ctype)
2039 expr = fold_convert (uutype, expr);
2040 cbase = fold_convert (uutype, cbase);
2041 cstep = fold_convert (uutype, cstep);
2044 if (!cst_and_fits_in_hwi (cstep)
2045 || !cst_and_fits_in_hwi (ustep))
2048 ustepi = int_cst_value (ustep);
2049 cstepi = int_cst_value (cstep);
2051 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2053 /* TODO maybe consider case when ustep divides cstep and the ratio is
2054 a power of 2 (so that the division is fast to execute)? We would
2055 need to be much more careful with overflows etc. then. */
2059 /* We may need to shift the value if we are after the increment. */
2060 if (stmt_after_increment (loop, cand, at))
2061 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2063 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2064 or |ratio| == 1, it is better to handle this like
2066 ubase - ratio * cbase + ratio * var. */
2070 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2071 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2073 else if (ratioi == -1)
2075 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2076 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2078 else if (TREE_CODE (cbase) == INTEGER_CST)
2080 ratio = build_int_cst_type (uutype, ratioi);
2081 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2082 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2083 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2084 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2088 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2089 ratio = build_int_cst_type (uutype, ratioi);
2090 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2091 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2094 return fold_convert (utype, expr);
2097 /* Determines the expression by that USE is expressed from induction variable
2101 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2103 return get_computation_at (loop, use, cand, use->stmt);
2106 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2109 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2112 enum tree_code code;
2116 if (cst_and_fits_in_hwi (*expr))
2118 *offset += int_cst_value (*expr);
2119 *expr = integer_zero_node;
2123 code = TREE_CODE (*expr);
2125 if (code != PLUS_EXPR && code != MINUS_EXPR)
2128 op0 = TREE_OPERAND (*expr, 0);
2129 op1 = TREE_OPERAND (*expr, 1);
2131 if (cst_and_fits_in_hwi (op1))
2133 if (code == PLUS_EXPR)
2134 *offset += int_cst_value (op1);
2136 *offset -= int_cst_value (op1);
2142 if (code != PLUS_EXPR)
2145 if (!cst_and_fits_in_hwi (op0))
2148 *offset += int_cst_value (op0);
2153 /* Returns cost of addition in MODE. */
2156 add_cost (enum machine_mode mode)
2158 static unsigned costs[NUM_MACHINE_MODES];
2166 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2167 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2168 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2173 cost = seq_cost (seq);
2179 if (dump_file && (dump_flags & TDF_DETAILS))
2180 fprintf (dump_file, "Addition in %s costs %d\n",
2181 GET_MODE_NAME (mode), cost);
2185 /* Entry in a hashtable of already known costs for multiplication. */
2188 HOST_WIDE_INT cst; /* The constant to multiply by. */
2189 enum machine_mode mode; /* In mode. */
2190 unsigned cost; /* The cost. */
2193 /* Counts hash value for the ENTRY. */
2196 mbc_entry_hash (const void *entry)
2198 const struct mbc_entry *e = entry;
2200 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2203 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2206 mbc_entry_eq (const void *entry1, const void *entry2)
2208 const struct mbc_entry *e1 = entry1;
2209 const struct mbc_entry *e2 = entry2;
2211 return (e1->mode == e2->mode
2212 && e1->cst == e2->cst);
2215 /* Returns cost of multiplication by constant CST in MODE. */
2218 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2220 static htab_t costs;
2221 struct mbc_entry **cached, act;
2226 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2230 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2232 return (*cached)->cost;
2234 *cached = xmalloc (sizeof (struct mbc_entry));
2235 (*cached)->mode = mode;
2236 (*cached)->cst = cst;
2239 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2244 cost = seq_cost (seq);
2246 if (dump_file && (dump_flags & TDF_DETAILS))
2247 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2248 (int) cst, GET_MODE_NAME (mode), cost);
2250 (*cached)->cost = cost;
2255 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2256 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2257 variable is omitted. The created memory accesses MODE.
2259 TODO -- there must be some better way. This all is quite crude. */
2262 get_address_cost (bool symbol_present, bool var_present,
2263 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2265 #define MAX_RATIO 128
2266 static sbitmap valid_mult;
2267 static HOST_WIDE_INT rat, off;
2268 static HOST_WIDE_INT min_offset, max_offset;
2269 static unsigned costs[2][2][2][2];
2270 unsigned cost, acost;
2271 rtx seq, addr, base;
2272 bool offset_p, ratio_p;
2274 HOST_WIDE_INT s_offset;
2275 unsigned HOST_WIDE_INT mask;
2282 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2284 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2285 for (i = 1; i <= 1 << 20; i <<= 1)
2287 XEXP (addr, 1) = GEN_INT (i);
2288 if (!memory_address_p (Pmode, addr))
2291 max_offset = i >> 1;
2294 for (i = 1; i <= 1 << 20; i <<= 1)
2296 XEXP (addr, 1) = GEN_INT (-i);
2297 if (!memory_address_p (Pmode, addr))
2300 min_offset = -(i >> 1);
2302 if (dump_file && (dump_flags & TDF_DETAILS))
2304 fprintf (dump_file, "get_address_cost:\n");
2305 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2306 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2309 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2310 sbitmap_zero (valid_mult);
2312 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2313 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2315 XEXP (addr, 1) = GEN_INT (i);
2316 if (memory_address_p (Pmode, addr))
2318 SET_BIT (valid_mult, i + MAX_RATIO);
2323 if (dump_file && (dump_flags & TDF_DETAILS))
2325 fprintf (dump_file, " allowed multipliers:");
2326 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2327 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2328 fprintf (dump_file, " %d", (int) i);
2329 fprintf (dump_file, "\n");
2330 fprintf (dump_file, "\n");
2334 bits = GET_MODE_BITSIZE (Pmode);
2335 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2337 if ((offset >> (bits - 1) & 1))
2342 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2343 ratio_p = (ratio != 1
2344 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2345 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2347 if (ratio != 1 && !ratio_p)
2348 cost += multiply_by_cost (ratio, Pmode);
2350 if (s_offset && !offset_p && !symbol_present)
2352 cost += add_cost (Pmode);
2356 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2361 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2362 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2364 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2368 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2370 base = gen_rtx_fmt_e (CONST, Pmode,
2371 gen_rtx_fmt_ee (PLUS, Pmode,
2375 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2378 else if (var_present)
2382 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2385 base = GEN_INT (off);
2390 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2393 addr = memory_address (Pmode, addr);
2397 acost = seq_cost (seq);
2398 acost += address_cost (addr, Pmode);
2402 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2405 return cost + acost;
2408 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2409 the bitmap to that we should store it. */
2411 static struct ivopts_data *fd_ivopts_data;
2413 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2415 bitmap *depends_on = data;
2416 struct version_info *info;
2418 if (TREE_CODE (*expr_p) != SSA_NAME)
2420 info = name_info (fd_ivopts_data, *expr_p);
2422 if (!info->inv_id || info->has_nonlin_use)
2426 *depends_on = BITMAP_XMALLOC ();
2427 bitmap_set_bit (*depends_on, info->inv_id);
2432 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2433 invariants the computation depends on. */
2436 force_var_cost (struct ivopts_data *data,
2437 tree expr, bitmap *depends_on)
2439 static bool costs_initialized = false;
2440 static unsigned integer_cost;
2441 static unsigned symbol_cost;
2442 static unsigned address_cost;
2444 if (!costs_initialized)
2446 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2447 rtx x = gen_rtx_MEM (DECL_MODE (var),
2448 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2450 tree type = build_pointer_type (integer_type_node);
2452 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2455 SET_DECL_RTL (var, x);
2456 TREE_STATIC (var) = 1;
2457 addr = build1 (ADDR_EXPR, type, var);
2458 symbol_cost = computation_cost (addr) + 1;
2461 = computation_cost (build2 (PLUS_EXPR, type,
2463 build_int_cst_type (type, 2000))) + 1;
2464 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file, "force_var_cost:\n");
2467 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2468 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2469 fprintf (dump_file, " address %d\n", (int) address_cost);
2470 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2471 fprintf (dump_file, "\n");
2474 costs_initialized = true;
2479 fd_ivopts_data = data;
2480 walk_tree (&expr, find_depends, depends_on, NULL);
2483 if (SSA_VAR_P (expr))
2486 if (TREE_INVARIANT (expr))
2488 if (TREE_CODE (expr) == INTEGER_CST)
2489 return integer_cost;
2491 if (TREE_CODE (expr) == ADDR_EXPR)
2493 tree obj = TREE_OPERAND (expr, 0);
2495 if (TREE_CODE (obj) == VAR_DECL
2496 || TREE_CODE (obj) == PARM_DECL
2497 || TREE_CODE (obj) == RESULT_DECL)
2501 return address_cost;
2504 /* Just an arbitrary value, FIXME. */
2505 return target_spill_cost;
2508 /* Peels a single layer of ADDR. If DIFF is not NULL, do it only if the
2509 offset is constant and add the offset to DIFF. */
2512 peel_address (tree addr, unsigned HOST_WIDE_INT *diff)
2515 HOST_WIDE_INT bit_offset;
2517 switch (TREE_CODE (addr))
2531 off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (addr, 1));
2532 bit_offset = TREE_INT_CST_LOW (off);
2534 if (bit_offset % BITS_PER_UNIT)
2538 *diff += bit_offset / BITS_PER_UNIT;
2540 return TREE_OPERAND (addr, 0);
2543 off = TREE_OPERAND (addr, 1);
2547 if (!cst_and_fits_in_hwi (off))
2550 size = TYPE_SIZE_UNIT (TREE_TYPE (addr));
2551 if (!cst_and_fits_in_hwi (size))
2554 *diff += TREE_INT_CST_LOW (off) * TREE_INT_CST_LOW (size);
2557 return TREE_OPERAND (addr, 0);
2564 /* Checks whether E1 and E2 have constant difference, and if they do,
2565 store it in *DIFF. */
2568 ptr_difference_const (tree e1, tree e2, unsigned HOST_WIDE_INT *diff)
2572 unsigned HOST_WIDE_INT delta1 = 0, delta2 = 0;
2574 /* Find depths of E1 and E2. */
2575 for (x = e1; x; x = peel_address (x, NULL))
2577 for (x = e2; x; x = peel_address (x, NULL))
2580 for (; e1 && d1 > d2; e1 = peel_address (e1, &delta1))
2582 for (; e2 && d2 > d1; e2 = peel_address (e2, &delta2))
2585 while (e1 && e2 && !operand_equal_p (e1, e2, 0))
2587 e1 = peel_address (e1, &delta1);
2588 e2 = peel_address (e2, &delta1);
2594 *diff = delta1 - delta2;
2598 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2599 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2600 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2601 invariants the computation depends on. */
2604 split_address_cost (struct ivopts_data *data,
2605 tree addr, bool *symbol_present, bool *var_present,
2606 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2611 && TREE_CODE (core) != VAR_DECL)
2612 core = peel_address (core, offset);
2616 *symbol_present = false;
2617 *var_present = true;
2618 fd_ivopts_data = data;
2619 walk_tree (&addr, find_depends, depends_on, NULL);
2620 return target_spill_cost;
2623 if (TREE_STATIC (core)
2624 || DECL_EXTERNAL (core))
2626 *symbol_present = true;
2627 *var_present = false;
2631 *symbol_present = false;
2632 *var_present = true;
2636 /* Estimates cost of expressing difference of addresses E1 - E2 as
2637 var + symbol + offset. The value of offset is added to OFFSET,
2638 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2639 part is missing. DEPENDS_ON is a set of the invariants the computation
2643 ptr_difference_cost (struct ivopts_data *data,
2644 tree e1, tree e2, bool *symbol_present, bool *var_present,
2645 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2647 unsigned HOST_WIDE_INT diff = 0;
2650 if (TREE_CODE (e1) != ADDR_EXPR)
2653 if (TREE_CODE (e2) == ADDR_EXPR
2654 && ptr_difference_const (TREE_OPERAND (e1, 0),
2655 TREE_OPERAND (e2, 0), &diff))
2658 *symbol_present = false;
2659 *var_present = false;
2663 if (e2 == integer_zero_node)
2664 return split_address_cost (data, TREE_OPERAND (e1, 0),
2665 symbol_present, var_present, offset, depends_on);
2667 *symbol_present = false;
2668 *var_present = true;
2670 cost = force_var_cost (data, e1, depends_on);
2671 cost += force_var_cost (data, e2, depends_on);
2672 cost += add_cost (Pmode);
2677 /* Estimates cost of expressing difference E1 - E2 as
2678 var + symbol + offset. The value of offset is added to OFFSET,
2679 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2680 part is missing. DEPENDS_ON is a set of the invariants the computation
2684 difference_cost (struct ivopts_data *data,
2685 tree e1, tree e2, bool *symbol_present, bool *var_present,
2686 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2689 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2691 strip_offset (&e1, offset);
2693 strip_offset (&e2, offset);
2696 if (TREE_CODE (e1) == ADDR_EXPR)
2697 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2699 *symbol_present = false;
2701 if (operand_equal_p (e1, e2, 0))
2703 *var_present = false;
2706 *var_present = true;
2708 return force_var_cost (data, e1, depends_on);
2712 cost = force_var_cost (data, e2, depends_on);
2713 cost += multiply_by_cost (-1, mode);
2718 cost = force_var_cost (data, e1, depends_on);
2719 cost += force_var_cost (data, e2, depends_on);
2720 cost += add_cost (mode);
2725 /* Determines the cost of the computation by that USE is expressed
2726 from induction variable CAND. If ADDRESS_P is true, we just need
2727 to create an address from it, otherwise we want to get it into
2728 register. A set of invariants we depend on is stored in
2729 DEPENDS_ON. AT is the statement at that the value is computed. */
2732 get_computation_cost_at (struct ivopts_data *data,
2733 struct iv_use *use, struct iv_cand *cand,
2734 bool address_p, bitmap *depends_on, tree at)
2736 tree ubase = use->iv->base, ustep = use->iv->step;
2738 tree utype = TREE_TYPE (ubase), ctype;
2739 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2740 HOST_WIDE_INT ratio, aratio;
2741 bool var_present, symbol_present;
2742 unsigned cost = 0, n_sums;
2746 /* Only consider real candidates. */
2750 cbase = cand->iv->base;
2751 cstep = cand->iv->step;
2752 ctype = TREE_TYPE (cbase);
2754 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2756 /* We do not have a precision to express the values of use. */
2760 if (!cst_and_fits_in_hwi (ustep)
2761 || !cst_and_fits_in_hwi (cstep))
2764 if (TREE_CODE (ubase) == INTEGER_CST
2765 && !cst_and_fits_in_hwi (ubase))
2768 if (TREE_CODE (cbase) == INTEGER_CST
2769 && !cst_and_fits_in_hwi (cbase))
2772 ustepi = int_cst_value (ustep);
2773 cstepi = int_cst_value (cstep);
2775 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2777 /* TODO -- add direct handling of this case. */
2781 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2784 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2785 or ratio == 1, it is better to handle this like
2787 ubase - ratio * cbase + ratio * var
2789 (also holds in the case ratio == -1, TODO. */
2791 if (TREE_CODE (cbase) == INTEGER_CST)
2793 offset = - ratio * int_cst_value (cbase);
2794 cost += difference_cost (data,
2795 ubase, integer_zero_node,
2796 &symbol_present, &var_present, &offset,
2799 else if (ratio == 1)
2801 cost += difference_cost (data,
2803 &symbol_present, &var_present, &offset,
2808 cost += force_var_cost (data, cbase, depends_on);
2809 cost += add_cost (TYPE_MODE (ctype));
2810 cost += difference_cost (data,
2811 ubase, integer_zero_node,
2812 &symbol_present, &var_present, &offset,
2816 /* If we are after the increment, the value of the candidate is higher by
2818 if (stmt_after_increment (data->current_loop, cand, at))
2819 offset -= ratio * cstepi;
2821 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2822 (symbol/var/const parts may be omitted). If we are looking for an address,
2823 find the cost of addressing this. */
2825 return get_address_cost (symbol_present, var_present, offset, ratio);
2827 /* Otherwise estimate the costs for computing the expression. */
2828 aratio = ratio > 0 ? ratio : -ratio;
2829 if (!symbol_present && !var_present && !offset)
2832 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2838 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2842 /* Symbol + offset should be compile-time computable. */
2843 && (symbol_present || offset))
2846 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2850 /* Just get the expression, expand it and measure the cost. */
2851 tree comp = get_computation_at (data->current_loop, use, cand, at);
2857 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2859 return computation_cost (comp);
2863 /* Determines the cost of the computation by that USE is expressed
2864 from induction variable CAND. If ADDRESS_P is true, we just need
2865 to create an address from it, otherwise we want to get it into
2866 register. A set of invariants we depend on is stored in
2870 get_computation_cost (struct ivopts_data *data,
2871 struct iv_use *use, struct iv_cand *cand,
2872 bool address_p, bitmap *depends_on)
2874 return get_computation_cost_at (data,
2875 use, cand, address_p, depends_on, use->stmt);
2878 /* Determines cost of basing replacement of USE on CAND in a generic
2882 determine_use_iv_cost_generic (struct ivopts_data *data,
2883 struct iv_use *use, struct iv_cand *cand)
2886 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2888 set_use_iv_cost (data, use, cand, cost, depends_on);
2891 /* Determines cost of basing replacement of USE on CAND in an address. */
2894 determine_use_iv_cost_address (struct ivopts_data *data,
2895 struct iv_use *use, struct iv_cand *cand)
2898 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2900 set_use_iv_cost (data, use, cand, cost, depends_on);
2903 /* Computes value of induction variable IV in iteration NITER. */
2906 iv_value (struct iv *iv, tree niter)
2909 tree type = TREE_TYPE (iv->base);
2911 niter = fold_convert (type, niter);
2912 val = fold (build2 (MULT_EXPR, type, iv->step, unsave_expr_now (niter)));
2914 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2917 /* Computes value of candidate CAND at position AT in iteration NITER. */
2920 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2922 tree val = iv_value (cand->iv, niter);
2923 tree type = TREE_TYPE (cand->iv->base);
2925 if (stmt_after_increment (loop, cand, at))
2926 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2931 /* Check whether it is possible to express the condition in USE by comparison
2932 of candidate CAND. If so, store the comparison code to COMPARE and the
2933 value compared with to BOUND. */
2936 may_eliminate_iv (struct loop *loop,
2937 struct iv_use *use, struct iv_cand *cand,
2938 enum tree_code *compare, tree *bound)
2941 struct tree_niter_desc *niter, new_niter;
2942 tree wider_type, type, base;
2944 /* For now just very primitive -- we work just for the single exit condition,
2945 and are quite conservative about the possible overflows. TODO -- both of
2946 these can be improved. */
2947 exit = single_dom_exit (loop);
2950 if (use->stmt != last_stmt (exit->src))
2953 niter = &loop_data (loop)->niter;
2955 || !integer_nonzerop (niter->assumptions)
2956 || !integer_zerop (niter->may_be_zero))
2959 if (exit->flags & EDGE_TRUE_VALUE)
2964 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2966 /* Let us check there is not some problem with overflows, by checking that
2967 the number of iterations is unchanged. */
2968 base = cand->iv->base;
2969 type = TREE_TYPE (base);
2970 if (stmt_after_increment (loop, cand, use->stmt))
2971 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2973 new_niter.niter = NULL_TREE;
2974 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2975 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2977 if (!new_niter.niter
2978 || !integer_nonzerop (new_niter.assumptions)
2979 || !integer_zerop (new_niter.may_be_zero))
2982 wider_type = TREE_TYPE (new_niter.niter);
2983 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2984 wider_type = TREE_TYPE (niter->niter);
2985 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2986 fold_convert (wider_type, new_niter.niter), 0))
2992 /* Determines cost of basing replacement of USE on CAND in a condition. */
2995 determine_use_iv_cost_condition (struct ivopts_data *data,
2996 struct iv_use *use, struct iv_cand *cand)
2999 enum tree_code compare;
3001 /* Only consider real candidates. */
3004 set_use_iv_cost (data, use, cand, INFTY, NULL);
3008 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3010 bitmap depends_on = NULL;
3011 unsigned cost = force_var_cost (data, bound, &depends_on);
3013 set_use_iv_cost (data, use, cand, cost, depends_on);
3017 /* The induction variable elimination failed; just express the original
3018 giv. If it is compared with an invariant, note that we cannot get
3020 if (TREE_CODE (*use->op_p) == SSA_NAME)
3021 record_invariant (data, *use->op_p, true);
3024 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3025 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3028 determine_use_iv_cost_generic (data, use, cand);
3031 /* Checks whether it is possible to replace the final value of USE by
3032 a direct computation. If so, the formula is stored to *VALUE. */
3035 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3038 struct tree_niter_desc *niter;
3040 exit = single_dom_exit (loop);
3044 if (!dominated_by_p (CDI_DOMINATORS, exit->src,
3045 bb_for_stmt (use->stmt)))
3048 niter = &loop_data (loop)->niter;
3050 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3051 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3054 *value = iv_value (use->iv, niter->niter);
3059 /* Determines cost of replacing final value of USE using CAND. */
3062 determine_use_iv_cost_outer (struct ivopts_data *data,
3063 struct iv_use *use, struct iv_cand *cand)
3069 struct loop *loop = data->current_loop;
3073 if (!may_replace_final_value (loop, use, &value))
3075 set_use_iv_cost (data, use, cand, INFTY, NULL);
3080 cost = force_var_cost (data, value, &depends_on);
3082 cost /= AVG_LOOP_NITER (loop);
3084 set_use_iv_cost (data, use, cand, cost, depends_on);
3088 exit = single_dom_exit (loop);
3091 /* If there is just a single exit, we may use value of the candidate
3092 after we take it to determine the value of use. */
3093 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3094 last_stmt (exit->src));
3096 cost /= AVG_LOOP_NITER (loop);
3100 /* Otherwise we just need to compute the iv. */
3101 cost = get_computation_cost (data, use, cand, false, &depends_on);
3104 set_use_iv_cost (data, use, cand, cost, depends_on);
3107 /* Determines cost of basing replacement of USE on CAND. */
3110 determine_use_iv_cost (struct ivopts_data *data,
3111 struct iv_use *use, struct iv_cand *cand)
3115 case USE_NONLINEAR_EXPR:
3116 determine_use_iv_cost_generic (data, use, cand);
3120 determine_use_iv_cost_outer (data, use, cand);
3124 determine_use_iv_cost_address (data, use, cand);
3128 determine_use_iv_cost_condition (data, use, cand);
3136 /* Determines costs of basing the use of the iv on an iv candidate. */
3139 determine_use_iv_costs (struct ivopts_data *data)
3143 struct iv_cand *cand;
3145 data->consider_all_candidates = (n_iv_cands (data)
3146 <= CONSIDER_ALL_CANDIDATES_BOUND);
3148 alloc_use_cost_map (data);
3150 if (!data->consider_all_candidates)
3152 /* Add the important candidate entries. */
3153 for (j = 0; j < n_iv_cands (data); j++)
3155 cand = iv_cand (data, j);
3156 if (!cand->important)
3158 for (i = 0; i < n_iv_uses (data); i++)
3160 use = iv_use (data, i);
3161 determine_use_iv_cost (data, use, cand);
3166 for (i = 0; i < n_iv_uses (data); i++)
3168 use = iv_use (data, i);
3170 if (data->consider_all_candidates)
3172 for (j = 0; j < n_iv_cands (data); j++)
3174 cand = iv_cand (data, j);
3175 determine_use_iv_cost (data, use, cand);
3180 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3182 cand = iv_cand (data, j);
3183 if (!cand->important)
3184 determine_use_iv_cost (data, use, cand);
3189 if (dump_file && (dump_flags & TDF_DETAILS))
3191 fprintf (dump_file, "Use-candidate costs:\n");
3193 for (i = 0; i < n_iv_uses (data); i++)
3195 use = iv_use (data, i);
3197 fprintf (dump_file, "Use %d:\n", i);
3198 fprintf (dump_file, " cand\tcost\tdepends on\n");
3199 for (j = 0; j < use->n_map_members; j++)
3201 if (!use->cost_map[j].cand
3202 || use->cost_map[j].cost == INFTY)
3205 fprintf (dump_file, " %d\t%d\t",
3206 use->cost_map[j].cand->id,
3207 use->cost_map[j].cost);
3208 if (use->cost_map[j].depends_on)
3209 bitmap_print (dump_file,
3210 use->cost_map[j].depends_on, "","");
3211 fprintf (dump_file, "\n");
3214 fprintf (dump_file, "\n");
3216 fprintf (dump_file, "\n");
3220 /* Determines cost of the candidate CAND. */
3223 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3225 unsigned cost_base, cost_step;
3235 /* There are two costs associated with the candidate -- its increment
3236 and its initialization. The second is almost negligible for any loop
3237 that rolls enough, so we take it just very little into account. */
3239 base = cand->iv->base;
3240 cost_base = force_var_cost (data, base, NULL);
3241 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3243 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3245 /* Prefer the original iv unless we may gain something by replacing it. */
3246 if (cand->pos == IP_ORIGINAL)
3249 /* Prefer not to insert statements into latch unless there are some
3250 already (so that we do not create unnecessary jumps). */
3251 if (cand->pos == IP_END)
3253 bb = ip_end_pos (data->current_loop);
3254 last = last_stmt (bb);
3257 || TREE_CODE (last) == LABEL_EXPR)
3262 /* Determines costs of computation of the candidates. */
3265 determine_iv_costs (struct ivopts_data *data)
3269 if (dump_file && (dump_flags & TDF_DETAILS))
3271 fprintf (dump_file, "Candidate costs:\n");
3272 fprintf (dump_file, " cand\tcost\n");
3275 for (i = 0; i < n_iv_cands (data); i++)
3277 struct iv_cand *cand = iv_cand (data, i);
3279 determine_iv_cost (data, cand);
3281 if (dump_file && (dump_flags & TDF_DETAILS))
3282 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3285 if (dump_file && (dump_flags & TDF_DETAILS))
3286 fprintf (dump_file, "\n");
3289 /* Calculates cost for having SIZE induction variables. */
3292 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3294 return global_cost_for_size (size,
3295 loop_data (data->current_loop)->regs_used,
3299 /* For each size of the induction variable set determine the penalty. */
3302 determine_set_costs (struct ivopts_data *data)
3306 struct loop *loop = data->current_loop;
3308 /* We use the following model (definitely improvable, especially the
3309 cost function -- TODO):
3311 We estimate the number of registers available (using MD data), name it A.
3313 We estimate the number of registers used by the loop, name it U. This
3314 number is obtained as the number of loop phi nodes (not counting virtual
3315 registers and bivs) + the number of variables from outside of the loop.
3317 We set a reserve R (free regs that are used for temporary computations,
3318 etc.). For now the reserve is a constant 3.
3320 Let I be the number of induction variables.
3322 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3323 make a lot of ivs without a reason).
3324 -- if A - R < U + I <= A, the cost is I * PRES_COST
3325 -- if U + I > A, the cost is I * PRES_COST and
3326 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3328 if (dump_file && (dump_flags & TDF_DETAILS))
3330 fprintf (dump_file, "Global costs:\n");
3331 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3332 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3333 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3334 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3338 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3340 op = PHI_RESULT (phi);
3342 if (!is_gimple_reg (op))
3345 if (get_iv (data, op))
3351 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3353 struct version_info *info = ver_info (data, j);
3355 if (info->inv_id && info->has_nonlin_use)
3359 loop_data (loop)->regs_used = n;
3360 if (dump_file && (dump_flags & TDF_DETAILS))
3361 fprintf (dump_file, " regs_used %d\n", n);
3363 if (dump_file && (dump_flags & TDF_DETAILS))
3365 fprintf (dump_file, " cost for size:\n");
3366 fprintf (dump_file, " ivs\tcost\n");
3367 for (j = 0; j <= 2 * target_avail_regs; j++)
3368 fprintf (dump_file, " %d\t%d\n", j,
3369 ivopts_global_cost_for_size (data, j));
3370 fprintf (dump_file, "\n");
3374 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3375 taken from the set SOL and they may depend on invariants in the set INV.
3376 The really used candidate and invariants are noted to USED_IVS and
3380 find_best_candidate (struct ivopts_data *data,
3381 struct iv_use *use, bitmap sol, bitmap inv,
3382 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3385 unsigned best_cost = INFTY, cost;
3386 struct iv_cand *cnd = NULL, *acnd;
3387 bitmap depends_on = NULL, asol;
3389 if (data->consider_all_candidates)
3393 asol = BITMAP_XMALLOC ();
3394 bitmap_a_and_b (asol, sol, use->related_cands);
3397 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3399 acnd = iv_cand (data, c);
3400 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3404 if (cost > best_cost)
3406 if (cost == best_cost)
3408 /* Prefer the cheaper iv. */
3409 if (acnd->cost >= cnd->cost)
3415 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3418 bitmap_a_or_b (used_inv, used_inv, depends_on);
3426 if (cnd && used_ivs)
3427 bitmap_set_bit (used_ivs, cnd->id);
3432 if (!data->consider_all_candidates)
3433 BITMAP_XFREE (asol);
3438 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3439 induction variable candidates and invariants from the sets. Only
3440 uses 0 .. MAX_USE - 1 are taken into account. */
3443 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3447 unsigned cost = 0, size = 0, acost;
3449 struct iv_cand *cand;
3450 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3452 for (i = 0; i < max_use; i++)
3454 use = iv_use (data, i);
3455 acost = find_best_candidate (data, use, sol, inv,
3456 used_ivs, used_inv, NULL);
3459 BITMAP_XFREE (used_ivs);
3460 BITMAP_XFREE (used_inv);
3466 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3468 cand = iv_cand (data, i);
3470 /* Do not count the pseudocandidates. */
3476 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3477 cost += ivopts_global_cost_for_size (data, size);
3479 bitmap_copy (sol, used_ivs);
3480 bitmap_copy (inv, used_inv);
3482 BITMAP_XFREE (used_ivs);
3483 BITMAP_XFREE (used_inv);
3488 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3489 induction variable candidates and invariants from the sets. */
3492 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3494 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3497 /* Tries to extend the sets IVS and INV in the best possible way in order
3498 to express the USE. */
3501 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3504 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3505 bitmap best_ivs = BITMAP_XMALLOC ();
3506 bitmap best_inv = BITMAP_XMALLOC ();
3507 bitmap act_ivs = BITMAP_XMALLOC ();
3508 bitmap act_inv = BITMAP_XMALLOC ();
3510 struct cost_pair *cp;
3512 bitmap_copy (best_ivs, ivs);
3513 bitmap_copy (best_inv, inv);
3515 for (i = 0; i < use->n_map_members; i++)
3517 cp = use->cost_map + i;
3518 if (cp->cost == INFTY)
3521 bitmap_copy (act_ivs, ivs);
3522 bitmap_set_bit (act_ivs, cp->cand->id);
3524 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3526 bitmap_copy (act_inv, inv);
3527 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3529 if (act_cost < best_cost)
3531 best_cost = act_cost;
3532 bitmap_copy (best_ivs, act_ivs);
3533 bitmap_copy (best_inv, act_inv);
3537 bitmap_copy (ivs, best_ivs);
3538 bitmap_copy (inv, best_inv);
3540 BITMAP_XFREE (best_ivs);
3541 BITMAP_XFREE (best_inv);
3542 BITMAP_XFREE (act_ivs);
3543 BITMAP_XFREE (act_inv);
3545 return (best_cost != INFTY);
3548 /* Finds an initial set of IVS and invariants INV. We do this by simply
3549 choosing the best candidate for each use. */
3552 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3556 for (i = 0; i < n_iv_uses (data); i++)
3557 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3560 return set_cost (data, ivs, inv);
3563 /* Tries to improve set of induction variables IVS and invariants INV to get
3564 it better than COST. */
3567 try_improve_iv_set (struct ivopts_data *data,
3568 bitmap ivs, bitmap inv, unsigned *cost)
3571 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3572 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3574 /* Try altering the set of induction variables by one. */
3575 for (i = 0; i < n_iv_cands (data); i++)
3577 bitmap_copy (new_ivs, ivs);
3578 bitmap_copy (new_inv, inv);
3580 if (bitmap_bit_p (ivs, i))
3581 bitmap_clear_bit (new_ivs, i);
3583 bitmap_set_bit (new_ivs, i);
3585 acost = set_cost (data, new_ivs, new_inv);
3591 best_new_ivs = BITMAP_XMALLOC ();
3592 best_new_inv = BITMAP_XMALLOC ();
3596 bitmap_copy (best_new_ivs, new_ivs);
3597 bitmap_copy (best_new_inv, new_inv);
3600 /* Ditto for invariants. */
3601 for (i = 1; i <= data->max_inv_id; i++)
3603 if (ver_info (data, i)->has_nonlin_use)
3606 bitmap_copy (new_ivs, ivs);
3607 bitmap_copy (new_inv, inv);
3609 if (bitmap_bit_p (inv, i))
3610 bitmap_clear_bit (new_inv, i);
3612 bitmap_set_bit (new_inv, i);
3614 acost = set_cost (data, new_ivs, new_inv);
3620 best_new_ivs = BITMAP_XMALLOC ();
3621 best_new_inv = BITMAP_XMALLOC ();
3625 bitmap_copy (best_new_ivs, new_ivs);
3626 bitmap_copy (best_new_inv, new_inv);
3629 BITMAP_XFREE (new_ivs);
3630 BITMAP_XFREE (new_inv);
3635 bitmap_copy (ivs, best_new_ivs);
3636 bitmap_copy (inv, best_new_inv);
3637 BITMAP_XFREE (best_new_ivs);
3638 BITMAP_XFREE (best_new_inv);
3642 /* Attempts to find the optimal set of induction variables. We do simple
3643 greedy heuristic -- we try to replace at most one candidate in the selected
3644 solution and remove the unused ivs while this improves the cost. */
3647 find_optimal_iv_set (struct ivopts_data *data)
3650 bitmap set = BITMAP_XMALLOC ();
3651 bitmap inv = BITMAP_XMALLOC ();
3654 /* Set the upper bound. */
3655 cost = get_initial_solution (data, set, inv);
3658 if (dump_file && (dump_flags & TDF_DETAILS))
3659 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3665 if (dump_file && (dump_flags & TDF_DETAILS))
3667 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3668 bitmap_print (dump_file, set, "", "");
3669 fprintf (dump_file, " invariants ");
3670 bitmap_print (dump_file, inv, "", "");
3671 fprintf (dump_file, "\n");
3674 while (try_improve_iv_set (data, set, inv, &cost))
3676 if (dump_file && (dump_flags & TDF_DETAILS))
3678 fprintf (dump_file, "Improved to (cost %d): ", cost);
3679 bitmap_print (dump_file, set, "", "");
3680 fprintf (dump_file, " invariants ");
3681 bitmap_print (dump_file, inv, "", "");
3682 fprintf (dump_file, "\n");
3686 if (dump_file && (dump_flags & TDF_DETAILS))
3687 fprintf (dump_file, "Final cost %d\n\n", cost);
3689 for (i = 0; i < n_iv_uses (data); i++)
3691 use = iv_use (data, i);
3692 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3700 /* Creates a new induction variable corresponding to CAND. */
3703 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3705 block_stmt_iterator incr_pos;
3715 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3719 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3724 /* Mark that the iv is preserved. */
3725 name_info (data, cand->var_before)->preserve_biv = true;
3726 name_info (data, cand->var_after)->preserve_biv = true;
3728 /* Rewrite the increment so that it uses var_before directly. */
3729 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3734 gimple_add_tmp_var (cand->var_before);
3735 add_referenced_tmp_var (cand->var_before);
3737 base = unshare_expr (cand->iv->base);
3739 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3740 &incr_pos, after, &cand->var_before, &cand->var_after);
3743 /* Creates new induction variables described in SET. */
3746 create_new_ivs (struct ivopts_data *data, bitmap set)
3749 struct iv_cand *cand;
3751 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3753 cand = iv_cand (data, i);
3754 create_new_iv (data, cand);
3758 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3759 is true, remove also the ssa name defined by the statement. */
3762 remove_statement (tree stmt, bool including_defined_name)
3764 if (TREE_CODE (stmt) == PHI_NODE)
3766 if (!including_defined_name)
3768 /* Prevent the ssa name defined by the statement from being removed. */
3769 SET_PHI_RESULT (stmt, NULL);
3771 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3775 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3781 /* Rewrites USE (definition of iv used in a nonlinear expression)
3782 using candidate CAND. */
3785 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3786 struct iv_use *use, struct iv_cand *cand)
3788 tree comp = unshare_expr (get_computation (data->current_loop,
3790 tree op, stmts, tgt, ass;
3791 block_stmt_iterator bsi, pbsi;
3793 if (TREE_CODE (use->stmt) == PHI_NODE)
3795 tgt = PHI_RESULT (use->stmt);
3797 /* If we should keep the biv, do not replace it. */
3798 if (name_info (data, tgt)->preserve_biv)
3801 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3802 while (!bsi_end_p (pbsi)
3803 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3809 else if (TREE_CODE (use->stmt) == MODIFY_EXPR)
3811 tgt = TREE_OPERAND (use->stmt, 0);
3812 bsi = stmt_for_bsi (use->stmt);
3817 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3819 if (TREE_CODE (use->stmt) == PHI_NODE)
3822 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3823 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3824 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3825 remove_statement (use->stmt, false);
3826 SSA_NAME_DEF_STMT (tgt) = ass;
3831 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3832 TREE_OPERAND (use->stmt, 1) = op;
3836 /* Replaces ssa name in index IDX by its basic variable. Callback for
3840 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3841 void *data ATTRIBUTE_UNUSED)
3843 if (TREE_CODE (*idx) == SSA_NAME)
3844 *idx = SSA_NAME_VAR (*idx);
3848 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3851 unshare_and_remove_ssa_names (tree ref)
3853 ref = unshare_expr (ref);
3854 for_each_index (&ref, idx_remove_ssa_names, NULL);
3859 /* Rewrites base of memory access OP with expression WITH in statement
3860 pointed to by BSI. */
3863 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3865 tree var = get_base_address (*op), new_var, new_name, copy, name;
3868 if (!var || TREE_CODE (with) != SSA_NAME)
3871 if (TREE_CODE (var) == INDIRECT_REF)
3872 var = TREE_OPERAND (var, 0);
3873 if (TREE_CODE (var) == SSA_NAME)
3876 var = SSA_NAME_VAR (var);
3878 else if (DECL_P (var))
3883 if (var_ann (var)->type_mem_tag)
3884 var = var_ann (var)->type_mem_tag;
3886 /* We need to add a memory tag for the variable. But we do not want
3887 to add it to the temporary used for the computations, since this leads
3888 to problems in redundancy elimination when there are common parts
3889 in two computations referring to the different arrays. So we copy
3890 the variable to a new temporary. */
3891 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3893 new_name = duplicate_ssa_name (name, copy);
3896 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3897 add_referenced_tmp_var (new_var);
3898 var_ann (new_var)->type_mem_tag = var;
3899 new_name = make_ssa_name (new_var, copy);
3901 TREE_OPERAND (copy, 0) = new_name;
3902 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3908 if (TREE_CODE (*op) == INDIRECT_REF)
3909 orig = REF_ORIGINAL (*op);
3911 orig = unshare_and_remove_ssa_names (*op);
3913 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3914 /* Record the original reference, for purposes of alias analysis. */
3915 REF_ORIGINAL (*op) = orig;
3918 /* Rewrites USE (address that is an iv) using candidate CAND. */
3921 rewrite_use_address (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand)
3924 tree comp = unshare_expr (get_computation (data->current_loop,
3926 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3928 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3931 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3933 rewrite_address_base (&bsi, use->op_p, op);
3936 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3940 rewrite_use_compare (struct ivopts_data *data,
3941 struct iv_use *use, struct iv_cand *cand)
3944 tree *op_p, cond, op, stmts, bound;
3945 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3946 enum tree_code compare;
3948 if (may_eliminate_iv (data->current_loop,
3949 use, cand, &compare, &bound))
3951 op = force_gimple_operand (unshare_expr (bound), &stmts,
3955 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3957 *use->op_p = build2 (compare, boolean_type_node,
3958 var_at_stmt (data->current_loop,
3959 cand, use->stmt), op);
3960 modify_stmt (use->stmt);
3964 /* The induction variable elimination failed; just express the original
3966 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3969 op_p = &TREE_OPERAND (cond, 0);
3970 if (TREE_CODE (*op_p) != SSA_NAME
3971 || zero_p (get_iv (data, *op_p)->step))
3972 op_p = &TREE_OPERAND (cond, 1);
3974 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3976 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3981 /* Ensure that operand *OP_P may be used at the end of EXIT without
3982 violating loop closed ssa form. */
3985 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3988 struct loop *def_loop;
3991 use = USE_FROM_PTR (op_p);
3992 if (TREE_CODE (use) != SSA_NAME)
3995 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3999 def_loop = def_bb->loop_father;
4000 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4003 /* Try finding a phi node that copies the value out of the loop. */
4004 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4005 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4010 /* Create such a phi node. */
4011 tree new_name = duplicate_ssa_name (use, NULL);
4013 phi = create_phi_node (new_name, exit->dest);
4014 SSA_NAME_DEF_STMT (new_name) = phi;
4015 add_phi_arg (&phi, use, exit);
4018 SET_USE (op_p, PHI_RESULT (phi));
4021 /* Ensure that operands of STMT may be used at the end of EXIT without
4022 violating loop closed ssa form. */
4025 protect_loop_closed_ssa_form (edge exit, tree stmt)
4029 v_may_def_optype v_may_defs;
4032 get_stmt_operands (stmt);
4034 uses = STMT_USE_OPS (stmt);
4035 for (i = 0; i < NUM_USES (uses); i++)
4036 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4038 vuses = STMT_VUSE_OPS (stmt);
4039 for (i = 0; i < NUM_VUSES (vuses); i++)
4040 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4042 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4043 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4044 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4047 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4048 so that they are emitted on the correct place, and so that the loop closed
4049 ssa form is preserved. */
4052 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4054 tree_stmt_iterator tsi;
4055 block_stmt_iterator bsi;
4056 tree phi, stmt, def, next;
4058 if (exit->dest->pred->pred_next)
4059 split_loop_exit_edge (exit);
4061 if (TREE_CODE (stmts) == STATEMENT_LIST)
4063 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4064 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4067 protect_loop_closed_ssa_form (exit, stmts);
4069 /* Ensure there is label in exit->dest, so that we can
4071 tree_block_label (exit->dest);
4072 bsi = bsi_after_labels (exit->dest);
4073 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4078 for (phi = phi_nodes (exit->dest); phi; phi = next)
4080 next = TREE_CHAIN (phi);
4082 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4084 def = PHI_RESULT (phi);
4085 remove_statement (phi, false);
4086 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4088 SSA_NAME_DEF_STMT (def) = stmt;
4089 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4094 /* Rewrites the final value of USE (that is only needed outside of the loop)
4095 using candidate CAND. */
4098 rewrite_use_outer (struct ivopts_data *data,
4099 struct iv_use *use, struct iv_cand *cand)
4102 tree value, op, stmts, tgt;
4105 if (TREE_CODE (use->stmt) == PHI_NODE)
4106 tgt = PHI_RESULT (use->stmt);
4107 else if (TREE_CODE (use->stmt) == MODIFY_EXPR)
4108 tgt = TREE_OPERAND (use->stmt, 0);
4111 exit = single_dom_exit (data->current_loop);
4117 if (!may_replace_final_value (data->current_loop, use, &value))
4121 value = get_computation_at (data->current_loop,
4122 use, cand, last_stmt (exit->src));
4124 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4126 /* If we will preserve the iv anyway and we would need to perform
4127 some computation to replace the final value, do nothing. */
4128 if (stmts && name_info (data, tgt)->preserve_biv)
4131 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4133 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4135 if (USE_FROM_PTR (use_p) == tgt)
4136 SET_USE (use_p, op);
4140 compute_phi_arg_on_exit (exit, stmts, op);
4142 /* Enable removal of the statement. We cannot remove it directly,
4143 since we may still need the aliasing information attached to the
4144 ssa name defined by it. */
4145 name_info (data, tgt)->iv->have_use_for = false;
4149 /* If the variable is going to be preserved anyway, there is nothing to
4151 if (name_info (data, tgt)->preserve_biv)
4154 /* Otherwise we just need to compute the iv. */
4155 rewrite_use_nonlinear_expr (data, use, cand);
4158 /* Rewrites USE using candidate CAND. */
4161 rewrite_use (struct ivopts_data *data,
4162 struct iv_use *use, struct iv_cand *cand)
4166 case USE_NONLINEAR_EXPR:
4167 rewrite_use_nonlinear_expr (data, use, cand);
4171 rewrite_use_outer (data, use, cand);
4175 rewrite_use_address (data, use, cand);
4179 rewrite_use_compare (data, use, cand);
4185 modify_stmt (use->stmt);
4188 /* Rewrite the uses using the selected induction variables. */
4191 rewrite_uses (struct ivopts_data *data)
4194 struct iv_cand *cand;
4197 for (i = 0; i < n_iv_uses (data); i++)
4199 use = iv_use (data, i);
4200 cand = use->selected;
4204 rewrite_use (data, use, cand);
4208 /* Removes the ivs that are not used after rewriting. */
4211 remove_unused_ivs (struct ivopts_data *data)
4215 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4217 struct version_info *info;
4219 info = ver_info (data, j);
4221 && !zero_p (info->iv->step)
4223 && !info->iv->have_use_for
4224 && !info->preserve_biv)
4225 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4229 /* Frees data allocated by the optimization of a single loop. */
4232 free_loop_data (struct ivopts_data *data)
4236 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4238 struct version_info *info;
4240 info = ver_info (data, i);
4244 info->has_nonlin_use = false;
4245 info->preserve_biv = false;
4248 bitmap_clear (data->relevant);
4250 for (i = 0; i < n_iv_uses (data); i++)
4252 struct iv_use *use = iv_use (data, i);
4255 BITMAP_XFREE (use->related_cands);
4256 for (j = 0; j < use->n_map_members; j++)
4257 if (use->cost_map[j].depends_on)
4258 BITMAP_XFREE (use->cost_map[j].depends_on);
4259 free (use->cost_map);
4262 VARRAY_POP_ALL (data->iv_uses);
4264 for (i = 0; i < n_iv_cands (data); i++)
4266 struct iv_cand *cand = iv_cand (data, i);
4272 VARRAY_POP_ALL (data->iv_candidates);
4274 if (data->version_info_size < num_ssa_names)
4276 data->version_info_size = 2 * num_ssa_names;
4277 free (data->version_info);
4278 data->version_info = xcalloc (data->version_info_size,
4279 sizeof (struct version_info));
4282 data->max_inv_id = 0;
4284 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4286 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4288 SET_DECL_RTL (obj, NULL_RTX);
4290 VARRAY_POP_ALL (decl_rtl_to_reset);
4293 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4297 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4301 for (i = 1; i < loops->num; i++)
4302 if (loops->parray[i])
4304 free (loops->parray[i]->aux);
4305 loops->parray[i]->aux = NULL;
4308 free_loop_data (data);
4309 free (data->version_info);
4310 BITMAP_XFREE (data->relevant);
4312 VARRAY_FREE (decl_rtl_to_reset);
4313 VARRAY_FREE (data->iv_uses);
4314 VARRAY_FREE (data->iv_candidates);
4317 /* Optimizes the LOOP. Returns true if anything changed. */
4320 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4322 bool changed = false;
4326 data->current_loop = loop;
4328 if (dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, "Processing loop %d\n", loop->num);
4332 exit = single_dom_exit (loop);
4335 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4336 exit->src->index, exit->dest->index);
4337 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4338 fprintf (dump_file, "\n");
4341 fprintf (dump_file, "\n");
4344 /* For each ssa name determines whether it behaves as an induction variable
4346 if (!find_induction_variables (data))
4349 /* Finds interesting uses (item 1). */
4350 find_interesting_uses (data);
4351 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4354 /* Finds candidates for the induction variables (item 2). */
4355 find_iv_candidates (data);
4357 /* Calculates the costs (item 3, part 1). */
4358 determine_use_iv_costs (data);
4359 determine_iv_costs (data);
4360 determine_set_costs (data);
4362 /* Find the optimal set of induction variables (item 3, part 2). */
4363 iv_set = find_optimal_iv_set (data);
4368 /* Create the new induction variables (item 4, part 1). */
4369 create_new_ivs (data, iv_set);
4371 /* Rewrite the uses (item 4, part 2). */
4372 rewrite_uses (data);
4374 /* Remove the ivs that are unused after rewriting. */
4375 remove_unused_ivs (data);
4377 loop_commit_inserts ();
4379 BITMAP_XFREE (iv_set);
4381 /* We have changed the structure of induction variables; it might happen
4382 that definitions in the scev database refer to some of them that were
4387 free_loop_data (data);
4392 /* Main entry point. Optimizes induction variables in LOOPS. */
4395 tree_ssa_iv_optimize (struct loops *loops)
4398 struct ivopts_data data;
4400 tree_ssa_iv_optimize_init (loops, &data);
4402 /* Optimize the loops starting with the innermost ones. */
4403 loop = loops->tree_root;
4407 #ifdef ENABLE_CHECKING
4408 verify_loop_closed_ssa ();
4412 /* Scan the loops, inner ones first. */
4413 while (loop != loops->tree_root)
4415 if (dump_file && (dump_flags & TDF_DETAILS))
4416 flow_loop_dump (loop, dump_file, NULL, 1);
4417 if (tree_ssa_iv_optimize_loop (&data, loop))
4419 #ifdef ENABLE_CHECKING
4420 verify_loop_closed_ssa ();
4435 tree_ssa_iv_optimize_finalize (loops, &data);