1 /* Induction variable optimizations.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
292 if (!just_once_each_iteration_p (loop, exit->src))
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
310 fprintf (file, " base ");
311 print_generic_expr (file, iv->base, TDF_SLIM);
312 fprintf (file, "\n");
314 fprintf (file, " step ");
315 print_generic_expr (file, iv->step, TDF_SLIM);
316 fprintf (file, "\n");
320 fprintf (file, " invariant ");
321 print_generic_expr (file, iv->base, TDF_SLIM);
322 fprintf (file, "\n");
326 fprintf (file, " is a biv\n");
329 /* Dumps information about the USE to FILE. */
331 extern void dump_use (FILE *, struct iv_use *);
333 dump_use (FILE *file, struct iv_use *use)
335 struct iv *iv = use->iv;
337 fprintf (file, "use %d\n", use->id);
341 case USE_NONLINEAR_EXPR:
342 fprintf (file, " generic\n");
346 fprintf (file, " outside\n");
350 fprintf (file, " address\n");
354 fprintf (file, " compare\n");
361 fprintf (file, " in statement ");
362 print_generic_expr (file, use->stmt, TDF_SLIM);
363 fprintf (file, "\n");
365 fprintf (file, " at position ");
367 print_generic_expr (file, *use->op_p, TDF_SLIM);
368 fprintf (file, "\n");
372 fprintf (file, " base ");
373 print_generic_expr (file, iv->base, TDF_SLIM);
374 fprintf (file, "\n");
376 fprintf (file, " step ");
377 print_generic_expr (file, iv->step, TDF_SLIM);
378 fprintf (file, "\n");
382 fprintf (file, " invariant ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " related candidates ");
388 dump_bitmap (file, use->related_cands);
391 /* Dumps information about the uses to FILE. */
393 extern void dump_uses (FILE *, struct ivopts_data *);
395 dump_uses (FILE *file, struct ivopts_data *data)
400 for (i = 0; i < n_iv_uses (data); i++)
402 use = iv_use (data, i);
404 dump_use (file, use);
405 fprintf (file, "\n");
409 /* Dumps information about induction variable candidate CAND to FILE. */
411 extern void dump_cand (FILE *, struct iv_cand *);
413 dump_cand (FILE *file, struct iv_cand *cand)
415 struct iv *iv = cand->iv;
417 fprintf (file, "candidate %d%s\n",
418 cand->id, cand->important ? " (important)" : "");
422 fprintf (file, " final value replacement\n");
429 fprintf (file, " incremented before exit test\n");
433 fprintf (file, " incremented at end\n");
437 fprintf (file, " original biv\n");
443 fprintf (file, " base ");
444 print_generic_expr (file, iv->base, TDF_SLIM);
445 fprintf (file, "\n");
447 fprintf (file, " step ");
448 print_generic_expr (file, iv->step, TDF_SLIM);
449 fprintf (file, "\n");
453 fprintf (file, " invariant ");
454 print_generic_expr (file, iv->base, TDF_SLIM);
455 fprintf (file, "\n");
459 /* Returns the info for ssa version VER. */
461 static inline struct version_info *
462 ver_info (struct ivopts_data *data, unsigned ver)
464 return data->version_info + ver;
467 /* Returns the info for ssa name NAME. */
469 static inline struct version_info *
470 name_info (struct ivopts_data *data, tree name)
472 return ver_info (data, SSA_NAME_VERSION (name));
475 /* Checks whether there exists number X such that X * B = A, counting modulo
479 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
482 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
483 unsigned HOST_WIDE_INT inv, ex, val;
489 /* First divide the whole equation by 2 as long as possible. */
490 while (!(a & 1) && !(b & 1))
500 /* If b is still even, a is odd and there is no such x. */
504 /* Find the inverse of b. We compute it as
505 b^(2^(bits - 1) - 1) (mod 2^bits). */
508 for (i = 0; i < bits - 1; i++)
510 inv = (inv * ex) & mask;
511 ex = (ex * ex) & mask;
514 val = (a * inv) & mask;
516 gcc_assert (((val * b) & mask) == a);
518 if ((val >> (bits - 1)) & 1)
526 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
530 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
532 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
536 if (sbb == loop->latch)
542 return stmt == last_stmt (bb);
545 /* Returns true if STMT if after the place where the original induction
546 variable CAND is incremented. */
549 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
551 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
552 basic_block stmt_bb = bb_for_stmt (stmt);
553 block_stmt_iterator bsi;
555 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
558 if (stmt_bb != cand_bb)
561 /* Scan the block from the end, since the original ivs are usually
562 incremented at the end of the loop body. */
563 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
565 if (bsi_stmt (bsi) == cand->incremented_at)
567 if (bsi_stmt (bsi) == stmt)
572 /* Returns true if STMT if after the place where the induction variable
573 CAND is incremented in LOOP. */
576 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
584 return stmt_after_ip_normal_pos (loop, stmt);
587 return stmt_after_ip_original_pos (cand, stmt);
594 /* Initializes data structures used by the iv optimization pass, stored
595 in DATA. LOOPS is the loop tree. */
598 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
602 data->version_info_size = 2 * num_ssa_names;
603 data->version_info = xcalloc (data->version_info_size,
604 sizeof (struct version_info));
605 data->relevant = BITMAP_XMALLOC ();
606 data->max_inv_id = 0;
608 for (i = 1; i < loops->num; i++)
609 if (loops->parray[i])
610 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
612 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
613 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
614 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
617 /* Allocates an induction variable with given initial value BASE and step STEP
621 alloc_iv (tree base, tree step)
623 struct iv *iv = xcalloc (1, sizeof (struct iv));
625 if (step && integer_zerop (step))
631 iv->have_use_for = false;
633 iv->ssa_name = NULL_TREE;
638 /* Sets STEP and BASE for induction variable IV. */
641 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
643 struct version_info *info = name_info (data, iv);
645 gcc_assert (!info->iv);
647 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
648 info->iv = alloc_iv (base, step);
649 info->iv->ssa_name = iv;
652 /* Finds induction variable declaration for VAR. */
655 get_iv (struct ivopts_data *data, tree var)
659 if (!name_info (data, var)->iv)
661 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
664 || !flow_bb_inside_loop_p (data->current_loop, bb))
665 set_iv (data, var, var, NULL_TREE);
668 return name_info (data, var)->iv;
671 /* Determines the step of a biv defined in PHI. */
674 determine_biv_step (tree phi)
676 struct loop *loop = bb_for_stmt (phi)->loop_father;
677 tree name = PHI_RESULT (phi), base, step;
678 tree type = TREE_TYPE (name);
680 if (!is_gimple_reg (name))
683 if (!simple_iv (loop, phi, name, &base, &step))
687 return build_int_cst (type, 0);
692 /* Returns false if INDEX is a ssa name that occurs in an
693 abnormal phi node. Callback for for_each_index. */
696 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
697 void *data ATTRIBUTE_UNUSED)
699 if (TREE_CODE (*index) != SSA_NAME)
702 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
705 /* Returns true if EXPR contains a ssa name that occurs in an
706 abnormal phi node. */
709 contains_abnormal_ssa_name_p (tree expr)
711 enum tree_code code = TREE_CODE (expr);
712 char class = TREE_CODE_CLASS (code);
714 if (code == SSA_NAME)
715 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
717 if (code == INTEGER_CST
718 || is_gimple_min_invariant (expr))
721 if (code == ADDR_EXPR)
722 return !for_each_index (&TREE_OPERAND (expr, 1),
723 idx_contains_abnormal_ssa_name_p,
730 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
735 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
747 /* Finds basic ivs. */
750 find_bivs (struct ivopts_data *data)
752 tree phi, step, type, base;
754 struct loop *loop = data->current_loop;
756 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
758 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
761 step = determine_biv_step (phi);
765 if (cst_and_fits_in_hwi (step)
766 && int_cst_value (step) == 0)
769 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
770 if (contains_abnormal_ssa_name_p (base))
773 type = TREE_TYPE (PHI_RESULT (phi));
774 base = fold_convert (type, base);
775 step = fold_convert (type, step);
777 /* FIXME: We do not handle induction variables whose step does
778 not satisfy cst_and_fits_in_hwi. */
779 if (!cst_and_fits_in_hwi (step))
782 set_iv (data, PHI_RESULT (phi), base, step);
789 /* Marks basic ivs. */
792 mark_bivs (struct ivopts_data *data)
795 struct iv *iv, *incr_iv;
796 struct loop *loop = data->current_loop;
799 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
801 iv = get_iv (data, PHI_RESULT (phi));
805 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
806 incr_iv = get_iv (data, var);
810 /* If the increment is in the subloop, ignore it. */
811 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
812 if (incr_bb->loop_father != data->current_loop
813 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
817 incr_iv->biv_p = true;
821 /* Checks whether STMT defines a linear induction variable and stores its
822 parameters to BASE and STEP. */
825 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
826 tree *base, tree *step)
829 struct loop *loop = data->current_loop;
834 if (TREE_CODE (stmt) != MODIFY_EXPR)
837 lhs = TREE_OPERAND (stmt, 0);
838 if (TREE_CODE (lhs) != SSA_NAME)
841 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
844 /* FIXME: We do not handle induction variables whose step does
845 not satisfy cst_and_fits_in_hwi. */
847 && !cst_and_fits_in_hwi (*step))
850 if (contains_abnormal_ssa_name_p (*base))
856 /* Finds general ivs in statement STMT. */
859 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
863 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
866 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
869 /* Finds general ivs in basic block BB. */
872 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
874 block_stmt_iterator bsi;
876 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
877 find_givs_in_stmt (data, bsi_stmt (bsi));
880 /* Finds general ivs. */
883 find_givs (struct ivopts_data *data)
885 struct loop *loop = data->current_loop;
886 basic_block *body = get_loop_body_in_dom_order (loop);
889 for (i = 0; i < loop->num_nodes; i++)
890 find_givs_in_bb (data, body[i]);
894 /* Determine the number of iterations of the current loop. */
897 determine_number_of_iterations (struct ivopts_data *data)
899 struct loop *loop = data->current_loop;
900 edge exit = single_dom_exit (loop);
905 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
908 /* For each ssa name defined in LOOP determines whether it is an induction
909 variable and if so, its initial value and step. */
912 find_induction_variables (struct ivopts_data *data)
915 struct loop *loop = data->current_loop;
917 if (!find_bivs (data))
922 determine_number_of_iterations (data);
924 if (dump_file && (dump_flags & TDF_DETAILS))
926 if (loop_data (loop)->niter.niter)
928 fprintf (dump_file, " number of iterations ");
929 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
931 fprintf (dump_file, "\n");
933 fprintf (dump_file, " may be zero if ");
934 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
936 fprintf (dump_file, "\n");
938 fprintf (dump_file, " bogus unless ");
939 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
941 fprintf (dump_file, "\n");
942 fprintf (dump_file, "\n");
945 fprintf (dump_file, "Induction variables:\n\n");
947 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
949 if (ver_info (data, i)->iv)
950 dump_iv (dump_file, ver_info (data, i)->iv);
957 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
959 static struct iv_use *
960 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
961 tree stmt, enum use_type use_type)
963 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
965 use->id = n_iv_uses (data);
966 use->type = use_type;
970 use->related_cands = BITMAP_XMALLOC ();
972 if (dump_file && (dump_flags & TDF_DETAILS))
973 dump_use (dump_file, use);
975 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
980 /* Checks whether OP is a loop-level invariant and if so, records it.
981 NONLINEAR_USE is true if the invariant is used in a way we do not
985 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
988 struct version_info *info;
990 if (TREE_CODE (op) != SSA_NAME
991 || !is_gimple_reg (op))
994 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
996 && flow_bb_inside_loop_p (data->current_loop, bb))
999 info = name_info (data, op);
1001 info->has_nonlin_use |= nonlinear_use;
1003 info->inv_id = ++data->max_inv_id;
1004 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1007 /* Checks whether the use OP is interesting and if so, records it
1010 static struct iv_use *
1011 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1019 if (TREE_CODE (op) != SSA_NAME)
1022 iv = get_iv (data, op);
1026 if (iv->have_use_for)
1028 use = iv_use (data, iv->use_id);
1030 gcc_assert (use->type == USE_NONLINEAR_EXPR
1031 || use->type == USE_OUTER);
1033 if (type == USE_NONLINEAR_EXPR)
1034 use->type = USE_NONLINEAR_EXPR;
1038 if (zero_p (iv->step))
1040 record_invariant (data, op, true);
1043 iv->have_use_for = true;
1045 civ = xmalloc (sizeof (struct iv));
1048 stmt = SSA_NAME_DEF_STMT (op);
1049 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1050 || TREE_CODE (stmt) == MODIFY_EXPR);
1052 use = record_use (data, NULL, civ, stmt, type);
1053 iv->use_id = use->id;
1058 /* Checks whether the use OP is interesting and if so, records it. */
1060 static struct iv_use *
1061 find_interesting_uses_op (struct ivopts_data *data, tree op)
1063 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1066 /* Records a definition of induction variable OP that is used outside of the
1069 static struct iv_use *
1070 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1072 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1075 /* Checks whether the condition *COND_P in STMT is interesting
1076 and if so, records it. */
1079 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1083 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1085 tree zero = integer_zero_node;
1087 const_iv.step = NULL_TREE;
1089 if (integer_zerop (*cond_p)
1090 || integer_nonzerop (*cond_p))
1093 if (TREE_CODE (*cond_p) == SSA_NAME)
1100 op0_p = &TREE_OPERAND (*cond_p, 0);
1101 op1_p = &TREE_OPERAND (*cond_p, 1);
1104 if (TREE_CODE (*op0_p) == SSA_NAME)
1105 iv0 = get_iv (data, *op0_p);
1109 if (TREE_CODE (*op1_p) == SSA_NAME)
1110 iv1 = get_iv (data, *op1_p);
1114 if (/* When comparing with non-invariant value, we may not do any senseful
1115 induction variable elimination. */
1117 /* Eliminating condition based on two ivs would be nontrivial.
1118 ??? TODO -- it is not really important to handle this case. */
1119 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1121 find_interesting_uses_op (data, *op0_p);
1122 find_interesting_uses_op (data, *op1_p);
1126 if (zero_p (iv0->step) && zero_p (iv1->step))
1128 /* If both are invariants, this is a work for unswitching. */
1132 civ = xmalloc (sizeof (struct iv));
1133 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1134 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1137 /* Cumulates the steps of indices into DATA and replaces their values with the
1138 initial ones. Returns false when the value of the index cannot be determined.
1139 Callback for for_each_index. */
1141 struct ifs_ivopts_data
1143 struct ivopts_data *ivopts_data;
1149 idx_find_step (tree base, tree *idx, void *data)
1151 struct ifs_ivopts_data *dta = data;
1153 tree step, type, iv_type, iv_step;
1155 if (TREE_CODE (*idx) != SSA_NAME)
1158 iv = get_iv (dta->ivopts_data, *idx);
1167 iv_type = TREE_TYPE (iv->base);
1168 type = build_pointer_type (TREE_TYPE (base));
1169 if (TREE_CODE (base) == ARRAY_REF)
1170 step = array_ref_element_size (base);
1172 /* The step for pointer arithmetics already is 1 byte. */
1173 step = build_int_cst (type, 1);
1175 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1176 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1177 type, iv->base, iv->step, dta->stmt);
1179 iv_step = fold_convert (iv_type, iv->step);
1183 /* The index might wrap. */
1187 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1190 *dta->step_p = step;
1192 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1197 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1198 object is passed to it in DATA. */
1201 idx_record_use (tree base ATTRIBUTE_UNUSED, tree *idx,
1204 find_interesting_uses_op (data, *idx);
1208 /* Finds addresses in *OP_P inside STMT. */
1211 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1213 tree base = unshare_expr (*op_p), step = NULL;
1215 struct ifs_ivopts_data ifs_ivopts_data;
1217 /* Ignore bitfields for now. Not really something terribly complicated
1219 if (TREE_CODE (base) == COMPONENT_REF
1220 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1223 ifs_ivopts_data.ivopts_data = data;
1224 ifs_ivopts_data.stmt = stmt;
1225 ifs_ivopts_data.step_p = &step;
1226 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1230 if (TREE_CODE (base) == INDIRECT_REF)
1231 base = TREE_OPERAND (base, 0);
1233 base = build_addr (base);
1235 civ = alloc_iv (base, step);
1236 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1240 for_each_index (op_p, idx_record_use, data);
1243 /* Finds and records invariants used in STMT. */
1246 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1248 use_optype uses = NULL;
1252 if (TREE_CODE (stmt) == PHI_NODE)
1253 n = PHI_NUM_ARGS (stmt);
1256 get_stmt_operands (stmt);
1257 uses = STMT_USE_OPS (stmt);
1258 n = NUM_USES (uses);
1261 for (i = 0; i < n; i++)
1263 if (TREE_CODE (stmt) == PHI_NODE)
1264 op = PHI_ARG_DEF (stmt, i);
1266 op = USE_OP (uses, i);
1268 record_invariant (data, op, false);
1272 /* Finds interesting uses of induction variables in the statement STMT. */
1275 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1279 use_optype uses = NULL;
1282 find_invariants_stmt (data, stmt);
1284 if (TREE_CODE (stmt) == COND_EXPR)
1286 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1290 if (TREE_CODE (stmt) == MODIFY_EXPR)
1292 lhs = TREE_OPERAND (stmt, 0);
1293 rhs = TREE_OPERAND (stmt, 1);
1295 if (TREE_CODE (lhs) == SSA_NAME)
1297 /* If the statement defines an induction variable, the uses are not
1298 interesting by themselves. */
1300 iv = get_iv (data, lhs);
1302 if (iv && !zero_p (iv->step))
1306 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1309 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1313 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1314 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1315 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1321 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1323 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1324 find_interesting_uses_op (data, rhs);
1329 if (TREE_CODE (stmt) == PHI_NODE
1330 && bb_for_stmt (stmt) == data->current_loop->header)
1332 lhs = PHI_RESULT (stmt);
1333 iv = get_iv (data, lhs);
1335 if (iv && !zero_p (iv->step))
1339 if (TREE_CODE (stmt) == PHI_NODE)
1340 n = PHI_NUM_ARGS (stmt);
1343 uses = STMT_USE_OPS (stmt);
1344 n = NUM_USES (uses);
1347 for (i = 0; i < n; i++)
1349 if (TREE_CODE (stmt) == PHI_NODE)
1350 op = PHI_ARG_DEF (stmt, i);
1352 op = USE_OP (uses, i);
1354 if (TREE_CODE (op) != SSA_NAME)
1357 iv = get_iv (data, op);
1361 find_interesting_uses_op (data, op);
1365 /* Finds interesting uses of induction variables outside of loops
1366 on loop exit edge EXIT. */
1369 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1373 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1375 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1376 find_interesting_uses_outer (data, def);
1380 /* Finds uses of the induction variables that are interesting. */
1383 find_interesting_uses (struct ivopts_data *data)
1386 block_stmt_iterator bsi;
1388 basic_block *body = get_loop_body (data->current_loop);
1390 struct version_info *info;
1393 if (dump_file && (dump_flags & TDF_DETAILS))
1394 fprintf (dump_file, "Uses:\n\n");
1396 for (i = 0; i < data->current_loop->num_nodes; i++)
1400 for (e = bb->succ; e; e = e->succ_next)
1401 if (e->dest != EXIT_BLOCK_PTR
1402 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1403 find_interesting_uses_outside (data, e);
1405 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1406 find_interesting_uses_stmt (data, phi);
1407 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1408 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1411 if (dump_file && (dump_flags & TDF_DETAILS))
1413 fprintf (dump_file, "\n");
1415 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1417 info = ver_info (data, i);
1420 fprintf (dump_file, " ");
1421 print_generic_expr (dump_file, info->name, TDF_SLIM);
1422 fprintf (dump_file, " is invariant (%d)%s\n",
1423 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1427 fprintf (dump_file, "\n");
1433 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1434 position to POS. If USE is not NULL, the candidate is set as related to
1435 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1436 replacement of the final value of the iv by a direct computation. */
1438 static struct iv_cand *
1439 add_candidate_1 (struct ivopts_data *data,
1440 tree base, tree step, bool important, enum iv_position pos,
1441 struct iv_use *use, tree incremented_at)
1444 struct iv_cand *cand = NULL;
1449 type = TREE_TYPE (base);
1450 if (!TYPE_UNSIGNED (type))
1452 type = unsigned_type_for (type);
1453 base = fold_convert (type, base);
1455 step = fold_convert (type, step);
1459 for (i = 0; i < n_iv_cands (data); i++)
1461 cand = iv_cand (data, i);
1463 if (cand->pos != pos)
1466 if (cand->incremented_at != incremented_at)
1480 if (!operand_equal_p (base, cand->iv->base, 0))
1483 if (zero_p (cand->iv->step))
1490 if (step && operand_equal_p (step, cand->iv->step, 0))
1495 if (i == n_iv_cands (data))
1497 cand = xcalloc (1, sizeof (struct iv_cand));
1503 cand->iv = alloc_iv (base, step);
1506 if (pos != IP_ORIGINAL && cand->iv)
1508 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1509 cand->var_after = cand->var_before;
1511 cand->important = important;
1512 cand->incremented_at = incremented_at;
1513 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1515 if (dump_file && (dump_flags & TDF_DETAILS))
1516 dump_cand (dump_file, cand);
1519 if (important && !cand->important)
1521 cand->important = true;
1522 if (dump_file && (dump_flags & TDF_DETAILS))
1523 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1528 bitmap_set_bit (use->related_cands, i);
1529 if (dump_file && (dump_flags & TDF_DETAILS))
1530 fprintf (dump_file, "Candidate %d is related to use %d\n",
1537 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1538 position to POS. If USE is not NULL, the candidate is set as related to
1539 it. The candidate computation is scheduled on all available positions. */
1542 add_candidate (struct ivopts_data *data,
1543 tree base, tree step, bool important, struct iv_use *use)
1545 if (ip_normal_pos (data->current_loop))
1546 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1547 if (ip_end_pos (data->current_loop))
1548 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1551 /* Adds standard iv candidates. */
1554 add_standard_iv_candidates (struct ivopts_data *data)
1556 /* Add 0 + 1 * iteration candidate. */
1557 add_candidate (data,
1558 build_int_cst (unsigned_intSI_type_node, 0),
1559 build_int_cst (unsigned_intSI_type_node, 1),
1562 /* The same for a long type if it is still fast enought. */
1563 if (BITS_PER_WORD > 32)
1564 add_candidate (data,
1565 build_int_cst (unsigned_intDI_type_node, 0),
1566 build_int_cst (unsigned_intDI_type_node, 1),
1571 /* Adds candidates bases on the old induction variable IV. */
1574 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1577 struct iv_cand *cand;
1579 add_candidate (data, iv->base, iv->step, true, NULL);
1581 /* The same, but with initial value zero. */
1582 add_candidate (data,
1583 build_int_cst (TREE_TYPE (iv->base), 0),
1584 iv->step, true, NULL);
1586 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1587 if (TREE_CODE (phi) == PHI_NODE)
1589 /* Additionally record the possibility of leaving the original iv
1591 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1592 cand = add_candidate_1 (data,
1593 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1594 SSA_NAME_DEF_STMT (def));
1595 cand->var_before = iv->ssa_name;
1596 cand->var_after = def;
1600 /* Adds candidates based on the old induction variables. */
1603 add_old_ivs_candidates (struct ivopts_data *data)
1608 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1610 iv = ver_info (data, i)->iv;
1611 if (iv && iv->biv_p && !zero_p (iv->step))
1612 add_old_iv_candidates (data, iv);
1616 /* Adds candidates based on the value of the induction variable IV and USE. */
1619 add_iv_value_candidates (struct ivopts_data *data,
1620 struct iv *iv, struct iv_use *use)
1622 add_candidate (data, iv->base, iv->step, false, use);
1624 /* The same, but with initial value zero. */
1625 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1626 iv->step, false, use);
1629 /* Adds candidates based on the address IV and USE. */
1632 add_address_candidates (struct ivopts_data *data,
1633 struct iv *iv, struct iv_use *use)
1635 tree base, abase, tmp, *act;
1637 /* First, the trivial choices. */
1638 add_iv_value_candidates (data, iv, use);
1640 /* Second, try removing the COMPONENT_REFs. */
1641 if (TREE_CODE (iv->base) == ADDR_EXPR)
1643 base = TREE_OPERAND (iv->base, 0);
1644 while (TREE_CODE (base) == COMPONENT_REF
1645 || (TREE_CODE (base) == ARRAY_REF
1646 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1647 base = TREE_OPERAND (base, 0);
1649 if (base != TREE_OPERAND (iv->base, 0))
1651 if (TREE_CODE (base) == INDIRECT_REF)
1652 base = TREE_OPERAND (base, 0);
1654 base = build_addr (base);
1655 add_candidate (data, base, iv->step, false, use);
1659 /* Third, try removing the constant offset. */
1661 while (TREE_CODE (abase) == PLUS_EXPR
1662 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1663 abase = TREE_OPERAND (abase, 0);
1664 /* We found the offset, so make the copy of the non-shared part and
1666 if (TREE_CODE (abase) == PLUS_EXPR)
1671 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1673 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1674 NULL_TREE, TREE_OPERAND (tmp, 1));
1675 act = &TREE_OPERAND (*act, 0);
1677 *act = TREE_OPERAND (tmp, 0);
1679 add_candidate (data, base, iv->step, false, use);
1683 /* Possibly adds pseudocandidate for replacing the final value of USE by
1684 a direct computation. */
1687 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1689 struct tree_niter_desc *niter;
1690 struct loop *loop = data->current_loop;
1692 /* We must know where we exit the loop and how many times does it roll. */
1693 if (!single_dom_exit (loop))
1696 niter = &loop_data (loop)->niter;
1698 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1699 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1702 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1705 /* Adds candidates based on the uses. */
1708 add_derived_ivs_candidates (struct ivopts_data *data)
1712 for (i = 0; i < n_iv_uses (data); i++)
1714 struct iv_use *use = iv_use (data, i);
1721 case USE_NONLINEAR_EXPR:
1723 /* Just add the ivs based on the value of the iv used here. */
1724 add_iv_value_candidates (data, use->iv, use);
1728 add_iv_value_candidates (data, use->iv, use);
1730 /* Additionally, add the pseudocandidate for the possibility to
1731 replace the final value by a direct computation. */
1732 add_iv_outer_candidates (data, use);
1736 add_address_candidates (data, use->iv, use);
1745 /* Finds the candidates for the induction variables. */
1748 find_iv_candidates (struct ivopts_data *data)
1750 /* Add commonly used ivs. */
1751 add_standard_iv_candidates (data);
1753 /* Add old induction variables. */
1754 add_old_ivs_candidates (data);
1756 /* Add induction variables derived from uses. */
1757 add_derived_ivs_candidates (data);
1760 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1761 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1762 we allocate a simple list to every use. */
1765 alloc_use_cost_map (struct ivopts_data *data)
1767 unsigned i, n_imp = 0, size, j;
1769 if (!data->consider_all_candidates)
1771 for (i = 0; i < n_iv_cands (data); i++)
1773 struct iv_cand *cand = iv_cand (data, i);
1774 if (cand->important)
1779 for (i = 0; i < n_iv_uses (data); i++)
1781 struct iv_use *use = iv_use (data, i);
1783 if (data->consider_all_candidates)
1785 size = n_iv_cands (data);
1786 use->n_map_members = size;
1791 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1792 use->n_map_members = 0;
1795 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1799 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1800 on invariants DEPENDS_ON. */
1803 set_use_iv_cost (struct ivopts_data *data,
1804 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1810 BITMAP_XFREE (depends_on);
1814 if (data->consider_all_candidates)
1816 use->cost_map[cand->id].cand = cand;
1817 use->cost_map[cand->id].cost = cost;
1818 use->cost_map[cand->id].depends_on = depends_on;
1825 use->cost_map[use->n_map_members].cand = cand;
1826 use->cost_map[use->n_map_members].cost = cost;
1827 use->cost_map[use->n_map_members].depends_on = depends_on;
1828 use->n_map_members++;
1831 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1835 get_use_iv_cost (struct ivopts_data *data,
1836 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1843 if (data->consider_all_candidates)
1847 for (i = 0; i < use->n_map_members; i++)
1848 if (use->cost_map[i].cand == cand)
1851 if (i == use->n_map_members)
1856 *depends_on = use->cost_map[i].depends_on;
1857 return use->cost_map[i].cost;
1860 /* Returns estimate on cost of computing SEQ. */
1868 for (; seq; seq = NEXT_INSN (seq))
1870 set = single_set (seq);
1872 cost += rtx_cost (set, SET);
1880 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1882 produce_memory_decl_rtl (tree obj, int *regno)
1887 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1889 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1890 x = gen_rtx_SYMBOL_REF (Pmode, name);
1893 x = gen_raw_REG (Pmode, (*regno)++);
1895 return gen_rtx_MEM (DECL_MODE (obj), x);
1898 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1899 walk_tree. DATA contains the actual fake register number. */
1902 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1904 tree obj = NULL_TREE;
1908 switch (TREE_CODE (*expr_p))
1911 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1912 (handled_component_p (*expr_p)
1913 || TREE_CODE (*expr_p) == REALPART_EXPR
1914 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1915 expr_p = &TREE_OPERAND (*expr_p, 0));
1918 x = produce_memory_decl_rtl (obj, regno);
1923 obj = SSA_NAME_VAR (*expr_p);
1924 if (!DECL_RTL_SET_P (obj))
1925 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1934 if (DECL_RTL_SET_P (obj))
1937 if (DECL_MODE (obj) == BLKmode)
1938 x = produce_memory_decl_rtl (obj, regno);
1940 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1950 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
1951 SET_DECL_RTL (obj, x);
1957 /* Determines cost of the computation of EXPR. */
1960 computation_cost (tree expr)
1963 tree type = TREE_TYPE (expr);
1967 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
1969 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
1973 cost = seq_cost (seq);
1974 if (GET_CODE (rslt) == MEM)
1975 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
1980 /* Returns variable containing the value of candidate CAND at statement AT. */
1983 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
1985 if (stmt_after_increment (loop, cand, stmt))
1986 return cand->var_after;
1988 return cand->var_before;
1991 /* Determines the expression by that USE is expressed from induction variable
1992 CAND at statement AT in LOOP. */
1995 get_computation_at (struct loop *loop,
1996 struct iv_use *use, struct iv_cand *cand, tree at)
1998 tree ubase = unsave_expr_now (use->iv->base);
1999 tree ustep = unsave_expr_now (use->iv->step);
2000 tree cbase = unsave_expr_now (cand->iv->base);
2001 tree cstep = unsave_expr_now (cand->iv->step);
2002 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2006 unsigned HOST_WIDE_INT ustepi, cstepi;
2007 HOST_WIDE_INT ratioi;
2009 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2011 /* We do not have a precision to express the values of use. */
2015 expr = var_at_stmt (loop, cand, at);
2017 if (TREE_TYPE (expr) != ctype)
2019 /* This may happen with the original ivs. */
2020 expr = fold_convert (ctype, expr);
2023 if (TYPE_UNSIGNED (utype))
2027 uutype = unsigned_type_for (utype);
2028 ubase = fold_convert (uutype, ubase);
2029 ustep = fold_convert (uutype, ustep);
2032 if (uutype != ctype)
2034 expr = fold_convert (uutype, expr);
2035 cbase = fold_convert (uutype, cbase);
2036 cstep = fold_convert (uutype, cstep);
2039 if (!cst_and_fits_in_hwi (cstep)
2040 || !cst_and_fits_in_hwi (ustep))
2043 ustepi = int_cst_value (ustep);
2044 cstepi = int_cst_value (cstep);
2046 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2048 /* TODO maybe consider case when ustep divides cstep and the ratio is
2049 a power of 2 (so that the division is fast to execute)? We would
2050 need to be much more careful with overflows etc. then. */
2054 /* We may need to shift the value if we are after the increment. */
2055 if (stmt_after_increment (loop, cand, at))
2056 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2058 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2059 or |ratio| == 1, it is better to handle this like
2061 ubase - ratio * cbase + ratio * var. */
2065 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2066 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2068 else if (ratioi == -1)
2070 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2071 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2073 else if (TREE_CODE (cbase) == INTEGER_CST)
2075 ratio = build_int_cst_type (uutype, ratioi);
2076 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2077 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2078 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2079 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2083 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2084 ratio = build_int_cst_type (uutype, ratioi);
2085 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2086 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2089 return fold_convert (utype, expr);
2092 /* Determines the expression by that USE is expressed from induction variable
2096 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2098 return get_computation_at (loop, use, cand, use->stmt);
2101 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2104 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2107 enum tree_code code;
2111 if (cst_and_fits_in_hwi (*expr))
2113 *offset += int_cst_value (*expr);
2114 *expr = integer_zero_node;
2118 code = TREE_CODE (*expr);
2120 if (code != PLUS_EXPR && code != MINUS_EXPR)
2123 op0 = TREE_OPERAND (*expr, 0);
2124 op1 = TREE_OPERAND (*expr, 1);
2126 if (cst_and_fits_in_hwi (op1))
2128 if (code == PLUS_EXPR)
2129 *offset += int_cst_value (op1);
2131 *offset -= int_cst_value (op1);
2137 if (code != PLUS_EXPR)
2140 if (!cst_and_fits_in_hwi (op0))
2143 *offset += int_cst_value (op0);
2148 /* Returns cost of addition in MODE. */
2151 add_cost (enum machine_mode mode)
2153 static unsigned costs[NUM_MACHINE_MODES];
2161 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2162 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2163 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2168 cost = seq_cost (seq);
2174 if (dump_file && (dump_flags & TDF_DETAILS))
2175 fprintf (dump_file, "Addition in %s costs %d\n",
2176 GET_MODE_NAME (mode), cost);
2180 /* Entry in a hashtable of already known costs for multiplication. */
2183 HOST_WIDE_INT cst; /* The constant to multiply by. */
2184 enum machine_mode mode; /* In mode. */
2185 unsigned cost; /* The cost. */
2188 /* Counts hash value for the ENTRY. */
2191 mbc_entry_hash (const void *entry)
2193 const struct mbc_entry *e = entry;
2195 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2198 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2201 mbc_entry_eq (const void *entry1, const void *entry2)
2203 const struct mbc_entry *e1 = entry1;
2204 const struct mbc_entry *e2 = entry2;
2206 return (e1->mode == e2->mode
2207 && e1->cst == e2->cst);
2210 /* Returns cost of multiplication by constant CST in MODE. */
2213 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2215 static htab_t costs;
2216 struct mbc_entry **cached, act;
2221 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2225 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2227 return (*cached)->cost;
2229 *cached = xmalloc (sizeof (struct mbc_entry));
2230 (*cached)->mode = mode;
2231 (*cached)->cst = cst;
2234 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2239 cost = seq_cost (seq);
2241 if (dump_file && (dump_flags & TDF_DETAILS))
2242 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2243 (int) cst, GET_MODE_NAME (mode), cost);
2245 (*cached)->cost = cost;
2250 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2251 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2252 variable is omitted. The created memory accesses MODE.
2254 TODO -- there must be some better way. This all is quite crude. */
2257 get_address_cost (bool symbol_present, bool var_present,
2258 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2260 #define MAX_RATIO 128
2261 static sbitmap valid_mult;
2262 static HOST_WIDE_INT rat, off;
2263 static HOST_WIDE_INT min_offset, max_offset;
2264 static unsigned costs[2][2][2][2];
2265 unsigned cost, acost;
2266 rtx seq, addr, base;
2267 bool offset_p, ratio_p;
2269 HOST_WIDE_INT s_offset;
2270 unsigned HOST_WIDE_INT mask;
2277 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2279 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2280 for (i = 1; i <= 1 << 20; i <<= 1)
2282 XEXP (addr, 1) = GEN_INT (i);
2283 if (!memory_address_p (Pmode, addr))
2286 max_offset = i >> 1;
2289 for (i = 1; i <= 1 << 20; i <<= 1)
2291 XEXP (addr, 1) = GEN_INT (-i);
2292 if (!memory_address_p (Pmode, addr))
2295 min_offset = -(i >> 1);
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2299 fprintf (dump_file, "get_address_cost:\n");
2300 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2301 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2304 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2305 sbitmap_zero (valid_mult);
2307 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2308 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2310 XEXP (addr, 1) = GEN_INT (i);
2311 if (memory_address_p (Pmode, addr))
2313 SET_BIT (valid_mult, i + MAX_RATIO);
2318 if (dump_file && (dump_flags & TDF_DETAILS))
2320 fprintf (dump_file, " allowed multipliers:");
2321 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2322 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2323 fprintf (dump_file, " %d", (int) i);
2324 fprintf (dump_file, "\n");
2325 fprintf (dump_file, "\n");
2329 bits = GET_MODE_BITSIZE (Pmode);
2330 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2332 if ((offset >> (bits - 1) & 1))
2337 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2338 ratio_p = (ratio != 1
2339 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2340 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2342 if (ratio != 1 && !ratio_p)
2343 cost += multiply_by_cost (ratio, Pmode);
2345 if (s_offset && !offset_p && !symbol_present)
2347 cost += add_cost (Pmode);
2351 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2356 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2357 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2359 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2363 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2365 base = gen_rtx_fmt_e (CONST, Pmode,
2366 gen_rtx_fmt_ee (PLUS, Pmode,
2370 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2373 else if (var_present)
2377 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2380 base = GEN_INT (off);
2385 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2388 addr = memory_address (Pmode, addr);
2392 acost = seq_cost (seq);
2393 acost += address_cost (addr, Pmode);
2397 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2400 return cost + acost;
2403 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2404 the bitmap to that we should store it. */
2406 static struct ivopts_data *fd_ivopts_data;
2408 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2410 bitmap *depends_on = data;
2411 struct version_info *info;
2413 if (TREE_CODE (*expr_p) != SSA_NAME)
2415 info = name_info (fd_ivopts_data, *expr_p);
2417 if (!info->inv_id || info->has_nonlin_use)
2421 *depends_on = BITMAP_XMALLOC ();
2422 bitmap_set_bit (*depends_on, info->inv_id);
2427 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2428 invariants the computation depends on. */
2431 force_var_cost (struct ivopts_data *data,
2432 tree expr, bitmap *depends_on)
2434 static bool costs_initialized = false;
2435 static unsigned integer_cost;
2436 static unsigned symbol_cost;
2437 static unsigned address_cost;
2439 if (!costs_initialized)
2441 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2442 rtx x = gen_rtx_MEM (DECL_MODE (var),
2443 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2445 tree type = build_pointer_type (integer_type_node);
2447 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2450 SET_DECL_RTL (var, x);
2451 TREE_STATIC (var) = 1;
2452 addr = build1 (ADDR_EXPR, type, var);
2453 symbol_cost = computation_cost (addr) + 1;
2456 = computation_cost (build2 (PLUS_EXPR, type,
2458 build_int_cst_type (type, 2000))) + 1;
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "force_var_cost:\n");
2462 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2463 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2464 fprintf (dump_file, " address %d\n", (int) address_cost);
2465 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2466 fprintf (dump_file, "\n");
2469 costs_initialized = true;
2474 fd_ivopts_data = data;
2475 walk_tree (&expr, find_depends, depends_on, NULL);
2478 if (SSA_VAR_P (expr))
2481 if (TREE_INVARIANT (expr))
2483 if (TREE_CODE (expr) == INTEGER_CST)
2484 return integer_cost;
2486 if (TREE_CODE (expr) == ADDR_EXPR)
2488 tree obj = TREE_OPERAND (expr, 0);
2490 if (TREE_CODE (obj) == VAR_DECL
2491 || TREE_CODE (obj) == PARM_DECL
2492 || TREE_CODE (obj) == RESULT_DECL)
2496 return address_cost;
2499 /* Just an arbitrary value, FIXME. */
2500 return target_spill_cost;
2503 /* Peels a single layer of ADDR. If DIFF is not NULL, do it only if the
2504 offset is constant and add the offset to DIFF. */
2507 peel_address (tree addr, unsigned HOST_WIDE_INT *diff)
2510 HOST_WIDE_INT bit_offset;
2512 switch (TREE_CODE (addr))
2526 off = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (addr, 1));
2527 bit_offset = TREE_INT_CST_LOW (off);
2529 gcc_assert ((bit_offset % BITS_PER_UNIT) == 0);
2532 *diff += bit_offset / BITS_PER_UNIT;
2534 return TREE_OPERAND (addr, 0);
2537 off = TREE_OPERAND (addr, 1);
2541 if (!cst_and_fits_in_hwi (off))
2544 size = TYPE_SIZE_UNIT (TREE_TYPE (addr));
2545 if (!cst_and_fits_in_hwi (size))
2548 *diff += TREE_INT_CST_LOW (off) * TREE_INT_CST_LOW (size);
2551 return TREE_OPERAND (addr, 0);
2558 /* Checks whether E1 and E2 have constant difference, and if they do,
2559 store it in *DIFF. */
2562 ptr_difference_const (tree e1, tree e2, unsigned HOST_WIDE_INT *diff)
2566 unsigned HOST_WIDE_INT delta1 = 0, delta2 = 0;
2568 /* Find depths of E1 and E2. */
2569 for (x = e1; x; x = peel_address (x, NULL))
2571 for (x = e2; x; x = peel_address (x, NULL))
2574 for (; e1 && d1 > d2; e1 = peel_address (e1, &delta1))
2576 for (; e2 && d2 > d1; e2 = peel_address (e2, &delta2))
2579 while (e1 && e2 && !operand_equal_p (e1, e2, 0))
2581 e1 = peel_address (e1, &delta1);
2582 e2 = peel_address (e2, &delta1);
2588 *diff = delta1 - delta2;
2592 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2593 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2594 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2595 invariants the computation depends on. */
2598 split_address_cost (struct ivopts_data *data,
2599 tree addr, bool *symbol_present, bool *var_present,
2600 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2605 && TREE_CODE (core) != VAR_DECL)
2606 core = peel_address (core, offset);
2610 *symbol_present = false;
2611 *var_present = true;
2612 fd_ivopts_data = data;
2613 walk_tree (&addr, find_depends, depends_on, NULL);
2614 return target_spill_cost;
2617 if (TREE_STATIC (core)
2618 || DECL_EXTERNAL (core))
2620 *symbol_present = true;
2621 *var_present = false;
2625 *symbol_present = false;
2626 *var_present = true;
2630 /* Estimates cost of expressing difference of addresses E1 - E2 as
2631 var + symbol + offset. The value of offset is added to OFFSET,
2632 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2633 part is missing. DEPENDS_ON is a set of the invariants the computation
2637 ptr_difference_cost (struct ivopts_data *data,
2638 tree e1, tree e2, bool *symbol_present, bool *var_present,
2639 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2641 unsigned HOST_WIDE_INT diff = 0;
2644 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2646 if (TREE_CODE (e2) == ADDR_EXPR
2647 && ptr_difference_const (TREE_OPERAND (e1, 0),
2648 TREE_OPERAND (e2, 0), &diff))
2651 *symbol_present = false;
2652 *var_present = false;
2656 if (e2 == integer_zero_node)
2657 return split_address_cost (data, TREE_OPERAND (e1, 0),
2658 symbol_present, var_present, offset, depends_on);
2660 *symbol_present = false;
2661 *var_present = true;
2663 cost = force_var_cost (data, e1, depends_on);
2664 cost += force_var_cost (data, e2, depends_on);
2665 cost += add_cost (Pmode);
2670 /* Estimates cost of expressing difference E1 - E2 as
2671 var + symbol + offset. The value of offset is added to OFFSET,
2672 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2673 part is missing. DEPENDS_ON is a set of the invariants the computation
2677 difference_cost (struct ivopts_data *data,
2678 tree e1, tree e2, bool *symbol_present, bool *var_present,
2679 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2682 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2684 strip_offset (&e1, offset);
2686 strip_offset (&e2, offset);
2689 if (TREE_CODE (e1) == ADDR_EXPR)
2690 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2692 *symbol_present = false;
2694 if (operand_equal_p (e1, e2, 0))
2696 *var_present = false;
2699 *var_present = true;
2701 return force_var_cost (data, e1, depends_on);
2705 cost = force_var_cost (data, e2, depends_on);
2706 cost += multiply_by_cost (-1, mode);
2711 cost = force_var_cost (data, e1, depends_on);
2712 cost += force_var_cost (data, e2, depends_on);
2713 cost += add_cost (mode);
2718 /* Determines the cost of the computation by that USE is expressed
2719 from induction variable CAND. If ADDRESS_P is true, we just need
2720 to create an address from it, otherwise we want to get it into
2721 register. A set of invariants we depend on is stored in
2722 DEPENDS_ON. AT is the statement at that the value is computed. */
2725 get_computation_cost_at (struct ivopts_data *data,
2726 struct iv_use *use, struct iv_cand *cand,
2727 bool address_p, bitmap *depends_on, tree at)
2729 tree ubase = use->iv->base, ustep = use->iv->step;
2731 tree utype = TREE_TYPE (ubase), ctype;
2732 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2733 HOST_WIDE_INT ratio, aratio;
2734 bool var_present, symbol_present;
2735 unsigned cost = 0, n_sums;
2739 /* Only consider real candidates. */
2743 cbase = cand->iv->base;
2744 cstep = cand->iv->step;
2745 ctype = TREE_TYPE (cbase);
2747 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2749 /* We do not have a precision to express the values of use. */
2753 if (!cst_and_fits_in_hwi (ustep)
2754 || !cst_and_fits_in_hwi (cstep))
2757 if (TREE_CODE (ubase) == INTEGER_CST
2758 && !cst_and_fits_in_hwi (ubase))
2761 if (TREE_CODE (cbase) == INTEGER_CST
2762 && !cst_and_fits_in_hwi (cbase))
2765 ustepi = int_cst_value (ustep);
2766 cstepi = int_cst_value (cstep);
2768 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2770 /* TODO -- add direct handling of this case. */
2774 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2777 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2778 or ratio == 1, it is better to handle this like
2780 ubase - ratio * cbase + ratio * var
2782 (also holds in the case ratio == -1, TODO. */
2784 if (TREE_CODE (cbase) == INTEGER_CST)
2786 offset = - ratio * int_cst_value (cbase);
2787 cost += difference_cost (data,
2788 ubase, integer_zero_node,
2789 &symbol_present, &var_present, &offset,
2792 else if (ratio == 1)
2794 cost += difference_cost (data,
2796 &symbol_present, &var_present, &offset,
2801 cost += force_var_cost (data, cbase, depends_on);
2802 cost += add_cost (TYPE_MODE (ctype));
2803 cost += difference_cost (data,
2804 ubase, integer_zero_node,
2805 &symbol_present, &var_present, &offset,
2809 /* If we are after the increment, the value of the candidate is higher by
2811 if (stmt_after_increment (data->current_loop, cand, at))
2812 offset -= ratio * cstepi;
2814 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2815 (symbol/var/const parts may be omitted). If we are looking for an address,
2816 find the cost of addressing this. */
2818 return get_address_cost (symbol_present, var_present, offset, ratio);
2820 /* Otherwise estimate the costs for computing the expression. */
2821 aratio = ratio > 0 ? ratio : -ratio;
2822 if (!symbol_present && !var_present && !offset)
2825 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2831 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2835 /* Symbol + offset should be compile-time computable. */
2836 && (symbol_present || offset))
2839 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2843 /* Just get the expression, expand it and measure the cost. */
2844 tree comp = get_computation_at (data->current_loop, use, cand, at);
2850 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2852 return computation_cost (comp);
2856 /* Determines the cost of the computation by that USE is expressed
2857 from induction variable CAND. If ADDRESS_P is true, we just need
2858 to create an address from it, otherwise we want to get it into
2859 register. A set of invariants we depend on is stored in
2863 get_computation_cost (struct ivopts_data *data,
2864 struct iv_use *use, struct iv_cand *cand,
2865 bool address_p, bitmap *depends_on)
2867 return get_computation_cost_at (data,
2868 use, cand, address_p, depends_on, use->stmt);
2871 /* Determines cost of basing replacement of USE on CAND in a generic
2875 determine_use_iv_cost_generic (struct ivopts_data *data,
2876 struct iv_use *use, struct iv_cand *cand)
2879 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2881 set_use_iv_cost (data, use, cand, cost, depends_on);
2884 /* Determines cost of basing replacement of USE on CAND in an address. */
2887 determine_use_iv_cost_address (struct ivopts_data *data,
2888 struct iv_use *use, struct iv_cand *cand)
2891 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2893 set_use_iv_cost (data, use, cand, cost, depends_on);
2896 /* Computes value of induction variable IV in iteration NITER. */
2899 iv_value (struct iv *iv, tree niter)
2902 tree type = TREE_TYPE (iv->base);
2904 niter = fold_convert (type, niter);
2905 val = fold (build2 (MULT_EXPR, type, iv->step, unsave_expr_now (niter)));
2907 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2910 /* Computes value of candidate CAND at position AT in iteration NITER. */
2913 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2915 tree val = iv_value (cand->iv, niter);
2916 tree type = TREE_TYPE (cand->iv->base);
2918 if (stmt_after_increment (loop, cand, at))
2919 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2924 /* Check whether it is possible to express the condition in USE by comparison
2925 of candidate CAND. If so, store the comparison code to COMPARE and the
2926 value compared with to BOUND. */
2929 may_eliminate_iv (struct loop *loop,
2930 struct iv_use *use, struct iv_cand *cand,
2931 enum tree_code *compare, tree *bound)
2934 struct tree_niter_desc *niter, new_niter;
2935 tree wider_type, type, base;
2937 /* For now just very primitive -- we work just for the single exit condition,
2938 and are quite conservative about the possible overflows. TODO -- both of
2939 these can be improved. */
2940 exit = single_dom_exit (loop);
2943 if (use->stmt != last_stmt (exit->src))
2946 niter = &loop_data (loop)->niter;
2948 || !integer_nonzerop (niter->assumptions)
2949 || !integer_zerop (niter->may_be_zero))
2952 if (exit->flags & EDGE_TRUE_VALUE)
2957 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2959 /* Let us check there is not some problem with overflows, by checking that
2960 the number of iterations is unchanged. */
2961 base = cand->iv->base;
2962 type = TREE_TYPE (base);
2963 if (stmt_after_increment (loop, cand, use->stmt))
2964 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2966 new_niter.niter = NULL_TREE;
2967 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2968 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2970 if (!new_niter.niter
2971 || !integer_nonzerop (new_niter.assumptions)
2972 || !integer_zerop (new_niter.may_be_zero))
2975 wider_type = TREE_TYPE (new_niter.niter);
2976 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2977 wider_type = TREE_TYPE (niter->niter);
2978 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2979 fold_convert (wider_type, new_niter.niter), 0))
2985 /* Determines cost of basing replacement of USE on CAND in a condition. */
2988 determine_use_iv_cost_condition (struct ivopts_data *data,
2989 struct iv_use *use, struct iv_cand *cand)
2992 enum tree_code compare;
2994 /* Only consider real candidates. */
2997 set_use_iv_cost (data, use, cand, INFTY, NULL);
3001 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3003 bitmap depends_on = NULL;
3004 unsigned cost = force_var_cost (data, bound, &depends_on);
3006 set_use_iv_cost (data, use, cand, cost, depends_on);
3010 /* The induction variable elimination failed; just express the original
3011 giv. If it is compared with an invariant, note that we cannot get
3013 if (TREE_CODE (*use->op_p) == SSA_NAME)
3014 record_invariant (data, *use->op_p, true);
3017 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3018 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3021 determine_use_iv_cost_generic (data, use, cand);
3024 /* Checks whether it is possible to replace the final value of USE by
3025 a direct computation. If so, the formula is stored to *VALUE. */
3028 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3031 struct tree_niter_desc *niter;
3033 exit = single_dom_exit (loop);
3037 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3038 bb_for_stmt (use->stmt)));
3040 niter = &loop_data (loop)->niter;
3042 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3043 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3046 *value = iv_value (use->iv, niter->niter);
3051 /* Determines cost of replacing final value of USE using CAND. */
3054 determine_use_iv_cost_outer (struct ivopts_data *data,
3055 struct iv_use *use, struct iv_cand *cand)
3061 struct loop *loop = data->current_loop;
3065 if (!may_replace_final_value (loop, use, &value))
3067 set_use_iv_cost (data, use, cand, INFTY, NULL);
3072 cost = force_var_cost (data, value, &depends_on);
3074 cost /= AVG_LOOP_NITER (loop);
3076 set_use_iv_cost (data, use, cand, cost, depends_on);
3080 exit = single_dom_exit (loop);
3083 /* If there is just a single exit, we may use value of the candidate
3084 after we take it to determine the value of use. */
3085 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3086 last_stmt (exit->src));
3088 cost /= AVG_LOOP_NITER (loop);
3092 /* Otherwise we just need to compute the iv. */
3093 cost = get_computation_cost (data, use, cand, false, &depends_on);
3096 set_use_iv_cost (data, use, cand, cost, depends_on);
3099 /* Determines cost of basing replacement of USE on CAND. */
3102 determine_use_iv_cost (struct ivopts_data *data,
3103 struct iv_use *use, struct iv_cand *cand)
3107 case USE_NONLINEAR_EXPR:
3108 determine_use_iv_cost_generic (data, use, cand);
3112 determine_use_iv_cost_outer (data, use, cand);
3116 determine_use_iv_cost_address (data, use, cand);
3120 determine_use_iv_cost_condition (data, use, cand);
3128 /* Determines costs of basing the use of the iv on an iv candidate. */
3131 determine_use_iv_costs (struct ivopts_data *data)
3135 struct iv_cand *cand;
3137 data->consider_all_candidates = (n_iv_cands (data)
3138 <= CONSIDER_ALL_CANDIDATES_BOUND);
3140 alloc_use_cost_map (data);
3142 if (!data->consider_all_candidates)
3144 /* Add the important candidate entries. */
3145 for (j = 0; j < n_iv_cands (data); j++)
3147 cand = iv_cand (data, j);
3148 if (!cand->important)
3150 for (i = 0; i < n_iv_uses (data); i++)
3152 use = iv_use (data, i);
3153 determine_use_iv_cost (data, use, cand);
3158 for (i = 0; i < n_iv_uses (data); i++)
3160 use = iv_use (data, i);
3162 if (data->consider_all_candidates)
3164 for (j = 0; j < n_iv_cands (data); j++)
3166 cand = iv_cand (data, j);
3167 determine_use_iv_cost (data, use, cand);
3172 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3174 cand = iv_cand (data, j);
3175 if (!cand->important)
3176 determine_use_iv_cost (data, use, cand);
3181 if (dump_file && (dump_flags & TDF_DETAILS))
3183 fprintf (dump_file, "Use-candidate costs:\n");
3185 for (i = 0; i < n_iv_uses (data); i++)
3187 use = iv_use (data, i);
3189 fprintf (dump_file, "Use %d:\n", i);
3190 fprintf (dump_file, " cand\tcost\tdepends on\n");
3191 for (j = 0; j < use->n_map_members; j++)
3193 if (!use->cost_map[j].cand
3194 || use->cost_map[j].cost == INFTY)
3197 fprintf (dump_file, " %d\t%d\t",
3198 use->cost_map[j].cand->id,
3199 use->cost_map[j].cost);
3200 if (use->cost_map[j].depends_on)
3201 bitmap_print (dump_file,
3202 use->cost_map[j].depends_on, "","");
3203 fprintf (dump_file, "\n");
3206 fprintf (dump_file, "\n");
3208 fprintf (dump_file, "\n");
3212 /* Determines cost of the candidate CAND. */
3215 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3217 unsigned cost_base, cost_step;
3227 /* There are two costs associated with the candidate -- its increment
3228 and its initialization. The second is almost negligible for any loop
3229 that rolls enough, so we take it just very little into account. */
3231 base = cand->iv->base;
3232 cost_base = force_var_cost (data, base, NULL);
3233 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3235 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3237 /* Prefer the original iv unless we may gain something by replacing it. */
3238 if (cand->pos == IP_ORIGINAL)
3241 /* Prefer not to insert statements into latch unless there are some
3242 already (so that we do not create unnecessary jumps). */
3243 if (cand->pos == IP_END)
3245 bb = ip_end_pos (data->current_loop);
3246 last = last_stmt (bb);
3249 || TREE_CODE (last) == LABEL_EXPR)
3254 /* Determines costs of computation of the candidates. */
3257 determine_iv_costs (struct ivopts_data *data)
3261 if (dump_file && (dump_flags & TDF_DETAILS))
3263 fprintf (dump_file, "Candidate costs:\n");
3264 fprintf (dump_file, " cand\tcost\n");
3267 for (i = 0; i < n_iv_cands (data); i++)
3269 struct iv_cand *cand = iv_cand (data, i);
3271 determine_iv_cost (data, cand);
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3274 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3277 if (dump_file && (dump_flags & TDF_DETAILS))
3278 fprintf (dump_file, "\n");
3281 /* Calculates cost for having SIZE induction variables. */
3284 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3286 return global_cost_for_size (size,
3287 loop_data (data->current_loop)->regs_used,
3291 /* For each size of the induction variable set determine the penalty. */
3294 determine_set_costs (struct ivopts_data *data)
3298 struct loop *loop = data->current_loop;
3300 /* We use the following model (definitely improvable, especially the
3301 cost function -- TODO):
3303 We estimate the number of registers available (using MD data), name it A.
3305 We estimate the number of registers used by the loop, name it U. This
3306 number is obtained as the number of loop phi nodes (not counting virtual
3307 registers and bivs) + the number of variables from outside of the loop.
3309 We set a reserve R (free regs that are used for temporary computations,
3310 etc.). For now the reserve is a constant 3.
3312 Let I be the number of induction variables.
3314 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3315 make a lot of ivs without a reason).
3316 -- if A - R < U + I <= A, the cost is I * PRES_COST
3317 -- if U + I > A, the cost is I * PRES_COST and
3318 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3320 if (dump_file && (dump_flags & TDF_DETAILS))
3322 fprintf (dump_file, "Global costs:\n");
3323 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3324 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3325 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3326 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3330 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3332 op = PHI_RESULT (phi);
3334 if (!is_gimple_reg (op))
3337 if (get_iv (data, op))
3343 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3345 struct version_info *info = ver_info (data, j);
3347 if (info->inv_id && info->has_nonlin_use)
3351 loop_data (loop)->regs_used = n;
3352 if (dump_file && (dump_flags & TDF_DETAILS))
3353 fprintf (dump_file, " regs_used %d\n", n);
3355 if (dump_file && (dump_flags & TDF_DETAILS))
3357 fprintf (dump_file, " cost for size:\n");
3358 fprintf (dump_file, " ivs\tcost\n");
3359 for (j = 0; j <= 2 * target_avail_regs; j++)
3360 fprintf (dump_file, " %d\t%d\n", j,
3361 ivopts_global_cost_for_size (data, j));
3362 fprintf (dump_file, "\n");
3366 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3367 taken from the set SOL and they may depend on invariants in the set INV.
3368 The really used candidate and invariants are noted to USED_IVS and
3372 find_best_candidate (struct ivopts_data *data,
3373 struct iv_use *use, bitmap sol, bitmap inv,
3374 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3377 unsigned best_cost = INFTY, cost;
3378 struct iv_cand *cnd = NULL, *acnd;
3379 bitmap depends_on = NULL, asol;
3381 if (data->consider_all_candidates)
3385 asol = BITMAP_XMALLOC ();
3386 bitmap_a_and_b (asol, sol, use->related_cands);
3389 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3391 acnd = iv_cand (data, c);
3392 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3396 if (cost > best_cost)
3398 if (cost == best_cost)
3400 /* Prefer the cheaper iv. */
3401 if (acnd->cost >= cnd->cost)
3407 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3410 bitmap_a_or_b (used_inv, used_inv, depends_on);
3418 if (cnd && used_ivs)
3419 bitmap_set_bit (used_ivs, cnd->id);
3424 if (!data->consider_all_candidates)
3425 BITMAP_XFREE (asol);
3430 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3431 induction variable candidates and invariants from the sets. Only
3432 uses 0 .. MAX_USE - 1 are taken into account. */
3435 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3439 unsigned cost = 0, size = 0, acost;
3441 struct iv_cand *cand;
3442 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3444 for (i = 0; i < max_use; i++)
3446 use = iv_use (data, i);
3447 acost = find_best_candidate (data, use, sol, inv,
3448 used_ivs, used_inv, NULL);
3451 BITMAP_XFREE (used_ivs);
3452 BITMAP_XFREE (used_inv);
3458 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3460 cand = iv_cand (data, i);
3462 /* Do not count the pseudocandidates. */
3468 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3469 cost += ivopts_global_cost_for_size (data, size);
3471 bitmap_copy (sol, used_ivs);
3472 bitmap_copy (inv, used_inv);
3474 BITMAP_XFREE (used_ivs);
3475 BITMAP_XFREE (used_inv);
3480 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3481 induction variable candidates and invariants from the sets. */
3484 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3486 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3489 /* Tries to extend the sets IVS and INV in the best possible way in order
3490 to express the USE. */
3493 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3496 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3497 bitmap best_ivs = BITMAP_XMALLOC ();
3498 bitmap best_inv = BITMAP_XMALLOC ();
3499 bitmap act_ivs = BITMAP_XMALLOC ();
3500 bitmap act_inv = BITMAP_XMALLOC ();
3502 struct cost_pair *cp;
3504 bitmap_copy (best_ivs, ivs);
3505 bitmap_copy (best_inv, inv);
3507 for (i = 0; i < use->n_map_members; i++)
3509 cp = use->cost_map + i;
3510 if (cp->cost == INFTY)
3513 bitmap_copy (act_ivs, ivs);
3514 bitmap_set_bit (act_ivs, cp->cand->id);
3516 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3518 bitmap_copy (act_inv, inv);
3519 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3521 if (act_cost < best_cost)
3523 best_cost = act_cost;
3524 bitmap_copy (best_ivs, act_ivs);
3525 bitmap_copy (best_inv, act_inv);
3529 bitmap_copy (ivs, best_ivs);
3530 bitmap_copy (inv, best_inv);
3532 BITMAP_XFREE (best_ivs);
3533 BITMAP_XFREE (best_inv);
3534 BITMAP_XFREE (act_ivs);
3535 BITMAP_XFREE (act_inv);
3537 return (best_cost != INFTY);
3540 /* Finds an initial set of IVS and invariants INV. We do this by simply
3541 choosing the best candidate for each use. */
3544 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3548 for (i = 0; i < n_iv_uses (data); i++)
3549 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3552 return set_cost (data, ivs, inv);
3555 /* Tries to improve set of induction variables IVS and invariants INV to get
3556 it better than COST. */
3559 try_improve_iv_set (struct ivopts_data *data,
3560 bitmap ivs, bitmap inv, unsigned *cost)
3563 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3564 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3566 /* Try altering the set of induction variables by one. */
3567 for (i = 0; i < n_iv_cands (data); i++)
3569 bitmap_copy (new_ivs, ivs);
3570 bitmap_copy (new_inv, inv);
3572 if (bitmap_bit_p (ivs, i))
3573 bitmap_clear_bit (new_ivs, i);
3575 bitmap_set_bit (new_ivs, i);
3577 acost = set_cost (data, new_ivs, new_inv);
3583 best_new_ivs = BITMAP_XMALLOC ();
3584 best_new_inv = BITMAP_XMALLOC ();
3588 bitmap_copy (best_new_ivs, new_ivs);
3589 bitmap_copy (best_new_inv, new_inv);
3592 /* Ditto for invariants. */
3593 for (i = 1; i <= data->max_inv_id; i++)
3595 if (ver_info (data, i)->has_nonlin_use)
3598 bitmap_copy (new_ivs, ivs);
3599 bitmap_copy (new_inv, inv);
3601 if (bitmap_bit_p (inv, i))
3602 bitmap_clear_bit (new_inv, i);
3604 bitmap_set_bit (new_inv, i);
3606 acost = set_cost (data, new_ivs, new_inv);
3612 best_new_ivs = BITMAP_XMALLOC ();
3613 best_new_inv = BITMAP_XMALLOC ();
3617 bitmap_copy (best_new_ivs, new_ivs);
3618 bitmap_copy (best_new_inv, new_inv);
3621 BITMAP_XFREE (new_ivs);
3622 BITMAP_XFREE (new_inv);
3627 bitmap_copy (ivs, best_new_ivs);
3628 bitmap_copy (inv, best_new_inv);
3629 BITMAP_XFREE (best_new_ivs);
3630 BITMAP_XFREE (best_new_inv);
3634 /* Attempts to find the optimal set of induction variables. We do simple
3635 greedy heuristic -- we try to replace at most one candidate in the selected
3636 solution and remove the unused ivs while this improves the cost. */
3639 find_optimal_iv_set (struct ivopts_data *data)
3642 bitmap set = BITMAP_XMALLOC ();
3643 bitmap inv = BITMAP_XMALLOC ();
3646 /* Set the upper bound. */
3647 cost = get_initial_solution (data, set, inv);
3650 if (dump_file && (dump_flags & TDF_DETAILS))
3651 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3657 if (dump_file && (dump_flags & TDF_DETAILS))
3659 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3660 bitmap_print (dump_file, set, "", "");
3661 fprintf (dump_file, " invariants ");
3662 bitmap_print (dump_file, inv, "", "");
3663 fprintf (dump_file, "\n");
3666 while (try_improve_iv_set (data, set, inv, &cost))
3668 if (dump_file && (dump_flags & TDF_DETAILS))
3670 fprintf (dump_file, "Improved to (cost %d): ", cost);
3671 bitmap_print (dump_file, set, "", "");
3672 fprintf (dump_file, " invariants ");
3673 bitmap_print (dump_file, inv, "", "");
3674 fprintf (dump_file, "\n");
3678 if (dump_file && (dump_flags & TDF_DETAILS))
3679 fprintf (dump_file, "Final cost %d\n\n", cost);
3681 for (i = 0; i < n_iv_uses (data); i++)
3683 use = iv_use (data, i);
3684 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3692 /* Creates a new induction variable corresponding to CAND. */
3695 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3697 block_stmt_iterator incr_pos;
3707 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3711 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3716 /* Mark that the iv is preserved. */
3717 name_info (data, cand->var_before)->preserve_biv = true;
3718 name_info (data, cand->var_after)->preserve_biv = true;
3720 /* Rewrite the increment so that it uses var_before directly. */
3721 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3726 gimple_add_tmp_var (cand->var_before);
3727 add_referenced_tmp_var (cand->var_before);
3729 base = unshare_expr (cand->iv->base);
3731 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3732 &incr_pos, after, &cand->var_before, &cand->var_after);
3735 /* Creates new induction variables described in SET. */
3738 create_new_ivs (struct ivopts_data *data, bitmap set)
3741 struct iv_cand *cand;
3743 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3745 cand = iv_cand (data, i);
3746 create_new_iv (data, cand);
3750 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3751 is true, remove also the ssa name defined by the statement. */
3754 remove_statement (tree stmt, bool including_defined_name)
3756 if (TREE_CODE (stmt) == PHI_NODE)
3758 if (!including_defined_name)
3760 /* Prevent the ssa name defined by the statement from being removed. */
3761 SET_PHI_RESULT (stmt, NULL);
3763 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3767 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3773 /* Rewrites USE (definition of iv used in a nonlinear expression)
3774 using candidate CAND. */
3777 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3778 struct iv_use *use, struct iv_cand *cand)
3780 tree comp = unshare_expr (get_computation (data->current_loop,
3782 tree op, stmts, tgt, ass;
3783 block_stmt_iterator bsi, pbsi;
3785 switch (TREE_CODE (use->stmt))
3788 tgt = PHI_RESULT (use->stmt);
3790 /* If we should keep the biv, do not replace it. */
3791 if (name_info (data, tgt)->preserve_biv)
3794 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3795 while (!bsi_end_p (pbsi)
3796 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3804 tgt = TREE_OPERAND (use->stmt, 0);
3805 bsi = stmt_for_bsi (use->stmt);
3812 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3814 if (TREE_CODE (use->stmt) == PHI_NODE)
3817 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3818 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3819 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3820 remove_statement (use->stmt, false);
3821 SSA_NAME_DEF_STMT (tgt) = ass;
3826 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3827 TREE_OPERAND (use->stmt, 1) = op;
3831 /* Replaces ssa name in index IDX by its basic variable. Callback for
3835 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3836 void *data ATTRIBUTE_UNUSED)
3838 if (TREE_CODE (*idx) == SSA_NAME)
3839 *idx = SSA_NAME_VAR (*idx);
3843 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3846 unshare_and_remove_ssa_names (tree ref)
3848 ref = unshare_expr (ref);
3849 for_each_index (&ref, idx_remove_ssa_names, NULL);
3854 /* Rewrites base of memory access OP with expression WITH in statement
3855 pointed to by BSI. */
3858 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3860 tree var = get_base_address (*op), new_var, new_name, copy, name;
3863 if (!var || TREE_CODE (with) != SSA_NAME)
3866 if (TREE_CODE (var) == INDIRECT_REF)
3867 var = TREE_OPERAND (var, 0);
3868 if (TREE_CODE (var) == SSA_NAME)
3871 var = SSA_NAME_VAR (var);
3873 else if (DECL_P (var))
3878 if (var_ann (var)->type_mem_tag)
3879 var = var_ann (var)->type_mem_tag;
3881 /* We need to add a memory tag for the variable. But we do not want
3882 to add it to the temporary used for the computations, since this leads
3883 to problems in redundancy elimination when there are common parts
3884 in two computations referring to the different arrays. So we copy
3885 the variable to a new temporary. */
3886 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3888 new_name = duplicate_ssa_name (name, copy);
3891 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3892 add_referenced_tmp_var (new_var);
3893 var_ann (new_var)->type_mem_tag = var;
3894 new_name = make_ssa_name (new_var, copy);
3896 TREE_OPERAND (copy, 0) = new_name;
3897 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3903 if (TREE_CODE (*op) == INDIRECT_REF)
3904 orig = REF_ORIGINAL (*op);
3906 orig = unshare_and_remove_ssa_names (*op);
3908 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3909 /* Record the original reference, for purposes of alias analysis. */
3910 REF_ORIGINAL (*op) = orig;
3913 /* Rewrites USE (address that is an iv) using candidate CAND. */
3916 rewrite_use_address (struct ivopts_data *data,
3917 struct iv_use *use, struct iv_cand *cand)
3919 tree comp = unshare_expr (get_computation (data->current_loop,
3921 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3923 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3926 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3928 rewrite_address_base (&bsi, use->op_p, op);
3931 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3935 rewrite_use_compare (struct ivopts_data *data,
3936 struct iv_use *use, struct iv_cand *cand)
3939 tree *op_p, cond, op, stmts, bound;
3940 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3941 enum tree_code compare;
3943 if (may_eliminate_iv (data->current_loop,
3944 use, cand, &compare, &bound))
3946 op = force_gimple_operand (unshare_expr (bound), &stmts,
3950 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3952 *use->op_p = build2 (compare, boolean_type_node,
3953 var_at_stmt (data->current_loop,
3954 cand, use->stmt), op);
3955 modify_stmt (use->stmt);
3959 /* The induction variable elimination failed; just express the original
3961 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3964 op_p = &TREE_OPERAND (cond, 0);
3965 if (TREE_CODE (*op_p) != SSA_NAME
3966 || zero_p (get_iv (data, *op_p)->step))
3967 op_p = &TREE_OPERAND (cond, 1);
3969 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3971 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3976 /* Ensure that operand *OP_P may be used at the end of EXIT without
3977 violating loop closed ssa form. */
3980 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3983 struct loop *def_loop;
3986 use = USE_FROM_PTR (op_p);
3987 if (TREE_CODE (use) != SSA_NAME)
3990 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3994 def_loop = def_bb->loop_father;
3995 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3998 /* Try finding a phi node that copies the value out of the loop. */
3999 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4000 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4005 /* Create such a phi node. */
4006 tree new_name = duplicate_ssa_name (use, NULL);
4008 phi = create_phi_node (new_name, exit->dest);
4009 SSA_NAME_DEF_STMT (new_name) = phi;
4010 add_phi_arg (&phi, use, exit);
4013 SET_USE (op_p, PHI_RESULT (phi));
4016 /* Ensure that operands of STMT may be used at the end of EXIT without
4017 violating loop closed ssa form. */
4020 protect_loop_closed_ssa_form (edge exit, tree stmt)
4024 v_may_def_optype v_may_defs;
4027 get_stmt_operands (stmt);
4029 uses = STMT_USE_OPS (stmt);
4030 for (i = 0; i < NUM_USES (uses); i++)
4031 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4033 vuses = STMT_VUSE_OPS (stmt);
4034 for (i = 0; i < NUM_VUSES (vuses); i++)
4035 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4037 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4038 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4039 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4042 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4043 so that they are emitted on the correct place, and so that the loop closed
4044 ssa form is preserved. */
4047 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4049 tree_stmt_iterator tsi;
4050 block_stmt_iterator bsi;
4051 tree phi, stmt, def, next;
4053 if (exit->dest->pred->pred_next)
4054 split_loop_exit_edge (exit);
4056 if (TREE_CODE (stmts) == STATEMENT_LIST)
4058 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4059 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4062 protect_loop_closed_ssa_form (exit, stmts);
4064 /* Ensure there is label in exit->dest, so that we can
4066 tree_block_label (exit->dest);
4067 bsi = bsi_after_labels (exit->dest);
4068 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4073 for (phi = phi_nodes (exit->dest); phi; phi = next)
4075 next = TREE_CHAIN (phi);
4077 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4079 def = PHI_RESULT (phi);
4080 remove_statement (phi, false);
4081 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4083 SSA_NAME_DEF_STMT (def) = stmt;
4084 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4089 /* Rewrites the final value of USE (that is only needed outside of the loop)
4090 using candidate CAND. */
4093 rewrite_use_outer (struct ivopts_data *data,
4094 struct iv_use *use, struct iv_cand *cand)
4097 tree value, op, stmts, tgt;
4100 switch (TREE_CODE (use->stmt))
4103 tgt = PHI_RESULT (use->stmt);
4106 tgt = TREE_OPERAND (use->stmt, 0);
4112 exit = single_dom_exit (data->current_loop);
4118 bool ok = may_replace_final_value (data->current_loop, use, &value);
4122 value = get_computation_at (data->current_loop,
4123 use, cand, last_stmt (exit->src));
4125 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4127 /* If we will preserve the iv anyway and we would need to perform
4128 some computation to replace the final value, do nothing. */
4129 if (stmts && name_info (data, tgt)->preserve_biv)
4132 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4134 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4136 if (USE_FROM_PTR (use_p) == tgt)
4137 SET_USE (use_p, op);
4141 compute_phi_arg_on_exit (exit, stmts, op);
4143 /* Enable removal of the statement. We cannot remove it directly,
4144 since we may still need the aliasing information attached to the
4145 ssa name defined by it. */
4146 name_info (data, tgt)->iv->have_use_for = false;
4150 /* If the variable is going to be preserved anyway, there is nothing to
4152 if (name_info (data, tgt)->preserve_biv)
4155 /* Otherwise we just need to compute the iv. */
4156 rewrite_use_nonlinear_expr (data, use, cand);
4159 /* Rewrites USE using candidate CAND. */
4162 rewrite_use (struct ivopts_data *data,
4163 struct iv_use *use, struct iv_cand *cand)
4167 case USE_NONLINEAR_EXPR:
4168 rewrite_use_nonlinear_expr (data, use, cand);
4172 rewrite_use_outer (data, use, cand);
4176 rewrite_use_address (data, use, cand);
4180 rewrite_use_compare (data, use, cand);
4186 modify_stmt (use->stmt);
4189 /* Rewrite the uses using the selected induction variables. */
4192 rewrite_uses (struct ivopts_data *data)
4195 struct iv_cand *cand;
4198 for (i = 0; i < n_iv_uses (data); i++)
4200 use = iv_use (data, i);
4201 cand = use->selected;
4204 rewrite_use (data, use, cand);
4208 /* Removes the ivs that are not used after rewriting. */
4211 remove_unused_ivs (struct ivopts_data *data)
4215 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4217 struct version_info *info;
4219 info = ver_info (data, j);
4221 && !zero_p (info->iv->step)
4223 && !info->iv->have_use_for
4224 && !info->preserve_biv)
4225 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4229 /* Frees data allocated by the optimization of a single loop. */
4232 free_loop_data (struct ivopts_data *data)
4236 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4238 struct version_info *info;
4240 info = ver_info (data, i);
4244 info->has_nonlin_use = false;
4245 info->preserve_biv = false;
4248 bitmap_clear (data->relevant);
4250 for (i = 0; i < n_iv_uses (data); i++)
4252 struct iv_use *use = iv_use (data, i);
4255 BITMAP_XFREE (use->related_cands);
4256 for (j = 0; j < use->n_map_members; j++)
4257 if (use->cost_map[j].depends_on)
4258 BITMAP_XFREE (use->cost_map[j].depends_on);
4259 free (use->cost_map);
4262 VARRAY_POP_ALL (data->iv_uses);
4264 for (i = 0; i < n_iv_cands (data); i++)
4266 struct iv_cand *cand = iv_cand (data, i);
4272 VARRAY_POP_ALL (data->iv_candidates);
4274 if (data->version_info_size < num_ssa_names)
4276 data->version_info_size = 2 * num_ssa_names;
4277 free (data->version_info);
4278 data->version_info = xcalloc (data->version_info_size,
4279 sizeof (struct version_info));
4282 data->max_inv_id = 0;
4284 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4286 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4288 SET_DECL_RTL (obj, NULL_RTX);
4290 VARRAY_POP_ALL (decl_rtl_to_reset);
4293 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4297 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4301 for (i = 1; i < loops->num; i++)
4302 if (loops->parray[i])
4304 free (loops->parray[i]->aux);
4305 loops->parray[i]->aux = NULL;
4308 free_loop_data (data);
4309 free (data->version_info);
4310 BITMAP_XFREE (data->relevant);
4312 VARRAY_FREE (decl_rtl_to_reset);
4313 VARRAY_FREE (data->iv_uses);
4314 VARRAY_FREE (data->iv_candidates);
4317 /* Optimizes the LOOP. Returns true if anything changed. */
4320 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4322 bool changed = false;
4326 data->current_loop = loop;
4328 if (dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, "Processing loop %d\n", loop->num);
4332 exit = single_dom_exit (loop);
4335 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4336 exit->src->index, exit->dest->index);
4337 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4338 fprintf (dump_file, "\n");
4341 fprintf (dump_file, "\n");
4344 /* For each ssa name determines whether it behaves as an induction variable
4346 if (!find_induction_variables (data))
4349 /* Finds interesting uses (item 1). */
4350 find_interesting_uses (data);
4351 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4354 /* Finds candidates for the induction variables (item 2). */
4355 find_iv_candidates (data);
4357 /* Calculates the costs (item 3, part 1). */
4358 determine_use_iv_costs (data);
4359 determine_iv_costs (data);
4360 determine_set_costs (data);
4362 /* Find the optimal set of induction variables (item 3, part 2). */
4363 iv_set = find_optimal_iv_set (data);
4368 /* Create the new induction variables (item 4, part 1). */
4369 create_new_ivs (data, iv_set);
4371 /* Rewrite the uses (item 4, part 2). */
4372 rewrite_uses (data);
4374 /* Remove the ivs that are unused after rewriting. */
4375 remove_unused_ivs (data);
4377 loop_commit_inserts ();
4379 BITMAP_XFREE (iv_set);
4381 /* We have changed the structure of induction variables; it might happen
4382 that definitions in the scev database refer to some of them that were
4387 free_loop_data (data);
4392 /* Main entry point. Optimizes induction variables in LOOPS. */
4395 tree_ssa_iv_optimize (struct loops *loops)
4398 struct ivopts_data data;
4400 tree_ssa_iv_optimize_init (loops, &data);
4402 /* Optimize the loops starting with the innermost ones. */
4403 loop = loops->tree_root;
4407 #ifdef ENABLE_CHECKING
4408 verify_loop_closed_ssa ();
4412 /* Scan the loops, inner ones first. */
4413 while (loop != loops->tree_root)
4415 if (dump_file && (dump_flags & TDF_DETAILS))
4416 flow_loop_dump (loop, dump_file, NULL, 1);
4417 if (tree_ssa_iv_optimize_loop (&data, loop))
4419 #ifdef ENABLE_CHECKING
4420 verify_loop_closed_ssa ();
4435 tree_ssa_iv_optimize_finalize (loops, &data);