1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree base_object; /* A memory object to that the induction variable points. */
108 tree step; /* Step of the iv (constant only). */
109 tree ssa_name; /* The ssa name with the value. */
110 bool biv_p; /* Is it a biv? */
111 bool have_use_for; /* Do we already have a use for it? */
112 unsigned use_id; /* The identifier in the use if it is the case. */
115 /* Per-ssa version information (induction variable descriptions, etc.). */
118 tree name; /* The ssa name. */
119 struct iv *iv; /* Induction variable description. */
120 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
121 an expression that is not an induction variable. */
122 unsigned inv_id; /* Id of an invariant. */
123 bool preserve_biv; /* For the original biv, whether to preserve it. */
126 /* Information attached to loop. */
129 struct tree_niter_desc niter;
130 /* Number of iterations. */
132 unsigned regs_used; /* Number of registers used. */
138 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
139 USE_OUTER, /* The induction variable is used outside the loop. */
140 USE_ADDRESS, /* Use in an address. */
141 USE_COMPARE /* Use is a compare. */
144 /* The candidate - cost pair. */
147 struct iv_cand *cand; /* The candidate. */
148 unsigned cost; /* The cost. */
149 bitmap depends_on; /* The list of invariants that have to be
156 unsigned id; /* The id of the use. */
157 enum use_type type; /* Type of the use. */
158 struct iv *iv; /* The induction variable it is based on. */
159 tree stmt; /* Statement in that it occurs. */
160 tree *op_p; /* The place where it occurs. */
161 bitmap related_cands; /* The set of "related" iv candidates. */
163 unsigned n_map_members; /* Number of candidates in the cost_map list. */
164 struct cost_pair *cost_map;
165 /* The costs wrto the iv candidates. */
167 struct iv_cand *selected;
168 /* The selected candidate. */
171 /* The position where the iv is computed. */
174 IP_NORMAL, /* At the end, just before the exit condition. */
175 IP_END, /* At the end of the latch block. */
176 IP_ORIGINAL /* The original biv. */
179 /* The induction variable candidate. */
182 unsigned id; /* The number of the candidate. */
183 bool important; /* Whether this is an "important" candidate, i.e. such
184 that it should be considered by all uses. */
185 enum iv_position pos; /* Where it is computed. */
186 tree incremented_at; /* For original biv, the statement where it is
188 tree var_before; /* The variable used for it before increment. */
189 tree var_after; /* The variable used for it after increment. */
190 struct iv *iv; /* The value of the candidate. NULL for
191 "pseudocandidate" used to indicate the possibility
192 to replace the final value of an iv by direct
193 computation of the value. */
194 unsigned cost; /* Cost of the candidate. */
197 /* The data used by the induction variable optimizations. */
201 /* The currently optimized loop. */
202 struct loop *current_loop;
204 /* The size of version_info array allocated. */
205 unsigned version_info_size;
207 /* The array of information for the ssa names. */
208 struct version_info *version_info;
210 /* The bitmap of indices in version_info whose value was changed. */
213 /* The maximum invariant id. */
216 /* The uses of induction variables. */
219 /* The candidates. */
220 varray_type iv_candidates;
222 /* A bitmap of important candidates. */
223 bitmap important_candidates;
225 /* Whether to consider just related and important candidates when replacing a
227 bool consider_all_candidates;
230 /* Bound on number of candidates below that all candidates are considered. */
232 #define CONSIDER_ALL_CANDIDATES_BOUND \
233 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
235 /* If there are more iv occurrences, we just give up (it is quite unlikely that
236 optimizing such a loop would help, and it would take ages). */
238 #define MAX_CONSIDERED_USES \
239 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
241 /* The list of trees for that the decl_rtl field must be reset is stored
244 static varray_type decl_rtl_to_reset;
246 /* Number of uses recorded in DATA. */
248 static inline unsigned
249 n_iv_uses (struct ivopts_data *data)
251 return VARRAY_ACTIVE_SIZE (data->iv_uses);
254 /* Ith use recorded in DATA. */
256 static inline struct iv_use *
257 iv_use (struct ivopts_data *data, unsigned i)
259 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
262 /* Number of candidates recorded in DATA. */
264 static inline unsigned
265 n_iv_cands (struct ivopts_data *data)
267 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
270 /* Ith candidate recorded in DATA. */
272 static inline struct iv_cand *
273 iv_cand (struct ivopts_data *data, unsigned i)
275 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
278 /* The data for LOOP. */
280 static inline struct loop_data *
281 loop_data (struct loop *loop)
286 /* The single loop exit if it dominates the latch, NULL otherwise. */
289 single_dom_exit (struct loop *loop)
291 edge exit = loop->single_exit;
296 if (!just_once_each_iteration_p (loop, exit->src))
302 /* Dumps information about the induction variable IV to FILE. */
304 extern void dump_iv (FILE *, struct iv *);
306 dump_iv (FILE *file, struct iv *iv)
310 fprintf (file, "ssa name ");
311 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
312 fprintf (file, "\n");
315 fprintf (file, " type ");
316 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
317 fprintf (file, "\n");
321 fprintf (file, " base ");
322 print_generic_expr (file, iv->base, TDF_SLIM);
323 fprintf (file, "\n");
325 fprintf (file, " step ");
326 print_generic_expr (file, iv->step, TDF_SLIM);
327 fprintf (file, "\n");
331 fprintf (file, " invariant ");
332 print_generic_expr (file, iv->base, TDF_SLIM);
333 fprintf (file, "\n");
338 fprintf (file, " base object ");
339 print_generic_expr (file, iv->base_object, TDF_SLIM);
340 fprintf (file, "\n");
344 fprintf (file, " is a biv\n");
347 /* Dumps information about the USE to FILE. */
349 extern void dump_use (FILE *, struct iv_use *);
351 dump_use (FILE *file, struct iv_use *use)
353 fprintf (file, "use %d\n", use->id);
357 case USE_NONLINEAR_EXPR:
358 fprintf (file, " generic\n");
362 fprintf (file, " outside\n");
366 fprintf (file, " address\n");
370 fprintf (file, " compare\n");
377 fprintf (file, " in statement ");
378 print_generic_expr (file, use->stmt, TDF_SLIM);
379 fprintf (file, "\n");
381 fprintf (file, " at position ");
383 print_generic_expr (file, *use->op_p, TDF_SLIM);
384 fprintf (file, "\n");
386 dump_iv (file, use->iv);
388 fprintf (file, " related candidates ");
389 dump_bitmap (file, use->related_cands);
392 /* Dumps information about the uses to FILE. */
394 extern void dump_uses (FILE *, struct ivopts_data *);
396 dump_uses (FILE *file, struct ivopts_data *data)
401 for (i = 0; i < n_iv_uses (data); i++)
403 use = iv_use (data, i);
405 dump_use (file, use);
406 fprintf (file, "\n");
410 /* Dumps information about induction variable candidate CAND to FILE. */
412 extern void dump_cand (FILE *, struct iv_cand *);
414 dump_cand (FILE *file, struct iv_cand *cand)
416 struct iv *iv = cand->iv;
418 fprintf (file, "candidate %d%s\n",
419 cand->id, cand->important ? " (important)" : "");
423 fprintf (file, " final value replacement\n");
430 fprintf (file, " incremented before exit test\n");
434 fprintf (file, " incremented at end\n");
438 fprintf (file, " original biv\n");
445 /* Returns the info for ssa version VER. */
447 static inline struct version_info *
448 ver_info (struct ivopts_data *data, unsigned ver)
450 return data->version_info + ver;
453 /* Returns the info for ssa name NAME. */
455 static inline struct version_info *
456 name_info (struct ivopts_data *data, tree name)
458 return ver_info (data, SSA_NAME_VERSION (name));
461 /* Checks whether there exists number X such that X * B = A, counting modulo
465 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
468 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
469 unsigned HOST_WIDE_INT inv, ex, val;
475 /* First divide the whole equation by 2 as long as possible. */
476 while (!(a & 1) && !(b & 1))
486 /* If b is still even, a is odd and there is no such x. */
490 /* Find the inverse of b. We compute it as
491 b^(2^(bits - 1) - 1) (mod 2^bits). */
494 for (i = 0; i < bits - 1; i++)
496 inv = (inv * ex) & mask;
497 ex = (ex * ex) & mask;
500 val = (a * inv) & mask;
502 gcc_assert (((val * b) & mask) == a);
504 if ((val >> (bits - 1)) & 1)
512 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
516 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
518 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
522 if (sbb == loop->latch)
528 return stmt == last_stmt (bb);
531 /* Returns true if STMT if after the place where the original induction
532 variable CAND is incremented. */
535 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
537 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
538 basic_block stmt_bb = bb_for_stmt (stmt);
539 block_stmt_iterator bsi;
541 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
544 if (stmt_bb != cand_bb)
547 /* Scan the block from the end, since the original ivs are usually
548 incremented at the end of the loop body. */
549 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
551 if (bsi_stmt (bsi) == cand->incremented_at)
553 if (bsi_stmt (bsi) == stmt)
558 /* Returns true if STMT if after the place where the induction variable
559 CAND is incremented in LOOP. */
562 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
570 return stmt_after_ip_normal_pos (loop, stmt);
573 return stmt_after_ip_original_pos (cand, stmt);
580 /* Initializes data structures used by the iv optimization pass, stored
581 in DATA. LOOPS is the loop tree. */
584 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
588 data->version_info_size = 2 * num_ssa_names;
589 data->version_info = xcalloc (data->version_info_size,
590 sizeof (struct version_info));
591 data->relevant = BITMAP_XMALLOC ();
592 data->max_inv_id = 0;
594 for (i = 1; i < loops->num; i++)
595 if (loops->parray[i])
596 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
598 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
599 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
600 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
603 /* Returns a memory object to that EXPR points. In case we are able to
604 determine that it does not point to any such object, NULL is returned. */
607 determine_base_object (tree expr)
609 enum tree_code code = TREE_CODE (expr);
610 tree base, obj, op0, op1;
612 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
621 obj = TREE_OPERAND (expr, 0);
622 base = get_base_address (obj);
625 return fold_convert (ptr_type_node, expr);
627 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
631 op0 = determine_base_object (TREE_OPERAND (expr, 0));
632 op1 = determine_base_object (TREE_OPERAND (expr, 1));
638 return (code == PLUS_EXPR
640 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
642 return fold (build (code, ptr_type_node, op0, op1));
645 return fold_convert (ptr_type_node, expr);
649 /* Allocates an induction variable with given initial value BASE and step STEP
653 alloc_iv (tree base, tree step)
655 struct iv *iv = xcalloc (1, sizeof (struct iv));
657 if (step && integer_zerop (step))
661 iv->base_object = determine_base_object (base);
664 iv->have_use_for = false;
666 iv->ssa_name = NULL_TREE;
671 /* Sets STEP and BASE for induction variable IV. */
674 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
676 struct version_info *info = name_info (data, iv);
678 gcc_assert (!info->iv);
680 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
681 info->iv = alloc_iv (base, step);
682 info->iv->ssa_name = iv;
685 /* Finds induction variable declaration for VAR. */
688 get_iv (struct ivopts_data *data, tree var)
692 if (!name_info (data, var)->iv)
694 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
697 || !flow_bb_inside_loop_p (data->current_loop, bb))
698 set_iv (data, var, var, NULL_TREE);
701 return name_info (data, var)->iv;
704 /* Determines the step of a biv defined in PHI. */
707 determine_biv_step (tree phi)
709 struct loop *loop = bb_for_stmt (phi)->loop_father;
710 tree name = PHI_RESULT (phi), base, step;
711 tree type = TREE_TYPE (name);
713 if (!is_gimple_reg (name))
716 if (!simple_iv (loop, phi, name, &base, &step))
720 return build_int_cst (type, 0);
725 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
728 abnormal_ssa_name_p (tree exp)
733 if (TREE_CODE (exp) != SSA_NAME)
736 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
739 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
740 abnormal phi node. Callback for for_each_index. */
743 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
744 void *data ATTRIBUTE_UNUSED)
746 if (TREE_CODE (base) == ARRAY_REF)
748 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
750 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
754 return !abnormal_ssa_name_p (*index);
757 /* Returns true if EXPR contains a ssa name that occurs in an
758 abnormal phi node. */
761 contains_abnormal_ssa_name_p (tree expr)
763 enum tree_code code = TREE_CODE (expr);
764 enum tree_code_class class = TREE_CODE_CLASS (code);
766 if (code == SSA_NAME)
767 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
769 if (code == INTEGER_CST
770 || is_gimple_min_invariant (expr))
773 if (code == ADDR_EXPR)
774 return !for_each_index (&TREE_OPERAND (expr, 1),
775 idx_contains_abnormal_ssa_name_p,
782 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
787 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
799 /* Finds basic ivs. */
802 find_bivs (struct ivopts_data *data)
804 tree phi, step, type, base;
806 struct loop *loop = data->current_loop;
808 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
810 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
813 step = determine_biv_step (phi);
817 if (cst_and_fits_in_hwi (step)
818 && int_cst_value (step) == 0)
821 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
822 if (contains_abnormal_ssa_name_p (base))
825 type = TREE_TYPE (PHI_RESULT (phi));
826 base = fold_convert (type, base);
827 step = fold_convert (type, step);
829 /* FIXME: We do not handle induction variables whose step does
830 not satisfy cst_and_fits_in_hwi. */
831 if (!cst_and_fits_in_hwi (step))
834 set_iv (data, PHI_RESULT (phi), base, step);
841 /* Marks basic ivs. */
844 mark_bivs (struct ivopts_data *data)
847 struct iv *iv, *incr_iv;
848 struct loop *loop = data->current_loop;
851 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
853 iv = get_iv (data, PHI_RESULT (phi));
857 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
858 incr_iv = get_iv (data, var);
862 /* If the increment is in the subloop, ignore it. */
863 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
864 if (incr_bb->loop_father != data->current_loop
865 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
869 incr_iv->biv_p = true;
873 /* Checks whether STMT defines a linear induction variable and stores its
874 parameters to BASE and STEP. */
877 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
878 tree *base, tree *step)
881 struct loop *loop = data->current_loop;
886 if (TREE_CODE (stmt) != MODIFY_EXPR)
889 lhs = TREE_OPERAND (stmt, 0);
890 if (TREE_CODE (lhs) != SSA_NAME)
893 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
896 /* FIXME: We do not handle induction variables whose step does
897 not satisfy cst_and_fits_in_hwi. */
899 && !cst_and_fits_in_hwi (*step))
902 if (contains_abnormal_ssa_name_p (*base))
908 /* Finds general ivs in statement STMT. */
911 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
915 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
918 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
921 /* Finds general ivs in basic block BB. */
924 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
926 block_stmt_iterator bsi;
928 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
929 find_givs_in_stmt (data, bsi_stmt (bsi));
932 /* Finds general ivs. */
935 find_givs (struct ivopts_data *data)
937 struct loop *loop = data->current_loop;
938 basic_block *body = get_loop_body_in_dom_order (loop);
941 for (i = 0; i < loop->num_nodes; i++)
942 find_givs_in_bb (data, body[i]);
946 /* Determine the number of iterations of the current loop. */
949 determine_number_of_iterations (struct ivopts_data *data)
951 struct loop *loop = data->current_loop;
952 edge exit = single_dom_exit (loop);
957 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
960 /* For each ssa name defined in LOOP determines whether it is an induction
961 variable and if so, its initial value and step. */
964 find_induction_variables (struct ivopts_data *data)
967 struct loop *loop = data->current_loop;
970 if (!find_bivs (data))
975 determine_number_of_iterations (data);
977 if (dump_file && (dump_flags & TDF_DETAILS))
979 if (loop_data (loop)->niter.niter)
981 fprintf (dump_file, " number of iterations ");
982 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
984 fprintf (dump_file, "\n");
986 fprintf (dump_file, " may be zero if ");
987 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
989 fprintf (dump_file, "\n");
991 fprintf (dump_file, " bogus unless ");
992 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
994 fprintf (dump_file, "\n");
995 fprintf (dump_file, "\n");
998 fprintf (dump_file, "Induction variables:\n\n");
1000 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1002 if (ver_info (data, i)->iv)
1003 dump_iv (dump_file, ver_info (data, i)->iv);
1010 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1012 static struct iv_use *
1013 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1014 tree stmt, enum use_type use_type)
1016 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1018 use->id = n_iv_uses (data);
1019 use->type = use_type;
1023 use->related_cands = BITMAP_XMALLOC ();
1025 /* To avoid showing ssa name in the dumps, if it was not reset by the
1027 iv->ssa_name = NULL_TREE;
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 dump_use (dump_file, use);
1032 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1037 /* Checks whether OP is a loop-level invariant and if so, records it.
1038 NONLINEAR_USE is true if the invariant is used in a way we do not
1039 handle specially. */
1042 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1045 struct version_info *info;
1047 if (TREE_CODE (op) != SSA_NAME
1048 || !is_gimple_reg (op))
1051 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1053 && flow_bb_inside_loop_p (data->current_loop, bb))
1056 info = name_info (data, op);
1058 info->has_nonlin_use |= nonlinear_use;
1060 info->inv_id = ++data->max_inv_id;
1061 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1064 /* Checks whether the use OP is interesting and if so, records it
1067 static struct iv_use *
1068 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1076 if (TREE_CODE (op) != SSA_NAME)
1079 iv = get_iv (data, op);
1083 if (iv->have_use_for)
1085 use = iv_use (data, iv->use_id);
1087 gcc_assert (use->type == USE_NONLINEAR_EXPR
1088 || use->type == USE_OUTER);
1090 if (type == USE_NONLINEAR_EXPR)
1091 use->type = USE_NONLINEAR_EXPR;
1095 if (zero_p (iv->step))
1097 record_invariant (data, op, true);
1100 iv->have_use_for = true;
1102 civ = xmalloc (sizeof (struct iv));
1105 stmt = SSA_NAME_DEF_STMT (op);
1106 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1107 || TREE_CODE (stmt) == MODIFY_EXPR);
1109 use = record_use (data, NULL, civ, stmt, type);
1110 iv->use_id = use->id;
1115 /* Checks whether the use OP is interesting and if so, records it. */
1117 static struct iv_use *
1118 find_interesting_uses_op (struct ivopts_data *data, tree op)
1120 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1123 /* Records a definition of induction variable OP that is used outside of the
1126 static struct iv_use *
1127 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1129 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1132 /* Checks whether the condition *COND_P in STMT is interesting
1133 and if so, records it. */
1136 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1140 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1142 tree zero = integer_zero_node;
1144 const_iv.step = NULL_TREE;
1146 if (integer_zerop (*cond_p)
1147 || integer_nonzerop (*cond_p))
1150 if (TREE_CODE (*cond_p) == SSA_NAME)
1157 op0_p = &TREE_OPERAND (*cond_p, 0);
1158 op1_p = &TREE_OPERAND (*cond_p, 1);
1161 if (TREE_CODE (*op0_p) == SSA_NAME)
1162 iv0 = get_iv (data, *op0_p);
1166 if (TREE_CODE (*op1_p) == SSA_NAME)
1167 iv1 = get_iv (data, *op1_p);
1171 if (/* When comparing with non-invariant value, we may not do any senseful
1172 induction variable elimination. */
1174 /* Eliminating condition based on two ivs would be nontrivial.
1175 ??? TODO -- it is not really important to handle this case. */
1176 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1178 find_interesting_uses_op (data, *op0_p);
1179 find_interesting_uses_op (data, *op1_p);
1183 if (zero_p (iv0->step) && zero_p (iv1->step))
1185 /* If both are invariants, this is a work for unswitching. */
1189 civ = xmalloc (sizeof (struct iv));
1190 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1191 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1194 /* Returns true if expression EXPR is obviously invariant in LOOP,
1195 i.e. if all its operands are defined outside of the LOOP. */
1198 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1203 if (is_gimple_min_invariant (expr))
1206 if (TREE_CODE (expr) == SSA_NAME)
1208 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1210 && flow_bb_inside_loop_p (loop, def_bb))
1219 len = first_rtl_op (TREE_CODE (expr));
1220 for (i = 0; i < len; i++)
1221 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1227 /* Cumulates the steps of indices into DATA and replaces their values with the
1228 initial ones. Returns false when the value of the index cannot be determined.
1229 Callback for for_each_index. */
1231 struct ifs_ivopts_data
1233 struct ivopts_data *ivopts_data;
1239 idx_find_step (tree base, tree *idx, void *data)
1241 struct ifs_ivopts_data *dta = data;
1243 tree step, type, iv_type, iv_step, lbound, off;
1244 struct loop *loop = dta->ivopts_data->current_loop;
1246 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1247 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1250 /* If base is a component ref, require that the offset of the reference
1252 if (TREE_CODE (base) == COMPONENT_REF)
1254 off = component_ref_field_offset (base);
1255 return expr_invariant_in_loop_p (loop, off);
1258 /* If base is array, first check whether we will be able to move the
1259 reference out of the loop (in order to take its address in strength
1260 reduction). In order for this to work we need both lower bound
1261 and step to be loop invariants. */
1262 if (TREE_CODE (base) == ARRAY_REF)
1264 step = array_ref_element_size (base);
1265 lbound = array_ref_low_bound (base);
1267 if (!expr_invariant_in_loop_p (loop, step)
1268 || !expr_invariant_in_loop_p (loop, lbound))
1272 if (TREE_CODE (*idx) != SSA_NAME)
1275 iv = get_iv (dta->ivopts_data, *idx);
1284 iv_type = TREE_TYPE (iv->base);
1285 type = build_pointer_type (TREE_TYPE (base));
1286 if (TREE_CODE (base) == ARRAY_REF)
1288 step = array_ref_element_size (base);
1290 /* We only handle addresses whose step is an integer constant. */
1291 if (TREE_CODE (step) != INTEGER_CST)
1295 /* The step for pointer arithmetics already is 1 byte. */
1296 step = build_int_cst (type, 1);
1298 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1299 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1300 type, iv->base, iv->step, dta->stmt);
1302 iv_step = fold_convert (iv_type, iv->step);
1306 /* The index might wrap. */
1310 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1313 *dta->step_p = step;
1315 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1320 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1321 object is passed to it in DATA. */
1324 idx_record_use (tree base, tree *idx,
1327 find_interesting_uses_op (data, *idx);
1328 if (TREE_CODE (base) == ARRAY_REF)
1330 find_interesting_uses_op (data, array_ref_element_size (base));
1331 find_interesting_uses_op (data, array_ref_low_bound (base));
1336 /* Finds addresses in *OP_P inside STMT. */
1339 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1341 tree base = unshare_expr (*op_p), step = NULL;
1343 struct ifs_ivopts_data ifs_ivopts_data;
1345 /* Ignore bitfields for now. Not really something terribly complicated
1347 if (TREE_CODE (base) == COMPONENT_REF
1348 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1351 ifs_ivopts_data.ivopts_data = data;
1352 ifs_ivopts_data.stmt = stmt;
1353 ifs_ivopts_data.step_p = &step;
1354 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1358 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1359 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1361 if (TREE_CODE (base) == INDIRECT_REF)
1362 base = TREE_OPERAND (base, 0);
1364 base = build_addr (base);
1366 civ = alloc_iv (base, step);
1367 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1371 for_each_index (op_p, idx_record_use, data);
1374 /* Finds and records invariants used in STMT. */
1377 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1379 use_optype uses = NULL;
1383 if (TREE_CODE (stmt) == PHI_NODE)
1384 n = PHI_NUM_ARGS (stmt);
1387 get_stmt_operands (stmt);
1388 uses = STMT_USE_OPS (stmt);
1389 n = NUM_USES (uses);
1392 for (i = 0; i < n; i++)
1394 if (TREE_CODE (stmt) == PHI_NODE)
1395 op = PHI_ARG_DEF (stmt, i);
1397 op = USE_OP (uses, i);
1399 record_invariant (data, op, false);
1403 /* Finds interesting uses of induction variables in the statement STMT. */
1406 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1410 use_optype uses = NULL;
1413 find_invariants_stmt (data, stmt);
1415 if (TREE_CODE (stmt) == COND_EXPR)
1417 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1421 if (TREE_CODE (stmt) == MODIFY_EXPR)
1423 lhs = TREE_OPERAND (stmt, 0);
1424 rhs = TREE_OPERAND (stmt, 1);
1426 if (TREE_CODE (lhs) == SSA_NAME)
1428 /* If the statement defines an induction variable, the uses are not
1429 interesting by themselves. */
1431 iv = get_iv (data, lhs);
1433 if (iv && !zero_p (iv->step))
1437 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1439 case tcc_comparison:
1440 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1444 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1445 if (REFERENCE_CLASS_P (lhs))
1446 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1452 if (REFERENCE_CLASS_P (lhs)
1453 && is_gimple_val (rhs))
1455 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1456 find_interesting_uses_op (data, rhs);
1460 /* TODO -- we should also handle address uses of type
1462 memory = call (whatever);
1469 if (TREE_CODE (stmt) == PHI_NODE
1470 && bb_for_stmt (stmt) == data->current_loop->header)
1472 lhs = PHI_RESULT (stmt);
1473 iv = get_iv (data, lhs);
1475 if (iv && !zero_p (iv->step))
1479 if (TREE_CODE (stmt) == PHI_NODE)
1480 n = PHI_NUM_ARGS (stmt);
1483 uses = STMT_USE_OPS (stmt);
1484 n = NUM_USES (uses);
1487 for (i = 0; i < n; i++)
1489 if (TREE_CODE (stmt) == PHI_NODE)
1490 op = PHI_ARG_DEF (stmt, i);
1492 op = USE_OP (uses, i);
1494 if (TREE_CODE (op) != SSA_NAME)
1497 iv = get_iv (data, op);
1501 find_interesting_uses_op (data, op);
1505 /* Finds interesting uses of induction variables outside of loops
1506 on loop exit edge EXIT. */
1509 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1513 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1515 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1516 find_interesting_uses_outer (data, def);
1520 /* Finds uses of the induction variables that are interesting. */
1523 find_interesting_uses (struct ivopts_data *data)
1526 block_stmt_iterator bsi;
1528 basic_block *body = get_loop_body (data->current_loop);
1530 struct version_info *info;
1533 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, "Uses:\n\n");
1536 for (i = 0; i < data->current_loop->num_nodes; i++)
1541 FOR_EACH_EDGE (e, ei, bb->succs)
1542 if (e->dest != EXIT_BLOCK_PTR
1543 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1544 find_interesting_uses_outside (data, e);
1546 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1547 find_interesting_uses_stmt (data, phi);
1548 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1549 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1552 if (dump_file && (dump_flags & TDF_DETAILS))
1556 fprintf (dump_file, "\n");
1558 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1560 info = ver_info (data, i);
1563 fprintf (dump_file, " ");
1564 print_generic_expr (dump_file, info->name, TDF_SLIM);
1565 fprintf (dump_file, " is invariant (%d)%s\n",
1566 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1570 fprintf (dump_file, "\n");
1576 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1577 position to POS. If USE is not NULL, the candidate is set as related to
1578 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1579 replacement of the final value of the iv by a direct computation. */
1581 static struct iv_cand *
1582 add_candidate_1 (struct ivopts_data *data,
1583 tree base, tree step, bool important, enum iv_position pos,
1584 struct iv_use *use, tree incremented_at)
1587 struct iv_cand *cand = NULL;
1592 type = TREE_TYPE (base);
1593 if (!TYPE_UNSIGNED (type))
1595 type = unsigned_type_for (type);
1596 base = fold_convert (type, base);
1598 step = fold_convert (type, step);
1602 for (i = 0; i < n_iv_cands (data); i++)
1604 cand = iv_cand (data, i);
1606 if (cand->pos != pos)
1609 if (cand->incremented_at != incremented_at)
1623 if (!operand_equal_p (base, cand->iv->base, 0))
1626 if (zero_p (cand->iv->step))
1633 if (step && operand_equal_p (step, cand->iv->step, 0))
1638 if (i == n_iv_cands (data))
1640 cand = xcalloc (1, sizeof (struct iv_cand));
1646 cand->iv = alloc_iv (base, step);
1649 if (pos != IP_ORIGINAL && cand->iv)
1651 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1652 cand->var_after = cand->var_before;
1654 cand->important = important;
1655 cand->incremented_at = incremented_at;
1656 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1658 if (dump_file && (dump_flags & TDF_DETAILS))
1659 dump_cand (dump_file, cand);
1662 if (important && !cand->important)
1664 cand->important = true;
1665 if (dump_file && (dump_flags & TDF_DETAILS))
1666 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1671 bitmap_set_bit (use->related_cands, i);
1672 if (dump_file && (dump_flags & TDF_DETAILS))
1673 fprintf (dump_file, "Candidate %d is related to use %d\n",
1680 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1681 position to POS. If USE is not NULL, the candidate is set as related to
1682 it. The candidate computation is scheduled on all available positions. */
1685 add_candidate (struct ivopts_data *data,
1686 tree base, tree step, bool important, struct iv_use *use)
1688 if (ip_normal_pos (data->current_loop))
1689 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1690 if (ip_end_pos (data->current_loop))
1691 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1694 /* Adds standard iv candidates. */
1697 add_standard_iv_candidates (struct ivopts_data *data)
1699 /* Add 0 + 1 * iteration candidate. */
1700 add_candidate (data,
1701 build_int_cst (unsigned_intSI_type_node, 0),
1702 build_int_cst (unsigned_intSI_type_node, 1),
1705 /* The same for a long type if it is still fast enough. */
1706 if (BITS_PER_WORD > 32)
1707 add_candidate (data,
1708 build_int_cst (unsigned_intDI_type_node, 0),
1709 build_int_cst (unsigned_intDI_type_node, 1),
1714 /* Adds candidates bases on the old induction variable IV. */
1717 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1720 struct iv_cand *cand;
1722 add_candidate (data, iv->base, iv->step, true, NULL);
1724 /* The same, but with initial value zero. */
1725 add_candidate (data,
1726 build_int_cst (TREE_TYPE (iv->base), 0),
1727 iv->step, true, NULL);
1729 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1730 if (TREE_CODE (phi) == PHI_NODE)
1732 /* Additionally record the possibility of leaving the original iv
1734 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1735 cand = add_candidate_1 (data,
1736 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1737 SSA_NAME_DEF_STMT (def));
1738 cand->var_before = iv->ssa_name;
1739 cand->var_after = def;
1743 /* Adds candidates based on the old induction variables. */
1746 add_old_ivs_candidates (struct ivopts_data *data)
1752 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1754 iv = ver_info (data, i)->iv;
1755 if (iv && iv->biv_p && !zero_p (iv->step))
1756 add_old_iv_candidates (data, iv);
1760 /* Adds candidates based on the value of the induction variable IV and USE. */
1763 add_iv_value_candidates (struct ivopts_data *data,
1764 struct iv *iv, struct iv_use *use)
1766 add_candidate (data, iv->base, iv->step, false, use);
1768 /* The same, but with initial value zero. */
1769 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1770 iv->step, false, use);
1773 /* Adds candidates based on the address IV and USE. */
1776 add_address_candidates (struct ivopts_data *data,
1777 struct iv *iv, struct iv_use *use)
1779 tree base, abase, tmp, *act;
1781 /* First, the trivial choices. */
1782 add_iv_value_candidates (data, iv, use);
1784 /* Second, try removing the COMPONENT_REFs. */
1785 if (TREE_CODE (iv->base) == ADDR_EXPR)
1787 base = TREE_OPERAND (iv->base, 0);
1788 while (TREE_CODE (base) == COMPONENT_REF
1789 || (TREE_CODE (base) == ARRAY_REF
1790 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1791 base = TREE_OPERAND (base, 0);
1793 if (base != TREE_OPERAND (iv->base, 0))
1795 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1796 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1798 if (TREE_CODE (base) == INDIRECT_REF)
1799 base = TREE_OPERAND (base, 0);
1801 base = build_addr (base);
1802 add_candidate (data, base, iv->step, false, use);
1806 /* Third, try removing the constant offset. */
1808 while (TREE_CODE (abase) == PLUS_EXPR
1809 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1810 abase = TREE_OPERAND (abase, 0);
1811 /* We found the offset, so make the copy of the non-shared part and
1813 if (TREE_CODE (abase) == PLUS_EXPR)
1818 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1820 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1821 NULL_TREE, TREE_OPERAND (tmp, 1));
1822 act = &TREE_OPERAND (*act, 0);
1824 *act = TREE_OPERAND (tmp, 0);
1826 add_candidate (data, base, iv->step, false, use);
1830 /* Possibly adds pseudocandidate for replacing the final value of USE by
1831 a direct computation. */
1834 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1836 struct tree_niter_desc *niter;
1837 struct loop *loop = data->current_loop;
1839 /* We must know where we exit the loop and how many times does it roll. */
1840 if (!single_dom_exit (loop))
1843 niter = &loop_data (loop)->niter;
1845 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1846 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1849 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1852 /* Adds candidates based on the uses. */
1855 add_derived_ivs_candidates (struct ivopts_data *data)
1859 for (i = 0; i < n_iv_uses (data); i++)
1861 struct iv_use *use = iv_use (data, i);
1868 case USE_NONLINEAR_EXPR:
1870 /* Just add the ivs based on the value of the iv used here. */
1871 add_iv_value_candidates (data, use->iv, use);
1875 add_iv_value_candidates (data, use->iv, use);
1877 /* Additionally, add the pseudocandidate for the possibility to
1878 replace the final value by a direct computation. */
1879 add_iv_outer_candidates (data, use);
1883 add_address_candidates (data, use->iv, use);
1892 /* Finds the candidates for the induction variables. */
1895 find_iv_candidates (struct ivopts_data *data)
1897 /* Add commonly used ivs. */
1898 add_standard_iv_candidates (data);
1900 /* Add old induction variables. */
1901 add_old_ivs_candidates (data);
1903 /* Add induction variables derived from uses. */
1904 add_derived_ivs_candidates (data);
1907 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1908 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1909 we allocate a simple list to every use. */
1912 alloc_use_cost_map (struct ivopts_data *data)
1914 unsigned i, n_imp = 0, size, j;
1916 if (!data->consider_all_candidates)
1918 for (i = 0; i < n_iv_cands (data); i++)
1920 struct iv_cand *cand = iv_cand (data, i);
1921 if (cand->important)
1926 for (i = 0; i < n_iv_uses (data); i++)
1928 struct iv_use *use = iv_use (data, i);
1931 if (data->consider_all_candidates)
1933 size = n_iv_cands (data);
1934 use->n_map_members = size;
1939 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
1943 use->n_map_members = 0;
1946 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1950 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1951 on invariants DEPENDS_ON. */
1954 set_use_iv_cost (struct ivopts_data *data,
1955 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1961 BITMAP_XFREE (depends_on);
1965 if (data->consider_all_candidates)
1967 use->cost_map[cand->id].cand = cand;
1968 use->cost_map[cand->id].cost = cost;
1969 use->cost_map[cand->id].depends_on = depends_on;
1976 use->cost_map[use->n_map_members].cand = cand;
1977 use->cost_map[use->n_map_members].cost = cost;
1978 use->cost_map[use->n_map_members].depends_on = depends_on;
1979 use->n_map_members++;
1982 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1986 get_use_iv_cost (struct ivopts_data *data,
1987 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1994 if (data->consider_all_candidates)
1998 for (i = 0; i < use->n_map_members; i++)
1999 if (use->cost_map[i].cand == cand)
2002 if (i == use->n_map_members)
2007 *depends_on = use->cost_map[i].depends_on;
2008 return use->cost_map[i].cost;
2011 /* Returns estimate on cost of computing SEQ. */
2019 for (; seq; seq = NEXT_INSN (seq))
2021 set = single_set (seq);
2023 cost += rtx_cost (set, SET);
2031 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2033 produce_memory_decl_rtl (tree obj, int *regno)
2038 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2040 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2041 x = gen_rtx_SYMBOL_REF (Pmode, name);
2044 x = gen_raw_REG (Pmode, (*regno)++);
2046 return gen_rtx_MEM (DECL_MODE (obj), x);
2049 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2050 walk_tree. DATA contains the actual fake register number. */
2053 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2055 tree obj = NULL_TREE;
2059 switch (TREE_CODE (*expr_p))
2062 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2063 (handled_component_p (*expr_p)
2064 || TREE_CODE (*expr_p) == REALPART_EXPR
2065 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
2066 expr_p = &TREE_OPERAND (*expr_p, 0));
2069 x = produce_memory_decl_rtl (obj, regno);
2074 obj = SSA_NAME_VAR (*expr_p);
2075 if (!DECL_RTL_SET_P (obj))
2076 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2085 if (DECL_RTL_SET_P (obj))
2088 if (DECL_MODE (obj) == BLKmode)
2089 x = produce_memory_decl_rtl (obj, regno);
2091 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2101 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2102 SET_DECL_RTL (obj, x);
2108 /* Determines cost of the computation of EXPR. */
2111 computation_cost (tree expr)
2114 tree type = TREE_TYPE (expr);
2118 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2120 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2124 cost = seq_cost (seq);
2125 if (GET_CODE (rslt) == MEM)
2126 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2131 /* Returns variable containing the value of candidate CAND at statement AT. */
2134 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2136 if (stmt_after_increment (loop, cand, stmt))
2137 return cand->var_after;
2139 return cand->var_before;
2142 /* Determines the expression by that USE is expressed from induction variable
2143 CAND at statement AT in LOOP. */
2146 get_computation_at (struct loop *loop,
2147 struct iv_use *use, struct iv_cand *cand, tree at)
2149 tree ubase = use->iv->base;
2150 tree ustep = use->iv->step;
2151 tree cbase = cand->iv->base;
2152 tree cstep = cand->iv->step;
2153 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2157 unsigned HOST_WIDE_INT ustepi, cstepi;
2158 HOST_WIDE_INT ratioi;
2160 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2162 /* We do not have a precision to express the values of use. */
2166 expr = var_at_stmt (loop, cand, at);
2168 if (TREE_TYPE (expr) != ctype)
2170 /* This may happen with the original ivs. */
2171 expr = fold_convert (ctype, expr);
2174 if (TYPE_UNSIGNED (utype))
2178 uutype = unsigned_type_for (utype);
2179 ubase = fold_convert (uutype, ubase);
2180 ustep = fold_convert (uutype, ustep);
2183 if (uutype != ctype)
2185 expr = fold_convert (uutype, expr);
2186 cbase = fold_convert (uutype, cbase);
2187 cstep = fold_convert (uutype, cstep);
2190 if (!cst_and_fits_in_hwi (cstep)
2191 || !cst_and_fits_in_hwi (ustep))
2194 ustepi = int_cst_value (ustep);
2195 cstepi = int_cst_value (cstep);
2197 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2199 /* TODO maybe consider case when ustep divides cstep and the ratio is
2200 a power of 2 (so that the division is fast to execute)? We would
2201 need to be much more careful with overflows etc. then. */
2205 /* We may need to shift the value if we are after the increment. */
2206 if (stmt_after_increment (loop, cand, at))
2207 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2209 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2210 or |ratio| == 1, it is better to handle this like
2212 ubase - ratio * cbase + ratio * var. */
2216 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2217 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2219 else if (ratioi == -1)
2221 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2222 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2224 else if (TREE_CODE (cbase) == INTEGER_CST)
2226 ratio = build_int_cst_type (uutype, ratioi);
2227 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2228 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2229 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2230 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2234 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2235 ratio = build_int_cst_type (uutype, ratioi);
2236 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2237 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2240 return fold_convert (utype, expr);
2243 /* Determines the expression by that USE is expressed from induction variable
2247 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2249 return get_computation_at (loop, use, cand, use->stmt);
2252 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2255 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2258 enum tree_code code;
2262 if (cst_and_fits_in_hwi (*expr))
2264 *offset += int_cst_value (*expr);
2265 *expr = integer_zero_node;
2269 code = TREE_CODE (*expr);
2271 if (code != PLUS_EXPR && code != MINUS_EXPR)
2274 op0 = TREE_OPERAND (*expr, 0);
2275 op1 = TREE_OPERAND (*expr, 1);
2277 if (cst_and_fits_in_hwi (op1))
2279 if (code == PLUS_EXPR)
2280 *offset += int_cst_value (op1);
2282 *offset -= int_cst_value (op1);
2288 if (code != PLUS_EXPR)
2291 if (!cst_and_fits_in_hwi (op0))
2294 *offset += int_cst_value (op0);
2299 /* Returns cost of addition in MODE. */
2302 add_cost (enum machine_mode mode)
2304 static unsigned costs[NUM_MACHINE_MODES];
2312 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2313 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2314 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2319 cost = seq_cost (seq);
2325 if (dump_file && (dump_flags & TDF_DETAILS))
2326 fprintf (dump_file, "Addition in %s costs %d\n",
2327 GET_MODE_NAME (mode), cost);
2331 /* Entry in a hashtable of already known costs for multiplication. */
2334 HOST_WIDE_INT cst; /* The constant to multiply by. */
2335 enum machine_mode mode; /* In mode. */
2336 unsigned cost; /* The cost. */
2339 /* Counts hash value for the ENTRY. */
2342 mbc_entry_hash (const void *entry)
2344 const struct mbc_entry *e = entry;
2346 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2349 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2352 mbc_entry_eq (const void *entry1, const void *entry2)
2354 const struct mbc_entry *e1 = entry1;
2355 const struct mbc_entry *e2 = entry2;
2357 return (e1->mode == e2->mode
2358 && e1->cst == e2->cst);
2361 /* Returns cost of multiplication by constant CST in MODE. */
2364 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2366 static htab_t costs;
2367 struct mbc_entry **cached, act;
2372 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2376 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2378 return (*cached)->cost;
2380 *cached = xmalloc (sizeof (struct mbc_entry));
2381 (*cached)->mode = mode;
2382 (*cached)->cst = cst;
2385 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2390 cost = seq_cost (seq);
2392 if (dump_file && (dump_flags & TDF_DETAILS))
2393 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2394 (int) cst, GET_MODE_NAME (mode), cost);
2396 (*cached)->cost = cost;
2401 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2402 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2403 variable is omitted. The created memory accesses MODE.
2405 TODO -- there must be some better way. This all is quite crude. */
2408 get_address_cost (bool symbol_present, bool var_present,
2409 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2411 #define MAX_RATIO 128
2412 static sbitmap valid_mult;
2413 static HOST_WIDE_INT rat, off;
2414 static HOST_WIDE_INT min_offset, max_offset;
2415 static unsigned costs[2][2][2][2];
2416 unsigned cost, acost;
2417 rtx seq, addr, base;
2418 bool offset_p, ratio_p;
2420 HOST_WIDE_INT s_offset;
2421 unsigned HOST_WIDE_INT mask;
2428 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2430 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2431 for (i = 1; i <= 1 << 20; i <<= 1)
2433 XEXP (addr, 1) = GEN_INT (i);
2434 if (!memory_address_p (Pmode, addr))
2437 max_offset = i >> 1;
2440 for (i = 1; i <= 1 << 20; i <<= 1)
2442 XEXP (addr, 1) = GEN_INT (-i);
2443 if (!memory_address_p (Pmode, addr))
2446 min_offset = -(i >> 1);
2448 if (dump_file && (dump_flags & TDF_DETAILS))
2450 fprintf (dump_file, "get_address_cost:\n");
2451 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2452 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2455 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2456 sbitmap_zero (valid_mult);
2458 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2459 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2461 XEXP (addr, 1) = GEN_INT (i);
2462 if (memory_address_p (Pmode, addr))
2464 SET_BIT (valid_mult, i + MAX_RATIO);
2469 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, " allowed multipliers:");
2472 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2473 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2474 fprintf (dump_file, " %d", (int) i);
2475 fprintf (dump_file, "\n");
2476 fprintf (dump_file, "\n");
2480 bits = GET_MODE_BITSIZE (Pmode);
2481 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2483 if ((offset >> (bits - 1) & 1))
2488 offset_p = (s_offset != 0
2489 && min_offset <= s_offset && s_offset <= max_offset);
2490 ratio_p = (ratio != 1
2491 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2492 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2494 if (ratio != 1 && !ratio_p)
2495 cost += multiply_by_cost (ratio, Pmode);
2497 if (s_offset && !offset_p && !symbol_present)
2499 cost += add_cost (Pmode);
2503 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2508 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2509 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2511 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2514 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, addr);
2518 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2520 base = gen_rtx_fmt_e (CONST, Pmode,
2521 gen_rtx_fmt_ee (PLUS, Pmode,
2526 base = GEN_INT (off);
2531 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2534 addr = memory_address (Pmode, addr);
2538 acost = seq_cost (seq);
2539 acost += address_cost (addr, Pmode);
2543 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2546 return cost + acost;
2549 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2550 the bitmap to that we should store it. */
2552 static struct ivopts_data *fd_ivopts_data;
2554 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2556 bitmap *depends_on = data;
2557 struct version_info *info;
2559 if (TREE_CODE (*expr_p) != SSA_NAME)
2561 info = name_info (fd_ivopts_data, *expr_p);
2563 if (!info->inv_id || info->has_nonlin_use)
2567 *depends_on = BITMAP_XMALLOC ();
2568 bitmap_set_bit (*depends_on, info->inv_id);
2573 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2574 invariants the computation depends on. */
2577 force_var_cost (struct ivopts_data *data,
2578 tree expr, bitmap *depends_on)
2580 static bool costs_initialized = false;
2581 static unsigned integer_cost;
2582 static unsigned symbol_cost;
2583 static unsigned address_cost;
2585 if (!costs_initialized)
2587 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2588 rtx x = gen_rtx_MEM (DECL_MODE (var),
2589 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2591 tree type = build_pointer_type (integer_type_node);
2593 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2596 SET_DECL_RTL (var, x);
2597 TREE_STATIC (var) = 1;
2598 addr = build1 (ADDR_EXPR, type, var);
2599 symbol_cost = computation_cost (addr) + 1;
2602 = computation_cost (build2 (PLUS_EXPR, type,
2604 build_int_cst_type (type, 2000))) + 1;
2605 if (dump_file && (dump_flags & TDF_DETAILS))
2607 fprintf (dump_file, "force_var_cost:\n");
2608 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2609 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2610 fprintf (dump_file, " address %d\n", (int) address_cost);
2611 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2612 fprintf (dump_file, "\n");
2615 costs_initialized = true;
2620 fd_ivopts_data = data;
2621 walk_tree (&expr, find_depends, depends_on, NULL);
2624 if (SSA_VAR_P (expr))
2627 if (TREE_INVARIANT (expr))
2629 if (TREE_CODE (expr) == INTEGER_CST)
2630 return integer_cost;
2632 if (TREE_CODE (expr) == ADDR_EXPR)
2634 tree obj = TREE_OPERAND (expr, 0);
2636 if (TREE_CODE (obj) == VAR_DECL
2637 || TREE_CODE (obj) == PARM_DECL
2638 || TREE_CODE (obj) == RESULT_DECL)
2642 return address_cost;
2645 /* Just an arbitrary value, FIXME. */
2646 return target_spill_cost;
2649 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2650 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2651 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2652 invariants the computation depends on. */
2655 split_address_cost (struct ivopts_data *data,
2656 tree addr, bool *symbol_present, bool *var_present,
2657 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2660 HOST_WIDE_INT bitsize;
2661 HOST_WIDE_INT bitpos;
2663 enum machine_mode mode;
2664 int unsignedp, volatilep;
2666 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2667 &unsignedp, &volatilep);
2670 || bitpos % BITS_PER_UNIT != 0
2671 || TREE_CODE (core) != VAR_DECL)
2673 *symbol_present = false;
2674 *var_present = true;
2675 fd_ivopts_data = data;
2676 walk_tree (&addr, find_depends, depends_on, NULL);
2677 return target_spill_cost;
2680 *offset += bitpos / BITS_PER_UNIT;
2681 if (TREE_STATIC (core)
2682 || DECL_EXTERNAL (core))
2684 *symbol_present = true;
2685 *var_present = false;
2689 *symbol_present = false;
2690 *var_present = true;
2694 /* Estimates cost of expressing difference of addresses E1 - E2 as
2695 var + symbol + offset. The value of offset is added to OFFSET,
2696 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2697 part is missing. DEPENDS_ON is a set of the invariants the computation
2701 ptr_difference_cost (struct ivopts_data *data,
2702 tree e1, tree e2, bool *symbol_present, bool *var_present,
2703 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2705 HOST_WIDE_INT diff = 0;
2708 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2710 if (TREE_CODE (e2) == ADDR_EXPR
2711 && ptr_difference_const (TREE_OPERAND (e1, 0),
2712 TREE_OPERAND (e2, 0), &diff))
2715 *symbol_present = false;
2716 *var_present = false;
2720 if (e2 == integer_zero_node)
2721 return split_address_cost (data, TREE_OPERAND (e1, 0),
2722 symbol_present, var_present, offset, depends_on);
2724 *symbol_present = false;
2725 *var_present = true;
2727 cost = force_var_cost (data, e1, depends_on);
2728 cost += force_var_cost (data, e2, depends_on);
2729 cost += add_cost (Pmode);
2734 /* Estimates cost of expressing difference E1 - E2 as
2735 var + symbol + offset. The value of offset is added to OFFSET,
2736 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2737 part is missing. DEPENDS_ON is a set of the invariants the computation
2741 difference_cost (struct ivopts_data *data,
2742 tree e1, tree e2, bool *symbol_present, bool *var_present,
2743 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2746 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2748 strip_offset (&e1, offset);
2750 strip_offset (&e2, offset);
2753 if (TREE_CODE (e1) == ADDR_EXPR)
2754 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2756 *symbol_present = false;
2758 if (operand_equal_p (e1, e2, 0))
2760 *var_present = false;
2763 *var_present = true;
2765 return force_var_cost (data, e1, depends_on);
2769 cost = force_var_cost (data, e2, depends_on);
2770 cost += multiply_by_cost (-1, mode);
2775 cost = force_var_cost (data, e1, depends_on);
2776 cost += force_var_cost (data, e2, depends_on);
2777 cost += add_cost (mode);
2782 /* Determines the cost of the computation by that USE is expressed
2783 from induction variable CAND. If ADDRESS_P is true, we just need
2784 to create an address from it, otherwise we want to get it into
2785 register. A set of invariants we depend on is stored in
2786 DEPENDS_ON. AT is the statement at that the value is computed. */
2789 get_computation_cost_at (struct ivopts_data *data,
2790 struct iv_use *use, struct iv_cand *cand,
2791 bool address_p, bitmap *depends_on, tree at)
2793 tree ubase = use->iv->base, ustep = use->iv->step;
2795 tree utype = TREE_TYPE (ubase), ctype;
2796 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2797 HOST_WIDE_INT ratio, aratio;
2798 bool var_present, symbol_present;
2799 unsigned cost = 0, n_sums;
2803 /* Only consider real candidates. */
2807 cbase = cand->iv->base;
2808 cstep = cand->iv->step;
2809 ctype = TREE_TYPE (cbase);
2811 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2813 /* We do not have a precision to express the values of use. */
2819 /* Do not try to express address of an object with computation based
2820 on address of a different object. This may cause problems in rtl
2821 level alias analysis (that does not expect this to be happening,
2822 as this is illegal in C), and would be unlikely to be useful
2824 if (use->iv->base_object
2825 && cand->iv->base_object
2826 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
2830 if (!cst_and_fits_in_hwi (ustep)
2831 || !cst_and_fits_in_hwi (cstep))
2834 if (TREE_CODE (ubase) == INTEGER_CST
2835 && !cst_and_fits_in_hwi (ubase))
2838 if (TREE_CODE (cbase) == INTEGER_CST
2839 && !cst_and_fits_in_hwi (cbase))
2842 ustepi = int_cst_value (ustep);
2843 cstepi = int_cst_value (cstep);
2845 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2847 /* TODO -- add direct handling of this case. */
2851 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2854 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2855 or ratio == 1, it is better to handle this like
2857 ubase - ratio * cbase + ratio * var
2859 (also holds in the case ratio == -1, TODO. */
2861 if (TREE_CODE (cbase) == INTEGER_CST)
2863 offset = - ratio * int_cst_value (cbase);
2864 cost += difference_cost (data,
2865 ubase, integer_zero_node,
2866 &symbol_present, &var_present, &offset,
2869 else if (ratio == 1)
2871 cost += difference_cost (data,
2873 &symbol_present, &var_present, &offset,
2878 cost += force_var_cost (data, cbase, depends_on);
2879 cost += add_cost (TYPE_MODE (ctype));
2880 cost += difference_cost (data,
2881 ubase, integer_zero_node,
2882 &symbol_present, &var_present, &offset,
2886 /* If we are after the increment, the value of the candidate is higher by
2888 if (stmt_after_increment (data->current_loop, cand, at))
2889 offset -= ratio * cstepi;
2891 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2892 (symbol/var/const parts may be omitted). If we are looking for an address,
2893 find the cost of addressing this. */
2895 return get_address_cost (symbol_present, var_present, offset, ratio);
2897 /* Otherwise estimate the costs for computing the expression. */
2898 aratio = ratio > 0 ? ratio : -ratio;
2899 if (!symbol_present && !var_present && !offset)
2902 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2908 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2912 /* Symbol + offset should be compile-time computable. */
2913 && (symbol_present || offset))
2916 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2920 /* Just get the expression, expand it and measure the cost. */
2921 tree comp = get_computation_at (data->current_loop, use, cand, at);
2927 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2929 return computation_cost (comp);
2933 /* Determines the cost of the computation by that USE is expressed
2934 from induction variable CAND. If ADDRESS_P is true, we just need
2935 to create an address from it, otherwise we want to get it into
2936 register. A set of invariants we depend on is stored in
2940 get_computation_cost (struct ivopts_data *data,
2941 struct iv_use *use, struct iv_cand *cand,
2942 bool address_p, bitmap *depends_on)
2944 return get_computation_cost_at (data,
2945 use, cand, address_p, depends_on, use->stmt);
2948 /* Determines cost of basing replacement of USE on CAND in a generic
2952 determine_use_iv_cost_generic (struct ivopts_data *data,
2953 struct iv_use *use, struct iv_cand *cand)
2956 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2958 set_use_iv_cost (data, use, cand, cost, depends_on);
2961 /* Determines cost of basing replacement of USE on CAND in an address. */
2964 determine_use_iv_cost_address (struct ivopts_data *data,
2965 struct iv_use *use, struct iv_cand *cand)
2968 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2970 set_use_iv_cost (data, use, cand, cost, depends_on);
2973 /* Computes value of induction variable IV in iteration NITER. */
2976 iv_value (struct iv *iv, tree niter)
2979 tree type = TREE_TYPE (iv->base);
2981 niter = fold_convert (type, niter);
2982 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2984 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2987 /* Computes value of candidate CAND at position AT in iteration NITER. */
2990 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2992 tree val = iv_value (cand->iv, niter);
2993 tree type = TREE_TYPE (cand->iv->base);
2995 if (stmt_after_increment (loop, cand, at))
2996 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3001 /* Check whether it is possible to express the condition in USE by comparison
3002 of candidate CAND. If so, store the comparison code to COMPARE and the
3003 value compared with to BOUND. */
3006 may_eliminate_iv (struct loop *loop,
3007 struct iv_use *use, struct iv_cand *cand,
3008 enum tree_code *compare, tree *bound)
3012 struct tree_niter_desc niter, new_niter;
3013 tree wider_type, type, base;
3015 /* For now works only for exits that dominate the loop latch. TODO -- extend
3016 for other conditions inside loop body. */
3017 ex_bb = bb_for_stmt (use->stmt);
3018 if (use->stmt != last_stmt (ex_bb)
3019 || TREE_CODE (use->stmt) != COND_EXPR)
3021 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3024 exit = EDGE_SUCC (ex_bb, 0);
3025 if (flow_bb_inside_loop_p (loop, exit->dest))
3026 exit = EDGE_SUCC (ex_bb, 1);
3027 if (flow_bb_inside_loop_p (loop, exit->dest))
3030 niter.niter = NULL_TREE;
3031 number_of_iterations_exit (loop, exit, &niter);
3033 || !integer_nonzerop (niter.assumptions)
3034 || !integer_zerop (niter.may_be_zero))
3037 if (exit->flags & EDGE_TRUE_VALUE)
3042 *bound = cand_value_at (loop, cand, use->stmt, niter.niter);
3044 /* Let us check there is not some problem with overflows, by checking that
3045 the number of iterations is unchanged. */
3046 base = cand->iv->base;
3047 type = TREE_TYPE (base);
3048 if (stmt_after_increment (loop, cand, use->stmt))
3049 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3051 new_niter.niter = NULL_TREE;
3052 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3053 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3055 if (!new_niter.niter
3056 || !integer_nonzerop (new_niter.assumptions)
3057 || !integer_zerop (new_niter.may_be_zero))
3060 wider_type = TREE_TYPE (new_niter.niter);
3061 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter.niter)))
3062 wider_type = TREE_TYPE (niter.niter);
3063 if (!operand_equal_p (fold_convert (wider_type, niter.niter),
3064 fold_convert (wider_type, new_niter.niter), 0))
3070 /* Determines cost of basing replacement of USE on CAND in a condition. */
3073 determine_use_iv_cost_condition (struct ivopts_data *data,
3074 struct iv_use *use, struct iv_cand *cand)
3077 enum tree_code compare;
3079 /* Only consider real candidates. */
3082 set_use_iv_cost (data, use, cand, INFTY, NULL);
3086 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3088 bitmap depends_on = NULL;
3089 unsigned cost = force_var_cost (data, bound, &depends_on);
3091 set_use_iv_cost (data, use, cand, cost, depends_on);
3095 /* The induction variable elimination failed; just express the original
3096 giv. If it is compared with an invariant, note that we cannot get
3098 if (TREE_CODE (*use->op_p) == SSA_NAME)
3099 record_invariant (data, *use->op_p, true);
3102 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3103 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3106 determine_use_iv_cost_generic (data, use, cand);
3109 /* Checks whether it is possible to replace the final value of USE by
3110 a direct computation. If so, the formula is stored to *VALUE. */
3113 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3116 struct tree_niter_desc *niter;
3118 exit = single_dom_exit (loop);
3122 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3123 bb_for_stmt (use->stmt)));
3125 niter = &loop_data (loop)->niter;
3127 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3128 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3131 *value = iv_value (use->iv, niter->niter);
3136 /* Determines cost of replacing final value of USE using CAND. */
3139 determine_use_iv_cost_outer (struct ivopts_data *data,
3140 struct iv_use *use, struct iv_cand *cand)
3146 struct loop *loop = data->current_loop;
3150 if (!may_replace_final_value (loop, use, &value))
3152 set_use_iv_cost (data, use, cand, INFTY, NULL);
3157 cost = force_var_cost (data, value, &depends_on);
3159 cost /= AVG_LOOP_NITER (loop);
3161 set_use_iv_cost (data, use, cand, cost, depends_on);
3165 exit = single_dom_exit (loop);
3168 /* If there is just a single exit, we may use value of the candidate
3169 after we take it to determine the value of use. */
3170 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3171 last_stmt (exit->src));
3173 cost /= AVG_LOOP_NITER (loop);
3177 /* Otherwise we just need to compute the iv. */
3178 cost = get_computation_cost (data, use, cand, false, &depends_on);
3181 set_use_iv_cost (data, use, cand, cost, depends_on);
3184 /* Determines cost of basing replacement of USE on CAND. */
3187 determine_use_iv_cost (struct ivopts_data *data,
3188 struct iv_use *use, struct iv_cand *cand)
3192 case USE_NONLINEAR_EXPR:
3193 determine_use_iv_cost_generic (data, use, cand);
3197 determine_use_iv_cost_outer (data, use, cand);
3201 determine_use_iv_cost_address (data, use, cand);
3205 determine_use_iv_cost_condition (data, use, cand);
3213 /* Determines costs of basing the use of the iv on an iv candidate. */
3216 determine_use_iv_costs (struct ivopts_data *data)
3220 struct iv_cand *cand;
3222 data->consider_all_candidates = (n_iv_cands (data)
3223 <= CONSIDER_ALL_CANDIDATES_BOUND);
3225 alloc_use_cost_map (data);
3227 if (!data->consider_all_candidates)
3229 /* Add the important candidate entries. */
3230 for (j = 0; j < n_iv_cands (data); j++)
3232 cand = iv_cand (data, j);
3233 if (!cand->important)
3235 for (i = 0; i < n_iv_uses (data); i++)
3237 use = iv_use (data, i);
3238 determine_use_iv_cost (data, use, cand);
3243 for (i = 0; i < n_iv_uses (data); i++)
3245 use = iv_use (data, i);
3247 if (data->consider_all_candidates)
3249 for (j = 0; j < n_iv_cands (data); j++)
3251 cand = iv_cand (data, j);
3252 determine_use_iv_cost (data, use, cand);
3259 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3261 cand = iv_cand (data, j);
3262 if (!cand->important)
3263 determine_use_iv_cost (data, use, cand);
3268 if (dump_file && (dump_flags & TDF_DETAILS))
3270 fprintf (dump_file, "Use-candidate costs:\n");
3272 for (i = 0; i < n_iv_uses (data); i++)
3274 use = iv_use (data, i);
3276 fprintf (dump_file, "Use %d:\n", i);
3277 fprintf (dump_file, " cand\tcost\tdepends on\n");
3278 for (j = 0; j < use->n_map_members; j++)
3280 if (!use->cost_map[j].cand
3281 || use->cost_map[j].cost == INFTY)
3284 fprintf (dump_file, " %d\t%d\t",
3285 use->cost_map[j].cand->id,
3286 use->cost_map[j].cost);
3287 if (use->cost_map[j].depends_on)
3288 bitmap_print (dump_file,
3289 use->cost_map[j].depends_on, "","");
3290 fprintf (dump_file, "\n");
3293 fprintf (dump_file, "\n");
3295 fprintf (dump_file, "\n");
3299 /* Determines cost of the candidate CAND. */
3302 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3304 unsigned cost_base, cost_step;
3314 /* There are two costs associated with the candidate -- its increment
3315 and its initialization. The second is almost negligible for any loop
3316 that rolls enough, so we take it just very little into account. */
3318 base = cand->iv->base;
3319 cost_base = force_var_cost (data, base, NULL);
3320 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3322 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3324 /* Prefer the original iv unless we may gain something by replacing it. */
3325 if (cand->pos == IP_ORIGINAL)
3328 /* Prefer not to insert statements into latch unless there are some
3329 already (so that we do not create unnecessary jumps). */
3330 if (cand->pos == IP_END)
3332 bb = ip_end_pos (data->current_loop);
3333 last = last_stmt (bb);
3336 || TREE_CODE (last) == LABEL_EXPR)
3341 /* Determines costs of computation of the candidates. */
3344 determine_iv_costs (struct ivopts_data *data)
3348 if (dump_file && (dump_flags & TDF_DETAILS))
3350 fprintf (dump_file, "Candidate costs:\n");
3351 fprintf (dump_file, " cand\tcost\n");
3354 for (i = 0; i < n_iv_cands (data); i++)
3356 struct iv_cand *cand = iv_cand (data, i);
3358 determine_iv_cost (data, cand);
3360 if (dump_file && (dump_flags & TDF_DETAILS))
3361 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3364 if (dump_file && (dump_flags & TDF_DETAILS))
3365 fprintf (dump_file, "\n");
3368 /* Calculates cost for having SIZE induction variables. */
3371 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3373 return global_cost_for_size (size,
3374 loop_data (data->current_loop)->regs_used,
3378 /* For each size of the induction variable set determine the penalty. */
3381 determine_set_costs (struct ivopts_data *data)
3385 struct loop *loop = data->current_loop;
3388 /* We use the following model (definitely improvable, especially the
3389 cost function -- TODO):
3391 We estimate the number of registers available (using MD data), name it A.
3393 We estimate the number of registers used by the loop, name it U. This
3394 number is obtained as the number of loop phi nodes (not counting virtual
3395 registers and bivs) + the number of variables from outside of the loop.
3397 We set a reserve R (free regs that are used for temporary computations,
3398 etc.). For now the reserve is a constant 3.
3400 Let I be the number of induction variables.
3402 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3403 make a lot of ivs without a reason).
3404 -- if A - R < U + I <= A, the cost is I * PRES_COST
3405 -- if U + I > A, the cost is I * PRES_COST and
3406 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3408 if (dump_file && (dump_flags & TDF_DETAILS))
3410 fprintf (dump_file, "Global costs:\n");
3411 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3412 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3413 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3414 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3418 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3420 op = PHI_RESULT (phi);
3422 if (!is_gimple_reg (op))
3425 if (get_iv (data, op))
3431 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3433 struct version_info *info = ver_info (data, j);
3435 if (info->inv_id && info->has_nonlin_use)
3439 loop_data (loop)->regs_used = n;
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3441 fprintf (dump_file, " regs_used %d\n", n);
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3445 fprintf (dump_file, " cost for size:\n");
3446 fprintf (dump_file, " ivs\tcost\n");
3447 for (j = 0; j <= 2 * target_avail_regs; j++)
3448 fprintf (dump_file, " %d\t%d\n", j,
3449 ivopts_global_cost_for_size (data, j));
3450 fprintf (dump_file, "\n");
3454 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3455 taken from the set SOL and they may depend on invariants in the set INV.
3456 The really used candidate and invariants are noted to USED_IVS and
3460 find_best_candidate (struct ivopts_data *data,
3461 struct iv_use *use, bitmap sol, bitmap inv,
3462 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3465 unsigned best_cost = INFTY, cost;
3466 struct iv_cand *cnd = NULL, *acnd;
3467 bitmap depends_on = NULL, asol;
3468 bitmap_iterator bi, bi1;
3470 if (data->consider_all_candidates)
3474 asol = BITMAP_XMALLOC ();
3476 bitmap_ior (asol, data->important_candidates, use->related_cands);
3477 bitmap_and_into (asol, sol);
3480 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c, bi)
3482 acnd = iv_cand (data, c);
3483 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3487 if (cost > best_cost)
3489 if (cost == best_cost)
3491 /* Prefer the cheaper iv. */
3492 if (acnd->cost >= cnd->cost)
3498 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d, bi1)
3503 bitmap_ior_into (used_inv, depends_on);
3512 if (cnd && used_ivs)
3513 bitmap_set_bit (used_ivs, cnd->id);
3518 if (!data->consider_all_candidates)
3519 BITMAP_XFREE (asol);
3524 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3525 induction variable candidates and invariants from the sets. Only
3526 uses 0 .. MAX_USE - 1 are taken into account. */
3529 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3533 unsigned cost = 0, size = 0, acost;
3535 struct iv_cand *cand;
3536 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3539 for (i = 0; i < max_use; i++)
3541 use = iv_use (data, i);
3542 acost = find_best_candidate (data, use, sol, inv,
3543 used_ivs, used_inv, NULL);
3546 BITMAP_XFREE (used_ivs);
3547 BITMAP_XFREE (used_inv);
3553 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i, bi)
3555 cand = iv_cand (data, i);
3557 /* Do not count the pseudocandidates. */
3563 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, bi)
3567 cost += ivopts_global_cost_for_size (data, size);
3569 bitmap_copy (sol, used_ivs);
3570 bitmap_copy (inv, used_inv);
3572 BITMAP_XFREE (used_ivs);
3573 BITMAP_XFREE (used_inv);
3578 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3579 induction variable candidates and invariants from the sets. */
3582 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3584 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3587 /* Tries to extend the sets IVS and INV in the best possible way in order
3588 to express the USE. */
3591 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3594 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3595 bitmap best_ivs = BITMAP_XMALLOC ();
3596 bitmap best_inv = BITMAP_XMALLOC ();
3597 bitmap act_ivs = BITMAP_XMALLOC ();
3598 bitmap act_inv = BITMAP_XMALLOC ();
3600 struct cost_pair *cp;
3602 struct iv_cand *cand;
3605 bitmap_copy (best_ivs, ivs);
3606 bitmap_copy (best_inv, inv);
3608 /* First try important candidates. Only if it fails, try the specific ones.
3609 Rationale -- in loops with many variables the best choice often is to use
3610 just one generic biv. If we added here many ivs specific to the uses,
3611 the optimization algorithm later would be likely to get stuck in a local
3612 minimum, thus causing us to create too many ivs. The approach from
3613 few ivs to more seems more likely to be successful -- starting from few
3614 ivs, replacing an expensive use by a specific iv should always be a
3616 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
3618 cand = iv_cand (data, i);
3620 if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY)
3623 bitmap_copy (act_ivs, ivs);
3624 bitmap_set_bit (act_ivs, cand->id);
3626 bitmap_ior (act_inv, inv, depends_on);
3628 bitmap_copy (act_inv, inv);
3629 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3631 if (act_cost < best_cost)
3633 best_cost = act_cost;
3634 bitmap_copy (best_ivs, act_ivs);
3635 bitmap_copy (best_inv, act_inv);
3639 if (best_cost == INFTY)
3641 for (i = 0; i < use->n_map_members; i++)
3643 cp = use->cost_map + i;
3644 if (cp->cost == INFTY)
3647 /* Already tried this. */
3648 if (cp->cand->important)
3651 bitmap_copy (act_ivs, ivs);
3652 bitmap_set_bit (act_ivs, cp->cand->id);
3654 bitmap_ior (act_inv, inv, cp->depends_on);
3656 bitmap_copy (act_inv, inv);
3657 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3659 if (act_cost < best_cost)
3661 best_cost = act_cost;
3662 bitmap_copy (best_ivs, act_ivs);
3663 bitmap_copy (best_inv, act_inv);
3668 bitmap_copy (ivs, best_ivs);
3669 bitmap_copy (inv, best_inv);
3671 BITMAP_XFREE (best_ivs);
3672 BITMAP_XFREE (best_inv);
3673 BITMAP_XFREE (act_ivs);
3674 BITMAP_XFREE (act_inv);
3676 return (best_cost != INFTY);
3679 /* Finds an initial set of IVS and invariants INV. We do this by simply
3680 choosing the best candidate for each use. */
3683 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3687 for (i = 0; i < n_iv_uses (data); i++)
3688 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3691 return set_cost (data, ivs, inv);
3694 /* Tries to improve set of induction variables IVS and invariants INV to get
3695 it better than COST. */
3698 try_improve_iv_set (struct ivopts_data *data,
3699 bitmap ivs, bitmap inv, unsigned *cost)
3702 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3703 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3705 /* Try altering the set of induction variables by one. */
3706 for (i = 0; i < n_iv_cands (data); i++)
3708 bitmap_copy (new_ivs, ivs);
3709 bitmap_copy (new_inv, inv);
3711 if (bitmap_bit_p (ivs, i))
3712 bitmap_clear_bit (new_ivs, i);
3714 bitmap_set_bit (new_ivs, i);
3716 acost = set_cost (data, new_ivs, new_inv);
3722 best_new_ivs = BITMAP_XMALLOC ();
3723 best_new_inv = BITMAP_XMALLOC ();
3727 bitmap_copy (best_new_ivs, new_ivs);
3728 bitmap_copy (best_new_inv, new_inv);
3731 /* Ditto for invariants. */
3732 for (i = 1; i <= data->max_inv_id; i++)
3734 if (ver_info (data, i)->has_nonlin_use)
3737 bitmap_copy (new_ivs, ivs);
3738 bitmap_copy (new_inv, inv);
3740 if (bitmap_bit_p (inv, i))
3741 bitmap_clear_bit (new_inv, i);
3743 bitmap_set_bit (new_inv, i);
3745 acost = set_cost (data, new_ivs, new_inv);
3751 best_new_ivs = BITMAP_XMALLOC ();
3752 best_new_inv = BITMAP_XMALLOC ();
3756 bitmap_copy (best_new_ivs, new_ivs);
3757 bitmap_copy (best_new_inv, new_inv);
3760 BITMAP_XFREE (new_ivs);
3761 BITMAP_XFREE (new_inv);
3766 bitmap_copy (ivs, best_new_ivs);
3767 bitmap_copy (inv, best_new_inv);
3768 BITMAP_XFREE (best_new_ivs);
3769 BITMAP_XFREE (best_new_inv);
3773 /* Attempts to find the optimal set of induction variables. We do simple
3774 greedy heuristic -- we try to replace at most one candidate in the selected
3775 solution and remove the unused ivs while this improves the cost. */
3778 find_optimal_iv_set (struct ivopts_data *data)
3781 bitmap set = BITMAP_XMALLOC ();
3782 bitmap inv = BITMAP_XMALLOC ();
3785 data->important_candidates = BITMAP_XMALLOC ();
3786 for (i = 0; i < n_iv_cands (data); i++)
3788 struct iv_cand *cand = iv_cand (data, i);
3790 if (cand->important)
3791 bitmap_set_bit (data->important_candidates, i);
3794 /* Set the upper bound. */
3795 cost = get_initial_solution (data, set, inv);
3798 if (dump_file && (dump_flags & TDF_DETAILS))
3799 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3805 if (dump_file && (dump_flags & TDF_DETAILS))
3807 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3808 bitmap_print (dump_file, set, "", "");
3809 fprintf (dump_file, " invariants ");
3810 bitmap_print (dump_file, inv, "", "");
3811 fprintf (dump_file, "\n");
3814 while (try_improve_iv_set (data, set, inv, &cost))
3816 if (dump_file && (dump_flags & TDF_DETAILS))
3818 fprintf (dump_file, "Improved to (cost %d): ", cost);
3819 bitmap_print (dump_file, set, "", "");
3820 fprintf (dump_file, " invariants ");
3821 bitmap_print (dump_file, inv, "", "");
3822 fprintf (dump_file, "\n");
3826 if (dump_file && (dump_flags & TDF_DETAILS))
3827 fprintf (dump_file, "Final cost %d\n\n", cost);
3829 for (i = 0; i < n_iv_uses (data); i++)
3831 use = iv_use (data, i);
3832 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3836 BITMAP_XFREE (data->important_candidates);
3841 /* Creates a new induction variable corresponding to CAND. */
3844 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3846 block_stmt_iterator incr_pos;
3856 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3860 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3865 /* Mark that the iv is preserved. */
3866 name_info (data, cand->var_before)->preserve_biv = true;
3867 name_info (data, cand->var_after)->preserve_biv = true;
3869 /* Rewrite the increment so that it uses var_before directly. */
3870 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3875 gimple_add_tmp_var (cand->var_before);
3876 add_referenced_tmp_var (cand->var_before);
3878 base = unshare_expr (cand->iv->base);
3880 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3881 &incr_pos, after, &cand->var_before, &cand->var_after);
3884 /* Creates new induction variables described in SET. */
3887 create_new_ivs (struct ivopts_data *data, bitmap set)
3890 struct iv_cand *cand;
3893 EXECUTE_IF_SET_IN_BITMAP (set, 0, i, bi)
3895 cand = iv_cand (data, i);
3896 create_new_iv (data, cand);
3900 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3901 is true, remove also the ssa name defined by the statement. */
3904 remove_statement (tree stmt, bool including_defined_name)
3906 if (TREE_CODE (stmt) == PHI_NODE)
3908 if (!including_defined_name)
3910 /* Prevent the ssa name defined by the statement from being removed. */
3911 SET_PHI_RESULT (stmt, NULL);
3913 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3917 block_stmt_iterator bsi = bsi_for_stmt (stmt);
3923 /* Rewrites USE (definition of iv used in a nonlinear expression)
3924 using candidate CAND. */
3927 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3928 struct iv_use *use, struct iv_cand *cand)
3930 tree comp = unshare_expr (get_computation (data->current_loop,
3932 tree op, stmts, tgt, ass;
3933 block_stmt_iterator bsi, pbsi;
3935 switch (TREE_CODE (use->stmt))
3938 tgt = PHI_RESULT (use->stmt);
3940 /* If we should keep the biv, do not replace it. */
3941 if (name_info (data, tgt)->preserve_biv)
3944 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3945 while (!bsi_end_p (pbsi)
3946 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3954 tgt = TREE_OPERAND (use->stmt, 0);
3955 bsi = bsi_for_stmt (use->stmt);
3962 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3964 if (TREE_CODE (use->stmt) == PHI_NODE)
3967 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3968 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3969 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3970 remove_statement (use->stmt, false);
3971 SSA_NAME_DEF_STMT (tgt) = ass;
3976 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3977 TREE_OPERAND (use->stmt, 1) = op;
3981 /* Replaces ssa name in index IDX by its basic variable. Callback for
3985 idx_remove_ssa_names (tree base, tree *idx,
3986 void *data ATTRIBUTE_UNUSED)
3990 if (TREE_CODE (*idx) == SSA_NAME)
3991 *idx = SSA_NAME_VAR (*idx);
3993 if (TREE_CODE (base) == ARRAY_REF)
3995 op = &TREE_OPERAND (base, 2);
3997 && TREE_CODE (*op) == SSA_NAME)
3998 *op = SSA_NAME_VAR (*op);
3999 op = &TREE_OPERAND (base, 3);
4001 && TREE_CODE (*op) == SSA_NAME)
4002 *op = SSA_NAME_VAR (*op);
4008 /* Unshares REF and replaces ssa names inside it by their basic variables. */
4011 unshare_and_remove_ssa_names (tree ref)
4013 ref = unshare_expr (ref);
4014 for_each_index (&ref, idx_remove_ssa_names, NULL);
4019 /* Rewrites base of memory access OP with expression WITH in statement
4020 pointed to by BSI. */
4023 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4025 tree bvar, var, new_var, new_name, copy, name;
4028 var = bvar = get_base_address (*op);
4030 if (!var || TREE_CODE (with) != SSA_NAME)
4033 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4034 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4035 if (TREE_CODE (var) == INDIRECT_REF)
4036 var = TREE_OPERAND (var, 0);
4037 if (TREE_CODE (var) == SSA_NAME)
4040 var = SSA_NAME_VAR (var);
4042 else if (DECL_P (var))
4047 if (var_ann (var)->type_mem_tag)
4048 var = var_ann (var)->type_mem_tag;
4050 /* We need to add a memory tag for the variable. But we do not want
4051 to add it to the temporary used for the computations, since this leads
4052 to problems in redundancy elimination when there are common parts
4053 in two computations referring to the different arrays. So we copy
4054 the variable to a new temporary. */
4055 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
4057 new_name = duplicate_ssa_name (name, copy);
4060 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4061 add_referenced_tmp_var (new_var);
4062 var_ann (new_var)->type_mem_tag = var;
4063 new_name = make_ssa_name (new_var, copy);
4065 TREE_OPERAND (copy, 0) = new_name;
4066 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4072 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4073 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4075 if (TREE_CODE (*op) == INDIRECT_REF)
4076 orig = REF_ORIGINAL (*op);
4078 orig = unshare_and_remove_ssa_names (*op);
4080 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4082 /* Record the original reference, for purposes of alias analysis. */
4083 REF_ORIGINAL (*op) = orig;
4086 /* Rewrites USE (address that is an iv) using candidate CAND. */
4089 rewrite_use_address (struct ivopts_data *data,
4090 struct iv_use *use, struct iv_cand *cand)
4092 tree comp = unshare_expr (get_computation (data->current_loop,
4094 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4096 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4099 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4101 rewrite_address_base (&bsi, use->op_p, op);
4104 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4108 rewrite_use_compare (struct ivopts_data *data,
4109 struct iv_use *use, struct iv_cand *cand)
4112 tree *op_p, cond, op, stmts, bound;
4113 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4114 enum tree_code compare;
4116 if (may_eliminate_iv (data->current_loop,
4117 use, cand, &compare, &bound))
4119 op = force_gimple_operand (unshare_expr (bound), &stmts,
4123 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4125 *use->op_p = build2 (compare, boolean_type_node,
4126 var_at_stmt (data->current_loop,
4127 cand, use->stmt), op);
4128 modify_stmt (use->stmt);
4132 /* The induction variable elimination failed; just express the original
4134 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4137 op_p = &TREE_OPERAND (cond, 0);
4138 if (TREE_CODE (*op_p) != SSA_NAME
4139 || zero_p (get_iv (data, *op_p)->step))
4140 op_p = &TREE_OPERAND (cond, 1);
4142 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4144 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4149 /* Ensure that operand *OP_P may be used at the end of EXIT without
4150 violating loop closed ssa form. */
4153 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4156 struct loop *def_loop;
4159 use = USE_FROM_PTR (op_p);
4160 if (TREE_CODE (use) != SSA_NAME)
4163 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4167 def_loop = def_bb->loop_father;
4168 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4171 /* Try finding a phi node that copies the value out of the loop. */
4172 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4173 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4178 /* Create such a phi node. */
4179 tree new_name = duplicate_ssa_name (use, NULL);
4181 phi = create_phi_node (new_name, exit->dest);
4182 SSA_NAME_DEF_STMT (new_name) = phi;
4183 add_phi_arg (&phi, use, exit);
4186 SET_USE (op_p, PHI_RESULT (phi));
4189 /* Ensure that operands of STMT may be used at the end of EXIT without
4190 violating loop closed ssa form. */
4193 protect_loop_closed_ssa_form (edge exit, tree stmt)
4197 v_may_def_optype v_may_defs;
4200 get_stmt_operands (stmt);
4202 uses = STMT_USE_OPS (stmt);
4203 for (i = 0; i < NUM_USES (uses); i++)
4204 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4206 vuses = STMT_VUSE_OPS (stmt);
4207 for (i = 0; i < NUM_VUSES (vuses); i++)
4208 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4210 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4211 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4212 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4215 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4216 so that they are emitted on the correct place, and so that the loop closed
4217 ssa form is preserved. */
4220 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4222 tree_stmt_iterator tsi;
4223 block_stmt_iterator bsi;
4224 tree phi, stmt, def, next;
4226 if (EDGE_COUNT (exit->dest->preds) > 1)
4227 split_loop_exit_edge (exit);
4229 if (TREE_CODE (stmts) == STATEMENT_LIST)
4231 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4232 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4235 protect_loop_closed_ssa_form (exit, stmts);
4237 /* Ensure there is label in exit->dest, so that we can
4239 tree_block_label (exit->dest);
4240 bsi = bsi_after_labels (exit->dest);
4241 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4246 for (phi = phi_nodes (exit->dest); phi; phi = next)
4248 next = TREE_CHAIN (phi);
4250 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4252 def = PHI_RESULT (phi);
4253 remove_statement (phi, false);
4254 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4256 SSA_NAME_DEF_STMT (def) = stmt;
4257 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4262 /* Rewrites the final value of USE (that is only needed outside of the loop)
4263 using candidate CAND. */
4266 rewrite_use_outer (struct ivopts_data *data,
4267 struct iv_use *use, struct iv_cand *cand)
4270 tree value, op, stmts, tgt;
4273 switch (TREE_CODE (use->stmt))
4276 tgt = PHI_RESULT (use->stmt);
4279 tgt = TREE_OPERAND (use->stmt, 0);
4285 exit = single_dom_exit (data->current_loop);
4291 bool ok = may_replace_final_value (data->current_loop, use, &value);
4295 value = get_computation_at (data->current_loop,
4296 use, cand, last_stmt (exit->src));
4298 value = unshare_expr (value);
4299 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4301 /* If we will preserve the iv anyway and we would need to perform
4302 some computation to replace the final value, do nothing. */
4303 if (stmts && name_info (data, tgt)->preserve_biv)
4306 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4308 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4310 if (USE_FROM_PTR (use_p) == tgt)
4311 SET_USE (use_p, op);
4315 compute_phi_arg_on_exit (exit, stmts, op);
4317 /* Enable removal of the statement. We cannot remove it directly,
4318 since we may still need the aliasing information attached to the
4319 ssa name defined by it. */
4320 name_info (data, tgt)->iv->have_use_for = false;
4324 /* If the variable is going to be preserved anyway, there is nothing to
4326 if (name_info (data, tgt)->preserve_biv)
4329 /* Otherwise we just need to compute the iv. */
4330 rewrite_use_nonlinear_expr (data, use, cand);
4333 /* Rewrites USE using candidate CAND. */
4336 rewrite_use (struct ivopts_data *data,
4337 struct iv_use *use, struct iv_cand *cand)
4341 case USE_NONLINEAR_EXPR:
4342 rewrite_use_nonlinear_expr (data, use, cand);
4346 rewrite_use_outer (data, use, cand);
4350 rewrite_use_address (data, use, cand);
4354 rewrite_use_compare (data, use, cand);
4360 modify_stmt (use->stmt);
4363 /* Rewrite the uses using the selected induction variables. */
4366 rewrite_uses (struct ivopts_data *data)
4369 struct iv_cand *cand;
4372 for (i = 0; i < n_iv_uses (data); i++)
4374 use = iv_use (data, i);
4375 cand = use->selected;
4378 rewrite_use (data, use, cand);
4382 /* Removes the ivs that are not used after rewriting. */
4385 remove_unused_ivs (struct ivopts_data *data)
4390 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4392 struct version_info *info;
4394 info = ver_info (data, j);
4396 && !zero_p (info->iv->step)
4398 && !info->iv->have_use_for
4399 && !info->preserve_biv)
4400 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4404 /* Frees data allocated by the optimization of a single loop. */
4407 free_loop_data (struct ivopts_data *data)
4412 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4414 struct version_info *info;
4416 info = ver_info (data, i);
4420 info->has_nonlin_use = false;
4421 info->preserve_biv = false;
4424 bitmap_clear (data->relevant);
4426 for (i = 0; i < n_iv_uses (data); i++)
4428 struct iv_use *use = iv_use (data, i);
4431 BITMAP_XFREE (use->related_cands);
4432 for (j = 0; j < use->n_map_members; j++)
4433 if (use->cost_map[j].depends_on)
4434 BITMAP_XFREE (use->cost_map[j].depends_on);
4435 free (use->cost_map);
4438 VARRAY_POP_ALL (data->iv_uses);
4440 for (i = 0; i < n_iv_cands (data); i++)
4442 struct iv_cand *cand = iv_cand (data, i);
4448 VARRAY_POP_ALL (data->iv_candidates);
4450 if (data->version_info_size < num_ssa_names)
4452 data->version_info_size = 2 * num_ssa_names;
4453 free (data->version_info);
4454 data->version_info = xcalloc (data->version_info_size,
4455 sizeof (struct version_info));
4458 data->max_inv_id = 0;
4460 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4462 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4464 SET_DECL_RTL (obj, NULL_RTX);
4466 VARRAY_POP_ALL (decl_rtl_to_reset);
4469 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4473 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4477 for (i = 1; i < loops->num; i++)
4478 if (loops->parray[i])
4480 free (loops->parray[i]->aux);
4481 loops->parray[i]->aux = NULL;
4484 free_loop_data (data);
4485 free (data->version_info);
4486 BITMAP_XFREE (data->relevant);
4488 VARRAY_FREE (decl_rtl_to_reset);
4489 VARRAY_FREE (data->iv_uses);
4490 VARRAY_FREE (data->iv_candidates);
4493 /* Optimizes the LOOP. Returns true if anything changed. */
4496 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4498 bool changed = false;
4502 data->current_loop = loop;
4504 if (dump_file && (dump_flags & TDF_DETAILS))
4506 fprintf (dump_file, "Processing loop %d\n", loop->num);
4508 exit = single_dom_exit (loop);
4511 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4512 exit->src->index, exit->dest->index);
4513 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4514 fprintf (dump_file, "\n");
4517 fprintf (dump_file, "\n");
4520 /* For each ssa name determines whether it behaves as an induction variable
4522 if (!find_induction_variables (data))
4525 /* Finds interesting uses (item 1). */
4526 find_interesting_uses (data);
4527 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4530 /* Finds candidates for the induction variables (item 2). */
4531 find_iv_candidates (data);
4533 /* Calculates the costs (item 3, part 1). */
4534 determine_use_iv_costs (data);
4535 determine_iv_costs (data);
4536 determine_set_costs (data);
4538 /* Find the optimal set of induction variables (item 3, part 2). */
4539 iv_set = find_optimal_iv_set (data);
4544 /* Create the new induction variables (item 4, part 1). */
4545 create_new_ivs (data, iv_set);
4547 /* Rewrite the uses (item 4, part 2). */
4548 rewrite_uses (data);
4550 /* Remove the ivs that are unused after rewriting. */
4551 remove_unused_ivs (data);
4553 loop_commit_inserts ();
4555 BITMAP_XFREE (iv_set);
4557 /* We have changed the structure of induction variables; it might happen
4558 that definitions in the scev database refer to some of them that were
4563 free_loop_data (data);
4568 /* Main entry point. Optimizes induction variables in LOOPS. */
4571 tree_ssa_iv_optimize (struct loops *loops)
4574 struct ivopts_data data;
4576 tree_ssa_iv_optimize_init (loops, &data);
4578 /* Optimize the loops starting with the innermost ones. */
4579 loop = loops->tree_root;
4583 #ifdef ENABLE_CHECKING
4584 verify_loop_closed_ssa ();
4588 /* Scan the loops, inner ones first. */
4589 while (loop != loops->tree_root)
4591 if (dump_file && (dump_flags & TDF_DETAILS))
4592 flow_loop_dump (loop, dump_file, NULL, 1);
4594 tree_ssa_iv_optimize_loop (&data, loop);
4606 #ifdef ENABLE_CHECKING
4607 verify_loop_closed_ssa ();
4611 tree_ssa_iv_optimize_finalize (loops, &data);