1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
292 if (!just_once_each_iteration_p (loop, exit->src))
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 fprintf (file, " type ");
309 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
310 fprintf (file, "\n");
314 fprintf (file, " base ");
315 print_generic_expr (file, iv->base, TDF_SLIM);
316 fprintf (file, "\n");
318 fprintf (file, " step ");
319 print_generic_expr (file, iv->step, TDF_SLIM);
320 fprintf (file, "\n");
324 fprintf (file, " invariant ");
325 print_generic_expr (file, iv->base, TDF_SLIM);
326 fprintf (file, "\n");
330 fprintf (file, " is a biv\n");
333 /* Dumps information about the USE to FILE. */
335 extern void dump_use (FILE *, struct iv_use *);
337 dump_use (FILE *file, struct iv_use *use)
339 struct iv *iv = use->iv;
341 fprintf (file, "use %d\n", use->id);
345 case USE_NONLINEAR_EXPR:
346 fprintf (file, " generic\n");
350 fprintf (file, " outside\n");
354 fprintf (file, " address\n");
358 fprintf (file, " compare\n");
365 fprintf (file, " in statement ");
366 print_generic_expr (file, use->stmt, TDF_SLIM);
367 fprintf (file, "\n");
369 fprintf (file, " at position ");
371 print_generic_expr (file, *use->op_p, TDF_SLIM);
372 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " related candidates ");
396 dump_bitmap (file, use->related_cands);
399 /* Dumps information about the uses to FILE. */
401 extern void dump_uses (FILE *, struct ivopts_data *);
403 dump_uses (FILE *file, struct ivopts_data *data)
408 for (i = 0; i < n_iv_uses (data); i++)
410 use = iv_use (data, i);
412 dump_use (file, use);
413 fprintf (file, "\n");
417 /* Dumps information about induction variable candidate CAND to FILE. */
419 extern void dump_cand (FILE *, struct iv_cand *);
421 dump_cand (FILE *file, struct iv_cand *cand)
423 struct iv *iv = cand->iv;
425 fprintf (file, "candidate %d%s\n",
426 cand->id, cand->important ? " (important)" : "");
430 fprintf (file, " final value replacement\n");
437 fprintf (file, " incremented before exit test\n");
441 fprintf (file, " incremented at end\n");
445 fprintf (file, " original biv\n");
449 fprintf (file, " type ");
450 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
451 fprintf (file, "\n");
455 fprintf (file, " base ");
456 print_generic_expr (file, iv->base, TDF_SLIM);
457 fprintf (file, "\n");
459 fprintf (file, " step ");
460 print_generic_expr (file, iv->step, TDF_SLIM);
461 fprintf (file, "\n");
465 fprintf (file, " invariant ");
466 print_generic_expr (file, iv->base, TDF_SLIM);
467 fprintf (file, "\n");
471 /* Returns the info for ssa version VER. */
473 static inline struct version_info *
474 ver_info (struct ivopts_data *data, unsigned ver)
476 return data->version_info + ver;
479 /* Returns the info for ssa name NAME. */
481 static inline struct version_info *
482 name_info (struct ivopts_data *data, tree name)
484 return ver_info (data, SSA_NAME_VERSION (name));
487 /* Checks whether there exists number X such that X * B = A, counting modulo
491 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
494 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
495 unsigned HOST_WIDE_INT inv, ex, val;
501 /* First divide the whole equation by 2 as long as possible. */
502 while (!(a & 1) && !(b & 1))
512 /* If b is still even, a is odd and there is no such x. */
516 /* Find the inverse of b. We compute it as
517 b^(2^(bits - 1) - 1) (mod 2^bits). */
520 for (i = 0; i < bits - 1; i++)
522 inv = (inv * ex) & mask;
523 ex = (ex * ex) & mask;
526 val = (a * inv) & mask;
528 gcc_assert (((val * b) & mask) == a);
530 if ((val >> (bits - 1)) & 1)
538 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
542 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
544 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
548 if (sbb == loop->latch)
554 return stmt == last_stmt (bb);
557 /* Returns true if STMT if after the place where the original induction
558 variable CAND is incremented. */
561 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
563 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
564 basic_block stmt_bb = bb_for_stmt (stmt);
565 block_stmt_iterator bsi;
567 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
570 if (stmt_bb != cand_bb)
573 /* Scan the block from the end, since the original ivs are usually
574 incremented at the end of the loop body. */
575 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
577 if (bsi_stmt (bsi) == cand->incremented_at)
579 if (bsi_stmt (bsi) == stmt)
584 /* Returns true if STMT if after the place where the induction variable
585 CAND is incremented in LOOP. */
588 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
596 return stmt_after_ip_normal_pos (loop, stmt);
599 return stmt_after_ip_original_pos (cand, stmt);
606 /* Initializes data structures used by the iv optimization pass, stored
607 in DATA. LOOPS is the loop tree. */
610 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
614 data->version_info_size = 2 * num_ssa_names;
615 data->version_info = xcalloc (data->version_info_size,
616 sizeof (struct version_info));
617 data->relevant = BITMAP_XMALLOC ();
618 data->max_inv_id = 0;
620 for (i = 1; i < loops->num; i++)
621 if (loops->parray[i])
622 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
624 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
625 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
626 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
629 /* Allocates an induction variable with given initial value BASE and step STEP
633 alloc_iv (tree base, tree step)
635 struct iv *iv = xcalloc (1, sizeof (struct iv));
637 if (step && integer_zerop (step))
643 iv->have_use_for = false;
645 iv->ssa_name = NULL_TREE;
650 /* Sets STEP and BASE for induction variable IV. */
653 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
655 struct version_info *info = name_info (data, iv);
657 gcc_assert (!info->iv);
659 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
660 info->iv = alloc_iv (base, step);
661 info->iv->ssa_name = iv;
664 /* Finds induction variable declaration for VAR. */
667 get_iv (struct ivopts_data *data, tree var)
671 if (!name_info (data, var)->iv)
673 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
676 || !flow_bb_inside_loop_p (data->current_loop, bb))
677 set_iv (data, var, var, NULL_TREE);
680 return name_info (data, var)->iv;
683 /* Determines the step of a biv defined in PHI. */
686 determine_biv_step (tree phi)
688 struct loop *loop = bb_for_stmt (phi)->loop_father;
689 tree name = PHI_RESULT (phi), base, step;
690 tree type = TREE_TYPE (name);
692 if (!is_gimple_reg (name))
695 if (!simple_iv (loop, phi, name, &base, &step))
699 return build_int_cst (type, 0);
704 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
707 abnormal_ssa_name_p (tree exp)
712 if (TREE_CODE (exp) != SSA_NAME)
715 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
718 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
719 abnormal phi node. Callback for for_each_index. */
722 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
723 void *data ATTRIBUTE_UNUSED)
725 if (TREE_CODE (base) == ARRAY_REF)
727 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
729 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
733 return !abnormal_ssa_name_p (*index);
736 /* Returns true if EXPR contains a ssa name that occurs in an
737 abnormal phi node. */
740 contains_abnormal_ssa_name_p (tree expr)
742 enum tree_code code = TREE_CODE (expr);
743 enum tree_code_class class = TREE_CODE_CLASS (code);
745 if (code == SSA_NAME)
746 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
748 if (code == INTEGER_CST
749 || is_gimple_min_invariant (expr))
752 if (code == ADDR_EXPR)
753 return !for_each_index (&TREE_OPERAND (expr, 1),
754 idx_contains_abnormal_ssa_name_p,
761 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
766 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
778 /* Finds basic ivs. */
781 find_bivs (struct ivopts_data *data)
783 tree phi, step, type, base;
785 struct loop *loop = data->current_loop;
787 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
789 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
792 step = determine_biv_step (phi);
796 if (cst_and_fits_in_hwi (step)
797 && int_cst_value (step) == 0)
800 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
801 if (contains_abnormal_ssa_name_p (base))
804 type = TREE_TYPE (PHI_RESULT (phi));
805 base = fold_convert (type, base);
806 step = fold_convert (type, step);
808 /* FIXME: We do not handle induction variables whose step does
809 not satisfy cst_and_fits_in_hwi. */
810 if (!cst_and_fits_in_hwi (step))
813 set_iv (data, PHI_RESULT (phi), base, step);
820 /* Marks basic ivs. */
823 mark_bivs (struct ivopts_data *data)
826 struct iv *iv, *incr_iv;
827 struct loop *loop = data->current_loop;
830 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
832 iv = get_iv (data, PHI_RESULT (phi));
836 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
837 incr_iv = get_iv (data, var);
841 /* If the increment is in the subloop, ignore it. */
842 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
843 if (incr_bb->loop_father != data->current_loop
844 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
848 incr_iv->biv_p = true;
852 /* Checks whether STMT defines a linear induction variable and stores its
853 parameters to BASE and STEP. */
856 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
857 tree *base, tree *step)
860 struct loop *loop = data->current_loop;
865 if (TREE_CODE (stmt) != MODIFY_EXPR)
868 lhs = TREE_OPERAND (stmt, 0);
869 if (TREE_CODE (lhs) != SSA_NAME)
872 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
875 /* FIXME: We do not handle induction variables whose step does
876 not satisfy cst_and_fits_in_hwi. */
878 && !cst_and_fits_in_hwi (*step))
881 if (contains_abnormal_ssa_name_p (*base))
887 /* Finds general ivs in statement STMT. */
890 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
894 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
897 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
900 /* Finds general ivs in basic block BB. */
903 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
905 block_stmt_iterator bsi;
907 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
908 find_givs_in_stmt (data, bsi_stmt (bsi));
911 /* Finds general ivs. */
914 find_givs (struct ivopts_data *data)
916 struct loop *loop = data->current_loop;
917 basic_block *body = get_loop_body_in_dom_order (loop);
920 for (i = 0; i < loop->num_nodes; i++)
921 find_givs_in_bb (data, body[i]);
925 /* Determine the number of iterations of the current loop. */
928 determine_number_of_iterations (struct ivopts_data *data)
930 struct loop *loop = data->current_loop;
931 edge exit = single_dom_exit (loop);
936 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
939 /* For each ssa name defined in LOOP determines whether it is an induction
940 variable and if so, its initial value and step. */
943 find_induction_variables (struct ivopts_data *data)
946 struct loop *loop = data->current_loop;
949 if (!find_bivs (data))
954 determine_number_of_iterations (data);
956 if (dump_file && (dump_flags & TDF_DETAILS))
958 if (loop_data (loop)->niter.niter)
960 fprintf (dump_file, " number of iterations ");
961 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
963 fprintf (dump_file, "\n");
965 fprintf (dump_file, " may be zero if ");
966 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
968 fprintf (dump_file, "\n");
970 fprintf (dump_file, " bogus unless ");
971 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
973 fprintf (dump_file, "\n");
974 fprintf (dump_file, "\n");
977 fprintf (dump_file, "Induction variables:\n\n");
979 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
981 if (ver_info (data, i)->iv)
982 dump_iv (dump_file, ver_info (data, i)->iv);
989 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
991 static struct iv_use *
992 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
993 tree stmt, enum use_type use_type)
995 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
997 use->id = n_iv_uses (data);
998 use->type = use_type;
1002 use->related_cands = BITMAP_XMALLOC ();
1004 if (dump_file && (dump_flags & TDF_DETAILS))
1005 dump_use (dump_file, use);
1007 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1012 /* Checks whether OP is a loop-level invariant and if so, records it.
1013 NONLINEAR_USE is true if the invariant is used in a way we do not
1014 handle specially. */
1017 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1020 struct version_info *info;
1022 if (TREE_CODE (op) != SSA_NAME
1023 || !is_gimple_reg (op))
1026 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1028 && flow_bb_inside_loop_p (data->current_loop, bb))
1031 info = name_info (data, op);
1033 info->has_nonlin_use |= nonlinear_use;
1035 info->inv_id = ++data->max_inv_id;
1036 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1039 /* Checks whether the use OP is interesting and if so, records it
1042 static struct iv_use *
1043 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1051 if (TREE_CODE (op) != SSA_NAME)
1054 iv = get_iv (data, op);
1058 if (iv->have_use_for)
1060 use = iv_use (data, iv->use_id);
1062 gcc_assert (use->type == USE_NONLINEAR_EXPR
1063 || use->type == USE_OUTER);
1065 if (type == USE_NONLINEAR_EXPR)
1066 use->type = USE_NONLINEAR_EXPR;
1070 if (zero_p (iv->step))
1072 record_invariant (data, op, true);
1075 iv->have_use_for = true;
1077 civ = xmalloc (sizeof (struct iv));
1080 stmt = SSA_NAME_DEF_STMT (op);
1081 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1082 || TREE_CODE (stmt) == MODIFY_EXPR);
1084 use = record_use (data, NULL, civ, stmt, type);
1085 iv->use_id = use->id;
1090 /* Checks whether the use OP is interesting and if so, records it. */
1092 static struct iv_use *
1093 find_interesting_uses_op (struct ivopts_data *data, tree op)
1095 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1098 /* Records a definition of induction variable OP that is used outside of the
1101 static struct iv_use *
1102 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1104 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1107 /* Checks whether the condition *COND_P in STMT is interesting
1108 and if so, records it. */
1111 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1115 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1117 tree zero = integer_zero_node;
1119 const_iv.step = NULL_TREE;
1121 if (integer_zerop (*cond_p)
1122 || integer_nonzerop (*cond_p))
1125 if (TREE_CODE (*cond_p) == SSA_NAME)
1132 op0_p = &TREE_OPERAND (*cond_p, 0);
1133 op1_p = &TREE_OPERAND (*cond_p, 1);
1136 if (TREE_CODE (*op0_p) == SSA_NAME)
1137 iv0 = get_iv (data, *op0_p);
1141 if (TREE_CODE (*op1_p) == SSA_NAME)
1142 iv1 = get_iv (data, *op1_p);
1146 if (/* When comparing with non-invariant value, we may not do any senseful
1147 induction variable elimination. */
1149 /* Eliminating condition based on two ivs would be nontrivial.
1150 ??? TODO -- it is not really important to handle this case. */
1151 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1153 find_interesting_uses_op (data, *op0_p);
1154 find_interesting_uses_op (data, *op1_p);
1158 if (zero_p (iv0->step) && zero_p (iv1->step))
1160 /* If both are invariants, this is a work for unswitching. */
1164 civ = xmalloc (sizeof (struct iv));
1165 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1166 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1169 /* Returns true if expression EXPR is obviously invariant in LOOP,
1170 i.e. if all its operands are defined outside of the LOOP. */
1173 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1178 if (is_gimple_min_invariant (expr))
1181 if (TREE_CODE (expr) == SSA_NAME)
1183 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1185 && flow_bb_inside_loop_p (loop, def_bb))
1194 len = first_rtl_op (TREE_CODE (expr));
1195 for (i = 0; i < len; i++)
1196 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1202 /* Cumulates the steps of indices into DATA and replaces their values with the
1203 initial ones. Returns false when the value of the index cannot be determined.
1204 Callback for for_each_index. */
1206 struct ifs_ivopts_data
1208 struct ivopts_data *ivopts_data;
1214 idx_find_step (tree base, tree *idx, void *data)
1216 struct ifs_ivopts_data *dta = data;
1218 tree step, type, iv_type, iv_step, lbound, off;
1219 struct loop *loop = dta->ivopts_data->current_loop;
1221 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1222 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1225 /* If base is a component ref, require that the offset of the reference
1227 if (TREE_CODE (base) == COMPONENT_REF)
1229 off = component_ref_field_offset (base);
1230 return expr_invariant_in_loop_p (loop, off);
1233 /* If base is array, first check whether we will be able to move the
1234 reference out of the loop (in order to take its address in strength
1235 reduction). In order for this to work we need both lower bound
1236 and step to be loop invariants. */
1237 if (TREE_CODE (base) == ARRAY_REF)
1239 step = array_ref_element_size (base);
1240 lbound = array_ref_low_bound (base);
1242 if (!expr_invariant_in_loop_p (loop, step)
1243 || !expr_invariant_in_loop_p (loop, lbound))
1247 if (TREE_CODE (*idx) != SSA_NAME)
1250 iv = get_iv (dta->ivopts_data, *idx);
1259 iv_type = TREE_TYPE (iv->base);
1260 type = build_pointer_type (TREE_TYPE (base));
1261 if (TREE_CODE (base) == ARRAY_REF)
1263 step = array_ref_element_size (base);
1265 /* We only handle addresses whose step is an integer constant. */
1266 if (TREE_CODE (step) != INTEGER_CST)
1270 /* The step for pointer arithmetics already is 1 byte. */
1271 step = build_int_cst (type, 1);
1273 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1274 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1275 type, iv->base, iv->step, dta->stmt);
1277 iv_step = fold_convert (iv_type, iv->step);
1281 /* The index might wrap. */
1285 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1288 *dta->step_p = step;
1290 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1295 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1296 object is passed to it in DATA. */
1299 idx_record_use (tree base, tree *idx,
1302 find_interesting_uses_op (data, *idx);
1303 if (TREE_CODE (base) == ARRAY_REF)
1305 find_interesting_uses_op (data, array_ref_element_size (base));
1306 find_interesting_uses_op (data, array_ref_low_bound (base));
1311 /* Finds addresses in *OP_P inside STMT. */
1314 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1316 tree base = unshare_expr (*op_p), step = NULL;
1318 struct ifs_ivopts_data ifs_ivopts_data;
1320 /* Ignore bitfields for now. Not really something terribly complicated
1322 if (TREE_CODE (base) == COMPONENT_REF
1323 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1326 ifs_ivopts_data.ivopts_data = data;
1327 ifs_ivopts_data.stmt = stmt;
1328 ifs_ivopts_data.step_p = &step;
1329 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1333 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1334 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1336 if (TREE_CODE (base) == INDIRECT_REF)
1337 base = TREE_OPERAND (base, 0);
1339 base = build_addr (base);
1341 civ = alloc_iv (base, step);
1342 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1346 for_each_index (op_p, idx_record_use, data);
1349 /* Finds and records invariants used in STMT. */
1352 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1354 use_optype uses = NULL;
1358 if (TREE_CODE (stmt) == PHI_NODE)
1359 n = PHI_NUM_ARGS (stmt);
1362 get_stmt_operands (stmt);
1363 uses = STMT_USE_OPS (stmt);
1364 n = NUM_USES (uses);
1367 for (i = 0; i < n; i++)
1369 if (TREE_CODE (stmt) == PHI_NODE)
1370 op = PHI_ARG_DEF (stmt, i);
1372 op = USE_OP (uses, i);
1374 record_invariant (data, op, false);
1378 /* Finds interesting uses of induction variables in the statement STMT. */
1381 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1385 use_optype uses = NULL;
1388 find_invariants_stmt (data, stmt);
1390 if (TREE_CODE (stmt) == COND_EXPR)
1392 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1396 if (TREE_CODE (stmt) == MODIFY_EXPR)
1398 lhs = TREE_OPERAND (stmt, 0);
1399 rhs = TREE_OPERAND (stmt, 1);
1401 if (TREE_CODE (lhs) == SSA_NAME)
1403 /* If the statement defines an induction variable, the uses are not
1404 interesting by themselves. */
1406 iv = get_iv (data, lhs);
1408 if (iv && !zero_p (iv->step))
1412 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1414 case tcc_comparison:
1415 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1419 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1420 if (REFERENCE_CLASS_P (lhs))
1421 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1427 if (REFERENCE_CLASS_P (lhs)
1428 && is_gimple_val (rhs))
1430 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1431 find_interesting_uses_op (data, rhs);
1435 /* TODO -- we should also handle address uses of type
1437 memory = call (whatever);
1444 if (TREE_CODE (stmt) == PHI_NODE
1445 && bb_for_stmt (stmt) == data->current_loop->header)
1447 lhs = PHI_RESULT (stmt);
1448 iv = get_iv (data, lhs);
1450 if (iv && !zero_p (iv->step))
1454 if (TREE_CODE (stmt) == PHI_NODE)
1455 n = PHI_NUM_ARGS (stmt);
1458 uses = STMT_USE_OPS (stmt);
1459 n = NUM_USES (uses);
1462 for (i = 0; i < n; i++)
1464 if (TREE_CODE (stmt) == PHI_NODE)
1465 op = PHI_ARG_DEF (stmt, i);
1467 op = USE_OP (uses, i);
1469 if (TREE_CODE (op) != SSA_NAME)
1472 iv = get_iv (data, op);
1476 find_interesting_uses_op (data, op);
1480 /* Finds interesting uses of induction variables outside of loops
1481 on loop exit edge EXIT. */
1484 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1488 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1490 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1491 find_interesting_uses_outer (data, def);
1495 /* Finds uses of the induction variables that are interesting. */
1498 find_interesting_uses (struct ivopts_data *data)
1501 block_stmt_iterator bsi;
1503 basic_block *body = get_loop_body (data->current_loop);
1505 struct version_info *info;
1508 if (dump_file && (dump_flags & TDF_DETAILS))
1509 fprintf (dump_file, "Uses:\n\n");
1511 for (i = 0; i < data->current_loop->num_nodes; i++)
1515 for (e = bb->succ; e; e = e->succ_next)
1516 if (e->dest != EXIT_BLOCK_PTR
1517 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1518 find_interesting_uses_outside (data, e);
1520 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1521 find_interesting_uses_stmt (data, phi);
1522 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1523 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1526 if (dump_file && (dump_flags & TDF_DETAILS))
1530 fprintf (dump_file, "\n");
1532 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1534 info = ver_info (data, i);
1537 fprintf (dump_file, " ");
1538 print_generic_expr (dump_file, info->name, TDF_SLIM);
1539 fprintf (dump_file, " is invariant (%d)%s\n",
1540 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1544 fprintf (dump_file, "\n");
1550 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1551 position to POS. If USE is not NULL, the candidate is set as related to
1552 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1553 replacement of the final value of the iv by a direct computation. */
1555 static struct iv_cand *
1556 add_candidate_1 (struct ivopts_data *data,
1557 tree base, tree step, bool important, enum iv_position pos,
1558 struct iv_use *use, tree incremented_at)
1561 struct iv_cand *cand = NULL;
1566 type = TREE_TYPE (base);
1567 if (!TYPE_UNSIGNED (type))
1569 type = unsigned_type_for (type);
1570 base = fold_convert (type, base);
1572 step = fold_convert (type, step);
1576 for (i = 0; i < n_iv_cands (data); i++)
1578 cand = iv_cand (data, i);
1580 if (cand->pos != pos)
1583 if (cand->incremented_at != incremented_at)
1597 if (!operand_equal_p (base, cand->iv->base, 0))
1600 if (zero_p (cand->iv->step))
1607 if (step && operand_equal_p (step, cand->iv->step, 0))
1612 if (i == n_iv_cands (data))
1614 cand = xcalloc (1, sizeof (struct iv_cand));
1620 cand->iv = alloc_iv (base, step);
1623 if (pos != IP_ORIGINAL && cand->iv)
1625 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1626 cand->var_after = cand->var_before;
1628 cand->important = important;
1629 cand->incremented_at = incremented_at;
1630 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1632 if (dump_file && (dump_flags & TDF_DETAILS))
1633 dump_cand (dump_file, cand);
1636 if (important && !cand->important)
1638 cand->important = true;
1639 if (dump_file && (dump_flags & TDF_DETAILS))
1640 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1645 bitmap_set_bit (use->related_cands, i);
1646 if (dump_file && (dump_flags & TDF_DETAILS))
1647 fprintf (dump_file, "Candidate %d is related to use %d\n",
1654 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1655 position to POS. If USE is not NULL, the candidate is set as related to
1656 it. The candidate computation is scheduled on all available positions. */
1659 add_candidate (struct ivopts_data *data,
1660 tree base, tree step, bool important, struct iv_use *use)
1662 if (ip_normal_pos (data->current_loop))
1663 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1664 if (ip_end_pos (data->current_loop))
1665 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1668 /* Adds standard iv candidates. */
1671 add_standard_iv_candidates (struct ivopts_data *data)
1673 /* Add 0 + 1 * iteration candidate. */
1674 add_candidate (data,
1675 build_int_cst (unsigned_intSI_type_node, 0),
1676 build_int_cst (unsigned_intSI_type_node, 1),
1679 /* The same for a long type if it is still fast enough. */
1680 if (BITS_PER_WORD > 32)
1681 add_candidate (data,
1682 build_int_cst (unsigned_intDI_type_node, 0),
1683 build_int_cst (unsigned_intDI_type_node, 1),
1688 /* Adds candidates bases on the old induction variable IV. */
1691 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1694 struct iv_cand *cand;
1696 add_candidate (data, iv->base, iv->step, true, NULL);
1698 /* The same, but with initial value zero. */
1699 add_candidate (data,
1700 build_int_cst (TREE_TYPE (iv->base), 0),
1701 iv->step, true, NULL);
1703 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1704 if (TREE_CODE (phi) == PHI_NODE)
1706 /* Additionally record the possibility of leaving the original iv
1708 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1709 cand = add_candidate_1 (data,
1710 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1711 SSA_NAME_DEF_STMT (def));
1712 cand->var_before = iv->ssa_name;
1713 cand->var_after = def;
1717 /* Adds candidates based on the old induction variables. */
1720 add_old_ivs_candidates (struct ivopts_data *data)
1726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1728 iv = ver_info (data, i)->iv;
1729 if (iv && iv->biv_p && !zero_p (iv->step))
1730 add_old_iv_candidates (data, iv);
1734 /* Adds candidates based on the value of the induction variable IV and USE. */
1737 add_iv_value_candidates (struct ivopts_data *data,
1738 struct iv *iv, struct iv_use *use)
1740 add_candidate (data, iv->base, iv->step, false, use);
1742 /* The same, but with initial value zero. */
1743 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1744 iv->step, false, use);
1747 /* Adds candidates based on the address IV and USE. */
1750 add_address_candidates (struct ivopts_data *data,
1751 struct iv *iv, struct iv_use *use)
1753 tree base, abase, tmp, *act;
1755 /* First, the trivial choices. */
1756 add_iv_value_candidates (data, iv, use);
1758 /* Second, try removing the COMPONENT_REFs. */
1759 if (TREE_CODE (iv->base) == ADDR_EXPR)
1761 base = TREE_OPERAND (iv->base, 0);
1762 while (TREE_CODE (base) == COMPONENT_REF
1763 || (TREE_CODE (base) == ARRAY_REF
1764 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1765 base = TREE_OPERAND (base, 0);
1767 if (base != TREE_OPERAND (iv->base, 0))
1769 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1770 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1772 if (TREE_CODE (base) == INDIRECT_REF)
1773 base = TREE_OPERAND (base, 0);
1775 base = build_addr (base);
1776 add_candidate (data, base, iv->step, false, use);
1780 /* Third, try removing the constant offset. */
1782 while (TREE_CODE (abase) == PLUS_EXPR
1783 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1784 abase = TREE_OPERAND (abase, 0);
1785 /* We found the offset, so make the copy of the non-shared part and
1787 if (TREE_CODE (abase) == PLUS_EXPR)
1792 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1794 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1795 NULL_TREE, TREE_OPERAND (tmp, 1));
1796 act = &TREE_OPERAND (*act, 0);
1798 *act = TREE_OPERAND (tmp, 0);
1800 add_candidate (data, base, iv->step, false, use);
1804 /* Possibly adds pseudocandidate for replacing the final value of USE by
1805 a direct computation. */
1808 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1810 struct tree_niter_desc *niter;
1811 struct loop *loop = data->current_loop;
1813 /* We must know where we exit the loop and how many times does it roll. */
1814 if (!single_dom_exit (loop))
1817 niter = &loop_data (loop)->niter;
1819 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1820 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1823 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1826 /* Adds candidates based on the uses. */
1829 add_derived_ivs_candidates (struct ivopts_data *data)
1833 for (i = 0; i < n_iv_uses (data); i++)
1835 struct iv_use *use = iv_use (data, i);
1842 case USE_NONLINEAR_EXPR:
1844 /* Just add the ivs based on the value of the iv used here. */
1845 add_iv_value_candidates (data, use->iv, use);
1849 add_iv_value_candidates (data, use->iv, use);
1851 /* Additionally, add the pseudocandidate for the possibility to
1852 replace the final value by a direct computation. */
1853 add_iv_outer_candidates (data, use);
1857 add_address_candidates (data, use->iv, use);
1866 /* Finds the candidates for the induction variables. */
1869 find_iv_candidates (struct ivopts_data *data)
1871 /* Add commonly used ivs. */
1872 add_standard_iv_candidates (data);
1874 /* Add old induction variables. */
1875 add_old_ivs_candidates (data);
1877 /* Add induction variables derived from uses. */
1878 add_derived_ivs_candidates (data);
1881 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1882 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1883 we allocate a simple list to every use. */
1886 alloc_use_cost_map (struct ivopts_data *data)
1888 unsigned i, n_imp = 0, size, j;
1890 if (!data->consider_all_candidates)
1892 for (i = 0; i < n_iv_cands (data); i++)
1894 struct iv_cand *cand = iv_cand (data, i);
1895 if (cand->important)
1900 for (i = 0; i < n_iv_uses (data); i++)
1902 struct iv_use *use = iv_use (data, i);
1905 if (data->consider_all_candidates)
1907 size = n_iv_cands (data);
1908 use->n_map_members = size;
1913 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
1917 use->n_map_members = 0;
1920 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1924 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1925 on invariants DEPENDS_ON. */
1928 set_use_iv_cost (struct ivopts_data *data,
1929 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1935 BITMAP_XFREE (depends_on);
1939 if (data->consider_all_candidates)
1941 use->cost_map[cand->id].cand = cand;
1942 use->cost_map[cand->id].cost = cost;
1943 use->cost_map[cand->id].depends_on = depends_on;
1950 use->cost_map[use->n_map_members].cand = cand;
1951 use->cost_map[use->n_map_members].cost = cost;
1952 use->cost_map[use->n_map_members].depends_on = depends_on;
1953 use->n_map_members++;
1956 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1960 get_use_iv_cost (struct ivopts_data *data,
1961 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1968 if (data->consider_all_candidates)
1972 for (i = 0; i < use->n_map_members; i++)
1973 if (use->cost_map[i].cand == cand)
1976 if (i == use->n_map_members)
1981 *depends_on = use->cost_map[i].depends_on;
1982 return use->cost_map[i].cost;
1985 /* Returns estimate on cost of computing SEQ. */
1993 for (; seq; seq = NEXT_INSN (seq))
1995 set = single_set (seq);
1997 cost += rtx_cost (set, SET);
2005 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2007 produce_memory_decl_rtl (tree obj, int *regno)
2012 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2014 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2015 x = gen_rtx_SYMBOL_REF (Pmode, name);
2018 x = gen_raw_REG (Pmode, (*regno)++);
2020 return gen_rtx_MEM (DECL_MODE (obj), x);
2023 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2024 walk_tree. DATA contains the actual fake register number. */
2027 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2029 tree obj = NULL_TREE;
2033 switch (TREE_CODE (*expr_p))
2036 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2037 (handled_component_p (*expr_p)
2038 || TREE_CODE (*expr_p) == REALPART_EXPR
2039 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
2040 expr_p = &TREE_OPERAND (*expr_p, 0));
2043 x = produce_memory_decl_rtl (obj, regno);
2048 obj = SSA_NAME_VAR (*expr_p);
2049 if (!DECL_RTL_SET_P (obj))
2050 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2059 if (DECL_RTL_SET_P (obj))
2062 if (DECL_MODE (obj) == BLKmode)
2063 x = produce_memory_decl_rtl (obj, regno);
2065 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2075 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2076 SET_DECL_RTL (obj, x);
2082 /* Determines cost of the computation of EXPR. */
2085 computation_cost (tree expr)
2088 tree type = TREE_TYPE (expr);
2092 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2094 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2098 cost = seq_cost (seq);
2099 if (GET_CODE (rslt) == MEM)
2100 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2105 /* Returns variable containing the value of candidate CAND at statement AT. */
2108 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2110 if (stmt_after_increment (loop, cand, stmt))
2111 return cand->var_after;
2113 return cand->var_before;
2116 /* Determines the expression by that USE is expressed from induction variable
2117 CAND at statement AT in LOOP. */
2120 get_computation_at (struct loop *loop,
2121 struct iv_use *use, struct iv_cand *cand, tree at)
2123 tree ubase = use->iv->base;
2124 tree ustep = use->iv->step;
2125 tree cbase = cand->iv->base;
2126 tree cstep = cand->iv->step;
2127 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2131 unsigned HOST_WIDE_INT ustepi, cstepi;
2132 HOST_WIDE_INT ratioi;
2134 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2136 /* We do not have a precision to express the values of use. */
2140 expr = var_at_stmt (loop, cand, at);
2142 if (TREE_TYPE (expr) != ctype)
2144 /* This may happen with the original ivs. */
2145 expr = fold_convert (ctype, expr);
2148 if (TYPE_UNSIGNED (utype))
2152 uutype = unsigned_type_for (utype);
2153 ubase = fold_convert (uutype, ubase);
2154 ustep = fold_convert (uutype, ustep);
2157 if (uutype != ctype)
2159 expr = fold_convert (uutype, expr);
2160 cbase = fold_convert (uutype, cbase);
2161 cstep = fold_convert (uutype, cstep);
2164 if (!cst_and_fits_in_hwi (cstep)
2165 || !cst_and_fits_in_hwi (ustep))
2168 ustepi = int_cst_value (ustep);
2169 cstepi = int_cst_value (cstep);
2171 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2173 /* TODO maybe consider case when ustep divides cstep and the ratio is
2174 a power of 2 (so that the division is fast to execute)? We would
2175 need to be much more careful with overflows etc. then. */
2179 /* We may need to shift the value if we are after the increment. */
2180 if (stmt_after_increment (loop, cand, at))
2181 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2183 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2184 or |ratio| == 1, it is better to handle this like
2186 ubase - ratio * cbase + ratio * var. */
2190 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2191 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2193 else if (ratioi == -1)
2195 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2196 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2198 else if (TREE_CODE (cbase) == INTEGER_CST)
2200 ratio = build_int_cst_type (uutype, ratioi);
2201 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2202 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2203 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2204 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2208 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2209 ratio = build_int_cst_type (uutype, ratioi);
2210 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2211 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2214 return fold_convert (utype, expr);
2217 /* Determines the expression by that USE is expressed from induction variable
2221 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2223 return get_computation_at (loop, use, cand, use->stmt);
2226 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2229 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2232 enum tree_code code;
2236 if (cst_and_fits_in_hwi (*expr))
2238 *offset += int_cst_value (*expr);
2239 *expr = integer_zero_node;
2243 code = TREE_CODE (*expr);
2245 if (code != PLUS_EXPR && code != MINUS_EXPR)
2248 op0 = TREE_OPERAND (*expr, 0);
2249 op1 = TREE_OPERAND (*expr, 1);
2251 if (cst_and_fits_in_hwi (op1))
2253 if (code == PLUS_EXPR)
2254 *offset += int_cst_value (op1);
2256 *offset -= int_cst_value (op1);
2262 if (code != PLUS_EXPR)
2265 if (!cst_and_fits_in_hwi (op0))
2268 *offset += int_cst_value (op0);
2273 /* Returns cost of addition in MODE. */
2276 add_cost (enum machine_mode mode)
2278 static unsigned costs[NUM_MACHINE_MODES];
2286 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2287 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2288 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2293 cost = seq_cost (seq);
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2300 fprintf (dump_file, "Addition in %s costs %d\n",
2301 GET_MODE_NAME (mode), cost);
2305 /* Entry in a hashtable of already known costs for multiplication. */
2308 HOST_WIDE_INT cst; /* The constant to multiply by. */
2309 enum machine_mode mode; /* In mode. */
2310 unsigned cost; /* The cost. */
2313 /* Counts hash value for the ENTRY. */
2316 mbc_entry_hash (const void *entry)
2318 const struct mbc_entry *e = entry;
2320 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2323 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2326 mbc_entry_eq (const void *entry1, const void *entry2)
2328 const struct mbc_entry *e1 = entry1;
2329 const struct mbc_entry *e2 = entry2;
2331 return (e1->mode == e2->mode
2332 && e1->cst == e2->cst);
2335 /* Returns cost of multiplication by constant CST in MODE. */
2338 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2340 static htab_t costs;
2341 struct mbc_entry **cached, act;
2346 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2350 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2352 return (*cached)->cost;
2354 *cached = xmalloc (sizeof (struct mbc_entry));
2355 (*cached)->mode = mode;
2356 (*cached)->cst = cst;
2359 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2364 cost = seq_cost (seq);
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2367 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2368 (int) cst, GET_MODE_NAME (mode), cost);
2370 (*cached)->cost = cost;
2375 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2376 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2377 variable is omitted. The created memory accesses MODE.
2379 TODO -- there must be some better way. This all is quite crude. */
2382 get_address_cost (bool symbol_present, bool var_present,
2383 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2385 #define MAX_RATIO 128
2386 static sbitmap valid_mult;
2387 static HOST_WIDE_INT rat, off;
2388 static HOST_WIDE_INT min_offset, max_offset;
2389 static unsigned costs[2][2][2][2];
2390 unsigned cost, acost;
2391 rtx seq, addr, base;
2392 bool offset_p, ratio_p;
2394 HOST_WIDE_INT s_offset;
2395 unsigned HOST_WIDE_INT mask;
2402 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2404 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2405 for (i = 1; i <= 1 << 20; i <<= 1)
2407 XEXP (addr, 1) = GEN_INT (i);
2408 if (!memory_address_p (Pmode, addr))
2411 max_offset = i >> 1;
2414 for (i = 1; i <= 1 << 20; i <<= 1)
2416 XEXP (addr, 1) = GEN_INT (-i);
2417 if (!memory_address_p (Pmode, addr))
2420 min_offset = -(i >> 1);
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2424 fprintf (dump_file, "get_address_cost:\n");
2425 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2426 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2429 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2430 sbitmap_zero (valid_mult);
2432 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2433 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2435 XEXP (addr, 1) = GEN_INT (i);
2436 if (memory_address_p (Pmode, addr))
2438 SET_BIT (valid_mult, i + MAX_RATIO);
2443 if (dump_file && (dump_flags & TDF_DETAILS))
2445 fprintf (dump_file, " allowed multipliers:");
2446 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2447 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2448 fprintf (dump_file, " %d", (int) i);
2449 fprintf (dump_file, "\n");
2450 fprintf (dump_file, "\n");
2454 bits = GET_MODE_BITSIZE (Pmode);
2455 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2457 if ((offset >> (bits - 1) & 1))
2462 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2463 ratio_p = (ratio != 1
2464 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2465 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2467 if (ratio != 1 && !ratio_p)
2468 cost += multiply_by_cost (ratio, Pmode);
2470 if (s_offset && !offset_p && !symbol_present)
2472 cost += add_cost (Pmode);
2476 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2481 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2482 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2484 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2488 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2490 base = gen_rtx_fmt_e (CONST, Pmode,
2491 gen_rtx_fmt_ee (PLUS, Pmode,
2495 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2498 else if (var_present)
2502 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2505 base = GEN_INT (off);
2510 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2513 addr = memory_address (Pmode, addr);
2517 acost = seq_cost (seq);
2518 acost += address_cost (addr, Pmode);
2522 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2525 return cost + acost;
2528 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2529 the bitmap to that we should store it. */
2531 static struct ivopts_data *fd_ivopts_data;
2533 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2535 bitmap *depends_on = data;
2536 struct version_info *info;
2538 if (TREE_CODE (*expr_p) != SSA_NAME)
2540 info = name_info (fd_ivopts_data, *expr_p);
2542 if (!info->inv_id || info->has_nonlin_use)
2546 *depends_on = BITMAP_XMALLOC ();
2547 bitmap_set_bit (*depends_on, info->inv_id);
2552 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2553 invariants the computation depends on. */
2556 force_var_cost (struct ivopts_data *data,
2557 tree expr, bitmap *depends_on)
2559 static bool costs_initialized = false;
2560 static unsigned integer_cost;
2561 static unsigned symbol_cost;
2562 static unsigned address_cost;
2564 if (!costs_initialized)
2566 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2567 rtx x = gen_rtx_MEM (DECL_MODE (var),
2568 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2570 tree type = build_pointer_type (integer_type_node);
2572 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2575 SET_DECL_RTL (var, x);
2576 TREE_STATIC (var) = 1;
2577 addr = build1 (ADDR_EXPR, type, var);
2578 symbol_cost = computation_cost (addr) + 1;
2581 = computation_cost (build2 (PLUS_EXPR, type,
2583 build_int_cst_type (type, 2000))) + 1;
2584 if (dump_file && (dump_flags & TDF_DETAILS))
2586 fprintf (dump_file, "force_var_cost:\n");
2587 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2588 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2589 fprintf (dump_file, " address %d\n", (int) address_cost);
2590 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2591 fprintf (dump_file, "\n");
2594 costs_initialized = true;
2599 fd_ivopts_data = data;
2600 walk_tree (&expr, find_depends, depends_on, NULL);
2603 if (SSA_VAR_P (expr))
2606 if (TREE_INVARIANT (expr))
2608 if (TREE_CODE (expr) == INTEGER_CST)
2609 return integer_cost;
2611 if (TREE_CODE (expr) == ADDR_EXPR)
2613 tree obj = TREE_OPERAND (expr, 0);
2615 if (TREE_CODE (obj) == VAR_DECL
2616 || TREE_CODE (obj) == PARM_DECL
2617 || TREE_CODE (obj) == RESULT_DECL)
2621 return address_cost;
2624 /* Just an arbitrary value, FIXME. */
2625 return target_spill_cost;
2628 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2629 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2630 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2631 invariants the computation depends on. */
2634 split_address_cost (struct ivopts_data *data,
2635 tree addr, bool *symbol_present, bool *var_present,
2636 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2639 HOST_WIDE_INT bitsize;
2640 HOST_WIDE_INT bitpos;
2642 enum machine_mode mode;
2643 int unsignedp, volatilep;
2645 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2646 &unsignedp, &volatilep);
2649 || bitpos % BITS_PER_UNIT != 0
2650 || TREE_CODE (core) != VAR_DECL)
2652 *symbol_present = false;
2653 *var_present = true;
2654 fd_ivopts_data = data;
2655 walk_tree (&addr, find_depends, depends_on, NULL);
2656 return target_spill_cost;
2659 *offset += bitpos / BITS_PER_UNIT;
2660 if (TREE_STATIC (core)
2661 || DECL_EXTERNAL (core))
2663 *symbol_present = true;
2664 *var_present = false;
2668 *symbol_present = false;
2669 *var_present = true;
2673 /* Estimates cost of expressing difference of addresses E1 - E2 as
2674 var + symbol + offset. The value of offset is added to OFFSET,
2675 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2676 part is missing. DEPENDS_ON is a set of the invariants the computation
2680 ptr_difference_cost (struct ivopts_data *data,
2681 tree e1, tree e2, bool *symbol_present, bool *var_present,
2682 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2684 HOST_WIDE_INT diff = 0;
2687 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2689 if (TREE_CODE (e2) == ADDR_EXPR
2690 && ptr_difference_const (TREE_OPERAND (e1, 0),
2691 TREE_OPERAND (e2, 0), &diff))
2694 *symbol_present = false;
2695 *var_present = false;
2699 if (e2 == integer_zero_node)
2700 return split_address_cost (data, TREE_OPERAND (e1, 0),
2701 symbol_present, var_present, offset, depends_on);
2703 *symbol_present = false;
2704 *var_present = true;
2706 cost = force_var_cost (data, e1, depends_on);
2707 cost += force_var_cost (data, e2, depends_on);
2708 cost += add_cost (Pmode);
2713 /* Estimates cost of expressing difference E1 - E2 as
2714 var + symbol + offset. The value of offset is added to OFFSET,
2715 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2716 part is missing. DEPENDS_ON is a set of the invariants the computation
2720 difference_cost (struct ivopts_data *data,
2721 tree e1, tree e2, bool *symbol_present, bool *var_present,
2722 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2725 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2727 strip_offset (&e1, offset);
2729 strip_offset (&e2, offset);
2732 if (TREE_CODE (e1) == ADDR_EXPR)
2733 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2735 *symbol_present = false;
2737 if (operand_equal_p (e1, e2, 0))
2739 *var_present = false;
2742 *var_present = true;
2744 return force_var_cost (data, e1, depends_on);
2748 cost = force_var_cost (data, e2, depends_on);
2749 cost += multiply_by_cost (-1, mode);
2754 cost = force_var_cost (data, e1, depends_on);
2755 cost += force_var_cost (data, e2, depends_on);
2756 cost += add_cost (mode);
2761 /* Determines the cost of the computation by that USE is expressed
2762 from induction variable CAND. If ADDRESS_P is true, we just need
2763 to create an address from it, otherwise we want to get it into
2764 register. A set of invariants we depend on is stored in
2765 DEPENDS_ON. AT is the statement at that the value is computed. */
2768 get_computation_cost_at (struct ivopts_data *data,
2769 struct iv_use *use, struct iv_cand *cand,
2770 bool address_p, bitmap *depends_on, tree at)
2772 tree ubase = use->iv->base, ustep = use->iv->step;
2774 tree utype = TREE_TYPE (ubase), ctype;
2775 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2776 HOST_WIDE_INT ratio, aratio;
2777 bool var_present, symbol_present;
2778 unsigned cost = 0, n_sums;
2782 /* Only consider real candidates. */
2786 cbase = cand->iv->base;
2787 cstep = cand->iv->step;
2788 ctype = TREE_TYPE (cbase);
2790 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2792 /* We do not have a precision to express the values of use. */
2796 if (!cst_and_fits_in_hwi (ustep)
2797 || !cst_and_fits_in_hwi (cstep))
2800 if (TREE_CODE (ubase) == INTEGER_CST
2801 && !cst_and_fits_in_hwi (ubase))
2804 if (TREE_CODE (cbase) == INTEGER_CST
2805 && !cst_and_fits_in_hwi (cbase))
2808 ustepi = int_cst_value (ustep);
2809 cstepi = int_cst_value (cstep);
2811 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2813 /* TODO -- add direct handling of this case. */
2817 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2820 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2821 or ratio == 1, it is better to handle this like
2823 ubase - ratio * cbase + ratio * var
2825 (also holds in the case ratio == -1, TODO. */
2827 if (TREE_CODE (cbase) == INTEGER_CST)
2829 offset = - ratio * int_cst_value (cbase);
2830 cost += difference_cost (data,
2831 ubase, integer_zero_node,
2832 &symbol_present, &var_present, &offset,
2835 else if (ratio == 1)
2837 cost += difference_cost (data,
2839 &symbol_present, &var_present, &offset,
2844 cost += force_var_cost (data, cbase, depends_on);
2845 cost += add_cost (TYPE_MODE (ctype));
2846 cost += difference_cost (data,
2847 ubase, integer_zero_node,
2848 &symbol_present, &var_present, &offset,
2852 /* If we are after the increment, the value of the candidate is higher by
2854 if (stmt_after_increment (data->current_loop, cand, at))
2855 offset -= ratio * cstepi;
2857 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2858 (symbol/var/const parts may be omitted). If we are looking for an address,
2859 find the cost of addressing this. */
2861 return get_address_cost (symbol_present, var_present, offset, ratio);
2863 /* Otherwise estimate the costs for computing the expression. */
2864 aratio = ratio > 0 ? ratio : -ratio;
2865 if (!symbol_present && !var_present && !offset)
2868 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2874 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2878 /* Symbol + offset should be compile-time computable. */
2879 && (symbol_present || offset))
2882 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2886 /* Just get the expression, expand it and measure the cost. */
2887 tree comp = get_computation_at (data->current_loop, use, cand, at);
2893 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2895 return computation_cost (comp);
2899 /* Determines the cost of the computation by that USE is expressed
2900 from induction variable CAND. If ADDRESS_P is true, we just need
2901 to create an address from it, otherwise we want to get it into
2902 register. A set of invariants we depend on is stored in
2906 get_computation_cost (struct ivopts_data *data,
2907 struct iv_use *use, struct iv_cand *cand,
2908 bool address_p, bitmap *depends_on)
2910 return get_computation_cost_at (data,
2911 use, cand, address_p, depends_on, use->stmt);
2914 /* Determines cost of basing replacement of USE on CAND in a generic
2918 determine_use_iv_cost_generic (struct ivopts_data *data,
2919 struct iv_use *use, struct iv_cand *cand)
2922 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2924 set_use_iv_cost (data, use, cand, cost, depends_on);
2927 /* Determines cost of basing replacement of USE on CAND in an address. */
2930 determine_use_iv_cost_address (struct ivopts_data *data,
2931 struct iv_use *use, struct iv_cand *cand)
2934 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2936 set_use_iv_cost (data, use, cand, cost, depends_on);
2939 /* Computes value of induction variable IV in iteration NITER. */
2942 iv_value (struct iv *iv, tree niter)
2945 tree type = TREE_TYPE (iv->base);
2947 niter = fold_convert (type, niter);
2948 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2950 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2953 /* Computes value of candidate CAND at position AT in iteration NITER. */
2956 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2958 tree val = iv_value (cand->iv, niter);
2959 tree type = TREE_TYPE (cand->iv->base);
2961 if (stmt_after_increment (loop, cand, at))
2962 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2967 /* Check whether it is possible to express the condition in USE by comparison
2968 of candidate CAND. If so, store the comparison code to COMPARE and the
2969 value compared with to BOUND. */
2972 may_eliminate_iv (struct loop *loop,
2973 struct iv_use *use, struct iv_cand *cand,
2974 enum tree_code *compare, tree *bound)
2977 struct tree_niter_desc *niter, new_niter;
2978 tree wider_type, type, base;
2980 /* For now just very primitive -- we work just for the single exit condition,
2981 and are quite conservative about the possible overflows. TODO -- both of
2982 these can be improved. */
2983 exit = single_dom_exit (loop);
2986 if (use->stmt != last_stmt (exit->src))
2989 niter = &loop_data (loop)->niter;
2991 || !integer_nonzerop (niter->assumptions)
2992 || !integer_zerop (niter->may_be_zero))
2995 if (exit->flags & EDGE_TRUE_VALUE)
3000 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
3002 /* Let us check there is not some problem with overflows, by checking that
3003 the number of iterations is unchanged. */
3004 base = cand->iv->base;
3005 type = TREE_TYPE (base);
3006 if (stmt_after_increment (loop, cand, use->stmt))
3007 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
3009 new_niter.niter = NULL_TREE;
3010 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
3011 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
3013 if (!new_niter.niter
3014 || !integer_nonzerop (new_niter.assumptions)
3015 || !integer_zerop (new_niter.may_be_zero))
3018 wider_type = TREE_TYPE (new_niter.niter);
3019 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
3020 wider_type = TREE_TYPE (niter->niter);
3021 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
3022 fold_convert (wider_type, new_niter.niter), 0))
3028 /* Determines cost of basing replacement of USE on CAND in a condition. */
3031 determine_use_iv_cost_condition (struct ivopts_data *data,
3032 struct iv_use *use, struct iv_cand *cand)
3035 enum tree_code compare;
3037 /* Only consider real candidates. */
3040 set_use_iv_cost (data, use, cand, INFTY, NULL);
3044 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
3046 bitmap depends_on = NULL;
3047 unsigned cost = force_var_cost (data, bound, &depends_on);
3049 set_use_iv_cost (data, use, cand, cost, depends_on);
3053 /* The induction variable elimination failed; just express the original
3054 giv. If it is compared with an invariant, note that we cannot get
3056 if (TREE_CODE (*use->op_p) == SSA_NAME)
3057 record_invariant (data, *use->op_p, true);
3060 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3061 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3064 determine_use_iv_cost_generic (data, use, cand);
3067 /* Checks whether it is possible to replace the final value of USE by
3068 a direct computation. If so, the formula is stored to *VALUE. */
3071 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3074 struct tree_niter_desc *niter;
3076 exit = single_dom_exit (loop);
3080 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3081 bb_for_stmt (use->stmt)));
3083 niter = &loop_data (loop)->niter;
3085 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3086 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3089 *value = iv_value (use->iv, niter->niter);
3094 /* Determines cost of replacing final value of USE using CAND. */
3097 determine_use_iv_cost_outer (struct ivopts_data *data,
3098 struct iv_use *use, struct iv_cand *cand)
3104 struct loop *loop = data->current_loop;
3108 if (!may_replace_final_value (loop, use, &value))
3110 set_use_iv_cost (data, use, cand, INFTY, NULL);
3115 cost = force_var_cost (data, value, &depends_on);
3117 cost /= AVG_LOOP_NITER (loop);
3119 set_use_iv_cost (data, use, cand, cost, depends_on);
3123 exit = single_dom_exit (loop);
3126 /* If there is just a single exit, we may use value of the candidate
3127 after we take it to determine the value of use. */
3128 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3129 last_stmt (exit->src));
3131 cost /= AVG_LOOP_NITER (loop);
3135 /* Otherwise we just need to compute the iv. */
3136 cost = get_computation_cost (data, use, cand, false, &depends_on);
3139 set_use_iv_cost (data, use, cand, cost, depends_on);
3142 /* Determines cost of basing replacement of USE on CAND. */
3145 determine_use_iv_cost (struct ivopts_data *data,
3146 struct iv_use *use, struct iv_cand *cand)
3150 case USE_NONLINEAR_EXPR:
3151 determine_use_iv_cost_generic (data, use, cand);
3155 determine_use_iv_cost_outer (data, use, cand);
3159 determine_use_iv_cost_address (data, use, cand);
3163 determine_use_iv_cost_condition (data, use, cand);
3171 /* Determines costs of basing the use of the iv on an iv candidate. */
3174 determine_use_iv_costs (struct ivopts_data *data)
3178 struct iv_cand *cand;
3180 data->consider_all_candidates = (n_iv_cands (data)
3181 <= CONSIDER_ALL_CANDIDATES_BOUND);
3183 alloc_use_cost_map (data);
3185 if (!data->consider_all_candidates)
3187 /* Add the important candidate entries. */
3188 for (j = 0; j < n_iv_cands (data); j++)
3190 cand = iv_cand (data, j);
3191 if (!cand->important)
3193 for (i = 0; i < n_iv_uses (data); i++)
3195 use = iv_use (data, i);
3196 determine_use_iv_cost (data, use, cand);
3201 for (i = 0; i < n_iv_uses (data); i++)
3203 use = iv_use (data, i);
3205 if (data->consider_all_candidates)
3207 for (j = 0; j < n_iv_cands (data); j++)
3209 cand = iv_cand (data, j);
3210 determine_use_iv_cost (data, use, cand);
3217 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3219 cand = iv_cand (data, j);
3220 if (!cand->important)
3221 determine_use_iv_cost (data, use, cand);
3226 if (dump_file && (dump_flags & TDF_DETAILS))
3228 fprintf (dump_file, "Use-candidate costs:\n");
3230 for (i = 0; i < n_iv_uses (data); i++)
3232 use = iv_use (data, i);
3234 fprintf (dump_file, "Use %d:\n", i);
3235 fprintf (dump_file, " cand\tcost\tdepends on\n");
3236 for (j = 0; j < use->n_map_members; j++)
3238 if (!use->cost_map[j].cand
3239 || use->cost_map[j].cost == INFTY)
3242 fprintf (dump_file, " %d\t%d\t",
3243 use->cost_map[j].cand->id,
3244 use->cost_map[j].cost);
3245 if (use->cost_map[j].depends_on)
3246 bitmap_print (dump_file,
3247 use->cost_map[j].depends_on, "","");
3248 fprintf (dump_file, "\n");
3251 fprintf (dump_file, "\n");
3253 fprintf (dump_file, "\n");
3257 /* Determines cost of the candidate CAND. */
3260 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3262 unsigned cost_base, cost_step;
3272 /* There are two costs associated with the candidate -- its increment
3273 and its initialization. The second is almost negligible for any loop
3274 that rolls enough, so we take it just very little into account. */
3276 base = cand->iv->base;
3277 cost_base = force_var_cost (data, base, NULL);
3278 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3280 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3282 /* Prefer the original iv unless we may gain something by replacing it. */
3283 if (cand->pos == IP_ORIGINAL)
3286 /* Prefer not to insert statements into latch unless there are some
3287 already (so that we do not create unnecessary jumps). */
3288 if (cand->pos == IP_END)
3290 bb = ip_end_pos (data->current_loop);
3291 last = last_stmt (bb);
3294 || TREE_CODE (last) == LABEL_EXPR)
3299 /* Determines costs of computation of the candidates. */
3302 determine_iv_costs (struct ivopts_data *data)
3306 if (dump_file && (dump_flags & TDF_DETAILS))
3308 fprintf (dump_file, "Candidate costs:\n");
3309 fprintf (dump_file, " cand\tcost\n");
3312 for (i = 0; i < n_iv_cands (data); i++)
3314 struct iv_cand *cand = iv_cand (data, i);
3316 determine_iv_cost (data, cand);
3318 if (dump_file && (dump_flags & TDF_DETAILS))
3319 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3322 if (dump_file && (dump_flags & TDF_DETAILS))
3323 fprintf (dump_file, "\n");
3326 /* Calculates cost for having SIZE induction variables. */
3329 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3331 return global_cost_for_size (size,
3332 loop_data (data->current_loop)->regs_used,
3336 /* For each size of the induction variable set determine the penalty. */
3339 determine_set_costs (struct ivopts_data *data)
3343 struct loop *loop = data->current_loop;
3346 /* We use the following model (definitely improvable, especially the
3347 cost function -- TODO):
3349 We estimate the number of registers available (using MD data), name it A.
3351 We estimate the number of registers used by the loop, name it U. This
3352 number is obtained as the number of loop phi nodes (not counting virtual
3353 registers and bivs) + the number of variables from outside of the loop.
3355 We set a reserve R (free regs that are used for temporary computations,
3356 etc.). For now the reserve is a constant 3.
3358 Let I be the number of induction variables.
3360 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3361 make a lot of ivs without a reason).
3362 -- if A - R < U + I <= A, the cost is I * PRES_COST
3363 -- if U + I > A, the cost is I * PRES_COST and
3364 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3366 if (dump_file && (dump_flags & TDF_DETAILS))
3368 fprintf (dump_file, "Global costs:\n");
3369 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3370 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3371 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3372 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3376 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3378 op = PHI_RESULT (phi);
3380 if (!is_gimple_reg (op))
3383 if (get_iv (data, op))
3389 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3391 struct version_info *info = ver_info (data, j);
3393 if (info->inv_id && info->has_nonlin_use)
3397 loop_data (loop)->regs_used = n;
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3399 fprintf (dump_file, " regs_used %d\n", n);
3401 if (dump_file && (dump_flags & TDF_DETAILS))
3403 fprintf (dump_file, " cost for size:\n");
3404 fprintf (dump_file, " ivs\tcost\n");
3405 for (j = 0; j <= 2 * target_avail_regs; j++)
3406 fprintf (dump_file, " %d\t%d\n", j,
3407 ivopts_global_cost_for_size (data, j));
3408 fprintf (dump_file, "\n");
3412 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3413 taken from the set SOL and they may depend on invariants in the set INV.
3414 The really used candidate and invariants are noted to USED_IVS and
3418 find_best_candidate (struct ivopts_data *data,
3419 struct iv_use *use, bitmap sol, bitmap inv,
3420 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3423 unsigned best_cost = INFTY, cost;
3424 struct iv_cand *cnd = NULL, *acnd;
3425 bitmap depends_on = NULL, asol;
3426 bitmap_iterator bi, bi1;
3428 if (data->consider_all_candidates)
3432 asol = BITMAP_XMALLOC ();
3433 bitmap_a_and_b (asol, sol, use->related_cands);
3436 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c, bi)
3438 acnd = iv_cand (data, c);
3439 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3443 if (cost > best_cost)
3445 if (cost == best_cost)
3447 /* Prefer the cheaper iv. */
3448 if (acnd->cost >= cnd->cost)
3454 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d, bi1)
3459 bitmap_a_or_b (used_inv, used_inv, depends_on);
3468 if (cnd && used_ivs)
3469 bitmap_set_bit (used_ivs, cnd->id);
3474 if (!data->consider_all_candidates)
3475 BITMAP_XFREE (asol);
3480 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3481 induction variable candidates and invariants from the sets. Only
3482 uses 0 .. MAX_USE - 1 are taken into account. */
3485 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3489 unsigned cost = 0, size = 0, acost;
3491 struct iv_cand *cand;
3492 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3495 for (i = 0; i < max_use; i++)
3497 use = iv_use (data, i);
3498 acost = find_best_candidate (data, use, sol, inv,
3499 used_ivs, used_inv, NULL);
3502 BITMAP_XFREE (used_ivs);
3503 BITMAP_XFREE (used_inv);
3509 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i, bi)
3511 cand = iv_cand (data, i);
3513 /* Do not count the pseudocandidates. */
3519 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, bi)
3523 cost += ivopts_global_cost_for_size (data, size);
3525 bitmap_copy (sol, used_ivs);
3526 bitmap_copy (inv, used_inv);
3528 BITMAP_XFREE (used_ivs);
3529 BITMAP_XFREE (used_inv);
3534 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3535 induction variable candidates and invariants from the sets. */
3538 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3540 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3543 /* Tries to extend the sets IVS and INV in the best possible way in order
3544 to express the USE. */
3547 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3550 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3551 bitmap best_ivs = BITMAP_XMALLOC ();
3552 bitmap best_inv = BITMAP_XMALLOC ();
3553 bitmap act_ivs = BITMAP_XMALLOC ();
3554 bitmap act_inv = BITMAP_XMALLOC ();
3556 struct cost_pair *cp;
3558 bitmap_copy (best_ivs, ivs);
3559 bitmap_copy (best_inv, inv);
3561 for (i = 0; i < use->n_map_members; i++)
3563 cp = use->cost_map + i;
3564 if (cp->cost == INFTY)
3567 bitmap_copy (act_ivs, ivs);
3568 bitmap_set_bit (act_ivs, cp->cand->id);
3570 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3572 bitmap_copy (act_inv, inv);
3573 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3575 if (act_cost < best_cost)
3577 best_cost = act_cost;
3578 bitmap_copy (best_ivs, act_ivs);
3579 bitmap_copy (best_inv, act_inv);
3583 bitmap_copy (ivs, best_ivs);
3584 bitmap_copy (inv, best_inv);
3586 BITMAP_XFREE (best_ivs);
3587 BITMAP_XFREE (best_inv);
3588 BITMAP_XFREE (act_ivs);
3589 BITMAP_XFREE (act_inv);
3591 return (best_cost != INFTY);
3594 /* Finds an initial set of IVS and invariants INV. We do this by simply
3595 choosing the best candidate for each use. */
3598 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3602 for (i = 0; i < n_iv_uses (data); i++)
3603 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3606 return set_cost (data, ivs, inv);
3609 /* Tries to improve set of induction variables IVS and invariants INV to get
3610 it better than COST. */
3613 try_improve_iv_set (struct ivopts_data *data,
3614 bitmap ivs, bitmap inv, unsigned *cost)
3617 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3618 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3620 /* Try altering the set of induction variables by one. */
3621 for (i = 0; i < n_iv_cands (data); i++)
3623 bitmap_copy (new_ivs, ivs);
3624 bitmap_copy (new_inv, inv);
3626 if (bitmap_bit_p (ivs, i))
3627 bitmap_clear_bit (new_ivs, i);
3629 bitmap_set_bit (new_ivs, i);
3631 acost = set_cost (data, new_ivs, new_inv);
3637 best_new_ivs = BITMAP_XMALLOC ();
3638 best_new_inv = BITMAP_XMALLOC ();
3642 bitmap_copy (best_new_ivs, new_ivs);
3643 bitmap_copy (best_new_inv, new_inv);
3646 /* Ditto for invariants. */
3647 for (i = 1; i <= data->max_inv_id; i++)
3649 if (ver_info (data, i)->has_nonlin_use)
3652 bitmap_copy (new_ivs, ivs);
3653 bitmap_copy (new_inv, inv);
3655 if (bitmap_bit_p (inv, i))
3656 bitmap_clear_bit (new_inv, i);
3658 bitmap_set_bit (new_inv, i);
3660 acost = set_cost (data, new_ivs, new_inv);
3666 best_new_ivs = BITMAP_XMALLOC ();
3667 best_new_inv = BITMAP_XMALLOC ();
3671 bitmap_copy (best_new_ivs, new_ivs);
3672 bitmap_copy (best_new_inv, new_inv);
3675 BITMAP_XFREE (new_ivs);
3676 BITMAP_XFREE (new_inv);
3681 bitmap_copy (ivs, best_new_ivs);
3682 bitmap_copy (inv, best_new_inv);
3683 BITMAP_XFREE (best_new_ivs);
3684 BITMAP_XFREE (best_new_inv);
3688 /* Attempts to find the optimal set of induction variables. We do simple
3689 greedy heuristic -- we try to replace at most one candidate in the selected
3690 solution and remove the unused ivs while this improves the cost. */
3693 find_optimal_iv_set (struct ivopts_data *data)
3696 bitmap set = BITMAP_XMALLOC ();
3697 bitmap inv = BITMAP_XMALLOC ();
3700 /* Set the upper bound. */
3701 cost = get_initial_solution (data, set, inv);
3704 if (dump_file && (dump_flags & TDF_DETAILS))
3705 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3711 if (dump_file && (dump_flags & TDF_DETAILS))
3713 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3714 bitmap_print (dump_file, set, "", "");
3715 fprintf (dump_file, " invariants ");
3716 bitmap_print (dump_file, inv, "", "");
3717 fprintf (dump_file, "\n");
3720 while (try_improve_iv_set (data, set, inv, &cost))
3722 if (dump_file && (dump_flags & TDF_DETAILS))
3724 fprintf (dump_file, "Improved to (cost %d): ", cost);
3725 bitmap_print (dump_file, set, "", "");
3726 fprintf (dump_file, " invariants ");
3727 bitmap_print (dump_file, inv, "", "");
3728 fprintf (dump_file, "\n");
3732 if (dump_file && (dump_flags & TDF_DETAILS))
3733 fprintf (dump_file, "Final cost %d\n\n", cost);
3735 for (i = 0; i < n_iv_uses (data); i++)
3737 use = iv_use (data, i);
3738 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3746 /* Creates a new induction variable corresponding to CAND. */
3749 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3751 block_stmt_iterator incr_pos;
3761 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3765 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3770 /* Mark that the iv is preserved. */
3771 name_info (data, cand->var_before)->preserve_biv = true;
3772 name_info (data, cand->var_after)->preserve_biv = true;
3774 /* Rewrite the increment so that it uses var_before directly. */
3775 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3780 gimple_add_tmp_var (cand->var_before);
3781 add_referenced_tmp_var (cand->var_before);
3783 base = unshare_expr (cand->iv->base);
3785 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3786 &incr_pos, after, &cand->var_before, &cand->var_after);
3789 /* Creates new induction variables described in SET. */
3792 create_new_ivs (struct ivopts_data *data, bitmap set)
3795 struct iv_cand *cand;
3798 EXECUTE_IF_SET_IN_BITMAP (set, 0, i, bi)
3800 cand = iv_cand (data, i);
3801 create_new_iv (data, cand);
3805 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3806 is true, remove also the ssa name defined by the statement. */
3809 remove_statement (tree stmt, bool including_defined_name)
3811 if (TREE_CODE (stmt) == PHI_NODE)
3813 if (!including_defined_name)
3815 /* Prevent the ssa name defined by the statement from being removed. */
3816 SET_PHI_RESULT (stmt, NULL);
3818 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3822 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3828 /* Rewrites USE (definition of iv used in a nonlinear expression)
3829 using candidate CAND. */
3832 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3833 struct iv_use *use, struct iv_cand *cand)
3835 tree comp = unshare_expr (get_computation (data->current_loop,
3837 tree op, stmts, tgt, ass;
3838 block_stmt_iterator bsi, pbsi;
3840 switch (TREE_CODE (use->stmt))
3843 tgt = PHI_RESULT (use->stmt);
3845 /* If we should keep the biv, do not replace it. */
3846 if (name_info (data, tgt)->preserve_biv)
3849 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3850 while (!bsi_end_p (pbsi)
3851 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3859 tgt = TREE_OPERAND (use->stmt, 0);
3860 bsi = stmt_for_bsi (use->stmt);
3867 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3869 if (TREE_CODE (use->stmt) == PHI_NODE)
3872 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3873 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3874 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3875 remove_statement (use->stmt, false);
3876 SSA_NAME_DEF_STMT (tgt) = ass;
3881 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3882 TREE_OPERAND (use->stmt, 1) = op;
3886 /* Replaces ssa name in index IDX by its basic variable. Callback for
3890 idx_remove_ssa_names (tree base, tree *idx,
3891 void *data ATTRIBUTE_UNUSED)
3895 if (TREE_CODE (*idx) == SSA_NAME)
3896 *idx = SSA_NAME_VAR (*idx);
3898 if (TREE_CODE (base) == ARRAY_REF)
3900 op = &TREE_OPERAND (base, 2);
3902 && TREE_CODE (*op) == SSA_NAME)
3903 *op = SSA_NAME_VAR (*op);
3904 op = &TREE_OPERAND (base, 3);
3906 && TREE_CODE (*op) == SSA_NAME)
3907 *op = SSA_NAME_VAR (*op);
3913 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3916 unshare_and_remove_ssa_names (tree ref)
3918 ref = unshare_expr (ref);
3919 for_each_index (&ref, idx_remove_ssa_names, NULL);
3924 /* Rewrites base of memory access OP with expression WITH in statement
3925 pointed to by BSI. */
3928 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3930 tree bvar, var, new_var, new_name, copy, name;
3933 var = bvar = get_base_address (*op);
3935 if (!var || TREE_CODE (with) != SSA_NAME)
3938 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
3939 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
3940 if (TREE_CODE (var) == INDIRECT_REF)
3941 var = TREE_OPERAND (var, 0);
3942 if (TREE_CODE (var) == SSA_NAME)
3945 var = SSA_NAME_VAR (var);
3947 else if (DECL_P (var))
3952 if (var_ann (var)->type_mem_tag)
3953 var = var_ann (var)->type_mem_tag;
3955 /* We need to add a memory tag for the variable. But we do not want
3956 to add it to the temporary used for the computations, since this leads
3957 to problems in redundancy elimination when there are common parts
3958 in two computations referring to the different arrays. So we copy
3959 the variable to a new temporary. */
3960 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3962 new_name = duplicate_ssa_name (name, copy);
3965 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3966 add_referenced_tmp_var (new_var);
3967 var_ann (new_var)->type_mem_tag = var;
3968 new_name = make_ssa_name (new_var, copy);
3970 TREE_OPERAND (copy, 0) = new_name;
3971 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3977 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
3978 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
3980 if (TREE_CODE (*op) == INDIRECT_REF)
3981 orig = REF_ORIGINAL (*op);
3983 orig = unshare_and_remove_ssa_names (*op);
3985 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3987 /* Record the original reference, for purposes of alias analysis. */
3988 REF_ORIGINAL (*op) = orig;
3991 /* Rewrites USE (address that is an iv) using candidate CAND. */
3994 rewrite_use_address (struct ivopts_data *data,
3995 struct iv_use *use, struct iv_cand *cand)
3997 tree comp = unshare_expr (get_computation (data->current_loop,
3999 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
4001 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4004 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4006 rewrite_address_base (&bsi, use->op_p, op);
4009 /* Rewrites USE (the condition such that one of the arguments is an iv) using
4013 rewrite_use_compare (struct ivopts_data *data,
4014 struct iv_use *use, struct iv_cand *cand)
4017 tree *op_p, cond, op, stmts, bound;
4018 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
4019 enum tree_code compare;
4021 if (may_eliminate_iv (data->current_loop,
4022 use, cand, &compare, &bound))
4024 op = force_gimple_operand (unshare_expr (bound), &stmts,
4028 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4030 *use->op_p = build2 (compare, boolean_type_node,
4031 var_at_stmt (data->current_loop,
4032 cand, use->stmt), op);
4033 modify_stmt (use->stmt);
4037 /* The induction variable elimination failed; just express the original
4039 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4042 op_p = &TREE_OPERAND (cond, 0);
4043 if (TREE_CODE (*op_p) != SSA_NAME
4044 || zero_p (get_iv (data, *op_p)->step))
4045 op_p = &TREE_OPERAND (cond, 1);
4047 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4049 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4054 /* Ensure that operand *OP_P may be used at the end of EXIT without
4055 violating loop closed ssa form. */
4058 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4061 struct loop *def_loop;
4064 use = USE_FROM_PTR (op_p);
4065 if (TREE_CODE (use) != SSA_NAME)
4068 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4072 def_loop = def_bb->loop_father;
4073 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4076 /* Try finding a phi node that copies the value out of the loop. */
4077 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4078 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4083 /* Create such a phi node. */
4084 tree new_name = duplicate_ssa_name (use, NULL);
4086 phi = create_phi_node (new_name, exit->dest);
4087 SSA_NAME_DEF_STMT (new_name) = phi;
4088 add_phi_arg (&phi, use, exit);
4091 SET_USE (op_p, PHI_RESULT (phi));
4094 /* Ensure that operands of STMT may be used at the end of EXIT without
4095 violating loop closed ssa form. */
4098 protect_loop_closed_ssa_form (edge exit, tree stmt)
4102 v_may_def_optype v_may_defs;
4105 get_stmt_operands (stmt);
4107 uses = STMT_USE_OPS (stmt);
4108 for (i = 0; i < NUM_USES (uses); i++)
4109 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4111 vuses = STMT_VUSE_OPS (stmt);
4112 for (i = 0; i < NUM_VUSES (vuses); i++)
4113 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4115 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4116 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4117 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4120 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4121 so that they are emitted on the correct place, and so that the loop closed
4122 ssa form is preserved. */
4125 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4127 tree_stmt_iterator tsi;
4128 block_stmt_iterator bsi;
4129 tree phi, stmt, def, next;
4131 if (exit->dest->pred->pred_next)
4132 split_loop_exit_edge (exit);
4134 if (TREE_CODE (stmts) == STATEMENT_LIST)
4136 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4137 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4140 protect_loop_closed_ssa_form (exit, stmts);
4142 /* Ensure there is label in exit->dest, so that we can
4144 tree_block_label (exit->dest);
4145 bsi = bsi_after_labels (exit->dest);
4146 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4151 for (phi = phi_nodes (exit->dest); phi; phi = next)
4153 next = TREE_CHAIN (phi);
4155 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4157 def = PHI_RESULT (phi);
4158 remove_statement (phi, false);
4159 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4161 SSA_NAME_DEF_STMT (def) = stmt;
4162 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4167 /* Rewrites the final value of USE (that is only needed outside of the loop)
4168 using candidate CAND. */
4171 rewrite_use_outer (struct ivopts_data *data,
4172 struct iv_use *use, struct iv_cand *cand)
4175 tree value, op, stmts, tgt;
4178 switch (TREE_CODE (use->stmt))
4181 tgt = PHI_RESULT (use->stmt);
4184 tgt = TREE_OPERAND (use->stmt, 0);
4190 exit = single_dom_exit (data->current_loop);
4196 bool ok = may_replace_final_value (data->current_loop, use, &value);
4200 value = get_computation_at (data->current_loop,
4201 use, cand, last_stmt (exit->src));
4203 value = unshare_expr (value);
4204 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4206 /* If we will preserve the iv anyway and we would need to perform
4207 some computation to replace the final value, do nothing. */
4208 if (stmts && name_info (data, tgt)->preserve_biv)
4211 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4213 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4215 if (USE_FROM_PTR (use_p) == tgt)
4216 SET_USE (use_p, op);
4220 compute_phi_arg_on_exit (exit, stmts, op);
4222 /* Enable removal of the statement. We cannot remove it directly,
4223 since we may still need the aliasing information attached to the
4224 ssa name defined by it. */
4225 name_info (data, tgt)->iv->have_use_for = false;
4229 /* If the variable is going to be preserved anyway, there is nothing to
4231 if (name_info (data, tgt)->preserve_biv)
4234 /* Otherwise we just need to compute the iv. */
4235 rewrite_use_nonlinear_expr (data, use, cand);
4238 /* Rewrites USE using candidate CAND. */
4241 rewrite_use (struct ivopts_data *data,
4242 struct iv_use *use, struct iv_cand *cand)
4246 case USE_NONLINEAR_EXPR:
4247 rewrite_use_nonlinear_expr (data, use, cand);
4251 rewrite_use_outer (data, use, cand);
4255 rewrite_use_address (data, use, cand);
4259 rewrite_use_compare (data, use, cand);
4265 modify_stmt (use->stmt);
4268 /* Rewrite the uses using the selected induction variables. */
4271 rewrite_uses (struct ivopts_data *data)
4274 struct iv_cand *cand;
4277 for (i = 0; i < n_iv_uses (data); i++)
4279 use = iv_use (data, i);
4280 cand = use->selected;
4283 rewrite_use (data, use, cand);
4287 /* Removes the ivs that are not used after rewriting. */
4290 remove_unused_ivs (struct ivopts_data *data)
4295 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4297 struct version_info *info;
4299 info = ver_info (data, j);
4301 && !zero_p (info->iv->step)
4303 && !info->iv->have_use_for
4304 && !info->preserve_biv)
4305 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4309 /* Frees data allocated by the optimization of a single loop. */
4312 free_loop_data (struct ivopts_data *data)
4317 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
4319 struct version_info *info;
4321 info = ver_info (data, i);
4325 info->has_nonlin_use = false;
4326 info->preserve_biv = false;
4329 bitmap_clear (data->relevant);
4331 for (i = 0; i < n_iv_uses (data); i++)
4333 struct iv_use *use = iv_use (data, i);
4336 BITMAP_XFREE (use->related_cands);
4337 for (j = 0; j < use->n_map_members; j++)
4338 if (use->cost_map[j].depends_on)
4339 BITMAP_XFREE (use->cost_map[j].depends_on);
4340 free (use->cost_map);
4343 VARRAY_POP_ALL (data->iv_uses);
4345 for (i = 0; i < n_iv_cands (data); i++)
4347 struct iv_cand *cand = iv_cand (data, i);
4353 VARRAY_POP_ALL (data->iv_candidates);
4355 if (data->version_info_size < num_ssa_names)
4357 data->version_info_size = 2 * num_ssa_names;
4358 free (data->version_info);
4359 data->version_info = xcalloc (data->version_info_size,
4360 sizeof (struct version_info));
4363 data->max_inv_id = 0;
4365 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4367 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4369 SET_DECL_RTL (obj, NULL_RTX);
4371 VARRAY_POP_ALL (decl_rtl_to_reset);
4374 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4378 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4382 for (i = 1; i < loops->num; i++)
4383 if (loops->parray[i])
4385 free (loops->parray[i]->aux);
4386 loops->parray[i]->aux = NULL;
4389 free_loop_data (data);
4390 free (data->version_info);
4391 BITMAP_XFREE (data->relevant);
4393 VARRAY_FREE (decl_rtl_to_reset);
4394 VARRAY_FREE (data->iv_uses);
4395 VARRAY_FREE (data->iv_candidates);
4398 /* Optimizes the LOOP. Returns true if anything changed. */
4401 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4403 bool changed = false;
4407 data->current_loop = loop;
4409 if (dump_file && (dump_flags & TDF_DETAILS))
4411 fprintf (dump_file, "Processing loop %d\n", loop->num);
4413 exit = single_dom_exit (loop);
4416 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4417 exit->src->index, exit->dest->index);
4418 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4419 fprintf (dump_file, "\n");
4422 fprintf (dump_file, "\n");
4425 /* For each ssa name determines whether it behaves as an induction variable
4427 if (!find_induction_variables (data))
4430 /* Finds interesting uses (item 1). */
4431 find_interesting_uses (data);
4432 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4435 /* Finds candidates for the induction variables (item 2). */
4436 find_iv_candidates (data);
4438 /* Calculates the costs (item 3, part 1). */
4439 determine_use_iv_costs (data);
4440 determine_iv_costs (data);
4441 determine_set_costs (data);
4443 /* Find the optimal set of induction variables (item 3, part 2). */
4444 iv_set = find_optimal_iv_set (data);
4449 /* Create the new induction variables (item 4, part 1). */
4450 create_new_ivs (data, iv_set);
4452 /* Rewrite the uses (item 4, part 2). */
4453 rewrite_uses (data);
4455 /* Remove the ivs that are unused after rewriting. */
4456 remove_unused_ivs (data);
4458 loop_commit_inserts ();
4460 BITMAP_XFREE (iv_set);
4462 /* We have changed the structure of induction variables; it might happen
4463 that definitions in the scev database refer to some of them that were
4468 free_loop_data (data);
4473 /* Main entry point. Optimizes induction variables in LOOPS. */
4476 tree_ssa_iv_optimize (struct loops *loops)
4479 struct ivopts_data data;
4481 tree_ssa_iv_optimize_init (loops, &data);
4483 /* Optimize the loops starting with the innermost ones. */
4484 loop = loops->tree_root;
4488 #ifdef ENABLE_CHECKING
4489 verify_loop_closed_ssa ();
4493 /* Scan the loops, inner ones first. */
4494 while (loop != loops->tree_root)
4496 if (dump_file && (dump_flags & TDF_DETAILS))
4497 flow_loop_dump (loop, dump_file, NULL, 1);
4498 if (tree_ssa_iv_optimize_loop (&data, loop))
4500 #ifdef ENABLE_CHECKING
4501 verify_loop_closed_ssa ();
4516 tree_ssa_iv_optimize_finalize (loops, &data);