1 /* Induction variable optimizations.
2 Copyright (C) 2003 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
292 if (!just_once_each_iteration_p (loop, exit->src))
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 fprintf (file, " type ");
309 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
310 fprintf (file, "\n");
314 fprintf (file, " base ");
315 print_generic_expr (file, iv->base, TDF_SLIM);
316 fprintf (file, "\n");
318 fprintf (file, " step ");
319 print_generic_expr (file, iv->step, TDF_SLIM);
320 fprintf (file, "\n");
324 fprintf (file, " invariant ");
325 print_generic_expr (file, iv->base, TDF_SLIM);
326 fprintf (file, "\n");
330 fprintf (file, " is a biv\n");
333 /* Dumps information about the USE to FILE. */
335 extern void dump_use (FILE *, struct iv_use *);
337 dump_use (FILE *file, struct iv_use *use)
339 struct iv *iv = use->iv;
341 fprintf (file, "use %d\n", use->id);
345 case USE_NONLINEAR_EXPR:
346 fprintf (file, " generic\n");
350 fprintf (file, " outside\n");
354 fprintf (file, " address\n");
358 fprintf (file, " compare\n");
365 fprintf (file, " in statement ");
366 print_generic_expr (file, use->stmt, TDF_SLIM);
367 fprintf (file, "\n");
369 fprintf (file, " at position ");
371 print_generic_expr (file, *use->op_p, TDF_SLIM);
372 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " related candidates ");
396 dump_bitmap (file, use->related_cands);
399 /* Dumps information about the uses to FILE. */
401 extern void dump_uses (FILE *, struct ivopts_data *);
403 dump_uses (FILE *file, struct ivopts_data *data)
408 for (i = 0; i < n_iv_uses (data); i++)
410 use = iv_use (data, i);
412 dump_use (file, use);
413 fprintf (file, "\n");
417 /* Dumps information about induction variable candidate CAND to FILE. */
419 extern void dump_cand (FILE *, struct iv_cand *);
421 dump_cand (FILE *file, struct iv_cand *cand)
423 struct iv *iv = cand->iv;
425 fprintf (file, "candidate %d%s\n",
426 cand->id, cand->important ? " (important)" : "");
430 fprintf (file, " final value replacement\n");
437 fprintf (file, " incremented before exit test\n");
441 fprintf (file, " incremented at end\n");
445 fprintf (file, " original biv\n");
449 fprintf (file, " type ");
450 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
451 fprintf (file, "\n");
455 fprintf (file, " base ");
456 print_generic_expr (file, iv->base, TDF_SLIM);
457 fprintf (file, "\n");
459 fprintf (file, " step ");
460 print_generic_expr (file, iv->step, TDF_SLIM);
461 fprintf (file, "\n");
465 fprintf (file, " invariant ");
466 print_generic_expr (file, iv->base, TDF_SLIM);
467 fprintf (file, "\n");
471 /* Returns the info for ssa version VER. */
473 static inline struct version_info *
474 ver_info (struct ivopts_data *data, unsigned ver)
476 return data->version_info + ver;
479 /* Returns the info for ssa name NAME. */
481 static inline struct version_info *
482 name_info (struct ivopts_data *data, tree name)
484 return ver_info (data, SSA_NAME_VERSION (name));
487 /* Checks whether there exists number X such that X * B = A, counting modulo
491 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
494 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
495 unsigned HOST_WIDE_INT inv, ex, val;
501 /* First divide the whole equation by 2 as long as possible. */
502 while (!(a & 1) && !(b & 1))
512 /* If b is still even, a is odd and there is no such x. */
516 /* Find the inverse of b. We compute it as
517 b^(2^(bits - 1) - 1) (mod 2^bits). */
520 for (i = 0; i < bits - 1; i++)
522 inv = (inv * ex) & mask;
523 ex = (ex * ex) & mask;
526 val = (a * inv) & mask;
528 gcc_assert (((val * b) & mask) == a);
530 if ((val >> (bits - 1)) & 1)
538 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
542 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
544 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
548 if (sbb == loop->latch)
554 return stmt == last_stmt (bb);
557 /* Returns true if STMT if after the place where the original induction
558 variable CAND is incremented. */
561 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
563 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
564 basic_block stmt_bb = bb_for_stmt (stmt);
565 block_stmt_iterator bsi;
567 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
570 if (stmt_bb != cand_bb)
573 /* Scan the block from the end, since the original ivs are usually
574 incremented at the end of the loop body. */
575 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
577 if (bsi_stmt (bsi) == cand->incremented_at)
579 if (bsi_stmt (bsi) == stmt)
584 /* Returns true if STMT if after the place where the induction variable
585 CAND is incremented in LOOP. */
588 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
596 return stmt_after_ip_normal_pos (loop, stmt);
599 return stmt_after_ip_original_pos (cand, stmt);
606 /* Initializes data structures used by the iv optimization pass, stored
607 in DATA. LOOPS is the loop tree. */
610 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
614 data->version_info_size = 2 * num_ssa_names;
615 data->version_info = xcalloc (data->version_info_size,
616 sizeof (struct version_info));
617 data->relevant = BITMAP_XMALLOC ();
618 data->max_inv_id = 0;
620 for (i = 1; i < loops->num; i++)
621 if (loops->parray[i])
622 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
624 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
625 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
626 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
629 /* Allocates an induction variable with given initial value BASE and step STEP
633 alloc_iv (tree base, tree step)
635 struct iv *iv = xcalloc (1, sizeof (struct iv));
637 if (step && integer_zerop (step))
643 iv->have_use_for = false;
645 iv->ssa_name = NULL_TREE;
650 /* Sets STEP and BASE for induction variable IV. */
653 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
655 struct version_info *info = name_info (data, iv);
657 gcc_assert (!info->iv);
659 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
660 info->iv = alloc_iv (base, step);
661 info->iv->ssa_name = iv;
664 /* Finds induction variable declaration for VAR. */
667 get_iv (struct ivopts_data *data, tree var)
671 if (!name_info (data, var)->iv)
673 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
676 || !flow_bb_inside_loop_p (data->current_loop, bb))
677 set_iv (data, var, var, NULL_TREE);
680 return name_info (data, var)->iv;
683 /* Determines the step of a biv defined in PHI. */
686 determine_biv_step (tree phi)
688 struct loop *loop = bb_for_stmt (phi)->loop_father;
689 tree name = PHI_RESULT (phi), base, step;
690 tree type = TREE_TYPE (name);
692 if (!is_gimple_reg (name))
695 if (!simple_iv (loop, phi, name, &base, &step))
699 return build_int_cst (type, 0);
704 /* Returns false if INDEX is a ssa name that occurs in an
705 abnormal phi node. Callback for for_each_index. */
708 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
709 void *data ATTRIBUTE_UNUSED)
711 if (TREE_CODE (*index) != SSA_NAME)
714 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
717 /* Returns true if EXPR contains a ssa name that occurs in an
718 abnormal phi node. */
721 contains_abnormal_ssa_name_p (tree expr)
723 enum tree_code code = TREE_CODE (expr);
724 char class = TREE_CODE_CLASS (code);
726 if (code == SSA_NAME)
727 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
729 if (code == INTEGER_CST
730 || is_gimple_min_invariant (expr))
733 if (code == ADDR_EXPR)
734 return !for_each_index (&TREE_OPERAND (expr, 1),
735 idx_contains_abnormal_ssa_name_p,
742 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
747 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
759 /* Finds basic ivs. */
762 find_bivs (struct ivopts_data *data)
764 tree phi, step, type, base;
766 struct loop *loop = data->current_loop;
768 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
770 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
773 step = determine_biv_step (phi);
777 if (cst_and_fits_in_hwi (step)
778 && int_cst_value (step) == 0)
781 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
782 if (contains_abnormal_ssa_name_p (base))
785 type = TREE_TYPE (PHI_RESULT (phi));
786 base = fold_convert (type, base);
787 step = fold_convert (type, step);
789 /* FIXME: We do not handle induction variables whose step does
790 not satisfy cst_and_fits_in_hwi. */
791 if (!cst_and_fits_in_hwi (step))
794 set_iv (data, PHI_RESULT (phi), base, step);
801 /* Marks basic ivs. */
804 mark_bivs (struct ivopts_data *data)
807 struct iv *iv, *incr_iv;
808 struct loop *loop = data->current_loop;
811 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
813 iv = get_iv (data, PHI_RESULT (phi));
817 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
818 incr_iv = get_iv (data, var);
822 /* If the increment is in the subloop, ignore it. */
823 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
824 if (incr_bb->loop_father != data->current_loop
825 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
829 incr_iv->biv_p = true;
833 /* Checks whether STMT defines a linear induction variable and stores its
834 parameters to BASE and STEP. */
837 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
838 tree *base, tree *step)
841 struct loop *loop = data->current_loop;
846 if (TREE_CODE (stmt) != MODIFY_EXPR)
849 lhs = TREE_OPERAND (stmt, 0);
850 if (TREE_CODE (lhs) != SSA_NAME)
853 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
856 /* FIXME: We do not handle induction variables whose step does
857 not satisfy cst_and_fits_in_hwi. */
859 && !cst_and_fits_in_hwi (*step))
862 if (contains_abnormal_ssa_name_p (*base))
868 /* Finds general ivs in statement STMT. */
871 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
875 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
878 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
881 /* Finds general ivs in basic block BB. */
884 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
886 block_stmt_iterator bsi;
888 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
889 find_givs_in_stmt (data, bsi_stmt (bsi));
892 /* Finds general ivs. */
895 find_givs (struct ivopts_data *data)
897 struct loop *loop = data->current_loop;
898 basic_block *body = get_loop_body_in_dom_order (loop);
901 for (i = 0; i < loop->num_nodes; i++)
902 find_givs_in_bb (data, body[i]);
906 /* Determine the number of iterations of the current loop. */
909 determine_number_of_iterations (struct ivopts_data *data)
911 struct loop *loop = data->current_loop;
912 edge exit = single_dom_exit (loop);
917 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
920 /* For each ssa name defined in LOOP determines whether it is an induction
921 variable and if so, its initial value and step. */
924 find_induction_variables (struct ivopts_data *data)
927 struct loop *loop = data->current_loop;
929 if (!find_bivs (data))
934 determine_number_of_iterations (data);
936 if (dump_file && (dump_flags & TDF_DETAILS))
938 if (loop_data (loop)->niter.niter)
940 fprintf (dump_file, " number of iterations ");
941 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
943 fprintf (dump_file, "\n");
945 fprintf (dump_file, " may be zero if ");
946 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
948 fprintf (dump_file, "\n");
950 fprintf (dump_file, " bogus unless ");
951 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
953 fprintf (dump_file, "\n");
954 fprintf (dump_file, "\n");
957 fprintf (dump_file, "Induction variables:\n\n");
959 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
961 if (ver_info (data, i)->iv)
962 dump_iv (dump_file, ver_info (data, i)->iv);
969 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
971 static struct iv_use *
972 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
973 tree stmt, enum use_type use_type)
975 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
977 use->id = n_iv_uses (data);
978 use->type = use_type;
982 use->related_cands = BITMAP_XMALLOC ();
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 dump_use (dump_file, use);
987 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
992 /* Checks whether OP is a loop-level invariant and if so, records it.
993 NONLINEAR_USE is true if the invariant is used in a way we do not
997 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1000 struct version_info *info;
1002 if (TREE_CODE (op) != SSA_NAME
1003 || !is_gimple_reg (op))
1006 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1008 && flow_bb_inside_loop_p (data->current_loop, bb))
1011 info = name_info (data, op);
1013 info->has_nonlin_use |= nonlinear_use;
1015 info->inv_id = ++data->max_inv_id;
1016 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1019 /* Checks whether the use OP is interesting and if so, records it
1022 static struct iv_use *
1023 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1031 if (TREE_CODE (op) != SSA_NAME)
1034 iv = get_iv (data, op);
1038 if (iv->have_use_for)
1040 use = iv_use (data, iv->use_id);
1042 gcc_assert (use->type == USE_NONLINEAR_EXPR
1043 || use->type == USE_OUTER);
1045 if (type == USE_NONLINEAR_EXPR)
1046 use->type = USE_NONLINEAR_EXPR;
1050 if (zero_p (iv->step))
1052 record_invariant (data, op, true);
1055 iv->have_use_for = true;
1057 civ = xmalloc (sizeof (struct iv));
1060 stmt = SSA_NAME_DEF_STMT (op);
1061 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1062 || TREE_CODE (stmt) == MODIFY_EXPR);
1064 use = record_use (data, NULL, civ, stmt, type);
1065 iv->use_id = use->id;
1070 /* Checks whether the use OP is interesting and if so, records it. */
1072 static struct iv_use *
1073 find_interesting_uses_op (struct ivopts_data *data, tree op)
1075 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1078 /* Records a definition of induction variable OP that is used outside of the
1081 static struct iv_use *
1082 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1084 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1087 /* Checks whether the condition *COND_P in STMT is interesting
1088 and if so, records it. */
1091 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1095 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1097 tree zero = integer_zero_node;
1099 const_iv.step = NULL_TREE;
1101 if (integer_zerop (*cond_p)
1102 || integer_nonzerop (*cond_p))
1105 if (TREE_CODE (*cond_p) == SSA_NAME)
1112 op0_p = &TREE_OPERAND (*cond_p, 0);
1113 op1_p = &TREE_OPERAND (*cond_p, 1);
1116 if (TREE_CODE (*op0_p) == SSA_NAME)
1117 iv0 = get_iv (data, *op0_p);
1121 if (TREE_CODE (*op1_p) == SSA_NAME)
1122 iv1 = get_iv (data, *op1_p);
1126 if (/* When comparing with non-invariant value, we may not do any senseful
1127 induction variable elimination. */
1129 /* Eliminating condition based on two ivs would be nontrivial.
1130 ??? TODO -- it is not really important to handle this case. */
1131 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1133 find_interesting_uses_op (data, *op0_p);
1134 find_interesting_uses_op (data, *op1_p);
1138 if (zero_p (iv0->step) && zero_p (iv1->step))
1140 /* If both are invariants, this is a work for unswitching. */
1144 civ = xmalloc (sizeof (struct iv));
1145 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1146 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1149 /* Cumulates the steps of indices into DATA and replaces their values with the
1150 initial ones. Returns false when the value of the index cannot be determined.
1151 Callback for for_each_index. */
1153 struct ifs_ivopts_data
1155 struct ivopts_data *ivopts_data;
1161 idx_find_step (tree base, tree *idx, void *data)
1163 struct ifs_ivopts_data *dta = data;
1165 tree step, type, iv_type, iv_step, lbound;
1167 struct loop *loop = dta->ivopts_data->current_loop;
1169 if (TREE_CODE (*idx) != SSA_NAME)
1172 iv = get_iv (dta->ivopts_data, *idx);
1181 iv_type = TREE_TYPE (iv->base);
1182 type = build_pointer_type (TREE_TYPE (base));
1183 if (TREE_CODE (base) == ARRAY_REF)
1185 step = array_ref_element_size (base);
1186 lbound = array_ref_low_bound (base);
1188 /* We only handle addresses whose step is an integer constant. */
1189 if (TREE_CODE (step) != INTEGER_CST)
1192 /* We need the lower bound to be invariant in loop, since otherwise
1193 we are unable to initialize a new induction variable created
1194 in strength reduction -- we need to take the address of the
1195 reference in front of the loop. */
1196 if (is_gimple_min_invariant (lbound))
1197 ; /* Nothing to do. */
1198 else if (TREE_CODE (lbound) != SSA_NAME)
1202 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (lbound));
1204 && flow_bb_inside_loop_p (loop, def_bb))
1209 /* The step for pointer arithmetics already is 1 byte. */
1210 step = build_int_cst (type, 1);
1212 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1213 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1214 type, iv->base, iv->step, dta->stmt);
1216 iv_step = fold_convert (iv_type, iv->step);
1220 /* The index might wrap. */
1224 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1227 *dta->step_p = step;
1229 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1234 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1235 object is passed to it in DATA. */
1238 idx_record_use (tree base, tree *idx,
1241 find_interesting_uses_op (data, *idx);
1242 if (TREE_CODE (base) == ARRAY_REF)
1244 find_interesting_uses_op (data, array_ref_element_size (base));
1245 find_interesting_uses_op (data, array_ref_low_bound (base));
1250 /* Finds addresses in *OP_P inside STMT. */
1253 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1255 tree base = unshare_expr (*op_p), step = NULL;
1257 struct ifs_ivopts_data ifs_ivopts_data;
1259 /* Ignore bitfields for now. Not really something terribly complicated
1261 if (TREE_CODE (base) == COMPONENT_REF
1262 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1265 ifs_ivopts_data.ivopts_data = data;
1266 ifs_ivopts_data.stmt = stmt;
1267 ifs_ivopts_data.step_p = &step;
1268 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1272 if (TREE_CODE (base) == INDIRECT_REF)
1273 base = TREE_OPERAND (base, 0);
1275 base = build_addr (base);
1277 civ = alloc_iv (base, step);
1278 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1282 for_each_index (op_p, idx_record_use, data);
1285 /* Finds and records invariants used in STMT. */
1288 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1290 use_optype uses = NULL;
1294 if (TREE_CODE (stmt) == PHI_NODE)
1295 n = PHI_NUM_ARGS (stmt);
1298 get_stmt_operands (stmt);
1299 uses = STMT_USE_OPS (stmt);
1300 n = NUM_USES (uses);
1303 for (i = 0; i < n; i++)
1305 if (TREE_CODE (stmt) == PHI_NODE)
1306 op = PHI_ARG_DEF (stmt, i);
1308 op = USE_OP (uses, i);
1310 record_invariant (data, op, false);
1314 /* Finds interesting uses of induction variables in the statement STMT. */
1317 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1321 use_optype uses = NULL;
1324 find_invariants_stmt (data, stmt);
1326 if (TREE_CODE (stmt) == COND_EXPR)
1328 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1332 if (TREE_CODE (stmt) == MODIFY_EXPR)
1334 lhs = TREE_OPERAND (stmt, 0);
1335 rhs = TREE_OPERAND (stmt, 1);
1337 if (TREE_CODE (lhs) == SSA_NAME)
1339 /* If the statement defines an induction variable, the uses are not
1340 interesting by themselves. */
1342 iv = get_iv (data, lhs);
1344 if (iv && !zero_p (iv->step))
1348 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1351 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1355 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1356 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r')
1357 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1363 /* Handle memory = gimple_val. */
1364 if (TREE_CODE_CLASS (TREE_CODE (lhs)) == 'r'
1365 && is_gimple_val (rhs))
1367 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1368 find_interesting_uses_op (data, rhs);
1372 /* TODO -- we should also handle address uses of type
1374 memory = call (whatever);
1381 if (TREE_CODE (stmt) == PHI_NODE
1382 && bb_for_stmt (stmt) == data->current_loop->header)
1384 lhs = PHI_RESULT (stmt);
1385 iv = get_iv (data, lhs);
1387 if (iv && !zero_p (iv->step))
1391 if (TREE_CODE (stmt) == PHI_NODE)
1392 n = PHI_NUM_ARGS (stmt);
1395 uses = STMT_USE_OPS (stmt);
1396 n = NUM_USES (uses);
1399 for (i = 0; i < n; i++)
1401 if (TREE_CODE (stmt) == PHI_NODE)
1402 op = PHI_ARG_DEF (stmt, i);
1404 op = USE_OP (uses, i);
1406 if (TREE_CODE (op) != SSA_NAME)
1409 iv = get_iv (data, op);
1413 find_interesting_uses_op (data, op);
1417 /* Finds interesting uses of induction variables outside of loops
1418 on loop exit edge EXIT. */
1421 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1425 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1427 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1428 find_interesting_uses_outer (data, def);
1432 /* Finds uses of the induction variables that are interesting. */
1435 find_interesting_uses (struct ivopts_data *data)
1438 block_stmt_iterator bsi;
1440 basic_block *body = get_loop_body (data->current_loop);
1442 struct version_info *info;
1445 if (dump_file && (dump_flags & TDF_DETAILS))
1446 fprintf (dump_file, "Uses:\n\n");
1448 for (i = 0; i < data->current_loop->num_nodes; i++)
1452 for (e = bb->succ; e; e = e->succ_next)
1453 if (e->dest != EXIT_BLOCK_PTR
1454 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1455 find_interesting_uses_outside (data, e);
1457 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1458 find_interesting_uses_stmt (data, phi);
1459 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1460 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1463 if (dump_file && (dump_flags & TDF_DETAILS))
1465 fprintf (dump_file, "\n");
1467 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1469 info = ver_info (data, i);
1472 fprintf (dump_file, " ");
1473 print_generic_expr (dump_file, info->name, TDF_SLIM);
1474 fprintf (dump_file, " is invariant (%d)%s\n",
1475 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1479 fprintf (dump_file, "\n");
1485 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1486 position to POS. If USE is not NULL, the candidate is set as related to
1487 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1488 replacement of the final value of the iv by a direct computation. */
1490 static struct iv_cand *
1491 add_candidate_1 (struct ivopts_data *data,
1492 tree base, tree step, bool important, enum iv_position pos,
1493 struct iv_use *use, tree incremented_at)
1496 struct iv_cand *cand = NULL;
1501 type = TREE_TYPE (base);
1502 if (!TYPE_UNSIGNED (type))
1504 type = unsigned_type_for (type);
1505 base = fold_convert (type, base);
1507 step = fold_convert (type, step);
1511 for (i = 0; i < n_iv_cands (data); i++)
1513 cand = iv_cand (data, i);
1515 if (cand->pos != pos)
1518 if (cand->incremented_at != incremented_at)
1532 if (!operand_equal_p (base, cand->iv->base, 0))
1535 if (zero_p (cand->iv->step))
1542 if (step && operand_equal_p (step, cand->iv->step, 0))
1547 if (i == n_iv_cands (data))
1549 cand = xcalloc (1, sizeof (struct iv_cand));
1555 cand->iv = alloc_iv (base, step);
1558 if (pos != IP_ORIGINAL && cand->iv)
1560 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1561 cand->var_after = cand->var_before;
1563 cand->important = important;
1564 cand->incremented_at = incremented_at;
1565 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1567 if (dump_file && (dump_flags & TDF_DETAILS))
1568 dump_cand (dump_file, cand);
1571 if (important && !cand->important)
1573 cand->important = true;
1574 if (dump_file && (dump_flags & TDF_DETAILS))
1575 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1580 bitmap_set_bit (use->related_cands, i);
1581 if (dump_file && (dump_flags & TDF_DETAILS))
1582 fprintf (dump_file, "Candidate %d is related to use %d\n",
1589 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1590 position to POS. If USE is not NULL, the candidate is set as related to
1591 it. The candidate computation is scheduled on all available positions. */
1594 add_candidate (struct ivopts_data *data,
1595 tree base, tree step, bool important, struct iv_use *use)
1597 if (ip_normal_pos (data->current_loop))
1598 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1599 if (ip_end_pos (data->current_loop))
1600 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1603 /* Adds standard iv candidates. */
1606 add_standard_iv_candidates (struct ivopts_data *data)
1608 /* Add 0 + 1 * iteration candidate. */
1609 add_candidate (data,
1610 build_int_cst (unsigned_intSI_type_node, 0),
1611 build_int_cst (unsigned_intSI_type_node, 1),
1614 /* The same for a long type if it is still fast enough. */
1615 if (BITS_PER_WORD > 32)
1616 add_candidate (data,
1617 build_int_cst (unsigned_intDI_type_node, 0),
1618 build_int_cst (unsigned_intDI_type_node, 1),
1623 /* Adds candidates bases on the old induction variable IV. */
1626 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1629 struct iv_cand *cand;
1631 add_candidate (data, iv->base, iv->step, true, NULL);
1633 /* The same, but with initial value zero. */
1634 add_candidate (data,
1635 build_int_cst (TREE_TYPE (iv->base), 0),
1636 iv->step, true, NULL);
1638 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1639 if (TREE_CODE (phi) == PHI_NODE)
1641 /* Additionally record the possibility of leaving the original iv
1643 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1644 cand = add_candidate_1 (data,
1645 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1646 SSA_NAME_DEF_STMT (def));
1647 cand->var_before = iv->ssa_name;
1648 cand->var_after = def;
1652 /* Adds candidates based on the old induction variables. */
1655 add_old_ivs_candidates (struct ivopts_data *data)
1660 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1662 iv = ver_info (data, i)->iv;
1663 if (iv && iv->biv_p && !zero_p (iv->step))
1664 add_old_iv_candidates (data, iv);
1668 /* Adds candidates based on the value of the induction variable IV and USE. */
1671 add_iv_value_candidates (struct ivopts_data *data,
1672 struct iv *iv, struct iv_use *use)
1674 add_candidate (data, iv->base, iv->step, false, use);
1676 /* The same, but with initial value zero. */
1677 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1678 iv->step, false, use);
1681 /* Adds candidates based on the address IV and USE. */
1684 add_address_candidates (struct ivopts_data *data,
1685 struct iv *iv, struct iv_use *use)
1687 tree base, abase, tmp, *act;
1689 /* First, the trivial choices. */
1690 add_iv_value_candidates (data, iv, use);
1692 /* Second, try removing the COMPONENT_REFs. */
1693 if (TREE_CODE (iv->base) == ADDR_EXPR)
1695 base = TREE_OPERAND (iv->base, 0);
1696 while (TREE_CODE (base) == COMPONENT_REF
1697 || (TREE_CODE (base) == ARRAY_REF
1698 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1699 base = TREE_OPERAND (base, 0);
1701 if (base != TREE_OPERAND (iv->base, 0))
1703 if (TREE_CODE (base) == INDIRECT_REF)
1704 base = TREE_OPERAND (base, 0);
1706 base = build_addr (base);
1707 add_candidate (data, base, iv->step, false, use);
1711 /* Third, try removing the constant offset. */
1713 while (TREE_CODE (abase) == PLUS_EXPR
1714 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1715 abase = TREE_OPERAND (abase, 0);
1716 /* We found the offset, so make the copy of the non-shared part and
1718 if (TREE_CODE (abase) == PLUS_EXPR)
1723 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1725 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1726 NULL_TREE, TREE_OPERAND (tmp, 1));
1727 act = &TREE_OPERAND (*act, 0);
1729 *act = TREE_OPERAND (tmp, 0);
1731 add_candidate (data, base, iv->step, false, use);
1735 /* Possibly adds pseudocandidate for replacing the final value of USE by
1736 a direct computation. */
1739 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1741 struct tree_niter_desc *niter;
1742 struct loop *loop = data->current_loop;
1744 /* We must know where we exit the loop and how many times does it roll. */
1745 if (!single_dom_exit (loop))
1748 niter = &loop_data (loop)->niter;
1750 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1751 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1754 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1757 /* Adds candidates based on the uses. */
1760 add_derived_ivs_candidates (struct ivopts_data *data)
1764 for (i = 0; i < n_iv_uses (data); i++)
1766 struct iv_use *use = iv_use (data, i);
1773 case USE_NONLINEAR_EXPR:
1775 /* Just add the ivs based on the value of the iv used here. */
1776 add_iv_value_candidates (data, use->iv, use);
1780 add_iv_value_candidates (data, use->iv, use);
1782 /* Additionally, add the pseudocandidate for the possibility to
1783 replace the final value by a direct computation. */
1784 add_iv_outer_candidates (data, use);
1788 add_address_candidates (data, use->iv, use);
1797 /* Finds the candidates for the induction variables. */
1800 find_iv_candidates (struct ivopts_data *data)
1802 /* Add commonly used ivs. */
1803 add_standard_iv_candidates (data);
1805 /* Add old induction variables. */
1806 add_old_ivs_candidates (data);
1808 /* Add induction variables derived from uses. */
1809 add_derived_ivs_candidates (data);
1812 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1813 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1814 we allocate a simple list to every use. */
1817 alloc_use_cost_map (struct ivopts_data *data)
1819 unsigned i, n_imp = 0, size, j;
1821 if (!data->consider_all_candidates)
1823 for (i = 0; i < n_iv_cands (data); i++)
1825 struct iv_cand *cand = iv_cand (data, i);
1826 if (cand->important)
1831 for (i = 0; i < n_iv_uses (data); i++)
1833 struct iv_use *use = iv_use (data, i);
1835 if (data->consider_all_candidates)
1837 size = n_iv_cands (data);
1838 use->n_map_members = size;
1843 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1844 use->n_map_members = 0;
1847 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1851 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1852 on invariants DEPENDS_ON. */
1855 set_use_iv_cost (struct ivopts_data *data,
1856 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1862 BITMAP_XFREE (depends_on);
1866 if (data->consider_all_candidates)
1868 use->cost_map[cand->id].cand = cand;
1869 use->cost_map[cand->id].cost = cost;
1870 use->cost_map[cand->id].depends_on = depends_on;
1877 use->cost_map[use->n_map_members].cand = cand;
1878 use->cost_map[use->n_map_members].cost = cost;
1879 use->cost_map[use->n_map_members].depends_on = depends_on;
1880 use->n_map_members++;
1883 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1887 get_use_iv_cost (struct ivopts_data *data,
1888 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1895 if (data->consider_all_candidates)
1899 for (i = 0; i < use->n_map_members; i++)
1900 if (use->cost_map[i].cand == cand)
1903 if (i == use->n_map_members)
1908 *depends_on = use->cost_map[i].depends_on;
1909 return use->cost_map[i].cost;
1912 /* Returns estimate on cost of computing SEQ. */
1920 for (; seq; seq = NEXT_INSN (seq))
1922 set = single_set (seq);
1924 cost += rtx_cost (set, SET);
1932 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1934 produce_memory_decl_rtl (tree obj, int *regno)
1939 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1941 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1942 x = gen_rtx_SYMBOL_REF (Pmode, name);
1945 x = gen_raw_REG (Pmode, (*regno)++);
1947 return gen_rtx_MEM (DECL_MODE (obj), x);
1950 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1951 walk_tree. DATA contains the actual fake register number. */
1954 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1956 tree obj = NULL_TREE;
1960 switch (TREE_CODE (*expr_p))
1963 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1964 (handled_component_p (*expr_p)
1965 || TREE_CODE (*expr_p) == REALPART_EXPR
1966 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1967 expr_p = &TREE_OPERAND (*expr_p, 0));
1970 x = produce_memory_decl_rtl (obj, regno);
1975 obj = SSA_NAME_VAR (*expr_p);
1976 if (!DECL_RTL_SET_P (obj))
1977 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1986 if (DECL_RTL_SET_P (obj))
1989 if (DECL_MODE (obj) == BLKmode)
1990 x = produce_memory_decl_rtl (obj, regno);
1992 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2002 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2003 SET_DECL_RTL (obj, x);
2009 /* Determines cost of the computation of EXPR. */
2012 computation_cost (tree expr)
2015 tree type = TREE_TYPE (expr);
2019 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2021 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2025 cost = seq_cost (seq);
2026 if (GET_CODE (rslt) == MEM)
2027 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2032 /* Returns variable containing the value of candidate CAND at statement AT. */
2035 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2037 if (stmt_after_increment (loop, cand, stmt))
2038 return cand->var_after;
2040 return cand->var_before;
2043 /* Determines the expression by that USE is expressed from induction variable
2044 CAND at statement AT in LOOP. */
2047 get_computation_at (struct loop *loop,
2048 struct iv_use *use, struct iv_cand *cand, tree at)
2050 tree ubase = use->iv->base;
2051 tree ustep = use->iv->step;
2052 tree cbase = cand->iv->base;
2053 tree cstep = cand->iv->step;
2054 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2058 unsigned HOST_WIDE_INT ustepi, cstepi;
2059 HOST_WIDE_INT ratioi;
2061 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2063 /* We do not have a precision to express the values of use. */
2067 expr = var_at_stmt (loop, cand, at);
2069 if (TREE_TYPE (expr) != ctype)
2071 /* This may happen with the original ivs. */
2072 expr = fold_convert (ctype, expr);
2075 if (TYPE_UNSIGNED (utype))
2079 uutype = unsigned_type_for (utype);
2080 ubase = fold_convert (uutype, ubase);
2081 ustep = fold_convert (uutype, ustep);
2084 if (uutype != ctype)
2086 expr = fold_convert (uutype, expr);
2087 cbase = fold_convert (uutype, cbase);
2088 cstep = fold_convert (uutype, cstep);
2091 if (!cst_and_fits_in_hwi (cstep)
2092 || !cst_and_fits_in_hwi (ustep))
2095 ustepi = int_cst_value (ustep);
2096 cstepi = int_cst_value (cstep);
2098 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2100 /* TODO maybe consider case when ustep divides cstep and the ratio is
2101 a power of 2 (so that the division is fast to execute)? We would
2102 need to be much more careful with overflows etc. then. */
2106 /* We may need to shift the value if we are after the increment. */
2107 if (stmt_after_increment (loop, cand, at))
2108 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2110 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2111 or |ratio| == 1, it is better to handle this like
2113 ubase - ratio * cbase + ratio * var. */
2117 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2118 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2120 else if (ratioi == -1)
2122 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2123 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2125 else if (TREE_CODE (cbase) == INTEGER_CST)
2127 ratio = build_int_cst_type (uutype, ratioi);
2128 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2129 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2130 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2131 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2135 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2136 ratio = build_int_cst_type (uutype, ratioi);
2137 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2138 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2141 return fold_convert (utype, expr);
2144 /* Determines the expression by that USE is expressed from induction variable
2148 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2150 return get_computation_at (loop, use, cand, use->stmt);
2153 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2156 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2159 enum tree_code code;
2163 if (cst_and_fits_in_hwi (*expr))
2165 *offset += int_cst_value (*expr);
2166 *expr = integer_zero_node;
2170 code = TREE_CODE (*expr);
2172 if (code != PLUS_EXPR && code != MINUS_EXPR)
2175 op0 = TREE_OPERAND (*expr, 0);
2176 op1 = TREE_OPERAND (*expr, 1);
2178 if (cst_and_fits_in_hwi (op1))
2180 if (code == PLUS_EXPR)
2181 *offset += int_cst_value (op1);
2183 *offset -= int_cst_value (op1);
2189 if (code != PLUS_EXPR)
2192 if (!cst_and_fits_in_hwi (op0))
2195 *offset += int_cst_value (op0);
2200 /* Returns cost of addition in MODE. */
2203 add_cost (enum machine_mode mode)
2205 static unsigned costs[NUM_MACHINE_MODES];
2213 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2214 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2215 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2220 cost = seq_cost (seq);
2226 if (dump_file && (dump_flags & TDF_DETAILS))
2227 fprintf (dump_file, "Addition in %s costs %d\n",
2228 GET_MODE_NAME (mode), cost);
2232 /* Entry in a hashtable of already known costs for multiplication. */
2235 HOST_WIDE_INT cst; /* The constant to multiply by. */
2236 enum machine_mode mode; /* In mode. */
2237 unsigned cost; /* The cost. */
2240 /* Counts hash value for the ENTRY. */
2243 mbc_entry_hash (const void *entry)
2245 const struct mbc_entry *e = entry;
2247 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2250 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2253 mbc_entry_eq (const void *entry1, const void *entry2)
2255 const struct mbc_entry *e1 = entry1;
2256 const struct mbc_entry *e2 = entry2;
2258 return (e1->mode == e2->mode
2259 && e1->cst == e2->cst);
2262 /* Returns cost of multiplication by constant CST in MODE. */
2265 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2267 static htab_t costs;
2268 struct mbc_entry **cached, act;
2273 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2277 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2279 return (*cached)->cost;
2281 *cached = xmalloc (sizeof (struct mbc_entry));
2282 (*cached)->mode = mode;
2283 (*cached)->cst = cst;
2286 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2291 cost = seq_cost (seq);
2293 if (dump_file && (dump_flags & TDF_DETAILS))
2294 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2295 (int) cst, GET_MODE_NAME (mode), cost);
2297 (*cached)->cost = cost;
2302 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2303 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2304 variable is omitted. The created memory accesses MODE.
2306 TODO -- there must be some better way. This all is quite crude. */
2309 get_address_cost (bool symbol_present, bool var_present,
2310 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2312 #define MAX_RATIO 128
2313 static sbitmap valid_mult;
2314 static HOST_WIDE_INT rat, off;
2315 static HOST_WIDE_INT min_offset, max_offset;
2316 static unsigned costs[2][2][2][2];
2317 unsigned cost, acost;
2318 rtx seq, addr, base;
2319 bool offset_p, ratio_p;
2321 HOST_WIDE_INT s_offset;
2322 unsigned HOST_WIDE_INT mask;
2329 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2331 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2332 for (i = 1; i <= 1 << 20; i <<= 1)
2334 XEXP (addr, 1) = GEN_INT (i);
2335 if (!memory_address_p (Pmode, addr))
2338 max_offset = i >> 1;
2341 for (i = 1; i <= 1 << 20; i <<= 1)
2343 XEXP (addr, 1) = GEN_INT (-i);
2344 if (!memory_address_p (Pmode, addr))
2347 min_offset = -(i >> 1);
2349 if (dump_file && (dump_flags & TDF_DETAILS))
2351 fprintf (dump_file, "get_address_cost:\n");
2352 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2353 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2356 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2357 sbitmap_zero (valid_mult);
2359 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2360 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2362 XEXP (addr, 1) = GEN_INT (i);
2363 if (memory_address_p (Pmode, addr))
2365 SET_BIT (valid_mult, i + MAX_RATIO);
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2372 fprintf (dump_file, " allowed multipliers:");
2373 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2374 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2375 fprintf (dump_file, " %d", (int) i);
2376 fprintf (dump_file, "\n");
2377 fprintf (dump_file, "\n");
2381 bits = GET_MODE_BITSIZE (Pmode);
2382 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2384 if ((offset >> (bits - 1) & 1))
2389 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2390 ratio_p = (ratio != 1
2391 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2392 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2394 if (ratio != 1 && !ratio_p)
2395 cost += multiply_by_cost (ratio, Pmode);
2397 if (s_offset && !offset_p && !symbol_present)
2399 cost += add_cost (Pmode);
2403 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2408 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2409 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2411 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2415 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2417 base = gen_rtx_fmt_e (CONST, Pmode,
2418 gen_rtx_fmt_ee (PLUS, Pmode,
2422 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2425 else if (var_present)
2429 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2432 base = GEN_INT (off);
2437 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2440 addr = memory_address (Pmode, addr);
2444 acost = seq_cost (seq);
2445 acost += address_cost (addr, Pmode);
2449 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2452 return cost + acost;
2455 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2456 the bitmap to that we should store it. */
2458 static struct ivopts_data *fd_ivopts_data;
2460 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2462 bitmap *depends_on = data;
2463 struct version_info *info;
2465 if (TREE_CODE (*expr_p) != SSA_NAME)
2467 info = name_info (fd_ivopts_data, *expr_p);
2469 if (!info->inv_id || info->has_nonlin_use)
2473 *depends_on = BITMAP_XMALLOC ();
2474 bitmap_set_bit (*depends_on, info->inv_id);
2479 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2480 invariants the computation depends on. */
2483 force_var_cost (struct ivopts_data *data,
2484 tree expr, bitmap *depends_on)
2486 static bool costs_initialized = false;
2487 static unsigned integer_cost;
2488 static unsigned symbol_cost;
2489 static unsigned address_cost;
2491 if (!costs_initialized)
2493 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2494 rtx x = gen_rtx_MEM (DECL_MODE (var),
2495 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2497 tree type = build_pointer_type (integer_type_node);
2499 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2502 SET_DECL_RTL (var, x);
2503 TREE_STATIC (var) = 1;
2504 addr = build1 (ADDR_EXPR, type, var);
2505 symbol_cost = computation_cost (addr) + 1;
2508 = computation_cost (build2 (PLUS_EXPR, type,
2510 build_int_cst_type (type, 2000))) + 1;
2511 if (dump_file && (dump_flags & TDF_DETAILS))
2513 fprintf (dump_file, "force_var_cost:\n");
2514 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2515 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2516 fprintf (dump_file, " address %d\n", (int) address_cost);
2517 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2518 fprintf (dump_file, "\n");
2521 costs_initialized = true;
2526 fd_ivopts_data = data;
2527 walk_tree (&expr, find_depends, depends_on, NULL);
2530 if (SSA_VAR_P (expr))
2533 if (TREE_INVARIANT (expr))
2535 if (TREE_CODE (expr) == INTEGER_CST)
2536 return integer_cost;
2538 if (TREE_CODE (expr) == ADDR_EXPR)
2540 tree obj = TREE_OPERAND (expr, 0);
2542 if (TREE_CODE (obj) == VAR_DECL
2543 || TREE_CODE (obj) == PARM_DECL
2544 || TREE_CODE (obj) == RESULT_DECL)
2548 return address_cost;
2551 /* Just an arbitrary value, FIXME. */
2552 return target_spill_cost;
2555 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2556 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2557 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2558 invariants the computation depends on. */
2561 split_address_cost (struct ivopts_data *data,
2562 tree addr, bool *symbol_present, bool *var_present,
2563 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2566 HOST_WIDE_INT bitsize;
2567 HOST_WIDE_INT bitpos;
2569 enum machine_mode mode;
2570 int unsignedp, volatilep;
2572 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2573 &unsignedp, &volatilep);
2576 || bitpos % BITS_PER_UNIT != 0
2577 || TREE_CODE (core) != VAR_DECL)
2579 *symbol_present = false;
2580 *var_present = true;
2581 fd_ivopts_data = data;
2582 walk_tree (&addr, find_depends, depends_on, NULL);
2583 return target_spill_cost;
2586 *offset += bitpos / BITS_PER_UNIT;
2587 if (TREE_STATIC (core)
2588 || DECL_EXTERNAL (core))
2590 *symbol_present = true;
2591 *var_present = false;
2595 *symbol_present = false;
2596 *var_present = true;
2600 /* Estimates cost of expressing difference of addresses E1 - E2 as
2601 var + symbol + offset. The value of offset is added to OFFSET,
2602 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2603 part is missing. DEPENDS_ON is a set of the invariants the computation
2607 ptr_difference_cost (struct ivopts_data *data,
2608 tree e1, tree e2, bool *symbol_present, bool *var_present,
2609 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2611 HOST_WIDE_INT diff = 0;
2614 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2616 if (TREE_CODE (e2) == ADDR_EXPR
2617 && ptr_difference_const (TREE_OPERAND (e1, 0),
2618 TREE_OPERAND (e2, 0), &diff))
2621 *symbol_present = false;
2622 *var_present = false;
2626 if (e2 == integer_zero_node)
2627 return split_address_cost (data, TREE_OPERAND (e1, 0),
2628 symbol_present, var_present, offset, depends_on);
2630 *symbol_present = false;
2631 *var_present = true;
2633 cost = force_var_cost (data, e1, depends_on);
2634 cost += force_var_cost (data, e2, depends_on);
2635 cost += add_cost (Pmode);
2640 /* Estimates cost of expressing difference E1 - E2 as
2641 var + symbol + offset. The value of offset is added to OFFSET,
2642 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2643 part is missing. DEPENDS_ON is a set of the invariants the computation
2647 difference_cost (struct ivopts_data *data,
2648 tree e1, tree e2, bool *symbol_present, bool *var_present,
2649 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2652 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2654 strip_offset (&e1, offset);
2656 strip_offset (&e2, offset);
2659 if (TREE_CODE (e1) == ADDR_EXPR)
2660 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2662 *symbol_present = false;
2664 if (operand_equal_p (e1, e2, 0))
2666 *var_present = false;
2669 *var_present = true;
2671 return force_var_cost (data, e1, depends_on);
2675 cost = force_var_cost (data, e2, depends_on);
2676 cost += multiply_by_cost (-1, mode);
2681 cost = force_var_cost (data, e1, depends_on);
2682 cost += force_var_cost (data, e2, depends_on);
2683 cost += add_cost (mode);
2688 /* Determines the cost of the computation by that USE is expressed
2689 from induction variable CAND. If ADDRESS_P is true, we just need
2690 to create an address from it, otherwise we want to get it into
2691 register. A set of invariants we depend on is stored in
2692 DEPENDS_ON. AT is the statement at that the value is computed. */
2695 get_computation_cost_at (struct ivopts_data *data,
2696 struct iv_use *use, struct iv_cand *cand,
2697 bool address_p, bitmap *depends_on, tree at)
2699 tree ubase = use->iv->base, ustep = use->iv->step;
2701 tree utype = TREE_TYPE (ubase), ctype;
2702 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2703 HOST_WIDE_INT ratio, aratio;
2704 bool var_present, symbol_present;
2705 unsigned cost = 0, n_sums;
2709 /* Only consider real candidates. */
2713 cbase = cand->iv->base;
2714 cstep = cand->iv->step;
2715 ctype = TREE_TYPE (cbase);
2717 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2719 /* We do not have a precision to express the values of use. */
2723 if (!cst_and_fits_in_hwi (ustep)
2724 || !cst_and_fits_in_hwi (cstep))
2727 if (TREE_CODE (ubase) == INTEGER_CST
2728 && !cst_and_fits_in_hwi (ubase))
2731 if (TREE_CODE (cbase) == INTEGER_CST
2732 && !cst_and_fits_in_hwi (cbase))
2735 ustepi = int_cst_value (ustep);
2736 cstepi = int_cst_value (cstep);
2738 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2740 /* TODO -- add direct handling of this case. */
2744 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2747 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2748 or ratio == 1, it is better to handle this like
2750 ubase - ratio * cbase + ratio * var
2752 (also holds in the case ratio == -1, TODO. */
2754 if (TREE_CODE (cbase) == INTEGER_CST)
2756 offset = - ratio * int_cst_value (cbase);
2757 cost += difference_cost (data,
2758 ubase, integer_zero_node,
2759 &symbol_present, &var_present, &offset,
2762 else if (ratio == 1)
2764 cost += difference_cost (data,
2766 &symbol_present, &var_present, &offset,
2771 cost += force_var_cost (data, cbase, depends_on);
2772 cost += add_cost (TYPE_MODE (ctype));
2773 cost += difference_cost (data,
2774 ubase, integer_zero_node,
2775 &symbol_present, &var_present, &offset,
2779 /* If we are after the increment, the value of the candidate is higher by
2781 if (stmt_after_increment (data->current_loop, cand, at))
2782 offset -= ratio * cstepi;
2784 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2785 (symbol/var/const parts may be omitted). If we are looking for an address,
2786 find the cost of addressing this. */
2788 return get_address_cost (symbol_present, var_present, offset, ratio);
2790 /* Otherwise estimate the costs for computing the expression. */
2791 aratio = ratio > 0 ? ratio : -ratio;
2792 if (!symbol_present && !var_present && !offset)
2795 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2801 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2805 /* Symbol + offset should be compile-time computable. */
2806 && (symbol_present || offset))
2809 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2813 /* Just get the expression, expand it and measure the cost. */
2814 tree comp = get_computation_at (data->current_loop, use, cand, at);
2820 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2822 return computation_cost (comp);
2826 /* Determines the cost of the computation by that USE is expressed
2827 from induction variable CAND. If ADDRESS_P is true, we just need
2828 to create an address from it, otherwise we want to get it into
2829 register. A set of invariants we depend on is stored in
2833 get_computation_cost (struct ivopts_data *data,
2834 struct iv_use *use, struct iv_cand *cand,
2835 bool address_p, bitmap *depends_on)
2837 return get_computation_cost_at (data,
2838 use, cand, address_p, depends_on, use->stmt);
2841 /* Determines cost of basing replacement of USE on CAND in a generic
2845 determine_use_iv_cost_generic (struct ivopts_data *data,
2846 struct iv_use *use, struct iv_cand *cand)
2849 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2851 set_use_iv_cost (data, use, cand, cost, depends_on);
2854 /* Determines cost of basing replacement of USE on CAND in an address. */
2857 determine_use_iv_cost_address (struct ivopts_data *data,
2858 struct iv_use *use, struct iv_cand *cand)
2861 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2863 set_use_iv_cost (data, use, cand, cost, depends_on);
2866 /* Computes value of induction variable IV in iteration NITER. */
2869 iv_value (struct iv *iv, tree niter)
2872 tree type = TREE_TYPE (iv->base);
2874 niter = fold_convert (type, niter);
2875 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2877 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2880 /* Computes value of candidate CAND at position AT in iteration NITER. */
2883 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2885 tree val = iv_value (cand->iv, niter);
2886 tree type = TREE_TYPE (cand->iv->base);
2888 if (stmt_after_increment (loop, cand, at))
2889 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2894 /* Check whether it is possible to express the condition in USE by comparison
2895 of candidate CAND. If so, store the comparison code to COMPARE and the
2896 value compared with to BOUND. */
2899 may_eliminate_iv (struct loop *loop,
2900 struct iv_use *use, struct iv_cand *cand,
2901 enum tree_code *compare, tree *bound)
2904 struct tree_niter_desc *niter, new_niter;
2905 tree wider_type, type, base;
2907 /* For now just very primitive -- we work just for the single exit condition,
2908 and are quite conservative about the possible overflows. TODO -- both of
2909 these can be improved. */
2910 exit = single_dom_exit (loop);
2913 if (use->stmt != last_stmt (exit->src))
2916 niter = &loop_data (loop)->niter;
2918 || !integer_nonzerop (niter->assumptions)
2919 || !integer_zerop (niter->may_be_zero))
2922 if (exit->flags & EDGE_TRUE_VALUE)
2927 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2929 /* Let us check there is not some problem with overflows, by checking that
2930 the number of iterations is unchanged. */
2931 base = cand->iv->base;
2932 type = TREE_TYPE (base);
2933 if (stmt_after_increment (loop, cand, use->stmt))
2934 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2936 new_niter.niter = NULL_TREE;
2937 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2938 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2940 if (!new_niter.niter
2941 || !integer_nonzerop (new_niter.assumptions)
2942 || !integer_zerop (new_niter.may_be_zero))
2945 wider_type = TREE_TYPE (new_niter.niter);
2946 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2947 wider_type = TREE_TYPE (niter->niter);
2948 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2949 fold_convert (wider_type, new_niter.niter), 0))
2955 /* Determines cost of basing replacement of USE on CAND in a condition. */
2958 determine_use_iv_cost_condition (struct ivopts_data *data,
2959 struct iv_use *use, struct iv_cand *cand)
2962 enum tree_code compare;
2964 /* Only consider real candidates. */
2967 set_use_iv_cost (data, use, cand, INFTY, NULL);
2971 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
2973 bitmap depends_on = NULL;
2974 unsigned cost = force_var_cost (data, bound, &depends_on);
2976 set_use_iv_cost (data, use, cand, cost, depends_on);
2980 /* The induction variable elimination failed; just express the original
2981 giv. If it is compared with an invariant, note that we cannot get
2983 if (TREE_CODE (*use->op_p) == SSA_NAME)
2984 record_invariant (data, *use->op_p, true);
2987 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
2988 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
2991 determine_use_iv_cost_generic (data, use, cand);
2994 /* Checks whether it is possible to replace the final value of USE by
2995 a direct computation. If so, the formula is stored to *VALUE. */
2998 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3001 struct tree_niter_desc *niter;
3003 exit = single_dom_exit (loop);
3007 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3008 bb_for_stmt (use->stmt)));
3010 niter = &loop_data (loop)->niter;
3012 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3013 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3016 *value = iv_value (use->iv, niter->niter);
3021 /* Determines cost of replacing final value of USE using CAND. */
3024 determine_use_iv_cost_outer (struct ivopts_data *data,
3025 struct iv_use *use, struct iv_cand *cand)
3031 struct loop *loop = data->current_loop;
3035 if (!may_replace_final_value (loop, use, &value))
3037 set_use_iv_cost (data, use, cand, INFTY, NULL);
3042 cost = force_var_cost (data, value, &depends_on);
3044 cost /= AVG_LOOP_NITER (loop);
3046 set_use_iv_cost (data, use, cand, cost, depends_on);
3050 exit = single_dom_exit (loop);
3053 /* If there is just a single exit, we may use value of the candidate
3054 after we take it to determine the value of use. */
3055 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3056 last_stmt (exit->src));
3058 cost /= AVG_LOOP_NITER (loop);
3062 /* Otherwise we just need to compute the iv. */
3063 cost = get_computation_cost (data, use, cand, false, &depends_on);
3066 set_use_iv_cost (data, use, cand, cost, depends_on);
3069 /* Determines cost of basing replacement of USE on CAND. */
3072 determine_use_iv_cost (struct ivopts_data *data,
3073 struct iv_use *use, struct iv_cand *cand)
3077 case USE_NONLINEAR_EXPR:
3078 determine_use_iv_cost_generic (data, use, cand);
3082 determine_use_iv_cost_outer (data, use, cand);
3086 determine_use_iv_cost_address (data, use, cand);
3090 determine_use_iv_cost_condition (data, use, cand);
3098 /* Determines costs of basing the use of the iv on an iv candidate. */
3101 determine_use_iv_costs (struct ivopts_data *data)
3105 struct iv_cand *cand;
3107 data->consider_all_candidates = (n_iv_cands (data)
3108 <= CONSIDER_ALL_CANDIDATES_BOUND);
3110 alloc_use_cost_map (data);
3112 if (!data->consider_all_candidates)
3114 /* Add the important candidate entries. */
3115 for (j = 0; j < n_iv_cands (data); j++)
3117 cand = iv_cand (data, j);
3118 if (!cand->important)
3120 for (i = 0; i < n_iv_uses (data); i++)
3122 use = iv_use (data, i);
3123 determine_use_iv_cost (data, use, cand);
3128 for (i = 0; i < n_iv_uses (data); i++)
3130 use = iv_use (data, i);
3132 if (data->consider_all_candidates)
3134 for (j = 0; j < n_iv_cands (data); j++)
3136 cand = iv_cand (data, j);
3137 determine_use_iv_cost (data, use, cand);
3142 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3144 cand = iv_cand (data, j);
3145 if (!cand->important)
3146 determine_use_iv_cost (data, use, cand);
3151 if (dump_file && (dump_flags & TDF_DETAILS))
3153 fprintf (dump_file, "Use-candidate costs:\n");
3155 for (i = 0; i < n_iv_uses (data); i++)
3157 use = iv_use (data, i);
3159 fprintf (dump_file, "Use %d:\n", i);
3160 fprintf (dump_file, " cand\tcost\tdepends on\n");
3161 for (j = 0; j < use->n_map_members; j++)
3163 if (!use->cost_map[j].cand
3164 || use->cost_map[j].cost == INFTY)
3167 fprintf (dump_file, " %d\t%d\t",
3168 use->cost_map[j].cand->id,
3169 use->cost_map[j].cost);
3170 if (use->cost_map[j].depends_on)
3171 bitmap_print (dump_file,
3172 use->cost_map[j].depends_on, "","");
3173 fprintf (dump_file, "\n");
3176 fprintf (dump_file, "\n");
3178 fprintf (dump_file, "\n");
3182 /* Determines cost of the candidate CAND. */
3185 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3187 unsigned cost_base, cost_step;
3197 /* There are two costs associated with the candidate -- its increment
3198 and its initialization. The second is almost negligible for any loop
3199 that rolls enough, so we take it just very little into account. */
3201 base = cand->iv->base;
3202 cost_base = force_var_cost (data, base, NULL);
3203 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3205 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3207 /* Prefer the original iv unless we may gain something by replacing it. */
3208 if (cand->pos == IP_ORIGINAL)
3211 /* Prefer not to insert statements into latch unless there are some
3212 already (so that we do not create unnecessary jumps). */
3213 if (cand->pos == IP_END)
3215 bb = ip_end_pos (data->current_loop);
3216 last = last_stmt (bb);
3219 || TREE_CODE (last) == LABEL_EXPR)
3224 /* Determines costs of computation of the candidates. */
3227 determine_iv_costs (struct ivopts_data *data)
3231 if (dump_file && (dump_flags & TDF_DETAILS))
3233 fprintf (dump_file, "Candidate costs:\n");
3234 fprintf (dump_file, " cand\tcost\n");
3237 for (i = 0; i < n_iv_cands (data); i++)
3239 struct iv_cand *cand = iv_cand (data, i);
3241 determine_iv_cost (data, cand);
3243 if (dump_file && (dump_flags & TDF_DETAILS))
3244 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3247 if (dump_file && (dump_flags & TDF_DETAILS))
3248 fprintf (dump_file, "\n");
3251 /* Calculates cost for having SIZE induction variables. */
3254 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3256 return global_cost_for_size (size,
3257 loop_data (data->current_loop)->regs_used,
3261 /* For each size of the induction variable set determine the penalty. */
3264 determine_set_costs (struct ivopts_data *data)
3268 struct loop *loop = data->current_loop;
3270 /* We use the following model (definitely improvable, especially the
3271 cost function -- TODO):
3273 We estimate the number of registers available (using MD data), name it A.
3275 We estimate the number of registers used by the loop, name it U. This
3276 number is obtained as the number of loop phi nodes (not counting virtual
3277 registers and bivs) + the number of variables from outside of the loop.
3279 We set a reserve R (free regs that are used for temporary computations,
3280 etc.). For now the reserve is a constant 3.
3282 Let I be the number of induction variables.
3284 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3285 make a lot of ivs without a reason).
3286 -- if A - R < U + I <= A, the cost is I * PRES_COST
3287 -- if U + I > A, the cost is I * PRES_COST and
3288 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3290 if (dump_file && (dump_flags & TDF_DETAILS))
3292 fprintf (dump_file, "Global costs:\n");
3293 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3294 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3295 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3296 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3300 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3302 op = PHI_RESULT (phi);
3304 if (!is_gimple_reg (op))
3307 if (get_iv (data, op))
3313 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3315 struct version_info *info = ver_info (data, j);
3317 if (info->inv_id && info->has_nonlin_use)
3321 loop_data (loop)->regs_used = n;
3322 if (dump_file && (dump_flags & TDF_DETAILS))
3323 fprintf (dump_file, " regs_used %d\n", n);
3325 if (dump_file && (dump_flags & TDF_DETAILS))
3327 fprintf (dump_file, " cost for size:\n");
3328 fprintf (dump_file, " ivs\tcost\n");
3329 for (j = 0; j <= 2 * target_avail_regs; j++)
3330 fprintf (dump_file, " %d\t%d\n", j,
3331 ivopts_global_cost_for_size (data, j));
3332 fprintf (dump_file, "\n");
3336 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3337 taken from the set SOL and they may depend on invariants in the set INV.
3338 The really used candidate and invariants are noted to USED_IVS and
3342 find_best_candidate (struct ivopts_data *data,
3343 struct iv_use *use, bitmap sol, bitmap inv,
3344 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3347 unsigned best_cost = INFTY, cost;
3348 struct iv_cand *cnd = NULL, *acnd;
3349 bitmap depends_on = NULL, asol;
3351 if (data->consider_all_candidates)
3355 asol = BITMAP_XMALLOC ();
3356 bitmap_a_and_b (asol, sol, use->related_cands);
3359 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3361 acnd = iv_cand (data, c);
3362 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3366 if (cost > best_cost)
3368 if (cost == best_cost)
3370 /* Prefer the cheaper iv. */
3371 if (acnd->cost >= cnd->cost)
3377 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3380 bitmap_a_or_b (used_inv, used_inv, depends_on);
3388 if (cnd && used_ivs)
3389 bitmap_set_bit (used_ivs, cnd->id);
3394 if (!data->consider_all_candidates)
3395 BITMAP_XFREE (asol);
3400 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3401 induction variable candidates and invariants from the sets. Only
3402 uses 0 .. MAX_USE - 1 are taken into account. */
3405 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3409 unsigned cost = 0, size = 0, acost;
3411 struct iv_cand *cand;
3412 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3414 for (i = 0; i < max_use; i++)
3416 use = iv_use (data, i);
3417 acost = find_best_candidate (data, use, sol, inv,
3418 used_ivs, used_inv, NULL);
3421 BITMAP_XFREE (used_ivs);
3422 BITMAP_XFREE (used_inv);
3428 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3430 cand = iv_cand (data, i);
3432 /* Do not count the pseudocandidates. */
3438 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3439 cost += ivopts_global_cost_for_size (data, size);
3441 bitmap_copy (sol, used_ivs);
3442 bitmap_copy (inv, used_inv);
3444 BITMAP_XFREE (used_ivs);
3445 BITMAP_XFREE (used_inv);
3450 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3451 induction variable candidates and invariants from the sets. */
3454 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3456 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3459 /* Tries to extend the sets IVS and INV in the best possible way in order
3460 to express the USE. */
3463 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3466 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3467 bitmap best_ivs = BITMAP_XMALLOC ();
3468 bitmap best_inv = BITMAP_XMALLOC ();
3469 bitmap act_ivs = BITMAP_XMALLOC ();
3470 bitmap act_inv = BITMAP_XMALLOC ();
3472 struct cost_pair *cp;
3474 bitmap_copy (best_ivs, ivs);
3475 bitmap_copy (best_inv, inv);
3477 for (i = 0; i < use->n_map_members; i++)
3479 cp = use->cost_map + i;
3480 if (cp->cost == INFTY)
3483 bitmap_copy (act_ivs, ivs);
3484 bitmap_set_bit (act_ivs, cp->cand->id);
3486 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3488 bitmap_copy (act_inv, inv);
3489 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3491 if (act_cost < best_cost)
3493 best_cost = act_cost;
3494 bitmap_copy (best_ivs, act_ivs);
3495 bitmap_copy (best_inv, act_inv);
3499 bitmap_copy (ivs, best_ivs);
3500 bitmap_copy (inv, best_inv);
3502 BITMAP_XFREE (best_ivs);
3503 BITMAP_XFREE (best_inv);
3504 BITMAP_XFREE (act_ivs);
3505 BITMAP_XFREE (act_inv);
3507 return (best_cost != INFTY);
3510 /* Finds an initial set of IVS and invariants INV. We do this by simply
3511 choosing the best candidate for each use. */
3514 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3518 for (i = 0; i < n_iv_uses (data); i++)
3519 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3522 return set_cost (data, ivs, inv);
3525 /* Tries to improve set of induction variables IVS and invariants INV to get
3526 it better than COST. */
3529 try_improve_iv_set (struct ivopts_data *data,
3530 bitmap ivs, bitmap inv, unsigned *cost)
3533 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3534 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3536 /* Try altering the set of induction variables by one. */
3537 for (i = 0; i < n_iv_cands (data); i++)
3539 bitmap_copy (new_ivs, ivs);
3540 bitmap_copy (new_inv, inv);
3542 if (bitmap_bit_p (ivs, i))
3543 bitmap_clear_bit (new_ivs, i);
3545 bitmap_set_bit (new_ivs, i);
3547 acost = set_cost (data, new_ivs, new_inv);
3553 best_new_ivs = BITMAP_XMALLOC ();
3554 best_new_inv = BITMAP_XMALLOC ();
3558 bitmap_copy (best_new_ivs, new_ivs);
3559 bitmap_copy (best_new_inv, new_inv);
3562 /* Ditto for invariants. */
3563 for (i = 1; i <= data->max_inv_id; i++)
3565 if (ver_info (data, i)->has_nonlin_use)
3568 bitmap_copy (new_ivs, ivs);
3569 bitmap_copy (new_inv, inv);
3571 if (bitmap_bit_p (inv, i))
3572 bitmap_clear_bit (new_inv, i);
3574 bitmap_set_bit (new_inv, i);
3576 acost = set_cost (data, new_ivs, new_inv);
3582 best_new_ivs = BITMAP_XMALLOC ();
3583 best_new_inv = BITMAP_XMALLOC ();
3587 bitmap_copy (best_new_ivs, new_ivs);
3588 bitmap_copy (best_new_inv, new_inv);
3591 BITMAP_XFREE (new_ivs);
3592 BITMAP_XFREE (new_inv);
3597 bitmap_copy (ivs, best_new_ivs);
3598 bitmap_copy (inv, best_new_inv);
3599 BITMAP_XFREE (best_new_ivs);
3600 BITMAP_XFREE (best_new_inv);
3604 /* Attempts to find the optimal set of induction variables. We do simple
3605 greedy heuristic -- we try to replace at most one candidate in the selected
3606 solution and remove the unused ivs while this improves the cost. */
3609 find_optimal_iv_set (struct ivopts_data *data)
3612 bitmap set = BITMAP_XMALLOC ();
3613 bitmap inv = BITMAP_XMALLOC ();
3616 /* Set the upper bound. */
3617 cost = get_initial_solution (data, set, inv);
3620 if (dump_file && (dump_flags & TDF_DETAILS))
3621 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3627 if (dump_file && (dump_flags & TDF_DETAILS))
3629 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3630 bitmap_print (dump_file, set, "", "");
3631 fprintf (dump_file, " invariants ");
3632 bitmap_print (dump_file, inv, "", "");
3633 fprintf (dump_file, "\n");
3636 while (try_improve_iv_set (data, set, inv, &cost))
3638 if (dump_file && (dump_flags & TDF_DETAILS))
3640 fprintf (dump_file, "Improved to (cost %d): ", cost);
3641 bitmap_print (dump_file, set, "", "");
3642 fprintf (dump_file, " invariants ");
3643 bitmap_print (dump_file, inv, "", "");
3644 fprintf (dump_file, "\n");
3648 if (dump_file && (dump_flags & TDF_DETAILS))
3649 fprintf (dump_file, "Final cost %d\n\n", cost);
3651 for (i = 0; i < n_iv_uses (data); i++)
3653 use = iv_use (data, i);
3654 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3662 /* Creates a new induction variable corresponding to CAND. */
3665 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3667 block_stmt_iterator incr_pos;
3677 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3681 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3686 /* Mark that the iv is preserved. */
3687 name_info (data, cand->var_before)->preserve_biv = true;
3688 name_info (data, cand->var_after)->preserve_biv = true;
3690 /* Rewrite the increment so that it uses var_before directly. */
3691 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3696 gimple_add_tmp_var (cand->var_before);
3697 add_referenced_tmp_var (cand->var_before);
3699 base = unshare_expr (cand->iv->base);
3701 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3702 &incr_pos, after, &cand->var_before, &cand->var_after);
3705 /* Creates new induction variables described in SET. */
3708 create_new_ivs (struct ivopts_data *data, bitmap set)
3711 struct iv_cand *cand;
3713 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3715 cand = iv_cand (data, i);
3716 create_new_iv (data, cand);
3720 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3721 is true, remove also the ssa name defined by the statement. */
3724 remove_statement (tree stmt, bool including_defined_name)
3726 if (TREE_CODE (stmt) == PHI_NODE)
3728 if (!including_defined_name)
3730 /* Prevent the ssa name defined by the statement from being removed. */
3731 SET_PHI_RESULT (stmt, NULL);
3733 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3737 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3743 /* Rewrites USE (definition of iv used in a nonlinear expression)
3744 using candidate CAND. */
3747 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3748 struct iv_use *use, struct iv_cand *cand)
3750 tree comp = unshare_expr (get_computation (data->current_loop,
3752 tree op, stmts, tgt, ass;
3753 block_stmt_iterator bsi, pbsi;
3755 switch (TREE_CODE (use->stmt))
3758 tgt = PHI_RESULT (use->stmt);
3760 /* If we should keep the biv, do not replace it. */
3761 if (name_info (data, tgt)->preserve_biv)
3764 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3765 while (!bsi_end_p (pbsi)
3766 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3774 tgt = TREE_OPERAND (use->stmt, 0);
3775 bsi = stmt_for_bsi (use->stmt);
3782 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3784 if (TREE_CODE (use->stmt) == PHI_NODE)
3787 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3788 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3789 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3790 remove_statement (use->stmt, false);
3791 SSA_NAME_DEF_STMT (tgt) = ass;
3796 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3797 TREE_OPERAND (use->stmt, 1) = op;
3801 /* Replaces ssa name in index IDX by its basic variable. Callback for
3805 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3806 void *data ATTRIBUTE_UNUSED)
3808 if (TREE_CODE (*idx) == SSA_NAME)
3809 *idx = SSA_NAME_VAR (*idx);
3813 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3816 unshare_and_remove_ssa_names (tree ref)
3818 ref = unshare_expr (ref);
3819 for_each_index (&ref, idx_remove_ssa_names, NULL);
3824 /* Rewrites base of memory access OP with expression WITH in statement
3825 pointed to by BSI. */
3828 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3830 tree var = get_base_address (*op), new_var, new_name, copy, name;
3833 if (!var || TREE_CODE (with) != SSA_NAME)
3836 if (TREE_CODE (var) == INDIRECT_REF)
3837 var = TREE_OPERAND (var, 0);
3838 if (TREE_CODE (var) == SSA_NAME)
3841 var = SSA_NAME_VAR (var);
3843 else if (DECL_P (var))
3848 if (var_ann (var)->type_mem_tag)
3849 var = var_ann (var)->type_mem_tag;
3851 /* We need to add a memory tag for the variable. But we do not want
3852 to add it to the temporary used for the computations, since this leads
3853 to problems in redundancy elimination when there are common parts
3854 in two computations referring to the different arrays. So we copy
3855 the variable to a new temporary. */
3856 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3858 new_name = duplicate_ssa_name (name, copy);
3861 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3862 add_referenced_tmp_var (new_var);
3863 var_ann (new_var)->type_mem_tag = var;
3864 new_name = make_ssa_name (new_var, copy);
3866 TREE_OPERAND (copy, 0) = new_name;
3867 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3873 if (TREE_CODE (*op) == INDIRECT_REF)
3874 orig = REF_ORIGINAL (*op);
3876 orig = unshare_and_remove_ssa_names (*op);
3878 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3879 /* Record the original reference, for purposes of alias analysis. */
3880 REF_ORIGINAL (*op) = orig;
3883 /* Rewrites USE (address that is an iv) using candidate CAND. */
3886 rewrite_use_address (struct ivopts_data *data,
3887 struct iv_use *use, struct iv_cand *cand)
3889 tree comp = unshare_expr (get_computation (data->current_loop,
3891 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3893 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3896 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3898 rewrite_address_base (&bsi, use->op_p, op);
3901 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3905 rewrite_use_compare (struct ivopts_data *data,
3906 struct iv_use *use, struct iv_cand *cand)
3909 tree *op_p, cond, op, stmts, bound;
3910 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3911 enum tree_code compare;
3913 if (may_eliminate_iv (data->current_loop,
3914 use, cand, &compare, &bound))
3916 op = force_gimple_operand (unshare_expr (bound), &stmts,
3920 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3922 *use->op_p = build2 (compare, boolean_type_node,
3923 var_at_stmt (data->current_loop,
3924 cand, use->stmt), op);
3925 modify_stmt (use->stmt);
3929 /* The induction variable elimination failed; just express the original
3931 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3934 op_p = &TREE_OPERAND (cond, 0);
3935 if (TREE_CODE (*op_p) != SSA_NAME
3936 || zero_p (get_iv (data, *op_p)->step))
3937 op_p = &TREE_OPERAND (cond, 1);
3939 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3941 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3946 /* Ensure that operand *OP_P may be used at the end of EXIT without
3947 violating loop closed ssa form. */
3950 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3953 struct loop *def_loop;
3956 use = USE_FROM_PTR (op_p);
3957 if (TREE_CODE (use) != SSA_NAME)
3960 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3964 def_loop = def_bb->loop_father;
3965 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3968 /* Try finding a phi node that copies the value out of the loop. */
3969 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
3970 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
3975 /* Create such a phi node. */
3976 tree new_name = duplicate_ssa_name (use, NULL);
3978 phi = create_phi_node (new_name, exit->dest);
3979 SSA_NAME_DEF_STMT (new_name) = phi;
3980 add_phi_arg (&phi, use, exit);
3983 SET_USE (op_p, PHI_RESULT (phi));
3986 /* Ensure that operands of STMT may be used at the end of EXIT without
3987 violating loop closed ssa form. */
3990 protect_loop_closed_ssa_form (edge exit, tree stmt)
3994 v_may_def_optype v_may_defs;
3997 get_stmt_operands (stmt);
3999 uses = STMT_USE_OPS (stmt);
4000 for (i = 0; i < NUM_USES (uses); i++)
4001 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4003 vuses = STMT_VUSE_OPS (stmt);
4004 for (i = 0; i < NUM_VUSES (vuses); i++)
4005 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4007 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4008 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4009 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4012 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4013 so that they are emitted on the correct place, and so that the loop closed
4014 ssa form is preserved. */
4017 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4019 tree_stmt_iterator tsi;
4020 block_stmt_iterator bsi;
4021 tree phi, stmt, def, next;
4023 if (exit->dest->pred->pred_next)
4024 split_loop_exit_edge (exit);
4026 if (TREE_CODE (stmts) == STATEMENT_LIST)
4028 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4029 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4032 protect_loop_closed_ssa_form (exit, stmts);
4034 /* Ensure there is label in exit->dest, so that we can
4036 tree_block_label (exit->dest);
4037 bsi = bsi_after_labels (exit->dest);
4038 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4043 for (phi = phi_nodes (exit->dest); phi; phi = next)
4045 next = TREE_CHAIN (phi);
4047 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4049 def = PHI_RESULT (phi);
4050 remove_statement (phi, false);
4051 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4053 SSA_NAME_DEF_STMT (def) = stmt;
4054 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4059 /* Rewrites the final value of USE (that is only needed outside of the loop)
4060 using candidate CAND. */
4063 rewrite_use_outer (struct ivopts_data *data,
4064 struct iv_use *use, struct iv_cand *cand)
4067 tree value, op, stmts, tgt;
4070 switch (TREE_CODE (use->stmt))
4073 tgt = PHI_RESULT (use->stmt);
4076 tgt = TREE_OPERAND (use->stmt, 0);
4082 exit = single_dom_exit (data->current_loop);
4088 bool ok = may_replace_final_value (data->current_loop, use, &value);
4092 value = get_computation_at (data->current_loop,
4093 use, cand, last_stmt (exit->src));
4095 value = unshare_expr (value);
4096 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4098 /* If we will preserve the iv anyway and we would need to perform
4099 some computation to replace the final value, do nothing. */
4100 if (stmts && name_info (data, tgt)->preserve_biv)
4103 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4105 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4107 if (USE_FROM_PTR (use_p) == tgt)
4108 SET_USE (use_p, op);
4112 compute_phi_arg_on_exit (exit, stmts, op);
4114 /* Enable removal of the statement. We cannot remove it directly,
4115 since we may still need the aliasing information attached to the
4116 ssa name defined by it. */
4117 name_info (data, tgt)->iv->have_use_for = false;
4121 /* If the variable is going to be preserved anyway, there is nothing to
4123 if (name_info (data, tgt)->preserve_biv)
4126 /* Otherwise we just need to compute the iv. */
4127 rewrite_use_nonlinear_expr (data, use, cand);
4130 /* Rewrites USE using candidate CAND. */
4133 rewrite_use (struct ivopts_data *data,
4134 struct iv_use *use, struct iv_cand *cand)
4138 case USE_NONLINEAR_EXPR:
4139 rewrite_use_nonlinear_expr (data, use, cand);
4143 rewrite_use_outer (data, use, cand);
4147 rewrite_use_address (data, use, cand);
4151 rewrite_use_compare (data, use, cand);
4157 modify_stmt (use->stmt);
4160 /* Rewrite the uses using the selected induction variables. */
4163 rewrite_uses (struct ivopts_data *data)
4166 struct iv_cand *cand;
4169 for (i = 0; i < n_iv_uses (data); i++)
4171 use = iv_use (data, i);
4172 cand = use->selected;
4175 rewrite_use (data, use, cand);
4179 /* Removes the ivs that are not used after rewriting. */
4182 remove_unused_ivs (struct ivopts_data *data)
4186 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4188 struct version_info *info;
4190 info = ver_info (data, j);
4192 && !zero_p (info->iv->step)
4194 && !info->iv->have_use_for
4195 && !info->preserve_biv)
4196 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4200 /* Frees data allocated by the optimization of a single loop. */
4203 free_loop_data (struct ivopts_data *data)
4207 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4209 struct version_info *info;
4211 info = ver_info (data, i);
4215 info->has_nonlin_use = false;
4216 info->preserve_biv = false;
4219 bitmap_clear (data->relevant);
4221 for (i = 0; i < n_iv_uses (data); i++)
4223 struct iv_use *use = iv_use (data, i);
4226 BITMAP_XFREE (use->related_cands);
4227 for (j = 0; j < use->n_map_members; j++)
4228 if (use->cost_map[j].depends_on)
4229 BITMAP_XFREE (use->cost_map[j].depends_on);
4230 free (use->cost_map);
4233 VARRAY_POP_ALL (data->iv_uses);
4235 for (i = 0; i < n_iv_cands (data); i++)
4237 struct iv_cand *cand = iv_cand (data, i);
4243 VARRAY_POP_ALL (data->iv_candidates);
4245 if (data->version_info_size < num_ssa_names)
4247 data->version_info_size = 2 * num_ssa_names;
4248 free (data->version_info);
4249 data->version_info = xcalloc (data->version_info_size,
4250 sizeof (struct version_info));
4253 data->max_inv_id = 0;
4255 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4257 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4259 SET_DECL_RTL (obj, NULL_RTX);
4261 VARRAY_POP_ALL (decl_rtl_to_reset);
4264 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4268 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4272 for (i = 1; i < loops->num; i++)
4273 if (loops->parray[i])
4275 free (loops->parray[i]->aux);
4276 loops->parray[i]->aux = NULL;
4279 free_loop_data (data);
4280 free (data->version_info);
4281 BITMAP_XFREE (data->relevant);
4283 VARRAY_FREE (decl_rtl_to_reset);
4284 VARRAY_FREE (data->iv_uses);
4285 VARRAY_FREE (data->iv_candidates);
4288 /* Optimizes the LOOP. Returns true if anything changed. */
4291 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4293 bool changed = false;
4297 data->current_loop = loop;
4299 if (dump_file && (dump_flags & TDF_DETAILS))
4301 fprintf (dump_file, "Processing loop %d\n", loop->num);
4303 exit = single_dom_exit (loop);
4306 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4307 exit->src->index, exit->dest->index);
4308 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4309 fprintf (dump_file, "\n");
4312 fprintf (dump_file, "\n");
4315 /* For each ssa name determines whether it behaves as an induction variable
4317 if (!find_induction_variables (data))
4320 /* Finds interesting uses (item 1). */
4321 find_interesting_uses (data);
4322 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4325 /* Finds candidates for the induction variables (item 2). */
4326 find_iv_candidates (data);
4328 /* Calculates the costs (item 3, part 1). */
4329 determine_use_iv_costs (data);
4330 determine_iv_costs (data);
4331 determine_set_costs (data);
4333 /* Find the optimal set of induction variables (item 3, part 2). */
4334 iv_set = find_optimal_iv_set (data);
4339 /* Create the new induction variables (item 4, part 1). */
4340 create_new_ivs (data, iv_set);
4342 /* Rewrite the uses (item 4, part 2). */
4343 rewrite_uses (data);
4345 /* Remove the ivs that are unused after rewriting. */
4346 remove_unused_ivs (data);
4348 loop_commit_inserts ();
4350 BITMAP_XFREE (iv_set);
4352 /* We have changed the structure of induction variables; it might happen
4353 that definitions in the scev database refer to some of them that were
4358 free_loop_data (data);
4363 /* Main entry point. Optimizes induction variables in LOOPS. */
4366 tree_ssa_iv_optimize (struct loops *loops)
4369 struct ivopts_data data;
4371 tree_ssa_iv_optimize_init (loops, &data);
4373 /* Optimize the loops starting with the innermost ones. */
4374 loop = loops->tree_root;
4378 #ifdef ENABLE_CHECKING
4379 verify_loop_closed_ssa ();
4383 /* Scan the loops, inner ones first. */
4384 while (loop != loops->tree_root)
4386 if (dump_file && (dump_flags & TDF_DETAILS))
4387 flow_loop_dump (loop, dump_file, NULL, 1);
4388 if (tree_ssa_iv_optimize_loop (&data, loop))
4390 #ifdef ENABLE_CHECKING
4391 verify_loop_closed_ssa ();
4406 tree_ssa_iv_optimize_finalize (loops, &data);