1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
99 /* Just to shorten the ugly names. */
100 #define EXEC_BINARY nondestructive_fold_binary_to_constant
101 #define EXEC_UNARY nondestructive_fold_unary_to_constant
103 /* Representation of the induction variable. */
106 tree base; /* Initial value of the iv. */
107 tree step; /* Step of the iv (constant only). */
108 tree ssa_name; /* The ssa name with the value. */
109 bool biv_p; /* Is it a biv? */
110 bool have_use_for; /* Do we already have a use for it? */
111 unsigned use_id; /* The identifier in the use if it is the case. */
114 /* Per-ssa version information (induction variable descriptions, etc.). */
117 tree name; /* The ssa name. */
118 struct iv *iv; /* Induction variable description. */
119 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
120 an expression that is not an induction variable. */
121 unsigned inv_id; /* Id of an invariant. */
122 bool preserve_biv; /* For the original biv, whether to preserve it. */
125 /* Information attached to loop. */
128 struct tree_niter_desc niter;
129 /* Number of iterations. */
131 unsigned regs_used; /* Number of registers used. */
137 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
138 USE_OUTER, /* The induction variable is used outside the loop. */
139 USE_ADDRESS, /* Use in an address. */
140 USE_COMPARE /* Use is a compare. */
143 /* The candidate - cost pair. */
146 struct iv_cand *cand; /* The candidate. */
147 unsigned cost; /* The cost. */
148 bitmap depends_on; /* The list of invariants that have to be
155 unsigned id; /* The id of the use. */
156 enum use_type type; /* Type of the use. */
157 struct iv *iv; /* The induction variable it is based on. */
158 tree stmt; /* Statement in that it occurs. */
159 tree *op_p; /* The place where it occurs. */
160 bitmap related_cands; /* The set of "related" iv candidates. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
196 /* The data used by the induction variable optimizations. */
200 /* The currently optimized loop. */
201 struct loop *current_loop;
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* Whether to consider just related and important candidates when replacing a
223 bool consider_all_candidates;
226 /* Bound on number of candidates below that all candidates are considered. */
228 #define CONSIDER_ALL_CANDIDATES_BOUND \
229 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
231 /* If there are more iv occurrences, we just give up (it is quite unlikely that
232 optimizing such a loop would help, and it would take ages). */
234 #define MAX_CONSIDERED_USES \
235 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
237 /* The list of trees for that the decl_rtl field must be reset is stored
240 static varray_type decl_rtl_to_reset;
242 /* Number of uses recorded in DATA. */
244 static inline unsigned
245 n_iv_uses (struct ivopts_data *data)
247 return VARRAY_ACTIVE_SIZE (data->iv_uses);
250 /* Ith use recorded in DATA. */
252 static inline struct iv_use *
253 iv_use (struct ivopts_data *data, unsigned i)
255 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
258 /* Number of candidates recorded in DATA. */
260 static inline unsigned
261 n_iv_cands (struct ivopts_data *data)
263 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
266 /* Ith candidate recorded in DATA. */
268 static inline struct iv_cand *
269 iv_cand (struct ivopts_data *data, unsigned i)
271 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
274 /* The data for LOOP. */
276 static inline struct loop_data *
277 loop_data (struct loop *loop)
282 /* The single loop exit if it dominates the latch, NULL otherwise. */
285 single_dom_exit (struct loop *loop)
287 edge exit = loop->single_exit;
292 if (!just_once_each_iteration_p (loop, exit->src))
298 /* Dumps information about the induction variable IV to FILE. */
300 extern void dump_iv (FILE *, struct iv *);
302 dump_iv (FILE *file, struct iv *iv)
304 fprintf (file, "ssa name ");
305 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
306 fprintf (file, "\n");
308 fprintf (file, " type ");
309 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
310 fprintf (file, "\n");
314 fprintf (file, " base ");
315 print_generic_expr (file, iv->base, TDF_SLIM);
316 fprintf (file, "\n");
318 fprintf (file, " step ");
319 print_generic_expr (file, iv->step, TDF_SLIM);
320 fprintf (file, "\n");
324 fprintf (file, " invariant ");
325 print_generic_expr (file, iv->base, TDF_SLIM);
326 fprintf (file, "\n");
330 fprintf (file, " is a biv\n");
333 /* Dumps information about the USE to FILE. */
335 extern void dump_use (FILE *, struct iv_use *);
337 dump_use (FILE *file, struct iv_use *use)
339 struct iv *iv = use->iv;
341 fprintf (file, "use %d\n", use->id);
345 case USE_NONLINEAR_EXPR:
346 fprintf (file, " generic\n");
350 fprintf (file, " outside\n");
354 fprintf (file, " address\n");
358 fprintf (file, " compare\n");
365 fprintf (file, " in statement ");
366 print_generic_expr (file, use->stmt, TDF_SLIM);
367 fprintf (file, "\n");
369 fprintf (file, " at position ");
371 print_generic_expr (file, *use->op_p, TDF_SLIM);
372 fprintf (file, "\n");
374 fprintf (file, " type ");
375 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
376 fprintf (file, "\n");
380 fprintf (file, " base ");
381 print_generic_expr (file, iv->base, TDF_SLIM);
382 fprintf (file, "\n");
384 fprintf (file, " step ");
385 print_generic_expr (file, iv->step, TDF_SLIM);
386 fprintf (file, "\n");
390 fprintf (file, " invariant ");
391 print_generic_expr (file, iv->base, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " related candidates ");
396 dump_bitmap (file, use->related_cands);
399 /* Dumps information about the uses to FILE. */
401 extern void dump_uses (FILE *, struct ivopts_data *);
403 dump_uses (FILE *file, struct ivopts_data *data)
408 for (i = 0; i < n_iv_uses (data); i++)
410 use = iv_use (data, i);
412 dump_use (file, use);
413 fprintf (file, "\n");
417 /* Dumps information about induction variable candidate CAND to FILE. */
419 extern void dump_cand (FILE *, struct iv_cand *);
421 dump_cand (FILE *file, struct iv_cand *cand)
423 struct iv *iv = cand->iv;
425 fprintf (file, "candidate %d%s\n",
426 cand->id, cand->important ? " (important)" : "");
430 fprintf (file, " final value replacement\n");
437 fprintf (file, " incremented before exit test\n");
441 fprintf (file, " incremented at end\n");
445 fprintf (file, " original biv\n");
449 fprintf (file, " type ");
450 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
451 fprintf (file, "\n");
455 fprintf (file, " base ");
456 print_generic_expr (file, iv->base, TDF_SLIM);
457 fprintf (file, "\n");
459 fprintf (file, " step ");
460 print_generic_expr (file, iv->step, TDF_SLIM);
461 fprintf (file, "\n");
465 fprintf (file, " invariant ");
466 print_generic_expr (file, iv->base, TDF_SLIM);
467 fprintf (file, "\n");
471 /* Returns the info for ssa version VER. */
473 static inline struct version_info *
474 ver_info (struct ivopts_data *data, unsigned ver)
476 return data->version_info + ver;
479 /* Returns the info for ssa name NAME. */
481 static inline struct version_info *
482 name_info (struct ivopts_data *data, tree name)
484 return ver_info (data, SSA_NAME_VERSION (name));
487 /* Checks whether there exists number X such that X * B = A, counting modulo
491 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
494 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
495 unsigned HOST_WIDE_INT inv, ex, val;
501 /* First divide the whole equation by 2 as long as possible. */
502 while (!(a & 1) && !(b & 1))
512 /* If b is still even, a is odd and there is no such x. */
516 /* Find the inverse of b. We compute it as
517 b^(2^(bits - 1) - 1) (mod 2^bits). */
520 for (i = 0; i < bits - 1; i++)
522 inv = (inv * ex) & mask;
523 ex = (ex * ex) & mask;
526 val = (a * inv) & mask;
528 gcc_assert (((val * b) & mask) == a);
530 if ((val >> (bits - 1)) & 1)
538 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
542 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
544 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
548 if (sbb == loop->latch)
554 return stmt == last_stmt (bb);
557 /* Returns true if STMT if after the place where the original induction
558 variable CAND is incremented. */
561 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
563 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
564 basic_block stmt_bb = bb_for_stmt (stmt);
565 block_stmt_iterator bsi;
567 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
570 if (stmt_bb != cand_bb)
573 /* Scan the block from the end, since the original ivs are usually
574 incremented at the end of the loop body. */
575 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
577 if (bsi_stmt (bsi) == cand->incremented_at)
579 if (bsi_stmt (bsi) == stmt)
584 /* Returns true if STMT if after the place where the induction variable
585 CAND is incremented in LOOP. */
588 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
596 return stmt_after_ip_normal_pos (loop, stmt);
599 return stmt_after_ip_original_pos (cand, stmt);
606 /* Initializes data structures used by the iv optimization pass, stored
607 in DATA. LOOPS is the loop tree. */
610 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
614 data->version_info_size = 2 * num_ssa_names;
615 data->version_info = xcalloc (data->version_info_size,
616 sizeof (struct version_info));
617 data->relevant = BITMAP_XMALLOC ();
618 data->max_inv_id = 0;
620 for (i = 1; i < loops->num; i++)
621 if (loops->parray[i])
622 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
624 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
625 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
626 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
629 /* Allocates an induction variable with given initial value BASE and step STEP
633 alloc_iv (tree base, tree step)
635 struct iv *iv = xcalloc (1, sizeof (struct iv));
637 if (step && integer_zerop (step))
643 iv->have_use_for = false;
645 iv->ssa_name = NULL_TREE;
650 /* Sets STEP and BASE for induction variable IV. */
653 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
655 struct version_info *info = name_info (data, iv);
657 gcc_assert (!info->iv);
659 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
660 info->iv = alloc_iv (base, step);
661 info->iv->ssa_name = iv;
664 /* Finds induction variable declaration for VAR. */
667 get_iv (struct ivopts_data *data, tree var)
671 if (!name_info (data, var)->iv)
673 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
676 || !flow_bb_inside_loop_p (data->current_loop, bb))
677 set_iv (data, var, var, NULL_TREE);
680 return name_info (data, var)->iv;
683 /* Determines the step of a biv defined in PHI. */
686 determine_biv_step (tree phi)
688 struct loop *loop = bb_for_stmt (phi)->loop_father;
689 tree name = PHI_RESULT (phi), base, step;
690 tree type = TREE_TYPE (name);
692 if (!is_gimple_reg (name))
695 if (!simple_iv (loop, phi, name, &base, &step))
699 return build_int_cst (type, 0);
704 /* Returns false if INDEX is a ssa name that occurs in an
705 abnormal phi node. Callback for for_each_index. */
708 idx_contains_abnormal_ssa_name_p (tree base ATTRIBUTE_UNUSED, tree *index,
709 void *data ATTRIBUTE_UNUSED)
711 if (TREE_CODE (*index) != SSA_NAME)
714 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*index) == 0;
717 /* Returns true if EXPR contains a ssa name that occurs in an
718 abnormal phi node. */
721 contains_abnormal_ssa_name_p (tree expr)
723 enum tree_code code = TREE_CODE (expr);
724 enum tree_code_class class = TREE_CODE_CLASS (code);
726 if (code == SSA_NAME)
727 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
729 if (code == INTEGER_CST
730 || is_gimple_min_invariant (expr))
733 if (code == ADDR_EXPR)
734 return !for_each_index (&TREE_OPERAND (expr, 1),
735 idx_contains_abnormal_ssa_name_p,
742 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
747 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
759 /* Finds basic ivs. */
762 find_bivs (struct ivopts_data *data)
764 tree phi, step, type, base;
766 struct loop *loop = data->current_loop;
768 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
770 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
773 step = determine_biv_step (phi);
777 if (cst_and_fits_in_hwi (step)
778 && int_cst_value (step) == 0)
781 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
782 if (contains_abnormal_ssa_name_p (base))
785 type = TREE_TYPE (PHI_RESULT (phi));
786 base = fold_convert (type, base);
787 step = fold_convert (type, step);
789 /* FIXME: We do not handle induction variables whose step does
790 not satisfy cst_and_fits_in_hwi. */
791 if (!cst_and_fits_in_hwi (step))
794 set_iv (data, PHI_RESULT (phi), base, step);
801 /* Marks basic ivs. */
804 mark_bivs (struct ivopts_data *data)
807 struct iv *iv, *incr_iv;
808 struct loop *loop = data->current_loop;
811 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
813 iv = get_iv (data, PHI_RESULT (phi));
817 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
818 incr_iv = get_iv (data, var);
822 /* If the increment is in the subloop, ignore it. */
823 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
824 if (incr_bb->loop_father != data->current_loop
825 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
829 incr_iv->biv_p = true;
833 /* Checks whether STMT defines a linear induction variable and stores its
834 parameters to BASE and STEP. */
837 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
838 tree *base, tree *step)
841 struct loop *loop = data->current_loop;
846 if (TREE_CODE (stmt) != MODIFY_EXPR)
849 lhs = TREE_OPERAND (stmt, 0);
850 if (TREE_CODE (lhs) != SSA_NAME)
853 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
856 /* FIXME: We do not handle induction variables whose step does
857 not satisfy cst_and_fits_in_hwi. */
859 && !cst_and_fits_in_hwi (*step))
862 if (contains_abnormal_ssa_name_p (*base))
868 /* Finds general ivs in statement STMT. */
871 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
875 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
878 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
881 /* Finds general ivs in basic block BB. */
884 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
886 block_stmt_iterator bsi;
888 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
889 find_givs_in_stmt (data, bsi_stmt (bsi));
892 /* Finds general ivs. */
895 find_givs (struct ivopts_data *data)
897 struct loop *loop = data->current_loop;
898 basic_block *body = get_loop_body_in_dom_order (loop);
901 for (i = 0; i < loop->num_nodes; i++)
902 find_givs_in_bb (data, body[i]);
906 /* Determine the number of iterations of the current loop. */
909 determine_number_of_iterations (struct ivopts_data *data)
911 struct loop *loop = data->current_loop;
912 edge exit = single_dom_exit (loop);
917 number_of_iterations_exit (loop, exit, &loop_data (loop)->niter);
920 /* For each ssa name defined in LOOP determines whether it is an induction
921 variable and if so, its initial value and step. */
924 find_induction_variables (struct ivopts_data *data)
927 struct loop *loop = data->current_loop;
929 if (!find_bivs (data))
934 determine_number_of_iterations (data);
936 if (dump_file && (dump_flags & TDF_DETAILS))
938 if (loop_data (loop)->niter.niter)
940 fprintf (dump_file, " number of iterations ");
941 print_generic_expr (dump_file, loop_data (loop)->niter.niter,
943 fprintf (dump_file, "\n");
945 fprintf (dump_file, " may be zero if ");
946 print_generic_expr (dump_file, loop_data (loop)->niter.may_be_zero,
948 fprintf (dump_file, "\n");
950 fprintf (dump_file, " bogus unless ");
951 print_generic_expr (dump_file, loop_data (loop)->niter.assumptions,
953 fprintf (dump_file, "\n");
954 fprintf (dump_file, "\n");
957 fprintf (dump_file, "Induction variables:\n\n");
959 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
961 if (ver_info (data, i)->iv)
962 dump_iv (dump_file, ver_info (data, i)->iv);
969 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
971 static struct iv_use *
972 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
973 tree stmt, enum use_type use_type)
975 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
977 use->id = n_iv_uses (data);
978 use->type = use_type;
982 use->related_cands = BITMAP_XMALLOC ();
984 if (dump_file && (dump_flags & TDF_DETAILS))
985 dump_use (dump_file, use);
987 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
992 /* Checks whether OP is a loop-level invariant and if so, records it.
993 NONLINEAR_USE is true if the invariant is used in a way we do not
997 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1000 struct version_info *info;
1002 if (TREE_CODE (op) != SSA_NAME
1003 || !is_gimple_reg (op))
1006 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1008 && flow_bb_inside_loop_p (data->current_loop, bb))
1011 info = name_info (data, op);
1013 info->has_nonlin_use |= nonlinear_use;
1015 info->inv_id = ++data->max_inv_id;
1016 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1019 /* Checks whether the use OP is interesting and if so, records it
1022 static struct iv_use *
1023 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1031 if (TREE_CODE (op) != SSA_NAME)
1034 iv = get_iv (data, op);
1038 if (iv->have_use_for)
1040 use = iv_use (data, iv->use_id);
1042 gcc_assert (use->type == USE_NONLINEAR_EXPR
1043 || use->type == USE_OUTER);
1045 if (type == USE_NONLINEAR_EXPR)
1046 use->type = USE_NONLINEAR_EXPR;
1050 if (zero_p (iv->step))
1052 record_invariant (data, op, true);
1055 iv->have_use_for = true;
1057 civ = xmalloc (sizeof (struct iv));
1060 stmt = SSA_NAME_DEF_STMT (op);
1061 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1062 || TREE_CODE (stmt) == MODIFY_EXPR);
1064 use = record_use (data, NULL, civ, stmt, type);
1065 iv->use_id = use->id;
1070 /* Checks whether the use OP is interesting and if so, records it. */
1072 static struct iv_use *
1073 find_interesting_uses_op (struct ivopts_data *data, tree op)
1075 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1078 /* Records a definition of induction variable OP that is used outside of the
1081 static struct iv_use *
1082 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1084 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1087 /* Checks whether the condition *COND_P in STMT is interesting
1088 and if so, records it. */
1091 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1095 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1097 tree zero = integer_zero_node;
1099 const_iv.step = NULL_TREE;
1101 if (integer_zerop (*cond_p)
1102 || integer_nonzerop (*cond_p))
1105 if (TREE_CODE (*cond_p) == SSA_NAME)
1112 op0_p = &TREE_OPERAND (*cond_p, 0);
1113 op1_p = &TREE_OPERAND (*cond_p, 1);
1116 if (TREE_CODE (*op0_p) == SSA_NAME)
1117 iv0 = get_iv (data, *op0_p);
1121 if (TREE_CODE (*op1_p) == SSA_NAME)
1122 iv1 = get_iv (data, *op1_p);
1126 if (/* When comparing with non-invariant value, we may not do any senseful
1127 induction variable elimination. */
1129 /* Eliminating condition based on two ivs would be nontrivial.
1130 ??? TODO -- it is not really important to handle this case. */
1131 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1133 find_interesting_uses_op (data, *op0_p);
1134 find_interesting_uses_op (data, *op1_p);
1138 if (zero_p (iv0->step) && zero_p (iv1->step))
1140 /* If both are invariants, this is a work for unswitching. */
1144 civ = xmalloc (sizeof (struct iv));
1145 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1146 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1149 /* Cumulates the steps of indices into DATA and replaces their values with the
1150 initial ones. Returns false when the value of the index cannot be determined.
1151 Callback for for_each_index. */
1153 struct ifs_ivopts_data
1155 struct ivopts_data *ivopts_data;
1161 idx_find_step (tree base, tree *idx, void *data)
1163 struct ifs_ivopts_data *dta = data;
1165 tree step, type, iv_type, iv_step, lbound;
1167 struct loop *loop = dta->ivopts_data->current_loop;
1169 if (TREE_CODE (*idx) != SSA_NAME)
1172 iv = get_iv (dta->ivopts_data, *idx);
1181 iv_type = TREE_TYPE (iv->base);
1182 type = build_pointer_type (TREE_TYPE (base));
1183 if (TREE_CODE (base) == ARRAY_REF)
1185 step = array_ref_element_size (base);
1186 lbound = array_ref_low_bound (base);
1188 /* We only handle addresses whose step is an integer constant. */
1189 if (TREE_CODE (step) != INTEGER_CST)
1192 /* We need the lower bound to be invariant in loop, since otherwise
1193 we are unable to initialize a new induction variable created
1194 in strength reduction -- we need to take the address of the
1195 reference in front of the loop. */
1196 if (is_gimple_min_invariant (lbound))
1197 ; /* Nothing to do. */
1198 else if (TREE_CODE (lbound) != SSA_NAME)
1202 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (lbound));
1204 && flow_bb_inside_loop_p (loop, def_bb))
1209 /* The step for pointer arithmetics already is 1 byte. */
1210 step = build_int_cst (type, 1);
1212 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1213 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1214 type, iv->base, iv->step, dta->stmt);
1216 iv_step = fold_convert (iv_type, iv->step);
1220 /* The index might wrap. */
1224 step = EXEC_BINARY (MULT_EXPR, type, step, iv_step);
1227 *dta->step_p = step;
1229 *dta->step_p = EXEC_BINARY (PLUS_EXPR, type, *dta->step_p, step);
1234 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1235 object is passed to it in DATA. */
1238 idx_record_use (tree base, tree *idx,
1241 find_interesting_uses_op (data, *idx);
1242 if (TREE_CODE (base) == ARRAY_REF)
1244 find_interesting_uses_op (data, array_ref_element_size (base));
1245 find_interesting_uses_op (data, array_ref_low_bound (base));
1250 /* Finds addresses in *OP_P inside STMT. */
1253 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1255 tree base = unshare_expr (*op_p), step = NULL;
1257 struct ifs_ivopts_data ifs_ivopts_data;
1259 /* Ignore bitfields for now. Not really something terribly complicated
1261 if (TREE_CODE (base) == COMPONENT_REF
1262 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1265 ifs_ivopts_data.ivopts_data = data;
1266 ifs_ivopts_data.stmt = stmt;
1267 ifs_ivopts_data.step_p = &step;
1268 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1272 if (TREE_CODE (base) == INDIRECT_REF)
1273 base = TREE_OPERAND (base, 0);
1275 base = build_addr (base);
1277 civ = alloc_iv (base, step);
1278 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1282 for_each_index (op_p, idx_record_use, data);
1285 /* Finds and records invariants used in STMT. */
1288 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1290 use_optype uses = NULL;
1294 if (TREE_CODE (stmt) == PHI_NODE)
1295 n = PHI_NUM_ARGS (stmt);
1298 get_stmt_operands (stmt);
1299 uses = STMT_USE_OPS (stmt);
1300 n = NUM_USES (uses);
1303 for (i = 0; i < n; i++)
1305 if (TREE_CODE (stmt) == PHI_NODE)
1306 op = PHI_ARG_DEF (stmt, i);
1308 op = USE_OP (uses, i);
1310 record_invariant (data, op, false);
1314 /* Finds interesting uses of induction variables in the statement STMT. */
1317 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1321 use_optype uses = NULL;
1324 find_invariants_stmt (data, stmt);
1326 if (TREE_CODE (stmt) == COND_EXPR)
1328 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1332 if (TREE_CODE (stmt) == MODIFY_EXPR)
1334 lhs = TREE_OPERAND (stmt, 0);
1335 rhs = TREE_OPERAND (stmt, 1);
1337 if (TREE_CODE (lhs) == SSA_NAME)
1339 /* If the statement defines an induction variable, the uses are not
1340 interesting by themselves. */
1342 iv = get_iv (data, lhs);
1344 if (iv && !zero_p (iv->step))
1348 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1350 case tcc_comparison:
1351 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1355 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1356 if (REFERENCE_CLASS_P (lhs))
1357 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1363 if (REFERENCE_CLASS_P (lhs)
1364 && is_gimple_val (rhs))
1366 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1367 find_interesting_uses_op (data, rhs);
1371 /* TODO -- we should also handle address uses of type
1373 memory = call (whatever);
1380 if (TREE_CODE (stmt) == PHI_NODE
1381 && bb_for_stmt (stmt) == data->current_loop->header)
1383 lhs = PHI_RESULT (stmt);
1384 iv = get_iv (data, lhs);
1386 if (iv && !zero_p (iv->step))
1390 if (TREE_CODE (stmt) == PHI_NODE)
1391 n = PHI_NUM_ARGS (stmt);
1394 uses = STMT_USE_OPS (stmt);
1395 n = NUM_USES (uses);
1398 for (i = 0; i < n; i++)
1400 if (TREE_CODE (stmt) == PHI_NODE)
1401 op = PHI_ARG_DEF (stmt, i);
1403 op = USE_OP (uses, i);
1405 if (TREE_CODE (op) != SSA_NAME)
1408 iv = get_iv (data, op);
1412 find_interesting_uses_op (data, op);
1416 /* Finds interesting uses of induction variables outside of loops
1417 on loop exit edge EXIT. */
1420 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1424 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
1426 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1427 find_interesting_uses_outer (data, def);
1431 /* Finds uses of the induction variables that are interesting. */
1434 find_interesting_uses (struct ivopts_data *data)
1437 block_stmt_iterator bsi;
1439 basic_block *body = get_loop_body (data->current_loop);
1441 struct version_info *info;
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1445 fprintf (dump_file, "Uses:\n\n");
1447 for (i = 0; i < data->current_loop->num_nodes; i++)
1451 for (e = bb->succ; e; e = e->succ_next)
1452 if (e->dest != EXIT_BLOCK_PTR
1453 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1454 find_interesting_uses_outside (data, e);
1456 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
1457 find_interesting_uses_stmt (data, phi);
1458 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1459 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1462 if (dump_file && (dump_flags & TDF_DETAILS))
1464 fprintf (dump_file, "\n");
1466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1468 info = ver_info (data, i);
1471 fprintf (dump_file, " ");
1472 print_generic_expr (dump_file, info->name, TDF_SLIM);
1473 fprintf (dump_file, " is invariant (%d)%s\n",
1474 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1478 fprintf (dump_file, "\n");
1484 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1485 position to POS. If USE is not NULL, the candidate is set as related to
1486 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1487 replacement of the final value of the iv by a direct computation. */
1489 static struct iv_cand *
1490 add_candidate_1 (struct ivopts_data *data,
1491 tree base, tree step, bool important, enum iv_position pos,
1492 struct iv_use *use, tree incremented_at)
1495 struct iv_cand *cand = NULL;
1500 type = TREE_TYPE (base);
1501 if (!TYPE_UNSIGNED (type))
1503 type = unsigned_type_for (type);
1504 base = fold_convert (type, base);
1506 step = fold_convert (type, step);
1510 for (i = 0; i < n_iv_cands (data); i++)
1512 cand = iv_cand (data, i);
1514 if (cand->pos != pos)
1517 if (cand->incremented_at != incremented_at)
1531 if (!operand_equal_p (base, cand->iv->base, 0))
1534 if (zero_p (cand->iv->step))
1541 if (step && operand_equal_p (step, cand->iv->step, 0))
1546 if (i == n_iv_cands (data))
1548 cand = xcalloc (1, sizeof (struct iv_cand));
1554 cand->iv = alloc_iv (base, step);
1557 if (pos != IP_ORIGINAL && cand->iv)
1559 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1560 cand->var_after = cand->var_before;
1562 cand->important = important;
1563 cand->incremented_at = incremented_at;
1564 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1567 dump_cand (dump_file, cand);
1570 if (important && !cand->important)
1572 cand->important = true;
1573 if (dump_file && (dump_flags & TDF_DETAILS))
1574 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1579 bitmap_set_bit (use->related_cands, i);
1580 if (dump_file && (dump_flags & TDF_DETAILS))
1581 fprintf (dump_file, "Candidate %d is related to use %d\n",
1588 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1589 position to POS. If USE is not NULL, the candidate is set as related to
1590 it. The candidate computation is scheduled on all available positions. */
1593 add_candidate (struct ivopts_data *data,
1594 tree base, tree step, bool important, struct iv_use *use)
1596 if (ip_normal_pos (data->current_loop))
1597 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1598 if (ip_end_pos (data->current_loop))
1599 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1602 /* Adds standard iv candidates. */
1605 add_standard_iv_candidates (struct ivopts_data *data)
1607 /* Add 0 + 1 * iteration candidate. */
1608 add_candidate (data,
1609 build_int_cst (unsigned_intSI_type_node, 0),
1610 build_int_cst (unsigned_intSI_type_node, 1),
1613 /* The same for a long type if it is still fast enough. */
1614 if (BITS_PER_WORD > 32)
1615 add_candidate (data,
1616 build_int_cst (unsigned_intDI_type_node, 0),
1617 build_int_cst (unsigned_intDI_type_node, 1),
1622 /* Adds candidates bases on the old induction variable IV. */
1625 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
1628 struct iv_cand *cand;
1630 add_candidate (data, iv->base, iv->step, true, NULL);
1632 /* The same, but with initial value zero. */
1633 add_candidate (data,
1634 build_int_cst (TREE_TYPE (iv->base), 0),
1635 iv->step, true, NULL);
1637 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
1638 if (TREE_CODE (phi) == PHI_NODE)
1640 /* Additionally record the possibility of leaving the original iv
1642 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
1643 cand = add_candidate_1 (data,
1644 iv->base, iv->step, true, IP_ORIGINAL, NULL,
1645 SSA_NAME_DEF_STMT (def));
1646 cand->var_before = iv->ssa_name;
1647 cand->var_after = def;
1651 /* Adds candidates based on the old induction variables. */
1654 add_old_ivs_candidates (struct ivopts_data *data)
1659 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
1661 iv = ver_info (data, i)->iv;
1662 if (iv && iv->biv_p && !zero_p (iv->step))
1663 add_old_iv_candidates (data, iv);
1667 /* Adds candidates based on the value of the induction variable IV and USE. */
1670 add_iv_value_candidates (struct ivopts_data *data,
1671 struct iv *iv, struct iv_use *use)
1673 add_candidate (data, iv->base, iv->step, false, use);
1675 /* The same, but with initial value zero. */
1676 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
1677 iv->step, false, use);
1680 /* Adds candidates based on the address IV and USE. */
1683 add_address_candidates (struct ivopts_data *data,
1684 struct iv *iv, struct iv_use *use)
1686 tree base, abase, tmp, *act;
1688 /* First, the trivial choices. */
1689 add_iv_value_candidates (data, iv, use);
1691 /* Second, try removing the COMPONENT_REFs. */
1692 if (TREE_CODE (iv->base) == ADDR_EXPR)
1694 base = TREE_OPERAND (iv->base, 0);
1695 while (TREE_CODE (base) == COMPONENT_REF
1696 || (TREE_CODE (base) == ARRAY_REF
1697 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
1698 base = TREE_OPERAND (base, 0);
1700 if (base != TREE_OPERAND (iv->base, 0))
1702 if (TREE_CODE (base) == INDIRECT_REF)
1703 base = TREE_OPERAND (base, 0);
1705 base = build_addr (base);
1706 add_candidate (data, base, iv->step, false, use);
1710 /* Third, try removing the constant offset. */
1712 while (TREE_CODE (abase) == PLUS_EXPR
1713 && TREE_CODE (TREE_OPERAND (abase, 1)) != INTEGER_CST)
1714 abase = TREE_OPERAND (abase, 0);
1715 /* We found the offset, so make the copy of the non-shared part and
1717 if (TREE_CODE (abase) == PLUS_EXPR)
1722 for (tmp = iv->base; tmp != abase; tmp = TREE_OPERAND (tmp, 0))
1724 *act = build2 (PLUS_EXPR, TREE_TYPE (tmp),
1725 NULL_TREE, TREE_OPERAND (tmp, 1));
1726 act = &TREE_OPERAND (*act, 0);
1728 *act = TREE_OPERAND (tmp, 0);
1730 add_candidate (data, base, iv->step, false, use);
1734 /* Possibly adds pseudocandidate for replacing the final value of USE by
1735 a direct computation. */
1738 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
1740 struct tree_niter_desc *niter;
1741 struct loop *loop = data->current_loop;
1743 /* We must know where we exit the loop and how many times does it roll. */
1744 if (!single_dom_exit (loop))
1747 niter = &loop_data (loop)->niter;
1749 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
1750 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
1753 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
1756 /* Adds candidates based on the uses. */
1759 add_derived_ivs_candidates (struct ivopts_data *data)
1763 for (i = 0; i < n_iv_uses (data); i++)
1765 struct iv_use *use = iv_use (data, i);
1772 case USE_NONLINEAR_EXPR:
1774 /* Just add the ivs based on the value of the iv used here. */
1775 add_iv_value_candidates (data, use->iv, use);
1779 add_iv_value_candidates (data, use->iv, use);
1781 /* Additionally, add the pseudocandidate for the possibility to
1782 replace the final value by a direct computation. */
1783 add_iv_outer_candidates (data, use);
1787 add_address_candidates (data, use->iv, use);
1796 /* Finds the candidates for the induction variables. */
1799 find_iv_candidates (struct ivopts_data *data)
1801 /* Add commonly used ivs. */
1802 add_standard_iv_candidates (data);
1804 /* Add old induction variables. */
1805 add_old_ivs_candidates (data);
1807 /* Add induction variables derived from uses. */
1808 add_derived_ivs_candidates (data);
1811 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
1812 If consider_all_candidates is true, we use a two-dimensional array, otherwise
1813 we allocate a simple list to every use. */
1816 alloc_use_cost_map (struct ivopts_data *data)
1818 unsigned i, n_imp = 0, size, j;
1820 if (!data->consider_all_candidates)
1822 for (i = 0; i < n_iv_cands (data); i++)
1824 struct iv_cand *cand = iv_cand (data, i);
1825 if (cand->important)
1830 for (i = 0; i < n_iv_uses (data); i++)
1832 struct iv_use *use = iv_use (data, i);
1834 if (data->consider_all_candidates)
1836 size = n_iv_cands (data);
1837 use->n_map_members = size;
1842 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, size++);
1843 use->n_map_members = 0;
1846 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
1850 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
1851 on invariants DEPENDS_ON. */
1854 set_use_iv_cost (struct ivopts_data *data,
1855 struct iv_use *use, struct iv_cand *cand, unsigned cost,
1861 BITMAP_XFREE (depends_on);
1865 if (data->consider_all_candidates)
1867 use->cost_map[cand->id].cand = cand;
1868 use->cost_map[cand->id].cost = cost;
1869 use->cost_map[cand->id].depends_on = depends_on;
1876 use->cost_map[use->n_map_members].cand = cand;
1877 use->cost_map[use->n_map_members].cost = cost;
1878 use->cost_map[use->n_map_members].depends_on = depends_on;
1879 use->n_map_members++;
1882 /* Gets cost of (USE, CANDIDATE) pair. Stores the bitmap of dependencies to
1886 get_use_iv_cost (struct ivopts_data *data,
1887 struct iv_use *use, struct iv_cand *cand, bitmap *depends_on)
1894 if (data->consider_all_candidates)
1898 for (i = 0; i < use->n_map_members; i++)
1899 if (use->cost_map[i].cand == cand)
1902 if (i == use->n_map_members)
1907 *depends_on = use->cost_map[i].depends_on;
1908 return use->cost_map[i].cost;
1911 /* Returns estimate on cost of computing SEQ. */
1919 for (; seq; seq = NEXT_INSN (seq))
1921 set = single_set (seq);
1923 cost += rtx_cost (set, SET);
1931 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
1933 produce_memory_decl_rtl (tree obj, int *regno)
1938 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
1940 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
1941 x = gen_rtx_SYMBOL_REF (Pmode, name);
1944 x = gen_raw_REG (Pmode, (*regno)++);
1946 return gen_rtx_MEM (DECL_MODE (obj), x);
1949 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
1950 walk_tree. DATA contains the actual fake register number. */
1953 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
1955 tree obj = NULL_TREE;
1959 switch (TREE_CODE (*expr_p))
1962 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1963 (handled_component_p (*expr_p)
1964 || TREE_CODE (*expr_p) == REALPART_EXPR
1965 || TREE_CODE (*expr_p) == IMAGPART_EXPR);
1966 expr_p = &TREE_OPERAND (*expr_p, 0));
1969 x = produce_memory_decl_rtl (obj, regno);
1974 obj = SSA_NAME_VAR (*expr_p);
1975 if (!DECL_RTL_SET_P (obj))
1976 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
1985 if (DECL_RTL_SET_P (obj))
1988 if (DECL_MODE (obj) == BLKmode)
1989 x = produce_memory_decl_rtl (obj, regno);
1991 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2001 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2002 SET_DECL_RTL (obj, x);
2008 /* Determines cost of the computation of EXPR. */
2011 computation_cost (tree expr)
2014 tree type = TREE_TYPE (expr);
2018 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2020 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2024 cost = seq_cost (seq);
2025 if (GET_CODE (rslt) == MEM)
2026 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2031 /* Returns variable containing the value of candidate CAND at statement AT. */
2034 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2036 if (stmt_after_increment (loop, cand, stmt))
2037 return cand->var_after;
2039 return cand->var_before;
2042 /* Determines the expression by that USE is expressed from induction variable
2043 CAND at statement AT in LOOP. */
2046 get_computation_at (struct loop *loop,
2047 struct iv_use *use, struct iv_cand *cand, tree at)
2049 tree ubase = use->iv->base;
2050 tree ustep = use->iv->step;
2051 tree cbase = cand->iv->base;
2052 tree cstep = cand->iv->step;
2053 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2057 unsigned HOST_WIDE_INT ustepi, cstepi;
2058 HOST_WIDE_INT ratioi;
2060 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2062 /* We do not have a precision to express the values of use. */
2066 expr = var_at_stmt (loop, cand, at);
2068 if (TREE_TYPE (expr) != ctype)
2070 /* This may happen with the original ivs. */
2071 expr = fold_convert (ctype, expr);
2074 if (TYPE_UNSIGNED (utype))
2078 uutype = unsigned_type_for (utype);
2079 ubase = fold_convert (uutype, ubase);
2080 ustep = fold_convert (uutype, ustep);
2083 if (uutype != ctype)
2085 expr = fold_convert (uutype, expr);
2086 cbase = fold_convert (uutype, cbase);
2087 cstep = fold_convert (uutype, cstep);
2090 if (!cst_and_fits_in_hwi (cstep)
2091 || !cst_and_fits_in_hwi (ustep))
2094 ustepi = int_cst_value (ustep);
2095 cstepi = int_cst_value (cstep);
2097 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2099 /* TODO maybe consider case when ustep divides cstep and the ratio is
2100 a power of 2 (so that the division is fast to execute)? We would
2101 need to be much more careful with overflows etc. then. */
2105 /* We may need to shift the value if we are after the increment. */
2106 if (stmt_after_increment (loop, cand, at))
2107 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2109 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2110 or |ratio| == 1, it is better to handle this like
2112 ubase - ratio * cbase + ratio * var. */
2116 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2117 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2119 else if (ratioi == -1)
2121 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2122 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2124 else if (TREE_CODE (cbase) == INTEGER_CST)
2126 ratio = build_int_cst_type (uutype, ratioi);
2127 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2128 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2129 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2130 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2134 expr = fold (build2 (MINUS_EXPR, uutype, expr, cbase));
2135 ratio = build_int_cst_type (uutype, ratioi);
2136 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2137 expr = fold (build2 (PLUS_EXPR, uutype, ubase, expr));
2140 return fold_convert (utype, expr);
2143 /* Determines the expression by that USE is expressed from induction variable
2147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2149 return get_computation_at (loop, use, cand, use->stmt);
2152 /* Strips constant offsets from EXPR and adds them to OFFSET. */
2155 strip_offset (tree *expr, unsigned HOST_WIDE_INT *offset)
2158 enum tree_code code;
2162 if (cst_and_fits_in_hwi (*expr))
2164 *offset += int_cst_value (*expr);
2165 *expr = integer_zero_node;
2169 code = TREE_CODE (*expr);
2171 if (code != PLUS_EXPR && code != MINUS_EXPR)
2174 op0 = TREE_OPERAND (*expr, 0);
2175 op1 = TREE_OPERAND (*expr, 1);
2177 if (cst_and_fits_in_hwi (op1))
2179 if (code == PLUS_EXPR)
2180 *offset += int_cst_value (op1);
2182 *offset -= int_cst_value (op1);
2188 if (code != PLUS_EXPR)
2191 if (!cst_and_fits_in_hwi (op0))
2194 *offset += int_cst_value (op0);
2199 /* Returns cost of addition in MODE. */
2202 add_cost (enum machine_mode mode)
2204 static unsigned costs[NUM_MACHINE_MODES];
2212 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2213 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2214 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2219 cost = seq_cost (seq);
2225 if (dump_file && (dump_flags & TDF_DETAILS))
2226 fprintf (dump_file, "Addition in %s costs %d\n",
2227 GET_MODE_NAME (mode), cost);
2231 /* Entry in a hashtable of already known costs for multiplication. */
2234 HOST_WIDE_INT cst; /* The constant to multiply by. */
2235 enum machine_mode mode; /* In mode. */
2236 unsigned cost; /* The cost. */
2239 /* Counts hash value for the ENTRY. */
2242 mbc_entry_hash (const void *entry)
2244 const struct mbc_entry *e = entry;
2246 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2249 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2252 mbc_entry_eq (const void *entry1, const void *entry2)
2254 const struct mbc_entry *e1 = entry1;
2255 const struct mbc_entry *e2 = entry2;
2257 return (e1->mode == e2->mode
2258 && e1->cst == e2->cst);
2261 /* Returns cost of multiplication by constant CST in MODE. */
2264 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2266 static htab_t costs;
2267 struct mbc_entry **cached, act;
2272 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2276 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2278 return (*cached)->cost;
2280 *cached = xmalloc (sizeof (struct mbc_entry));
2281 (*cached)->mode = mode;
2282 (*cached)->cst = cst;
2285 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2290 cost = seq_cost (seq);
2292 if (dump_file && (dump_flags & TDF_DETAILS))
2293 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2294 (int) cst, GET_MODE_NAME (mode), cost);
2296 (*cached)->cost = cost;
2301 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2302 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2303 variable is omitted. The created memory accesses MODE.
2305 TODO -- there must be some better way. This all is quite crude. */
2308 get_address_cost (bool symbol_present, bool var_present,
2309 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2311 #define MAX_RATIO 128
2312 static sbitmap valid_mult;
2313 static HOST_WIDE_INT rat, off;
2314 static HOST_WIDE_INT min_offset, max_offset;
2315 static unsigned costs[2][2][2][2];
2316 unsigned cost, acost;
2317 rtx seq, addr, base;
2318 bool offset_p, ratio_p;
2320 HOST_WIDE_INT s_offset;
2321 unsigned HOST_WIDE_INT mask;
2328 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2330 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2331 for (i = 1; i <= 1 << 20; i <<= 1)
2333 XEXP (addr, 1) = GEN_INT (i);
2334 if (!memory_address_p (Pmode, addr))
2337 max_offset = i >> 1;
2340 for (i = 1; i <= 1 << 20; i <<= 1)
2342 XEXP (addr, 1) = GEN_INT (-i);
2343 if (!memory_address_p (Pmode, addr))
2346 min_offset = -(i >> 1);
2348 if (dump_file && (dump_flags & TDF_DETAILS))
2350 fprintf (dump_file, "get_address_cost:\n");
2351 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2352 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2355 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2356 sbitmap_zero (valid_mult);
2358 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2359 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2361 XEXP (addr, 1) = GEN_INT (i);
2362 if (memory_address_p (Pmode, addr))
2364 SET_BIT (valid_mult, i + MAX_RATIO);
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2371 fprintf (dump_file, " allowed multipliers:");
2372 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2373 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2374 fprintf (dump_file, " %d", (int) i);
2375 fprintf (dump_file, "\n");
2376 fprintf (dump_file, "\n");
2380 bits = GET_MODE_BITSIZE (Pmode);
2381 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2383 if ((offset >> (bits - 1) & 1))
2388 offset_p = (min_offset <= s_offset && s_offset <= max_offset);
2389 ratio_p = (ratio != 1
2390 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2391 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2393 if (ratio != 1 && !ratio_p)
2394 cost += multiply_by_cost (ratio, Pmode);
2396 if (s_offset && !offset_p && !symbol_present)
2398 cost += add_cost (Pmode);
2402 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2407 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2408 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2410 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2414 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2416 base = gen_rtx_fmt_e (CONST, Pmode,
2417 gen_rtx_fmt_ee (PLUS, Pmode,
2421 base = gen_rtx_fmt_ee (PLUS, Pmode, reg1, base);
2424 else if (var_present)
2428 base = gen_rtx_fmt_ee (PLUS, Pmode, base, GEN_INT (off));
2431 base = GEN_INT (off);
2436 addr = gen_rtx_fmt_ee (PLUS, Pmode, base, addr);
2439 addr = memory_address (Pmode, addr);
2443 acost = seq_cost (seq);
2444 acost += address_cost (addr, Pmode);
2448 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2451 return cost + acost;
2454 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2455 the bitmap to that we should store it. */
2457 static struct ivopts_data *fd_ivopts_data;
2459 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2461 bitmap *depends_on = data;
2462 struct version_info *info;
2464 if (TREE_CODE (*expr_p) != SSA_NAME)
2466 info = name_info (fd_ivopts_data, *expr_p);
2468 if (!info->inv_id || info->has_nonlin_use)
2472 *depends_on = BITMAP_XMALLOC ();
2473 bitmap_set_bit (*depends_on, info->inv_id);
2478 /* Estimates cost of forcing EXPR into variable. DEPENDS_ON is a set of the
2479 invariants the computation depends on. */
2482 force_var_cost (struct ivopts_data *data,
2483 tree expr, bitmap *depends_on)
2485 static bool costs_initialized = false;
2486 static unsigned integer_cost;
2487 static unsigned symbol_cost;
2488 static unsigned address_cost;
2490 if (!costs_initialized)
2492 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2493 rtx x = gen_rtx_MEM (DECL_MODE (var),
2494 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2496 tree type = build_pointer_type (integer_type_node);
2498 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2501 SET_DECL_RTL (var, x);
2502 TREE_STATIC (var) = 1;
2503 addr = build1 (ADDR_EXPR, type, var);
2504 symbol_cost = computation_cost (addr) + 1;
2507 = computation_cost (build2 (PLUS_EXPR, type,
2509 build_int_cst_type (type, 2000))) + 1;
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2512 fprintf (dump_file, "force_var_cost:\n");
2513 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2514 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2515 fprintf (dump_file, " address %d\n", (int) address_cost);
2516 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2517 fprintf (dump_file, "\n");
2520 costs_initialized = true;
2525 fd_ivopts_data = data;
2526 walk_tree (&expr, find_depends, depends_on, NULL);
2529 if (SSA_VAR_P (expr))
2532 if (TREE_INVARIANT (expr))
2534 if (TREE_CODE (expr) == INTEGER_CST)
2535 return integer_cost;
2537 if (TREE_CODE (expr) == ADDR_EXPR)
2539 tree obj = TREE_OPERAND (expr, 0);
2541 if (TREE_CODE (obj) == VAR_DECL
2542 || TREE_CODE (obj) == PARM_DECL
2543 || TREE_CODE (obj) == RESULT_DECL)
2547 return address_cost;
2550 /* Just an arbitrary value, FIXME. */
2551 return target_spill_cost;
2554 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
2555 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2556 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2557 invariants the computation depends on. */
2560 split_address_cost (struct ivopts_data *data,
2561 tree addr, bool *symbol_present, bool *var_present,
2562 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2565 HOST_WIDE_INT bitsize;
2566 HOST_WIDE_INT bitpos;
2568 enum machine_mode mode;
2569 int unsignedp, volatilep;
2571 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2572 &unsignedp, &volatilep);
2575 || bitpos % BITS_PER_UNIT != 0
2576 || TREE_CODE (core) != VAR_DECL)
2578 *symbol_present = false;
2579 *var_present = true;
2580 fd_ivopts_data = data;
2581 walk_tree (&addr, find_depends, depends_on, NULL);
2582 return target_spill_cost;
2585 *offset += bitpos / BITS_PER_UNIT;
2586 if (TREE_STATIC (core)
2587 || DECL_EXTERNAL (core))
2589 *symbol_present = true;
2590 *var_present = false;
2594 *symbol_present = false;
2595 *var_present = true;
2599 /* Estimates cost of expressing difference of addresses E1 - E2 as
2600 var + symbol + offset. The value of offset is added to OFFSET,
2601 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2602 part is missing. DEPENDS_ON is a set of the invariants the computation
2606 ptr_difference_cost (struct ivopts_data *data,
2607 tree e1, tree e2, bool *symbol_present, bool *var_present,
2608 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2610 HOST_WIDE_INT diff = 0;
2613 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
2615 if (TREE_CODE (e2) == ADDR_EXPR
2616 && ptr_difference_const (TREE_OPERAND (e1, 0),
2617 TREE_OPERAND (e2, 0), &diff))
2620 *symbol_present = false;
2621 *var_present = false;
2625 if (e2 == integer_zero_node)
2626 return split_address_cost (data, TREE_OPERAND (e1, 0),
2627 symbol_present, var_present, offset, depends_on);
2629 *symbol_present = false;
2630 *var_present = true;
2632 cost = force_var_cost (data, e1, depends_on);
2633 cost += force_var_cost (data, e2, depends_on);
2634 cost += add_cost (Pmode);
2639 /* Estimates cost of expressing difference E1 - E2 as
2640 var + symbol + offset. The value of offset is added to OFFSET,
2641 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
2642 part is missing. DEPENDS_ON is a set of the invariants the computation
2646 difference_cost (struct ivopts_data *data,
2647 tree e1, tree e2, bool *symbol_present, bool *var_present,
2648 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
2651 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
2653 strip_offset (&e1, offset);
2655 strip_offset (&e2, offset);
2658 if (TREE_CODE (e1) == ADDR_EXPR)
2659 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
2661 *symbol_present = false;
2663 if (operand_equal_p (e1, e2, 0))
2665 *var_present = false;
2668 *var_present = true;
2670 return force_var_cost (data, e1, depends_on);
2674 cost = force_var_cost (data, e2, depends_on);
2675 cost += multiply_by_cost (-1, mode);
2680 cost = force_var_cost (data, e1, depends_on);
2681 cost += force_var_cost (data, e2, depends_on);
2682 cost += add_cost (mode);
2687 /* Determines the cost of the computation by that USE is expressed
2688 from induction variable CAND. If ADDRESS_P is true, we just need
2689 to create an address from it, otherwise we want to get it into
2690 register. A set of invariants we depend on is stored in
2691 DEPENDS_ON. AT is the statement at that the value is computed. */
2694 get_computation_cost_at (struct ivopts_data *data,
2695 struct iv_use *use, struct iv_cand *cand,
2696 bool address_p, bitmap *depends_on, tree at)
2698 tree ubase = use->iv->base, ustep = use->iv->step;
2700 tree utype = TREE_TYPE (ubase), ctype;
2701 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
2702 HOST_WIDE_INT ratio, aratio;
2703 bool var_present, symbol_present;
2704 unsigned cost = 0, n_sums;
2708 /* Only consider real candidates. */
2712 cbase = cand->iv->base;
2713 cstep = cand->iv->step;
2714 ctype = TREE_TYPE (cbase);
2716 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2718 /* We do not have a precision to express the values of use. */
2722 if (!cst_and_fits_in_hwi (ustep)
2723 || !cst_and_fits_in_hwi (cstep))
2726 if (TREE_CODE (ubase) == INTEGER_CST
2727 && !cst_and_fits_in_hwi (ubase))
2730 if (TREE_CODE (cbase) == INTEGER_CST
2731 && !cst_and_fits_in_hwi (cbase))
2734 ustepi = int_cst_value (ustep);
2735 cstepi = int_cst_value (cstep);
2737 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
2739 /* TODO -- add direct handling of this case. */
2743 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
2746 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
2747 or ratio == 1, it is better to handle this like
2749 ubase - ratio * cbase + ratio * var
2751 (also holds in the case ratio == -1, TODO. */
2753 if (TREE_CODE (cbase) == INTEGER_CST)
2755 offset = - ratio * int_cst_value (cbase);
2756 cost += difference_cost (data,
2757 ubase, integer_zero_node,
2758 &symbol_present, &var_present, &offset,
2761 else if (ratio == 1)
2763 cost += difference_cost (data,
2765 &symbol_present, &var_present, &offset,
2770 cost += force_var_cost (data, cbase, depends_on);
2771 cost += add_cost (TYPE_MODE (ctype));
2772 cost += difference_cost (data,
2773 ubase, integer_zero_node,
2774 &symbol_present, &var_present, &offset,
2778 /* If we are after the increment, the value of the candidate is higher by
2780 if (stmt_after_increment (data->current_loop, cand, at))
2781 offset -= ratio * cstepi;
2783 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
2784 (symbol/var/const parts may be omitted). If we are looking for an address,
2785 find the cost of addressing this. */
2787 return get_address_cost (symbol_present, var_present, offset, ratio);
2789 /* Otherwise estimate the costs for computing the expression. */
2790 aratio = ratio > 0 ? ratio : -ratio;
2791 if (!symbol_present && !var_present && !offset)
2794 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
2800 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
2804 /* Symbol + offset should be compile-time computable. */
2805 && (symbol_present || offset))
2808 return cost + n_sums * add_cost (TYPE_MODE (ctype));
2812 /* Just get the expression, expand it and measure the cost. */
2813 tree comp = get_computation_at (data->current_loop, use, cand, at);
2819 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
2821 return computation_cost (comp);
2825 /* Determines the cost of the computation by that USE is expressed
2826 from induction variable CAND. If ADDRESS_P is true, we just need
2827 to create an address from it, otherwise we want to get it into
2828 register. A set of invariants we depend on is stored in
2832 get_computation_cost (struct ivopts_data *data,
2833 struct iv_use *use, struct iv_cand *cand,
2834 bool address_p, bitmap *depends_on)
2836 return get_computation_cost_at (data,
2837 use, cand, address_p, depends_on, use->stmt);
2840 /* Determines cost of basing replacement of USE on CAND in a generic
2844 determine_use_iv_cost_generic (struct ivopts_data *data,
2845 struct iv_use *use, struct iv_cand *cand)
2848 unsigned cost = get_computation_cost (data, use, cand, false, &depends_on);
2850 set_use_iv_cost (data, use, cand, cost, depends_on);
2853 /* Determines cost of basing replacement of USE on CAND in an address. */
2856 determine_use_iv_cost_address (struct ivopts_data *data,
2857 struct iv_use *use, struct iv_cand *cand)
2860 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
2862 set_use_iv_cost (data, use, cand, cost, depends_on);
2865 /* Computes value of induction variable IV in iteration NITER. */
2868 iv_value (struct iv *iv, tree niter)
2871 tree type = TREE_TYPE (iv->base);
2873 niter = fold_convert (type, niter);
2874 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
2876 return fold (build2 (PLUS_EXPR, type, iv->base, val));
2879 /* Computes value of candidate CAND at position AT in iteration NITER. */
2882 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
2884 tree val = iv_value (cand->iv, niter);
2885 tree type = TREE_TYPE (cand->iv->base);
2887 if (stmt_after_increment (loop, cand, at))
2888 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
2893 /* Check whether it is possible to express the condition in USE by comparison
2894 of candidate CAND. If so, store the comparison code to COMPARE and the
2895 value compared with to BOUND. */
2898 may_eliminate_iv (struct loop *loop,
2899 struct iv_use *use, struct iv_cand *cand,
2900 enum tree_code *compare, tree *bound)
2903 struct tree_niter_desc *niter, new_niter;
2904 tree wider_type, type, base;
2906 /* For now just very primitive -- we work just for the single exit condition,
2907 and are quite conservative about the possible overflows. TODO -- both of
2908 these can be improved. */
2909 exit = single_dom_exit (loop);
2912 if (use->stmt != last_stmt (exit->src))
2915 niter = &loop_data (loop)->niter;
2917 || !integer_nonzerop (niter->assumptions)
2918 || !integer_zerop (niter->may_be_zero))
2921 if (exit->flags & EDGE_TRUE_VALUE)
2926 *bound = cand_value_at (loop, cand, use->stmt, niter->niter);
2928 /* Let us check there is not some problem with overflows, by checking that
2929 the number of iterations is unchanged. */
2930 base = cand->iv->base;
2931 type = TREE_TYPE (base);
2932 if (stmt_after_increment (loop, cand, use->stmt))
2933 base = fold (build2 (PLUS_EXPR, type, base, cand->iv->step));
2935 new_niter.niter = NULL_TREE;
2936 number_of_iterations_cond (TREE_TYPE (cand->iv->base), base,
2937 cand->iv->step, NE_EXPR, *bound, NULL_TREE,
2939 if (!new_niter.niter
2940 || !integer_nonzerop (new_niter.assumptions)
2941 || !integer_zerop (new_niter.may_be_zero))
2944 wider_type = TREE_TYPE (new_niter.niter);
2945 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (TREE_TYPE (niter->niter)))
2946 wider_type = TREE_TYPE (niter->niter);
2947 if (!operand_equal_p (fold_convert (wider_type, niter->niter),
2948 fold_convert (wider_type, new_niter.niter), 0))
2954 /* Determines cost of basing replacement of USE on CAND in a condition. */
2957 determine_use_iv_cost_condition (struct ivopts_data *data,
2958 struct iv_use *use, struct iv_cand *cand)
2961 enum tree_code compare;
2963 /* Only consider real candidates. */
2966 set_use_iv_cost (data, use, cand, INFTY, NULL);
2970 if (may_eliminate_iv (data->current_loop, use, cand, &compare, &bound))
2972 bitmap depends_on = NULL;
2973 unsigned cost = force_var_cost (data, bound, &depends_on);
2975 set_use_iv_cost (data, use, cand, cost, depends_on);
2979 /* The induction variable elimination failed; just express the original
2980 giv. If it is compared with an invariant, note that we cannot get
2982 if (TREE_CODE (*use->op_p) == SSA_NAME)
2983 record_invariant (data, *use->op_p, true);
2986 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
2987 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
2990 determine_use_iv_cost_generic (data, use, cand);
2993 /* Checks whether it is possible to replace the final value of USE by
2994 a direct computation. If so, the formula is stored to *VALUE. */
2997 may_replace_final_value (struct loop *loop, struct iv_use *use, tree *value)
3000 struct tree_niter_desc *niter;
3002 exit = single_dom_exit (loop);
3006 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3007 bb_for_stmt (use->stmt)));
3009 niter = &loop_data (loop)->niter;
3011 || !operand_equal_p (niter->assumptions, boolean_true_node, 0)
3012 || !operand_equal_p (niter->may_be_zero, boolean_false_node, 0))
3015 *value = iv_value (use->iv, niter->niter);
3020 /* Determines cost of replacing final value of USE using CAND. */
3023 determine_use_iv_cost_outer (struct ivopts_data *data,
3024 struct iv_use *use, struct iv_cand *cand)
3030 struct loop *loop = data->current_loop;
3034 if (!may_replace_final_value (loop, use, &value))
3036 set_use_iv_cost (data, use, cand, INFTY, NULL);
3041 cost = force_var_cost (data, value, &depends_on);
3043 cost /= AVG_LOOP_NITER (loop);
3045 set_use_iv_cost (data, use, cand, cost, depends_on);
3049 exit = single_dom_exit (loop);
3052 /* If there is just a single exit, we may use value of the candidate
3053 after we take it to determine the value of use. */
3054 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3055 last_stmt (exit->src));
3057 cost /= AVG_LOOP_NITER (loop);
3061 /* Otherwise we just need to compute the iv. */
3062 cost = get_computation_cost (data, use, cand, false, &depends_on);
3065 set_use_iv_cost (data, use, cand, cost, depends_on);
3068 /* Determines cost of basing replacement of USE on CAND. */
3071 determine_use_iv_cost (struct ivopts_data *data,
3072 struct iv_use *use, struct iv_cand *cand)
3076 case USE_NONLINEAR_EXPR:
3077 determine_use_iv_cost_generic (data, use, cand);
3081 determine_use_iv_cost_outer (data, use, cand);
3085 determine_use_iv_cost_address (data, use, cand);
3089 determine_use_iv_cost_condition (data, use, cand);
3097 /* Determines costs of basing the use of the iv on an iv candidate. */
3100 determine_use_iv_costs (struct ivopts_data *data)
3104 struct iv_cand *cand;
3106 data->consider_all_candidates = (n_iv_cands (data)
3107 <= CONSIDER_ALL_CANDIDATES_BOUND);
3109 alloc_use_cost_map (data);
3111 if (!data->consider_all_candidates)
3113 /* Add the important candidate entries. */
3114 for (j = 0; j < n_iv_cands (data); j++)
3116 cand = iv_cand (data, j);
3117 if (!cand->important)
3119 for (i = 0; i < n_iv_uses (data); i++)
3121 use = iv_use (data, i);
3122 determine_use_iv_cost (data, use, cand);
3127 for (i = 0; i < n_iv_uses (data); i++)
3129 use = iv_use (data, i);
3131 if (data->consider_all_candidates)
3133 for (j = 0; j < n_iv_cands (data); j++)
3135 cand = iv_cand (data, j);
3136 determine_use_iv_cost (data, use, cand);
3141 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j,
3143 cand = iv_cand (data, j);
3144 if (!cand->important)
3145 determine_use_iv_cost (data, use, cand);
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3152 fprintf (dump_file, "Use-candidate costs:\n");
3154 for (i = 0; i < n_iv_uses (data); i++)
3156 use = iv_use (data, i);
3158 fprintf (dump_file, "Use %d:\n", i);
3159 fprintf (dump_file, " cand\tcost\tdepends on\n");
3160 for (j = 0; j < use->n_map_members; j++)
3162 if (!use->cost_map[j].cand
3163 || use->cost_map[j].cost == INFTY)
3166 fprintf (dump_file, " %d\t%d\t",
3167 use->cost_map[j].cand->id,
3168 use->cost_map[j].cost);
3169 if (use->cost_map[j].depends_on)
3170 bitmap_print (dump_file,
3171 use->cost_map[j].depends_on, "","");
3172 fprintf (dump_file, "\n");
3175 fprintf (dump_file, "\n");
3177 fprintf (dump_file, "\n");
3181 /* Determines cost of the candidate CAND. */
3184 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3186 unsigned cost_base, cost_step;
3196 /* There are two costs associated with the candidate -- its increment
3197 and its initialization. The second is almost negligible for any loop
3198 that rolls enough, so we take it just very little into account. */
3200 base = cand->iv->base;
3201 cost_base = force_var_cost (data, base, NULL);
3202 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3204 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3206 /* Prefer the original iv unless we may gain something by replacing it. */
3207 if (cand->pos == IP_ORIGINAL)
3210 /* Prefer not to insert statements into latch unless there are some
3211 already (so that we do not create unnecessary jumps). */
3212 if (cand->pos == IP_END)
3214 bb = ip_end_pos (data->current_loop);
3215 last = last_stmt (bb);
3218 || TREE_CODE (last) == LABEL_EXPR)
3223 /* Determines costs of computation of the candidates. */
3226 determine_iv_costs (struct ivopts_data *data)
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3232 fprintf (dump_file, "Candidate costs:\n");
3233 fprintf (dump_file, " cand\tcost\n");
3236 for (i = 0; i < n_iv_cands (data); i++)
3238 struct iv_cand *cand = iv_cand (data, i);
3240 determine_iv_cost (data, cand);
3242 if (dump_file && (dump_flags & TDF_DETAILS))
3243 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (dump_file, "\n");
3250 /* Calculates cost for having SIZE induction variables. */
3253 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3255 return global_cost_for_size (size,
3256 loop_data (data->current_loop)->regs_used,
3260 /* For each size of the induction variable set determine the penalty. */
3263 determine_set_costs (struct ivopts_data *data)
3267 struct loop *loop = data->current_loop;
3269 /* We use the following model (definitely improvable, especially the
3270 cost function -- TODO):
3272 We estimate the number of registers available (using MD data), name it A.
3274 We estimate the number of registers used by the loop, name it U. This
3275 number is obtained as the number of loop phi nodes (not counting virtual
3276 registers and bivs) + the number of variables from outside of the loop.
3278 We set a reserve R (free regs that are used for temporary computations,
3279 etc.). For now the reserve is a constant 3.
3281 Let I be the number of induction variables.
3283 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3284 make a lot of ivs without a reason).
3285 -- if A - R < U + I <= A, the cost is I * PRES_COST
3286 -- if U + I > A, the cost is I * PRES_COST and
3287 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "Global costs:\n");
3292 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3293 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3294 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3295 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3299 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
3301 op = PHI_RESULT (phi);
3303 if (!is_gimple_reg (op))
3306 if (get_iv (data, op))
3312 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
3314 struct version_info *info = ver_info (data, j);
3316 if (info->inv_id && info->has_nonlin_use)
3320 loop_data (loop)->regs_used = n;
3321 if (dump_file && (dump_flags & TDF_DETAILS))
3322 fprintf (dump_file, " regs_used %d\n", n);
3324 if (dump_file && (dump_flags & TDF_DETAILS))
3326 fprintf (dump_file, " cost for size:\n");
3327 fprintf (dump_file, " ivs\tcost\n");
3328 for (j = 0; j <= 2 * target_avail_regs; j++)
3329 fprintf (dump_file, " %d\t%d\n", j,
3330 ivopts_global_cost_for_size (data, j));
3331 fprintf (dump_file, "\n");
3335 /* Finds a best candidate for USE and stores it to CAND. The candidates are
3336 taken from the set SOL and they may depend on invariants in the set INV.
3337 The really used candidate and invariants are noted to USED_IVS and
3341 find_best_candidate (struct ivopts_data *data,
3342 struct iv_use *use, bitmap sol, bitmap inv,
3343 bitmap used_ivs, bitmap used_inv, struct iv_cand **cand)
3346 unsigned best_cost = INFTY, cost;
3347 struct iv_cand *cnd = NULL, *acnd;
3348 bitmap depends_on = NULL, asol;
3350 if (data->consider_all_candidates)
3354 asol = BITMAP_XMALLOC ();
3355 bitmap_a_and_b (asol, sol, use->related_cands);
3358 EXECUTE_IF_SET_IN_BITMAP (asol, 0, c,
3360 acnd = iv_cand (data, c);
3361 cost = get_use_iv_cost (data, use, acnd, &depends_on);
3365 if (cost > best_cost)
3367 if (cost == best_cost)
3369 /* Prefer the cheaper iv. */
3370 if (acnd->cost >= cnd->cost)
3376 EXECUTE_IF_AND_COMPL_IN_BITMAP (depends_on, inv, 0, d,
3379 bitmap_a_or_b (used_inv, used_inv, depends_on);
3387 if (cnd && used_ivs)
3388 bitmap_set_bit (used_ivs, cnd->id);
3393 if (!data->consider_all_candidates)
3394 BITMAP_XFREE (asol);
3399 /* Returns cost of set of ivs SOL + invariants INV. Removes unnecessary
3400 induction variable candidates and invariants from the sets. Only
3401 uses 0 .. MAX_USE - 1 are taken into account. */
3404 set_cost_up_to (struct ivopts_data *data, bitmap sol, bitmap inv,
3408 unsigned cost = 0, size = 0, acost;
3410 struct iv_cand *cand;
3411 bitmap used_ivs = BITMAP_XMALLOC (), used_inv = BITMAP_XMALLOC ();
3413 for (i = 0; i < max_use; i++)
3415 use = iv_use (data, i);
3416 acost = find_best_candidate (data, use, sol, inv,
3417 used_ivs, used_inv, NULL);
3420 BITMAP_XFREE (used_ivs);
3421 BITMAP_XFREE (used_inv);
3427 EXECUTE_IF_SET_IN_BITMAP (used_ivs, 0, i,
3429 cand = iv_cand (data, i);
3431 /* Do not count the pseudocandidates. */
3437 EXECUTE_IF_SET_IN_BITMAP (used_inv, 0, i, size++);
3438 cost += ivopts_global_cost_for_size (data, size);
3440 bitmap_copy (sol, used_ivs);
3441 bitmap_copy (inv, used_inv);
3443 BITMAP_XFREE (used_ivs);
3444 BITMAP_XFREE (used_inv);
3449 /* Computes cost of set of ivs SOL + invariants INV. Removes unnecessary
3450 induction variable candidates and invariants from the sets. */
3453 set_cost (struct ivopts_data *data, bitmap sol, bitmap inv)
3455 return set_cost_up_to (data, sol, inv, n_iv_uses (data));
3458 /* Tries to extend the sets IVS and INV in the best possible way in order
3459 to express the USE. */
3462 try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
3465 unsigned best_cost = set_cost_up_to (data, ivs, inv, use->id + 1), act_cost;
3466 bitmap best_ivs = BITMAP_XMALLOC ();
3467 bitmap best_inv = BITMAP_XMALLOC ();
3468 bitmap act_ivs = BITMAP_XMALLOC ();
3469 bitmap act_inv = BITMAP_XMALLOC ();
3471 struct cost_pair *cp;
3473 bitmap_copy (best_ivs, ivs);
3474 bitmap_copy (best_inv, inv);
3476 for (i = 0; i < use->n_map_members; i++)
3478 cp = use->cost_map + i;
3479 if (cp->cost == INFTY)
3482 bitmap_copy (act_ivs, ivs);
3483 bitmap_set_bit (act_ivs, cp->cand->id);
3485 bitmap_a_or_b (act_inv, inv, cp->depends_on);
3487 bitmap_copy (act_inv, inv);
3488 act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
3490 if (act_cost < best_cost)
3492 best_cost = act_cost;
3493 bitmap_copy (best_ivs, act_ivs);
3494 bitmap_copy (best_inv, act_inv);
3498 bitmap_copy (ivs, best_ivs);
3499 bitmap_copy (inv, best_inv);
3501 BITMAP_XFREE (best_ivs);
3502 BITMAP_XFREE (best_inv);
3503 BITMAP_XFREE (act_ivs);
3504 BITMAP_XFREE (act_inv);
3506 return (best_cost != INFTY);
3509 /* Finds an initial set of IVS and invariants INV. We do this by simply
3510 choosing the best candidate for each use. */
3513 get_initial_solution (struct ivopts_data *data, bitmap ivs, bitmap inv)
3517 for (i = 0; i < n_iv_uses (data); i++)
3518 if (!try_add_cand_for (data, ivs, inv, iv_use (data, i)))
3521 return set_cost (data, ivs, inv);
3524 /* Tries to improve set of induction variables IVS and invariants INV to get
3525 it better than COST. */
3528 try_improve_iv_set (struct ivopts_data *data,
3529 bitmap ivs, bitmap inv, unsigned *cost)
3532 bitmap new_ivs = BITMAP_XMALLOC (), new_inv = BITMAP_XMALLOC ();
3533 bitmap best_new_ivs = NULL, best_new_inv = NULL;
3535 /* Try altering the set of induction variables by one. */
3536 for (i = 0; i < n_iv_cands (data); i++)
3538 bitmap_copy (new_ivs, ivs);
3539 bitmap_copy (new_inv, inv);
3541 if (bitmap_bit_p (ivs, i))
3542 bitmap_clear_bit (new_ivs, i);
3544 bitmap_set_bit (new_ivs, i);
3546 acost = set_cost (data, new_ivs, new_inv);
3552 best_new_ivs = BITMAP_XMALLOC ();
3553 best_new_inv = BITMAP_XMALLOC ();
3557 bitmap_copy (best_new_ivs, new_ivs);
3558 bitmap_copy (best_new_inv, new_inv);
3561 /* Ditto for invariants. */
3562 for (i = 1; i <= data->max_inv_id; i++)
3564 if (ver_info (data, i)->has_nonlin_use)
3567 bitmap_copy (new_ivs, ivs);
3568 bitmap_copy (new_inv, inv);
3570 if (bitmap_bit_p (inv, i))
3571 bitmap_clear_bit (new_inv, i);
3573 bitmap_set_bit (new_inv, i);
3575 acost = set_cost (data, new_ivs, new_inv);
3581 best_new_ivs = BITMAP_XMALLOC ();
3582 best_new_inv = BITMAP_XMALLOC ();
3586 bitmap_copy (best_new_ivs, new_ivs);
3587 bitmap_copy (best_new_inv, new_inv);
3590 BITMAP_XFREE (new_ivs);
3591 BITMAP_XFREE (new_inv);
3596 bitmap_copy (ivs, best_new_ivs);
3597 bitmap_copy (inv, best_new_inv);
3598 BITMAP_XFREE (best_new_ivs);
3599 BITMAP_XFREE (best_new_inv);
3603 /* Attempts to find the optimal set of induction variables. We do simple
3604 greedy heuristic -- we try to replace at most one candidate in the selected
3605 solution and remove the unused ivs while this improves the cost. */
3608 find_optimal_iv_set (struct ivopts_data *data)
3611 bitmap set = BITMAP_XMALLOC ();
3612 bitmap inv = BITMAP_XMALLOC ();
3615 /* Set the upper bound. */
3616 cost = get_initial_solution (data, set, inv);
3619 if (dump_file && (dump_flags & TDF_DETAILS))
3620 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
3626 if (dump_file && (dump_flags & TDF_DETAILS))
3628 fprintf (dump_file, "Initial set of candidates (cost %d): ", cost);
3629 bitmap_print (dump_file, set, "", "");
3630 fprintf (dump_file, " invariants ");
3631 bitmap_print (dump_file, inv, "", "");
3632 fprintf (dump_file, "\n");
3635 while (try_improve_iv_set (data, set, inv, &cost))
3637 if (dump_file && (dump_flags & TDF_DETAILS))
3639 fprintf (dump_file, "Improved to (cost %d): ", cost);
3640 bitmap_print (dump_file, set, "", "");
3641 fprintf (dump_file, " invariants ");
3642 bitmap_print (dump_file, inv, "", "");
3643 fprintf (dump_file, "\n");
3647 if (dump_file && (dump_flags & TDF_DETAILS))
3648 fprintf (dump_file, "Final cost %d\n\n", cost);
3650 for (i = 0; i < n_iv_uses (data); i++)
3652 use = iv_use (data, i);
3653 find_best_candidate (data, use, set, inv, NULL, NULL, &use->selected);
3661 /* Creates a new induction variable corresponding to CAND. */
3664 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
3666 block_stmt_iterator incr_pos;
3676 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
3680 incr_pos = bsi_last (ip_end_pos (data->current_loop));
3685 /* Mark that the iv is preserved. */
3686 name_info (data, cand->var_before)->preserve_biv = true;
3687 name_info (data, cand->var_after)->preserve_biv = true;
3689 /* Rewrite the increment so that it uses var_before directly. */
3690 find_interesting_uses_op (data, cand->var_after)->selected = cand;
3695 gimple_add_tmp_var (cand->var_before);
3696 add_referenced_tmp_var (cand->var_before);
3698 base = unshare_expr (cand->iv->base);
3700 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
3701 &incr_pos, after, &cand->var_before, &cand->var_after);
3704 /* Creates new induction variables described in SET. */
3707 create_new_ivs (struct ivopts_data *data, bitmap set)
3710 struct iv_cand *cand;
3712 EXECUTE_IF_SET_IN_BITMAP (set, 0, i,
3714 cand = iv_cand (data, i);
3715 create_new_iv (data, cand);
3719 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
3720 is true, remove also the ssa name defined by the statement. */
3723 remove_statement (tree stmt, bool including_defined_name)
3725 if (TREE_CODE (stmt) == PHI_NODE)
3727 if (!including_defined_name)
3729 /* Prevent the ssa name defined by the statement from being removed. */
3730 SET_PHI_RESULT (stmt, NULL);
3732 remove_phi_node (stmt, NULL_TREE, bb_for_stmt (stmt));
3736 block_stmt_iterator bsi = stmt_for_bsi (stmt);
3742 /* Rewrites USE (definition of iv used in a nonlinear expression)
3743 using candidate CAND. */
3746 rewrite_use_nonlinear_expr (struct ivopts_data *data,
3747 struct iv_use *use, struct iv_cand *cand)
3749 tree comp = unshare_expr (get_computation (data->current_loop,
3751 tree op, stmts, tgt, ass;
3752 block_stmt_iterator bsi, pbsi;
3754 switch (TREE_CODE (use->stmt))
3757 tgt = PHI_RESULT (use->stmt);
3759 /* If we should keep the biv, do not replace it. */
3760 if (name_info (data, tgt)->preserve_biv)
3763 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
3764 while (!bsi_end_p (pbsi)
3765 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
3773 tgt = TREE_OPERAND (use->stmt, 0);
3774 bsi = stmt_for_bsi (use->stmt);
3781 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
3783 if (TREE_CODE (use->stmt) == PHI_NODE)
3786 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
3787 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
3788 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
3789 remove_statement (use->stmt, false);
3790 SSA_NAME_DEF_STMT (tgt) = ass;
3795 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3796 TREE_OPERAND (use->stmt, 1) = op;
3800 /* Replaces ssa name in index IDX by its basic variable. Callback for
3804 idx_remove_ssa_names (tree base ATTRIBUTE_UNUSED, tree *idx,
3805 void *data ATTRIBUTE_UNUSED)
3807 if (TREE_CODE (*idx) == SSA_NAME)
3808 *idx = SSA_NAME_VAR (*idx);
3812 /* Unshares REF and replaces ssa names inside it by their basic variables. */
3815 unshare_and_remove_ssa_names (tree ref)
3817 ref = unshare_expr (ref);
3818 for_each_index (&ref, idx_remove_ssa_names, NULL);
3823 /* Rewrites base of memory access OP with expression WITH in statement
3824 pointed to by BSI. */
3827 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
3829 tree var = get_base_address (*op), new_var, new_name, copy, name;
3832 if (!var || TREE_CODE (with) != SSA_NAME)
3835 if (TREE_CODE (var) == INDIRECT_REF)
3836 var = TREE_OPERAND (var, 0);
3837 if (TREE_CODE (var) == SSA_NAME)
3840 var = SSA_NAME_VAR (var);
3842 else if (DECL_P (var))
3847 if (var_ann (var)->type_mem_tag)
3848 var = var_ann (var)->type_mem_tag;
3850 /* We need to add a memory tag for the variable. But we do not want
3851 to add it to the temporary used for the computations, since this leads
3852 to problems in redundancy elimination when there are common parts
3853 in two computations referring to the different arrays. So we copy
3854 the variable to a new temporary. */
3855 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
3857 new_name = duplicate_ssa_name (name, copy);
3860 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
3861 add_referenced_tmp_var (new_var);
3862 var_ann (new_var)->type_mem_tag = var;
3863 new_name = make_ssa_name (new_var, copy);
3865 TREE_OPERAND (copy, 0) = new_name;
3866 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
3872 if (TREE_CODE (*op) == INDIRECT_REF)
3873 orig = REF_ORIGINAL (*op);
3875 orig = unshare_and_remove_ssa_names (*op);
3877 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
3878 /* Record the original reference, for purposes of alias analysis. */
3879 REF_ORIGINAL (*op) = orig;
3882 /* Rewrites USE (address that is an iv) using candidate CAND. */
3885 rewrite_use_address (struct ivopts_data *data,
3886 struct iv_use *use, struct iv_cand *cand)
3888 tree comp = unshare_expr (get_computation (data->current_loop,
3890 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3892 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
3895 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3897 rewrite_address_base (&bsi, use->op_p, op);
3900 /* Rewrites USE (the condition such that one of the arguments is an iv) using
3904 rewrite_use_compare (struct ivopts_data *data,
3905 struct iv_use *use, struct iv_cand *cand)
3908 tree *op_p, cond, op, stmts, bound;
3909 block_stmt_iterator bsi = stmt_for_bsi (use->stmt);
3910 enum tree_code compare;
3912 if (may_eliminate_iv (data->current_loop,
3913 use, cand, &compare, &bound))
3915 op = force_gimple_operand (unshare_expr (bound), &stmts,
3919 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3921 *use->op_p = build2 (compare, boolean_type_node,
3922 var_at_stmt (data->current_loop,
3923 cand, use->stmt), op);
3924 modify_stmt (use->stmt);
3928 /* The induction variable elimination failed; just express the original
3930 comp = unshare_expr (get_computation (data->current_loop, use, cand));
3933 op_p = &TREE_OPERAND (cond, 0);
3934 if (TREE_CODE (*op_p) != SSA_NAME
3935 || zero_p (get_iv (data, *op_p)->step))
3936 op_p = &TREE_OPERAND (cond, 1);
3938 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
3940 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
3945 /* Ensure that operand *OP_P may be used at the end of EXIT without
3946 violating loop closed ssa form. */
3949 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
3952 struct loop *def_loop;
3955 use = USE_FROM_PTR (op_p);
3956 if (TREE_CODE (use) != SSA_NAME)
3959 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
3963 def_loop = def_bb->loop_father;
3964 if (flow_bb_inside_loop_p (def_loop, exit->dest))
3967 /* Try finding a phi node that copies the value out of the loop. */
3968 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
3969 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
3974 /* Create such a phi node. */
3975 tree new_name = duplicate_ssa_name (use, NULL);
3977 phi = create_phi_node (new_name, exit->dest);
3978 SSA_NAME_DEF_STMT (new_name) = phi;
3979 add_phi_arg (&phi, use, exit);
3982 SET_USE (op_p, PHI_RESULT (phi));
3985 /* Ensure that operands of STMT may be used at the end of EXIT without
3986 violating loop closed ssa form. */
3989 protect_loop_closed_ssa_form (edge exit, tree stmt)
3993 v_may_def_optype v_may_defs;
3996 get_stmt_operands (stmt);
3998 uses = STMT_USE_OPS (stmt);
3999 for (i = 0; i < NUM_USES (uses); i++)
4000 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4002 vuses = STMT_VUSE_OPS (stmt);
4003 for (i = 0; i < NUM_VUSES (vuses); i++)
4004 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
4006 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4007 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
4008 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
4011 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
4012 so that they are emitted on the correct place, and so that the loop closed
4013 ssa form is preserved. */
4016 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
4018 tree_stmt_iterator tsi;
4019 block_stmt_iterator bsi;
4020 tree phi, stmt, def, next;
4022 if (exit->dest->pred->pred_next)
4023 split_loop_exit_edge (exit);
4025 if (TREE_CODE (stmts) == STATEMENT_LIST)
4027 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
4028 protect_loop_closed_ssa_form (exit, tsi_stmt (tsi));
4031 protect_loop_closed_ssa_form (exit, stmts);
4033 /* Ensure there is label in exit->dest, so that we can
4035 tree_block_label (exit->dest);
4036 bsi = bsi_after_labels (exit->dest);
4037 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4042 for (phi = phi_nodes (exit->dest); phi; phi = next)
4044 next = TREE_CHAIN (phi);
4046 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
4048 def = PHI_RESULT (phi);
4049 remove_statement (phi, false);
4050 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
4052 SSA_NAME_DEF_STMT (def) = stmt;
4053 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
4058 /* Rewrites the final value of USE (that is only needed outside of the loop)
4059 using candidate CAND. */
4062 rewrite_use_outer (struct ivopts_data *data,
4063 struct iv_use *use, struct iv_cand *cand)
4066 tree value, op, stmts, tgt;
4069 switch (TREE_CODE (use->stmt))
4072 tgt = PHI_RESULT (use->stmt);
4075 tgt = TREE_OPERAND (use->stmt, 0);
4081 exit = single_dom_exit (data->current_loop);
4087 bool ok = may_replace_final_value (data->current_loop, use, &value);
4091 value = get_computation_at (data->current_loop,
4092 use, cand, last_stmt (exit->src));
4094 value = unshare_expr (value);
4095 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
4097 /* If we will preserve the iv anyway and we would need to perform
4098 some computation to replace the final value, do nothing. */
4099 if (stmts && name_info (data, tgt)->preserve_biv)
4102 for (phi = phi_nodes (exit->dest); phi; phi = TREE_CHAIN (phi))
4104 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
4106 if (USE_FROM_PTR (use_p) == tgt)
4107 SET_USE (use_p, op);
4111 compute_phi_arg_on_exit (exit, stmts, op);
4113 /* Enable removal of the statement. We cannot remove it directly,
4114 since we may still need the aliasing information attached to the
4115 ssa name defined by it. */
4116 name_info (data, tgt)->iv->have_use_for = false;
4120 /* If the variable is going to be preserved anyway, there is nothing to
4122 if (name_info (data, tgt)->preserve_biv)
4125 /* Otherwise we just need to compute the iv. */
4126 rewrite_use_nonlinear_expr (data, use, cand);
4129 /* Rewrites USE using candidate CAND. */
4132 rewrite_use (struct ivopts_data *data,
4133 struct iv_use *use, struct iv_cand *cand)
4137 case USE_NONLINEAR_EXPR:
4138 rewrite_use_nonlinear_expr (data, use, cand);
4142 rewrite_use_outer (data, use, cand);
4146 rewrite_use_address (data, use, cand);
4150 rewrite_use_compare (data, use, cand);
4156 modify_stmt (use->stmt);
4159 /* Rewrite the uses using the selected induction variables. */
4162 rewrite_uses (struct ivopts_data *data)
4165 struct iv_cand *cand;
4168 for (i = 0; i < n_iv_uses (data); i++)
4170 use = iv_use (data, i);
4171 cand = use->selected;
4174 rewrite_use (data, use, cand);
4178 /* Removes the ivs that are not used after rewriting. */
4181 remove_unused_ivs (struct ivopts_data *data)
4185 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j,
4187 struct version_info *info;
4189 info = ver_info (data, j);
4191 && !zero_p (info->iv->step)
4193 && !info->iv->have_use_for
4194 && !info->preserve_biv)
4195 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
4199 /* Frees data allocated by the optimization of a single loop. */
4202 free_loop_data (struct ivopts_data *data)
4206 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i,
4208 struct version_info *info;
4210 info = ver_info (data, i);
4214 info->has_nonlin_use = false;
4215 info->preserve_biv = false;
4218 bitmap_clear (data->relevant);
4220 for (i = 0; i < n_iv_uses (data); i++)
4222 struct iv_use *use = iv_use (data, i);
4225 BITMAP_XFREE (use->related_cands);
4226 for (j = 0; j < use->n_map_members; j++)
4227 if (use->cost_map[j].depends_on)
4228 BITMAP_XFREE (use->cost_map[j].depends_on);
4229 free (use->cost_map);
4232 VARRAY_POP_ALL (data->iv_uses);
4234 for (i = 0; i < n_iv_cands (data); i++)
4236 struct iv_cand *cand = iv_cand (data, i);
4242 VARRAY_POP_ALL (data->iv_candidates);
4244 if (data->version_info_size < num_ssa_names)
4246 data->version_info_size = 2 * num_ssa_names;
4247 free (data->version_info);
4248 data->version_info = xcalloc (data->version_info_size,
4249 sizeof (struct version_info));
4252 data->max_inv_id = 0;
4254 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
4256 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
4258 SET_DECL_RTL (obj, NULL_RTX);
4260 VARRAY_POP_ALL (decl_rtl_to_reset);
4263 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
4267 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
4271 for (i = 1; i < loops->num; i++)
4272 if (loops->parray[i])
4274 free (loops->parray[i]->aux);
4275 loops->parray[i]->aux = NULL;
4278 free_loop_data (data);
4279 free (data->version_info);
4280 BITMAP_XFREE (data->relevant);
4282 VARRAY_FREE (decl_rtl_to_reset);
4283 VARRAY_FREE (data->iv_uses);
4284 VARRAY_FREE (data->iv_candidates);
4287 /* Optimizes the LOOP. Returns true if anything changed. */
4290 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
4292 bool changed = false;
4296 data->current_loop = loop;
4298 if (dump_file && (dump_flags & TDF_DETAILS))
4300 fprintf (dump_file, "Processing loop %d\n", loop->num);
4302 exit = single_dom_exit (loop);
4305 fprintf (dump_file, " single exit %d -> %d, exit condition ",
4306 exit->src->index, exit->dest->index);
4307 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
4308 fprintf (dump_file, "\n");
4311 fprintf (dump_file, "\n");
4314 /* For each ssa name determines whether it behaves as an induction variable
4316 if (!find_induction_variables (data))
4319 /* Finds interesting uses (item 1). */
4320 find_interesting_uses (data);
4321 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
4324 /* Finds candidates for the induction variables (item 2). */
4325 find_iv_candidates (data);
4327 /* Calculates the costs (item 3, part 1). */
4328 determine_use_iv_costs (data);
4329 determine_iv_costs (data);
4330 determine_set_costs (data);
4332 /* Find the optimal set of induction variables (item 3, part 2). */
4333 iv_set = find_optimal_iv_set (data);
4338 /* Create the new induction variables (item 4, part 1). */
4339 create_new_ivs (data, iv_set);
4341 /* Rewrite the uses (item 4, part 2). */
4342 rewrite_uses (data);
4344 /* Remove the ivs that are unused after rewriting. */
4345 remove_unused_ivs (data);
4347 loop_commit_inserts ();
4349 BITMAP_XFREE (iv_set);
4351 /* We have changed the structure of induction variables; it might happen
4352 that definitions in the scev database refer to some of them that were
4357 free_loop_data (data);
4362 /* Main entry point. Optimizes induction variables in LOOPS. */
4365 tree_ssa_iv_optimize (struct loops *loops)
4368 struct ivopts_data data;
4370 tree_ssa_iv_optimize_init (loops, &data);
4372 /* Optimize the loops starting with the innermost ones. */
4373 loop = loops->tree_root;
4377 #ifdef ENABLE_CHECKING
4378 verify_loop_closed_ssa ();
4382 /* Scan the loops, inner ones first. */
4383 while (loop != loops->tree_root)
4385 if (dump_file && (dump_flags & TDF_DETAILS))
4386 flow_loop_dump (loop, dump_file, NULL, 1);
4387 if (tree_ssa_iv_optimize_loop (&data, loop))
4389 #ifdef ENABLE_CHECKING
4390 verify_loop_closed_ssa ();
4405 tree_ssa_iv_optimize_finalize (loops, &data);