1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
/* NOTE(review): this span is an extraction fragment — the `struct`/`enum`
   opening and closing lines (struct iv, struct version_info, struct loop_data,
   enum use_type, struct cost_pair, struct iv_use, enum iv_position,
   struct iv_cand, struct ivopts_data, struct iv_ca, struct iv_ca_delta) are
   missing, and each line carries a stray leading number from the extraction.
   The field/enumerator lines themselves are kept byte-identical below.  */
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
/* Fields of struct iv.  */
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
/* Fields of struct version_info.  */
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
127 unsigned regs_used; /* Number of registers used. */
/* Enumerators of enum use_type — the kinds of interesting iv uses.  */
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
/* Fields of struct iv_use — one record per interesting use.  */
151 unsigned id; /* The id of the use. */
152 enum use_type type; /* Type of the use. */
153 struct iv *iv; /* The induction variable it is based on. */
154 tree stmt; /* Statement in that it occurs. */
155 tree *op_p; /* The place where it occurs. */
156 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 unsigned n_map_members; /* Number of candidates in the cost_map list. */
160 struct cost_pair *cost_map;
161 /* The costs wrto the iv candidates. */
163 struct iv_cand *selected;
164 /* The selected candidate. */
167 /* The position where the iv is computed. */
170 IP_NORMAL, /* At the end, just before the exit condition. */
171 IP_END, /* At the end of the latch block. */
172 IP_ORIGINAL /* The original biv. */
175 /* The induction variable candidate. */
178 unsigned id; /* The number of the candidate. */
179 bool important; /* Whether this is an "important" candidate, i.e. such
180 that it should be considered by all uses. */
181 enum iv_position pos; /* Where it is computed. */
182 tree incremented_at; /* For original biv, the statement where it is
184 tree var_before; /* The variable used for it before increment. */
185 tree var_after; /* The variable used for it after increment. */
186 struct iv *iv; /* The value of the candidate. NULL for
187 "pseudocandidate" used to indicate the possibility
188 to replace the final value of an iv by direct
189 computation of the value. */
190 unsigned cost; /* Cost of the candidate. */
193 /* The data used by the induction variable optimizations. */
/* Fields of struct ivopts_data — the pass-wide state.  */
197 /* The currently optimized loop. */
198 struct loop *current_loop;
200 /* Numbers of iterations for all exits of the current loop. */
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
209 /* The bitmap of indices in version_info whose value was changed. */
212 /* The maximum invariant id. */
215 /* The uses of induction variables. */
218 /* The candidates. */
219 varray_type iv_candidates;
221 /* A bitmap of important candidates. */
222 bitmap important_candidates;
224 /* Whether to consider just related and important candidates when replacing a
226 bool consider_all_candidates;
229 /* An assignment of iv candidates to uses. */
/* Fields of struct iv_ca — one candidate-to-use assignment.  */
233 /* The number of uses covered by the assignment. */
236 /* Number of uses that cannot be expressed by the candidates in the set. */
239 /* Candidate assigned to a use, together with the related costs. */
240 struct cost_pair **cand_for_use;
242 /* Number of times each candidate is used. */
243 unsigned *n_cand_uses;
245 /* The candidates used. */
248 /* The number of candidates in the set. */
251 /* Total number of registers needed. */
254 /* Total cost of expressing uses. */
255 unsigned cand_use_cost;
257 /* Total cost of candidates. */
260 /* Number of times each invariant is used. */
261 unsigned *n_invariant_uses;
263 /* Total cost of the assignment. */
267 /* Difference of two iv candidate assignments. */
/* Fields of struct iv_ca_delta — a linked list of assignment changes,
   kept so a tentative change can be rolled back.  */
274 /* An old assignment (for rollback purposes). */
275 struct cost_pair *old_cp;
277 /* A new assignment. */
278 struct cost_pair *new_cp;
280 /* Next change in the list. */
281 struct iv_ca_delta *next_change;
284 /* Bound on number of candidates below that all candidates are considered. */
286 #define CONSIDER_ALL_CANDIDATES_BOUND \
287 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
289 /* If there are more iv occurrences, we just give up (it is quite unlikely that
290 optimizing such a loop would help, and it would take ages). */
292 #define MAX_CONSIDERED_USES \
293 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
295 /* If there are at most this number of ivs in the set, try removing unnecessary
296 ivs from the set always. */
298 #define ALWAYS_PRUNE_CAND_SET_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
301 /* The list of trees for that the decl_rtl field must be reset is stored
304 static varray_type decl_rtl_to_reset;
306 /* Number of uses recorded in DATA. */
308 static inline unsigned
309 n_iv_uses (struct ivopts_data *data)
311 return VARRAY_ACTIVE_SIZE (data->iv_uses);
314 /* Ith use recorded in DATA. */
316 static inline struct iv_use *
317 iv_use (struct ivopts_data *data, unsigned i)
319 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
322 /* Number of candidates recorded in DATA. */
324 static inline unsigned
325 n_iv_cands (struct ivopts_data *data)
327 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
330 /* Ith candidate recorded in DATA. */
332 static inline struct iv_cand *
333 iv_cand (struct ivopts_data *data, unsigned i)
335 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
338 /* The data for LOOP. */
340 static inline struct loop_data *
341 loop_data (struct loop *loop)
346 /* The single loop exit if it dominates the latch, NULL otherwise. */
349 single_dom_exit (struct loop *loop)
351 edge exit = loop->single_exit;
356 if (!just_once_each_iteration_p (loop, exit->src))
362 /* Dumps information about the induction variable IV to FILE. */
/* NOTE(review): fragmentary — the function's braces and the conditionals
   guarding each section (e.g. the checks that ssa_name/step/base_object are
   non-NULL before printing them) were lost in extraction; only the print
   statements survive.  Each section prints one labelled line.  */
364 extern void dump_iv (FILE *, struct iv *);
366 dump_iv (FILE *file, struct iv *iv)
370 fprintf (file, "ssa name ");
371 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
372 fprintf (file, "\n");
375 fprintf (file, "  type ");
376 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
377 fprintf (file, "\n");
381 fprintf (file, "  base ");
382 print_generic_expr (file, iv->base, TDF_SLIM);
383 fprintf (file, "\n");
385 fprintf (file, "  step ");
386 print_generic_expr (file, iv->step, TDF_SLIM);
387 fprintf (file, "\n");
/* The "invariant" branch prints iv->base: an iv with zero step is just a
   loop invariant, whose value is its base.  */
391 fprintf (file, "  invariant ");
392 print_generic_expr (file, iv->base, TDF_SLIM);
393 fprintf (file, "\n");
398 fprintf (file, "  base object ");
399 print_generic_expr (file, iv->base_object, TDF_SLIM);
400 fprintf (file, "\n");
404 fprintf (file, "  is a biv\n");
407 /* Dumps information about the USE to FILE. */
/* NOTE(review): fragmentary — the switch (use->type) skeleton, the
   USE_OUTER/USE_ADDRESS/USE_COMPARE case labels and the closing braces were
   lost in extraction; only one print per case survives.  */
409 extern void dump_use (FILE *, struct iv_use *);
411 dump_use (FILE *file, struct iv_use *use)
413 fprintf (file, "use %d\n", use->id);
417 case USE_NONLINEAR_EXPR:
418 fprintf (file, "  generic\n");
422 fprintf (file, "  outside\n");
426 fprintf (file, "  address\n");
430 fprintf (file, "  compare\n");
437 fprintf (file, "  in statement ");
438 print_generic_expr (file, use->stmt, TDF_SLIM);
439 fprintf (file, "\n");
441 fprintf (file, "  at position ");
/* Presumably guarded by a check that use->op_p is non-NULL — record_use is
   called with a NULL op_p for nonlinear/outer uses.  TODO confirm.  */
443 print_generic_expr (file, *use->op_p, TDF_SLIM);
444 fprintf (file, "\n");
446 dump_iv (file, use->iv);
448 if (use->related_cands)
450 fprintf (file, "  related candidates ");
451 dump_bitmap (file, use->related_cands);
455 /* Dumps information about the uses to FILE. */
457 extern void dump_uses (FILE *, struct ivopts_data *);
459 dump_uses (FILE *file, struct ivopts_data *data)
464 for (i = 0; i < n_iv_uses (data); i++)
466 use = iv_use (data, i);
468 dump_use (file, use);
469 fprintf (file, "\n");
473 /* Dumps information about induction variable candidate CAND to FILE. */
/* NOTE(review): fragmentary — the braces, the switch on cand->pos
   (IP_NORMAL/IP_END/IP_ORIGINAL) and the guard selecting the
   "final value replacement" pseudocandidate branch were lost in
   extraction.  */
475 extern void dump_cand (FILE *, struct iv_cand *);
477 dump_cand (FILE *file, struct iv_cand *cand)
479 struct iv *iv = cand->iv;
481 fprintf (file, "candidate %d%s\n",
482 cand->id, cand->important ? " (important)" : "");
/* A NULL iv marks the pseudocandidate for replacing the final value.  */
486 fprintf (file, "  final value replacement\n");
493 fprintf (file, "  incremented before exit test\n");
497 fprintf (file, "  incremented at end\n");
501 fprintf (file, "  original biv\n");
508 /* Returns the info for ssa version VER. */
510 static inline struct version_info *
511 ver_info (struct ivopts_data *data, unsigned ver)
513 return data->version_info + ver;
516 /* Returns the info for ssa name NAME. */
518 static inline struct version_info *
519 name_info (struct ivopts_data *data, tree name)
521 return ver_info (data, SSA_NAME_VERSION (name));
524 /* Checks whether there exists number X such that X * B = A, counting modulo
/* NOTE(review): fragmentary — the signature's trailing parameter (an out
   parameter receiving X, per the "there exists number X" contract — TODO
   confirm), the loop body halving A and B, the early-return branches and
   the final returns were lost in extraction.  Arithmetic is done modulo
   2^BITS via MASK.  */
528 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
/* MASK has the low BITS bits set; the double shift avoids UB when
   BITS equals the width of HOST_WIDE_INT.  */
531 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
532 unsigned HOST_WIDE_INT inv, ex, val;
538 /* First divide the whole equation by 2 as long as possible. */
539 while (!(a & 1) && !(b & 1))
549 /* If b is still even, a is odd and there is no such x. */
553 /* Find the inverse of b. We compute it as
554 b^(2^(bits - 1) - 1) (mod 2^bits). */
/* Square-and-multiply: after the loop, inv == b^(2^(bits-1)-1) mod 2^bits,
   which is b's multiplicative inverse for odd b.  */
557 for (i = 0; i < bits - 1; i++)
559 inv = (inv * ex) & mask;
560 ex = (ex * ex) & mask;
563 val = (a * inv) & mask;
/* Sanity check: val really solves val * b == a (mod 2^bits).  */
565 gcc_assert (((val * b) & mask) == a);
567 if ((val >> (bits - 1)) & 1)
575 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
/* NOTE(review): this span holds three related fragmentary predicates
   (stmt_after_ip_normal_pos, stmt_after_ip_original_pos,
   stmt_after_increment); braces, early returns and the switch on
   cand->pos were lost in extraction.  */
579 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
581 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
/* Statements in the latch are after the IP_NORMAL position (which is just
   before the exit condition).  */
585 if (sbb == loop->latch)
591 return stmt == last_stmt (bb);
594 /* Returns true if STMT if after the place where the original induction
595 variable CAND is incremented. */
598 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
600 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
601 basic_block stmt_bb = bb_for_stmt (stmt);
602 block_stmt_iterator bsi;
/* If STMT's block is not dominated by the increment's block, STMT cannot
   execute after the increment.  */
604 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
607 if (stmt_bb != cand_bb)
610 /* Scan the block from the end, since the original ivs are usually
611 incremented at the end of the loop body. */
612 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
/* Meeting the increment first (from the end) means STMT precedes it.  */
614 if (bsi_stmt (bsi) == cand->incremented_at)
616 if (bsi_stmt (bsi) == stmt)
621 /* Returns true if STMT if after the place where the induction variable
622 CAND is incremented in LOOP. */
625 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
/* Dispatches on cand->pos; the IP_END case (always false — the latch end is
   after everything) is among the lines lost in extraction.  TODO confirm.  */
633 return stmt_after_ip_normal_pos (loop, stmt);
636 return stmt_after_ip_original_pos (cand, stmt);
643 /* Element of the table in that we cache the numbers of iterations obtained
644 from exits of the loop. */
/* NOTE(review): fragment — the `struct nfe_cache_elt {` opener, the
   `edge exit;` and `bool valid_p;` field lines suggested by the surviving
   comments, and the closing brace were lost in extraction.  */
648 /* The edge for that the number of iterations is cached. */
651 /* True if the # of iterations was successfully determined. */
654 /* Description of # of iterations. */
655 struct tree_niter_desc niter;
658 /* Hash function for nfe_cache_elt E. */
661 nfe_hash (const void *e)
663 const struct nfe_cache_elt *elt = e;
665 return htab_hash_pointer (elt->exit);
668 /* Equality function for nfe_cache_elt E1 and edge E2. */
671 nfe_eq (const void *e1, const void *e2)
673 const struct nfe_cache_elt *elt1 = e1;
675 return elt1->exit == e2;
678 /* Returns structure describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
/* NOTE(review): fragmentary — the slot declaration, the empty-slot check
   around the xmalloc branch, the *slot assignment and the intermediate
   else-path (reading the cached element back out of the slot) were lost in
   extraction.  The function memoizes number_of_iterations_exit results in
   the data->niters hash table keyed by the exit edge.  */
681 static struct tree_niter_desc *
682 niter_for_exit (struct ivopts_data *data, edge exit)
684 struct nfe_cache_elt *nfe_desc;
687 slot = htab_find_slot_with_hash (data->niters, exit,
688 htab_hash_pointer (exit),
/* Cache miss: compute and remember whether the niter analysis succeeded.  */
693 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
694 nfe_desc->exit = exit;
695 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
696 exit, &nfe_desc->niter);
702 if (!nfe_desc->valid_p)
705 return &nfe_desc->niter;
708 /* Returns structure describing number of iterations determined from
709 single dominating exit of DATA->current_loop, or NULL if something
/* NOTE(review): the `if (!exit) return NULL;` guard between these two lines
   was lost in extraction — presumably present, since single_dom_exit can
   return NULL.  TODO confirm.  */
712 static struct tree_niter_desc *
713 niter_for_single_dom_exit (struct ivopts_data *data)
715 edge exit = single_dom_exit (data->current_loop);
720 return niter_for_exit (data, exit);
723 /* Initializes data structures used by the iv optimization pass, stored
724 in DATA. LOOPS is the loop tree. */
/* NOTE(review): fragmentary — the return type line, braces and the `unsigned i;`
   declaration were lost in extraction.  Allocates the per-ssa-name info array,
   the bitmaps, the niter cache, per-loop aux data and the three varrays.  */
727 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
/* Double the current number of ssa names, leaving headroom for names the
   pass creates later.  */
731 data->version_info_size = 2 * num_ssa_names;
732 data->version_info = xcalloc (data->version_info_size,
733 sizeof (struct version_info));
734 data->relevant = BITMAP_ALLOC (NULL);
735 data->important_candidates = BITMAP_ALLOC (NULL);
736 data->max_inv_id = 0;
/* The niter cache owns its elements: htab's `free` callback releases them.  */
737 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
/* Loop 0 is the dummy root; start from 1 and skip removed loops.  */
739 for (i = 1; i < loops->num; i++)
740 if (loops->parray[i])
741 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
744 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
745 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
748 /* Returns a memory object to that EXPR points. In case we are able to
749 determine that it does not point to any such object, NULL is returned. */
/* NOTE(review): fragmentary — the switch (code) skeleton, its case labels
   (ADDR_EXPR, PLUS_EXPR/MINUS_EXPR, the conversion case, and the default
   returning EXPR itself) and several guards were lost in extraction; only
   the per-case bodies survive.  */
752 determine_base_object (tree expr)
754 enum tree_code code = TREE_CODE (expr);
755 tree base, obj, op0, op1;
/* Non-pointer expressions cannot designate a base object.  */
757 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
/* ADDR_EXPR case: strip down to the base address of the referenced object.  */
766 obj = TREE_OPERAND (expr, 0);
767 base = get_base_address (obj);
/* Taking the address of *p points to whatever p's base object is.  */
772 if (TREE_CODE (base) == INDIRECT_REF)
773 return determine_base_object (TREE_OPERAND (base, 0));
775 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
/* PLUS_EXPR/MINUS_EXPR case: combine the base objects of both operands.  */
779 op0 = determine_base_object (TREE_OPERAND (expr, 0));
780 op1 = determine_base_object (TREE_OPERAND (expr, 1));
786 return (code == PLUS_EXPR
788 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
790 return fold (build (code, ptr_type_node, op0, op1));
/* Conversion case: look through to the converted operand.  */
794 return determine_base_object (TREE_OPERAND (expr, 0));
/* Default: the expression itself serves as the base object.  */
797 return fold_convert (ptr_type_node, expr);
801 /* Allocates an induction variable with given initial value BASE and step STEP
/* NOTE(review): fragmentary — braces, the normalization of a zero STEP to
   NULL_TREE (suggested by the integer_zerop test), the base/step field
   assignments and the final return were lost in extraction.  */
805 alloc_iv (tree base, tree step)
807 struct iv *iv = xcalloc (1, sizeof (struct iv));
/* A literal zero step is canonicalized (presumably to NULL_TREE, the
   representation used by zero_p checks elsewhere — TODO confirm).  */
809 if (step && integer_zerop (step))
813 iv->base_object = determine_base_object (base);
816 iv->have_use_for = false;
818 iv->ssa_name = NULL_TREE;
823 /* Sets STEP and BASE for induction variable IV. */
826 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
828 struct version_info *info = name_info (data, iv);
/* Each ssa name may be registered as an iv only once.  */
830 gcc_assert (!info->iv);
832 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
833 info->iv = alloc_iv (base, step);
834 info->iv->ssa_name = iv;
837 /* Finds induction variable declaration for VAR. */
/* NOTE(review): fragmentary — braces, the basic_block declaration and parts
   of the condition around bb/flow_bb_inside_loop_p were lost in
   extraction.  */
840 get_iv (struct ivopts_data *data, tree var)
844 if (!name_info (data, var)->iv)
846 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
/* Names defined outside the current loop are invariants: register them
   lazily as zero-step ivs.  */
849 || !flow_bb_inside_loop_p (data->current_loop, bb))
850 set_iv (data, var, var, NULL_TREE);
853 return name_info (data, var)->iv;
856 /* Determines the step of a biv defined in PHI. */
/* NOTE(review): fragmentary — the early returns after the is_gimple_reg and
   simple_iv checks, and the branch selecting between the computed step and
   the zero constant, were lost in extraction.  */
859 determine_biv_step (tree phi)
861 struct loop *loop = bb_for_stmt (phi)->loop_father;
862 tree name = PHI_RESULT (phi), base, step;
863 tree type = TREE_TYPE (name);
865 if (!is_gimple_reg (name))
868 if (!simple_iv (loop, phi, name, &base, &step))
/* A loop-invariant phi result gets an explicit zero step.  */
872 return build_int_cst (type, 0);
877 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
/* NOTE(review): three related fragmentary predicates; braces, early
   returns (including, presumably, a NULL-EXP guard in abnormal_ssa_name_p —
   TODO confirm) and parts of the operand recursion were lost in
   extraction.  */
880 abnormal_ssa_name_p (tree exp)
885 if (TREE_CODE (exp) != SSA_NAME)
888 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
891 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
892 abnormal phi node. Callback for for_each_index. */
895 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
896 void *data ATTRIBUTE_UNUSED)
/* Operands 2 and 3 of an ARRAY_REF are the lower bound and element size.  */
898 if (TREE_CODE (base) == ARRAY_REF)
900 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
902 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
/* Note the inverted sense: for_each_index stops on a false return, so
   returning !abnormal means "keep going while clean".  */
906 return !abnormal_ssa_name_p (*index);
909 /* Returns true if EXPR contains a ssa name that occurs in an
910 abnormal phi node. */
913 contains_abnormal_ssa_name_p (tree expr)
915 enum tree_code code = TREE_CODE (expr);
916 enum tree_code_class class = TREE_CODE_CLASS (code);
918 if (code == SSA_NAME)
919 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
921 if (code == INTEGER_CST
922 || is_gimple_min_invariant (expr))
925 if (code == ADDR_EXPR)
926 return !for_each_index (&TREE_OPERAND (expr, 0),
927 idx_contains_abnormal_ssa_name_p,
/* Binary/unary fallthrough: recurse into operand 1 then operand 0 (the
   class-based dispatch lines are among those lost in extraction).  */
934 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
939 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
951 /* Finds basic ivs. */
/* NOTE(review): two fragmentary functions; braces, `continue` statements
   after failed checks, the `found = true` bookkeeping and the final return
   of find_bivs were lost in extraction.  */
954 find_bivs (struct ivopts_data *data)
956 tree phi, step, type, base;
958 struct loop *loop = data->current_loop;
960 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
/* Abnormal phis must not be touched by the optimization.  */
962 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
965 step = determine_biv_step (phi);
/* A zero step means the phi is an invariant, not a biv.  */
969 if (cst_and_fits_in_hwi (step)
970 && int_cst_value (step) == 0)
973 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
974 if (contains_abnormal_ssa_name_p (base))
977 type = TREE_TYPE (PHI_RESULT (phi));
978 base = fold_convert (type, base);
979 step = fold_convert (type, step);
981 /* FIXME: We do not handle induction variables whose step does
982 not satisfy cst_and_fits_in_hwi. */
983 if (!cst_and_fits_in_hwi (step))
986 set_iv (data, PHI_RESULT (phi), base, step);
993 /* Marks basic ivs. */
996 mark_bivs (struct ivopts_data *data)
999 struct iv *iv, *incr_iv;
1000 struct loop *loop = data->current_loop;
1001 basic_block incr_bb;
1003 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1005 iv = get_iv (data, PHI_RESULT (phi));
/* The latch-edge argument names the increment of the biv.  */
1009 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010 incr_iv = get_iv (data, var);
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016 if (incr_bb->loop_father != data->current_loop
1017 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
/* Both the phi result's iv and its increment are marked (the companion
   `iv->biv_p = true;` line was lost in extraction — TODO confirm).  */
1021 incr_iv->biv_p = true;
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026 parameters to BASE and STEP. */
/* NOTE(review): fragmentary — braces, the `return false;` statements after
   each failed check and the final `return true;` were lost in
   extraction.  */
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1030 tree *base, tree *step)
1033 struct loop *loop = data->current_loop;
/* Only plain assignments to an ssa name can define a giv.  */
1038 if (TREE_CODE (stmt) != MODIFY_EXPR)
1041 lhs = TREE_OPERAND (stmt, 0);
1042 if (TREE_CODE (lhs) != SSA_NAME)
/* Ask scalar evolution whether the rhs is a simple affine iv.  */
1045 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1048 /* FIXME: We do not handle induction variables whose step does
1049 not satisfy cst_and_fits_in_hwi. */
1051 && !cst_and_fits_in_hwi (*step))
1054 if (contains_abnormal_ssa_name_p (*base))
1060 /* Finds general ivs in statement STMT. */
1063 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1067 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1070 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1073 /* Finds general ivs in basic block BB. */
1076 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1078 block_stmt_iterator bsi;
1080 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1081 find_givs_in_stmt (data, bsi_stmt (bsi));
1084 /* Finds general ivs. */
/* NOTE(review): fragmentary — braces, the `unsigned i;` declaration and the
   trailing free (body) after the loop were lost in extraction.  Walks the
   loop body in dominator order so defs are seen before uses.  */
1087 find_givs (struct ivopts_data *data)
1089 struct loop *loop = data->current_loop;
1090 basic_block *body = get_loop_body_in_dom_order (loop);
1093 for (i = 0; i < loop->num_nodes; i++)
1094 find_givs_in_bb (data, body[i]);
1098 /* For each ssa name defined in LOOP determines whether it is an induction
1099 variable and if so, its initial value and step. */
/* NOTE(review): fragmentary — braces, the `return false;` after a failed
   find_bivs, the calls to find_givs/mark_bivs between the phases, the
   guards around the niter dump and the final `return true;` were lost in
   extraction.  */
1102 find_induction_variables (struct ivopts_data *data)
1107 if (!find_bivs (data))
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1115 struct tree_niter_desc *niter;
1117 niter = niter_for_single_dom_exit (data);
1121 fprintf (dump_file, "  number of iterations ");
1122 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1123 fprintf (dump_file, "\n");
1125 fprintf (dump_file, "  may be zero if ");
1126 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1127 fprintf (dump_file, "\n");
1128 fprintf (dump_file, "\n");
1131 fprintf (dump_file, "Induction variables:\n\n");
/* Dump every relevant ssa version that has an iv description.  */
1133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1135 if (ver_info (data, i)->iv)
1136 dump_iv (dump_file, ver_info (data, i)->iv);
1143 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
/* NOTE(review): two fragmentary functions; braces, the assignments of
   use->iv/stmt/op_p and the final `return use;` of record_use, and several
   early returns of record_invariant, were lost in extraction.  */
1145 static struct iv_use *
1146 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1147 tree stmt, enum use_type use_type)
1149 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1151 use->id = n_iv_uses (data);
1152 use->type = use_type;
1156 use->related_cands = BITMAP_ALLOC (NULL);
1158 /* To avoid showing ssa name in the dumps, if it was not reset by the
1160 iv->ssa_name = NULL_TREE;
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 dump_use (dump_file, use);
1165 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1170 /* Checks whether OP is a loop-level invariant and if so, records it.
1171 NONLINEAR_USE is true if the invariant is used in a way we do not
1172 handle specially. */
1175 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1178 struct version_info *info;
/* Only gimple-register ssa names can be tracked as invariants.  */
1180 if (TREE_CODE (op) != SSA_NAME
1181 || !is_gimple_reg (op))
/* Names defined inside the current loop are not loop invariants.  */
1184 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1186 && flow_bb_inside_loop_p (data->current_loop, bb))
1189 info = name_info (data, op);
1191 info->has_nonlin_use |= nonlinear_use;
/* Presumably guarded by `if (!info->inv_id)` so each invariant gets a
   single id — the guard line was lost in extraction.  TODO confirm.  */
1193 info->inv_id = ++data->max_inv_id;
1194 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1197 /* Checks whether the use OP is interesting and if so, records it
/* NOTE(review): fragmentary — braces, local declarations (civ, stmt, use),
   several early returns and the `*civ = *iv;` copy before record_use were
   lost in extraction.  Shared worker for USE_NONLINEAR_EXPR and USE_OUTER
   recording.  */
1200 static struct iv_use *
1201 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1209 if (TREE_CODE (op) != SSA_NAME)
1212 iv = get_iv (data, op)
1216 if (iv->have_use_for)
/* Reuse the existing use record for this iv.  */
1218 use = iv_use (data, iv->use_id);
1220 gcc_assert (use->type == USE_NONLINEAR_EXPR
1221 || use->type == USE_OUTER);
/* A nonlinear use subsumes an outer use: upgrade the recorded type.  */
1223 if (type == USE_NONLINEAR_EXPR)
1224 use->type = USE_NONLINEAR_EXPR;
/* Zero-step ivs are invariants; record them as such instead.  */
1228 if (zero_p (iv->step))
1230 record_invariant (data, op, true);
1233 iv->have_use_for = true;
/* Record a private copy of the iv description for the use.  */
1235 civ = xmalloc (sizeof (struct iv));
1238 stmt = SSA_NAME_DEF_STMT (op);
1239 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1240 || TREE_CODE (stmt) == MODIFY_EXPR);
1242 use = record_use (data, NULL, civ, stmt, type);
1243 iv->use_id = use->id;
1248 /* Checks whether the use OP is interesting and if so, records it. */
1250 static struct iv_use *
1251 find_interesting_uses_op (struct ivopts_data *data, tree op)
1253 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1256 /* Records a definition of induction variable OP that is used outside of the
1259 static struct iv_use *
1260 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1262 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1265 /* Checks whether the condition *COND_P in STMT is interesting
1266 and if so, records it. */
/* NOTE(review): fragmentary — braces, early returns, the handling of a bare
   SSA_NAME condition, and the fallback paths binding iv0/iv1 to &const_iv
   for non-ssa operands were lost in extraction.  */
1269 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1273 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1275 tree zero = integer_zero_node;
/* const_iv stands in for constant operands: an "iv" with no step.  */
1277 const_iv.step = NULL_TREE;
/* Constant-folded conditions are not interesting.  */
1279 if (integer_zerop (*cond_p)
1280 || integer_nonzerop (*cond_p))
1283 if (TREE_CODE (*cond_p) == SSA_NAME)
1290 op0_p = &TREE_OPERAND (*cond_p, 0);
1291 op1_p = &TREE_OPERAND (*cond_p, 1);
1294 if (TREE_CODE (*op0_p) == SSA_NAME)
1295 iv0 = get_iv (data, *op0_p);
1299 if (TREE_CODE (*op1_p) == SSA_NAME)
1300 iv1 = get_iv (data, *op1_p);
1304 if (/* When comparing with non-invariant value, we may not do any senseful
1305 induction variable elimination. */
1307 /* Eliminating condition based on two ivs would be nontrivial.
1308 ??? TODO -- it is not really important to handle this case. */
1309 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
/* Cannot eliminate the condition itself; record the operands as plain
   nonlinear uses instead.  */
1311 find_interesting_uses_op (data, *op0_p);
1312 find_interesting_uses_op (data, *op1_p);
1316 if (zero_p (iv0->step) && zero_p (iv1->step))
1318 /* If both are invariants, this is a work for unswitching. */
/* Exactly one side is a moving iv: record the whole condition as a
   USE_COMPARE use based on that iv.  */
1322 civ = xmalloc (sizeof (struct iv));
1323 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1324 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1327 /* Returns true if expression EXPR is obviously invariant in LOOP,
1328 i.e. if all its operands are defined outside of the LOOP. */
/* NOTE(review): fragmentary — braces, local declarations (len, i, def_bb),
   early returns and the final `return true;` were lost in extraction, as
   was the `struct ifs_ivopts_data` closing part below.  */
1331 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1336 if (is_gimple_min_invariant (expr))
1339 if (TREE_CODE (expr) == SSA_NAME)
/* An ssa name is invariant iff its definition lies outside the loop.  */
1341 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1343 && flow_bb_inside_loop_p (loop, def_bb))
/* Composite expression: invariant iff every operand is.  */
1352 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1353 for (i = 0; i < len; i++)
1354 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1360 /* Cumulates the steps of indices into DATA and replaces their values with the
1361 initial ones. Returns false when the value of the index cannot be determined.
1362 Callback for for_each_index. */
/* Carrier struct for the idx_find_step callback: the pass data plus the
   statement being analyzed and the accumulated step (fields beyond
   ivopts_data were lost in extraction).  */
1364 struct ifs_ivopts_data
1366 struct ivopts_data *ivopts_data;
/* NOTE(review): fragmentary — the return type line, braces, several
   `return true/false;` statements, the `*idx = iv->base;` replacement of
   the index by its initial value, and the guards around the iv_step
   computation were lost in extraction.  */
1372 idx_find_step (tree base, tree *idx, void *data)
1374 struct ifs_ivopts_data *dta = data;
1376 tree step, type, iv_type, iv_step, lbound, off;
1377 struct loop *loop = dta->ivopts_data->current_loop;
/* Aligned/misaligned indirect refs are not handled by this analysis.  */
1379 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1380 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1383 /* If base is a component ref, require that the offset of the reference
1385 if (TREE_CODE (base) == COMPONENT_REF)
1387 off = component_ref_field_offset (base);
1388 return expr_invariant_in_loop_p (loop, off);
1391 /* If base is array, first check whether we will be able to move the
1392 reference out of the loop (in order to take its address in strength
1393 reduction). In order for this to work we need both lower bound
1394 and step to be loop invariants. */
1395 if (TREE_CODE (base) == ARRAY_REF)
1397 step = array_ref_element_size (base);
1398 lbound = array_ref_low_bound (base);
1400 if (!expr_invariant_in_loop_p (loop, step)
1401 || !expr_invariant_in_loop_p (loop, lbound))
/* Non-ssa indices carry no step; nothing to accumulate.  */
1405 if (TREE_CODE (*idx) != SSA_NAME)
1408 iv = get_iv (dta->ivopts_data, *idx);
1417 iv_type = TREE_TYPE (iv->base);
1418 type = build_pointer_type (TREE_TYPE (base));
1419 if (TREE_CODE (base) == ARRAY_REF)
1421 step = array_ref_element_size (base);
1423 /* We only handle addresses whose step is an integer constant. */
1424 if (TREE_CODE (step) != INTEGER_CST)
1428 /* The step for pointer arithmetics already is 1 byte. */
1429 step = build_int_cst (type, 1);
/* A narrower iv may wrap before covering the address range; try to prove
   it can be counted in the wider type.  */
1431 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1432 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1433 type, iv->base, iv->step, dta->stmt);
1435 iv_step = fold_convert (iv_type, iv->step);
1439 /* The index might wrap. */
/* Accumulate element_size * iv_step into the running step.  */
1443 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
1446 *dta->step_p = step;
1448 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1449 *dta->step_p, step);
1454 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1455 object is passed to it in DATA. */
/* NOTE(review): two fragmentary functions; return type lines, braces and
   the `return true;`/`return false;` results were lost in extraction.  */
1458 idx_record_use (tree base, tree *idx,
1461 find_interesting_uses_op (data, *idx);
/* Array refs also use the (possibly variable) element size and lower
   bound; record those as uses too.  */
1462 if (TREE_CODE (base) == ARRAY_REF)
1464 find_interesting_uses_op (data, array_ref_element_size (base));
1465 find_interesting_uses_op (data, array_ref_low_bound (base));
1470 /* Returns true if memory reference REF may be unaligned. */
1473 may_be_unaligned_p (tree ref)
1477 HOST_WIDE_INT bitsize;
1478 HOST_WIDE_INT bitpos;
1480 enum machine_mode mode;
1481 int unsignedp, volatilep;
1482 unsigned base_align;
1484 /* The test below is basically copy of what expr.c:normal_inner_ref
1485 does to check whether the object must be loaded by parts when
1486 STRICT_ALIGNMENT is true. */
1487 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488 &unsignedp, &volatilep, true);
1489 base_type = TREE_TYPE (base);
1490 base_align = TYPE_ALIGN (base_type);
/* Unaligned if the base alignment is below the mode's requirement, or the
   bit position is not mode- or byte-aligned.  */
1493 && (base_align < GET_MODE_ALIGNMENT (mode)
1494 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495 || bitpos % BITS_PER_UNIT != 0))
1501 /* Finds addresses in *OP_P inside STMT. */
/* NOTE(review): fragmentary — braces, several `goto fail`/early-exit paths
   and the `fail:` label before the fallback for_each_index call were lost
   in extraction.  */
1504 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
/* Work on an unshared copy so the rewrite below cannot clobber *OP_P.  */
1506 tree base = unshare_expr (*op_p), step = NULL;
1508 struct ifs_ivopts_data ifs_ivopts_data;
1510 /* Ignore bitfields for now. Not really something terribly complicated
1512 if (TREE_CODE (base) == COMPONENT_REF
1513 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
/* On strict-alignment targets, give up on possibly-unaligned refs.  */
1516 if (STRICT_ALIGNMENT
1517 && may_be_unaligned_p (base))
1520 ifs_ivopts_data.ivopts_data = data;
1521 ifs_ivopts_data.stmt = stmt;
1522 ifs_ivopts_data.step_p = &step;
/* Accumulate the total step of all indices; bail to the fallback path if
   any index cannot be analyzed (or, per the missing condition, if step
   stayed NULL — TODO confirm).  */
1523 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1527 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1528 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1530 if (TREE_CODE (base) == INDIRECT_REF)
1531 base = TREE_OPERAND (base, 0);
1533 base = build_addr (base);
1535 civ = alloc_iv (base, step);
1536 record_use (data, op_p, civ, stmt, USE_ADDRESS);
/* Fallback: the address is not an affine iv; still record the plain uses
   of every index.  */
1540 for_each_index (op_p, idx_record_use, data);
1543 /* Finds and records invariants used in STMT.  */
1546 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1548 use_optype uses = NULL;
/* PHI nodes keep their operands in PHI arguments; other statements use
   the USE operand vector computed by get_stmt_operands.  */
1552 if (TREE_CODE (stmt) == PHI_NODE)
1553 n = PHI_NUM_ARGS (stmt);
1556 get_stmt_operands (stmt);
1557 uses = STMT_USE_OPS (stmt);
1558 n = NUM_USES (uses);
1561 for (i = 0; i < n; i++)
1563 if (TREE_CODE (stmt) == PHI_NODE)
1564 op = PHI_ARG_DEF (stmt, i);
1566 op = USE_OP (uses, i);
/* Record each operand as an invariant; "false" marks it as not having a
   nonlinear use at this point.  */
1568 record_invariant (data, op, false);
1572 /* Finds interesting uses of induction variables in the statement STMT.  */
1575 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1579 use_optype uses = NULL;
1582 find_invariants_stmt (data, stmt);
/* Case 1: a COND_EXPR -- the condition is a potential iv comparison.  */
1584 if (TREE_CODE (stmt) == COND_EXPR)
1586 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
/* Case 2: an assignment -- look at both sides.  */
1590 if (TREE_CODE (stmt) == MODIFY_EXPR)
1592 lhs = TREE_OPERAND (stmt, 0);
1593 rhs = TREE_OPERAND (stmt, 1);
1595 if (TREE_CODE (lhs) == SSA_NAME)
1597 /* If the statement defines an induction variable, the uses are not
1598 interesting by themselves.  */
1600 iv = get_iv (data, lhs);
1602 if (iv && !zero_p (iv->step))
1606 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1608 case tcc_comparison:
1609 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
/* NOTE(review): the tcc_reference case label is elided; the two lines
   below handle memory references on either side of the assignment.  */
1613 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1614 if (REFERENCE_CLASS_P (lhs))
1615 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
/* A store of a gimple value into memory: the address of the store and
   the stored value are both interesting.  */
1621 if (REFERENCE_CLASS_P (lhs)
1622 && is_gimple_val (rhs))
1624 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1625 find_interesting_uses_op (data, rhs);
1629 /* TODO -- we should also handle address uses of type
1631 memory = call (whatever);
/* Case 3: a loop-header PHI defining a biv -- its uses are handled when
   the iv itself is processed, so skip the operand walk below.  */
1638 if (TREE_CODE (stmt) == PHI_NODE
1639 && bb_for_stmt (stmt) == data->current_loop->header)
1641 lhs = PHI_RESULT (stmt);
1642 iv = get_iv (data, lhs);
1644 if (iv && !zero_p (iv->step))
/* Fallback: record every SSA-name operand that carries an iv as a
   generic (nonlinear) use.  */
1648 if (TREE_CODE (stmt) == PHI_NODE)
1649 n = PHI_NUM_ARGS (stmt);
1652 uses = STMT_USE_OPS (stmt);
1653 n = NUM_USES (uses);
1656 for (i = 0; i < n; i++)
1658 if (TREE_CODE (stmt) == PHI_NODE)
1659 op = PHI_ARG_DEF (stmt, i);
1661 op = USE_OP (uses, i);
1663 if (TREE_CODE (op) != SSA_NAME)
1666 iv = get_iv (data, op);
1670 find_interesting_uses_op (data, op);
1674 /* Finds interesting uses of induction variables outside of loops
1675 on loop exit edge EXIT.  */
1678 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
/* Each PHI in the exit destination block receives a value from inside
   the loop over EXIT; that value is an "outer" use of an iv.  */
1682 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1684 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1685 find_interesting_uses_outer (data, def);
1689 /* Finds uses of the induction variables that are interesting.  */
1692 find_interesting_uses (struct ivopts_data *data)
1695 block_stmt_iterator bsi;
1697 basic_block *body = get_loop_body (data->current_loop);
1699 struct version_info *info;
1702 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Uses:\n\n");
/* Scan every block of the loop body: first outgoing exit edges (uses
   outside the loop), then PHIs and statements of the block itself.  */
1705 for (i = 0; i < data->current_loop->num_nodes; i++)
1710 FOR_EACH_EDGE (e, ei, bb->succs)
1711 if (e->dest != EXIT_BLOCK_PTR
1712 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1713 find_interesting_uses_outside (data, e);
1715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1716 find_interesting_uses_stmt (data, phi);
1717 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1718 find_interesting_uses_stmt (data, bsi_stmt (bsi));
/* Dump the discovered invariants; an invariant with no nonlinear use is
   flagged as eliminable.  */
1721 if (dump_file && (dump_flags & TDF_DETAILS))
1725 fprintf (dump_file, "\n");
1727 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1729 info = ver_info (data, i);
1732 fprintf (dump_file, " ");
1733 print_generic_expr (dump_file, info->name, TDF_SLIM);
1734 fprintf (dump_file, " is invariant (%d)%s\n",
1735 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1739 fprintf (dump_file, "\n");
1745 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1746 is true, assume we are inside an address.  */
1749 strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1751 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1752 enum tree_code code;
1753 tree type, orig_type = TREE_TYPE (expr);
1754 unsigned HOST_WIDE_INT off0, off1, st;
1755 tree orig_expr = expr;
1758 type = TREE_TYPE (expr);
1759 code = TREE_CODE (expr);
/* Constant case: the whole expression becomes the offset and a zero of
   the original type is returned.  NOTE(review): the switch/case labels
   between these fragments are elided from this excerpt.  */
1765 if (!cst_and_fits_in_hwi (expr)
1769 *offset = int_cst_value (expr);
1770 return build_int_cst_type (orig_type, 0);
/* PLUS_EXPR / MINUS_EXPR case: recurse into both operands and combine
   their offsets (subtracting for MINUS).  */
1774 op0 = TREE_OPERAND (expr, 0);
1775 op1 = TREE_OPERAND (expr, 1);
1777 op0 = strip_offset (op0, false, &off0);
1778 op1 = strip_offset (op1, false, &off1);
1780 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
/* If nothing was stripped, return the original expression unchanged to
   preserve sharing.  */
1781 if (op0 == TREE_OPERAND (expr, 0)
1782 && op1 == TREE_OPERAND (expr, 1))
1787 else if (zero_p (op0))
1789 if (code == PLUS_EXPR)
1792 expr = build1 (NEGATE_EXPR, type, op1);
1795 expr = build2 (code, type, op0, op1);
1797 return fold_convert (orig_type, expr);
/* ARRAY_REF case: a constant index contributes index * element_size to
   the offset; bail out if the element size is not a host-word constant.  */
1803 step = array_ref_element_size (expr);
1804 if (!cst_and_fits_in_hwi (step))
1807 st = int_cst_value (step);
1808 op1 = TREE_OPERAND (expr, 1);
1809 op1 = strip_offset (op1, false, &off1);
1810 *offset = off1 * st;
1826 /* Default handling of expressions for that we want to recurse into
1827 the first operand.  */
1828 op0 = TREE_OPERAND (expr, 0);
1829 op0 = strip_offset (op0, inside_addr, &off0);
1832 if (op0 == TREE_OPERAND (expr, 0)
1833 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
/* Copy the node before overwriting operands so the original tree is not
   mutated.  */
1836 expr = copy_node (expr);
1837 TREE_OPERAND (expr, 0) = op0;
1839 TREE_OPERAND (expr, 1) = op1;
1841 return fold_convert (orig_type, expr);
1844 /* Returns variant of TYPE that can be used as base for different uses.
1845 For integer types, we return unsigned variant of the type, which
1846 avoids problems with overflows.  For pointer types, we return void *.  */
1849 generic_type_for (tree type)
1851 if (POINTER_TYPE_P (type))
1852 return ptr_type_node;
/* Already-unsigned types are returned as-is (elided return on 1855);
   signed integer types are mapped to their unsigned variant.  */
1854 if (TYPE_UNSIGNED (type))
1857 return unsigned_type_for (type);
1860 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1861 position to POS.  If USE is not NULL, the candidate is set as related to
1862 it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1863 replacement of the final value of the iv by a direct computation.  */
1865 static struct iv_cand *
1866 add_candidate_1 (struct ivopts_data *data,
1867 tree base, tree step, bool important, enum iv_position pos,
1868 struct iv_use *use, tree incremented_at)
1871 struct iv_cand *cand = NULL;
1872 tree type, orig_type;
/* Normalize BASE/STEP to the generic (unsigned / void *) type so that
   equivalent candidates compare equal below.  */
1876 orig_type = TREE_TYPE (base);
1877 type = generic_type_for (orig_type);
1878 if (type != orig_type)
1880 base = fold_convert (type, base);
1882 step = fold_convert (type, step);
/* Search for an already-recorded candidate with the same position,
   increment statement, base and step; reuse it if found.  */
1886 for (i = 0; i < n_iv_cands (data); i++)
1888 cand = iv_cand (data, i);
1890 if (cand->pos != pos)
1893 if (cand->incremented_at != incremented_at)
1907 if (!operand_equal_p (base, cand->iv->base, 0))
1910 if (zero_p (cand->iv->step))
1917 if (step && operand_equal_p (step, cand->iv->step, 0))
/* Not found: allocate and register a new candidate.  */
1922 if (i == n_iv_cands (data))
1924 cand = xcalloc (1, sizeof (struct iv_cand));
1930 cand->iv = alloc_iv (base, step);
/* For positions other than IP_ORIGINAL a fresh temporary holds the iv;
   var_before == var_after since no separate increment copy exists yet.  */
1933 if (pos != IP_ORIGINAL && cand->iv)
1935 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1936 cand->var_after = cand->var_before;
1938 cand->important = important;
1939 cand->incremented_at = incremented_at;
1940 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1942 if (dump_file && (dump_flags & TDF_DETAILS))
1943 dump_cand (dump_file, cand);
/* An existing candidate may be promoted to important.  */
1946 if (important && !cand->important)
1948 cand->important = true;
1949 if (dump_file && (dump_flags & TDF_DETAILS))
1950 fprintf (dump_file, "Candidate %d is important\n", cand->id);
/* Relate the candidate to USE if one was given (guard on 1953-1954 is
   elided in this excerpt).  */
1955 bitmap_set_bit (use->related_cands, i);
1956 if (dump_file && (dump_flags & TDF_DETAILS))
1957 fprintf (dump_file, "Candidate %d is related to use %d\n",
1964 /* Returns true if incrementing the induction variable at the end of the LOOP
1967 The purpose is to avoid splitting latch edge with a biv increment, thus
1968 creating a jump, possibly confusing other optimization passes and leaving
1969 less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
1970 is not available (so we do not have a better alternative), or if the latch
1971 edge is already nonempty.  */
1974 allow_ip_end_pos_p (struct loop *loop)
/* No normal position available -> must allow end position.  */
1976 if (!ip_normal_pos (loop))
/* Latch block already contains code -> adding an increment there does
   not create a new jump.  (The "return true" bodies and final "return
   false" are elided from this excerpt.)  */
1979 if (!empty_block_p (ip_end_pos (loop)))
1985 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1986 position to POS.  If USE is not NULL, the candidate is set as related to
1987 it.  The candidate computation is scheduled on all available positions.  */
1990 add_candidate (struct ivopts_data *data,
1991 tree base, tree step, bool important, struct iv_use *use)
/* Register the candidate both at the normal position and, when permitted
   by allow_ip_end_pos_p, at the end-of-latch position.  */
1993 if (ip_normal_pos (data->current_loop))
1994 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
1995 if (ip_end_pos (data->current_loop)
1996 && allow_ip_end_pos_p (data->current_loop))
1997 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2000 /* Add a standard "0 + 1 * iteration" iv candidate for a
2001 type with SIZE bits.  */
2004 add_standard_iv_candidates_for_size (struct ivopts_data *data,
/* Build an unsigned integer type of the requested width and add the
   canonical counting candidate base 0, step 1, marked important.  */
2007 tree type = lang_hooks.types.type_for_size (size, true);
2008 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2012 /* Adds standard iv candidates.  */
2015 add_standard_iv_candidates (struct ivopts_data *data)
2017 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2019 /* The same for a double-integer type if it is still fast enough.  */
2020 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2021 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2025 /* Adds candidates bases on the old induction variable IV.  */
2028 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2031 struct iv_cand *cand;
2033 add_candidate (data, iv->base, iv->step, true, NULL);
2035 /* The same, but with initial value zero.  */
2036 add_candidate (data,
2037 build_int_cst (TREE_TYPE (iv->base), 0),
2038 iv->step, true, NULL);
/* If the iv is defined by a PHI node, also add an IP_ORIGINAL candidate
   that keeps the existing variable and its existing increment statement
   (found via the value flowing in over the latch edge).  */
2040 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2041 if (TREE_CODE (phi) == PHI_NODE)
2043 /* Additionally record the possibility of leaving the original iv
2045 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2046 cand = add_candidate_1 (data,
2047 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2048 SSA_NAME_DEF_STMT (def));
2049 cand->var_before = iv->ssa_name;
2050 cand->var_after = def;
2054 /* Adds candidates based on the old induction variables.  */
2057 add_old_ivs_candidates (struct ivopts_data *data)
/* Walk all relevant SSA versions; every recorded biv with a nonzero step
   contributes candidates.  */
2063 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2065 iv = ver_info (data, i)->iv;
2066 if (iv && iv->biv_p && !zero_p (iv->step))
2067 add_old_iv_candidates (data, iv);
2071 /* Adds candidates based on the value of the induction variable IV and USE.  */
2074 add_iv_value_candidates (struct ivopts_data *data,
2075 struct iv *iv, struct iv_use *use)
2077 add_candidate (data, iv->base, iv->step, false, use);
2079 /* The same, but with initial value zero.  */
2080 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2081 iv->step, false, use);
2084 /* Adds candidates based on the address IV and USE.  */
2087 add_address_candidates (struct ivopts_data *data,
2088 struct iv *iv, struct iv_use *use)
2091 unsigned HOST_WIDE_INT offset;
2093 /* First, the trivial choices.  */
2094 add_iv_value_candidates (data, iv, use);
2096 /* Second, try removing the COMPONENT_REFs.  */
2097 if (TREE_CODE (iv->base) == ADDR_EXPR)
/* Peel COMPONENT_REFs and constant-index ARRAY_REFs off the addressed
   object; if anything was stripped, add &object as a simpler base.  */
2099 base = TREE_OPERAND (iv->base, 0);
2100 while (TREE_CODE (base) == COMPONENT_REF
2101 || (TREE_CODE (base) == ARRAY_REF
2102 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2103 base = TREE_OPERAND (base, 0);
2105 if (base != TREE_OPERAND (iv->base, 0))
2107 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2108 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2110 if (TREE_CODE (base) == INDIRECT_REF)
2111 base = TREE_OPERAND (base, 0);
2113 base = build_addr (base);
2114 add_candidate (data, base, iv->step, false, use);
2118 /* Third, try removing the constant offset.  */
/* NOTE(review): the computation of "abase" (around line 2119) is elided
   from this excerpt.  */
2120 base = strip_offset (abase, false, &offset);
2122 add_candidate (data, base, iv->step, false, use);
2125 /* Possibly adds pseudocandidate for replacing the final value of USE by
2126 a direct computation.  */
2129 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2131 struct tree_niter_desc *niter;
2133 /* We must know where we exit the loop and how many times does it roll.  */
2134 niter = niter_for_single_dom_exit (data);
2136 || !zero_p (niter->may_be_zero))
/* NULL base and step mark this as a pseudocandidate (see
   add_candidate_1's contract).  */
2139 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2142 /* Adds candidates based on the uses.  */
2145 add_derived_ivs_candidates (struct ivopts_data *data)
2149 for (i = 0; i < n_iv_uses (data); i++)
2151 struct iv_use *use = iv_use (data, i);
/* Dispatch on use type (switch header elided in this excerpt).  */
2158 case USE_NONLINEAR_EXPR:
2160 /* Just add the ivs based on the value of the iv used here.  */
2161 add_iv_value_candidates (data, use->iv, use);
/* USE_OUTER case (label elided): value candidates plus the final-value
   pseudocandidate.  */
2165 add_iv_value_candidates (data, use->iv, use);
2167 /* Additionally, add the pseudocandidate for the possibility to
2168 replace the final value by a direct computation.  */
2169 add_iv_outer_candidates (data, use);
/* USE_ADDRESS case (label elided).  */
2173 add_address_candidates (data, use->iv, use);
2182 /* Record important candidates and add them to related_cands bitmaps
2186 record_important_candidates (struct ivopts_data *data)
2191 for (i = 0; i < n_iv_cands (data); i++)
2193 struct iv_cand *cand = iv_cand (data, i);
2195 if (cand->important)
2196 bitmap_set_bit (data->important_candidates, i);
/* With few candidates we consider every (use, cand) pair, so per-use
   related_cands bitmaps become unnecessary.  */
2199 data->consider_all_candidates = (n_iv_cands (data)
2200 <= CONSIDER_ALL_CANDIDATES_BOUND);
2202 if (data->consider_all_candidates)
2204 /* We will not need "related_cands" bitmaps in this case,
2205 so release them to decrease peak memory consumption.  */
2206 for (i = 0; i < n_iv_uses (data); i++)
2208 use = iv_use (data, i);
2209 BITMAP_FREE (use->related_cands);
2214 /* Add important candidates to the related_cands bitmaps.  */
2215 for (i = 0; i < n_iv_uses (data); i++)
2216 bitmap_ior_into (iv_use (data, i)->related_cands,
2217 data->important_candidates);
2221 /* Finds the candidates for the induction variables.  */
2224 find_iv_candidates (struct ivopts_data *data)
2226 /* Add commonly used ivs.  */
2227 add_standard_iv_candidates (data);
2229 /* Add old induction variables.  */
2230 add_old_ivs_candidates (data);
2232 /* Add induction variables derived from uses.  */
2233 add_derived_ivs_candidates (data);
2235 /* Record the important candidates.  */
2236 record_important_candidates (data);
2239 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2240 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2241 we allocate a simple list to every use.  */
2244 alloc_use_cost_map (struct ivopts_data *data)
2246 unsigned i, size, s, j;
2248 for (i = 0; i < n_iv_uses (data); i++)
2250 struct iv_use *use = iv_use (data, i);
2253 if (data->consider_all_candidates)
2254 size = n_iv_cands (data);
/* Otherwise size the map to the number of related candidates (counting
   loop body around 2259-2260 elided), rounded up to a power of two so
   the hash in set_use_iv_cost can use masking instead of division.  */
2258 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2263 /* Round up to the power of two, so that moduling by it is fast.  */
2264 for (size = 1; size < s; size <<= 1)
2268 use->n_map_members = size;
2269 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2273 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2274 on invariants DEPENDS_ON.  */
2277 set_use_iv_cost (struct ivopts_data *data,
2278 struct iv_use *use, struct iv_cand *cand, unsigned cost,
/* Infinite-cost early-out (guard elided): release the depends_on bitmap
   since the pair will never be recorded.  */
2285 BITMAP_FREE (depends_on);
/* Dense case: the cost map is indexed directly by candidate id.  */
2289 if (data->consider_all_candidates)
2291 use->cost_map[cand->id].cand = cand;
2292 use->cost_map[cand->id].cost = cost;
2293 use->cost_map[cand->id].depends_on = depends_on;
/* Sparse case: open-addressed insertion, probing linearly from the
   hashed slot and wrapping around once.  */
2297 /* n_map_members is a power of two, so this computes modulo.  */
2298 s = cand->id & (use->n_map_members - 1);
2299 for (i = s; i < use->n_map_members; i++)
2300 if (!use->cost_map[i].cand)
2302 for (i = 0; i < s; i++)
2303 if (!use->cost_map[i].cand)
2309 use->cost_map[i].cand = cand;
2310 use->cost_map[i].cost = cost;
2311 use->cost_map[i].depends_on = depends_on;
2314 /* Gets cost of (USE, CANDIDATE) pair.  */
2316 static struct cost_pair *
2317 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2318 struct iv_cand *cand)
2321 struct cost_pair *ret;
2326 if (data->consider_all_candidates)
/* Dense case: direct index by candidate id (validity check and return,
   around 2329-2333, are elided).  */
2328 ret = use->cost_map + cand->id;
2335 /* n_map_members is a power of two, so this computes modulo.  */
/* Sparse case: linear probe from the hashed slot, wrapping once;
   mirrors the insertion order in set_use_iv_cost.  */
2336 s = cand->id & (use->n_map_members - 1);
2337 for (i = s; i < use->n_map_members; i++)
2338 if (use->cost_map[i].cand == cand)
2339 return use->cost_map + i;
2341 for (i = 0; i < s; i++)
2342 if (use->cost_map[i].cand == cand)
2343 return use->cost_map + i;
2348 /* Returns estimate on cost of computing SEQ.  */
/* Sums rtx_cost over the single_set of each insn in the RTL sequence
   (function header and accumulator declaration are elided here).  */
2356 for (; seq; seq = NEXT_INSN (seq))
2358 set = single_set (seq);
2360 cost += rtx_cost (set, SET);
2368 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2370 produce_memory_decl_rtl (tree obj, int *regno)
/* Static/external objects get a SYMBOL_REF address; everything else a
   fresh fake pseudo register, numbered via *REGNO.  */
2375 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2377 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2378 x = gen_rtx_SYMBOL_REF (Pmode, name);
2381 x = gen_raw_REG (Pmode, (*regno)++);
2383 return gen_rtx_MEM (DECL_MODE (obj), x);
2386 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2387 walk_tree.  DATA contains the actual fake register number.  */
2390 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2392 tree obj = NULL_TREE;
2396 switch (TREE_CODE (*expr_p))
/* ADDR_EXPR case (label elided): dig through handled components to the
   base object and give it memory-like DECL_RTL.  */
2399 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2400 handled_component_p (*expr_p);
2401 expr_p = &TREE_OPERAND (*expr_p, 0))
2405 x = produce_memory_decl_rtl (obj, regno);
/* SSA_NAME case (label elided): use a fake pseudo for the underlying
   variable unless DECL_RTL is already set.  */
2410 obj = SSA_NAME_VAR (*expr_p);
2411 if (!DECL_RTL_SET_P (obj))
2412 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* VAR_DECL/PARM_DECL-style case (labels elided): BLKmode objects live in
   memory, others in a fake pseudo.  */
2421 if (DECL_RTL_SET_P (obj))
2424 if (DECL_MODE (obj) == BLKmode)
2425 x = produce_memory_decl_rtl (obj, regno);
2427 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Remember the decl so computation_cost's caller can reset the fake
   DECL_RTL afterwards.  */
2437 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2438 SET_DECL_RTL (obj, x);
2444 /* Determines cost of the computation of EXPR.  */
2447 computation_cost (tree expr)
2450 tree type = TREE_TYPE (expr);
2452 /* Avoid using hard regs in ways which may be unsupported.  */
2453 int regno = LAST_VIRTUAL_REGISTER + 1;
/* Assign fake DECL_RTL to every referenced variable, expand EXPR to an
   RTL sequence and sum its cost; a MEM result also pays address cost.  */
2455 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2457 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2461 cost = seq_cost (seq);
2462 if (GET_CODE (rslt) == MEM)
2463 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2468 /* Returns variable containing the value of candidate CAND at statement AT.  */
2471 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
/* After the iv increment the updated variable holds the value; before
   it, the pre-increment variable does.  */
2473 if (stmt_after_increment (loop, cand, stmt))
2474 return cand->var_after;
2476 return cand->var_before;
2479 /* Determines the expression by that USE is expressed from induction variable
2480 CAND at statement AT in LOOP.  */
2483 get_computation_at (struct loop *loop,
2484 struct iv_use *use, struct iv_cand *cand, tree at)
2486 tree ubase = use->iv->base;
2487 tree ustep = use->iv->step;
2488 tree cbase = cand->iv->base;
2489 tree cstep = cand->iv->step;
2490 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2494 unsigned HOST_WIDE_INT ustepi, cstepi;
2495 HOST_WIDE_INT ratioi;
2497 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2499 /* We do not have a precision to express the values of use.  */
2503 expr = var_at_stmt (loop, cand, at);
2505 if (TREE_TYPE (expr) != ctype)
2507 /* This may happen with the original ivs.  */
2508 expr = fold_convert (ctype, expr);
/* Do the arithmetic in an unsigned variant of the use type (uutype) so
   overflow is well defined; note the else-branch creating uutype for
   signed utype is elided in this excerpt.  */
2511 if (TYPE_UNSIGNED (utype))
2515 uutype = unsigned_type_for (utype);
2516 ubase = fold_convert (uutype, ubase);
2517 ustep = fold_convert (uutype, ustep);
2520 if (uutype != ctype)
2522 expr = fold_convert (uutype, expr);
2523 cbase = fold_convert (uutype, cbase);
2524 cstep = fold_convert (uutype, cstep);
/* Both steps must be host-word constants so the ratio ustep / cstep can
   be computed; otherwise give up (failure return elided).  */
2527 if (!cst_and_fits_in_hwi (cstep)
2528 || !cst_and_fits_in_hwi (ustep))
2531 ustepi = int_cst_value (ustep);
2532 cstepi = int_cst_value (cstep);
2534 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2536 /* TODO maybe consider case when ustep divides cstep and the ratio is
2537 a power of 2 (so that the division is fast to execute)?  We would
2538 need to be much more careful with overflows etc. then.  */
2542 /* We may need to shift the value if we are after the increment.  */
2543 if (stmt_after_increment (loop, cand, at))
2544 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2546 /* use = ubase - ratio * cbase + ratio * var.
2548 In general case ubase + ratio * (var - cbase) could be better (one less
2549 multiplication), but often it is possible to eliminate redundant parts
2550 of computations from (ubase - ratio * cbase) term, and if it does not
2551 happen, fold is able to apply the distributive law to obtain this form
/* ratio == 1 special case (guard elided): use = (ubase - cbase) + var.  */
2556 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2557 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2559 else if (ratioi == -1)
2561 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2562 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
/* General case: use = (ubase - ratio * cbase) + ratio * var.  */
2566 ratio = build_int_cst_type (uutype, ratioi);
2567 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2568 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2569 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2570 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2573 return fold_convert (utype, expr);
2576 /* Determines the expression by that USE is expressed from induction variable
/* Convenience wrapper: same as get_computation_at, evaluated at the
   use's own statement.  */
2580 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2582 return get_computation_at (loop, use, cand, use->stmt);
2585 /* Returns cost of addition in MODE.  */
2588 add_cost (enum machine_mode mode)
/* Per-mode memoization: compute once by expanding a PLUS of two fake
   pseudos and costing the resulting sequence.  */
2590 static unsigned costs[NUM_MACHINE_MODES];
2598 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2599 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2600 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2605 cost = seq_cost (seq);
2611 if (dump_file && (dump_flags & TDF_DETAILS))
2612 fprintf (dump_file, "Addition in %s costs %d\n",
2613 GET_MODE_NAME (mode), cost);
2617 /* Entry in a hashtable of already known costs for multiplication.  */
2620 HOST_WIDE_INT cst;		/* The constant to multiply by.  */
2621 enum machine_mode mode;	/* In mode.  */
2622 unsigned cost;		/* The cost.  */
2625 /* Counts hash value for the ENTRY.  */
2628 mbc_entry_hash (const void *entry)
2630 const struct mbc_entry *e = entry;
/* Combine mode and constant; 57 and 877 are arbitrary mixing primes.  */
2632 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2635 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2638 mbc_entry_eq (const void *entry1, const void *entry2)
2640 const struct mbc_entry *e1 = entry1;
2641 const struct mbc_entry *e2 = entry2;
2643 return (e1->mode == e2->mode
2644 && e1->cst == e2->cst);
2647 /* Returns cost of multiplication by constant CST in MODE.  */
2650 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2652 static htab_t costs;
2653 struct mbc_entry **cached, act;
/* Lazily create the memo table; "free" releases entries on deletion.  */
2658 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2662 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2664 return (*cached)->cost;
/* Cache miss: record the key, expand the multiplication into RTL and
   cost the sequence.  */
2666 *cached = xmalloc (sizeof (struct mbc_entry));
2667 (*cached)->mode = mode;
2668 (*cached)->cst = cst;
2671 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2676 cost = seq_cost (seq);
2678 if (dump_file && (dump_flags & TDF_DETAILS))
2679 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2680 (int) cst, GET_MODE_NAME (mode), cost);
2682 (*cached)->cost = cost;
2687 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2688 If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2689 variable is omitted.  The created memory accesses MODE.
2691 TODO -- there must be some better way.  This all is quite crude.  */
2694 get_address_cost (bool symbol_present, bool var_present,
2695 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2697 #define MAX_RATIO 128
2698 static sbitmap valid_mult;
2699 static HOST_WIDE_INT rat, off;
2700 static HOST_WIDE_INT min_offset, max_offset;
2701 static unsigned costs[2][2][2][2];
2702 unsigned cost, acost;
2703 rtx seq, addr, base;
2704 bool offset_p, ratio_p;
2706 HOST_WIDE_INT s_offset;
2707 unsigned HOST_WIDE_INT mask;
/* One-time probing (initialization guard elided): find the largest
   positive and negative constant offsets the target accepts in an
   address, by doubling until memory_address_p rejects the form.  */
2714 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2716 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2717 for (i = 1; i <= 1 << 20; i <<= 1)
2719 XEXP (addr, 1) = GEN_INT (i);
2720 if (!memory_address_p (Pmode, addr))
2723 max_offset = i >> 1;
2726 for (i = 1; i <= 1 << 20; i <<= 1)
2728 XEXP (addr, 1) = GEN_INT (-i);
2729 if (!memory_address_p (Pmode, addr))
2732 min_offset = -(i >> 1);
2734 if (dump_file && (dump_flags & TDF_DETAILS))
2736 fprintf (dump_file, "get_address_cost:\n");
2737 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2738 fprintf (dump_file, " max offset %d\n", (int) max_offset);
/* Probe which index multipliers in [-MAX_RATIO, MAX_RATIO] the target
   accepts, recorded in the valid_mult bitmap (offset by MAX_RATIO).  */
2741 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2742 sbitmap_zero (valid_mult);
2744 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2745 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2747 XEXP (addr, 1) = GEN_INT (i);
2748 if (memory_address_p (Pmode, addr))
2750 SET_BIT (valid_mult, i + MAX_RATIO);
2755 if (dump_file && (dump_flags & TDF_DETAILS))
2757 fprintf (dump_file, " allowed multipliers:");
2758 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2759 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2760 fprintf (dump_file, " %d", (int) i);
2761 fprintf (dump_file, "\n");
2762 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from Pmode width into s_offset (the extension
   itself, around 2770-2772, is elided).  */
2766 bits = GET_MODE_BITSIZE (Pmode);
2767 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2769 if ((offset >> (bits - 1) & 1))
2774 offset_p = (s_offset != 0
2775 && min_offset <= s_offset && s_offset <= max_offset);
2776 ratio_p = (ratio != 1
2777 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2778 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
/* Parts the addressing mode cannot absorb must be computed separately:
   an unsupported ratio costs a multiply, an unsupported offset an add.  */
2780 if (ratio != 1 && !ratio_p)
2781 cost += multiply_by_cost (ratio, Pmode);
2783 if (s_offset && !offset_p && !symbol_present)
2785 cost += add_cost (Pmode);
/* The remaining address form is costed once per (symbol, var, offset,
   ratio) presence combination and memoized in costs[][][][].  */
2789 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2794 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2795 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2797 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2800 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2804 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2806 base = gen_rtx_fmt_e (CONST, Pmode,
2807 gen_rtx_fmt_ee (PLUS, Pmode,
2812 base = GEN_INT (off);
2817 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2820 addr = memory_address (Pmode, addr);
2824 acost = seq_cost (seq);
2825 acost += address_cost (addr, Pmode);
2829 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2832 return cost + acost;
2835 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2836 the bitmap to that we should store it.  */
2838 static struct ivopts_data *fd_ivopts_data;
2840 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2842 bitmap *depends_on = data;
2843 struct version_info *info;
2845 if (TREE_CODE (*expr_p) != SSA_NAME)
2847 info = name_info (fd_ivopts_data, *expr_p);
/* Only names registered as invariants (nonzero inv_id) without a
   nonlinear use are recorded.  */
2849 if (!info->inv_id || info->has_nonlin_use)
/* Allocate the dependence bitmap lazily on first recorded invariant
   (NULL guard on 2852 elided).  */
2853 *depends_on = BITMAP_ALLOC (NULL);
2854 bitmap_set_bit (*depends_on, info->inv_id);
2859 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
2860 invariants the computation depends on.  */
2863 force_var_cost (struct ivopts_data *data,
2864 tree expr, bitmap *depends_on)
2866 static bool costs_initialized = false;
2867 static unsigned integer_cost;
2868 static unsigned symbol_cost;
2869 static unsigned address_cost;
2871 unsigned cost0, cost1, cost;
2872 enum machine_mode mode;
/* One-time calibration: measure the target's cost of materializing an
   integer constant, a symbol address, and symbol + offset.  The +1 s
   bias symbols/addresses as slightly dearer than plain constants.  */
2874 if (!costs_initialized)
2876 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2877 rtx x = gen_rtx_MEM (DECL_MODE (var),
2878 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2880 tree type = build_pointer_type (integer_type_node);
2882 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2885 SET_DECL_RTL (var, x);
2886 TREE_STATIC (var) = 1;
2887 addr = build1 (ADDR_EXPR, type, var);
2888 symbol_cost = computation_cost (addr) + 1;
2891 = computation_cost (build2 (PLUS_EXPR, type,
2893 build_int_cst_type (type, 2000))) + 1;
2894 if (dump_file && (dump_flags & TDF_DETAILS))
2896 fprintf (dump_file, "force_var_cost:\n");
2897 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2898 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2899 fprintf (dump_file, " address %d\n", (int) address_cost);
2900 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2901 fprintf (dump_file, "\n");
2904 costs_initialized = true;
/* Collect the invariants EXPR depends on (only when the caller asked,
   guard elided).  fd_ivopts_data passes DATA to the walk_tree callback.  */
2911 fd_ivopts_data = data;
2912 walk_tree (&expr, find_depends, depends_on, NULL);
/* Simple cases: a variable is free; constants and addresses use the
   calibrated costs above.  */
2915 if (SSA_VAR_P (expr))
2918 if (TREE_INVARIANT (expr))
2920 if (TREE_CODE (expr) == INTEGER_CST)
2921 return integer_cost;
2923 if (TREE_CODE (expr) == ADDR_EXPR)
2925 tree obj = TREE_OPERAND (expr, 0);
2927 if (TREE_CODE (obj) == VAR_DECL
2928 || TREE_CODE (obj) == PARM_DECL
2929 || TREE_CODE (obj) == RESULT_DECL)
2933 return address_cost;
/* Composite expressions: cost each operand (free if already a gimple
   value) plus the cost of the operation itself.  */
2936 switch (TREE_CODE (expr))
2941 op0 = TREE_OPERAND (expr, 0);
2942 op1 = TREE_OPERAND (expr, 1);
2946 if (is_gimple_val (op0))
2949 cost0 = force_var_cost (data, op0, NULL);
2951 if (is_gimple_val (op1))
2954 cost1 = force_var_cost (data, op1, NULL);
2959 /* Just an arbitrary value, FIXME.  */
2960 return target_spill_cost;
2963 mode = TYPE_MODE (TREE_TYPE (expr));
2964 switch (TREE_CODE (expr))
2968 cost = add_cost (mode);
/* MULT_EXPR (label elided): only multiplication by a constant is
   costed; variable * variable falls back to the spill cost.  */
2972 if (cst_and_fits_in_hwi (op0))
2973 cost = multiply_by_cost (int_cst_value (op0), mode);
2974 else if (cst_and_fits_in_hwi (op1))
2975 cost = multiply_by_cost (int_cst_value (op1), mode);
2977 return target_spill_cost;
2987 /* Bound the cost by target_spill_cost.  The parts of complicated
2988 computations often are either loop invariant or at least can
2989 be shared between several iv uses, so letting this grow without
2990 limits would not give reasonable results.  */
2991 return cost < target_spill_cost ? cost : target_spill_cost;
2994 /* Estimates cost of expressing address ADDR as var + symbol + offset.  The
2995 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2996 to false if the corresponding part is missing.  DEPENDS_ON is a set of the
2997 invariants the computation depends on.  */
3000 split_address_cost (struct ivopts_data *data,
3001 tree addr, bool *symbol_present, bool *var_present,
3002 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3005 HOST_WIDE_INT bitsize;
3006 HOST_WIDE_INT bitpos;
3008 enum machine_mode mode;
3009 int unsignedp, volatilep;
3011 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3012 &unsignedp, &volatilep, false);
/* References with a variable offset, unaligned bit position, or a
   non-VAR_DECL core cannot be split: treat the whole address as a
   variable and pay the spill cost.  */
3015 || bitpos % BITS_PER_UNIT != 0
3016 || TREE_CODE (core) != VAR_DECL)
3018 *symbol_present = false;
3019 *var_present = true;
3020 fd_ivopts_data = data;
3021 walk_tree (&addr, find_depends, depends_on, NULL);
3022 return target_spill_cost;
/* Splittable: fold the byte offset in, and classify the core object as
   symbol (static/external) or variable.  */
3025 *offset += bitpos / BITS_PER_UNIT;
3026 if (TREE_STATIC (core)
3027 || DECL_EXTERNAL (core))
3029 *symbol_present = true;
3030 *var_present = false;
3034 *symbol_present = false;
3035 *var_present = true;
3039 /* Estimates cost of expressing difference of addresses E1 - E2 as
3040 var + symbol + offset. The value of offset is added to OFFSET,
3041 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3042 part is missing. DEPENDS_ON is a set of the invariants the computation depends on. */
3046 ptr_difference_cost (struct ivopts_data *data,
3047 tree e1, tree e2, bool *symbol_present, bool *var_present,
3048 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3050 HOST_WIDE_INT diff = 0;
/* E1 must be an address.  */
3053 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* If the difference folds to a compile-time constant, it goes entirely
   into the offset part and is free.  */
3055 if (ptr_difference_const (e1, e2, &diff))
3058 *symbol_present = false;
3059 *var_present = false;
/* Subtracting zero: just split the single address.  */
3063 if (e2 == integer_zero_node)
3064 return split_address_cost (data, TREE_OPERAND (e1, 0),
3065 symbol_present, var_present, offset, depends_on);
3067 *symbol_present = false;
3068 *var_present = true;
/* General case: force both addresses into variables and subtract.  */
3070 cost = force_var_cost (data, e1, depends_on);
3071 cost += force_var_cost (data, e2, depends_on);
3072 cost += add_cost (Pmode);
3077 /* Estimates cost of expressing difference E1 - E2 as
3078 var + symbol + offset. The value of offset is added to OFFSET,
3079 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3080 part is missing. DEPENDS_ON is a set of the invariants the computation depends on. */
3084 difference_cost (struct ivopts_data *data,
3085 tree e1, tree e2, bool *symbol_present, bool *var_present,
3086 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3089 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3090 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands; their difference is folded
   directly into *OFFSET.  */
3092 e1 = strip_offset (e1, false, &off1);
3093 e2 = strip_offset (e2, false, &off2);
3094 *offset += off1 - off2;
/* Differences of addresses are handled by the specialized routine.  */
3099 if (TREE_CODE (e1) == ADDR_EXPR)
3100 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3102 *symbol_present = false;
/* Identical operands cancel out completely.  */
3104 if (operand_equal_p (e1, e2, 0))
3106 *var_present = false;
3109 *var_present = true;
3111 return force_var_cost (data, e1, depends_on);
/* Cost of computing -E2.  */
3115 cost = force_var_cost (data, e2, depends_on);
3116 cost += multiply_by_cost (-1, mode);
/* General case: materialize both operands and subtract.  */
3121 cost = force_var_cost (data, e1, depends_on);
3122 cost += force_var_cost (data, e2, depends_on);
3123 cost += add_cost (mode);
3128 /* Determines the cost of the computation by that USE is expressed
3129 from induction variable CAND. If ADDRESS_P is true, we just need
3130 to create an address from it, otherwise we want to get it into
3131 register. A set of invariants we depend on is stored in
3132 DEPENDS_ON. AT is the statement at that the value is computed. */
3135 get_computation_cost_at (struct ivopts_data *data,
3136 struct iv_use *use, struct iv_cand *cand,
3137 bool address_p, bitmap *depends_on, tree at)
3139 tree ubase = use->iv->base, ustep = use->iv->step;
3141 tree utype = TREE_TYPE (ubase), ctype;
3142 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3143 HOST_WIDE_INT ratio, aratio;
3144 bool var_present, symbol_present;
3145 unsigned cost = 0, n_sums;
3149 /* Only consider real candidates. */
3153 cbase = cand->iv->base;
3154 cstep = cand->iv->step;
3155 ctype = TREE_TYPE (cbase);
3157 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3159 /* We do not have a precision to express the values of use. */
3165 /* Do not try to express address of an object with computation based
3166 on address of a different object. This may cause problems in rtl
3167 level alias analysis (that does not expect this to be happening,
3168 as this is illegal in C), and would be unlikely to be useful
3170 if (use->iv->base_object
3171 && cand->iv->base_object
3172 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
/* The analytic cost model below only handles steps and bases that fit in
   a HOST_WIDE_INT; anything else falls through to the generic path at the
   bottom that expands the expression and measures its cost.  */
3176 if (!cst_and_fits_in_hwi (ustep)
3177 || !cst_and_fits_in_hwi (cstep))
3180 if (TREE_CODE (ubase) == INTEGER_CST
3181 && !cst_and_fits_in_hwi (ubase))
3184 if (TREE_CODE (cbase) == INTEGER_CST
3185 && !cst_and_fits_in_hwi (cbase))
3188 ustepi = int_cst_value (ustep);
3189 cstepi = int_cst_value (cstep);
3191 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3193 /* TODO -- add direct handling of this case. */
/* RATIO is the integer such that ustep == ratio * cstep; without such a
   ratio the use cannot be cheaply expressed from this candidate.  */
3197 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3200 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3201 or ratio == 1, it is better to handle this like
3203 ubase - ratio * cbase + ratio * var
3205 (also holds in the case ratio == -1, TODO). */
3207 if (TREE_CODE (cbase) == INTEGER_CST)
3209 offset = - ratio * int_cst_value (cbase);
3210 cost += difference_cost (data,
3211 ubase, integer_zero_node,
3212 &symbol_present, &var_present, &offset,
3215 else if (ratio == 1)
3217 cost += difference_cost (data,
3219 &symbol_present, &var_present, &offset,
3224 cost += force_var_cost (data, cbase, depends_on);
3225 cost += add_cost (TYPE_MODE (ctype));
3226 cost += difference_cost (data,
3227 ubase, integer_zero_node,
3228 &symbol_present, &var_present, &offset,
3232 /* If we are after the increment, the value of the candidate is higher by
3234 if (stmt_after_increment (data->current_loop, cand, at))
3235 offset -= ratio * cstepi;
3237 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3238 (symbol/var/const parts may be omitted). If we are looking for an address,
3239 find the cost of addressing this. */
3241 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3243 /* Otherwise estimate the costs for computing the expression. */
3244 aratio = ratio > 0 ? ratio : -ratio;
3245 if (!symbol_present && !var_present && !offset)
3248 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3254 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3258 /* Symbol + offset should be compile-time computable. */
3259 && (symbol_present || offset))
/* N_SUMS additions combine the remaining parts.  */
3262 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3266 /* Just get the expression, expand it and measure the cost. */
3267 tree comp = get_computation_at (data->current_loop, use, cand, at);
/* For an address use, measure the cost of actually dereferencing it.  */
3273 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3275 return computation_cost (comp);
3279 /* Determines the cost of the computation by that USE is expressed
3280 from induction variable CAND. If ADDRESS_P is true, we just need
3281 to create an address from it, otherwise we want to get it into
3282 register. A set of invariants we depend on is stored in DEPENDS_ON. */
3286 get_computation_cost (struct ivopts_data *data,
3287 struct iv_use *use, struct iv_cand *cand,
3288 bool address_p, bitmap *depends_on)
/* Convenience wrapper: cost at the position of the use statement.  */
3290 return get_computation_cost_at (data,
3291 use, cand, address_p, depends_on, use->stmt);
3294 /* Determines cost of basing replacement of USE on CAND in a generic expression. */
3298 determine_use_iv_cost_generic (struct ivopts_data *data,
3299 struct iv_use *use, struct iv_cand *cand)
3304 /* The simple case first -- if we need to express value of the preserved
3305 original biv, the cost is 0. This also prevents us from counting the
3306 cost of increment twice -- once at this use and once in the cost of
3308 if (cand->pos == IP_ORIGINAL
3309 && cand->incremented_at == use->stmt)
3311 set_use_iv_cost (data, use, cand, 0, NULL);
3315 cost = get_computation_cost (data, use, cand, false, &depends_on);
3316 set_use_iv_cost (data, use, cand, cost, depends_on);
/* INFTY means USE cannot be based on CAND at all.  */
3318 return cost != INFTY;
3321 /* Determines cost of basing replacement of USE on CAND in an address. */
3324 determine_use_iv_cost_address (struct ivopts_data *data,
3325 struct iv_use *use, struct iv_cand *cand)
/* For address uses, only an addressable form of the value is needed.  */
3328 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3330 set_use_iv_cost (data, use, cand, cost, depends_on);
3332 return cost != INFTY;
3335 /* Computes value of induction variable IV in iteration NITER. */
3338 iv_value (struct iv *iv, tree niter)
3341 tree type = TREE_TYPE (iv->base);
3343 niter = fold_convert (type, niter);
/* Value after NITER iterations is base + step * niter.  */
3344 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
3346 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3349 /* Computes value of candidate CAND at position AT in iteration NITER. */
3352 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3354 tree val = iv_value (cand->iv, niter);
3355 tree type = TREE_TYPE (cand->iv->base);
/* Past the increment statement, the candidate is one step ahead.  */
3357 if (stmt_after_increment (loop, cand, at))
3358 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3363 /* Returns period of induction variable iv. */
3366 iv_period (struct iv *iv)
3368 tree step = iv->step, period, type;
3371 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3373 /* Period of the iv is gcd (step, type range). Since type range is power
3374 of two, it suffices to determine the maximum power of two that divides
3376 pow2div = num_ending_zeros (step);
3377 type = unsigned_type_for (TREE_TYPE (step));
/* The mask of (precision - pow2div) low bits, i.e. 2^(prec - pow2div) - 1.  */
3379 period = build_low_bits_mask (type,
3380 (TYPE_PRECISION (type)
3381 - tree_low_cst (pow2div, 1)));
3386 /* Check whether it is possible to express the condition in USE by comparison
3387 of candidate CAND. If so, store the comparison code to COMPARE and the
3388 value compared with to BOUND. */
3391 may_eliminate_iv (struct ivopts_data *data,
3392 struct iv_use *use, struct iv_cand *cand,
3393 enum tree_code *compare, tree *bound)
3397 struct tree_niter_desc *niter;
3399 tree wider_type, period, per_type;
3400 struct loop *loop = data->current_loop;
3402 /* For now works only for exits that dominate the loop latch. TODO -- extend
3403 for other conditions inside loop body. */
3404 ex_bb = bb_for_stmt (use->stmt);
3405 if (use->stmt != last_stmt (ex_bb)
3406 || TREE_CODE (use->stmt) != COND_EXPR)
3408 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Find the edge that leaves the loop; if neither successor does, this
   condition is not a loop exit test.  */
3411 exit = EDGE_SUCC (ex_bb, 0);
3412 if (flow_bb_inside_loop_p (loop, exit->dest))
3413 exit = EDGE_SUCC (ex_bb, 1);
3414 if (flow_bb_inside_loop_p (loop, exit->dest))
/* A known iteration count without a may-be-zero guard is required.  */
3417 niter = niter_for_exit (data, exit);
3419 || !zero_p (niter->may_be_zero))
3423 nit_type = TREE_TYPE (nit);
3425 /* Determine whether we may use the variable to test whether niter iterations
3426 elapsed. This is the case iff the period of the induction variable is
3427 greater than the number of iterations. */
3428 period = iv_period (cand->iv)
3431 per_type = TREE_TYPE (period);
/* Compare period and niter in the wider of their two types.  */
3433 wider_type = TREE_TYPE (period);
3434 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3435 wider_type = per_type;
3437 wider_type = nit_type;
3439 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3440 fold_convert (wider_type, period),
3441 fold_convert (wider_type, nit)))))
/* The comparison code depends on which edge leaves the loop.  */
3444 if (exit->flags & EDGE_TRUE_VALUE)
/* BOUND is the candidate's value at the moment the loop should exit.  */
3449 *bound = cand_value_at (loop, cand, use->stmt, nit);
3453 /* Determines cost of basing replacement of USE on CAND in a condition. */
3456 determine_use_iv_cost_condition (struct ivopts_data *data,
3457 struct iv_use *use, struct iv_cand *cand)
3460 enum tree_code compare;
3462 /* Only consider real candidates. */
3465 set_use_iv_cost (data, use, cand, INFTY, NULL);
/* If the exit test can be rewritten in terms of CAND, the cost is just
   that of computing the bound the candidate is compared against.  */
3469 if (may_eliminate_iv (data, use, cand, &compare, &bound))
3471 bitmap depends_on = NULL;
3472 unsigned cost = force_var_cost (data, bound, &depends_on);
3474 set_use_iv_cost (data, use, cand, cost, depends_on);
3475 return cost != INFTY;
3478 /* The induction variable elimination failed; just express the original
3479 giv. If it is compared with an invariant, note that we cannot get
3481 if (TREE_CODE (*use->op_p) == SSA_NAME)
3482 record_invariant (data, *use->op_p, true);
/* Otherwise record both operands of the comparison as invariants.  */
3485 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3486 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3489 return determine_use_iv_cost_generic (data, use, cand);
3492 /* Checks whether it is possible to replace the final value of USE by
3493 a direct computation. If so, the formula is stored to *VALUE. */
3496 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3499 struct loop *loop = data->current_loop;
3501 struct tree_niter_desc *niter;
/* Only possible when the loop has a single exit dominating the use.  */
3503 exit = single_dom_exit (loop);
3507 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3508 bb_for_stmt (use->stmt)));
/* A known, unconditional iteration count is required.  */
3510 niter = niter_for_single_dom_exit (data);
3512 || !zero_p (niter->may_be_zero))
/* The final value is the value of the iv after NITER iterations.  */
3515 *value = iv_value (use->iv, niter->niter);
3520 /* Determines cost of replacing final value of USE using CAND. */
3523 determine_use_iv_cost_outer (struct ivopts_data *data,
3524 struct iv_use *use, struct iv_cand *cand)
3530 struct loop *loop = data->current_loop;
3532 /* The simple case first -- if we need to express value of the preserved
3533 original biv, the cost is 0. This also prevents us from counting the
3534 cost of increment twice -- once at this use and once in the cost of
3536 if (cand->pos == IP_ORIGINAL
3537 && cand->incremented_at == use->stmt)
3539 set_use_iv_cost (data, use, cand, 0, NULL);
/* Replacing the use by its final value: the computation happens only once
   after the loop, so its cost is scaled down by the expected trip count.  */
3545 if (!may_replace_final_value (data, use, &value))
3547 set_use_iv_cost (data, use, cand, INFTY, NULL);
3552 cost = force_var_cost (data, value, &depends_on);
3554 cost /= AVG_LOOP_NITER (loop);
3556 set_use_iv_cost (data, use, cand, cost, depends_on);
3557 return cost != INFTY;
3560 exit = single_dom_exit (loop);
3563 /* If there is just a single exit, we may use value of the candidate
3564 after we take it to determine the value of use. */
3565 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3566 last_stmt (exit->src));
3568 cost /= AVG_LOOP_NITER (loop);
3572 /* Otherwise we just need to compute the iv. */
3573 cost = get_computation_cost (data, use, cand, false, &depends_on);
3576 set_use_iv_cost (data, use, cand, cost, depends_on);
3578 return cost != INFTY;
3581 /* Determines cost of basing replacement of USE on CAND. Returns false
3582 if USE cannot be based on CAND. */
3585 determine_use_iv_cost (struct ivopts_data *data,
3586 struct iv_use *use, struct iv_cand *cand)
/* Dispatch on the kind of the use.  */
3590 case USE_NONLINEAR_EXPR:
3591 return determine_use_iv_cost_generic (data, use, cand);
3594 return determine_use_iv_cost_outer (data, use, cand);
3597 return determine_use_iv_cost_address (data, use, cand);
3600 return determine_use_iv_cost_condition (data, use, cand);
3607 /* Determines costs of basing the use of the iv on an iv candidate. */
3610 determine_use_iv_costs (struct ivopts_data *data)
3614 struct iv_cand *cand;
3615 bitmap to_clear = BITMAP_ALLOC (NULL);
3617 alloc_use_cost_map (data);
/* For each use, compute the cost of expressing it by each candidate --
   either all candidates, or only the related ones.  */
3619 for (i = 0; i < n_iv_uses (data); i++)
3621 use = iv_use (data, i);
3623 if (data->consider_all_candidates)
3625 for (j = 0; j < n_iv_cands (data); j++)
3627 cand = iv_cand (data, j);
3628 determine_use_iv_cost (data, use, cand);
3635 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3637 cand = iv_cand (data, j);
3638 if (!determine_use_iv_cost (data, use, cand))
3639 bitmap_set_bit (to_clear, j);
3642 /* Remove the candidates for that the cost is infinite from
3643 the list of related candidates. */
3644 bitmap_and_compl_into (use->related_cands, to_clear);
3645 bitmap_clear (to_clear);
3649 BITMAP_FREE (to_clear);
/* Dump the resulting use/candidate cost table.  */
3651 if (dump_file && (dump_flags & TDF_DETAILS))
3653 fprintf (dump_file, "Use-candidate costs:\n");
3655 for (i = 0; i < n_iv_uses (data); i++)
3657 use = iv_use (data, i);
3659 fprintf (dump_file, "Use %d:\n", i);
3660 fprintf (dump_file, " cand\tcost\tdepends on\n");
3661 for (j = 0; j < use->n_map_members; j++)
3663 if (!use->cost_map[j].cand
3664 || use->cost_map[j].cost == INFTY)
3667 fprintf (dump_file, " %d\t%d\t",
3668 use->cost_map[j].cand->id,
3669 use->cost_map[j].cost);
3670 if (use->cost_map[j].depends_on)
3671 bitmap_print (dump_file,
3672 use->cost_map[j].depends_on, "","");
3673 fprintf (dump_file, "\n");
3676 fprintf (dump_file, "\n");
3678 fprintf (dump_file, "\n");
3682 /* Determines cost of the candidate CAND. */
3685 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3687 unsigned cost_base, cost_step;
3696 /* There are two costs associated with the candidate -- its increment
3697 and its initialization. The second is almost negligible for any loop
3698 that rolls enough, so we take it just very little into account. */
3700 base = cand->iv->base;
3701 cost_base = force_var_cost (data, base, NULL);
3702 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
/* The increment runs every iteration; the initialization only once,
   hence the division by the expected trip count.  */
3704 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3706 /* Prefer the original iv unless we may gain something by replacing it;
3707 this is not really relevant for artificial ivs created by other
3709 if (cand->pos == IP_ORIGINAL
3710 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3713 /* Prefer not to insert statements into latch unless there are some
3714 already (so that we do not create unnecessary jumps). */
3715 if (cand->pos == IP_END
3716 && empty_block_p (ip_end_pos (data->current_loop)))
3720 /* Determines costs of computation of the candidates. */
3723 determine_iv_costs (struct ivopts_data *data)
3727 if (dump_file && (dump_flags & TDF_DETAILS))
3729 fprintf (dump_file, "Candidate costs:\n");
3730 fprintf (dump_file, " cand\tcost\n");
/* Compute (and optionally dump) the cost of each candidate.  */
3733 for (i = 0; i < n_iv_cands (data); i++)
3735 struct iv_cand *cand = iv_cand (data, i);
3737 determine_iv_cost (data, cand);
3739 if (dump_file && (dump_flags & TDF_DETAILS))
3740 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3743 if (dump_file && (dump_flags & TDF_DETAILS))
3744 fprintf (dump_file, "\n");
3747 /* Calculates cost for having SIZE induction variables. */
3750 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
/* Register-pressure cost of keeping SIZE induction variables live, given
   the registers the loop already uses.  */
3752 return global_cost_for_size (size,
3753 loop_data (data->current_loop)->regs_used,
3757 /* For each size of the induction variable set determine the penalty. */
3760 determine_set_costs (struct ivopts_data *data)
3764 struct loop *loop = data->current_loop;
3767 /* We use the following model (definitely improvable, especially the
3768 cost function -- TODO):
3770 We estimate the number of registers available (using MD data), name it A.
3772 We estimate the number of registers used by the loop, name it U. This
3773 number is obtained as the number of loop phi nodes (not counting virtual
3774 registers and bivs) + the number of variables from outside of the loop.
3776 We set a reserve R (free regs that are used for temporary computations,
3777 etc.). For now the reserve is a constant 3.
3779 Let I be the number of induction variables.
3781 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3782 make a lot of ivs without a reason).
3783 -- if A - R < U + I <= A, the cost is I * PRES_COST
3784 -- if U + I > A, the cost is I * PRES_COST and
3785 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3787 if (dump_file && (dump_flags & TDF_DETAILS))
3789 fprintf (dump_file, "Global costs:\n");
3790 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3791 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3792 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3793 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
/* Count U: header phis whose result is a real (non-virtual) register and
   not an induction variable...  */
3797 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3799 op = PHI_RESULT (phi);
3801 if (!is_gimple_reg (op))
3804 if (get_iv (data, op))
/* ...plus the invariants that have nonlinear uses.  */
3810 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3812 struct version_info *info = ver_info (data, j);
3814 if (info->inv_id && info->has_nonlin_use)
3818 loop_data (loop)->regs_used = n;
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3820 fprintf (dump_file, " regs_used %d\n", n);
3822 if (dump_file && (dump_flags & TDF_DETAILS))
3824 fprintf (dump_file, " cost for size:\n");
3825 fprintf (dump_file, " ivs\tcost\n");
3826 for (j = 0; j <= 2 * target_avail_regs; j++)
3827 fprintf (dump_file, " %d\t%d\n", j,
3828 ivopts_global_cost_for_size (data, j));
3829 fprintf (dump_file, "\n");
3833 /* Returns true if A is a cheaper cost pair than B. */
3836 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
/* Lower use cost wins first.  */
3844 if (a->cost < b->cost)
3847 if (a->cost > b->cost)
3850 /* In case the costs are the same, prefer the cheaper candidate. */
3851 if (a->cand->cost < b->cand->cost)
3857 /* Computes the cost field of IVS structure. */
3860 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
/* Total cost = cost of uses + cost of candidates + register pressure.  */
3864 cost += ivs->cand_use_cost;
3865 cost += ivs->cand_cost;
3866 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3871 /* Set USE not to be expressed by any candidate in IVS. */
3874 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3877 unsigned uid = use->id, cid, iid;
3879 struct cost_pair *cp;
3882 cp = ivs->cand_for_use[uid];
/* Detach the cost pair and update the per-candidate use count.  */
3888 ivs->cand_for_use[uid] = NULL;
3889 ivs->n_cand_uses[cid]--;
/* The candidate's last use is gone: drop it from the set.  */
3891 if (ivs->n_cand_uses[cid] == 0)
3893 bitmap_clear_bit (ivs->cands, cid);
3894 /* Do not count the pseudocandidates. */
3898 ivs->cand_cost -= cp->cand->cost;
3901 ivs->cand_use_cost -= cp->cost;
/* Release the references to the invariants the cost pair depended on.  */
3903 deps = cp->depends_on;
3907 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3909 ivs->n_invariant_uses[iid]--;
3910 if (ivs->n_invariant_uses[iid] == 0)
3915 iv_ca_recount_cost (data, ivs);
3918 /* Set cost pair for USE in set IVS to CP. */
3921 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3922 struct iv_use *use, struct cost_pair *cp)
3924 unsigned uid = use->id, cid, iid;
/* Nothing to do if the assignment is unchanged.  */
3928 if (ivs->cand_for_use[uid] == cp)
/* Remove any previous assignment for this use first.  */
3931 if (ivs->cand_for_use[uid])
3932 iv_ca_set_no_cp (data, ivs, use);
3939 ivs->cand_for_use[uid] = cp;
3940 ivs->n_cand_uses[cid]++;
/* First use of this candidate: add it to the set.  */
3941 if (ivs->n_cand_uses[cid] == 1)
3943 bitmap_set_bit (ivs->cands, cid);
3944 /* Do not count the pseudocandidates. */
3948 ivs->cand_cost += cp->cand->cost;
3951 ivs->cand_use_cost += cp->cost;
/* Account for the invariants the new cost pair depends on.  */
3953 deps = cp->depends_on;
3957 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3959 ivs->n_invariant_uses[iid]++;
3960 if (ivs->n_invariant_uses[iid] == 1)
3965 iv_ca_recount_cost (data, ivs);
3969 /* Extend set IVS by expressing USE by some of the candidates in it
3973 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3976 struct cost_pair *best_cp = NULL, *cp;
/* Uses must be added in order of their ids.  */
3980 gcc_assert (ivs->upto >= use->id);
3982 if (ivs->upto == use->id)
/* Pick the cheapest of the candidates already present in IVS.  */
3988 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3990 cp = get_use_iv_cost (data, use, iv_cand (data, i));
3992 if (cheaper_cost_pair (cp, best_cp))
3996 iv_ca_set_cp (data, ivs, use, best_cp);
3999 /* Get cost for assignment IVS. */
4002 iv_ca_cost (struct iv_ca *ivs)
/* An assignment with uses that cannot be expressed is infinitely bad.  */
4004 return (ivs->bad_uses ? INFTY : ivs->cost);
4007 /* Returns true if all dependences of CP are among invariants in IVS. */
4010 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
/* No dependences: trivially satisfied.  */
4015 if (!cp->depends_on)
/* Every invariant CP depends on must already be referenced by IVS.  */
4018 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4020 if (ivs->n_invariant_uses[i] == 0)
4027 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4028 it before NEXT_CHANGE. */
4030 static struct iv_ca_delta *
4031 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4032 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
/* Prepend a freshly allocated change record to the NEXT_CHANGE list.  */
4034 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4037 change->old_cp = old_cp;
4038 change->new_cp = new_cp;
4039 change->next_change = next_change;
4044 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4047 static struct iv_ca_delta *
4048 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4050 struct iv_ca_delta *last;
/* Append L2 at the tail of L1; both input lists are consumed.  */
4058 for (last = l1; last->next_change; last = last->next_change)
4060 last->next_change = l2;
4065 /* Returns candidate by that USE is expressed in IVS. */
4067 static struct cost_pair *
4068 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
/* Simple lookup in the per-use assignment table.  */
4070 return ivs->cand_for_use[use->id];
4073 /* Reverse the list of changes DELTA, forming the inverse to it. */
4075 static struct iv_ca_delta *
4076 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4078 struct iv_ca_delta *act, *next, *prev = NULL;
4079 struct cost_pair *tmp;
/* Reverse the list in place, swapping old/new in each record so that the
   result undoes the original delta.  */
4081 for (act = delta; act; act = next)
4083 next = act->next_change;
4084 act->next_change = prev;
4088 act->old_cp = act->new_cp;
4095 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4096 reverted instead. */
4099 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4100 struct iv_ca_delta *delta, bool forward)
4102 struct cost_pair *from, *to;
4103 struct iv_ca_delta *act;
/* To revert, walk the reversed delta (its order is restored at the end).  */
4106 delta = iv_ca_delta_reverse (delta);
4108 for (act = delta; act; act = act->next_change)
/* Each record must describe the current state before being applied.  */
4112 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4113 iv_ca_set_cp (data, ivs, act->use, to);
4117 iv_ca_delta_reverse (delta);
4120 /* Returns true if CAND is used in IVS. */
4123 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
/* CAND is in use iff some use is currently expressed by it.  */
4125 return ivs->n_cand_uses[cand->id] > 0;
4128 /* Returns number of induction variable candidates in the set IVS. */
4131 iv_ca_n_cands (struct iv_ca *ivs)
/* Cached count of candidates in the set.  */
4133 return ivs->n_cands;
4136 /* Free the list of changes DELTA. */
4139 iv_ca_delta_free (struct iv_ca_delta **delta)
4141 struct iv_ca_delta *act, *next;
/* Free each change record in the list.  */
4143 for (act = *delta; act; act = next)
4145 next = act->next_change;
4152 /* Allocates new iv candidates assignment. */
4154 static struct iv_ca *
4155 iv_ca_new (struct ivopts_data *data)
4157 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
/* Start from the empty assignment: no use has a candidate yet, all
   counters zeroed (xcalloc) and no invariants referenced.  */
4161 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4162 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4163 nw->cands = BITMAP_ALLOC (NULL);
4166 nw->cand_use_cost = 0;
4168 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4174 /* Free memory occupied by the set IVS. */
4177 iv_ca_free (struct iv_ca **ivs)
/* Release the arrays and the bitmap owned by the set.  */
4179 free ((*ivs)->cand_for_use);
4180 free ((*ivs)->n_cand_uses);
4181 BITMAP_FREE ((*ivs)->cands);
4182 free ((*ivs)->n_invariant_uses);
4187 /* Dumps IVS to FILE. */
4190 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4192 const char *pref = " invariants ";
4195 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4196 bitmap_print (file, ivs->cands, " candidates ","\n");
/* List the invariants referenced by the current assignment.  */
4198 for (i = 1; i <= data->max_inv_id; i++)
4199 if (ivs->n_invariant_uses[i])
4201 fprintf (file, "%s%d", pref, i);
4204 fprintf (file, "\n");
4207 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4208 new set, and store differences in DELTA. Number of induction variables
4209 in the new set is stored to N_IVS. */
4212 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4213 struct iv_cand *cand, struct iv_ca_delta **delta,
4218 struct cost_pair *old_cp, *new_cp;
/* For each use already assigned, switch to CAND whenever that is valid
   and cheaper than the current choice.  */
4221 for (i = 0; i < ivs->upto; i++)
4223 use = iv_use (data, i);
4224 old_cp = iv_ca_cand_for_use (ivs, use);
4227 && old_cp->cand == cand)
4230 new_cp = get_use_iv_cost (data, use, cand);
4234 if (!iv_ca_has_deps (ivs, new_cp))
4237 if (!cheaper_cost_pair (new_cp, old_cp))
4240 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Apply the delta only to measure the resulting cost, then roll back.  */
4243 iv_ca_delta_commit (data, ivs, *delta, true);
4244 cost = iv_ca_cost (ivs);
4246 *n_ivs = iv_ca_n_cands (ivs);
4247 iv_ca_delta_commit (data, ivs, *delta, false);
4252 /* Try narrowing set IVS by removing CAND. Return the cost of
4253 the new set and store the differences in DELTA. */
4256 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4257 struct iv_cand *cand, struct iv_ca_delta **delta)
4261 struct cost_pair *old_cp, *new_cp, *cp;
4263 struct iv_cand *cnd;
/* Every use currently expressed by CAND must be retargeted to some other
   candidate that remains in IVS.  */
4267 for (i = 0; i < n_iv_uses (data); i++)
4269 use = iv_use (data, i);
4271 old_cp = iv_ca_cand_for_use (ivs, use);
4272 if (old_cp->cand != cand)
/* Search either all remaining candidates, or only the related ones.  */
4277 if (data->consider_all_candidates)
4279 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4284 cnd = iv_cand (data, ci);
4286 cp = get_use_iv_cost (data, use, cnd);
4289 if (!iv_ca_has_deps (ivs, cp))
4292 if (!cheaper_cost_pair (cp, new_cp))
4300 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4305 cnd = iv_cand (data, ci);
4307 cp = get_use_iv_cost (data, use, cnd);
4310 if (!iv_ca_has_deps (ivs, cp))
4313 if (!cheaper_cost_pair (cp, new_cp))
/* No replacement found for this use: removing CAND is impossible.  */
4322 iv_ca_delta_free (delta);
4326 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Measure the cost of the narrowed set, then undo the changes.  */
4329 iv_ca_delta_commit (data, ivs, *delta, true);
4330 cost = iv_ca_cost (ivs);
4331 iv_ca_delta_commit (data, ivs, *delta, false);
4336 /* Try optimizing the set of candidates IVS by removing candidates different
4337 from to EXCEPT_CAND from it. Return cost of the new set, and store
4338 differences in DELTA. */
4341 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4342 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4345 struct iv_ca_delta *act_delta, *best_delta;
4346 unsigned i, best_cost, acost;
4347 struct iv_cand *cand;
4350 best_cost = iv_ca_cost (ivs);
/* Try removing each candidate (other than EXCEPT_CAND) and remember the
   removal that lowers the cost the most.  */
4352 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4354 cand = iv_cand (data, i);
4356 if (cand == except_cand)
4359 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4361 if (acost < best_cost)
4364 iv_ca_delta_free (&best_delta);
4365 best_delta = act_delta;
4368 iv_ca_delta_free (&act_delta);
4377 /* Recurse to possibly remove other unnecessary ivs. */
4378 iv_ca_delta_commit (data, ivs, best_delta, true);
4379 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4380 iv_ca_delta_commit (data, ivs, best_delta, false);
4381 *delta = iv_ca_delta_join (best_delta, *delta);
4385 /* Tries to extend the sets IVS in the best possible way in order
4386 to express the USE. */
4389 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4392 unsigned best_cost, act_cost;
4395 struct iv_cand *cand;
4396 struct iv_ca_delta *best_delta = NULL, *act_delta;
4397 struct cost_pair *cp;
/* Baseline: express USE by the best candidate already in IVS, if any.  */
4399 iv_ca_add_use (data, ivs, use);
4400 best_cost = iv_ca_cost (ivs);
4402 cp = iv_ca_cand_for_use (ivs, use);
4405 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4406 iv_ca_set_no_cp (data, ivs, use);
4409 /* First try important candidates. Only if it fails, try the specific ones.
4410 Rationale -- in loops with many variables the best choice often is to use
4411 just one generic biv. If we added here many ivs specific to the uses,
4412 the optimization algorithm later would be likely to get stuck in a local
4413 minimum, thus causing us to create too many ivs. The approach from
4414 few ivs to more seems more likely to be successful -- starting from few
4415 ivs, replacing an expensive use by a specific iv should always be a
4417 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4419 cand = iv_cand (data, i);
4421 if (iv_ca_cand_used_p (ivs, cand))
4424 cp = get_use_iv_cost (data, use, cand);
/* Tentatively assign CAND to USE, let the other uses migrate to it, and
   compare the resulting total cost with the best so far.  */
4428 iv_ca_set_cp (data, ivs, use, cp);
4429 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4430 iv_ca_set_no_cp (data, ivs, use);
4431 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4433 if (act_cost < best_cost)
4435 best_cost = act_cost;
4437 iv_ca_delta_free (&best_delta);
4438 best_delta = act_delta;
4441 iv_ca_delta_free (&act_delta);
/* No important candidate could express USE; fall back to every candidate
   recorded in its cost map.  */
4444 if (best_cost == INFTY)
4446 for (i = 0; i < use->n_map_members; i++)
4448 cp = use->cost_map + i;
4453 /* Already tried this. */
4454 if (cand->important)
4457 if (iv_ca_cand_used_p (ivs, cand))
4461 iv_ca_set_cp (data, ivs, use, cp);
4462 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4463 iv_ca_set_no_cp (data, ivs, use);
4464 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4467 if (act_cost < best_cost)
4469 best_cost = act_cost;
4472 iv_ca_delta_free (&best_delta);
4473 best_delta = act_delta;
4476 iv_ca_delta_free (&act_delta);
/* Commit the best alternative found.  */
4480 iv_ca_delta_commit (data, ivs, best_delta, true);
4481 iv_ca_delta_free (&best_delta);
4483 return (best_cost != INFTY);
4486 /* Finds an initial assignment of candidates to uses. */
4488 static struct iv_ca *
4489 get_initial_solution (struct ivopts_data *data)
4491 struct iv_ca *ivs = iv_ca_new (data);
/* Greedily pick a candidate for every use in order; fail if some use
   cannot be expressed at all.  */
4494 for (i = 0; i < n_iv_uses (data); i++)
4495 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4504 /* Tries to improve set of induction variables IVS. */
4507 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4509 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4510 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4511 struct iv_cand *cand;
4513 /* Try extending the set of induction variables by one. */
4514 for (i = 0; i < n_iv_cands (data); i++)
4516 cand = iv_cand (data, i);
4518 if (iv_ca_cand_used_p (ivs, cand))
4521 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4525 /* If we successfully added the candidate and the set is small enough,
4526 try optimizing it by removing other candidates. */
4527 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4529 iv_ca_delta_commit (data, ivs, act_delta, true);
4530 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4531 iv_ca_delta_commit (data, ivs, act_delta, false);
4532 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4535 if (acost < best_cost)
4538 iv_ca_delta_free (&best_delta);
4539 best_delta = act_delta;
4542 iv_ca_delta_free (&act_delta);
4547 /* Try removing the candidates from the set instead. */
4548 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4550 /* Nothing more we can do. */
/* Apply the best improvement found; the recomputed cost must match the
   one measured when the delta was built.  */
4555 iv_ca_delta_commit (data, ivs, best_delta, true);
4556 gcc_assert (best_cost == iv_ca_cost (ivs));
4557 iv_ca_delta_free (&best_delta);
4561 /* Attempts to find the optimal set of induction variables. We do simple
4562 greedy heuristic -- we try to replace at most one candidate in the selected
4563 solution and remove the unused ivs while this improves the cost. */
/* NOTE(review): elided chunk -- brace/return lines are missing between the
   embedded original line numbers; code kept verbatim.  */
4565 static struct iv_ca *
4566 find_optimal_iv_set (struct ivopts_data *data)
4572 /* Get the initial solution. */
4573 set = get_initial_solution (data);
/* get_initial_solution may fail; presumably NULL is returned to the
   caller in that case -- the failing path is elided here.  */
4576 if (dump_file && (dump_flags & TDF_DETAILS))
4577 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4581 if (dump_file && (dump_flags & TDF_DETAILS))
4583 fprintf (dump_file, "Initial set of candidates:\n");
4584 iv_ca_dump (data, dump_file, set);
/* Iterate the greedy improvement step until it stops helping.  */
4587 while (try_improve_iv_set (data, set))
4589 if (dump_file && (dump_flags & TDF_DETAILS))
4591 fprintf (dump_file, "Improved to:\n");
4592 iv_ca_dump (data, dump_file, set);
4596 if (dump_file && (dump_flags & TDF_DETAILS))
4597 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
/* Record, on each use, which candidate was chosen for it, so that the
   later rewriting phase (rewrite_uses) can consult use->selected.  */
4599 for (i = 0; i < n_iv_uses (data); i++)
4601 use = iv_use (data, i);
4602 use->selected = iv_ca_cand_for_use (set, use)->cand;
4608 /* Creates a new induction variable corresponding to CAND. */
/* NOTE(review): elided chunk -- the switch/case lines selecting between
   IP_NORMAL / IP_END / IP_ORIGINAL positions are missing; code verbatim.  */
4611 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4613 block_stmt_iterator incr_pos;
/* Choose where the increment of the new iv is inserted, depending on
   the candidate's position in the loop.  */
4623 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4627 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4632 /* Mark that the iv is preserved. */
4633 name_info (data, cand->var_before)->preserve_biv = true;
4634 name_info (data, cand->var_after)->preserve_biv = true;
4636 /* Rewrite the increment so that it uses var_before directly. */
4637 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4642 gimple_add_tmp_var (cand->var_before);
4643 add_referenced_tmp_var (cand->var_before);
/* Base must be unshared: it is used in a different statement.  */
4645 base = unshare_expr (cand->iv->base);
4647 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4648 &incr_pos, after, &cand->var_before, &cand->var_after);
4651 /* Creates new induction variables described in SET. */
/* Walks the bitmap of selected candidates and materializes each one.  */
4654 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4657 struct iv_cand *cand;
4660 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4662 cand = iv_cand (data, i);
4663 create_new_iv (data, cand);
4667 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4668 is true, remove also the ssa name defined by the statement. */
4671 remove_statement (tree stmt, bool including_defined_name)
4673 if (TREE_CODE (stmt) == PHI_NODE)
4675 if (!including_defined_name)
4677 /* Prevent the ssa name defined by the statement from being removed. */
4678 SET_PHI_RESULT (stmt, NULL);
4680 remove_phi_node (stmt, NULL_TREE);
/* Ordinary statement: locate it and remove via its iterator.  The
   bsi_remove call itself is elided from this view.  */
4684 block_stmt_iterator bsi = bsi_for_stmt (stmt);
4690 /* Rewrites USE (definition of iv used in a nonlinear expression)
4691 using candidate CAND. */
/* NOTE(review): elided chunk -- case labels, braces and returns between
   the embedded original line numbers are missing; code kept verbatim.  */
4694 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4695 struct iv_use *use, struct iv_cand *cand)
4698 tree op, stmts, tgt, ass;
4699 block_stmt_iterator bsi, pbsi;
4701 /* An important special case -- if we are asked to express value of
4702 the original iv by itself, just exit; there is no need to
4703 introduce a new computation (that might also need casting the
4704 variable to unsigned and back). */
4705 if (cand->pos == IP_ORIGINAL
4706 && TREE_CODE (use->stmt) == MODIFY_EXPR
4707 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4709 op = TREE_OPERAND (use->stmt, 1);
4711 /* Be a bit careful. In case variable is expressed in some
4712 complicated way, rewrite it so that we may get rid of this
4713 complicated expression. */
4714 if ((TREE_CODE (op) == PLUS_EXPR
4715 || TREE_CODE (op) == MINUS_EXPR)
4716 && TREE_OPERAND (op, 0) == cand->var_before
4717 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
/* Express the use's value in terms of CAND.  */
4721 comp = unshare_expr (get_computation (data->current_loop,
/* Determine the target ssa name and insertion point from the kind of
   statement the use occurs in (PHI node vs. MODIFY_EXPR).  */
4723 switch (TREE_CODE (use->stmt))
4726 tgt = PHI_RESULT (use->stmt);
4728 /* If we should keep the biv, do not replace it. */
4729 if (name_info (data, tgt)->preserve_biv)
/* Skip leading labels so new code lands after them in the block.  */
4732 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4733 while (!bsi_end_p (pbsi)
4734 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4742 tgt = TREE_OPERAND (use->stmt, 0);
4743 bsi = bsi_for_stmt (use->stmt);
/* Gimplify the computation; reuse TGT's base variable for the result.  */
4750 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4752 if (TREE_CODE (use->stmt) == PHI_NODE)
/* Replace the PHI by an explicit assignment to TGT.  */
4755 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4756 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4757 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4758 remove_statement (use->stmt, false);
4759 SSA_NAME_DEF_STMT (tgt) = ass;
/* MODIFY_EXPR case: just replace the right-hand side in place.  */
4764 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4765 TREE_OPERAND (use->stmt, 1) = op;
4769 /* Replaces ssa name in index IDX by its basic variable. Callback for
/* for_each_index; also strips ssa names from the element-size and
   low-bound operands of an ARRAY_REF.  */
4773 idx_remove_ssa_names (tree base, tree *idx,
4774 void *data ATTRIBUTE_UNUSED)
4778 if (TREE_CODE (*idx) == SSA_NAME)
4779 *idx = SSA_NAME_VAR (*idx);
4781 if (TREE_CODE (base) == ARRAY_REF)
4783 op = &TREE_OPERAND (base, 2);
4785 && TREE_CODE (*op) == SSA_NAME)
4786 *op = SSA_NAME_VAR (*op);
4787 op = &TREE_OPERAND (base, 3);
4789 && TREE_CODE (*op) == SSA_NAME)
4790 *op = SSA_NAME_VAR (*op);
4796 /* Unshares REF and replaces ssa names inside it by their basic variables. */
/* Returns the rewritten copy; the original REF is left untouched.  */
4799 unshare_and_remove_ssa_names (tree ref)
4801 ref = unshare_expr (ref);
4802 for_each_index (&ref, idx_remove_ssa_names, NULL);
4807 /* Rewrites base of memory access OP with expression WITH in statement
4808 pointed to by BSI. */
/* NOTE(review): elided chunk -- several branch bodies and braces between
   the embedded original line numbers are missing; code kept verbatim.  */
4811 rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4813 tree bvar, var, new_var, new_name, copy, name;
4816 var = bvar = get_base_address (*op);
/* Aliasing bookkeeping below only applies when the new base is an
   SSA name with an identifiable underlying variable.  */
4818 if (!var || TREE_CODE (with) != SSA_NAME)
4821 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4822 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4823 if (TREE_CODE (var) == INDIRECT_REF)
4824 var = TREE_OPERAND (var, 0);
4825 if (TREE_CODE (var) == SSA_NAME)
4828 var = SSA_NAME_VAR (var);
4830 else if (DECL_P (var))
/* Prefer the variable's type memory tag for alias purposes.  */
4835 if (var_ann (var)->type_mem_tag)
4836 var = var_ann (var)->type_mem_tag;
4838 /* We need to add a memory tag for the variable. But we do not want
4839 to add it to the temporary used for the computations, since this leads
4840 to problems in redundancy elimination when there are common parts
4841 in two computations referring to the different arrays. So we copy
4842 the variable to a new temporary. */
4843 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
/* Reuse the ssa name's version when one is available; otherwise make
   a fresh temporary carrying the memory tag.  */
4845 new_name = duplicate_ssa_name (name, copy);
4848 new_var = create_tmp_var (TREE_TYPE (with), "ruatmp");
4849 add_referenced_tmp_var (new_var);
4850 var_ann (new_var)->type_mem_tag = var;
4851 new_name = make_ssa_name (new_var, copy);
4853 TREE_OPERAND (copy, 0) = new_name;
4855 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4861 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4862 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
/* Preserve the original reference for alias analysis: either the one
   already recorded on an INDIRECT_REF, or a cleaned-up copy of *OP.  */
4864 if (TREE_CODE (*op) == INDIRECT_REF)
4865 orig = REF_ORIGINAL (*op);
4867 orig = unshare_and_remove_ssa_names (*op);
4869 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
4871 /* Record the original reference, for purposes of alias analysis. */
4872 REF_ORIGINAL (*op) = orig;
4875 /* Rewrites USE (address that is an iv) using candidate CAND. */
/* Computes the address expression in terms of CAND, gimplifies it
   before the use statement, and rebases the memory access on it.  */
4878 rewrite_use_address (struct ivopts_data *data,
4879 struct iv_use *use, struct iv_cand *cand)
4881 tree comp = unshare_expr (get_computation (data->current_loop,
4883 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4885 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
/* force_gimple_operand may have produced setup statements; emit them
   before the use.  */
4888 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4890 rewrite_address_base (&bsi, use->op_p, op);
4893 /* Rewrites USE (the condition such that one of the arguments is an iv) using
/* candidate CAND.  Prefers eliminating the iv by comparing CAND against
   a precomputed bound; falls back to expressing the iv via CAND.  */
4897 rewrite_use_compare (struct ivopts_data *data,
4898 struct iv_use *use, struct iv_cand *cand)
4901 tree *op_p, cond, op, stmts, bound;
4902 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
4903 enum tree_code compare;
4905 if (may_eliminate_iv (data, use, cand, &compare, &bound))
4907 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4908 tree var_type = TREE_TYPE (var);
/* The bound must match the candidate's type for the comparison.  */
4910 bound = fold_convert (var_type, bound);
4911 op = force_gimple_operand (unshare_expr (bound), &stmts,
4915 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
/* Replace the whole condition by VAR <compare> BOUND.  */
4917 *use->op_p = build2 (compare, boolean_type_node, var, op);
4918 update_stmt (use->stmt);
4922 /* The induction variable elimination failed; just express the original
4924 comp = unshare_expr (get_computation (data->current_loop, use, cand));
/* Pick the condition operand that actually is the (stepping) iv.  */
4927 op_p = &TREE_OPERAND (cond, 0);
4928 if (TREE_CODE (*op_p) != SSA_NAME
4929 || zero_p (get_iv (data, *op_p)->step))
4930 op_p = &TREE_OPERAND (cond, 1);
4932 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4934 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4939 /* Ensure that operand *OP_P may be used at the end of EXIT without
4940 violating loop closed ssa form. */
/* If the value is defined inside the loop, route it through a PHI node
   in the exit block (reusing an existing one when possible).  */
4943 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4946 struct loop *def_loop;
4949 use = USE_FROM_PTR (op_p);
/* Non-SSA operands need no loop-closed-ssa treatment.  */
4950 if (TREE_CODE (use) != SSA_NAME)
4953 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
/* If the definition's loop also contains the exit destination, the use
   does not cross a loop boundary.  */
4957 def_loop = def_bb->loop_father;
4958 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4961 /* Try finding a phi node that copies the value out of the loop. */
4962 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
4963 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4968 /* Create such a phi node. */
4969 tree new_name = duplicate_ssa_name (use, NULL);
4971 phi = create_phi_node (new_name, exit->dest);
4972 SSA_NAME_DEF_STMT (new_name) = phi;
4973 add_phi_arg (phi, use, exit);
/* Make the operand refer to the PHI result instead of the in-loop name.  */
4976 SET_USE (op_p, PHI_RESULT (phi));
4979 /* Ensure that operands of STMT may be used at the end of EXIT without
4980 violating loop closed ssa form. */
/* Applies protect_loop_closed_ssa_form_use to every use, vuse and
   v_may_def operand of STMT.  */
4983 protect_loop_closed_ssa_form (edge exit, tree stmt)
4987 v_may_def_optype v_may_defs;
/* Make sure the operand caches are up to date before scanning them.  */
4990 get_stmt_operands (stmt);
4992 uses = STMT_USE_OPS (stmt);
4993 for (i = 0; i < NUM_USES (uses); i++)
4994 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4996 vuses = STMT_VUSE_OPS (stmt);
4997 for (i = 0; i < NUM_VUSES (vuses); i++)
4998 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5000 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5001 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5002 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5005 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5006 so that they are emitted on the correct place, and so that the loop closed
5007 ssa form is preserved. */
/* NOTE(review): elided chunk -- braces and a few statements between the
   embedded original line numbers are missing; code kept verbatim.  */
5010 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5012 tree_stmt_iterator tsi;
5013 block_stmt_iterator bsi;
5014 tree phi, stmt, def, next;
/* The exit block must have a single predecessor so the inserted code
   only runs when the loop exits this way.  */
5016 if (!single_pred_p (exit->dest))
5017 split_loop_exit_edge (exit);
5019 /* Ensure there is label in exit->dest, so that we can
5021 tree_block_label (exit->dest);
5022 bsi = bsi_after_labels (exit->dest);
/* Insert the computation, statement by statement if STMTS is a list,
   protecting loop-closed SSA form after each insertion.  */
5024 if (TREE_CODE (stmts) == STATEMENT_LIST)
5026 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5028 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5029 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5034 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5035 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
/* Replace each exit PHI whose argument on EXIT is OP by a plain
   assignment of OP to the PHI result.  */
5041 for (phi = phi_nodes (exit->dest); phi; phi = next)
5043 next = PHI_CHAIN (phi);
5045 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5047 def = PHI_RESULT (phi);
5048 remove_statement (phi, false);
5049 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5051 SSA_NAME_DEF_STMT (def) = stmt;
5052 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5057 /* Rewrites the final value of USE (that is only needed outside of the loop)
5058 using candidate CAND. */
/* NOTE(review): elided chunk -- case labels, braces and early returns
   between the embedded original line numbers are missing; code verbatim.  */
5061 rewrite_use_outer (struct ivopts_data *data,
5062 struct iv_use *use, struct iv_cand *cand)
5065 tree value, op, stmts, tgt;
/* Find the ssa name defined by the use's statement.  */
5068 switch (TREE_CODE (use->stmt))
5071 tgt = PHI_RESULT (use->stmt);
5074 tgt = TREE_OPERAND (use->stmt, 0);
/* This path only applies when the loop has a single dominating exit.  */
5080 exit = single_dom_exit (data->current_loop);
/* Either the known final value can replace the iv directly ...  */
5086 bool ok = may_replace_final_value (data, use, &value);
/* ... or the value is recomputed from CAND at the loop exit.  */
5090 value = get_computation_at (data->current_loop,
5091 use, cand, last_stmt (exit->src));
5093 value = unshare_expr (value);
5094 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5096 /* If we will preserve the iv anyway and we would need to perform
5097 some computation to replace the final value, do nothing. */
5098 if (stmts && name_info (data, tgt)->preserve_biv)
/* Redirect exit-PHI arguments from TGT to the computed value.  */
5101 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5103 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5105 if (USE_FROM_PTR (use_p) == tgt)
5106 SET_USE (use_p, op);
/* Any setup statements must be emitted on the exit edge.  */
5110 compute_phi_arg_on_exit (exit, stmts, op);
5112 /* Enable removal of the statement. We cannot remove it directly,
5113 since we may still need the aliasing information attached to the
5114 ssa name defined by it. */
5115 name_info (data, tgt)->iv->have_use_for = false;
5119 /* If the variable is going to be preserved anyway, there is nothing to
5121 if (name_info (data, tgt)->preserve_biv)
5124 /* Otherwise we just need to compute the iv. */
5125 rewrite_use_nonlinear_expr (data, use, cand);
5128 /* Rewrites USE using candidate CAND. */
/* Dispatches on the use kind to the specialized rewriter, then refreshes
   the statement's operand caches.  */
5131 rewrite_use (struct ivopts_data *data,
5132 struct iv_use *use, struct iv_cand *cand)
5136 case USE_NONLINEAR_EXPR:
5137 rewrite_use_nonlinear_expr (data, use, cand);
5141 rewrite_use_outer (data, use, cand);
5145 rewrite_use_address (data, use, cand);
5149 rewrite_use_compare (data, use, cand);
5155 update_stmt (use->stmt);
5158 /* Rewrite the uses using the selected induction variables. */
/* use->selected was filled in by find_optimal_iv_set.  */
5161 rewrite_uses (struct ivopts_data *data)
5164 struct iv_cand *cand;
5167 for (i = 0; i < n_iv_uses (data); i++)
5169 use = iv_use (data, i);
5170 cand = use->selected;
5173 rewrite_use (data, use, cand);
5177 /* Removes the ivs that are not used after rewriting. */
/* Scans all relevant ssa versions and deletes the defining statement
   (and the name itself) of each stepping iv that has no remaining use
   and is not marked to be preserved.  */
5180 remove_unused_ivs (struct ivopts_data *data)
5185 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5187 struct version_info *info;
5189 info = ver_info (data, j);
/* Only genuine (nonzero-step) ivs with no use and no preserve flag.  */
5191 && !zero_p (info->iv->step)
5193 && !info->iv->have_use_for
5194 && !info->preserve_biv)
5195 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5199 /* Frees data allocated by the optimization of a single loop. */
/* Resets per-loop state in DATA so the structure can be reused for the
   next loop: niter cache, version info flags, use/candidate arrays and
   their cost maps, and cached DECL_RTLs.  */
5202 free_loop_data (struct ivopts_data *data)
5207 htab_empty (data->niters);
/* Clear the per-version flags for every ssa name marked relevant.  */
5209 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5211 struct version_info *info;
5213 info = ver_info (data, i);
5217 info->has_nonlin_use = false;
5218 info->preserve_biv = false;
5221 bitmap_clear (data->relevant);
5222 bitmap_clear (data->important_candidates);
/* Free per-use allocations: related-candidate bitmaps and cost maps.  */
5224 for (i = 0; i < n_iv_uses (data); i++)
5226 struct iv_use *use = iv_use (data, i);
5229 BITMAP_FREE (use->related_cands);
5230 for (j = 0; j < use->n_map_members; j++)
5231 if (use->cost_map[j].depends_on)
5232 BITMAP_FREE (use->cost_map[j].depends_on);
5233 free (use->cost_map);
5236 VARRAY_POP_ALL (data->iv_uses);
5238 for (i = 0; i < n_iv_cands (data); i++)
5240 struct iv_cand *cand = iv_cand (data, i);
5246 VARRAY_POP_ALL (data->iv_candidates);
/* Grow the version-info array if new ssa names were created; doubling
   num_ssa_names leaves headroom for the next loop.  */
5248 if (data->version_info_size < num_ssa_names)
5250 data->version_info_size = 2 * num_ssa_names;
5251 free (data->version_info);
5252 data->version_info = xcalloc (data->version_info_size,
5253 sizeof (struct version_info));
5256 data->max_inv_id = 0;
/* Invalidate cached DECL_RTLs recorded during cost estimation.  */
5258 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5260 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5262 SET_DECL_RTL (obj, NULL_RTX);
5264 VARRAY_POP_ALL (decl_rtl_to_reset);
5267 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
/* loop tree; releases per-loop aux data, then all pass-global storage.  */
5271 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
/* Free the aux field attached to every loop (index 0 is the root).  */
5275 for (i = 1; i < loops->num; i++)
5276 if (loops->parray[i])
5278 free (loops->parray[i]->aux);
5279 loops->parray[i]->aux = NULL;
/* Release the remaining per-loop state before the global structures.  */
5282 free_loop_data (data);
5283 free (data->version_info);
5284 BITMAP_FREE (data->relevant);
5285 BITMAP_FREE (data->important_candidates);
5286 htab_delete (data->niters);
5288 VARRAY_FREE (decl_rtl_to_reset);
5289 VARRAY_FREE (data->iv_uses);
5290 VARRAY_FREE (data->iv_candidates);
5293 /* Optimizes the LOOP. Returns true if anything changed. */
/* NOTE(review): elided chunk -- braces, gotos and early exits between the
   embedded original line numbers are missing; code kept verbatim.  */
5296 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5298 bool changed = false;
5299 struct iv_ca *iv_ca;
5302 data->current_loop = loop;
5304 if (dump_file && (dump_flags & TDF_DETAILS))
5306 fprintf (dump_file, "Processing loop %d\n", loop->num);
/* Dump the single dominating exit, if the loop has one.  */
5308 exit = single_dom_exit (loop);
5311 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5312 exit->src->index, exit->dest->index);
5313 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5314 fprintf (dump_file, "\n");
5317 fprintf (dump_file, "\n");
5320 /* For each ssa name determines whether it behaves as an induction variable
/* Bail out early when the loop has no usable induction variables.  */
5322 if (!find_induction_variables (data))
5325 /* Finds interesting uses (item 1). */
5326 find_interesting_uses (data);
/* Give up on loops with too many uses -- the cost model is quadratic
   in the number of uses times candidates.  */
5327 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5330 /* Finds candidates for the induction variables (item 2). */
5331 find_iv_candidates (data);
5333 /* Calculates the costs (item 3, part 1). */
5334 determine_use_iv_costs (data);
5335 determine_iv_costs (data);
5336 determine_set_costs (data);
5338 /* Find the optimal set of induction variables (item 3, part 2). */
5339 iv_ca = find_optimal_iv_set (data);
5344 /* Create the new induction variables (item 4, part 1). */
5345 create_new_ivs (data, iv_ca);
5346 iv_ca_free (&iv_ca);
5348 /* Rewrite the uses (item 4, part 2). */
5349 rewrite_uses (data);
5351 /* Remove the ivs that are unused after rewriting. */
5352 remove_unused_ivs (data);
5354 /* We have changed the structure of induction variables; it might happen
5355 that definitions in the scev database refer to some of them that were
/* Clean up per-loop state regardless of whether anything changed.  */
5360 free_loop_data (data);
5365 /* Main entry point. Optimizes induction variables in LOOPS. */
5368 tree_ssa_iv_optimize (struct loops *loops)
5371 struct ivopts_data data;
5373 tree_ssa_iv_optimize_init (loops, &data);
5375 /* Optimize the loops starting with the innermost ones. */
5376 loop = loops->tree_root;
5380 #ifdef ENABLE_CHECKING
5381 verify_loop_closed_ssa ();
5385 /* Scan the loops, inner ones first. */
5386 while (loop != loops->tree_root)
5388 if (dump_file && (dump_flags & TDF_DETAILS))
5389 flow_loop_dump (loop, dump_file, NULL, 1);
5391 tree_ssa_iv_optimize_loop (&data, loop);
5403 #ifdef ENABLE_CHECKING
5404 verify_loop_closed_ssa ();
5408 tree_ssa_iv_optimize_finalize (loops, &data);