1 /* Induction variable optimizations.
Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
96 /* The infinite cost. */
97 #define INFTY 10000000
/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
101 #define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  unsigned cost;	/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
148 /* The candidate - cost pair. */
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand *selected;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
187 /* The induction variable candidate. */
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
207 /* The data used by the induction variable optimizations. */
209 typedef struct iv_use *iv_use_p;
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
219 /* The currently optimized loop. */
220 struct loop *current_loop;
222 /* Number of registers used in it. */
225 /* Numbers of iterations for all exits of the current loop. */
226 struct pointer_map_t *niters;
228 /* The size of version_info array allocated. */
229 unsigned version_info_size;
231 /* The array of information for the ssa names. */
232 struct version_info *version_info;
234 /* The bitmap of indices in version_info whose value was changed. */
237 /* The maximum invariant id. */
240 /* The uses of induction variables. */
241 VEC(iv_use_p,heap) *iv_uses;
243 /* The candidates. */
244 VEC(iv_cand_p,heap) *iv_candidates;
246 /* A bitmap of important candidates. */
247 bitmap important_candidates;
249 /* Whether to consider just related and important candidates when replacing a
251 bool consider_all_candidates;
254 /* An assignment of iv candidates to uses. */
258 /* The number of uses covered by the assignment. */
261 /* Number of uses that cannot be expressed by the candidates in the set. */
264 /* Candidate assigned to a use, together with the related costs. */
265 struct cost_pair **cand_for_use;
267 /* Number of times each candidate is used. */
268 unsigned *n_cand_uses;
270 /* The candidates used. */
273 /* The number of candidates in the set. */
276 /* Total number of registers needed. */
279 /* Total cost of expressing uses. */
280 comp_cost cand_use_cost;
282 /* Total cost of candidates. */
285 /* Number of times each invariant is used. */
286 unsigned *n_invariant_uses;
288 /* Total cost of the assignment. */
292 /* Difference of two iv candidate assignments. */
299 /* An old assignment (for rollback purposes). */
300 struct cost_pair *old_cp;
302 /* A new assignment. */
303 struct cost_pair *new_cp;
305 /* Next change in the list. */
306 struct iv_ca_delta *next_change;
309 /* Bound on number of candidates below that all candidates are considered. */
311 #define CONSIDER_ALL_CANDIDATES_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
314 /* If there are more iv occurrences, we just give up (it is quite unlikely that
315 optimizing such a loop would help, and it would take ages). */
317 #define MAX_CONSIDERED_USES \
318 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
320 /* If there are at most this number of ivs in the set, try removing unnecessary
321 ivs from the set always. */
323 #define ALWAYS_PRUNE_CAND_SET_BOUND \
324 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
326 /* The list of trees for that the decl_rtl field must be reset is stored
329 static VEC(tree,heap) *decl_rtl_to_reset;
331 /* Number of uses recorded in DATA. */
333 static inline unsigned
334 n_iv_uses (struct ivopts_data *data)
336 return VEC_length (iv_use_p, data->iv_uses);
339 /* Ith use recorded in DATA. */
341 static inline struct iv_use *
342 iv_use (struct ivopts_data *data, unsigned i)
344 return VEC_index (iv_use_p, data->iv_uses, i);
347 /* Number of candidates recorded in DATA. */
349 static inline unsigned
350 n_iv_cands (struct ivopts_data *data)
352 return VEC_length (iv_cand_p, data->iv_candidates);
355 /* Ith candidate recorded in DATA. */
357 static inline struct iv_cand *
358 iv_cand (struct ivopts_data *data, unsigned i)
360 return VEC_index (iv_cand_p, data->iv_candidates, i);
363 /* The single loop exit if it dominates the latch, NULL otherwise. */
366 single_dom_exit (struct loop *loop)
368 edge exit = single_exit (loop);
373 if (!just_once_each_iteration_p (loop, exit->src))
379 /* Dumps information about the induction variable IV to FILE. */
381 extern void dump_iv (FILE *, struct iv *);
383 dump_iv (FILE *file, struct iv *iv)
387 fprintf (file, "ssa name ");
388 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
389 fprintf (file, "\n");
392 fprintf (file, " type ");
393 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
394 fprintf (file, "\n");
398 fprintf (file, " base ");
399 print_generic_expr (file, iv->base, TDF_SLIM);
400 fprintf (file, "\n");
402 fprintf (file, " step ");
403 print_generic_expr (file, iv->step, TDF_SLIM);
404 fprintf (file, "\n");
408 fprintf (file, " invariant ");
409 print_generic_expr (file, iv->base, TDF_SLIM);
410 fprintf (file, "\n");
415 fprintf (file, " base object ");
416 print_generic_expr (file, iv->base_object, TDF_SLIM);
417 fprintf (file, "\n");
421 fprintf (file, " is a biv\n");
424 /* Dumps information about the USE to FILE. */
426 extern void dump_use (FILE *, struct iv_use *);
428 dump_use (FILE *file, struct iv_use *use)
430 fprintf (file, "use %d\n", use->id);
434 case USE_NONLINEAR_EXPR:
435 fprintf (file, " generic\n");
439 fprintf (file, " address\n");
443 fprintf (file, " compare\n");
450 fprintf (file, " in statement ");
451 print_gimple_stmt (file, use->stmt, 0, 0);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
472 dump_uses (FILE *file, struct ivopts_data *data)
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
505 fprintf (file, " final value replacement\n");
512 fprintf (file, " incremented before exit test\n");
516 fprintf (file, " incremented at end\n");
520 fprintf (file, " original biv\n");
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
547 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
549 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
553 if (sbb == loop->latch)
559 return stmt == last_stmt (bb);
562 /* Returns true if STMT if after the place where the original induction
563 variable CAND is incremented. */
566 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
568 basic_block cand_bb = gimple_bb (cand->incremented_at);
569 basic_block stmt_bb = gimple_bb (stmt);
570 gimple_stmt_iterator bsi;
572 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
575 if (stmt_bb != cand_bb)
578 /* Scan the block from the end, since the original ivs are usually
579 incremented at the end of the loop body. */
580 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
582 if (gsi_stmt (bsi) == cand->incremented_at)
584 if (gsi_stmt (bsi) == stmt)
589 /* Returns true if STMT if after the place where the induction variable
590 CAND is incremented in LOOP. */
593 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
601 return stmt_after_ip_normal_pos (loop, stmt);
604 return stmt_after_ip_original_pos (cand, stmt);
611 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
614 abnormal_ssa_name_p (tree exp)
619 if (TREE_CODE (exp) != SSA_NAME)
622 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
625 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
626 abnormal phi node. Callback for for_each_index. */
629 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
630 void *data ATTRIBUTE_UNUSED)
632 if (TREE_CODE (base) == ARRAY_REF)
634 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
636 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
640 return !abnormal_ssa_name_p (*index);
643 /* Returns true if EXPR contains a ssa name that occurs in an
644 abnormal phi node. */
647 contains_abnormal_ssa_name_p (tree expr)
650 enum tree_code_class codeclass;
655 code = TREE_CODE (expr);
656 codeclass = TREE_CODE_CLASS (code);
658 if (code == SSA_NAME)
659 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
661 if (code == INTEGER_CST
662 || is_gimple_min_invariant (expr))
665 if (code == ADDR_EXPR)
666 return !for_each_index (&TREE_OPERAND (expr, 0),
667 idx_contains_abnormal_ssa_name_p,
674 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
679 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
691 /* Returns tree describing number of iterations determined from
692 EXIT of DATA->current_loop, or NULL if something goes wrong. */
695 niter_for_exit (struct ivopts_data *data, edge exit)
697 struct tree_niter_desc desc;
703 data->niters = pointer_map_create ();
707 slot = pointer_map_contains (data->niters, exit);
711 /* Try to determine number of iterations. We must know it
712 unconditionally (i.e., without possibility of # of iterations
713 being zero). Also, we cannot safely work with ssa names that
714 appear in phi nodes on abnormal edges, so that we do not create
715 overlapping life ranges for them (PR 27283). */
716 if (number_of_iterations_exit (data->current_loop,
718 && integer_zerop (desc.may_be_zero)
719 && !contains_abnormal_ssa_name_p (desc.niter))
724 *pointer_map_insert (data->niters, exit) = niter;
727 niter = (tree) *slot;
732 /* Returns tree describing number of iterations determined from
733 single dominating exit of DATA->current_loop, or NULL if something
737 niter_for_single_dom_exit (struct ivopts_data *data)
739 edge exit = single_dom_exit (data->current_loop);
744 return niter_for_exit (data, exit);
747 /* Initializes data structures used by the iv optimization pass, stored
751 tree_ssa_iv_optimize_init (struct ivopts_data *data)
753 data->version_info_size = 2 * num_ssa_names;
754 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
755 data->relevant = BITMAP_ALLOC (NULL);
756 data->important_candidates = BITMAP_ALLOC (NULL);
757 data->max_inv_id = 0;
759 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
760 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
761 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
764 /* Returns a memory object to that EXPR points. In case we are able to
765 determine that it does not point to any such object, NULL is returned. */
768 determine_base_object (tree expr)
770 enum tree_code code = TREE_CODE (expr);
773 /* If this is a pointer casted to any type, we need to determine
774 the base object for the pointer; so handle conversions before
775 throwing away non-pointer expressions. */
776 if (CONVERT_EXPR_P (expr))
777 return determine_base_object (TREE_OPERAND (expr, 0));
779 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
788 obj = TREE_OPERAND (expr, 0);
789 base = get_base_address (obj);
794 if (TREE_CODE (base) == INDIRECT_REF)
795 return determine_base_object (TREE_OPERAND (base, 0));
797 return fold_convert (ptr_type_node,
798 build_fold_addr_expr (base));
800 case POINTER_PLUS_EXPR:
801 return determine_base_object (TREE_OPERAND (expr, 0));
805 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
809 return fold_convert (ptr_type_node, expr);
813 /* Allocates an induction variable with given initial value BASE and step STEP
817 alloc_iv (tree base, tree step)
819 struct iv *iv = XCNEW (struct iv);
820 gcc_assert (step != NULL_TREE);
823 iv->base_object = determine_base_object (base);
826 iv->have_use_for = false;
828 iv->ssa_name = NULL_TREE;
833 /* Sets STEP and BASE for induction variable IV. */
836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
838 struct version_info *info = name_info (data, iv);
840 gcc_assert (!info->iv);
842 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
843 info->iv = alloc_iv (base, step);
844 info->iv->ssa_name = iv;
847 /* Finds induction variable declaration for VAR. */
850 get_iv (struct ivopts_data *data, tree var)
853 tree type = TREE_TYPE (var);
855 if (!POINTER_TYPE_P (type)
856 && !INTEGRAL_TYPE_P (type))
859 if (!name_info (data, var)->iv)
861 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
864 || !flow_bb_inside_loop_p (data->current_loop, bb))
865 set_iv (data, var, var, build_int_cst (type, 0));
868 return name_info (data, var)->iv;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
875 determine_biv_step (gimple phi)
877 struct loop *loop = gimple_bb (phi)->loop_father;
878 tree name = PHI_RESULT (phi);
881 if (!is_gimple_reg (name))
884 if (!simple_iv (loop, phi, name, &iv, true))
887 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
890 /* Finds basic ivs. */
893 find_bivs (struct ivopts_data *data)
896 tree step, type, base;
898 struct loop *loop = data->current_loop;
899 gimple_stmt_iterator psi;
901 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
903 phi = gsi_stmt (psi);
905 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
908 step = determine_biv_step (phi);
912 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
913 base = expand_simple_operations (base);
914 if (contains_abnormal_ssa_name_p (base)
915 || contains_abnormal_ssa_name_p (step))
918 type = TREE_TYPE (PHI_RESULT (phi));
919 base = fold_convert (type, base);
922 if (POINTER_TYPE_P (type))
923 step = fold_convert (sizetype, step);
925 step = fold_convert (type, step);
928 set_iv (data, PHI_RESULT (phi), base, step);
935 /* Marks basic ivs. */
938 mark_bivs (struct ivopts_data *data)
942 struct iv *iv, *incr_iv;
943 struct loop *loop = data->current_loop;
945 gimple_stmt_iterator psi;
947 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
949 phi = gsi_stmt (psi);
951 iv = get_iv (data, PHI_RESULT (phi));
955 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
956 incr_iv = get_iv (data, var);
960 /* If the increment is in the subloop, ignore it. */
961 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
962 if (incr_bb->loop_father != data->current_loop
963 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
967 incr_iv->biv_p = true;
971 /* Checks whether STMT defines a linear induction variable and stores its
975 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
978 struct loop *loop = data->current_loop;
980 iv->base = NULL_TREE;
981 iv->step = NULL_TREE;
983 if (gimple_code (stmt) != GIMPLE_ASSIGN)
986 lhs = gimple_assign_lhs (stmt);
987 if (TREE_CODE (lhs) != SSA_NAME)
990 if (!simple_iv (loop, stmt, lhs, iv, true))
992 iv->base = expand_simple_operations (iv->base);
994 if (contains_abnormal_ssa_name_p (iv->base)
995 || contains_abnormal_ssa_name_p (iv->step))
1001 /* Finds general ivs in statement STMT. */
1004 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1008 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1011 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1014 /* Finds general ivs in basic block BB. */
1017 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1019 gimple_stmt_iterator bsi;
1021 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1022 find_givs_in_stmt (data, gsi_stmt (bsi));
1025 /* Finds general ivs. */
1028 find_givs (struct ivopts_data *data)
1030 struct loop *loop = data->current_loop;
1031 basic_block *body = get_loop_body_in_dom_order (loop);
1034 for (i = 0; i < loop->num_nodes; i++)
1035 find_givs_in_bb (data, body[i]);
1039 /* For each ssa name defined in LOOP determines whether it is an induction
1040 variable and if so, its initial value and step. */
1043 find_induction_variables (struct ivopts_data *data)
1048 if (!find_bivs (data))
1054 if (dump_file && (dump_flags & TDF_DETAILS))
1056 tree niter = niter_for_single_dom_exit (data);
1060 fprintf (dump_file, " number of iterations ");
1061 print_generic_expr (dump_file, niter, TDF_SLIM);
1062 fprintf (dump_file, "\n\n");
1065 fprintf (dump_file, "Induction variables:\n\n");
1067 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1069 if (ver_info (data, i)->iv)
1070 dump_iv (dump_file, ver_info (data, i)->iv);
1077 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1079 static struct iv_use *
1080 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1081 gimple stmt, enum use_type use_type)
1083 struct iv_use *use = XCNEW (struct iv_use);
1085 use->id = n_iv_uses (data);
1086 use->type = use_type;
1090 use->related_cands = BITMAP_ALLOC (NULL);
1092 /* To avoid showing ssa name in the dumps, if it was not reset by the
1094 iv->ssa_name = NULL_TREE;
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 dump_use (dump_file, use);
1099 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1104 /* Checks whether OP is a loop-level invariant and if so, records it.
1105 NONLINEAR_USE is true if the invariant is used in a way we do not
1106 handle specially. */
1109 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1112 struct version_info *info;
1114 if (TREE_CODE (op) != SSA_NAME
1115 || !is_gimple_reg (op))
1118 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1120 && flow_bb_inside_loop_p (data->current_loop, bb))
1123 info = name_info (data, op);
1125 info->has_nonlin_use |= nonlinear_use;
1127 info->inv_id = ++data->max_inv_id;
1128 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1131 /* Checks whether the use OP is interesting and if so, records it. */
1133 static struct iv_use *
1134 find_interesting_uses_op (struct ivopts_data *data, tree op)
1141 if (TREE_CODE (op) != SSA_NAME)
1144 iv = get_iv (data, op);
1148 if (iv->have_use_for)
1150 use = iv_use (data, iv->use_id);
1152 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1156 if (integer_zerop (iv->step))
1158 record_invariant (data, op, true);
1161 iv->have_use_for = true;
1163 civ = XNEW (struct iv);
1166 stmt = SSA_NAME_DEF_STMT (op);
1167 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1168 || is_gimple_assign (stmt));
1170 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1171 iv->use_id = use->id;
1176 /* Given a condition in statement STMT, checks whether it is a compare
1177 of an induction variable and an invariant. If this is the case,
1178 CONTROL_VAR is set to location of the iv, BOUND to the location of
1179 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1180 induction variable descriptions, and true is returned. If this is not
1181 the case, CONTROL_VAR and BOUND are set to the arguments of the
1182 condition and false is returned. */
1185 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1186 tree **control_var, tree **bound,
1187 struct iv **iv_var, struct iv **iv_bound)
1189 /* The objects returned when COND has constant operands. */
1190 static struct iv const_iv;
1192 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1193 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1196 if (gimple_code (stmt) == GIMPLE_COND)
1198 op0 = gimple_cond_lhs_ptr (stmt);
1199 op1 = gimple_cond_rhs_ptr (stmt);
1203 op0 = gimple_assign_rhs1_ptr (stmt);
1204 op1 = gimple_assign_rhs2_ptr (stmt);
1207 zero = integer_zero_node;
1208 const_iv.step = integer_zero_node;
1210 if (TREE_CODE (*op0) == SSA_NAME)
1211 iv0 = get_iv (data, *op0);
1212 if (TREE_CODE (*op1) == SSA_NAME)
1213 iv1 = get_iv (data, *op1);
1215 /* Exactly one of the compared values must be an iv, and the other one must
1220 if (integer_zerop (iv0->step))
1222 /* Control variable may be on the other side. */
1223 tmp_op = op0; op0 = op1; op1 = tmp_op;
1224 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1226 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1230 *control_var = op0;;
1241 /* Checks whether the condition in STMT is interesting and if so,
1245 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1247 tree *var_p, *bound_p;
1248 struct iv *var_iv, *civ;
1250 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1252 find_interesting_uses_op (data, *var_p);
1253 find_interesting_uses_op (data, *bound_p);
1257 civ = XNEW (struct iv);
1259 record_use (data, NULL, civ, stmt, USE_COMPARE);
1262 /* Returns true if expression EXPR is obviously invariant in LOOP,
1263 i.e. if all its operands are defined outside of the LOOP. LOOP
1264 should not be the function body. */
1267 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1272 gcc_assert (loop_depth (loop) > 0);
1274 if (is_gimple_min_invariant (expr))
1277 if (TREE_CODE (expr) == SSA_NAME)
1279 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1281 && flow_bb_inside_loop_p (loop, def_bb))
1290 len = TREE_OPERAND_LENGTH (expr);
1291 for (i = 0; i < len; i++)
1292 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1298 /* Returns true if statement STMT is obviously invariant in LOOP,
1299 i.e. if all its operands on the RHS are defined outside of the LOOP.
1300 LOOP should not be the function body. */
1303 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1308 gcc_assert (loop_depth (loop) > 0);
1310 lhs = gimple_get_lhs (stmt);
1311 for (i = 0; i < gimple_num_ops (stmt); i++)
1313 tree op = gimple_op (stmt, i);
1314 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1321 /* Cumulates the steps of indices into DATA and replaces their values with the
1322 initial ones. Returns false when the value of the index cannot be determined.
1323 Callback for for_each_index. */
1325 struct ifs_ivopts_data
1327 struct ivopts_data *ivopts_data;
1333 idx_find_step (tree base, tree *idx, void *data)
1335 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1337 tree step, iv_base, iv_step, lbound, off;
1338 struct loop *loop = dta->ivopts_data->current_loop;
1340 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1341 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1344 /* If base is a component ref, require that the offset of the reference
1346 if (TREE_CODE (base) == COMPONENT_REF)
1348 off = component_ref_field_offset (base);
1349 return expr_invariant_in_loop_p (loop, off);
1352 /* If base is array, first check whether we will be able to move the
1353 reference out of the loop (in order to take its address in strength
1354 reduction). In order for this to work we need both lower bound
1355 and step to be loop invariants. */
1356 if (TREE_CODE (base) == ARRAY_REF)
1358 step = array_ref_element_size (base);
1359 lbound = array_ref_low_bound (base);
1361 if (!expr_invariant_in_loop_p (loop, step)
1362 || !expr_invariant_in_loop_p (loop, lbound))
1366 if (TREE_CODE (*idx) != SSA_NAME)
1369 iv = get_iv (dta->ivopts_data, *idx);
1373 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1374 *&x[0], which is not folded and does not trigger the
1375 ARRAY_REF path below. */
1378 if (integer_zerop (iv->step))
1381 if (TREE_CODE (base) == ARRAY_REF)
1383 step = array_ref_element_size (base);
1385 /* We only handle addresses whose step is an integer constant. */
1386 if (TREE_CODE (step) != INTEGER_CST)
1390 /* The step for pointer arithmetics already is 1 byte. */
1391 step = build_int_cst (sizetype, 1);
1395 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1396 sizetype, &iv_base, &iv_step, dta->stmt,
1399 /* The index might wrap. */
1403 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1404 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1409 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1410 object is passed to it in DATA. */
1413 idx_record_use (tree base, tree *idx,
1416 struct ivopts_data *data = (struct ivopts_data *) vdata;
1417 find_interesting_uses_op (data, *idx);
1418 if (TREE_CODE (base) == ARRAY_REF)
1420 find_interesting_uses_op (data, array_ref_element_size (base));
1421 find_interesting_uses_op (data, array_ref_low_bound (base));
1426 /* If we can prove that TOP = cst * BOT for some constant cst,
1427 store cst to MUL and return true.  Otherwise return false.
1428 The returned value is always sign-extended, regardless of the
1429 signedness of TOP and BOT. */
1432 constant_multiple_of (tree top, tree bot, double_int *mul)
1435 enum tree_code code;
1436 double_int res, p0, p1;
/* All double_int results below are sign-extended to this precision.  */
1437 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
/* TOP == BOT gives the trivial multiple 1.  */
1442 if (operand_equal_p (top, bot, 0))
1444 *mul = double_int_one;
1448 code = TREE_CODE (top);
/* top = (X * cst): recurse on X and scale the result by cst.  */
1452 mby = TREE_OPERAND (top, 1);
1453 if (TREE_CODE (mby) != INTEGER_CST)
1456 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1459 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
/* top = (X +- Y): both operands must be constant multiples of BOT.  */
1465 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1466 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1469 if (code == MINUS_EXPR)
1470 p1 = double_int_neg (p1);
1471 *mul = double_int_sext (double_int_add (p0, p1), precision);
/* Base case: both constants; TOP must divide evenly by BOT.  */
1475 if (TREE_CODE (bot) != INTEGER_CST)
1478 p0 = double_int_sext (tree_to_double_int (top), precision);
1479 p1 = double_int_sext (tree_to_double_int (bot), precision);
1480 if (double_int_zero_p (p1))
1482 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
/* Success only when the division leaves no remainder.  */
1484 return double_int_zero_p (res);
1491 /* Returns true if memory reference REF with step STEP may be unaligned. */
1494 may_be_unaligned_p (tree ref, tree step)
1498 HOST_WIDE_INT bitsize;
1499 HOST_WIDE_INT bitpos;
1501 enum machine_mode mode;
1502 int unsignedp, volatilep;
1503 unsigned base_align;
1505 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1506 thus they are not misaligned. */
1507 if (TREE_CODE (ref) == TARGET_MEM_REF)
1510 /* The test below is basically copy of what expr.c:normal_inner_ref
1511 does to check whether the object must be loaded by parts when
1512 STRICT_ALIGNMENT is true. */
1513 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1514 &unsignedp, &volatilep, true);
1515 base_type = TREE_TYPE (base);
1516 base_align = TYPE_ALIGN (base_type);
1518 if (mode != BLKmode)
/* AL is the mode's required alignment in bytes; both the base position
   and the STEP must be multiples of it.  */
1521 tree al = build_int_cst (TREE_TYPE (step),
1522 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1524 if (base_align < GET_MODE_ALIGNMENT (mode)
1525 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1526 || bitpos % BITS_PER_UNIT != 0)
/* If STEP is not provably a multiple of the alignment, the reference
   may become misaligned as the iv advances.  */
1529 if (!constant_multiple_of (step, al, &mul))
1536 /* Return true if EXPR may be non-addressable. */
1539 may_be_nonaddressable_p (tree expr)
1541 switch (TREE_CODE (expr))
1543 case TARGET_MEM_REF:
1544 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1545 target, thus they are always addressable. */
/* COMPONENT_REF (case elided in this excerpt): a field marked
   DECL_NONADDRESSABLE_P cannot have its address taken.  */
1549 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1550 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1552 case VIEW_CONVERT_EXPR:
1553 /* This kind of view-conversions may wrap non-addressable objects
1554 and make them look addressable.  After some processing the
1555 non-addressability may be uncovered again, causing ADDR_EXPRs
1556 of inappropriate objects to be built. */
1557 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1558 || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1561 /* ... fall through ... */
1564 case ARRAY_RANGE_REF:
/* Array-style references are addressable iff their base is.  */
1565 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1577 /* Finds addresses in *OP_P inside STMT.  Records an address use of the
   induction variable found there; falls back to recording plain index
   uses (via idx_record_use) when the address is not a suitable iv use. */
1580 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1582 tree base = *op_p, step = build_int_cst (sizetype, 0);
1584 struct ifs_ivopts_data ifs_ivopts_data;
1586 /* Do not play with volatile memory references.  A bit too conservative,
1587 perhaps, but safe. */
1588 if (gimple_has_volatile_ops (stmt))
1591 /* Ignore bitfields for now.  Not really something terribly complicated
   to handle, but they are rejected here. */
1593 if (TREE_CODE (base) == BIT_FIELD_REF)
/* Unshare so that the substitutions below do not clobber shared trees.  */
1596 base = unshare_expr (base);
1598 if (TREE_CODE (base) == TARGET_MEM_REF)
1600 tree type = build_pointer_type (TREE_TYPE (base));
/* Substitute the iv base for TMR_BASE and TMR_INDEX SSA names, and
   accumulate the combined step of the reference.  */
1604 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1606 civ = get_iv (data, TMR_BASE (base));
1610 TMR_BASE (base) = civ->base;
1613 if (TMR_INDEX (base)
1614 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1616 civ = get_iv (data, TMR_INDEX (base));
1620 TMR_INDEX (base) = civ->base;
/* The index contribution to the step is scaled by TMR_STEP.  */
1625 if (TMR_STEP (base))
1626 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1628 step = fold_build2 (PLUS_EXPR, type, step, astep);
/* A zero overall step means the address is loop invariant -- not an
   interesting address use.  */
1632 if (integer_zerop (step))
1634 base = tree_mem_ref_addr (type, base);
/* Ordinary reference: walk its indices, replacing ivs by their bases
   and collecting the total step.  */
1638 ifs_ivopts_data.ivopts_data = data;
1639 ifs_ivopts_data.stmt = stmt;
1640 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1641 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1642 || integer_zerop (ifs_ivopts_data.step))
1644 step = ifs_ivopts_data.step;
1646 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1647 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1649 /* Check that the base expression is addressable.  This needs
1650 to be done after substituting bases of IVs into it. */
1651 if (may_be_nonaddressable_p (base))
1654 /* Moreover, on strict alignment platforms, check that it is
1655 sufficiently aligned. */
1656 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1659 base = build_fold_addr_expr (base);
1661 /* Substituting bases of IVs into the base expression might
1662 have caused folding opportunities. */
1663 if (TREE_CODE (base) == ADDR_EXPR)
1665 tree *ref = &TREE_OPERAND (base, 0);
1666 while (handled_component_p (*ref))
1667 ref = &TREE_OPERAND (*ref, 0);
/* Fold *&x back to x if the substitution exposed it.  */
1668 if (TREE_CODE (*ref) == INDIRECT_REF)
1669 *ref = fold_indirect_ref (*ref);
/* Record the address as an iv use of kind USE_ADDRESS.  */
1673 civ = alloc_iv (base, step);
1674 record_use (data, op_p, civ, stmt, USE_ADDRESS);
/* Fallback path (reached via goto in the elided lines): record the
   individual index uses instead.  */
1678 for_each_index (op_p, idx_record_use, data);
1681 /* Finds and records invariants used in STMT.  Each SSA use operand is
   passed to record_invariant (non-linear use flag false).  */
1684 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1687 use_operand_p use_p;
1690 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1692 op = USE_FROM_PTR (use_p);
1693 record_invariant (data, op, false);
1697 /* Finds interesting uses of induction variables in the statement STMT. */
1700 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1703 tree op, *lhs, *rhs;
1705 use_operand_p use_p;
1706 enum tree_code code;
1708 find_invariants_stmt (data, stmt);
/* Conditions get special handling (comparison uses).  */
1710 if (gimple_code (stmt) == GIMPLE_COND)
1712 find_interesting_uses_cond (data, stmt);
1716 if (is_gimple_assign (stmt))
1718 lhs = gimple_assign_lhs_ptr (stmt);
1719 rhs = gimple_assign_rhs1_ptr (stmt);
1721 if (TREE_CODE (*lhs) == SSA_NAME)
1723 /* If the statement defines an induction variable, the uses are not
1724 interesting by themselves. */
1726 iv = get_iv (data, *lhs);
1728 if (iv && !integer_zerop (iv->step))
1732 code = gimple_assign_rhs_code (stmt);
1733 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1734 && (REFERENCE_CLASS_P (*rhs)
1735 || is_gimple_val (*rhs)))
/* Memory loads are address uses; plain values are operand uses.  */
1737 if (REFERENCE_CLASS_P (*rhs))
1738 find_interesting_uses_address (data, stmt, rhs);
1740 find_interesting_uses_op (data, *rhs);
/* A memory store makes the lhs an address use too.  */
1742 if (REFERENCE_CLASS_P (*lhs))
1743 find_interesting_uses_address (data, stmt, lhs);
1746 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1748 find_interesting_uses_cond (data, stmt);
1752 /* TODO -- we should also handle address uses of type
1754 memory = call (whatever); */
/* Loop-header PHIs that are themselves ivs contribute no uses.  */
1761 if (gimple_code (stmt) == GIMPLE_PHI
1762 && gimple_bb (stmt) == data->current_loop->header)
1764 iv = get_iv (data, PHI_RESULT (stmt));
1766 if (iv && !integer_zerop (iv->step))
/* Remaining SSA uses: record each one that is an iv as a generic
   (non-linear) operand use.  */
1770 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1772 op = USE_FROM_PTR (use_p);
1774 if (TREE_CODE (op) != SSA_NAME)
1777 iv = get_iv (data, op);
1781 find_interesting_uses_op (data, op);
1785 /* Finds interesting uses of induction variables outside of loops
1786 on loop exit edge EXIT. */
1789 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1792 gimple_stmt_iterator psi;
/* PHI nodes in the exit destination carry loop values out; the
   argument coming along EXIT is the value live after the loop.  */
1795 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1797 phi = gsi_stmt (psi);
1798 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
/* Only register values (not virtual operands) are interesting.  */
1799 if (is_gimple_reg (def))
1800 find_interesting_uses_op (data, def);
1804 /* Finds uses of the induction variables that are interesting.  Walks all
   blocks of the current loop, processing exit edges, PHIs and statements. */
1807 find_interesting_uses (struct ivopts_data *data)
1810 gimple_stmt_iterator bsi;
1811 basic_block *body = get_loop_body (data->current_loop);
1813 struct version_info *info;
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1817 fprintf (dump_file, "Uses:\n\n");
1819 for (i = 0; i < data->current_loop->num_nodes; i++)
/* Edges leaving the loop expose values used after it.  */
1824 FOR_EACH_EDGE (e, ei, bb->succs)
1825 if (e->dest != EXIT_BLOCK_PTR
1826 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1827 find_interesting_uses_outside (data, e);
/* Then scan PHIs and ordinary statements of the block.  */
1829 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1830 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1831 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1832 find_interesting_uses_stmt (data, gsi_stmt (bsi));
/* Optional dump of the recorded invariants.  */
1835 if (dump_file && (dump_flags & TDF_DETAILS))
1839 fprintf (dump_file, "\n");
1841 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1843 info = ver_info (data, i);
1846 fprintf (dump_file, "  ");
1847 print_generic_expr (dump_file, info->name, TDF_SLIM);
1848 fprintf (dump_file, " is invariant (%d)%s\n",
1849 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1853 fprintf (dump_file, "\n");
1859 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1860 is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1861 we are at the top-level of the processed address. */
1864 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1865 unsigned HOST_WIDE_INT *offset)
1867 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1868 enum tree_code code;
1869 tree type, orig_type = TREE_TYPE (expr);
1870 unsigned HOST_WIDE_INT off0, off1, st;
/* Kept so we can return the unmodified expression when nothing strips.  */
1871 tree orig_expr = expr;
1875 type = TREE_TYPE (expr);
1876 code = TREE_CODE (expr);
/* INTEGER_CST case: the whole constant becomes the offset; the
   expression reduces to zero.  */
1882 if (!cst_and_fits_in_hwi (expr)
1883 || integer_zerop (expr))
1886 *offset = int_cst_value (expr);
1887 return build_int_cst (orig_type, 0);
1889 case POINTER_PLUS_EXPR:
/* PLUS/MINUS: strip both operands and combine their offsets.  */
1892 op0 = TREE_OPERAND (expr, 0);
1893 op1 = TREE_OPERAND (expr, 1);
1895 op0 = strip_offset_1 (op0, false, false, &off0);
1896 op1 = strip_offset_1 (op1, false, false, &off1);
1898 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1899 if (op0 == TREE_OPERAND (expr, 0)
1900 && op1 == TREE_OPERAND (expr, 1))
/* Rebuild only the surviving non-constant parts.  */
1903 if (integer_zerop (op1))
1905 else if (integer_zerop (op0))
1907 if (code == MINUS_EXPR)
1908 expr = fold_build1 (NEGATE_EXPR, type, op1);
1913 expr = fold_build2 (code, type, op0, op1);
1915 return fold_convert (orig_type, expr);
/* ARRAY_REF: a constant index times the element size is an offset.  */
1921 step = array_ref_element_size (expr);
1922 if (!cst_and_fits_in_hwi (step))
1925 st = int_cst_value (step);
1926 op1 = TREE_OPERAND (expr, 1);
1927 op1 = strip_offset_1 (op1, false, false, &off1);
1928 *offset = off1 * st;
1931 && integer_zerop (op1))
1933 /* Strip the component reference completely. */
1934 op0 = TREE_OPERAND (expr, 0);
1935 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
/* COMPONENT_REF: a constant field offset can be stripped likewise.  */
1945 tmp = component_ref_field_offset (expr);
1947 && cst_and_fits_in_hwi (tmp))
1949 /* Strip the component reference completely. */
1950 op0 = TREE_OPERAND (expr, 0);
1951 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1952 *offset = off0 + int_cst_value (tmp);
/* ADDR_EXPR: recurse into the addressed object at top level.  */
1958 op0 = TREE_OPERAND (expr, 0);
1959 op0 = strip_offset_1 (op0, true, true, &off0);
1962 if (op0 == TREE_OPERAND (expr, 0))
1965 expr = build_fold_addr_expr (op0);
1966 return fold_convert (orig_type, expr);
/* Past an indirection we are no longer inside an address.  */
1969 inside_addr = false;
1976 /* Default handling of expressions for that we want to recurse into
1977 the first operand. */
1978 op0 = TREE_OPERAND (expr, 0);
1979 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1982 if (op0 == TREE_OPERAND (expr, 0)
1983 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
/* Copy before mutating -- trees may be shared.  */
1986 expr = copy_node (expr);
1987 TREE_OPERAND (expr, 0) = op0;
1989 TREE_OPERAND (expr, 1) = op1;
1991 /* Inside address, we might strip the top level component references,
1992 thus changing type of the expression.  Handling of ADDR_EXPR
   compensates; convert back to the original type here. */
1994 expr = fold_convert (orig_type, expr);
1999 /* Strips constant offsets from EXPR and stores them to OFFSET.
   Convenience wrapper over strip_offset_1 with both flags false.  */
2002 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2004 return strip_offset_1 (expr, false, false, offset);
2007 /* Returns variant of TYPE that can be used as base for different uses.
2008 We return unsigned type with the same precision, which avoids problems
   with overflows (arithmetic on the candidate is done in this type).  */
2012 generic_type_for (tree type)
2014 if (POINTER_TYPE_P (type))
2015 return unsigned_type_for (type);
/* Already-unsigned types are returned unchanged (branch body elided).  */
2017 if (TYPE_UNSIGNED (type))
2020 return unsigned_type_for (type);
2023 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2024 the bitmap to that we should store it. */
/* Communicates the ivopts context to the walk_tree callback, which only
   receives the bitmap pointer through DATA.  */
2026 static struct ivopts_data *fd_ivopts_data;
2028 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2030 bitmap *depends_on = (bitmap *) data;
2031 struct version_info *info;
2033 if (TREE_CODE (*expr_p) != SSA_NAME)
2035 info = name_info (fd_ivopts_data, *expr_p);
/* Skip names that are not invariants, or that have non-linear uses.  */
2037 if (!info->inv_id || info->has_nonlin_use)
/* Lazily allocate the bitmap; record this invariant's id.  */
2041 *depends_on = BITMAP_ALLOC (NULL);
2042 bitmap_set_bit (*depends_on, info->inv_id);
2047 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2048 position to POS.  If USE is not NULL, the candidate is set as related to
2049 it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2050 replacement of the final value of the iv by a direct computation. */
2052 static struct iv_cand *
2053 add_candidate_1 (struct ivopts_data *data,
2054 tree base, tree step, bool important, enum iv_position pos,
2055 struct iv_use *use, gimple incremented_at)
2058 struct iv_cand *cand = NULL;
2059 tree type, orig_type;
2063 orig_type = TREE_TYPE (base);
2064 type = generic_type_for (orig_type);
2065 /* Don't convert the base to the generic type for pointers as the generic
2066 type is an integer type with the same size as the pointer type. */
2067 if (type != orig_type && !POINTER_TYPE_P (orig_type))
2069 base = fold_convert (type, base);
2070 step = fold_convert (type, step);
/* Look for an existing candidate with the same base/step/position so
   duplicates are shared rather than re-added.  */
2074 for (i = 0; i < n_iv_cands (data); i++)
2076 cand = iv_cand (data, i);
2078 if (cand->pos != pos)
2081 if (cand->incremented_at != incremented_at)
2095 if (operand_equal_p (base, cand->iv->base, 0)
2096 && operand_equal_p (step, cand->iv->step, 0))
/* Not found: create a fresh candidate record.  */
2100 if (i == n_iv_cands (data))
2102 cand = XCNEW (struct iv_cand)
2108 cand->iv = alloc_iv (base, step);
/* For non-IP_ORIGINAL candidates we introduce a new temporary whose
   before/after-increment variable is initially the same.  */
2111 if (pos != IP_ORIGINAL && cand->iv)
2113 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2114 cand->var_after = cand->var_before;
2116 cand->important = important;
2117 cand->incremented_at = incremented_at;
2118 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
/* A symbolic step introduces dependencies on the invariants it uses.  */
2121 && TREE_CODE (step) != INTEGER_CST)
2123 fd_ivopts_data = data;
2124 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2127 if (dump_file && (dump_flags & TDF_DETAILS))
2128 dump_cand (dump_file, cand);
/* An existing candidate may be promoted to important.  */
2131 if (important && !cand->important)
2133 cand->important = true;
2134 if (dump_file && (dump_flags & TDF_DETAILS))
2135 fprintf (dump_file, "Candidate %d is important\n", cand->id);
/* Link the candidate to USE's related set when requested.  */
2140 bitmap_set_bit (use->related_cands, i);
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 fprintf (dump_file, "Candidate %d is related to use %d\n",
2149 /* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.
2152 The purpose is to avoid splitting latch edge with a biv increment, thus
2153 creating a jump, possibly confusing other optimization passes and leaving
2154 less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2155 is not available (so we do not have a better alternative), or if the latch
2156 edge is already nonempty. */
2159 allow_ip_end_pos_p (struct loop *loop)
2161 if (!ip_normal_pos (loop))
2164 if (!empty_block_p (ip_end_pos (loop)))
2170 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2171 position to POS.  If USE is not NULL, the candidate is set as related to
2172 it.  The candidate computation is scheduled on all available positions. */
2175 add_candidate (struct ivopts_data *data,
2176 tree base, tree step, bool important, struct iv_use *use)
/* Prefer the normal position; additionally use the latch end when
   allow_ip_end_pos_p permits it.  */
2178 if (ip_normal_pos (data->current_loop))
2179 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2180 if (ip_end_pos (data->current_loop)
2181 && allow_ip_end_pos_p (data->current_loop))
2182 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2185 /* Add a standard "0 + 1 * iteration" iv candidate for a
2186 type with SIZE bits. */
2189 add_standard_iv_candidates_for_size (struct ivopts_data *data,
/* Uses the language hook to get an unsigned type of the given size.  */
2192 tree type = lang_hooks.types.type_for_size (size, true);
2193 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2197 /* Adds standard iv candidates:  a plain counter in `int', and in a
   double-width integer when the target handles it in a single word.  */
2200 add_standard_iv_candidates (struct ivopts_data *data)
2202 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2204 /* The same for a double-integer type if it is still fast enough. */
2205 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2206 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2210 /* Adds candidates bases on the old induction variable IV. */
2213 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2217 struct iv_cand *cand;
/* The iv as it already exists.  */
2219 add_candidate (data, iv->base, iv->step, true, NULL);
2221 /* The same, but with initial value zero. */
2222 add_candidate (data,
2223 build_int_cst (TREE_TYPE (iv->base), 0),
2224 iv->step, true, NULL);
2226 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2227 if (gimple_code (phi) == GIMPLE_PHI)
2229 /* Additionally record the possibility of leaving the original iv
   untouched (IP_ORIGINAL), incremented where its latch value is defined. */
2231 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2232 cand = add_candidate_1 (data,
2233 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2234 SSA_NAME_DEF_STMT (def));
2235 cand->var_before = iv->ssa_name;
2236 cand->var_after = def;
2240 /* Adds candidates based on the old induction variables.  Iterates the
   relevant-names bitmap and processes every biv with a nonzero step.  */
2243 add_old_ivs_candidates (struct ivopts_data *data)
2249 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2251 iv = ver_info (data, i)->iv;
2252 if (iv && iv->biv_p && !integer_zerop (iv->step))
2253 add_old_iv_candidates (data, iv);
2257 /* Adds candidates based on the value of the induction variable IV and USE. */
2260 add_iv_value_candidates (struct ivopts_data *data,
2261 struct iv *iv, struct iv_use *use)
2263 unsigned HOST_WIDE_INT offset;
/* First, the iv itself.  */
2267 add_candidate (data, iv->base, iv->step, false, use);
2269 /* The same, but with initial value zero.  Make such variable important,
2270 since it is generic enough so that possibly many uses may be based
   on it.  */
2272 basetype = TREE_TYPE (iv->base);
/* Pointer bases use sizetype so that a zero constant can be built.  */
2273 if (POINTER_TYPE_P (basetype))
2274 basetype = sizetype;
2275 add_candidate (data, build_int_cst (basetype, 0),
2276 iv->step, true, use);
2278 /* Third, try removing the constant offset. */
2279 base = strip_offset (iv->base, &offset);
2281 add_candidate (data, base, iv->step, false, use);
2284 /* Adds candidates based on the uses.  Dispatches per use type; the
   visible case adds value-based candidates for the use's iv.  */
2287 add_derived_ivs_candidates (struct ivopts_data *data)
2291 for (i = 0; i < n_iv_uses (data); i++)
2293 struct iv_use *use = iv_use (data, i);
2300 case USE_NONLINEAR_EXPR:
2303 /* Just add the ivs based on the value of the iv used here. */
2304 add_iv_value_candidates (data, use->iv, use);
2313 /* Record important candidates and add them to related_cands bitmaps
   of the uses (when not all candidates are considered for each use).  */
2317 record_important_candidates (struct ivopts_data *data)
2322 for (i = 0; i < n_iv_cands (data); i++)
2324 struct iv_cand *cand = iv_cand (data, i);
2326 if (cand->important)
2327 bitmap_set_bit (data->important_candidates, i);
/* With few candidates, every (use, cand) pair is evaluated directly.  */
2330 data->consider_all_candidates = (n_iv_cands (data)
2331 <= CONSIDER_ALL_CANDIDATES_BOUND);
2333 if (data->consider_all_candidates)
2335 /* We will not need "related_cands" bitmaps in this case,
2336 so release them to decrease peak memory consumption. */
2337 for (i = 0; i < n_iv_uses (data); i++)
2339 use = iv_use (data, i);
2340 BITMAP_FREE (use->related_cands);
2345 /* Add important candidates to the related_cands bitmaps. */
2346 for (i = 0; i < n_iv_uses (data); i++)
2347 bitmap_ior_into (iv_use (data, i)->related_cands,
2348 data->important_candidates);
2352 /* Finds the candidates for the induction variables.  Driver that runs the
   three candidate sources and then records the important ones.  */
2355 find_iv_candidates (struct ivopts_data *data)
2357 /* Add commonly used ivs. */
2358 add_standard_iv_candidates (data);
2360 /* Add old induction variables. */
2361 add_old_ivs_candidates (data);
2363 /* Add induction variables derived from uses. */
2364 add_derived_ivs_candidates (data);
2366 /* Record the important candidates. */
2367 record_important_candidates (data);
2370 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2371 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2372 we allocate a simple list to every use. */
2375 alloc_use_cost_map (struct ivopts_data *data)
2377 unsigned i, size, s, j;
2379 for (i = 0; i < n_iv_uses (data); i++)
2381 struct iv_use *use = iv_use (data, i);
2384 if (data->consider_all_candidates)
2385 size = n_iv_cands (data);
/* Otherwise size by the number of related candidates...  */
2389 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2394 /* Round up to the power of two, so that moduling by it is fast. */
2395 for (size = 1; size < s; size <<= 1)
/* ...giving an open-addressed table searched in get_use_iv_cost.  */
2399 use->n_map_members = size;
2400 use->cost_map = XCNEWVEC (struct cost_pair, size);
2404 /* Returns description of computation cost of expression whose runtime
2405 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2408 new_cost (unsigned runtime, unsigned complexity)
2412 cost.cost = runtime;
2413 cost.complexity = complexity;
2418 /* Adds costs COST1 and COST2, component-wise; returns the sum by value. */
2421 add_costs (comp_cost cost1, comp_cost cost2)
2423 cost1.cost += cost2.cost;
2424 cost1.complexity += cost2.complexity;
2428 /* Subtracts costs COST1 and COST2, component-wise (COST1 - COST2). */
2431 sub_costs (comp_cost cost1, comp_cost cost2)
2433 cost1.cost -= cost2.cost;
2434 cost1.complexity -= cost2.complexity;
2439 /* Returns a negative number if COST1 < COST2, a positive number if
2440 COST1 > COST2, and 0 if COST1 = COST2.  Complexity breaks ties.  */
2443 compare_costs (comp_cost cost1, comp_cost cost2)
2445 if (cost1.cost == cost2.cost)
2446 return cost1.complexity - cost2.complexity;
2448 return cost1.cost - cost2.cost;
2451 /* Returns true if COST is infinite (its runtime component is INFTY). */
2454 infinite_cost_p (comp_cost cost)
2456 return cost.cost == INFTY;
2459 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2460 on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */
2464 set_use_iv_cost (struct ivopts_data *data,
2465 struct iv_use *use, struct iv_cand *cand,
2466 comp_cost cost, bitmap depends_on, tree value)
/* Infinite cost pairs are not stored at all; the depends_on bitmap
   would leak, so free it here.  */
2470 if (infinite_cost_p (cost))
2472 BITMAP_FREE (depends_on);
/* Dense case: direct index by candidate id.  */
2476 if (data->consider_all_candidates)
2478 use->cost_map[cand->id].cand = cand;
2479 use->cost_map[cand->id].cost = cost;
2480 use->cost_map[cand->id].depends_on = depends_on;
2481 use->cost_map[cand->id].value = value;
2485 /* n_map_members is a power of two, so this computes modulo. */
2486 s = cand->id & (use->n_map_members - 1);
/* Open addressing with linear probing, wrapping at the table end.  */
2487 for (i = s; i < use->n_map_members; i++)
2488 if (!use->cost_map[i].cand)
2490 for (i = 0; i < s; i++)
2491 if (!use->cost_map[i].cand)
2497 use->cost_map[i].cand = cand;
2498 use->cost_map[i].cost = cost;
2499 use->cost_map[i].depends_on = depends_on;
2500 use->cost_map[i].value = value;
2503 /* Gets cost of (USE, CANDIDATE) pair.  Returns the cost_pair record, or
   (per the elided paths) no record when the pair was never stored.  */
2505 static struct cost_pair *
2506 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2507 struct iv_cand *cand)
2510 struct cost_pair *ret;
2515 if (data->consider_all_candidates)
/* Dense case mirrors set_use_iv_cost: direct index by candidate id.  */
2517 ret = use->cost_map + cand->id;
2524 /* n_map_members is a power of two, so this computes modulo. */
2525 s = cand->id & (use->n_map_members - 1);
/* Linear probe in the same order the entry was inserted.  */
2526 for (i = s; i < use->n_map_members; i++)
2527 if (use->cost_map[i].cand == cand)
2528 return use->cost_map + i;
2530 for (i = 0; i < s; i++)
2531 if (use->cost_map[i].cand == cand)
2532 return use->cost_map + i;
2537 /* Returns estimate on cost of computing SEQ.  Sums rtx_cost over the
   single-set insns of the sequence.  */
2545 for (; seq; seq = NEXT_INSN (seq))
2547 set = single_set (seq);
2549 cost += rtx_cost (set, SET);
2557 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2559 produce_memory_decl_rtl (tree obj, int *regno)
/* Static/external objects get a SYMBOL_REF-based MEM with proper
   section info encoded.  */
2564 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2566 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2567 x = gen_rtx_SYMBOL_REF (Pmode, name);
2568 SET_SYMBOL_REF_DECL (x, obj);
2569 x = gen_rtx_MEM (DECL_MODE (obj), x);
2570 targetm.encode_section_info (obj, x, true);
/* Other objects are addressed through a fresh fake pseudo register.  */
2574 x = gen_raw_REG (Pmode, (*regno)++);
2575 x = gen_rtx_MEM (DECL_MODE (obj), x);
2581 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2582 walk_tree.  DATA contains the actual fake register number. */
2585 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2587 tree obj = NULL_TREE;
2589 int *regno = (int *) data;
2591 switch (TREE_CODE (*expr_p))
/* For component references, dig down to the base object.  */
2594 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2595 handled_component_p (*expr_p);
2596 expr_p = &TREE_OPERAND (*expr_p, 0))
2599 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2600 x = produce_memory_decl_rtl (obj, regno);
/* SSA names are backed by a fake register for their variable.  */
2605 obj = SSA_NAME_VAR (*expr_p);
2606 if (!DECL_RTL_SET_P (obj))
2607 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2616 if (DECL_RTL_SET_P (obj))
/* BLKmode decls must live in memory; others get a fake register.  */
2619 if (DECL_MODE (obj) == BLKmode)
2620 x = produce_memory_decl_rtl (obj, regno);
2622 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
/* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
2632 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2633 SET_DECL_RTL (obj, x);
2639 /* Determines cost of the computation of EXPR by expanding it to RTL
   (with fake registers) and summing the insn costs.  */
2642 computation_cost (tree expr)
2645 tree type = TREE_TYPE (expr);
2647 /* Avoid using hard regs in ways which may be unsupported. */
2648 int regno = LAST_VIRTUAL_REGISTER + 1;
2650 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2652 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2656 cost = seq_cost (seq);
/* If the result ended up in memory, add the address computation cost.  */
2658 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2663 /* Returns variable containing the value of candidate CAND at statement AT:
   var_after if AT executes past the increment, var_before otherwise.  */
2666 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2668 if (stmt_after_increment (loop, cand, stmt))
2669 return cand->var_after;
2671 return cand->var_before;
2674 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2675 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2678 tree_int_cst_sign_bit (const_tree t)
2680 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2681 unsigned HOST_WIDE_INT w;
/* Pick the low or high word of the double_int depending on which one
   contains the sign bit.  */
2683 if (bitno < HOST_BITS_PER_WIDE_INT)
2684 w = TREE_INT_CST_LOW (t);
2687 w = TREE_INT_CST_HIGH (t);
2688 bitno -= HOST_BITS_PER_WIDE_INT;
2691 return (w >> bitno) & 1;
2694 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2695 same precision that is at least as wide as the precision of TYPE, stores
2696 BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   original type (the path returning that value is elided here).  */
2700 determine_common_wider_type (tree *a, tree *b)
2702 tree wider_type = NULL;
2704 tree atype = TREE_TYPE (*a);
/* Peel a conversion off A and remember the inner (wider) type.  */
2706 if (CONVERT_EXPR_P (*a))
2708 suba = TREE_OPERAND (*a, 0);
2709 wider_type = TREE_TYPE (suba);
2710 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
/* B must be a conversion from a type of the same precision.  */
2716 if (CONVERT_EXPR_P (*b))
2718 subb = TREE_OPERAND (*b, 0);
2719 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2730 /* Determines the expression by that USE is expressed from induction variable
2731 CAND at statement AT in LOOP.  The expression is stored in a decomposed
2732 form into AFF.  Returns false if USE cannot be expressed using CAND. */
2735 get_computation_aff (struct loop *loop,
2736 struct iv_use *use, struct iv_cand *cand, gimple at,
2737 struct affine_tree_combination *aff)
2739 tree ubase = use->iv->base;
2740 tree ustep = use->iv->step;
2741 tree cbase = cand->iv->base;
2742 tree cstep = cand->iv->step, cstep_common;
2743 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2744 tree common_type, var;
2746 aff_tree cbase_aff, var_aff;
2749 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2751 /* We do not have a precision to express the values of use. */
2755 var = var_at_stmt (loop, cand, at);
/* All arithmetic happens in the unsigned variant of the use type, so
   overflow is well defined.  */
2756 uutype = unsigned_type_for (utype);
2758 /* If the conversion is not noop, perform it. */
2759 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2761 cstep = fold_convert (uutype, cstep);
2762 cbase = fold_convert (uutype, cbase);
2763 var = fold_convert (uutype, var);
/* RAT is the constant ratio ustep / cstep; without it the use cannot
   be rewritten in terms of the candidate.  */
2766 if (!constant_multiple_of (ustep, cstep, &rat))
2769 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2770 type, we achieve better folding by computing their difference in this
2771 wider type, and cast the result to UUTYPE.  We do not need to worry about
2772 overflows, as all the arithmetics will in the end be performed in UUTYPE
   anyway.  */
2774 common_type = determine_common_wider_type (&ubase, &cbase);
2776 /* use = ubase - ratio * cbase + ratio * var. */
2777 tree_to_aff_combination (ubase, common_type, aff);
2778 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2779 tree_to_aff_combination (var, uutype, &var_aff);
2781 /* We need to shift the value if we are after the increment. */
2782 if (stmt_after_increment (loop, cand, at))
2786 if (common_type != uutype)
2787 cstep_common = fold_convert (common_type, cstep);
2789 cstep_common = cstep;
/* Account for one extra step already applied to the candidate.  */
2791 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2792 aff_combination_add (&cbase_aff, &cstep_aff);
/* Combine:  aff = ubase - rat*cbase, then convert, then += rat*var.  */
2795 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2796 aff_combination_add (aff, &cbase_aff);
2797 if (common_type != uutype)
2798 aff_combination_convert (aff, uutype);
2800 aff_combination_scale (&var_aff, rat);
2801 aff_combination_add (aff, &var_aff);
2806 /* Determines the expression by that USE is expressed from induction variable
2807 CAND at statement AT in LOOP.  The computation is unshared. */
2810 get_computation_at (struct loop *loop,
2811 struct iv_use *use, struct iv_cand *cand, gimple at)
2814 tree type = TREE_TYPE (use->iv->base);
/* Failure of the affine decomposition propagates (return path elided).  */
2816 if (!get_computation_aff (loop, use, cand, at, &aff))
2818 unshare_aff_combination (&aff);
2819 return fold_convert (type, aff_combination_to_tree (&aff));
2822 /* Determines the expression by that USE is expressed from induction variable
2823 CAND in LOOP.  The computation is unshared.  Convenience wrapper that
   evaluates at the use's own statement.  */
2826 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2828 return get_computation_at (loop, use, cand, use->stmt)
2831 /* Returns cost of addition in MODE.  The result is computed once per mode
   by expanding a register-register PLUS and costing the sequence, then
   cached in a static table.  */
2834 add_cost (enum machine_mode mode)
2836 static unsigned costs[NUM_MACHINE_MODES];
/* Expand reg + reg using fake pseudo registers past the virtuals.  */
2844 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2845 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2846 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2851 cost = seq_cost (seq);
2857 if (dump_file && (dump_flags & TDF_DETAILS))
2858 fprintf (dump_file, "Addition in %s costs %d\n",
2859 GET_MODE_NAME (mode), cost);
2863 /* Entry in a hashtable of already known costs for multiplication.
   Keyed by (cst, mode); used by multiply_by_cost below.  */
2866 HOST_WIDE_INT cst;		/* The constant to multiply by. */
2867 enum machine_mode mode;	/* In mode. */
2868 unsigned cost;		/* The cost. */
2871 /* Counts hash value for the ENTRY. */
2874 mbc_entry_hash (const void *entry)
2876 const struct mbc_entry *e = (const struct mbc_entry *) entry;
/* Mix mode and constant; 57 and 877 are arbitrary mixing constants.  */
2878 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2881 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2884 mbc_entry_eq (const void *entry1, const void *entry2)
2886 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2887 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2889 return (e1->mode == e2->mode
2890 && e1->cst == e2->cst);
2893 /* Returns cost of multiplication by constant CST in MODE.  Results are
   memoized in a lazily created hash table keyed by (cst, mode).  */
2896 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2898 static htab_t costs;
2899 struct mbc_entry **cached, act;
2904 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
/* Look up (or reserve a slot for) this (cst, mode) pair.  */
2908 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2910 return (*cached)->cost;
2912 *cached = XNEW (struct mbc_entry);
2913 (*cached)->mode = mode;
2914 (*cached)->cst = cst;
/* Cost by expanding the multiplication into an insn sequence.  */
2917 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2918 gen_int_mode (cst, mode), NULL_RTX, 0);
2922 cost = seq_cost (seq);
2924 if (dump_file && (dump_flags & TDF_DETAILS))
2925 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2926 (int) cst, GET_MODE_NAME (mode), cost);
2928 (*cached)->cost = cost;
2933 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2934 validity for a memory reference accessing memory of mode MODE. */
2937 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2939 #define MAX_RATIO 128
/* Per-mode cache of which multipliers in [-MAX_RATIO, MAX_RATIO] the
   target accepts in an address.  */
2940 static sbitmap valid_mult[MAX_MACHINE_MODE];
2942 if (!valid_mult[mode])
2944 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
/* Probe each candidate multiplier with memory_address_p.  */
2948 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2949 sbitmap_zero (valid_mult[mode]);
2950 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2951 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2953 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2954 if (memory_address_p (mode, addr))
2955 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2958 if (dump_file && (dump_flags & TDF_DETAILS))
2960 fprintf (dump_file, "  allowed multipliers:");
2961 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2962 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2963 fprintf (dump_file, " %d", (int) i);
2964 fprintf (dump_file, "\n");
2965 fprintf (dump_file, "\n");
/* Ratios outside the probed range are conservatively rejected.  */
2969 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2972 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2975 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2976 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2977 variable is omitted. Compute the cost for a memory reference that accesses
2978 a memory location of mode MEM_MODE.
2980 TODO -- there must be some better way. This all is quite crude. */
2983 get_address_cost (bool symbol_present, bool var_present,
2984 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2985 enum machine_mode mem_mode)
/* All of the tables below are computed once per mode and cached across
   calls; costs[mode][sym][var][off][rat] holds the address cost for each
   combination of present address parts.  */
2987 static bool initialized[MAX_MACHINE_MODE];
2988 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2989 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2990 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2991 unsigned cost, acost, complexity;
2992 bool offset_p, ratio_p;
2993 HOST_WIDE_INT s_offset;
2994 unsigned HOST_WIDE_INT mask;
2997 if (!initialized[mem_mode])
3000 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3001 int old_cse_not_expected;
3002 unsigned sym_p, var_p, off_p, rat_p, add_c;
3003 rtx seq, addr, base;
3006 initialized[mem_mode] = true;
3008 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
/* Find the maximum valid positive offset by doubling until
   (plus reg i) stops being a valid address for MEM_MODE.  */
3010 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3011 for (i = start; i <= 1 << 20; i <<= 1)
3013 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3014 if (!memory_address_p (mem_mode, addr))
3017 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3018 off[mem_mode] = max_offset[mem_mode];
/* Likewise for the most negative valid offset.  */
3020 for (i = start; i <= 1 << 20; i <<= 1)
3022 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3023 if (!memory_address_p (mem_mode, addr))
3026 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3028 if (dump_file && (dump_flags & TDF_DETAILS))
3030 fprintf (dump_file, "get_address_cost:\n");
3031 fprintf (dump_file, " min offset %s %d\n",
3032 GET_MODE_NAME (mem_mode),
3033 (int) min_offset[mem_mode]);
3034 fprintf (dump_file, " max offset %s %d\n",
3035 GET_MODE_NAME (mem_mode),
3036 (int) max_offset[mem_mode]);
/* Pick a representative multiplier that is allowed in addresses of
   MEM_MODE (used when building the rat_p test addresses below).  */
3040 for (i = 2; i <= MAX_RATIO; i++)
3041 if (multiplier_allowed_in_address_p (i, mem_mode))
3047 /* Compute the cost of various addressing modes. */
3049 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3050 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
/* Enumerate all 16 combinations of sym/var/off/rat present-bits,
   build a representative address for each, and record its cost.  */
3052 for (i = 0; i < 16; i++)
3055 var_p = (i >> 1) & 1;
3056 off_p = (i >> 2) & 1;
3057 rat_p = (i >> 3) & 1;
3061 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3062 gen_int_mode (rat[mem_mode], Pmode));
3065 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3069 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3070 /* ??? We can run into trouble with some backends by presenting
3071 it with symbols which haven't been properly passed through
3072 targetm.encode_section_info. By setting the local bit, we
3073 enhance the probability of things working. */
3074 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3077 base = gen_rtx_fmt_e (CONST, Pmode,
3078 gen_rtx_fmt_ee (PLUS, Pmode,
3080 gen_int_mode (off[mem_mode],
3084 base = gen_int_mode (off[mem_mode], Pmode);
3089 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3092 /* To avoid splitting addressing modes, pretend that no cse will
3094 old_cse_not_expected = cse_not_expected;
3095 cse_not_expected = true;
3096 addr = memory_address (mem_mode, addr);
3097 cse_not_expected = old_cse_not_expected;
/* Total cost = cost of the setup sequence + cost of the address itself.  */
3101 acost = seq_cost (seq);
3102 acost += address_cost (addr, mem_mode);
3106 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3109 /* On some targets, it is quite expensive to load symbol to a register,
3110 which makes addresses that contain symbols look much more expensive.
3111 However, the symbol will have to be loaded in any case before the
3112 loop (and quite likely we have it in register already), so it does not
3113 make much sense to penalize them too heavily. So make some final
3114 tweaks for the SYMBOL_PRESENT modes:
3116 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3117 var is cheaper, use this mode with small penalty.
3118 If VAR_PRESENT is true, try whether the mode with
3119 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3120 if this is the case, use it. */
3121 add_c = add_cost (Pmode);
3122 for (i = 0; i < 8; i++)
3125 off_p = (i >> 1) & 1;
3126 rat_p = (i >> 2) & 1;
3128 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3132 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3133 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3136 if (dump_file && (dump_flags & TDF_DETAILS))
3138 fprintf (dump_file, "Address costs:\n");
3140 for (i = 0; i < 16; i++)
3143 var_p = (i >> 1) & 1;
3144 off_p = (i >> 2) & 1;
3145 rat_p = (i >> 3) & 1;
3147 fprintf (dump_file, " ");
3149 fprintf (dump_file, "sym + ");
3151 fprintf (dump_file, "var + ");
3153 fprintf (dump_file, "cst + ");
3155 fprintf (dump_file, "rat * ");
3157 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3158 fprintf (dump_file, "index costs %d\n", acost);
3160 fprintf (dump_file, "\n");
/* Sign-extend OFFSET from the width of Pmode into S_OFFSET.  */
3164 bits = GET_MODE_BITSIZE (Pmode);
3165 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3167 if ((offset >> (bits - 1) & 1))
/* The offset/ratio only count as "present" if the target can fold them
   into the address directly.  */
3172 offset_p = (s_offset != 0
3173 && min_offset[mem_mode] <= s_offset
3174 && s_offset <= max_offset[mem_mode]);
3175 ratio_p = (ratio != 1
3176 && multiplier_allowed_in_address_p (ratio, mem_mode));
/* Parts the address cannot absorb must be computed separately.  */
3178 if (ratio != 1 && !ratio_p)
3179 cost += multiply_by_cost (ratio, Pmode);
3181 if (s_offset && !offset_p && !symbol_present)
3182 cost += add_cost (Pmode);
3184 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3185 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3186 return new_cost (cost + acost, complexity);
3189 /* Estimates cost of forcing expression EXPR into a variable. */
3192 force_expr_to_var_cost (tree expr)
/* Baseline costs for the primitive expression kinds, measured once by
   expanding representative expressions (see initialization below).  */
3194 static bool costs_initialized = false;
3195 static unsigned integer_cost;
3196 static unsigned symbol_cost;
3197 static unsigned address_cost;
3199 comp_cost cost0, cost1, cost;
3200 enum machine_mode mode;
3202 if (!costs_initialized)
3204 tree type = build_pointer_type (integer_type_node);
/* Build a dummy static variable so we can measure the cost of loading
   an integer constant, a symbol address, and symbol + offset.  */
3208 var = create_tmp_var_raw (integer_type_node, "test_var");
3209 TREE_STATIC (var) = 1;
3210 x = produce_memory_decl_rtl (var, NULL);
3211 SET_DECL_RTL (var, x);
3213 integer_cost = computation_cost (build_int_cst (integer_type_node,
3216 addr = build1 (ADDR_EXPR, type, var);
3217 symbol_cost = computation_cost (addr) + 1;
3220 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3222 build_int_cst (sizetype, 2000))) + 1;
3223 if (dump_file && (dump_flags & TDF_DETAILS))
3225 fprintf (dump_file, "force_expr_to_var_cost:\n");
3226 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3227 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3228 fprintf (dump_file, " address %d\n", (int) address_cost);
3229 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3230 fprintf (dump_file, "\n");
3233 costs_initialized = true;
/* Simple cases: variables and invariants map directly onto the
   precomputed baseline costs.  */
3238 if (SSA_VAR_P (expr))
3241 if (is_gimple_min_invariant (expr))
3243 if (TREE_CODE (expr) == INTEGER_CST)
3244 return new_cost (integer_cost, 0);
3246 if (TREE_CODE (expr) == ADDR_EXPR)
3248 tree obj = TREE_OPERAND (expr, 0);
3250 if (TREE_CODE (obj) == VAR_DECL
3251 || TREE_CODE (obj) == PARM_DECL
3252 || TREE_CODE (obj) == RESULT_DECL)
3253 return new_cost (symbol_cost, 0);
3256 return new_cost (address_cost, 0);
/* Composite expressions: recursively cost the operands first.  */
3259 switch (TREE_CODE (expr))
3261 case POINTER_PLUS_EXPR:
3265 op0 = TREE_OPERAND (expr, 0);
3266 op1 = TREE_OPERAND (expr, 1);
3270 if (is_gimple_val (op0))
3273 cost0 = force_expr_to_var_cost (op0);
3275 if (is_gimple_val (op1))
3278 cost1 = force_expr_to_var_cost (op1);
3283 /* Just an arbitrary value, FIXME. */
3284 return new_cost (target_spill_cost, 0);
/* Then add the cost of the operation itself (add or multiply; a
   multiplication by a non-constant is priced as a spill).  */
3287 mode = TYPE_MODE (TREE_TYPE (expr));
3288 switch (TREE_CODE (expr))
3290 case POINTER_PLUS_EXPR:
3293 cost = new_cost (add_cost (mode), 0);
3297 if (cst_and_fits_in_hwi (op0))
3298 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3299 else if (cst_and_fits_in_hwi (op1))
3300 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3302 return new_cost (target_spill_cost, 0);
3309 cost = add_costs (cost, cost0);
3310 cost = add_costs (cost, cost1);
3312 /* Bound the cost by target_spill_cost. The parts of complicated
3313 computations often are either loop invariant or at least can
3314 be shared between several iv uses, so letting this grow without
3315 limits would not give reasonable results. */
3316 if (cost.cost > target_spill_cost)
3317 cost.cost = target_spill_cost;
3322 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3323 invariants the computation depends on. */
3326 force_var_cost (struct ivopts_data *data,
3327 tree expr, bitmap *depends_on)
/* Record the invariants EXPR uses (via find_depends) before costing it.  */
3331 fd_ivopts_data = data;
3332 walk_tree (&expr, find_depends, depends_on, NULL);
3335 return force_expr_to_var_cost (expr);
3338 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3339 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3340 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3341 invariants the computation depends on. */
3344 split_address_cost (struct ivopts_data *data,
3345 tree addr, bool *symbol_present, bool *var_present,
3346 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3349 HOST_WIDE_INT bitsize;
3350 HOST_WIDE_INT bitpos;
3352 enum machine_mode mode;
3353 int unsignedp, volatilep;
/* Decompose the reference into its base object and bit offset.  */
3355 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3356 &unsignedp, &volatilep, false);
/* If the base is not a plain VAR_DECL at a byte boundary, fall back to
   treating the whole address as a variable computation (spill cost).  */
3359 || bitpos % BITS_PER_UNIT != 0
3360 || TREE_CODE (core) != VAR_DECL)
3362 *symbol_present = false;
3363 *var_present = true;
3364 fd_ivopts_data = data;
3365 walk_tree (&addr, find_depends, depends_on, NULL);
3366 return new_cost (target_spill_cost, 0);
3369 *offset += bitpos / BITS_PER_UNIT;
/* Static/external decls have a symbol; automatic variables count as the
   "var" part instead.  */
3370 if (TREE_STATIC (core)
3371 || DECL_EXTERNAL (core))
3373 *symbol_present = true;
3374 *var_present = false;
3378 *symbol_present = false;
3379 *var_present = true;
3383 /* Estimates cost of expressing difference of addresses E1 - E2 as
3384 var + symbol + offset. The value of offset is added to OFFSET,
3385 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3386 part is missing. DEPENDS_ON is a set of the invariants the computation
3390 ptr_difference_cost (struct ivopts_data *data,
3391 tree e1, tree e2, bool *symbol_present, bool *var_present,
3392 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3394 HOST_WIDE_INT diff = 0;
3397 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
/* If the difference folds to a compile-time constant, it is pure offset.  */
3399 if (ptr_difference_const (e1, e2, &diff))
3402 *symbol_present = false;
3403 *var_present = false;
/* E1 - 0 is just the address E1; delegate to split_address_cost.  */
3407 if (integer_zerop (e2))
3408 return split_address_cost (data, TREE_OPERAND (e1, 0),
3409 symbol_present, var_present, offset, depends_on);
/* General case: force both addresses into variables and subtract.  */
3411 *symbol_present = false;
3412 *var_present = true;
3414 cost = force_var_cost (data, e1, depends_on);
3415 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3416 cost.cost += add_cost (Pmode);
3421 /* Estimates cost of expressing difference E1 - E2 as
3422 var + symbol + offset. The value of offset is added to OFFSET,
3423 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3424 part is missing. DEPENDS_ON is a set of the invariants the computation
3428 difference_cost (struct ivopts_data *data,
3429 tree e1, tree e2, bool *symbol_present, bool *var_present,
3430 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3433 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3434 unsigned HOST_WIDE_INT off1, off2;
/* Peel constant offsets off both operands; they contribute directly
   to *OFFSET rather than to the runtime cost.  */
3436 e1 = strip_offset (e1, &off1);
3437 e2 = strip_offset (e2, &off2);
3438 *offset += off1 - off2;
/* Address differences get the specialized pointer handling.  */
3443 if (TREE_CODE (e1) == ADDR_EXPR)
3444 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3446 *symbol_present = false;
/* Identical operands cancel; difference is the accumulated offset only.  */
3448 if (operand_equal_p (e1, e2, 0))
3450 *var_present = false;
3453 *var_present = true;
3454 if (integer_zerop (e2))
3455 return force_var_cost (data, e1, depends_on);
/* 0 - E2 costs forcing E2 plus a negation (multiply by -1).  */
3457 if (integer_zerop (e1))
3459 cost = force_var_cost (data, e2, depends_on);
3460 cost.cost += multiply_by_cost (-1, mode);
/* General case: force both sides and add the cost of the subtraction.  */
3465 cost = force_var_cost (data, e1, depends_on);
3466 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3467 cost.cost += add_cost (mode);
3472 /* Determines the cost of the computation by that USE is expressed
3473 from induction variable CAND. If ADDRESS_P is true, we just need
3474 to create an address from it, otherwise we want to get it into
3475 register. A set of invariants we depend on is stored in
3476 DEPENDS_ON. AT is the statement at that the value is computed. */
3479 get_computation_cost_at (struct ivopts_data *data,
3480 struct iv_use *use, struct iv_cand *cand,
3481 bool address_p, bitmap *depends_on, gimple at)
3483 tree ubase = use->iv->base, ustep = use->iv->step;
3485 tree utype = TREE_TYPE (ubase), ctype;
3486 unsigned HOST_WIDE_INT cstepi, offset = 0;
3487 HOST_WIDE_INT ratio, aratio;
3488 bool var_present, symbol_present;
3495 /* Only consider real candidates. */
3497 return infinite_cost;
3499 cbase = cand->iv->base;
3500 cstep = cand->iv->step;
3501 ctype = TREE_TYPE (cbase);
3503 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3505 /* We do not have a precision to express the values of use. */
3506 return infinite_cost;
3511 /* Do not try to express address of an object with computation based
3512 on address of a different object. This may cause problems in rtl
3513 level alias analysis (that does not expect this to be happening,
3514 as this is illegal in C), and would be unlikely to be useful
3516 if (use->iv->base_object
3517 && cand->iv->base_object
3518 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3519 return infinite_cost;
3522 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3524 /* TODO -- add direct handling of this case. */
3528 /* CSTEPI is removed from the offset in case statement is after the
3529 increment. If the step is not constant, we use zero instead.
3530 This is a bit imprecise (there is the extra addition), but
3531 redundancy elimination is likely to transform the code so that
3532 it uses value of the variable before increment anyway,
3533 so it is not that much unrealistic. */
3534 if (cst_and_fits_in_hwi (cstep))
3535 cstepi = int_cst_value (cstep)
/* RATIO = ustep / cstep must be a known host-word-sized constant,
   otherwise the use cannot be based on this candidate.  */
3539 if (!constant_multiple_of (ustep, cstep, &rat))
3540 return infinite_cost;
3542 if (double_int_fits_in_shwi_p (rat))
3543 ratio = double_int_to_shwi (rat);
3545 return infinite_cost;
3547 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3548 or ratio == 1, it is better to handle this like
3550 ubase - ratio * cbase + ratio * var
3552 (also holds in the case ratio == -1, TODO. */
3554 if (cst_and_fits_in_hwi (cbase))
3556 offset = - ratio * int_cst_value (cbase);
3557 cost = difference_cost (data,
3558 ubase, build_int_cst (utype, 0),
3559 &symbol_present, &var_present, &offset,
3562 else if (ratio == 1)
3564 cost = difference_cost (data,
3566 &symbol_present, &var_present, &offset,
/* General case: force CBASE into a register and account for the
   ubase computation separately.  */
3571 cost = force_var_cost (data, cbase, depends_on);
3572 cost.cost += add_cost (TYPE_MODE (ctype));
3573 cost = add_costs (cost,
3574 difference_cost (data,
3575 ubase, build_int_cst (utype, 0),
3576 &symbol_present, &var_present,
3577 &offset, depends_on));
3580 /* If we are after the increment, the value of the candidate is higher by
3582 if (stmt_after_increment (data->current_loop, cand, at))
3583 offset -= ratio * cstepi;
3585 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3586 (symbol/var/const parts may be omitted). If we are looking for an address,
3587 find the cost of addressing this. */
3589 return add_costs (cost, get_address_cost (symbol_present, var_present,
3591 TYPE_MODE (TREE_TYPE (*use->op_p))));
3593 /* Otherwise estimate the costs for computing the expression. */
3594 aratio = ratio > 0 ? ratio : -ratio;
3595 if (!symbol_present && !var_present && !offset)
3598 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3604 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3608 /* Symbol + offset should be compile-time computable. */
3609 && (symbol_present || offset)
3612 /* Having offset does not affect runtime cost in case it is added to
3613 symbol, but it increases complexity. */
3617 cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3622 /* Just get the expression, expand it and measure the cost. */
3623 tree comp = get_computation_at (data->current_loop, use, cand, at);
3626 return infinite_cost;
3629 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3631 return new_cost (computation_cost (comp), 0);
3635 /* Determines the cost of the computation by that USE is expressed
3636 from induction variable CAND. If ADDRESS_P is true, we just need
3637 to create an address from it, otherwise we want to get it into
3638 register. A set of invariants we depend on is stored in
3642 get_computation_cost (struct ivopts_data *data,
3643 struct iv_use *use, struct iv_cand *cand,
3644 bool address_p, bitmap *depends_on)
/* Convenience wrapper: evaluate the cost at the use's own statement.  */
3646 return get_computation_cost_at (data,
3647 use, cand, address_p, depends_on, use->stmt);
3650 /* Determines cost of basing replacement of USE on CAND in a generic
3654 determine_use_iv_cost_generic (struct ivopts_data *data,
3655 struct iv_use *use, struct iv_cand *cand)
3660 /* The simple case first -- if we need to express value of the preserved
3661 original biv, the cost is 0. This also prevents us from counting the
3662 cost of increment twice -- once at this use and once in the cost of
3664 if (cand->pos == IP_ORIGINAL
3665 && cand->incremented_at == use->stmt)
3667 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3671 cost = get_computation_cost (data, use, cand, false, &depends_on);
3672 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
/* Report whether basing USE on CAND is feasible at all.  */
3674 return !infinite_cost_p (cost);
3677 /* Determines cost of basing replacement of USE on CAND in an address. */
3680 determine_use_iv_cost_address (struct ivopts_data *data,
3681 struct iv_use *use, struct iv_cand *cand)
/* address_p = true: we only need to form an address, not a register value.  */
3684 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3686 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3688 return !infinite_cost_p (cost);
3691 /* Computes value of candidate CAND at position AT in iteration NITER, and
3692 stores it to VAL. */
3695 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3698 aff_tree step, delta, nit;
3699 struct iv *iv = cand->iv;
3700 tree type = TREE_TYPE (iv->base);
/* Pointer ivs step in sizetype, not in the pointer type itself.  */
3701 tree steptype = type;
3702 if (POINTER_TYPE_P (type))
3703 steptype = sizetype;
/* VAL = base + NITER * step, computed in affine arithmetic; if AT is
   after the increment, one extra step is added.  */
3705 tree_to_aff_combination (iv->step, steptype, &step);
3706 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3707 aff_combination_convert (&nit, steptype);
3708 aff_combination_mult (&nit, &step, &delta);
3709 if (stmt_after_increment (loop, cand, at))
3710 aff_combination_add (&delta, &step);
3712 tree_to_aff_combination (iv->base, type, val);
3713 aff_combination_add (val, &delta);
3716 /* Returns period of induction variable iv. */
3719 iv_period (struct iv *iv)
3721 tree step = iv->step, period, type;
3724 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3726 /* Period of the iv is gcd (step, type range). Since type range is power
3727 of two, it suffices to determine the maximum power of two that divides
3729 pow2div = num_ending_zeros (step);
3730 type = unsigned_type_for (TREE_TYPE (step));
/* Period = 2^(precision - pow2div) - 1, expressed as a low-bits mask.  */
3732 period = build_low_bits_mask (type,
3733 (TYPE_PRECISION (type)
3734 - tree_low_cst (pow2div, 1)));
3739 /* Returns the comparison operator used when eliminating the iv USE. */
3741 static enum tree_code
3742 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3744 struct loop *loop = data->current_loop;
/* Find the successor edge of the condition's block that leaves the loop.  */
3748 ex_bb = gimple_bb (use->stmt);
3749 exit = EDGE_SUCC (ex_bb, 0);
3750 if (flow_bb_inside_loop_p (loop, exit->dest))
3751 exit = EDGE_SUCC (ex_bb, 1);
/* Use EQ if the loop is left on the true branch, NE otherwise.  */
3753 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3756 /* Check whether it is possible to express the condition in USE by comparison
3757 of candidate CAND. If so, store the value compared with to BOUND. */
3760 may_eliminate_iv (struct ivopts_data *data,
3761 struct iv_use *use, struct iv_cand *cand, tree *bound)
3766 struct loop *loop = data->current_loop;
/* Elimination requires a constant candidate step.  */
3769 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3772 /* For now works only for exits that dominate the loop latch.
3773 TODO: extend to other conditions inside loop body. */
3774 ex_bb = gimple_bb (use->stmt);
3775 if (use->stmt != last_stmt (ex_bb)
3776 || gimple_code (use->stmt) != GIMPLE_COND
3777 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
/* Locate the edge that actually exits the loop; both successors being
   inside the loop disqualifies the condition.  */
3780 exit = EDGE_SUCC (ex_bb, 0);
3781 if (flow_bb_inside_loop_p (loop, exit->dest))
3782 exit = EDGE_SUCC (ex_bb, 1);
3783 if (flow_bb_inside_loop_p (loop, exit->dest))
3786 nit = niter_for_exit (data, exit);
3790 /* Determine whether we can use the variable to test the exit condition.
3791 This is the case iff the period of the induction variable is greater
3792 than the number of iterations for which the exit condition is true. */
3793 period = iv_period (cand->iv);
3795 /* If the number of iterations is constant, compare against it directly. */
3796 if (TREE_CODE (nit) == INTEGER_CST)
3798 if (!tree_int_cst_lt (nit, period))
3802 /* If not, and if this is the only possible exit of the loop, see whether
3803 we can get a conservative estimate on the number of iterations of the
3804 entire loop and compare against that instead. */
3805 else if (loop_only_exit_p (loop, exit))
3807 double_int period_value, max_niter;
3808 if (!estimated_loop_iterations (loop, true, &max_niter))
3810 period_value = tree_to_double_int (period);
3811 if (double_int_ucmp (max_niter, period_value) >= 0)
3815 /* Otherwise, punt. */
/* Success: BOUND is the candidate's value at the final iteration.  */
3819 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3820 *bound = aff_combination_to_tree (&bnd);
3824 /* Determines cost of basing replacement of USE on CAND in a condition. */
3827 determine_use_iv_cost_condition (struct ivopts_data *data,
3828 struct iv_use *use, struct iv_cand *cand)
3830 tree bound = NULL_TREE;
3832 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3833 comp_cost elim_cost, express_cost, cost;
3836 /* Only consider real candidates. */
3839 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3843 /* Try iv elimination. */
3844 if (may_eliminate_iv (data, use, cand, &bound))
3846 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3847 /* The bound is a loop invariant, so it will be only computed
3849 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3852 elim_cost = infinite_cost;
3854 /* Try expressing the original giv. If it is compared with an invariant,
3855 note that we cannot get rid of it. */
3856 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3859 express_cost = get_computation_cost (data, use, cand, false,
3860 &depends_on_express);
3861 fd_ivopts_data = data;
3862 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3864 /* Choose the better approach. */
3865 if (compare_costs (elim_cost, express_cost) < 0)
/* Elimination won: hand its dependence set over to the cost record
   (ownership transfers, so NULL the local to avoid the free below).  */
3868 depends_on = depends_on_elim;
3869 depends_on_elim = NULL;
3873 cost = express_cost;
3874 depends_on = depends_on_express;
3875 depends_on_express = NULL;
3879 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
/* Free whichever dependence set was not recorded.  */
3881 if (depends_on_elim)
3882 BITMAP_FREE (depends_on_elim);
3883 if (depends_on_express)
3884 BITMAP_FREE (depends_on_express);
3886 return !infinite_cost_p (cost);
3889 /* Determines cost of basing replacement of USE on CAND. Returns false
3890 if USE cannot be based on CAND. */
3893 determine_use_iv_cost (struct ivopts_data *data,
3894 struct iv_use *use, struct iv_cand *cand)
/* Dispatch on the kind of use (generic value, address, or condition).  */
3898 case USE_NONLINEAR_EXPR:
3899 return determine_use_iv_cost_generic (data, use, cand);
3902 return determine_use_iv_cost_address (data, use, cand);
3905 return determine_use_iv_cost_condition (data, use, cand);
3912 /* Determines costs of basing the use of the iv on an iv candidate. */
3915 determine_use_iv_costs (struct ivopts_data *data)
3919 struct iv_cand *cand;
3920 bitmap to_clear = BITMAP_ALLOC (NULL);
3922 alloc_use_cost_map (data);
/* For each use, cost it against either every candidate or just the
   candidates recorded as related to that use.  */
3924 for (i = 0; i < n_iv_uses (data); i++)
3926 use = iv_use (data, i);
3928 if (data->consider_all_candidates)
3930 for (j = 0; j < n_iv_cands (data); j++)
3932 cand = iv_cand (data, j);
3933 determine_use_iv_cost (data, use, cand);
3940 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3942 cand = iv_cand (data, j);
3943 if (!determine_use_iv_cost (data, use, cand))
3944 bitmap_set_bit (to_clear, j);
3947 /* Remove the candidates for that the cost is infinite from
3948 the list of related candidates. */
3949 bitmap_and_compl_into (use->related_cands, to_clear);
/* TO_CLEAR is reused across uses; reset it for the next iteration.  */
3950 bitmap_clear (to_clear);
3954 BITMAP_FREE (to_clear);
3956 if (dump_file && (dump_flags & TDF_DETAILS))
3958 fprintf (dump_file, "Use-candidate costs:\n");
3960 for (i = 0; i < n_iv_uses (data); i++)
3962 use = iv_use (data, i);
3964 fprintf (dump_file, "Use %d:\n", i);
3965 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3966 for (j = 0; j < use->n_map_members; j++)
3968 if (!use->cost_map[j].cand
3969 || infinite_cost_p (use->cost_map[j].cost))
3972 fprintf (dump_file, " %d\t%d\t%d\t",
3973 use->cost_map[j].cand->id,
3974 use->cost_map[j].cost.cost,
3975 use->cost_map[j].cost.complexity);
3976 if (use->cost_map[j].depends_on)
3977 bitmap_print (dump_file,
3978 use->cost_map[j].depends_on, "","");
3979 fprintf (dump_file, "\n");
3982 fprintf (dump_file, "\n");
3984 fprintf (dump_file, "\n");
3988 /* Determines cost of the candidate CAND. */
3991 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3993 comp_cost cost_base;
3994 unsigned cost, cost_step;
4003 /* There are two costs associated with the candidate -- its increment
4004 and its initialization. The second is almost negligible for any loop
4005 that rolls enough, so we take it just very little into account. */
4007 base = cand->iv->base;
4008 cost_base = force_var_cost (data, base, NULL);
4009 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
/* Initialization cost is amortized over the expected iteration count.  */
4011 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4013 /* Prefer the original ivs unless we may gain something by replacing it.
4014 The reason is to make debugging simpler; so this is not relevant for
4015 artificial ivs created by other optimization passes. */
4016 if (cand->pos != IP_ORIGINAL
4017 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4020 /* Prefer not to insert statements into latch unless there are some
4021 already (so that we do not create unnecessary jumps). */
4022 if (cand->pos == IP_END
4023 && empty_block_p (ip_end_pos (data->current_loop)))
4029 /* Determines costs of computation of the candidates. */
4032 determine_iv_costs (struct ivopts_data *data)
4036 if (dump_file && (dump_flags & TDF_DETAILS))
4038 fprintf (dump_file, "Candidate costs:\n");
4039 fprintf (dump_file, " cand\tcost\n");
/* Compute (and optionally dump) the cost of every candidate.  */
4042 for (i = 0; i < n_iv_cands (data); i++)
4044 struct iv_cand *cand = iv_cand (data, i);
4046 determine_iv_cost (data, cand);
4048 if (dump_file && (dump_flags & TDF_DETAILS))
4049 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4052 if (dump_file && (dump_flags & TDF_DETAILS))
4053 fprintf (dump_file, "\n");
4056 /* Calculates cost for having SIZE induction variables. */
4059 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4061 /* We add size to the cost, so that we prefer eliminating ivs
4063 return size + estimate_reg_pressure_cost (size, data->regs_used);
4066 /* For each size of the induction variable set determine the penalty. */
4069 determine_set_costs (struct ivopts_data *data)
4073 gimple_stmt_iterator psi;
4075 struct loop *loop = data->current_loop;
4078 /* We use the following model (definitely improvable, especially the
4079 cost function -- TODO):
4081 We estimate the number of registers available (using MD data), name it A.
4083 We estimate the number of registers used by the loop, name it U. This
4084 number is obtained as the number of loop phi nodes (not counting virtual
4085 registers and bivs) + the number of variables from outside of the loop.
4087 We set a reserve R (free regs that are used for temporary computations,
4088 etc.). For now the reserve is a constant 3.
4090 Let I be the number of induction variables.
4092 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4093 make a lot of ivs without a reason).
4094 -- if A - R < U + I <= A, the cost is I * PRES_COST
4095 -- if U + I > A, the cost is I * PRES_COST and
4096 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4098 if (dump_file && (dump_flags & TDF_DETAILS))
4100 fprintf (dump_file, "Global costs:\n");
4101 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4102 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
4103 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
/* Count loop-header phis that are real (non-virtual) registers and are
   not already-detected induction variables.  */
4107 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4109 phi = gsi_stmt (psi);
4110 op = PHI_RESULT (phi);
4112 if (!is_gimple_reg (op))
4115 if (get_iv (data, op))
/* Also count invariants that have nonlinear uses in the loop.  */
4121 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4123 struct version_info *info = ver_info (data, j);
4125 if (info->inv_id && info->has_nonlin_use)
4129 data->regs_used = n;
4130 if (dump_file && (dump_flags & TDF_DETAILS))
4131 fprintf (dump_file, " regs_used %d\n", n);
4133 if (dump_file && (dump_flags & TDF_DETAILS))
4135 fprintf (dump_file, " cost for size:\n");
4136 fprintf (dump_file, " ivs\tcost\n");
4137 for (j = 0; j <= 2 * target_avail_regs; j++)
4138 fprintf (dump_file, " %d\t%d\n", j,
4139 ivopts_global_cost_for_size (data, j));
4140 fprintf (dump_file, "\n");
4144 /* Returns true if A is a cheaper cost pair than B. */
4147 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
/* Primary key: the use cost; ties are broken by the candidate's own cost.  */
4157 cmp = compare_costs (a->cost, b->cost);
4164 /* In case the costs are the same, prefer the cheaper candidate. */
4165 if (a->cand->cost < b->cand->cost)
4171 /* Computes the cost field of IVS structure. */
4174 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
/* Total = per-use costs + candidate costs + register-pressure penalty.  */
4176 comp_cost cost = ivs->cand_use_cost;
4177 cost.cost += ivs->cand_cost;
4178 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4183 /* Remove invariants in set INVS to set IVS. */
4186 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
/* Decrement the reference count of each invariant in INVS.  */
4194 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4196 ivs->n_invariant_uses[iid]--;
4197 if (ivs->n_invariant_uses[iid] == 0)
4202 /* Set USE not to be expressed by any candidate in IVS. */
4205 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4208 unsigned uid = use->id, cid;
4209 struct cost_pair *cp;
4211 cp = ivs->cand_for_use[uid];
/* Detach the cost pair and drop the candidate's use count.  */
4217 ivs->cand_for_use[uid] = NULL;
4218 ivs->n_cand_uses[cid]--;
/* If this was the candidate's last use, remove it from the set entirely
   and stop charging its fixed cost and dependences.  */
4220 if (ivs->n_cand_uses[cid] == 0)
4222 bitmap_clear_bit (ivs->cands, cid);
4223 /* Do not count the pseudocandidates. */
4227 ivs->cand_cost -= cp->cand->cost;
4229 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4232 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4234 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4235 iv_ca_recount_cost (data, ivs);
4238 /* Add invariants in set INVS to set IVS. */
4241 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
/* Increment the reference count of each invariant in INVS.  */
4249 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4251 ivs->n_invariant_uses[iid]++;
4252 if (ivs->n_invariant_uses[iid] == 1)
4257 /* Set cost pair for USE in set IVS to CP. */
4260 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4261 struct iv_use *use, struct cost_pair *cp)
4263 unsigned uid = use->id, cid;
/* No-op if the use is already assigned exactly this cost pair.  */
4265 if (ivs->cand_for_use[uid] == cp)
/* Detach any previous assignment before installing the new one.  */
4268 if (ivs->cand_for_use[uid])
4269 iv_ca_set_no_cp (data, ivs, use);
4276 ivs->cand_for_use[uid] = cp;
4277 ivs->n_cand_uses[cid]++;
/* First use of this candidate: add its fixed cost and dependences.  */
4278 if (ivs->n_cand_uses[cid] == 1)
4280 bitmap_set_bit (ivs->cands, cid);
4281 /* Do not count the pseudocandidates. */
4285 ivs->cand_cost += cp->cand->cost;
4287 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4290 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4291 iv_ca_set_add_invariants (ivs, cp->depends_on);
4292 iv_ca_recount_cost (data, ivs);
4296 /* Extend set IVS by expressing USE by some of the candidates in it
   -- scans the candidates already in the set and installs the cheapest
   cost pair found (if any).  */
4300 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4303 struct cost_pair *best_cp = NULL, *cp;
/* Uses must be added in order of their ids.  */
4307 gcc_assert (ivs->upto >= use->id);
4309 if (ivs->upto == use->id)
4315 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4317 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4319 if (cheaper_cost_pair (cp, best_cp))
4323 iv_ca_set_cp (data, ivs, use, best_cp);
4326 /* Get cost for assignment IVS.  A set with uses that could not be
   expressed by any candidate is infinitely expensive.  */
4329 iv_ca_cost (struct iv_ca *ivs)
4331 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4334 /* Returns true if all dependences of CP are among invariants in IVS. */
4337 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
/* No dependences -- trivially satisfied.  */
4342 if (!cp->depends_on)
4345 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4347 if (ivs->n_invariant_uses[i] == 0)
4354 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4355 it before NEXT_CHANGE.  The returned node becomes the new list head;
   the caller owns it and frees it via iv_ca_delta_free.  */
4357 static struct iv_ca_delta *
4358 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4359 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4361 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4364 change->old_cp = old_cp;
4365 change->new_cp = new_cp;
4366 change->next_change = next_change;
4371 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
   become invalid; L2 is appended to the tail of L1.  */
4374 static struct iv_ca_delta *
4375 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4377 struct iv_ca_delta *last;
/* Walk to the end of L1 and splice L2 on.  */
4385 for (last = l1; last->next_change; last = last->next_change)
4387 last->next_change = l2;
4392 /* Returns candidate by that USE is expressed in IVS.  May be NULL if the
   use currently has no candidate installed.  */
4394 static struct cost_pair *
4395 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4397 return ivs->cand_for_use[use->id];
4400 /* Reverse the list of changes DELTA, forming the inverse to it.  Both the
   link order and the old/new cost pairs of each node are swapped, so
   committing the result undoes the original delta.  */
4402 static struct iv_ca_delta *
4403 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4405 struct iv_ca_delta *act, *next, *prev = NULL;
4406 struct cost_pair *tmp;
4408 for (act = delta; act; act = next)
4410 next = act->next_change;
4411 act->next_change = prev;
/* Swap old_cp and new_cp (the swap's other half is elided here).  */
4415 act->old_cp = act->new_cp;
4422 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4423 reverted instead (the list is reversed before and after application
   so DELTA is left unchanged for the caller).  */
4426 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4427 struct iv_ca_delta *delta, bool forward)
4429 struct cost_pair *from, *to;
4430 struct iv_ca_delta *act;
4433 delta = iv_ca_delta_reverse (delta);
4435 for (act = delta; act; act = act->next_change)
/* Sanity check: the set really is in the "from" state.  */
4439 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4440 iv_ca_set_cp (data, ivs, act->use, to);
/* Restore the original list order for the caller.  */
4444 iv_ca_delta_reverse (delta);
4447 /* Returns true if CAND is used in IVS (i.e. expresses at least one use). */
4450 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4452 return ivs->n_cand_uses[cand->id] > 0;
4455 /* Returns number of induction variable candidates in the set IVS. */
4458 iv_ca_n_cands (struct iv_ca *ivs)
4460 return ivs->n_cands;
4463 /* Free the list of changes DELTA and set *DELTA to NULL (nulling of the
   pointer is elided in this excerpt).  */
4466 iv_ca_delta_free (struct iv_ca_delta **delta)
4468 struct iv_ca_delta *act, *next;
/* Save the link before freeing each node.  */
4470 for (act = *delta; act; act = next)
4472 next = act->next_change;
4479 /* Allocates new iv candidates assignment, empty and with zero cost.
   Arrays are sized by the current number of uses/candidates/invariants;
   the caller releases it with iv_ca_free.  */
4481 static struct iv_ca *
4482 iv_ca_new (struct ivopts_data *data)
4484 struct iv_ca *nw = XNEW (struct iv_ca);
4488 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4489 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4490 nw->cands = BITMAP_ALLOC (NULL);
4493 nw->cand_use_cost = zero_cost;
4495 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4496 nw->cost = zero_cost;
4501 /* Free memory occupied by the set IVS (counterpart of iv_ca_new). */
4504 iv_ca_free (struct iv_ca **ivs)
4506 free ((*ivs)->cand_for_use);
4507 free ((*ivs)->n_cand_uses);
4508 BITMAP_FREE ((*ivs)->cands);
4509 free ((*ivs)->n_invariant_uses);
4514 /* Dumps IVS to FILE. */
4517 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4519 const char *pref = " invariants ";
4521 comp_cost cost = iv_ca_cost (ivs);
4523 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4524 bitmap_print (file, ivs->cands, " candidates ","\n");
4526 for (i = 1; i <= data->max_inv_id; i++)
4527 if (ivs->n_invariant_uses[i])
4529 fprintf (file, "%s%d", pref, i);
4532 fprintf (file, "\n");
4535 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4536 new set, and store differences in DELTA.  Number of induction variables
4537 in the new set is stored to N_IVS.  The delta is NOT left committed;
   the set is restored before returning.  */
4540 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4541 struct iv_cand *cand, struct iv_ca_delta **delta,
4547 struct cost_pair *old_cp, *new_cp;
4550 for (i = 0; i < ivs->upto; i++)
4552 use = iv_use (data, i);
4553 old_cp = iv_ca_cand_for_use (ivs, use);
/* Skip uses already expressed by CAND.  */
4556 && old_cp->cand == cand)
4559 new_cp = get_use_iv_cost (data, use, cand);
4563 if (!iv_ca_has_deps (ivs, new_cp))
/* Only record the change if it actually helps this use.  */
4566 if (!cheaper_cost_pair (new_cp, old_cp))
4569 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Temporarily apply the delta to measure the resulting cost.  */
4572 iv_ca_delta_commit (data, ivs, *delta, true);
4573 cost = iv_ca_cost (ivs);
4575 *n_ivs = iv_ca_n_cands (ivs);
4576 iv_ca_delta_commit (data, ivs, *delta, false);
4581 /* Try narrowing set IVS by removing CAND.  Return the cost of
4582 the new set and store the differences in DELTA.  Every use currently
   expressed by CAND must be re-expressed by some remaining candidate;
   if one cannot be, the narrowing fails with infinite_cost.  */
4585 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4586 struct iv_cand *cand, struct iv_ca_delta **delta)
4590 struct cost_pair *old_cp, *new_cp, *cp;
4592 struct iv_cand *cnd;
4596 for (i = 0; i < n_iv_uses (data); i++)
4598 use = iv_use (data, i);
/* Only uses expressed by the candidate being removed matter.  */
4600 old_cp = iv_ca_cand_for_use (ivs, use);
4601 if (old_cp->cand != cand)
4606 if (data->consider_all_candidates)
/* Search all remaining candidates in the set for the cheapest
   replacement.  */
4608 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4613 cnd = iv_cand (data, ci)
4615 cp = get_use_iv_cost (data, use, cnd);
4618 if (!iv_ca_has_deps (ivs, cp))
4621 if (!cheaper_cost_pair (cp, new_cp))
/* Otherwise restrict the search to candidates related to the use.  */
4629 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4634 cnd = iv_cand (data, ci);
4636 cp = get_use_iv_cost (data, use, cnd);
4639 if (!iv_ca_has_deps (ivs, cp))
4642 if (!cheaper_cost_pair (cp, new_cp))
/* No replacement found -- the narrowing is impossible.  */
4651 iv_ca_delta_free (delta);
4652 return infinite_cost;
4655 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
/* Measure the cost of the narrowed set, then restore it.  */
4658 iv_ca_delta_commit (data, ivs, *delta, true);
4659 cost = iv_ca_cost (ivs);
4660 iv_ca_delta_commit (data, ivs, *delta, false);
4665 /* Try optimizing the set of candidates IVS by removing candidates different
4666 from to EXCEPT_CAND from it.  Return cost of the new set, and store
4667 differences in DELTA.  Recurses after each successful removal so that
   several redundant candidates may be pruned in one call.  */
4670 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4671 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4674 struct iv_ca_delta *act_delta, *best_delta;
4676 comp_cost best_cost, acost;
4677 struct iv_cand *cand;
4680 best_cost = iv_ca_cost (ivs);
4682 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4684 cand = iv_cand (data, i);
/* EXCEPT_CAND is protected from removal.  */
4686 if (cand == except_cand)
4689 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4691 if (compare_costs (acost, best_cost) < 0)
4694 iv_ca_delta_free (&best_delta);
4695 best_delta = act_delta;
4698 iv_ca_delta_free (&act_delta);
4707 /* Recurse to possibly remove other unnecessary ivs. */
4708 iv_ca_delta_commit (data, ivs, best_delta, true);
4709 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4710 iv_ca_delta_commit (data, ivs, best_delta, false);
4711 *delta = iv_ca_delta_join (best_delta, *delta);
4715 /* Tries to extend the sets IVS in the best possible way in order
4716 to express the USE.  Returns false when the use cannot be expressed
   at finite cost by any candidate.  */
4719 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4722 comp_cost best_cost, act_cost;
4725 struct iv_cand *cand;
4726 struct iv_ca_delta *best_delta = NULL, *act_delta;
4727 struct cost_pair *cp;
/* Baseline: express USE by the cheapest candidate already in the set.  */
4729 iv_ca_add_use (data, ivs, use);
4730 best_cost = iv_ca_cost (ivs);
4732 cp = iv_ca_cand_for_use (ivs, use);
4735 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4736 iv_ca_set_no_cp (data, ivs, use);
4739 /* First try important candidates not based on any memory object.  Only if
4740 this fails, try the specific ones.  Rationale -- in loops with many
4741 variables the best choice often is to use just one generic biv.  If we
4742 added here many ivs specific to the uses, the optimization algorithm later
4743 would be likely to get stuck in a local minimum, thus causing us to create
4744 too many ivs.  The approach from few ivs to more seems more likely to be
4745 successful -- starting from few ivs, replacing an expensive use by a
4746 specific iv should always be a win.  */
4747 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4749 cand = iv_cand (data, i);
4751 if (cand->iv->base_object != NULL_TREE)
4754 if (iv_ca_cand_used_p (ivs, cand))
4757 cp = get_use_iv_cost (data, use, cand);
/* Tentatively install CP, see what ripple effect extending the set
   with CAND has, then withdraw it.  */
4761 iv_ca_set_cp (data, ivs, use, cp);
4762 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4763 iv_ca_set_no_cp (data, ivs, use);
4764 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4766 if (compare_costs (act_cost, best_cost) < 0)
4768 best_cost = act_cost;
4770 iv_ca_delta_free (&best_delta);
4771 best_delta = act_delta;
4774 iv_ca_delta_free (&act_delta);
/* Fall back to the use-specific candidates only when the generic ones
   all failed.  */
4777 if (infinite_cost_p (best_cost))
4779 for (i = 0; i < use->n_map_members; i++)
4781 cp = use->cost_map + i;
4786 /* Already tried this. */
4787 if (cand->important && cand->iv->base_object == NULL_TREE)
4790 if (iv_ca_cand_used_p (ivs, cand))
4794 iv_ca_set_cp (data, ivs, use, cp);
4795 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4796 iv_ca_set_no_cp (data, ivs, use);
4797 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4800 if (compare_costs (act_cost, best_cost) < 0)
4802 best_cost = act_cost;
4805 iv_ca_delta_free (&best_delta);
4806 best_delta = act_delta;
4809 iv_ca_delta_free (&act_delta);
/* Commit the winning delta for good.  */
4813 iv_ca_delta_commit (data, ivs, best_delta, true);
4814 iv_ca_delta_free (&best_delta);
4816 return !infinite_cost_p (best_cost);
4819 /* Finds an initial assignment of candidates to uses, adding uses one by
   one in id order.  Failure handling for an inexpressible use is elided
   in this excerpt.  */
4821 static struct iv_ca *
4822 get_initial_solution (struct ivopts_data *data)
4824 struct iv_ca *ivs = iv_ca_new (data);
4827 for (i = 0; i < n_iv_uses (data); i++)
4828 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4837 /* Tries to improve set of induction variables IVS by one greedy step
   (extend by one candidate, or prune).  Commits the best improving
   delta found; the return value (elided here) presumably signals
   whether progress was made -- see the caller's loop.  */
4840 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4843 comp_cost acost, best_cost = iv_ca_cost (ivs);
4844 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4845 struct iv_cand *cand;
4847 /* Try extending the set of induction variables by one. */
4848 for (i = 0; i < n_iv_cands (data); i++)
4850 cand = iv_cand (data, i);
4852 if (iv_ca_cand_used_p (ivs, cand))
4855 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4859 /* If we successfully added the candidate and the set is small enough,
4860 try optimizing it by removing other candidates. */
4861 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4863 iv_ca_delta_commit (data, ivs, act_delta, true);
4864 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4865 iv_ca_delta_commit (data, ivs, act_delta, false);
4866 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4869 if (compare_costs (acost, best_cost) < 0)
4872 iv_ca_delta_free (&best_delta);
4873 best_delta = act_delta;
4876 iv_ca_delta_free (&act_delta);
4881 /* Try removing the candidates from the set instead. */
4882 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4884 /* Nothing more we can do. */
/* Apply the improvement and check the bookkeeping is consistent.  */
4889 iv_ca_delta_commit (data, ivs, best_delta, true);
4890 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4891 iv_ca_delta_free (&best_delta);
4895 /* Attempts to find the optimal set of induction variables.  We do simple
4896 greedy heuristic -- we try to replace at most one candidate in the selected
4897 solution and remove the unused ivs while this improves the cost.
   Records the chosen candidate in each use's `selected' field.  */
4899 static struct iv_ca *
4900 find_optimal_iv_set (struct ivopts_data *data)
4906 /* Get the initial solution. */
4907 set = get_initial_solution (data);
4910 if (dump_file && (dump_flags & TDF_DETAILS))
4911 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4915 if (dump_file && (dump_flags & TDF_DETAILS))
4917 fprintf (dump_file, "Initial set of candidates:\n");
4918 iv_ca_dump (data, dump_file, set);
/* Iterate greedy improvement until a fixed point.  */
4921 while (try_improve_iv_set (data, set))
4923 if (dump_file && (dump_flags & TDF_DETAILS))
4925 fprintf (dump_file, "Improved to:\n");
4926 iv_ca_dump (data, dump_file, set);
4930 if (dump_file && (dump_flags & TDF_DETAILS))
4932 comp_cost cost = iv_ca_cost (set);
4933 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
/* Publish the decision on each use for the rewriting phase.  */
4936 for (i = 0; i < n_iv_uses (data); i++)
4938 use = iv_use (data, i);
4939 use->selected = iv_ca_cand_for_use (set, use)->cand;
4945 /* Creates a new induction variable corresponding to CAND, inserting the
   increment at the position implied by cand->pos.  */
4948 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4950 gimple_stmt_iterator incr_pos;
/* Increment placed at the normal position (before the exit test)...  */
4960 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
/* ...or at the end of the latch block.  */
4964 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
4969 /* Mark that the iv is preserved. */
4970 name_info (data, cand->var_before)->preserve_biv = true;
4971 name_info (data, cand->var_after)->preserve_biv = true;
4973 /* Rewrite the increment so that it uses var_before directly. */
4974 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4979 gimple_add_tmp_var (cand->var_before);
4980 add_referenced_var (cand->var_before);
/* Unshare so the iv does not alias trees used elsewhere.  */
4982 base = unshare_expr (cand->iv->base);
4984 create_iv (base, unshare_expr (cand->iv->step),
4985 cand->var_before, data->current_loop,
4986 &incr_pos, after, &cand->var_before, &cand->var_after);
4989 /* Creates new induction variables described in SET -- one per candidate
   in the set's bitmap.  */
4992 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4995 struct iv_cand *cand;
4998 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5000 cand = iv_cand (data, i);
5001 create_new_iv (data, cand);
5005 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5006 is true, remove also the ssa name defined by the statement. */
5009 remove_statement (gimple stmt, bool including_defined_name)
5011 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
/* PHI nodes need the dedicated removal routine.  */
5013 if (gimple_code (stmt) == GIMPLE_PHI)
5014 remove_phi_node (&bsi, including_defined_name);
5017 gsi_remove (&bsi, true);
5018 release_defs (stmt);
5022 /* Rewrites USE (definition of iv used in a nonlinear expression)
5023 using candidate CAND.  */
5026 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5027 struct iv_use *use, struct iv_cand *cand)
5032 gimple_stmt_iterator bsi;
5034 /* An important special case -- if we are asked to express value of
5035 the original iv by itself, just exit; there is no need to
5036 introduce a new computation (that might also need casting the
5037 variable to unsigned and back). */
5038 if (cand->pos == IP_ORIGINAL
5039 && cand->incremented_at == use->stmt)
5041 tree step, ctype, utype;
5042 enum tree_code incr_code = PLUS_EXPR, old_code;
5044 gcc_assert (is_gimple_assign (use->stmt));
5045 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5047 step = cand->iv->step;
5048 ctype = TREE_TYPE (step);
5049 utype = TREE_TYPE (cand->var_after);
/* A negated step means the increment is really a decrement.  */
5050 if (TREE_CODE (step) == NEGATE_EXPR)
5052 incr_code = MINUS_EXPR;
5053 step = TREE_OPERAND (step, 0);
5056 /* Check whether we may leave the computation unchanged.
5057 This is the case only if it does not rely on other
5058 computations in the loop -- otherwise, the computation
5059 we rely upon may be removed in remove_unused_ivs,
5060 thus leading to ICE. */
5061 old_code = gimple_assign_rhs_code (use->stmt);
5062 if (old_code == PLUS_EXPR
5063 || old_code == MINUS_EXPR
5064 || old_code == POINTER_PLUS_EXPR)
/* Identify which operand is the non-iv addend.  For MINUS_EXPR the
   iv may only be the first operand.  */
5066 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5067 op = gimple_assign_rhs2 (use->stmt);
5068 else if (old_code != MINUS_EXPR
5069 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5070 op = gimple_assign_rhs1 (use->stmt);
5078 && (TREE_CODE (op) == INTEGER_CST
5079 || operand_equal_p (op, step, 0)))
5082 /* Otherwise, add the necessary computations to express
5084 op = fold_convert (ctype, cand->var_before);
5085 comp = fold_convert (utype,
5086 build2 (incr_code, ctype, op,
5087 unshare_expr (step)));
/* General case: express the use through the candidate.  */
5091 comp = get_computation (data->current_loop, use, cand);
5092 gcc_assert (comp != NULL_TREE);
5095 switch (gimple_code (use->stmt))
5098 tgt = PHI_RESULT (use->stmt);
5100 /* If we should keep the biv, do not replace it. */
5101 if (name_info (data, tgt)->preserve_biv)
/* Insertions for a PHI go after the labels of its block.  */
5104 bsi = gsi_after_labels (gimple_bb (use->stmt));
5108 tgt = gimple_assign_lhs (use->stmt);
5109 bsi = gsi_for_stmt (use->stmt);
5116 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5117 true, GSI_SAME_STMT);
/* A PHI is replaced by an explicit assignment; a plain assignment just
   gets a new RHS.  */
5119 if (gimple_code (use->stmt) == GIMPLE_PHI)
5121 ass = gimple_build_assign (tgt, op);
5122 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5123 remove_statement (use->stmt, false);
5127 gimple_assign_set_rhs_from_tree (&bsi, op);
5128 use->stmt = gsi_stmt (bsi);
5132 /* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index; also handles the extra operands of ARRAY_REF.  */
5136 idx_remove_ssa_names (tree base, tree *idx,
5137 void *data ATTRIBUTE_UNUSED)
5141 if (TREE_CODE (*idx) == SSA_NAME)
5142 *idx = SSA_NAME_VAR (*idx);
/* ARRAY_REF operands 2 and 3 (element size/lower bound info) may also
   contain SSA names.  */
5144 if (TREE_CODE (base) == ARRAY_REF)
5146 op = &TREE_OPERAND (base, 2);
5148 && TREE_CODE (*op) == SSA_NAME)
5149 *op = SSA_NAME_VAR (*op);
5150 op = &TREE_OPERAND (base, 3);
5152 && TREE_CODE (*op) == SSA_NAME)
5153 *op = SSA_NAME_VAR (*op);
5159 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5162 unshare_and_remove_ssa_names (tree ref)
5164 ref = unshare_expr (ref);
5165 for_each_index (&ref, idx_remove_ssa_names, NULL);
5170 /* Extract the alias analysis info for the memory reference REF.  There are
5171 several ways how this information may be stored and what precisely is
5172 its semantics depending on the type of the reference, but there always is
5173 somewhere hidden one _DECL node that is used to determine the set of
5174 virtual operands for the reference.  The code below deciphers this jungle
5175 and extracts this single useful piece of information. */
5178 get_ref_tag (tree ref, tree orig)
5180 tree var = get_base_address (ref);
5181 tree aref = NULL_TREE, tag, sv;
5182 HOST_WIDE_INT offset, size, maxsize;
/* Walk down ORIG's component chain looking at each sub-reference.  */
5184 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5186 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5194 if (TREE_CODE (var) == INDIRECT_REF)
5196 /* If the base is a dereference of a pointer, first check its name memory
5197 tag.  If it does not have one, use its symbol memory tag. */
5198 var = TREE_OPERAND (var, 0);
5199 if (TREE_CODE (var) != SSA_NAME)
5202 if (SSA_NAME_PTR_INFO (var))
5204 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5209 var = SSA_NAME_VAR (var);
5210 tag = symbol_mem_tag (var);
5211 gcc_assert (tag != NULL_TREE);
/* Base is a declaration -- use its symbol memory tag directly.  */
5219 tag = symbol_mem_tag (var);
5227 /* Copies the reference information from OLD_REF to NEW_REF; for other
   references, rebuilds the TMR_ORIGINAL/TMR_TAG fields from OLD_REF.  */
5230 copy_ref_info (tree new_ref, tree old_ref)
5232 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5233 copy_mem_ref_info (new_ref, old_ref);
5236 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5237 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5241 /* Rewrites USE (address that is an iv) using candidate CAND: builds a
   TARGET_MEM_REF from the affine form of the computation and installs it
   in place of the original memory reference.  */
5244 rewrite_use_address (struct ivopts_data *data,
5245 struct iv_use *use, struct iv_cand *cand)
5248 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5252 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5254 unshare_aff_combination (&aff);
5256 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5257 copy_ref_info (ref, *use->op_p);
5261 /* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  Prefers eliminating the iv entirely by comparing the
   candidate against a precomputed bound; falls back to expressing the
   compared value through the candidate.  */
5265 rewrite_use_compare (struct ivopts_data *data,
5266 struct iv_use *use, struct iv_cand *cand)
5268 tree comp, *var_p, op, bound;
5269 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5270 enum tree_code compare;
5271 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
/* Elimination path: replace the exit test with CAND <compare> BOUND.  */
5277 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5278 tree var_type = TREE_TYPE (var);
5280 compare = iv_elimination_compare (data, use);
5281 bound = unshare_expr (fold_convert (var_type, bound));
5282 op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
5283 true, GSI_SAME_STMT);
5285 gimple_cond_set_lhs (use->stmt, var);
5286 gimple_cond_set_code (use->stmt, compare);
5287 gimple_cond_set_rhs (use->stmt, op);
5291 /* The induction variable elimination failed; just express the original
5293 comp = get_computation (data->current_loop, use, cand);
5294 gcc_assert (comp != NULL_TREE);
5296 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5299 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5300 true, GSI_SAME_STMT);
5303 /* Rewrites USE using candidate CAND, dispatching on the kind of use.
   Statement-change notifications bracket the modification.  */
5306 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5308 push_stmt_changes (&use->stmt);
5312 case USE_NONLINEAR_EXPR:
5313 rewrite_use_nonlinear_expr (data, use, cand);
5317 rewrite_use_address (data, use, cand);
5321 rewrite_use_compare (data, use, cand);
5328 pop_stmt_changes (&use->stmt);
5331 /* Rewrite the uses using the selected induction variables (the `selected'
   field filled in by find_optimal_iv_set).  */
5334 rewrite_uses (struct ivopts_data *data)
5337 struct iv_cand *cand;
5340 for (i = 0; i < n_iv_uses (data); i++)
5342 use = iv_use (data, i);
5343 cand = use->selected;
5346 rewrite_use (data, use, cand);
5350 /* Removes the ivs that are not used after rewriting: relevant ivs with a
   nonzero step that have no remaining use and need not be preserved.  */
5353 remove_unused_ivs (struct ivopts_data *data)
5358 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5360 struct version_info *info;
5362 info = ver_info (data, j);
5364 && !integer_zerop (info->iv->step)
5366 && !info->iv->have_use_for
5367 && !info->preserve_biv
5368 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5372 /* Frees data allocated by the optimization of a single loop, resetting
   per-loop state in DATA so the structure can be reused for the next
   loop.  */
5375 free_loop_data (struct ivopts_data *data)
5383 pointer_map_destroy (data->niters);
5384 data->niters = NULL;
/* Reset the per-ssa-name version info of everything marked relevant.  */
5387 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5389 struct version_info *info;
5391 info = ver_info (data, i);
5395 info->has_nonlin_use = false;
5396 info->preserve_biv = false;
5399 bitmap_clear (data->relevant);
5400 bitmap_clear (data->important_candidates);
/* Release per-use cost maps and dependence bitmaps.  */
5402 for (i = 0; i < n_iv_uses (data); i++)
5404 struct iv_use *use = iv_use (data, i);
5407 BITMAP_FREE (use->related_cands);
5408 for (j = 0; j < use->n_map_members; j++)
5409 if (use->cost_map[j].depends_on)
5410 BITMAP_FREE (use->cost_map[j].depends_on);
5411 free (use->cost_map);
5414 VEC_truncate (iv_use_p, data->iv_uses, 0);
5416 for (i = 0; i < n_iv_cands (data); i++)
5418 struct iv_cand *cand = iv_cand (data, i);
5422 if (cand->depends_on)
5423 BITMAP_FREE (cand->depends_on);
5426 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
/* Grow the version-info array if new ssa names appeared meanwhile.  */
5428 if (data->version_info_size < num_ssa_names)
5430 data->version_info_size = 2 * num_ssa_names;
5431 free (data->version_info);
5432 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5435 data->max_inv_id = 0;
/* Clear cached DECL_RTLs set during cost estimation.  */
5437 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5438 SET_DECL_RTL (obj, NULL_RTX);
5440 VEC_truncate (tree, decl_rtl_to_reset, 0);
5443 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree (parameter text partly elided in this excerpt); releases
   everything tree_ssa_iv_optimize_init allocated.  */
5447 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5449 free_loop_data (data);
5450 free (data->version_info);
5451 BITMAP_FREE (data->relevant);
5452 BITMAP_FREE (data->important_candidates);
5454 VEC_free (tree, heap, decl_rtl_to_reset);
5455 VEC_free (iv_use_p, heap, data->iv_uses);
5456 VEC_free (iv_cand_p, heap, data->iv_candidates);
5459 /* Optimizes the LOOP.  Returns true if anything changed.  Runs the full
   pipeline described in the file header: find ivs and uses, find
   candidates, compute costs, pick the optimal set, rewrite, clean up.  */
5462 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5464 bool changed = false;
5465 struct iv_ca *iv_ca;
5468 gcc_assert (!data->niters);
5469 data->current_loop = loop;
5471 if (dump_file && (dump_flags & TDF_DETAILS))
5473 fprintf (dump_file, "Processing loop %d\n", loop->num);
/* Dump the single dominating exit, if the loop has one.  */
5475 exit = single_dom_exit (loop);
5478 fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5479 exit->src->index, exit->dest->index);
5480 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5481 fprintf (dump_file, "\n");
5484 fprintf (dump_file, "\n");
5487 /* For each ssa name determines whether it behaves as an induction variable
5489 if (!find_induction_variables (data))
5492 /* Finds interesting uses (item 1). */
5493 find_interesting_uses (data);
/* Bail out on pathologically many uses to bound compile time.  */
5494 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5497 /* Finds candidates for the induction variables (item 2). */
5498 find_iv_candidates (data);
5500 /* Calculates the costs (item 3, part 1). */
5501 determine_use_iv_costs (data);
5502 determine_iv_costs (data);
5503 determine_set_costs (data);
5505 /* Find the optimal set of induction variables (item 3, part 2). */
5506 iv_ca = find_optimal_iv_set (data);
5511 /* Create the new induction variables (item 4, part 1). */
5512 create_new_ivs (data, iv_ca);
5513 iv_ca_free (&iv_ca);
5515 /* Rewrite the uses (item 4, part 2). */
5516 rewrite_uses (data);
5518 /* Remove the ivs that are unused after rewriting. */
5519 remove_unused_ivs (data);
5521 /* We have changed the structure of induction variables; it might happen
5522 that definitions in the scev database refer to some of them that were
5527 free_loop_data (data);
5532 /* Main entry point. Optimizes induction variables in loops. */
5535 tree_ssa_iv_optimize (void)
5538 struct ivopts_data data;
5541 tree_ssa_iv_optimize_init (&data);
5543 /* Optimize the loops starting with the innermost ones. */
5544 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5546 if (dump_file && (dump_flags & TDF_DETAILS))
5547 flow_loop_dump (loop, dump_file, NULL, 1);
5549 tree_ssa_iv_optimize_loop (&data, loop);
5552 tree_ssa_iv_optimize_finalize (&data);