/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
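/* For illustration only (this example is not part of the pass itself):
   on a source loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a + 4*i of each store is an interesting use, the pointer
   p = a + 4*i is one of the candidates, and selecting it turns the loop
   into roughly

     for (p = a; p < a + n; p++)
       *p = 0;

   removing the implied multiplication (strength reduction) and possibly
   the counter I itself (induction variable elimination).  The 4-byte
   element size is just an example; whether the rewrite pays off is
   decided by the cost model below.  */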
#include "coretypes.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  unsigned cost;	/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
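/* All three bounds can be tuned from the command line without rebuilding
   the compiler, e.g. (values shown for illustration only; the defaults
   live in params.def):

     gcc -O2 --param iv-consider-all-candidates-bound=40 \
             --param iv-max-considered-uses=500 file.c  */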
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;
    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;
    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;
    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;
    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;
    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
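/* Illustration (not from the original sources): for an address iv whose
   base is &a[i], the recursion above strips the array reference via
   get_base_address and returns &a converted to ptr_type_node, whereas a
   plain integer counter fails the POINTER_TYPE_P test and yields
   NULL_TREE, i.e. no base object.  */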
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition *COND_P, checks whether it is a compare of an induction
   variable and an invariant.  If this is the case, CONTROL_VAR is set
   to location of the iv, BOUND to the location of the invariant,
   IV_VAR and IV_BOUND are set to the corresponding induction variable
   descriptions, and true is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, tree *cond_p,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The nodes returned when COND has just one operand.  Note that you should
     not modify anything in BOUND or IV_BOUND because of this.  */
  static struct iv const_iv;
  static tree zero;
  tree cond = *cond_p;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (cond) == SSA_NAME)
    {
      op0 = cond_p;
      iv0 = get_iv (data, cond);
      ret = (iv0 && !integer_zerop (iv0->step));
      goto end;
    }

  if (!COMPARISON_CLASS_P (cond))
    goto end;

  op0 = &TREE_OPERAND (cond, 0);
  op1 = &TREE_OPERAND (cond, 1);
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
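/* A worked example (illustration only): constant_multiple_of (i * 12, i, &mul)
   recurses through the MULT_EXPR, matches the remaining operands with
   operand_equal_p, and stores 12 into *MUL.  With two constants,
   constant_multiple_of (20, 8, &mul) fails, because the FLOOR_DIV_EXPR
   division leaves the nonzero remainder 4.  */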
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (AGGREGATE_TYPE_P (TREE_TYPE (expr))
	  && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case CONVERT_EXPR:
    case NON_LVALUE_EXPR:
    case NOP_EXPR:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */
	  iv = get_iv (data, lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt,
				      &GIMPLE_STMT_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt,
					   &GIMPLE_STMT_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt,
					 &GIMPLE_STMT_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  tree phi;
  block_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
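/* For example (illustration only): stripping the address &a[i + 3],
   where the array elements are 4 bytes wide, recurses into the ARRAY_REF
   index, peels the constant 3 off the index, and returns &a[i] with
   *OFFSET set to 3 * 4 == 12.  */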
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
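/* E.g. int maps to unsigned int and a pointer type maps to an unsigned
   integer type of the same precision, while an already-unsigned type is
   returned unchanged.  */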
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that computing the modulo
	     by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
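/* For instance, a use with three related candidates gets a cost_map of
   size 4, and one with five related candidates a cost_map of size 8.  */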
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
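/* For example, comparing {cost = 4, complexity = 1} against
   {cost = 4, complexity = 2} returns a negative number: ties in runtime
   cost are broken in favor of the less complex computation.  */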
/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
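/* The open-addressing probe above relies on the identity
   x % 2^k == (x & (2^k - 1)); e.g. with n_map_members == 8, candidate
   id 13 starts probing at slot 13 & 7 == 5 and scans linearly, wrapping
   around to the start of the table.  */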
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET);
      else
	cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (Pmode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
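/* E.g. for a 16-bit type, BITNO is 15 and the bit is taken from the low
   word; for a 96-bit type on a host with 64-bit HOST_WIDE_INT, BITNO
   becomes 95 - 64 == 31 within TREE_INT_CST_HIGH (illustration only).  */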
2665 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2666 same precision that is at least as wide as the precision of TYPE, stores
2667 BA to A and BB to B, and returns the type of BA. Otherwise, returns the type of A. */
2671 determine_common_wider_type (tree *a, tree *b)
2673 tree wider_type = NULL;
2675 tree atype = TREE_TYPE (*a);
2677 if ((TREE_CODE (*a) == NOP_EXPR
2678 || TREE_CODE (*a) == CONVERT_EXPR))
2680 suba = TREE_OPERAND (*a, 0);
2681 wider_type = TREE_TYPE (suba);
2682 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2688 if ((TREE_CODE (*b) == NOP_EXPR
2689 || TREE_CODE (*b) == CONVERT_EXPR))
2691 subb = TREE_OPERAND (*b, 0);
2692 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2703 /* Determines the expression by which USE is expressed from induction variable
2704 CAND at statement AT in LOOP. The expression is stored in a decomposed
2705 form into AFF. Returns false if USE cannot be expressed using CAND. */
2708 get_computation_aff (struct loop *loop,
2709 struct iv_use *use, struct iv_cand *cand, tree at,
2710 struct affine_tree_combination *aff)
2712 tree ubase = use->iv->base;
2713 tree ustep = use->iv->step;
2714 tree cbase = cand->iv->base;
2715 tree cstep = cand->iv->step, cstep_common;
2716 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2717 tree common_type, var;
2719 aff_tree cbase_aff, var_aff;
2722 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2724 /* We do not have a precision to express the values of use. */
2728 var = var_at_stmt (loop, cand, at);
2729 uutype = unsigned_type_for (utype);
2731 /* If the conversion is not a noop, perform it. */
2732 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2734 cstep = fold_convert (uutype, cstep);
2735 cbase = fold_convert (uutype, cbase);
2736 var = fold_convert (uutype, var);
2739 if (!constant_multiple_of (ustep, cstep, &rat))
2742 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2743 type, we achieve better folding by computing their difference in this
2744 wider type and casting the result to UUTYPE. We do not need to worry about
2745 overflows, as all the arithmetic will in the end be performed in UUTYPE anyway. */
2747 common_type = determine_common_wider_type (&ubase, &cbase);
2749 /* use = ubase - ratio * cbase + ratio * var. */
2750 tree_to_aff_combination (ubase, common_type, aff);
2751 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2752 tree_to_aff_combination (var, uutype, &var_aff);
2754 /* We need to shift the value if we are after the increment. */
2755 if (stmt_after_increment (loop, cand, at))
2759 if (common_type != uutype)
2760 cstep_common = fold_convert (common_type, cstep);
2762 cstep_common = cstep;
2764 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2765 aff_combination_add (&cbase_aff, &cstep_aff);
2768 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2769 aff_combination_add (aff, &cbase_aff);
2770 if (common_type != uutype)
2771 aff_combination_convert (aff, uutype);
2773 aff_combination_scale (&var_aff, rat);
2774 aff_combination_add (aff, &var_aff);
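/* A small worked example of the combination just built: for a use with
   base 3 and step 8 and a candidate with base 0 and step 4,
   constant_multiple_of gives rat == 2, so the result is
   3 - 2*0 + 2*var, i.e. the use is expressed as 2*var + 3. */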
2779 /* Determines the expression by which USE is expressed from induction variable
2780 CAND at statement AT in LOOP. The computation is unshared. */
2783 get_computation_at (struct loop *loop,
2784 struct iv_use *use, struct iv_cand *cand, tree at)
2787 tree type = TREE_TYPE (use->iv->base);
2789 if (!get_computation_aff (loop, use, cand, at, &aff))
2791 unshare_aff_combination (&aff);
2792 return fold_convert (type, aff_combination_to_tree (&aff));
2795 /* Determines the expression by which USE is expressed from induction variable
2796 CAND in LOOP. The computation is unshared. */
2799 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2801 return get_computation_at (loop, use, cand, use->stmt);
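/* For illustration (a hypothetical source-level view, not taken from
   the sources): in a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   a candidate p with base &a[0] and step sizeof (*a) allows the use
   a[i] to be expressed as *p, which is the kind of computation the
   functions above construct. */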
2804 /* Returns cost of addition in MODE. */
2807 add_cost (enum machine_mode mode)
2809 static unsigned costs[NUM_MACHINE_MODES];
2817 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2818 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2819 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2824 cost = seq_cost (seq);
2830 if (dump_file && (dump_flags & TDF_DETAILS))
2831 fprintf (dump_file, "Addition in %s costs %d\n",
2832 GET_MODE_NAME (mode), cost);
2836 /* Entry in a hashtable of already known costs for multiplication. */
2839 HOST_WIDE_INT cst; /* The constant to multiply by. */
2840 enum machine_mode mode; /* In mode. */
2841 unsigned cost; /* The cost. */
2844 /* Computes the hash value for ENTRY. */
2847 mbc_entry_hash (const void *entry)
2849 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2851 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2854 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2857 mbc_entry_eq (const void *entry1, const void *entry2)
2859 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2860 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2862 return (e1->mode == e2->mode
2863 && e1->cst == e2->cst);
2866 /* Returns cost of multiplication by constant CST in MODE. */
2869 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2871 static htab_t costs;
2872 struct mbc_entry **cached, act;
2877 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2881 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2883 return (*cached)->cost;
2885 *cached = XNEW (struct mbc_entry);
2886 (*cached)->mode = mode;
2887 (*cached)->cst = cst;
2890 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2891 gen_int_mode (cst, mode), NULL_RTX, 0);
2895 cost = seq_cost (seq);
2897 if (dump_file && (dump_flags & TDF_DETAILS))
2898 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2899 (int) cst, GET_MODE_NAME (mode), cost);
2901 (*cached)->cost = cost;
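/* The entry filled in above memoizes the result, so later queries for
   the same (CST, MODE) pair are answered from the hash table without
   expanding the multiplication again. */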
2906 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2907 validity for a memory reference accessing memory of mode MODE. */
2910 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2912 #define MAX_RATIO 128
2913 static sbitmap valid_mult[MAX_MACHINE_MODE];
2915 if (!valid_mult[mode])
2917 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2921 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2922 sbitmap_zero (valid_mult[mode]);
2923 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2924 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2926 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2927 if (memory_address_p (mode, addr))
2928 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2931 if (dump_file && (dump_flags & TDF_DETAILS))
2933 fprintf (dump_file, " allowed multipliers:");
2934 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2935 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2936 fprintf (dump_file, " %d", (int) i);
2937 fprintf (dump_file, "\n");
2938 fprintf (dump_file, "\n");
2942 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2945 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
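/* For illustration: on targets with scaled-index addressing such as
   x86, ratios 1, 2, 4 and 8 would typically be marked valid here for
   the common integer modes, whereas many RISC targets accept no
   multiplier at all. */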
2948 /* Returns cost of an address in the shape symbol + var + OFFSET + RATIO * index.
2949 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
2950 the variable is omitted. Compute the cost for a memory reference that accesses
2951 a memory location of mode MEM_MODE.
2953 TODO -- there must be some better way. This is all quite crude. */
2956 get_address_cost (bool symbol_present, bool var_present,
2957 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2958 enum machine_mode mem_mode)
2960 static bool initialized[MAX_MACHINE_MODE];
2961 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2962 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2963 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2964 unsigned cost, acost, complexity;
2965 bool offset_p, ratio_p;
2966 HOST_WIDE_INT s_offset;
2967 unsigned HOST_WIDE_INT mask;
2970 if (!initialized[mem_mode])
2973 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2974 int old_cse_not_expected;
2975 unsigned sym_p, var_p, off_p, rat_p, add_c;
2976 rtx seq, addr, base;
2979 initialized[mem_mode] = true;
2981 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2983 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2984 for (i = start; i <= 1 << 20; i <<= 1)
2986 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2987 if (!memory_address_p (mem_mode, addr))
2990 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2991 off[mem_mode] = max_offset[mem_mode];
2993 for (i = start; i <= 1 << 20; i <<= 1)
2995 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2996 if (!memory_address_p (mem_mode, addr))
2999 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3001 if (dump_file && (dump_flags & TDF_DETAILS))
3003 fprintf (dump_file, "get_address_cost:\n");
3004 fprintf (dump_file, " min offset %s %d\n",
3005 GET_MODE_NAME (mem_mode),
3006 (int) min_offset[mem_mode]);
3007 fprintf (dump_file, " max offset %s %d\n",
3008 GET_MODE_NAME (mem_mode),
3009 (int) max_offset[mem_mode]);
3013 for (i = 2; i <= MAX_RATIO; i++)
3014 if (multiplier_allowed_in_address_p (i, mem_mode))
3020 /* Compute the cost of various addressing modes. */
3022 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3023 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3025 for (i = 0; i < 16; i++)
3028 var_p = (i >> 1) & 1;
3029 off_p = (i >> 2) & 1;
3030 rat_p = (i >> 3) & 1;
3034 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3035 gen_int_mode (rat[mem_mode], Pmode));
3038 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3042 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3043 /* ??? We can run into trouble with some backends by presenting
3044 it with symbols which haven't been properly passed through
3045 targetm.encode_section_info. By setting the local bit, we
3046 enhance the probability of things working. */
3047 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3050 base = gen_rtx_fmt_e (CONST, Pmode,
3051 gen_rtx_fmt_ee (PLUS, Pmode,
3053 gen_int_mode (off[mem_mode],
3057 base = gen_int_mode (off[mem_mode], Pmode);
3062 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3065 /* To avoid splitting addressing modes, pretend that no cse will follow. */
3067 old_cse_not_expected = cse_not_expected;
3068 cse_not_expected = true;
3069 addr = memory_address (mem_mode, addr);
3070 cse_not_expected = old_cse_not_expected;
3074 acost = seq_cost (seq);
3075 acost += address_cost (addr, mem_mode);
3079 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3082 /* On some targets, it is quite expensive to load a symbol to a register,
3083 which makes addresses that contain symbols look much more expensive.
3084 However, the symbol will have to be loaded in any case before the
3085 loop (and quite likely we have it in a register already), so it does not
3086 make much sense to penalize them too heavily. So make some final
3087 tweaks for the SYMBOL_PRESENT modes:
3089 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3090 a var is cheaper, use this mode with a small penalty.
3091 If VAR_PRESENT is true, check whether the mode with
3092 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3093 if this is the case, use it. */
3094 add_c = add_cost (Pmode);
3095 for (i = 0; i < 8; i++)
3098 off_p = (i >> 1) & 1;
3099 rat_p = (i >> 2) & 1;
3101 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3105 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3106 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3109 if (dump_file && (dump_flags & TDF_DETAILS))
3111 fprintf (dump_file, "Address costs:\n");
3113 for (i = 0; i < 16; i++)
3116 var_p = (i >> 1) & 1;
3117 off_p = (i >> 2) & 1;
3118 rat_p = (i >> 3) & 1;
3120 fprintf (dump_file, " ");
3122 fprintf (dump_file, "sym + ");
3124 fprintf (dump_file, "var + ");
3126 fprintf (dump_file, "cst + ");
3128 fprintf (dump_file, "rat * ");
3130 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3131 fprintf (dump_file, "index costs %d\n", acost);
3133 fprintf (dump_file, "\n");
3137 bits = GET_MODE_BITSIZE (Pmode);
3138 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3140 if ((offset >> (bits - 1) & 1))
3145 offset_p = (s_offset != 0
3146 && min_offset[mem_mode] <= s_offset
3147 && s_offset <= max_offset[mem_mode]);
3148 ratio_p = (ratio != 1
3149 && multiplier_allowed_in_address_p (ratio, mem_mode));
3151 if (ratio != 1 && !ratio_p)
3152 cost += multiply_by_cost (ratio, Pmode);
3154 if (s_offset && !offset_p && !symbol_present)
3155 cost += add_cost (Pmode);
3157 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3158 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3159 return new_cost (cost + acost, complexity);
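/* For illustration: an address of shape sym + var + 4 + 2*index on a
   target that supports it directly is charged only the precomputed cost
   of that addressing mode plus a complexity of 4, while an unsupported
   ratio or an out-of-range offset also pays for the multiplication or
   addition added above. */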
3162 /* Estimates cost of forcing expression EXPR into a variable. */
3165 force_expr_to_var_cost (tree expr)
3167 static bool costs_initialized = false;
3168 static unsigned integer_cost;
3169 static unsigned symbol_cost;
3170 static unsigned address_cost;
3172 comp_cost cost0, cost1, cost;
3173 enum machine_mode mode;
3175 if (!costs_initialized)
3177 tree type = build_pointer_type (integer_type_node);
3181 var = create_tmp_var_raw (integer_type_node, "test_var");
3182 TREE_STATIC (var) = 1;
3183 x = produce_memory_decl_rtl (var, NULL);
3184 SET_DECL_RTL (var, x);
3186 integer_cost = computation_cost (build_int_cst (integer_type_node,
3189 addr = build1 (ADDR_EXPR, type, var);
3190 symbol_cost = computation_cost (addr) + 1;
3193 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3195 build_int_cst (sizetype, 2000))) + 1;
3196 if (dump_file && (dump_flags & TDF_DETAILS))
3198 fprintf (dump_file, "force_expr_to_var_cost:\n");
3199 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3200 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3201 fprintf (dump_file, " address %d\n", (int) address_cost);
3202 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3203 fprintf (dump_file, "\n");
3206 costs_initialized = true;
3211 if (SSA_VAR_P (expr))
3214 if (TREE_INVARIANT (expr))
3216 if (TREE_CODE (expr) == INTEGER_CST)
3217 return new_cost (integer_cost, 0);
3219 if (TREE_CODE (expr) == ADDR_EXPR)
3221 tree obj = TREE_OPERAND (expr, 0);
3223 if (TREE_CODE (obj) == VAR_DECL
3224 || TREE_CODE (obj) == PARM_DECL
3225 || TREE_CODE (obj) == RESULT_DECL)
3226 return new_cost (symbol_cost, 0);
3229 return new_cost (address_cost, 0);
3232 switch (TREE_CODE (expr))
3234 case POINTER_PLUS_EXPR:
3238 op0 = TREE_OPERAND (expr, 0);
3239 op1 = TREE_OPERAND (expr, 1);
3243 if (is_gimple_val (op0))
3246 cost0 = force_expr_to_var_cost (op0);
3248 if (is_gimple_val (op1))
3251 cost1 = force_expr_to_var_cost (op1);
3256 /* Just an arbitrary value, FIXME. */
3257 return new_cost (target_spill_cost, 0);
3260 mode = TYPE_MODE (TREE_TYPE (expr));
3261 switch (TREE_CODE (expr))
3263 case POINTER_PLUS_EXPR:
3266 cost = new_cost (add_cost (mode), 0);
3270 if (cst_and_fits_in_hwi (op0))
3271 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3272 else if (cst_and_fits_in_hwi (op1))
3273 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3275 return new_cost (target_spill_cost, 0);
3282 cost = add_costs (cost, cost0);
3283 cost = add_costs (cost, cost1);
3285 /* Bound the cost by target_spill_cost. The parts of complicated
3286 computations are often either loop invariant or at least can
3287 be shared between several iv uses, so letting this grow without
3288 limit would not give reasonable results. */
3289 if (cost.cost > target_spill_cost)
3290 cost.cost = target_spill_cost;
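/* E.g. for a_1 + b_2 with both operands SSA names, cost0 and cost1 are
   zero and the result is just the addition cost for the expression's
   mode, subject to the clamping above. */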
3295 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3296 invariants the computation depends on. */
3299 force_var_cost (struct ivopts_data *data,
3300 tree expr, bitmap *depends_on)
3304 fd_ivopts_data = data;
3305 walk_tree (&expr, find_depends, depends_on, NULL);
3308 return force_expr_to_var_cost (expr);
3311 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3312 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3313 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3314 invariants the computation depends on. */
3317 split_address_cost (struct ivopts_data *data,
3318 tree addr, bool *symbol_present, bool *var_present,
3319 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3322 HOST_WIDE_INT bitsize;
3323 HOST_WIDE_INT bitpos;
3325 enum machine_mode mode;
3326 int unsignedp, volatilep;
3328 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3329 &unsignedp, &volatilep, false);
3332 || bitpos % BITS_PER_UNIT != 0
3333 || TREE_CODE (core) != VAR_DECL)
3335 *symbol_present = false;
3336 *var_present = true;
3337 fd_ivopts_data = data;
3338 walk_tree (&addr, find_depends, depends_on, NULL);
3339 return new_cost (target_spill_cost, 0);
3342 *offset += bitpos / BITS_PER_UNIT;
3343 if (TREE_STATIC (core)
3344 || DECL_EXTERNAL (core))
3346 *symbol_present = true;
3347 *var_present = false;
3351 *symbol_present = false;
3352 *var_present = true;
3356 /* Estimates cost of expressing difference of addresses E1 - E2 as
3357 var + symbol + offset. The value of offset is added to OFFSET,
3358 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3359 part is missing. DEPENDS_ON is a set of the invariants the computation depends on. */
3363 ptr_difference_cost (struct ivopts_data *data,
3364 tree e1, tree e2, bool *symbol_present, bool *var_present,
3365 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3367 HOST_WIDE_INT diff = 0;
3370 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3372 if (ptr_difference_const (e1, e2, &diff))
3375 *symbol_present = false;
3376 *var_present = false;
3380 if (integer_zerop (e2))
3381 return split_address_cost (data, TREE_OPERAND (e1, 0),
3382 symbol_present, var_present, offset, depends_on);
3384 *symbol_present = false;
3385 *var_present = true;
3387 cost = force_var_cost (data, e1, depends_on);
3388 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3389 cost.cost += add_cost (Pmode);
3394 /* Estimates cost of expressing difference E1 - E2 as
3395 var + symbol + offset. The value of offset is added to OFFSET,
3396 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3397 part is missing. DEPENDS_ON is a set of the invariants the computation depends on. */
3401 difference_cost (struct ivopts_data *data,
3402 tree e1, tree e2, bool *symbol_present, bool *var_present,
3403 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3406 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3407 unsigned HOST_WIDE_INT off1, off2;
3409 e1 = strip_offset (e1, &off1);
3410 e2 = strip_offset (e2, &off2);
3411 *offset += off1 - off2;
3416 if (TREE_CODE (e1) == ADDR_EXPR)
3417 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3419 *symbol_present = false;
3421 if (operand_equal_p (e1, e2, 0))
3423 *var_present = false;
3426 *var_present = true;
3427 if (integer_zerop (e2))
3428 return force_var_cost (data, e1, depends_on);
3430 if (integer_zerop (e1))
3432 cost = force_var_cost (data, e2, depends_on);
3433 cost.cost += multiply_by_cost (-1, mode);
3438 cost = force_var_cost (data, e1, depends_on);
3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3440 cost.cost += add_cost (mode);
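/* For illustration: for e1 == &a[4] and e2 == &a[0], strip_offset
   reduces both to &a, the ADDR_EXPR path folds their difference to the
   constant 4 * sizeof (*a) accumulated into *OFFSET, and no variable or
   symbol part remains; only genuinely different bases pay for a
   subtraction. */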
3445 /* Determines the cost of the computation by which USE is expressed
3446 from induction variable CAND. If ADDRESS_P is true, we just need
3447 to create an address from it, otherwise we want to get it into a
3448 register. A set of invariants we depend on is stored in
3449 DEPENDS_ON. AT is the statement at which the value is computed. */
3452 get_computation_cost_at (struct ivopts_data *data,
3453 struct iv_use *use, struct iv_cand *cand,
3454 bool address_p, bitmap *depends_on, tree at)
3456 tree ubase = use->iv->base, ustep = use->iv->step;
3458 tree utype = TREE_TYPE (ubase), ctype;
3459 unsigned HOST_WIDE_INT cstepi, offset = 0;
3460 HOST_WIDE_INT ratio, aratio;
3461 bool var_present, symbol_present;
3468 /* Only consider real candidates. */
3470 return infinite_cost;
3472 cbase = cand->iv->base;
3473 cstep = cand->iv->step;
3474 ctype = TREE_TYPE (cbase);
3476 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3478 /* We do not have a precision to express the values of use. */
3479 return infinite_cost;
3484 /* Do not try to express the address of an object with a computation based
3485 on the address of a different object. This may cause problems in rtl
3486 level alias analysis (which does not expect this to happen,
3487 as this is illegal in C), and would be unlikely to be useful anyway. */
3489 if (use->iv->base_object
3490 && cand->iv->base_object
3491 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3492 return infinite_cost;
3495 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3497 /* TODO -- add direct handling of this case. */
3501 /* CSTEPI is removed from the offset in case the statement is after the
3502 increment. If the step is not constant, we use zero instead.
3503 This is a bit imprecise (there is the extra addition), but
3504 redundancy elimination is likely to transform the code so that
3505 it uses the value of the variable before the increment anyway,
3506 so it is not that unrealistic. */
3507 if (cst_and_fits_in_hwi (cstep))
3508 cstepi = int_cst_value (cstep);
3512 if (!constant_multiple_of (ustep, cstep, &rat))
3513 return infinite_cost;
3515 if (double_int_fits_in_shwi_p (rat))
3516 ratio = double_int_to_shwi (rat);
3518 return infinite_cost;
3520 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3521 or ratio == 1, it is better to handle this like
3523 ubase - ratio * cbase + ratio * var
3525 (this also holds in the case ratio == -1, TODO). */
3527 if (cst_and_fits_in_hwi (cbase))
3529 offset = - ratio * int_cst_value (cbase);
3530 cost = difference_cost (data,
3531 ubase, build_int_cst (utype, 0),
3532 &symbol_present, &var_present, &offset,
3535 else if (ratio == 1)
3537 cost = difference_cost (data,
3539 &symbol_present, &var_present, &offset,
3544 cost = force_var_cost (data, cbase, depends_on);
3545 cost.cost += add_cost (TYPE_MODE (ctype));
3546 cost = add_costs (cost,
3547 difference_cost (data,
3548 ubase, build_int_cst (utype, 0),
3549 &symbol_present, &var_present,
3550 &offset, depends_on));
3553 /* If we are after the increment, the value of the candidate is higher by
3555 if (stmt_after_increment (data->current_loop, cand, at))
3556 offset -= ratio * cstepi;
3558 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
3559 (the symbol/var/const parts may be omitted). If we are looking for an address,
3560 find the cost of addressing this. */
3562 return add_costs (cost, get_address_cost (symbol_present, var_present,
3564 TYPE_MODE (TREE_TYPE (*use->op_p))));
3566 /* Otherwise estimate the costs for computing the expression. */
3567 aratio = ratio > 0 ? ratio : -ratio;
3568 if (!symbol_present && !var_present && !offset)
3571 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3577 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3581 /* Symbol + offset should be compile-time computable. */
3582 && (symbol_present || offset))
3585 /* Having an offset does not affect the runtime cost in case it is added to
3586 the symbol, but it increases complexity. */