1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
96 /* The infinite cost. */
97 #define INFTY 10000000
99 /* The expected number of loop iterations. TODO -- use profiling instead of
101 #define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base; /* Initial value of the iv. */
108 tree base_object; /* A memory object to that the induction variable points. */
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
135 /* Cost of a computation. */
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
145 static const comp_cost zero_cost = {0, 0};
146 static const comp_cost infinite_cost = {INFTY, INFTY};
148 /* The candidate - cost pair. */
151 struct iv_cand *cand; /* The candidate. */
152 comp_cost cost; /* The cost. */
153 bitmap depends_on; /* The list of invariants that have to be
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
166 gimple stmt; /* Statement in that it occurs. */
167 tree *op_p; /* The place where it occurs. */
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand *selected;
176 /* The selected candidate. */
179 /* The position where the iv is computed. */
182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */
184 IP_ORIGINAL /* The original biv. */
187 /* The induction variable candidate. */
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
194 gimple incremented_at;/* For original biv, the statement where it is
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
203 bitmap depends_on; /* The list of invariants that are used in step of the
207 /* The data used by the induction variable optimizations. */
209 typedef struct iv_use *iv_use_p;
211 DEF_VEC_ALLOC_P(iv_use_p,heap);
213 typedef struct iv_cand *iv_cand_p;
214 DEF_VEC_P(iv_cand_p);
215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
219 /* The currently optimized loop. */
220 struct loop *current_loop;
222 /* Are we optimizing for speed? */
225 /* Number of registers used in it. */
228 /* Numbers of iterations for all exits of the current loop. */
229 struct pointer_map_t *niters;
231 /* The size of version_info array allocated. */
232 unsigned version_info_size;
234 /* The array of information for the ssa names. */
235 struct version_info *version_info;
237 /* The bitmap of indices in version_info whose value was changed. */
240 /* The maximum invariant id. */
243 /* The uses of induction variables. */
244 VEC(iv_use_p,heap) *iv_uses;
246 /* The candidates. */
247 VEC(iv_cand_p,heap) *iv_candidates;
249 /* A bitmap of important candidates. */
250 bitmap important_candidates;
252 /* Whether to consider just related and important candidates when replacing a
254 bool consider_all_candidates;
257 /* An assignment of iv candidates to uses. */
261 /* The number of uses covered by the assignment. */
264 /* Number of uses that cannot be expressed by the candidates in the set. */
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair **cand_for_use;
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses;
273 /* The candidates used. */
276 /* The number of candidates in the set. */
279 /* Total number of registers needed. */
282 /* Total cost of expressing uses. */
283 comp_cost cand_use_cost;
285 /* Total cost of candidates. */
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses;
291 /* Total cost of the assignment. */
295 /* Difference of two iv candidate assignments. */
302 /* An old assignment (for rollback purposes). */
303 struct cost_pair *old_cp;
305 /* A new assignment. */
306 struct cost_pair *new_cp;
308 /* Next change in the list. */
309 struct iv_ca_delta *next_change;
312 /* Bound on number of candidates below that all candidates are considered. */
314 #define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
317 /* If there are more iv occurrences, we just give up (it is quite unlikely that
318 optimizing such a loop would help, and it would take ages). */
320 #define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
323 /* If there are at most this number of ivs in the set, try removing unnecessary
324 ivs from the set always. */
326 #define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
329 /* The list of trees for that the decl_rtl field must be reset is stored
332 static VEC(tree,heap) *decl_rtl_to_reset;
334 /* Number of uses recorded in DATA. */
336 static inline unsigned
337 n_iv_uses (struct ivopts_data *data)
339 return VEC_length (iv_use_p, data->iv_uses);
342 /* Ith use recorded in DATA. */
344 static inline struct iv_use *
345 iv_use (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_use_p, data->iv_uses, i);
350 /* Number of candidates recorded in DATA. */
352 static inline unsigned
353 n_iv_cands (struct ivopts_data *data)
355 return VEC_length (iv_cand_p, data->iv_candidates);
358 /* Ith candidate recorded in DATA. */
360 static inline struct iv_cand *
361 iv_cand (struct ivopts_data *data, unsigned i)
363 return VEC_index (iv_cand_p, data->iv_candidates, i);
366 /* The single loop exit if it dominates the latch, NULL otherwise. */
369 single_dom_exit (struct loop *loop)
371 edge exit = single_exit (loop);
376 if (!just_once_each_iteration_p (loop, exit->src))
382 /* Dumps information about the induction variable IV to FILE. */
384 extern void dump_iv (FILE *, struct iv *);
386 dump_iv (FILE *file, struct iv *iv)
390 fprintf (file, "ssa name ");
391 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
392 fprintf (file, "\n");
395 fprintf (file, " type ");
396 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
397 fprintf (file, "\n");
401 fprintf (file, " base ");
402 print_generic_expr (file, iv->base, TDF_SLIM);
403 fprintf (file, "\n");
405 fprintf (file, " step ");
406 print_generic_expr (file, iv->step, TDF_SLIM);
407 fprintf (file, "\n");
411 fprintf (file, " invariant ");
412 print_generic_expr (file, iv->base, TDF_SLIM);
413 fprintf (file, "\n");
418 fprintf (file, " base object ");
419 print_generic_expr (file, iv->base_object, TDF_SLIM);
420 fprintf (file, "\n");
424 fprintf (file, " is a biv\n");
427 /* Dumps information about the USE to FILE. */
429 extern void dump_use (FILE *, struct iv_use *);
431 dump_use (FILE *file, struct iv_use *use)
433 fprintf (file, "use %d\n", use->id);
437 case USE_NONLINEAR_EXPR:
438 fprintf (file, " generic\n");
442 fprintf (file, " address\n");
446 fprintf (file, " compare\n");
453 fprintf (file, " in statement ");
454 print_gimple_stmt (file, use->stmt, 0, 0);
455 fprintf (file, "\n");
457 fprintf (file, " at position ");
459 print_generic_expr (file, *use->op_p, TDF_SLIM);
460 fprintf (file, "\n");
462 dump_iv (file, use->iv);
464 if (use->related_cands)
466 fprintf (file, " related candidates ");
467 dump_bitmap (file, use->related_cands);
471 /* Dumps information about the uses to FILE. */
473 extern void dump_uses (FILE *, struct ivopts_data *);
475 dump_uses (FILE *file, struct ivopts_data *data)
480 for (i = 0; i < n_iv_uses (data); i++)
482 use = iv_use (data, i);
484 dump_use (file, use);
485 fprintf (file, "\n");
489 /* Dumps information about induction variable candidate CAND to FILE. */
491 extern void dump_cand (FILE *, struct iv_cand *);
493 dump_cand (FILE *file, struct iv_cand *cand)
495 struct iv *iv = cand->iv;
497 fprintf (file, "candidate %d%s\n",
498 cand->id, cand->important ? " (important)" : "");
500 if (cand->depends_on)
502 fprintf (file, " depends on ");
503 dump_bitmap (file, cand->depends_on);
508 fprintf (file, " final value replacement\n");
515 fprintf (file, " incremented before exit test\n");
519 fprintf (file, " incremented at end\n");
523 fprintf (file, " original biv\n");
530 /* Returns the info for ssa version VER. */
532 static inline struct version_info *
533 ver_info (struct ivopts_data *data, unsigned ver)
535 return data->version_info + ver;
538 /* Returns the info for ssa name NAME. */
540 static inline struct version_info *
541 name_info (struct ivopts_data *data, tree name)
543 return ver_info (data, SSA_NAME_VERSION (name));
546 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
550 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
552 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
556 if (sbb == loop->latch)
562 return stmt == last_stmt (bb);
565 /* Returns true if STMT if after the place where the original induction
566 variable CAND is incremented. */
569 stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
571 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
578 if (stmt_bb != cand_bb)
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
585 if (gsi_stmt (bsi) == cand->incremented_at)
587 if (gsi_stmt (bsi) == stmt)
592 /* Returns true if STMT if after the place where the induction variable
593 CAND is incremented in LOOP. */
596 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
604 return stmt_after_ip_normal_pos (loop, stmt);
607 return stmt_after_ip_original_pos (cand, stmt);
614 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
617 abnormal_ssa_name_p (tree exp)
622 if (TREE_CODE (exp) != SSA_NAME)
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
628 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
629 abnormal phi node. Callback for for_each_index. */
632 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
633 void *data ATTRIBUTE_UNUSED)
635 if (TREE_CODE (base) == ARRAY_REF)
637 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
639 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
643 return !abnormal_ssa_name_p (*index);
646 /* Returns true if EXPR contains a ssa name that occurs in an
647 abnormal phi node. */
650 contains_abnormal_ssa_name_p (tree expr)
653 enum tree_code_class codeclass;
658 code = TREE_CODE (expr);
659 codeclass = TREE_CODE_CLASS (code);
661 if (code == SSA_NAME)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
664 if (code == INTEGER_CST
665 || is_gimple_min_invariant (expr))
668 if (code == ADDR_EXPR)
669 return !for_each_index (&TREE_OPERAND (expr, 0),
670 idx_contains_abnormal_ssa_name_p,
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
694 /* Returns tree describing number of iterations determined from
695 EXIT of DATA->current_loop, or NULL if something goes wrong. */
698 niter_for_exit (struct ivopts_data *data, edge exit)
700 struct tree_niter_desc desc;
706 data->niters = pointer_map_create ();
710 slot = pointer_map_contains (data->niters, exit);
714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data->current_loop,
721 && integer_zerop (desc.may_be_zero)
722 && !contains_abnormal_ssa_name_p (desc.niter))
727 *pointer_map_insert (data->niters, exit) = niter;
730 niter = (tree) *slot;
735 /* Returns tree describing number of iterations determined from
736 single dominating exit of DATA->current_loop, or NULL if something
740 niter_for_single_dom_exit (struct ivopts_data *data)
742 edge exit = single_dom_exit (data->current_loop);
747 return niter_for_exit (data, exit);
750 /* Initializes data structures used by the iv optimization pass, stored
754 tree_ssa_iv_optimize_init (struct ivopts_data *data)
756 data->version_info_size = 2 * num_ssa_names;
757 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
758 data->relevant = BITMAP_ALLOC (NULL);
759 data->important_candidates = BITMAP_ALLOC (NULL);
760 data->max_inv_id = 0;
762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
767 /* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
771 determine_base_object (tree expr)
773 enum tree_code code = TREE_CODE (expr);
776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
779 if (CONVERT_EXPR_P (expr))
780 return determine_base_object (TREE_OPERAND (expr, 0));
782 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
791 obj = TREE_OPERAND (expr, 0);
792 base = get_base_address (obj);
797 if (TREE_CODE (base) == INDIRECT_REF)
798 return determine_base_object (TREE_OPERAND (base, 0));
800 return fold_convert (ptr_type_node,
801 build_fold_addr_expr (base));
803 case POINTER_PLUS_EXPR:
804 return determine_base_object (TREE_OPERAND (expr, 0));
808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
812 return fold_convert (ptr_type_node, expr);
816 /* Allocates an induction variable with given initial value BASE and step STEP
820 alloc_iv (tree base, tree step)
822 struct iv *iv = XCNEW (struct iv);
823 gcc_assert (step != NULL_TREE);
826 iv->base_object = determine_base_object (base);
829 iv->have_use_for = false;
831 iv->ssa_name = NULL_TREE;
836 /* Sets STEP and BASE for induction variable IV. */
839 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
841 struct version_info *info = name_info (data, iv);
843 gcc_assert (!info->iv);
845 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
846 info->iv = alloc_iv (base, step);
847 info->iv->ssa_name = iv;
850 /* Finds induction variable declaration for VAR. */
853 get_iv (struct ivopts_data *data, tree var)
856 tree type = TREE_TYPE (var);
858 if (!POINTER_TYPE_P (type)
859 && !INTEGRAL_TYPE_P (type))
862 if (!name_info (data, var)->iv)
864 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
867 || !flow_bb_inside_loop_p (data->current_loop, bb))
868 set_iv (data, var, var, build_int_cst (type, 0));
871 return name_info (data, var)->iv;
874 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
875 not define a simple affine biv with nonzero step. */
878 determine_biv_step (gimple phi)
880 struct loop *loop = gimple_bb (phi)->loop_father;
881 tree name = PHI_RESULT (phi);
884 if (!is_gimple_reg (name))
887 if (!simple_iv (loop, phi, name, &iv, true))
890 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
893 /* Finds basic ivs. */
896 find_bivs (struct ivopts_data *data)
899 tree step, type, base;
901 struct loop *loop = data->current_loop;
902 gimple_stmt_iterator psi;
904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
906 phi = gsi_stmt (psi);
908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
911 step = determine_biv_step (phi);
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
916 base = expand_simple_operations (base);
917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
928 step = fold_convert (type, step);
931 set_iv (data, PHI_RESULT (phi), base, step);
938 /* Marks basic ivs. */
941 mark_bivs (struct ivopts_data *data)
945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
948 gimple_stmt_iterator psi;
950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
952 phi = gsi_stmt (psi);
954 iv = get_iv (data, PHI_RESULT (phi));
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
963 /* If the increment is in the subloop, ignore it. */
964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
970 incr_iv->biv_p = true;
974 /* Checks whether STMT defines a linear induction variable and stores its
978 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
981 struct loop *loop = data->current_loop;
983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
989 lhs = gimple_assign_lhs (stmt);
990 if (TREE_CODE (lhs) != SSA_NAME)
993 if (!simple_iv (loop, stmt, lhs, iv, true))
995 iv->base = expand_simple_operations (iv->base);
997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
1004 /* Finds general ivs in statement STMT. */
1007 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1017 /* Finds general ivs in basic block BB. */
1020 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1022 gimple_stmt_iterator bsi;
1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
1028 /* Finds general ivs. */
1031 find_givs (struct ivopts_data *data)
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1042 /* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1046 find_induction_variables (struct ivopts_data *data)
1051 if (!find_bivs (data))
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1059 tree niter = niter_for_single_dom_exit (data);
1063 fprintf (dump_file, " number of iterations ");
1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
1068 fprintf (dump_file, "Induction variables:\n\n");
1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
1080 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1082 static struct iv_use *
1083 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1084 gimple stmt, enum use_type use_type)
1086 struct iv_use *use = XCNEW (struct iv_use);
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1093 use->related_cands = BITMAP_ALLOC (NULL);
1095 /* To avoid showing ssa name in the dumps, if it was not reset by the
1097 iv->ssa_name = NULL_TREE;
1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1107 /* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1112 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1115 struct version_info *info;
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1126 info = name_info (data, op);
1128 info->has_nonlin_use |= nonlinear_use;
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1134 /* Checks whether the use OP is interesting and if so, records it. */
1136 static struct iv_use *
1137 find_interesting_uses_op (struct ivopts_data *data, tree op)
1144 if (TREE_CODE (op) != SSA_NAME)
1147 iv = get_iv (data, op);
1151 if (iv->have_use_for)
1153 use = iv_use (data, iv->use_id);
1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1159 if (integer_zerop (iv->step))
1161 record_invariant (data, op, true);
1164 iv->have_use_for = true;
1166 civ = XNEW (struct iv);
1169 stmt = SSA_NAME_DEF_STMT (op);
1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1174 iv->use_id = use->id;
1179 /* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
1188 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1192 /* The objects returned when COND has constant operands. */
1193 static struct iv const_iv;
1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1199 if (gimple_code (stmt) == GIMPLE_COND)
1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
1218 /* Exactly one of the compared values must be an iv, and the other one must
1223 if (integer_zerop (iv0->step))
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1233 *control_var = op0;;
1244 /* Checks whether the condition in STMT is interesting and if so,
1248 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1250 tree *var_p, *bound_p;
1251 struct iv *var_iv, *civ;
1253 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1255 find_interesting_uses_op (data, *var_p);
1256 find_interesting_uses_op (data, *bound_p);
1260 civ = XNEW (struct iv);
1262 record_use (data, NULL, civ, stmt, USE_COMPARE);
1265 /* Returns true if expression EXPR is obviously invariant in LOOP,
1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
1270 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1275 gcc_assert (loop_depth (loop) > 0);
1277 if (is_gimple_min_invariant (expr))
1280 if (TREE_CODE (expr) == SSA_NAME)
1282 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1284 && flow_bb_inside_loop_p (loop, def_bb))
1293 len = TREE_OPERAND_LENGTH (expr);
1294 for (i = 0; i < len; i++)
1295 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1301 /* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1306 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1311 gcc_assert (loop_depth (loop) > 0);
1313 lhs = gimple_get_lhs (stmt);
1314 for (i = 0; i < gimple_num_ops (stmt); i++)
1316 tree op = gimple_op (stmt, i);
1317 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1324 /* Cumulates the steps of indices into DATA and replaces their values with the
1325 initial ones. Returns false when the value of the index cannot be determined.
1326 Callback for for_each_index. */
1328 struct ifs_ivopts_data
1330 struct ivopts_data *ivopts_data;
1336 idx_find_step (tree base, tree *idx, void *data)
1338 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1340 tree step, iv_base, iv_step, lbound, off;
1341 struct loop *loop = dta->ivopts_data->current_loop;
1343 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1347 /* If base is a component ref, require that the offset of the reference
1349 if (TREE_CODE (base) == COMPONENT_REF)
1351 off = component_ref_field_offset (base);
1352 return expr_invariant_in_loop_p (loop, off);
1355 /* If base is array, first check whether we will be able to move the
1356 reference out of the loop (in order to take its address in strength
1357 reduction). In order for this to work we need both lower bound
1358 and step to be loop invariants. */
1359 if (TREE_CODE (base) == ARRAY_REF)
1361 step = array_ref_element_size (base);
1362 lbound = array_ref_low_bound (base);
1364 if (!expr_invariant_in_loop_p (loop, step)
1365 || !expr_invariant_in_loop_p (loop, lbound))
1369 if (TREE_CODE (*idx) != SSA_NAME)
1372 iv = get_iv (dta->ivopts_data, *idx);
1376 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1377 *&x[0], which is not folded and does not trigger the
1378 ARRAY_REF path below. */
1381 if (integer_zerop (iv->step))
1384 if (TREE_CODE (base) == ARRAY_REF)
1386 step = array_ref_element_size (base);
1388 /* We only handle addresses whose step is an integer constant. */
1389 if (TREE_CODE (step) != INTEGER_CST)
1393 /* The step for pointer arithmetics already is 1 byte. */
1394 step = build_int_cst (sizetype, 1);
1398 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1399 sizetype, &iv_base, &iv_step, dta->stmt,
1402 /* The index might wrap. */
1406 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1407 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1412 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1413 object is passed to it in DATA. */
1416 idx_record_use (tree base, tree *idx,
1419 struct ivopts_data *data = (struct ivopts_data *) vdata;
1420 find_interesting_uses_op (data, *idx);
1421 if (TREE_CODE (base) == ARRAY_REF)
1423 find_interesting_uses_op (data, array_ref_element_size (base));
1424 find_interesting_uses_op (data, array_ref_low_bound (base));
1429 /* If we can prove that TOP = cst * BOT for some constant cst,
1430 store cst to MUL and return true. Otherwise return false.
1431 The returned value is always sign-extended, regardless of the
1432 signedness of TOP and BOT. */
1435 constant_multiple_of (tree top, tree bot, double_int *mul)
1438 enum tree_code code;
1439 double_int res, p0, p1;
1440 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1445 if (operand_equal_p (top, bot, 0))
1447 *mul = double_int_one;
1451 code = TREE_CODE (top);
1455 mby = TREE_OPERAND (top, 1);
1456 if (TREE_CODE (mby) != INTEGER_CST)
1459 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1462 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1468 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1469 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1472 if (code == MINUS_EXPR)
1473 p1 = double_int_neg (p1);
1474 *mul = double_int_sext (double_int_add (p0, p1), precision);
1478 if (TREE_CODE (bot) != INTEGER_CST)
1481 p0 = double_int_sext (tree_to_double_int (top), precision);
1482 p1 = double_int_sext (tree_to_double_int (bot), precision);
1483 if (double_int_zero_p (p1))
1485 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1487 return double_int_zero_p (res);
1494 /* Returns true if memory reference REF with step STEP may be unaligned. */
1497 may_be_unaligned_p (tree ref, tree step)
1501 HOST_WIDE_INT bitsize;
1502 HOST_WIDE_INT bitpos;
1504 enum machine_mode mode;
1505 int unsignedp, volatilep;
1506 unsigned base_align;
1508 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1509 thus they are not misaligned. */
1510 if (TREE_CODE (ref) == TARGET_MEM_REF)
1513 /* The test below is basically copy of what expr.c:normal_inner_ref
1514 does to check whether the object must be loaded by parts when
1515 STRICT_ALIGNMENT is true. */
1516 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1517 &unsignedp, &volatilep, true);
1518 base_type = TREE_TYPE (base);
1519 base_align = TYPE_ALIGN (base_type);
1521 if (mode != BLKmode)
1524 tree al = build_int_cst (TREE_TYPE (step),
1525 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1527 if (base_align < GET_MODE_ALIGNMENT (mode)
1528 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1529 || bitpos % BITS_PER_UNIT != 0)
1532 if (!constant_multiple_of (step, al, &mul))
1539 /* Return true if EXPR may be non-addressable. */
1542 may_be_nonaddressable_p (tree expr)
1544 switch (TREE_CODE (expr))
1546 case TARGET_MEM_REF:
1547 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1548 target, thus they are always addressable. */
1552 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1553 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1555 case VIEW_CONVERT_EXPR:
1556 /* This kind of view-conversions may wrap non-addressable objects
1557 and make them look addressable. After some processing the
1558 non-addressability may be uncovered again, causing ADDR_EXPRs
1559 of inappropriate objects to be built. */
1560 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1561 || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1564 /* ... fall through ... */
1567 case ARRAY_RANGE_REF:
1568 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1580 /* Finds addresses in *OP_P inside STMT. */
1583 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1585 tree base = *op_p, step = build_int_cst (sizetype, 0);
1587 struct ifs_ivopts_data ifs_ivopts_data;
1589 /* Do not play with volatile memory references. A bit too conservative,
1590 perhaps, but safe. */
1591 if (gimple_has_volatile_ops (stmt))
1594 /* Ignore bitfields for now. Not really something terribly complicated
1596 if (TREE_CODE (base) == BIT_FIELD_REF)
1599 base = unshare_expr (base);
1601 if (TREE_CODE (base) == TARGET_MEM_REF)
1603 tree type = build_pointer_type (TREE_TYPE (base));
1607 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1609 civ = get_iv (data, TMR_BASE (base));
1613 TMR_BASE (base) = civ->base;
1616 if (TMR_INDEX (base)
1617 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1619 civ = get_iv (data, TMR_INDEX (base));
1623 TMR_INDEX (base) = civ->base;
1628 if (TMR_STEP (base))
1629 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1631 step = fold_build2 (PLUS_EXPR, type, step, astep);
1635 if (integer_zerop (step))
1637 base = tree_mem_ref_addr (type, base);
1641 ifs_ivopts_data.ivopts_data = data;
1642 ifs_ivopts_data.stmt = stmt;
1643 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1644 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1645 || integer_zerop (ifs_ivopts_data.step))
1647 step = ifs_ivopts_data.step;
1649 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1650 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1652 /* Check that the base expression is addressable. This needs
1653 to be done after substituting bases of IVs into it. */
1654 if (may_be_nonaddressable_p (base))
1657 /* Moreover, on strict alignment platforms, check that it is
1658 sufficiently aligned. */
1659 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1662 base = build_fold_addr_expr (base);
1664 /* Substituting bases of IVs into the base expression might
1665 have caused folding opportunities. */
1666 if (TREE_CODE (base) == ADDR_EXPR)
1668 tree *ref = &TREE_OPERAND (base, 0);
1669 while (handled_component_p (*ref))
1670 ref = &TREE_OPERAND (*ref, 0);
1671 if (TREE_CODE (*ref) == INDIRECT_REF)
1672 *ref = fold_indirect_ref (*ref);
1676 civ = alloc_iv (base, step);
1677 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1681 for_each_index (op_p, idx_record_use, data);
1684 /* Finds and records invariants used in STMT. */
1687 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1690 use_operand_p use_p;
1693 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1695 op = USE_FROM_PTR (use_p);
1696 record_invariant (data, op, false);
1700 /* Finds interesting uses of induction variables in the statement STMT. */
1703 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1706 tree op, *lhs, *rhs;
1708 use_operand_p use_p;
1709 enum tree_code code;
1711 find_invariants_stmt (data, stmt);
1713 if (gimple_code (stmt) == GIMPLE_COND)
1715 find_interesting_uses_cond (data, stmt);
1719 if (is_gimple_assign (stmt))
1721 lhs = gimple_assign_lhs_ptr (stmt);
1722 rhs = gimple_assign_rhs1_ptr (stmt);
1724 if (TREE_CODE (*lhs) == SSA_NAME)
1726 /* If the statement defines an induction variable, the uses are not
1727 interesting by themselves. */
1729 iv = get_iv (data, *lhs);
1731 if (iv && !integer_zerop (iv->step))
1735 code = gimple_assign_rhs_code (stmt);
1736 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1737 && (REFERENCE_CLASS_P (*rhs)
1738 || is_gimple_val (*rhs)))
1740 if (REFERENCE_CLASS_P (*rhs))
1741 find_interesting_uses_address (data, stmt, rhs);
1743 find_interesting_uses_op (data, *rhs);
1745 if (REFERENCE_CLASS_P (*lhs))
1746 find_interesting_uses_address (data, stmt, lhs);
1749 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1751 find_interesting_uses_cond (data, stmt);
1755 /* TODO -- we should also handle address uses of type
1757 memory = call (whatever);
1764 if (gimple_code (stmt) == GIMPLE_PHI
1765 && gimple_bb (stmt) == data->current_loop->header)
1767 iv = get_iv (data, PHI_RESULT (stmt));
1769 if (iv && !integer_zerop (iv->step))
1773 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1775 op = USE_FROM_PTR (use_p);
1777 if (TREE_CODE (op) != SSA_NAME)
1780 iv = get_iv (data, op);
1784 find_interesting_uses_op (data, op);
1788 /* Finds interesting uses of induction variables outside of loops
1789 on loop exit edge EXIT. */
1792 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1795 gimple_stmt_iterator psi;
1798 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1800 phi = gsi_stmt (psi);
1801 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1802 if (is_gimple_reg (def))
1803 find_interesting_uses_op (data, def);
1807 /* Finds uses of the induction variables that are interesting. */
1810 find_interesting_uses (struct ivopts_data *data)
1813 gimple_stmt_iterator bsi;
1814 basic_block *body = get_loop_body (data->current_loop);
1816 struct version_info *info;
1819 if (dump_file && (dump_flags & TDF_DETAILS))
1820 fprintf (dump_file, "Uses:\n\n");
1822 for (i = 0; i < data->current_loop->num_nodes; i++)
1827 FOR_EACH_EDGE (e, ei, bb->succs)
1828 if (e->dest != EXIT_BLOCK_PTR
1829 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1830 find_interesting_uses_outside (data, e);
1832 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1833 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1834 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1835 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1838 if (dump_file && (dump_flags & TDF_DETAILS))
1842 fprintf (dump_file, "\n");
1844 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1846 info = ver_info (data, i);
1849 fprintf (dump_file, " ");
1850 print_generic_expr (dump_file, info->name, TDF_SLIM);
1851 fprintf (dump_file, " is invariant (%d)%s\n",
1852 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1856 fprintf (dump_file, "\n");
1862 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1863 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1864 we are at the top-level of the processed address. */
1867 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1868 unsigned HOST_WIDE_INT *offset)
1870 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1871 enum tree_code code;
1872 tree type, orig_type = TREE_TYPE (expr);
1873 unsigned HOST_WIDE_INT off0, off1, st;
1874 tree orig_expr = expr;
1878 type = TREE_TYPE (expr);
1879 code = TREE_CODE (expr);
1885 if (!cst_and_fits_in_hwi (expr)
1886 || integer_zerop (expr))
1889 *offset = int_cst_value (expr);
1890 return build_int_cst (orig_type, 0);
1892 case POINTER_PLUS_EXPR:
1895 op0 = TREE_OPERAND (expr, 0);
1896 op1 = TREE_OPERAND (expr, 1);
1898 op0 = strip_offset_1 (op0, false, false, &off0);
1899 op1 = strip_offset_1 (op1, false, false, &off1);
1901 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1902 if (op0 == TREE_OPERAND (expr, 0)
1903 && op1 == TREE_OPERAND (expr, 1))
1906 if (integer_zerop (op1))
1908 else if (integer_zerop (op0))
1910 if (code == MINUS_EXPR)
1911 expr = fold_build1 (NEGATE_EXPR, type, op1);
1916 expr = fold_build2 (code, type, op0, op1);
1918 return fold_convert (orig_type, expr);
1924 step = array_ref_element_size (expr);
1925 if (!cst_and_fits_in_hwi (step))
1928 st = int_cst_value (step);
1929 op1 = TREE_OPERAND (expr, 1);
1930 op1 = strip_offset_1 (op1, false, false, &off1);
1931 *offset = off1 * st;
1934 && integer_zerop (op1))
1936 /* Strip the component reference completely. */
1937 op0 = TREE_OPERAND (expr, 0);
1938 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1948 tmp = component_ref_field_offset (expr);
1950 && cst_and_fits_in_hwi (tmp))
1952 /* Strip the component reference completely. */
1953 op0 = TREE_OPERAND (expr, 0);
1954 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1955 *offset = off0 + int_cst_value (tmp);
1961 op0 = TREE_OPERAND (expr, 0);
1962 op0 = strip_offset_1 (op0, true, true, &off0);
1965 if (op0 == TREE_OPERAND (expr, 0))
1968 expr = build_fold_addr_expr (op0);
1969 return fold_convert (orig_type, expr);
1972 inside_addr = false;
1979 /* Default handling of expressions for that we want to recurse into
1980 the first operand. */
1981 op0 = TREE_OPERAND (expr, 0);
1982 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1985 if (op0 == TREE_OPERAND (expr, 0)
1986 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1989 expr = copy_node (expr);
1990 TREE_OPERAND (expr, 0) = op0;
1992 TREE_OPERAND (expr, 1) = op1;
1994 /* Inside address, we might strip the top level component references,
1995 thus changing type of the expression. Handling of ADDR_EXPR
1997 expr = fold_convert (orig_type, expr);
2002 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2005 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2007 return strip_offset_1 (expr, false, false, offset);
2010 /* Returns variant of TYPE that can be used as base for different uses.
2011 We return unsigned type with the same precision, which avoids problems
2015 generic_type_for (tree type)
2017 if (POINTER_TYPE_P (type))
2018 return unsigned_type_for (type);
2020 if (TYPE_UNSIGNED (type))
2023 return unsigned_type_for (type);
2026 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2027 the bitmap to that we should store it. */
2029 static struct ivopts_data *fd_ivopts_data;
2031 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2033 bitmap *depends_on = (bitmap *) data;
2034 struct version_info *info;
2036 if (TREE_CODE (*expr_p) != SSA_NAME)
2038 info = name_info (fd_ivopts_data, *expr_p);
2040 if (!info->inv_id || info->has_nonlin_use)
2044 *depends_on = BITMAP_ALLOC (NULL);
2045 bitmap_set_bit (*depends_on, info->inv_id);
2050 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2051 position to POS. If USE is not NULL, the candidate is set as related to
2052 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2053 replacement of the final value of the iv by a direct computation. */
2055 static struct iv_cand *
2056 add_candidate_1 (struct ivopts_data *data,
2057 tree base, tree step, bool important, enum iv_position pos,
2058 struct iv_use *use, gimple incremented_at)
2061 struct iv_cand *cand = NULL;
2062 tree type, orig_type;
2066 orig_type = TREE_TYPE (base);
2067 type = generic_type_for (orig_type);
2068 /* Don't convert the base to the generic type for pointers as the generic
2069 type is an integer type with the same size as the pointer type. */
2070 if (type != orig_type && !POINTER_TYPE_P (orig_type))
2072 base = fold_convert (type, base);
2073 step = fold_convert (type, step);
2077 for (i = 0; i < n_iv_cands (data); i++)
2079 cand = iv_cand (data, i);
2081 if (cand->pos != pos)
2084 if (cand->incremented_at != incremented_at)
2098 if (operand_equal_p (base, cand->iv->base, 0)
2099 && operand_equal_p (step, cand->iv->step, 0))
2103 if (i == n_iv_cands (data))
2105 cand = XCNEW (struct iv_cand);
2111 cand->iv = alloc_iv (base, step);
2114 if (pos != IP_ORIGINAL && cand->iv)
2116 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2117 cand->var_after = cand->var_before;
2119 cand->important = important;
2120 cand->incremented_at = incremented_at;
2121 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2124 && TREE_CODE (step) != INTEGER_CST)
2126 fd_ivopts_data = data;
2127 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2130 if (dump_file && (dump_flags & TDF_DETAILS))
2131 dump_cand (dump_file, cand);
2134 if (important && !cand->important)
2136 cand->important = true;
2137 if (dump_file && (dump_flags & TDF_DETAILS))
2138 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2143 bitmap_set_bit (use->related_cands, i);
2144 if (dump_file && (dump_flags & TDF_DETAILS))
2145 fprintf (dump_file, "Candidate %d is related to use %d\n",
2152 /* Returns true if incrementing the induction variable at the end of the LOOP
2155 The purpose is to avoid splitting latch edge with a biv increment, thus
2156 creating a jump, possibly confusing other optimization passes and leaving
2157 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2158 is not available (so we do not have a better alternative), or if the latch
2159 edge is already nonempty. */
2162 allow_ip_end_pos_p (struct loop *loop)
2164 if (!ip_normal_pos (loop))
2167 if (!empty_block_p (ip_end_pos (loop)))
2173 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2174 position to POS. If USE is not NULL, the candidate is set as related to
2175 it. The candidate computation is scheduled on all available positions. */
2178 add_candidate (struct ivopts_data *data,
2179 tree base, tree step, bool important, struct iv_use *use)
2181 if (ip_normal_pos (data->current_loop))
2182 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2183 if (ip_end_pos (data->current_loop)
2184 && allow_ip_end_pos_p (data->current_loop))
2185 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2188 /* Add a standard "0 + 1 * iteration" iv candidate for a
2189 type with SIZE bits. */
2192 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2195 tree type = lang_hooks.types.type_for_size (size, true);
2196 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2200 /* Adds standard iv candidates. */
2203 add_standard_iv_candidates (struct ivopts_data *data)
2205 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2207 /* The same for a double-integer type if it is still fast enough. */
2208 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2209 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2213 /* Adds candidates bases on the old induction variable IV. */
2216 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2220 struct iv_cand *cand;
2222 add_candidate (data, iv->base, iv->step, true, NULL);
2224 /* The same, but with initial value zero. */
2225 add_candidate (data,
2226 build_int_cst (TREE_TYPE (iv->base), 0),
2227 iv->step, true, NULL);
2229 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2230 if (gimple_code (phi) == GIMPLE_PHI)
2232 /* Additionally record the possibility of leaving the original iv
2234 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2235 cand = add_candidate_1 (data,
2236 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2237 SSA_NAME_DEF_STMT (def));
2238 cand->var_before = iv->ssa_name;
2239 cand->var_after = def;
2243 /* Adds candidates based on the old induction variables. */
2246 add_old_ivs_candidates (struct ivopts_data *data)
2252 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2254 iv = ver_info (data, i)->iv;
2255 if (iv && iv->biv_p && !integer_zerop (iv->step))
2256 add_old_iv_candidates (data, iv);
2260 /* Adds candidates based on the value of the induction variable IV and USE. */
2263 add_iv_value_candidates (struct ivopts_data *data,
2264 struct iv *iv, struct iv_use *use)
2266 unsigned HOST_WIDE_INT offset;
2270 add_candidate (data, iv->base, iv->step, false, use);
2272 /* The same, but with initial value zero. Make such variable important,
2273 since it is generic enough so that possibly many uses may be based
2275 basetype = TREE_TYPE (iv->base);
2276 if (POINTER_TYPE_P (basetype))
2277 basetype = sizetype;
2278 add_candidate (data, build_int_cst (basetype, 0),
2279 iv->step, true, use);
2281 /* Third, try removing the constant offset. Make sure to even
2282 add a candidate for &a[0] vs. (T *)&a. */
2283 base = strip_offset (iv->base, &offset);
2285 || base != iv->base)
2286 add_candidate (data, base, iv->step, false, use);
2289 /* Adds candidates based on the uses. */
2292 add_derived_ivs_candidates (struct ivopts_data *data)
2296 for (i = 0; i < n_iv_uses (data); i++)
2298 struct iv_use *use = iv_use (data, i);
2305 case USE_NONLINEAR_EXPR:
2308 /* Just add the ivs based on the value of the iv used here. */
2309 add_iv_value_candidates (data, use->iv, use);
2318 /* Record important candidates and add them to related_cands bitmaps
2322 record_important_candidates (struct ivopts_data *data)
2327 for (i = 0; i < n_iv_cands (data); i++)
2329 struct iv_cand *cand = iv_cand (data, i);
2331 if (cand->important)
2332 bitmap_set_bit (data->important_candidates, i);
2335 data->consider_all_candidates = (n_iv_cands (data)
2336 <= CONSIDER_ALL_CANDIDATES_BOUND);
2338 if (data->consider_all_candidates)
2340 /* We will not need "related_cands" bitmaps in this case,
2341 so release them to decrease peak memory consumption. */
2342 for (i = 0; i < n_iv_uses (data); i++)
2344 use = iv_use (data, i);
2345 BITMAP_FREE (use->related_cands);
2350 /* Add important candidates to the related_cands bitmaps. */
2351 for (i = 0; i < n_iv_uses (data); i++)
2352 bitmap_ior_into (iv_use (data, i)->related_cands,
2353 data->important_candidates);
2357 /* Finds the candidates for the induction variables. */
2360 find_iv_candidates (struct ivopts_data *data)
2362 /* Add commonly used ivs. */
2363 add_standard_iv_candidates (data);
2365 /* Add old induction variables. */
2366 add_old_ivs_candidates (data);
2368 /* Add induction variables derived from uses. */
2369 add_derived_ivs_candidates (data);
2371 /* Record the important candidates. */
2372 record_important_candidates (data);
2375 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2376 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2377 we allocate a simple list to every use. */
2380 alloc_use_cost_map (struct ivopts_data *data)
2382 unsigned i, size, s, j;
2384 for (i = 0; i < n_iv_uses (data); i++)
2386 struct iv_use *use = iv_use (data, i);
2389 if (data->consider_all_candidates)
2390 size = n_iv_cands (data);
2394 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2399 /* Round up to the power of two, so that moduling by it is fast. */
2400 for (size = 1; size < s; size <<= 1)
2404 use->n_map_members = size;
2405 use->cost_map = XCNEWVEC (struct cost_pair, size);
2409 /* Returns description of computation cost of expression whose runtime
2410 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2413 new_cost (unsigned runtime, unsigned complexity)
2417 cost.cost = runtime;
2418 cost.complexity = complexity;
2423 /* Adds costs COST1 and COST2. */
2426 add_costs (comp_cost cost1, comp_cost cost2)
2428 cost1.cost += cost2.cost;
2429 cost1.complexity += cost2.complexity;
2433 /* Subtracts costs COST1 and COST2. */
2436 sub_costs (comp_cost cost1, comp_cost cost2)
2438 cost1.cost -= cost2.cost;
2439 cost1.complexity -= cost2.complexity;
2444 /* Returns a negative number if COST1 < COST2, a positive number if
2445 COST1 > COST2, and 0 if COST1 = COST2. */
2448 compare_costs (comp_cost cost1, comp_cost cost2)
2450 if (cost1.cost == cost2.cost)
2451 return cost1.complexity - cost2.complexity;
2453 return cost1.cost - cost2.cost;
2456 /* Returns true if COST is infinite. */
2459 infinite_cost_p (comp_cost cost)
2461 return cost.cost == INFTY;
2464 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2465 on invariants DEPENDS_ON and that the value used in expressing it
2469 set_use_iv_cost (struct ivopts_data *data,
2470 struct iv_use *use, struct iv_cand *cand,
2471 comp_cost cost, bitmap depends_on, tree value)
2475 if (infinite_cost_p (cost))
2477 BITMAP_FREE (depends_on);
2481 if (data->consider_all_candidates)
2483 use->cost_map[cand->id].cand = cand;
2484 use->cost_map[cand->id].cost = cost;
2485 use->cost_map[cand->id].depends_on = depends_on;
2486 use->cost_map[cand->id].value = value;
2490 /* n_map_members is a power of two, so this computes modulo. */
2491 s = cand->id & (use->n_map_members - 1);
2492 for (i = s; i < use->n_map_members; i++)
2493 if (!use->cost_map[i].cand)
2495 for (i = 0; i < s; i++)
2496 if (!use->cost_map[i].cand)
2502 use->cost_map[i].cand = cand;
2503 use->cost_map[i].cost = cost;
2504 use->cost_map[i].depends_on = depends_on;
2505 use->cost_map[i].value = value;
2508 /* Gets cost of (USE, CANDIDATE) pair. */
2510 static struct cost_pair *
2511 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2512 struct iv_cand *cand)
2515 struct cost_pair *ret;
2520 if (data->consider_all_candidates)
2522 ret = use->cost_map + cand->id;
2529 /* n_map_members is a power of two, so this computes modulo. */
2530 s = cand->id & (use->n_map_members - 1);
2531 for (i = s; i < use->n_map_members; i++)
2532 if (use->cost_map[i].cand == cand)
2533 return use->cost_map + i;
2535 for (i = 0; i < s; i++)
2536 if (use->cost_map[i].cand == cand)
2537 return use->cost_map + i;
2542 /* Returns estimate on cost of computing SEQ. */
2545 seq_cost (rtx seq, bool speed)
2550 for (; seq; seq = NEXT_INSN (seq))
2552 set = single_set (seq);
2554 cost += rtx_cost (set, SET,speed);
2562 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2564 produce_memory_decl_rtl (tree obj, int *regno)
2569 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2571 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2572 x = gen_rtx_SYMBOL_REF (Pmode, name);
2573 SET_SYMBOL_REF_DECL (x, obj);
2574 x = gen_rtx_MEM (DECL_MODE (obj), x);
2575 targetm.encode_section_info (obj, x, true);
2579 x = gen_raw_REG (Pmode, (*regno)++);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2586 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2587 walk_tree. DATA contains the actual fake register number. */
2590 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2592 tree obj = NULL_TREE;
2594 int *regno = (int *) data;
2596 switch (TREE_CODE (*expr_p))
2599 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2600 handled_component_p (*expr_p);
2601 expr_p = &TREE_OPERAND (*expr_p, 0))
2604 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2605 x = produce_memory_decl_rtl (obj, regno);
2610 obj = SSA_NAME_VAR (*expr_p);
2611 if (!DECL_RTL_SET_P (obj))
2612 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2621 if (DECL_RTL_SET_P (obj))
2624 if (DECL_MODE (obj) == BLKmode)
2625 x = produce_memory_decl_rtl (obj, regno);
2627 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2637 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2638 SET_DECL_RTL (obj, x);
2644 /* Determines cost of the computation of EXPR. */
2647 computation_cost (tree expr, bool speed)
2650 tree type = TREE_TYPE (expr);
2652 /* Avoid using hard regs in ways which may be unsupported. */
2653 int regno = LAST_VIRTUAL_REGISTER + 1;
2654 enum function_frequency real_frequency = cfun->function_frequency;
2656 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2657 crtl->maybe_hot_insn_p = speed;
2658 walk_tree (&expr, prepare_decl_rtl, ®no, NULL);
2660 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2663 default_rtl_profile ();
2664 cfun->function_frequency = real_frequency;
2666 cost = seq_cost (seq, speed);
2668 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2673 /* Returns variable containing the value of candidate CAND at statement AT. */
2676 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2678 if (stmt_after_increment (loop, cand, stmt))
2679 return cand->var_after;
2681 return cand->var_before;
2684 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2685 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2688 tree_int_cst_sign_bit (const_tree t)
2690 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2691 unsigned HOST_WIDE_INT w;
2693 if (bitno < HOST_BITS_PER_WIDE_INT)
2694 w = TREE_INT_CST_LOW (t);
2697 w = TREE_INT_CST_HIGH (t);
2698 bitno -= HOST_BITS_PER_WIDE_INT;
2701 return (w >> bitno) & 1;
2704 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2705 same precision that is at least as wide as the precision of TYPE, stores
2706 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2710 determine_common_wider_type (tree *a, tree *b)
2712 tree wider_type = NULL;
2714 tree atype = TREE_TYPE (*a);
2716 if (CONVERT_EXPR_P (*a))
2718 suba = TREE_OPERAND (*a, 0);
2719 wider_type = TREE_TYPE (suba);
2720 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2726 if (CONVERT_EXPR_P (*b))
2728 subb = TREE_OPERAND (*b, 0);
2729 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2740 /* Determines the expression by that USE is expressed from induction variable
2741 CAND at statement AT in LOOP. The expression is stored in a decomposed
2742 form into AFF. Returns false if USE cannot be expressed using CAND. */
2745 get_computation_aff (struct loop *loop,
2746 struct iv_use *use, struct iv_cand *cand, gimple at,
2747 struct affine_tree_combination *aff)
2749 tree ubase = use->iv->base;
2750 tree ustep = use->iv->step;
2751 tree cbase = cand->iv->base;
2752 tree cstep = cand->iv->step, cstep_common;
2753 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2754 tree common_type, var;
2756 aff_tree cbase_aff, var_aff;
2759 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2761 /* We do not have a precision to express the values of use. */
2765 var = var_at_stmt (loop, cand, at);
2766 uutype = unsigned_type_for (utype);
2768 /* If the conversion is not noop, perform it. */
2769 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2771 cstep = fold_convert (uutype, cstep);
2772 cbase = fold_convert (uutype, cbase);
2773 var = fold_convert (uutype, var);
2776 if (!constant_multiple_of (ustep, cstep, &rat))
2779 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2780 type, we achieve better folding by computing their difference in this
2781 wider type, and cast the result to UUTYPE. We do not need to worry about
2782 overflows, as all the arithmetics will in the end be performed in UUTYPE
2784 common_type = determine_common_wider_type (&ubase, &cbase);
2786 /* use = ubase - ratio * cbase + ratio * var. */
2787 tree_to_aff_combination (ubase, common_type, aff);
2788 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2789 tree_to_aff_combination (var, uutype, &var_aff);
2791 /* We need to shift the value if we are after the increment. */
2792 if (stmt_after_increment (loop, cand, at))
2796 if (common_type != uutype)
2797 cstep_common = fold_convert (common_type, cstep);
2799 cstep_common = cstep;
2801 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2802 aff_combination_add (&cbase_aff, &cstep_aff);
2805 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2806 aff_combination_add (aff, &cbase_aff);
2807 if (common_type != uutype)
2808 aff_combination_convert (aff, uutype);
2810 aff_combination_scale (&var_aff, rat);
2811 aff_combination_add (aff, &var_aff);
2816 /* Determines the expression by that USE is expressed from induction variable
2817 CAND at statement AT in LOOP. The computation is unshared. */
2820 get_computation_at (struct loop *loop,
2821 struct iv_use *use, struct iv_cand *cand, gimple at)
2824 tree type = TREE_TYPE (use->iv->base);
2826 if (!get_computation_aff (loop, use, cand, at, &aff))
2828 unshare_aff_combination (&aff);
2829 return fold_convert (type, aff_combination_to_tree (&aff));
2832 /* Determines the expression by that USE is expressed from induction variable
2833 CAND in LOOP. The computation is unshared. */
2836 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2838 return get_computation_at (loop, use, cand, use->stmt);
2841 /* Returns cost of addition in MODE. */
2844 add_cost (enum machine_mode mode, bool speed)
2846 static unsigned costs[NUM_MACHINE_MODES];
2854 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2855 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2856 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2861 cost = seq_cost (seq, speed);
2867 if (dump_file && (dump_flags & TDF_DETAILS))
2868 fprintf (dump_file, "Addition in %s costs %d\n",
2869 GET_MODE_NAME (mode), cost);
2873 /* Entry in a hashtable of already known costs for multiplication. */
2876 HOST_WIDE_INT cst; /* The constant to multiply by. */
2877 enum machine_mode mode; /* In mode. */
2878 unsigned cost; /* The cost. */
2881 /* Counts hash value for the ENTRY. */
2884 mbc_entry_hash (const void *entry)
2886 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2888 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2891 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2894 mbc_entry_eq (const void *entry1, const void *entry2)
2896 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2897 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2899 return (e1->mode == e2->mode
2900 && e1->cst == e2->cst);
2903 /* Returns cost of multiplication by constant CST in MODE. */
2906 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2908 static htab_t costs;
2909 struct mbc_entry **cached, act;
2914 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2918 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2920 return (*cached)->cost;
2922 *cached = XNEW (struct mbc_entry);
2923 (*cached)->mode = mode;
2924 (*cached)->cst = cst;
2927 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2928 gen_int_mode (cst, mode), NULL_RTX, 0);
2932 cost = seq_cost (seq, speed);
2934 if (dump_file && (dump_flags & TDF_DETAILS))
2935 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2936 (int) cst, GET_MODE_NAME (mode), cost);
2938 (*cached)->cost = cost;
2943 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2944 validity for a memory reference accessing memory of mode MODE. */
2947 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2949 #define MAX_RATIO 128
2950 static sbitmap valid_mult[MAX_MACHINE_MODE];
2952 if (!valid_mult[mode])
2954 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2958 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2959 sbitmap_zero (valid_mult[mode]);
2960 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2961 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2963 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2964 if (memory_address_p (mode, addr))
2965 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2968 if (dump_file && (dump_flags & TDF_DETAILS))
2970 fprintf (dump_file, " allowed multipliers:");
2971 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2972 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2973 fprintf (dump_file, " %d", (int) i);
2974 fprintf (dump_file, "\n");
2975 fprintf (dump_file, "\n");
2979 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2982 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2985 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2986 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2987 variable is omitted. Compute the cost for a memory reference that accesses
2988 a memory location of mode MEM_MODE.
2990 TODO -- there must be some better way. This all is quite crude. */
2993 get_address_cost (bool symbol_present, bool var_present,
2994 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2995 enum machine_mode mem_mode,
2998 static bool initialized[MAX_MACHINE_MODE];
2999 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3000 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3001 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3002 unsigned cost, acost, complexity;
3003 bool offset_p, ratio_p;
3004 HOST_WIDE_INT s_offset;
3005 unsigned HOST_WIDE_INT mask;
3008 if (!initialized[mem_mode])
3011 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3012 int old_cse_not_expected;
3013 unsigned sym_p, var_p, off_p, rat_p, add_c;
3014 rtx seq, addr, base;
3017 initialized[mem_mode] = true;
3019 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3021 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3022 for (i = start; i <= 1 << 20; i <<= 1)
3024 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3025 if (!memory_address_p (mem_mode, addr))
3028 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3029 off[mem_mode] = max_offset[mem_mode];
3031 for (i = start; i <= 1 << 20; i <<= 1)
3033 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3034 if (!memory_address_p (mem_mode, addr))
3037 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3039 if (dump_file && (dump_flags & TDF_DETAILS))
3041 fprintf (dump_file, "get_address_cost:\n");
3042 fprintf (dump_file, " min offset %s %d\n",
3043 GET_MODE_NAME (mem_mode),
3044 (int) min_offset[mem_mode]);
3045 fprintf (dump_file, " max offset %s %d\n",
3046 GET_MODE_NAME (mem_mode),
3047 (int) max_offset[mem_mode]);
3051 for (i = 2; i <= MAX_RATIO; i++)
3052 if (multiplier_allowed_in_address_p (i, mem_mode))
3058 /* Compute the cost of various addressing modes. */
3060 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3061 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3063 for (i = 0; i < 16; i++)
3066 var_p = (i >> 1) & 1;
3067 off_p = (i >> 2) & 1;
3068 rat_p = (i >> 3) & 1;
3072 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3073 gen_int_mode (rat[mem_mode], Pmode));
3076 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3080 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3081 /* ??? We can run into trouble with some backends by presenting
3082 it with symbols which haven't been properly passed through
3083 targetm.encode_section_info. By setting the local bit, we
3084 enhance the probability of things working. */
3085 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3088 base = gen_rtx_fmt_e (CONST, Pmode,
3089 gen_rtx_fmt_ee (PLUS, Pmode,
3091 gen_int_mode (off[mem_mode],
3095 base = gen_int_mode (off[mem_mode], Pmode);
3100 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3103 /* To avoid splitting addressing modes, pretend that no cse will
3105 old_cse_not_expected = cse_not_expected;
3106 cse_not_expected = true;
3107 addr = memory_address (mem_mode, addr);
3108 cse_not_expected = old_cse_not_expected;
3112 acost = seq_cost (seq, speed);
3113 acost += address_cost (addr, mem_mode, speed);
3117 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3120 /* On some targets, it is quite expensive to load symbol to a register,
3121 which makes addresses that contain symbols look much more expensive.
3122 However, the symbol will have to be loaded in any case before the
3123 loop (and quite likely we have it in register already), so it does not
3124 make much sense to penalize them too heavily. So make some final
3125 tweaks for the SYMBOL_PRESENT modes:
3127 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3128 var is cheaper, use this mode with small penalty.
3129 If VAR_PRESENT is true, try whether the mode with
3130 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3131 if this is the case, use it. */
3132 add_c = add_cost (Pmode, speed);
3133 for (i = 0; i < 8; i++)
3136 off_p = (i >> 1) & 1;
3137 rat_p = (i >> 2) & 1;
3139 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3143 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3144 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3147 if (dump_file && (dump_flags & TDF_DETAILS))
3149 fprintf (dump_file, "Address costs:\n");
3151 for (i = 0; i < 16; i++)
3154 var_p = (i >> 1) & 1;
3155 off_p = (i >> 2) & 1;
3156 rat_p = (i >> 3) & 1;
3158 fprintf (dump_file, " ");
3160 fprintf (dump_file, "sym + ");
3162 fprintf (dump_file, "var + ");
3164 fprintf (dump_file, "cst + ");
3166 fprintf (dump_file, "rat * ");
3168 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3169 fprintf (dump_file, "index costs %d\n", acost);
3171 fprintf (dump_file, "\n");
3175 bits = GET_MODE_BITSIZE (Pmode);
3176 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3178 if ((offset >> (bits - 1) & 1))
3183 offset_p = (s_offset != 0
3184 && min_offset[mem_mode] <= s_offset
3185 && s_offset <= max_offset[mem_mode]);
3186 ratio_p = (ratio != 1
3187 && multiplier_allowed_in_address_p (ratio, mem_mode));
3189 if (ratio != 1 && !ratio_p)
3190 cost += multiply_by_cost (ratio, Pmode, speed);
3192 if (s_offset && !offset_p && !symbol_present)
3193 cost += add_cost (Pmode, speed);
3195 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3196 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3197 return new_cost (cost + acost, complexity);
3200 /* Estimates cost of forcing expression EXPR into a variable. */
3203 force_expr_to_var_cost (tree expr, bool speed)
3205 static bool costs_initialized = false;
3206 static unsigned integer_cost [2];
3207 static unsigned symbol_cost [2];
3208 static unsigned address_cost [2];
3210 comp_cost cost0, cost1, cost;
3211 enum machine_mode mode;
3213 if (!costs_initialized)
3215 tree type = build_pointer_type (integer_type_node);
3220 var = create_tmp_var_raw (integer_type_node, "test_var");
3221 TREE_STATIC (var) = 1;
3222 x = produce_memory_decl_rtl (var, NULL);
3223 SET_DECL_RTL (var, x);
3225 addr = build1 (ADDR_EXPR, type, var);
3228 for (i = 0; i < 2; i++)
3230 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3233 symbol_cost[i] = computation_cost (addr, i) + 1;
3236 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3238 build_int_cst (sizetype, 2000)), i) + 1;
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3242 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3243 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3244 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3245 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3246 fprintf (dump_file, "\n");
3250 costs_initialized = true;
3255 if (SSA_VAR_P (expr))
3258 if (is_gimple_min_invariant (expr))
3260 if (TREE_CODE (expr) == INTEGER_CST)
3261 return new_cost (integer_cost [speed], 0);
3263 if (TREE_CODE (expr) == ADDR_EXPR)
3265 tree obj = TREE_OPERAND (expr, 0);
3267 if (TREE_CODE (obj) == VAR_DECL
3268 || TREE_CODE (obj) == PARM_DECL
3269 || TREE_CODE (obj) == RESULT_DECL)
3270 return new_cost (symbol_cost [speed], 0);
3273 return new_cost (address_cost [speed], 0);
3276 switch (TREE_CODE (expr))
3278 case POINTER_PLUS_EXPR:
3282 op0 = TREE_OPERAND (expr, 0);
3283 op1 = TREE_OPERAND (expr, 1);
3287 if (is_gimple_val (op0))
3290 cost0 = force_expr_to_var_cost (op0, speed);
3292 if (is_gimple_val (op1))
3295 cost1 = force_expr_to_var_cost (op1, speed);
3300 /* Just an arbitrary value, FIXME. */
3301 return new_cost (target_spill_cost[speed], 0);
3304 mode = TYPE_MODE (TREE_TYPE (expr));
3305 switch (TREE_CODE (expr))
3307 case POINTER_PLUS_EXPR:
3310 cost = new_cost (add_cost (mode, speed), 0);
3314 if (cst_and_fits_in_hwi (op0))
3315 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3316 else if (cst_and_fits_in_hwi (op1))
3317 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3319 return new_cost (target_spill_cost [speed], 0);
3326 cost = add_costs (cost, cost0);
3327 cost = add_costs (cost, cost1);
3329 /* Bound the cost by target_spill_cost. The parts of complicated
3330 computations often are either loop invariant or at least can
3331 be shared between several iv uses, so letting this grow without
3332 limits would not give reasonable results. */
3333 if (cost.cost > target_spill_cost [speed])
3334 cost.cost = target_spill_cost [speed];
3339 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3340 invariants the computation depends on. */
3343 force_var_cost (struct ivopts_data *data,
3344 tree expr, bitmap *depends_on)
3348 fd_ivopts_data = data;
3349 walk_tree (&expr, find_depends, depends_on, NULL);
3352 return force_expr_to_var_cost (expr, data->speed);
3355 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3356 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3357 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3358 invariants the computation depends on. */
3361 split_address_cost (struct ivopts_data *data,
3362 tree addr, bool *symbol_present, bool *var_present,
3363 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3366 HOST_WIDE_INT bitsize;
3367 HOST_WIDE_INT bitpos;
3369 enum machine_mode mode;
3370 int unsignedp, volatilep;
3372 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3373 &unsignedp, &volatilep, false);
3376 || bitpos % BITS_PER_UNIT != 0
3377 || TREE_CODE (core) != VAR_DECL)
3379 *symbol_present = false;
3380 *var_present = true;
3381 fd_ivopts_data = data;
3382 walk_tree (&addr, find_depends, depends_on, NULL);
3383 return new_cost (target_spill_cost[data->speed], 0);
3386 *offset += bitpos / BITS_PER_UNIT;
3387 if (TREE_STATIC (core)
3388 || DECL_EXTERNAL (core))
3390 *symbol_present = true;
3391 *var_present = false;
3395 *symbol_present = false;
3396 *var_present = true;
3400 /* Estimates cost of expressing difference of addresses E1 - E2 as
3401 var + symbol + offset. The value of offset is added to OFFSET,
3402 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3403 part is missing. DEPENDS_ON is a set of the invariants the computation
3407 ptr_difference_cost (struct ivopts_data *data,
3408 tree e1, tree e2, bool *symbol_present, bool *var_present,
3409 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3411 HOST_WIDE_INT diff = 0;
3413 bool speed = optimize_loop_for_speed_p (data->current_loop);
3415 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3417 if (ptr_difference_const (e1, e2, &diff))
3420 *symbol_present = false;
3421 *var_present = false;
3425 if (integer_zerop (e2))
3426 return split_address_cost (data, TREE_OPERAND (e1, 0),
3427 symbol_present, var_present, offset, depends_on);
3429 *symbol_present = false;
3430 *var_present = true;
3432 cost = force_var_cost (data, e1, depends_on);
3433 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3434 cost.cost += add_cost (Pmode, speed);
3439 /* Estimates cost of expressing difference E1 - E2 as
3440 var + symbol + offset. The value of offset is added to OFFSET,
3441 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3442 part is missing. DEPENDS_ON is a set of the invariants the computation
3446 difference_cost (struct ivopts_data *data,
3447 tree e1, tree e2, bool *symbol_present, bool *var_present,
3448 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3452 unsigned HOST_WIDE_INT off1, off2;
3454 e1 = strip_offset (e1, &off1);
3455 e2 = strip_offset (e2, &off2);
3456 *offset += off1 - off2;
3461 if (TREE_CODE (e1) == ADDR_EXPR)
3462 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3464 *symbol_present = false;
3466 if (operand_equal_p (e1, e2, 0))
3468 *var_present = false;
3471 *var_present = true;
3472 if (integer_zerop (e2))
3473 return force_var_cost (data, e1, depends_on);
3475 if (integer_zerop (e1))
3477 cost = force_var_cost (data, e2, depends_on);
3478 cost.cost += multiply_by_cost (-1, mode, data->speed);
3483 cost = force_var_cost (data, e1, depends_on);
3484 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3485 cost.cost += add_cost (mode, data->speed);
3490 /* Determines the cost of the computation by that USE is expressed
3491 from induction variable CAND. If ADDRESS_P is true, we just need
3492 to create an address from it, otherwise we want to get it into
3493 register. A set of invariants we depend on is stored in
3494 DEPENDS_ON. AT is the statement at that the value is computed. */
3497 get_computation_cost_at (struct ivopts_data *data,
3498 struct iv_use *use, struct iv_cand *cand,
3499 bool address_p, bitmap *depends_on, gimple at)
3501 tree ubase = use->iv->base, ustep = use->iv->step;
3503 tree utype = TREE_TYPE (ubase), ctype;
3504 unsigned HOST_WIDE_INT cstepi, offset = 0;
3505 HOST_WIDE_INT ratio, aratio;
3506 bool var_present, symbol_present;
3510 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3514 /* Only consider real candidates. */
3516 return infinite_cost;
3518 cbase = cand->iv->base;
3519 cstep = cand->iv->step;
3520 ctype = TREE_TYPE (cbase);
3522 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3524 /* We do not have a precision to express the values of use. */
3525 return infinite_cost;
3530 /* Do not try to express address of an object with computation based
3531 on address of a different object. This may cause problems in rtl
3532 level alias analysis (that does not expect this to be happening,
3533 as this is illegal in C), and would be unlikely to be useful
3535 if (use->iv->base_object
3536 && cand->iv->base_object
3537 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3538 return infinite_cost;
3541 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3543 /* TODO -- add direct handling of this case. */
3547 /* CSTEPI is removed from the offset in case statement is after the
3548 increment. If the step is not constant, we use zero instead.
3549 This is a bit imprecise (there is the extra addition), but
3550 redundancy elimination is likely to transform the code so that
3551 it uses value of the variable before increment anyway,
3552 so it is not that much unrealistic. */
3553 if (cst_and_fits_in_hwi (cstep))
3554 cstepi = int_cst_value (cstep);
3558 if (!constant_multiple_of (ustep, cstep, &rat))
3559 return infinite_cost;
3561 if (double_int_fits_in_shwi_p (rat))
3562 ratio = double_int_to_shwi (rat);
3564 return infinite_cost;
3566 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3567 or ratio == 1, it is better to handle this like
3569 ubase - ratio * cbase + ratio * var
3571 (also holds in the case ratio == -1, TODO. */
3573 if (cst_and_fits_in_hwi (cbase))
3575 offset = - ratio * int_cst_value (cbase);
3576 cost = difference_cost (data,
3577 ubase, build_int_cst (utype, 0),
3578 &symbol_present, &var_present, &offset,
3581 else if (ratio == 1)
3583 cost = difference_cost (data,
3585 &symbol_present, &var_present, &offset,
3590 cost = force_var_cost (data, cbase, depends_on);
3591 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3592 cost = add_costs (cost,
3593 difference_cost (data,
3594 ubase, build_int_cst (utype, 0),
3595 &symbol_present, &var_present,
3596 &offset, depends_on));
3599 /* If we are after the increment, the value of the candidate is higher by
3601 if (stmt_after_increment (data->current_loop, cand, at))
3602 offset -= ratio * cstepi;
3604 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3605 (symbol/var/const parts may be omitted). If we are looking for an address,
3606 find the cost of addressing this. */
3608 return add_costs (cost, get_address_cost (symbol_present, var_present,
3610 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
3612 /* Otherwise estimate the costs for computing the expression. */
3613 aratio = ratio > 0 ? ratio : -ratio;
3614 if (!symbol_present && !var_present && !offset)
3617 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3623 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3627 /* Symbol + offset should be compile-time computable. */
3628 && (symbol_present || offset))
3631 /* Having offset does not affect runtime cost in case it is added to
3632 symbol, but it increases complexity. */
3636 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3641 /* Just get the expression, expand it and measure the cost. */
3642 tree comp = get_computation_at (data->current_loop, use, cand, at);
3645 return infinite_cost;
3648 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3650 return new_cost (computation_cost (comp, speed), 0);
3654 /* Determines the cost of the computation by that USE is expressed
3655 from induction variable CAND. If ADDRESS_P is true, we just need
3656 to create an address from it, otherwise we want to get it into
3657 register. A set of invariants we depend on is stored in
3661 get_computation_cost (struct ivopts_data *data,
3662 struct iv_use *use, struct iv_cand *cand,
3663 bool address_p, bitmap *depends_on)
3665 return get_computation_cost_at (data,
3666 use, cand, address_p, depends_on, use->stmt);
3669 /* Determines cost of basing replacement of USE on CAND in a generic
3673 determine_use_iv_cost_generic (struct ivopts_data *data,
3674 struct iv_use *use, struct iv_cand *cand)
3679 /* The simple case first -- if we need to express value of the preserved
3680 original biv, the cost is 0. This also prevents us from counting the
3681 cost of increment twice -- once at this use and once in the cost of
3683 if (cand->pos == IP_ORIGINAL
3684 && cand->incremented_at == use->stmt)
3686 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3690 cost = get_computation_cost (data, use, cand, false, &depends_on);
3691 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3693 return !infinite_cost_p (cost);
3696 /* Determines cost of basing replacement of USE on CAND in an address. */
3699 determine_use_iv_cost_address (struct ivopts_data *data,
3700 struct iv_use *use, struct iv_cand *cand)
3703 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3705 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3707 return !infinite_cost_p (cost);
3710 /* Computes value of candidate CAND at position AT in iteration NITER, and
3711 stores it to VAL. */
3714 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3717 aff_tree step, delta, nit;
3718 struct iv *iv = cand->iv;
3719 tree type = TREE_TYPE (iv->base);
3720 tree steptype = type;
3721 if (POINTER_TYPE_P (type))
3722 steptype = sizetype;
3724 tree_to_aff_combination (iv->step, steptype, &step);
3725 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3726 aff_combination_convert (&nit, steptype);
3727 aff_combination_mult (&nit, &step, &delta);
3728 if (stmt_after_increment (loop, cand, at))
3729 aff_combination_add (&delta, &step);
3731 tree_to_aff_combination (iv->base, type, val);
3732 aff_combination_add (val, &delta);
3735 /* Returns period of induction variable iv. */
3738 iv_period (struct iv *iv)
3740 tree step = iv->step, period, type;
3743 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3745 /* Period of the iv is gcd (step, type range). Since type range is power
3746 of two, it suffices to determine the maximum power of two that divides
3748 pow2div = num_ending_zeros (step);
3749 type = unsigned_type_for (TREE_TYPE (step));
3751 period = build_low_bits_mask (type,
3752 (TYPE_PRECISION (type)
3753 - tree_low_cst (pow2div, 1)));
3758 /* Returns the comparison operator used when eliminating the iv USE. */
3760 static enum tree_code
3761 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3763 struct loop *loop = data->current_loop;
3767 ex_bb = gimple_bb (use->stmt);
3768 exit = EDGE_SUCC (ex_bb, 0);
3769 if (flow_bb_inside_loop_p (loop, exit->dest))
3770 exit = EDGE_SUCC (ex_bb, 1);
3772 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3775 /* Check whether it is possible to express the condition in USE by comparison
3776 of candidate CAND. If so, store the value compared with to BOUND. */
3779 may_eliminate_iv (struct ivopts_data *data,
3780 struct iv_use *use, struct iv_cand *cand, tree *bound)
3785 struct loop *loop = data->current_loop;
3788 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3791 /* For now works only for exits that dominate the loop latch.
3792 TODO: extend to other conditions inside loop body. */
3793 ex_bb = gimple_bb (use->stmt);
3794 if (use->stmt != last_stmt (ex_bb)
3795 || gimple_code (use->stmt) != GIMPLE_COND
3796 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3799 exit = EDGE_SUCC (ex_bb, 0);
3800 if (flow_bb_inside_loop_p (loop, exit->dest))
3801 exit = EDGE_SUCC (ex_bb, 1);
3802 if (flow_bb_inside_loop_p (loop, exit->dest))
3805 nit = niter_for_exit (data, exit);
3809 /* Determine whether we can use the variable to test the exit condition.
3810 This is the case iff the period of the induction variable is greater
3811 than the number of iterations for which the exit condition is true. */
3812 period = iv_period (cand->iv);
3814 /* If the number of iterations is constant, compare against it directly. */
3815 if (TREE_CODE (nit) == INTEGER_CST)
3817 if (!tree_int_cst_lt (nit, period))
3821 /* If not, and if this is the only possible exit of the loop, see whether
3822 we can get a conservative estimate on the number of iterations of the
3823 entire loop and compare against that instead. */
3824 else if (loop_only_exit_p (loop, exit))
3826 double_int period_value, max_niter;
3827 if (!estimated_loop_iterations (loop, true, &max_niter))
3829 period_value = tree_to_double_int (period);
3830 if (double_int_ucmp (max_niter, period_value) >= 0)
3834 /* Otherwise, punt. */
3838 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3839 *bound = aff_combination_to_tree (&bnd);
3843 /* Determines cost of basing replacement of USE on CAND in a condition. */
3846 determine_use_iv_cost_condition (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand)
3849 tree bound = NULL_TREE;
3851 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3852 comp_cost elim_cost, express_cost, cost;
3855 /* Only consider real candidates. */
3858 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3862 /* Try iv elimination. */
3863 if (may_eliminate_iv (data, use, cand, &bound))
3865 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3866 /* The bound is a loop invariant, so it will be only computed
3868 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3871 elim_cost = infinite_cost;
3873 /* Try expressing the original giv. If it is compared with an invariant,
3874 note that we cannot get rid of it. */
3875 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
3878 express_cost = get_computation_cost (data, use, cand, false,
3879 &depends_on_express);
3880 fd_ivopts_data = data;
3881 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3883 /* Choose the better approach, preferring the eliminated IV. */
3884 if (compare_costs (elim_cost, express_cost) <= 0)
3887 depends_on = depends_on_elim;
3888 depends_on_elim = NULL;
3892 cost = express_cost;
3893 depends_on = depends_on_express;
3894 depends_on_express = NULL;
3898 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3900 if (depends_on_elim)
3901 BITMAP_FREE (depends_on_elim);
3902 if (depends_on_express)
3903 BITMAP_FREE (depends_on_express);
3905 return !infinite_cost_p (cost);
3908 /* Determines cost of basing replacement of USE on CAND. Returns false
3909 if USE cannot be based on CAND. */
3912 determine_use_iv_cost (struct ivopts_data *data,
3913 struct iv_use *use, struct iv_cand *cand)
3917 case USE_NONLINEAR_EXPR:
3918 return determine_use_iv_cost_generic (data, use, cand);
3921 return determine_use_iv_cost_address (data, use, cand);
3924 return determine_use_iv_cost_condition (data, use, cand);
3931 /* Determines costs of basing the use of the iv on an iv candidate. */
3934 determine_use_iv_costs (struct ivopts_data *data)
3938 struct iv_cand *cand;
3939 bitmap to_clear = BITMAP_ALLOC (NULL);
3941 alloc_use_cost_map (data);
3943 for (i = 0; i < n_iv_uses (data); i++)
3945 use = iv_use (data, i);
3947 if (data->consider_all_candidates)
3949 for (j = 0; j < n_iv_cands (data); j++)
3951 cand = iv_cand (data, j);
3952 determine_use_iv_cost (data, use, cand);
3959 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3961 cand = iv_cand (data, j);
3962 if (!determine_use_iv_cost (data, use, cand))
3963 bitmap_set_bit (to_clear, j);
3966 /* Remove the candidates for that the cost is infinite from
3967 the list of related candidates. */
3968 bitmap_and_compl_into (use->related_cands, to_clear);
3969 bitmap_clear (to_clear);
3973 BITMAP_FREE (to_clear);
3975 if (dump_file && (dump_flags & TDF_DETAILS))
3977 fprintf (dump_file, "Use-candidate costs:\n");
3979 for (i = 0; i < n_iv_uses (data); i++)
3981 use = iv_use (data, i);
3983 fprintf (dump_file, "Use %d:\n", i);
3984 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
3985 for (j = 0; j < use->n_map_members; j++)
3987 if (!use->cost_map[j].cand
3988 || infinite_cost_p (use->cost_map[j].cost))
3991 fprintf (dump_file, " %d\t%d\t%d\t",
3992 use->cost_map[j].cand->id,
3993 use->cost_map[j].cost.cost,
3994 use->cost_map[j].cost.complexity);
3995 if (use->cost_map[j].depends_on)
3996 bitmap_print (dump_file,
3997 use->cost_map[j].depends_on, "","");
3998 fprintf (dump_file, "\n");
4001 fprintf (dump_file, "\n");
4003 fprintf (dump_file, "\n");
4007 /* Determines cost of the candidate CAND. */
4010 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4012 comp_cost cost_base;
4013 unsigned cost, cost_step;
4022 /* There are two costs associated with the candidate -- its increment
4023 and its initialization. The second is almost negligible for any loop
4024 that rolls enough, so we take it just very little into account. */
4026 base = cand->iv->base;
4027 cost_base = force_var_cost (data, base, NULL);
4028 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4030 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4032 /* Prefer the original ivs unless we may gain something by replacing it.
4033 The reason is to make debugging simpler; so this is not relevant for
4034 artificial ivs created by other optimization passes. */
4035 if (cand->pos != IP_ORIGINAL
4036 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4039 /* Prefer not to insert statements into latch unless there are some
4040 already (so that we do not create unnecessary jumps). */
4041 if (cand->pos == IP_END
4042 && empty_block_p (ip_end_pos (data->current_loop)))
4048 /* Determines costs of computation of the candidates. */
4051 determine_iv_costs (struct ivopts_data *data)
4055 if (dump_file && (dump_flags & TDF_DETAILS))
4057 fprintf (dump_file, "Candidate costs:\n");
4058 fprintf (dump_file, " cand\tcost\n");
4061 for (i = 0; i < n_iv_cands (data); i++)
4063 struct iv_cand *cand = iv_cand (data, i);
4065 determine_iv_cost (data, cand);
4067 if (dump_file && (dump_flags & TDF_DETAILS))
4068 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4071 if (dump_file && (dump_flags & TDF_DETAILS))
4072 fprintf (dump_file, "\n");
4075 /* Calculates cost for having SIZE induction variables. */
4078 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4080 /* We add size to the cost, so that we prefer eliminating ivs
4082 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4085 /* For each size of the induction variable set determine the penalty. */
4088 determine_set_costs (struct ivopts_data *data)
4092 gimple_stmt_iterator psi;
4094 struct loop *loop = data->current_loop;
4097 /* We use the following model (definitely improvable, especially the
4098 cost function -- TODO):
4100 We estimate the number of registers available (using MD data), name it A.
4102 We estimate the number of registers used by the loop, name it U. This
4103 number is obtained as the number of loop phi nodes (not counting virtual
4104 registers and bivs) + the number of variables from outside of the loop.
4106 We set a reserve R (free regs that are used for temporary computations,
4107 etc.). For now the reserve is a constant 3.
4109 Let I be the number of induction variables.
4111 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4112 make a lot of ivs without a reason).
4113 -- if A - R < U + I <= A, the cost is I * PRES_COST
4114 -- if U + I > A, the cost is I * PRES_COST and
4115 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4119 fprintf (dump_file, "Global costs:\n");
4120 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4121 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4122 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4126 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4128 phi = gsi_stmt (psi);
4129 op = PHI_RESULT (phi);
4131 if (!is_gimple_reg (op))
4134 if (get_iv (data, op))
4140 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4142 struct version_info *info = ver_info (data, j);
4144 if (info->inv_id && info->has_nonlin_use)
4148 data->regs_used = n;
4149 if (dump_file && (dump_flags & TDF_DETAILS))
4150 fprintf (dump_file, " regs_used %d\n", n);
4152 if (dump_file && (dump_flags & TDF_DETAILS))
4154 fprintf (dump_file, " cost for size:\n");
4155 fprintf (dump_file, " ivs\tcost\n");
4156 for (j = 0; j <= 2 * target_avail_regs; j++)
4157 fprintf (dump_file, " %d\t%d\n", j,
4158 ivopts_global_cost_for_size (data, j));
4159 fprintf (dump_file, "\n");
4163 /* Returns true if A is a cheaper cost pair than B. */
4166 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4176 cmp = compare_costs (a->cost, b->cost);
4183 /* In case the costs are the same, prefer the cheaper candidate. */
4184 if (a->cand->cost < b->cand->cost)
4190 /* Computes the cost field of IVS structure. */
4193 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4195 comp_cost cost = ivs->cand_use_cost;
4196 cost.cost += ivs->cand_cost;
4197 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4202 /* Remove invariants in set INVS to set IVS. */
4205 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4213 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4215 ivs->n_invariant_uses[iid]--;
4216 if (ivs->n_invariant_uses[iid] == 0)
4221 /* Set USE not to be expressed by any candidate in IVS. */
4224 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4227 unsigned uid = use->id, cid;
4228 struct cost_pair *cp;
4230 cp = ivs->cand_for_use[uid];
4236 ivs->cand_for_use[uid] = NULL;
4237 ivs->n_cand_uses[cid]--;
4239 if (ivs->n_cand_uses[cid] == 0)
4241 bitmap_clear_bit (ivs->cands, cid);
4242 /* Do not count the pseudocandidates. */
4246 ivs->cand_cost -= cp->cand->cost;
4248 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4251 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4253 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4254 iv_ca_recount_cost (data, ivs);
4257 /* Add invariants in set INVS to set IVS. */
4260 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4268 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4270 ivs->n_invariant_uses[iid]++;
4271 if (ivs->n_invariant_uses[iid] == 1)
4276 /* Set cost pair for USE in set IVS to CP. */
4279 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4280 struct iv_use *use, struct cost_pair *cp)
4282 unsigned uid = use->id, cid;
4284 if (ivs->cand_for_use[uid] == cp)
4287 if (ivs->cand_for_use[uid])
4288 iv_ca_set_no_cp (data, ivs, use);
4295 ivs->cand_for_use[uid] = cp;
4296 ivs->n_cand_uses[cid]++;
4297 if (ivs->n_cand_uses[cid] == 1)
4299 bitmap_set_bit (ivs->cands, cid);
4300 /* Do not count the pseudocandidates. */
4304 ivs->cand_cost += cp->cand->cost;
4306 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4309 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4310 iv_ca_set_add_invariants (ivs, cp->depends_on);
4311 iv_ca_recount_cost (data, ivs);
4315 /* Extend set IVS by expressing USE by some of the candidates in it
4319 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4322 struct cost_pair *best_cp = NULL, *cp;
4326 gcc_assert (ivs->upto >= use->id);
4328 if (ivs->upto == use->id)
4334 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4336 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4338 if (cheaper_cost_pair (cp, best_cp))
4342 iv_ca_set_cp (data, ivs, use, best_cp);
4345 /* Get cost for assignment IVS. */
4348 iv_ca_cost (struct iv_ca *ivs)
4350 return (ivs->bad_uses ? infinite_cost : ivs->cost);
4353 /* Returns true if all dependences of CP are among invariants in IVS. */
4356 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4361 if (!cp->depends_on)
4364 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4366 if (ivs->n_invariant_uses[i] == 0)
4373 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4374 it before NEXT_CHANGE. */
4376 static struct iv_ca_delta *
4377 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4378 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4380 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4383 change->old_cp = old_cp;
4384 change->new_cp = new_cp;
4385 change->next_change = next_change;
4390 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4393 static struct iv_ca_delta *
4394 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4396 struct iv_ca_delta *last;
4404 for (last = l1; last->next_change; last = last->next_change)
4406 last->next_change = l2;
4411 /* Returns candidate by that USE is expressed in IVS. */
4413 static struct cost_pair *
4414 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4416 return ivs->cand_for_use[use->id];
4419 /* Reverse the list of changes DELTA, forming the inverse to it. */
4421 static struct iv_ca_delta *
4422 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4424 struct iv_ca_delta *act, *next, *prev = NULL;
4425 struct cost_pair *tmp;
4427 for (act = delta; act; act = next)
4429 next = act->next_change;
4430 act->next_change = prev;
4434 act->old_cp = act->new_cp;
4441 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4442 reverted instead. */
4445 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4446 struct iv_ca_delta *delta, bool forward)
4448 struct cost_pair *from, *to;
4449 struct iv_ca_delta *act;
4452 delta = iv_ca_delta_reverse (delta);
4454 for (act = delta; act; act = act->next_change)
4458 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4459 iv_ca_set_cp (data, ivs, act->use, to);
4463 iv_ca_delta_reverse (delta);
4466 /* Returns true if CAND is used in IVS. */
4469 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4471 return ivs->n_cand_uses[cand->id] > 0;
4474 /* Returns number of induction variable candidates in the set IVS. */
4477 iv_ca_n_cands (struct iv_ca *ivs)
4479 return ivs->n_cands;
4482 /* Free the list of changes DELTA. */
4485 iv_ca_delta_free (struct iv_ca_delta **delta)
4487 struct iv_ca_delta *act, *next;
4489 for (act = *delta; act; act = next)
4491 next = act->next_change;
4498 /* Allocates new iv candidates assignment. */
4500 static struct iv_ca *
4501 iv_ca_new (struct ivopts_data *data)
4503 struct iv_ca *nw = XNEW (struct iv_ca);
4507 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4508 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4509 nw->cands = BITMAP_ALLOC (NULL);
4512 nw->cand_use_cost = zero_cost;
4514 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4515 nw->cost = zero_cost;
4520 /* Free memory occupied by the set IVS. */
4523 iv_ca_free (struct iv_ca **ivs)
4525 free ((*ivs)->cand_for_use);
4526 free ((*ivs)->n_cand_uses);
4527 BITMAP_FREE ((*ivs)->cands);
4528 free ((*ivs)->n_invariant_uses);
4533 /* Dumps IVS to FILE. */
4536 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4538 const char *pref = " invariants ";
4540 comp_cost cost = iv_ca_cost (ivs);
4542 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4543 bitmap_print (file, ivs->cands, " candidates ","\n");
4545 for (i = 1; i <= data->max_inv_id; i++)
4546 if (ivs->n_invariant_uses[i])
4548 fprintf (file, "%s%d", pref, i);
4551 fprintf (file, "\n");
4554 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4555 new set, and store differences in DELTA. Number of induction variables
4556 in the new set is stored to N_IVS. */
4559 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4560 struct iv_cand *cand, struct iv_ca_delta **delta,
4566 struct cost_pair *old_cp, *new_cp;
4569 for (i = 0; i < ivs->upto; i++)
4571 use = iv_use (data, i);
4572 old_cp = iv_ca_cand_for_use (ivs, use);
4575 && old_cp->cand == cand)
4578 new_cp = get_use_iv_cost (data, use, cand);
4582 if (!iv_ca_has_deps (ivs, new_cp))
4585 if (!cheaper_cost_pair (new_cp, old_cp))
4588 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4591 iv_ca_delta_commit (data, ivs, *delta, true);
4592 cost = iv_ca_cost (ivs);
4594 *n_ivs = iv_ca_n_cands (ivs);
4595 iv_ca_delta_commit (data, ivs, *delta, false);
4600 /* Try narrowing set IVS by removing CAND. Return the cost of
4601 the new set and store the differences in DELTA. */
4604 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4605 struct iv_cand *cand, struct iv_ca_delta **delta)
4609 struct cost_pair *old_cp, *new_cp, *cp;
4611 struct iv_cand *cnd;
4615 for (i = 0; i < n_iv_uses (data); i++)
4617 use = iv_use (data, i);
4619 old_cp = iv_ca_cand_for_use (ivs, use);
4620 if (old_cp->cand != cand)
4625 if (data->consider_all_candidates)
4627 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4632 cnd = iv_cand (data, ci);
4634 cp = get_use_iv_cost (data, use, cnd);
4637 if (!iv_ca_has_deps (ivs, cp))
4640 if (!cheaper_cost_pair (cp, new_cp))
4648 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4653 cnd = iv_cand (data, ci);
4655 cp = get_use_iv_cost (data, use, cnd);
4658 if (!iv_ca_has_deps (ivs, cp))
4661 if (!cheaper_cost_pair (cp, new_cp))
4670 iv_ca_delta_free (delta);
4671 return infinite_cost;
4674 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4677 iv_ca_delta_commit (data, ivs, *delta, true);
4678 cost = iv_ca_cost (ivs);
4679 iv_ca_delta_commit (data, ivs, *delta, false);
4684 /* Try optimizing the set of candidates IVS by removing candidates different
4685 from to EXCEPT_CAND from it. Return cost of the new set, and store
4686 differences in DELTA. */
4689 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4690 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4693 struct iv_ca_delta *act_delta, *best_delta;
4695 comp_cost best_cost, acost;
4696 struct iv_cand *cand;
4699 best_cost = iv_ca_cost (ivs);
4701 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4703 cand = iv_cand (data, i);
4705 if (cand == except_cand)
4708 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4710 if (compare_costs (acost, best_cost) < 0)
4713 iv_ca_delta_free (&best_delta);
4714 best_delta = act_delta;
4717 iv_ca_delta_free (&act_delta);
4726 /* Recurse to possibly remove other unnecessary ivs. */
4727 iv_ca_delta_commit (data, ivs, best_delta, true);
4728 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4729 iv_ca_delta_commit (data, ivs, best_delta, false);
4730 *delta = iv_ca_delta_join (best_delta, *delta);
4734 /* Tries to extend the sets IVS in the best possible way in order
4735 to express the USE. */
4738 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4741 comp_cost best_cost, act_cost;
4744 struct iv_cand *cand;
4745 struct iv_ca_delta *best_delta = NULL, *act_delta;
4746 struct cost_pair *cp;
4748 iv_ca_add_use (data, ivs, use);
4749 best_cost = iv_ca_cost (ivs);
4751 cp = iv_ca_cand_for_use (ivs, use);
4754 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4755 iv_ca_set_no_cp (data, ivs, use);
4758 /* First try important candidates not based on any memory object. Only if
4759 this fails, try the specific ones. Rationale -- in loops with many
4760 variables the best choice often is to use just one generic biv. If we
4761 added here many ivs specific to the uses, the optimization algorithm later
4762 would be likely to get stuck in a local minimum, thus causing us to create
4763 too many ivs. The approach from few ivs to more seems more likely to be
4764 successful -- starting from few ivs, replacing an expensive use by a
4765 specific iv should always be a win. */
4766 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4768 cand = iv_cand (data, i);
4770 if (cand->iv->base_object != NULL_TREE)
4773 if (iv_ca_cand_used_p (ivs, cand))
4776 cp = get_use_iv_cost (data, use, cand);
4780 iv_ca_set_cp (data, ivs, use, cp);
4781 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4782 iv_ca_set_no_cp (data, ivs, use);
4783 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4785 if (compare_costs (act_cost, best_cost) < 0)
4787 best_cost = act_cost;
4789 iv_ca_delta_free (&best_delta);
4790 best_delta = act_delta;
4793 iv_ca_delta_free (&act_delta);
4796 if (infinite_cost_p (best_cost))
4798 for (i = 0; i < use->n_map_members; i++)
4800 cp = use->cost_map + i;
4805 /* Already tried this. */
4806 if (cand->important && cand->iv->base_object == NULL_TREE)
4809 if (iv_ca_cand_used_p (ivs, cand))
4813 iv_ca_set_cp (data, ivs, use, cp);
4814 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4815 iv_ca_set_no_cp (data, ivs, use);
4816 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4819 if (compare_costs (act_cost, best_cost) < 0)
4821 best_cost = act_cost;
4824 iv_ca_delta_free (&best_delta);
4825 best_delta = act_delta;
4828 iv_ca_delta_free (&act_delta);
4832 iv_ca_delta_commit (data, ivs, best_delta, true);
4833 iv_ca_delta_free (&best_delta);
4835 return !infinite_cost_p (best_cost);
4838 /* Finds an initial assignment of candidates to uses. */
4840 static struct iv_ca *
4841 get_initial_solution (struct ivopts_data *data)
4843 struct iv_ca *ivs = iv_ca_new (data);
4846 for (i = 0; i < n_iv_uses (data); i++)
4847 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4856 /* Tries to improve set of induction variables IVS. */
4859 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4862 comp_cost acost, best_cost = iv_ca_cost (ivs);
4863 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4864 struct iv_cand *cand;
4866 /* Try extending the set of induction variables by one. */
4867 for (i = 0; i < n_iv_cands (data); i++)
4869 cand = iv_cand (data, i);
4871 if (iv_ca_cand_used_p (ivs, cand))
4874 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4878 /* If we successfully added the candidate and the set is small enough,
4879 try optimizing it by removing other candidates. */
4880 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4882 iv_ca_delta_commit (data, ivs, act_delta, true);
4883 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4884 iv_ca_delta_commit (data, ivs, act_delta, false);
4885 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4888 if (compare_costs (acost, best_cost) < 0)
4891 iv_ca_delta_free (&best_delta);
4892 best_delta = act_delta;
4895 iv_ca_delta_free (&act_delta);
4900 /* Try removing the candidates from the set instead. */
4901 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4903 /* Nothing more we can do. */
4908 iv_ca_delta_commit (data, ivs, best_delta, true);
4909 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4910 iv_ca_delta_free (&best_delta);
4914 /* Attempts to find the optimal set of induction variables. We do simple
4915 greedy heuristic -- we try to replace at most one candidate in the selected
4916 solution and remove the unused ivs while this improves the cost. */
4918 static struct iv_ca *
4919 find_optimal_iv_set (struct ivopts_data *data)
4925 /* Get the initial solution. */
4926 set = get_initial_solution (data);
4929 if (dump_file && (dump_flags & TDF_DETAILS))
4930 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4934 if (dump_file && (dump_flags & TDF_DETAILS))
4936 fprintf (dump_file, "Initial set of candidates:\n");
4937 iv_ca_dump (data, dump_file, set);
4940 while (try_improve_iv_set (data, set))
4942 if (dump_file && (dump_flags & TDF_DETAILS))
4944 fprintf (dump_file, "Improved to:\n");
4945 iv_ca_dump (data, dump_file, set);
4949 if (dump_file && (dump_flags & TDF_DETAILS))
4951 comp_cost cost = iv_ca_cost (set);
4952 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4955 for (i = 0; i < n_iv_uses (data); i++)
4957 use = iv_use (data, i);
4958 use->selected = iv_ca_cand_for_use (set, use)->cand;
4964 /* Creates a new induction variable corresponding to CAND. */
4967 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4969 gimple_stmt_iterator incr_pos;
4979 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
4983 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
4988 /* Mark that the iv is preserved. */
4989 name_info (data, cand->var_before)->preserve_biv = true;
4990 name_info (data, cand->var_after)->preserve_biv = true;
4992 /* Rewrite the increment so that it uses var_before directly. */
4993 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4998 gimple_add_tmp_var (cand->var_before);
4999 add_referenced_var (cand->var_before);
5001 base = unshare_expr (cand->iv->base);
5003 create_iv (base, unshare_expr (cand->iv->step),
5004 cand->var_before, data->current_loop,
5005 &incr_pos, after, &cand->var_before, &cand->var_after);
5008 /* Creates new induction variables described in SET. */
5011 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5014 struct iv_cand *cand;
5017 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5019 cand = iv_cand (data, i);
5020 create_new_iv (data, cand);
5024 /* Returns the phi-node in BB with result RESULT. */
5027 get_phi_with_result (basic_block bb, tree result)
5029 gimple_stmt_iterator i = gsi_start_phis (bb);
5031 for (; !gsi_end_p (i); gsi_next (&i))
5032 if (gimple_phi_result (gsi_stmt (i)) == result)
5033 return gsi_stmt (i);
5040 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5041 is true, remove also the ssa name defined by the statement. */
5044 remove_statement (gimple stmt, bool including_defined_name)
5046 if (gimple_code (stmt) == GIMPLE_PHI)
5048 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5049 gimple_phi_result (stmt));
5050 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5051 remove_phi_node (&bsi, including_defined_name);
5055 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
5056 gsi_remove (&bsi, true);
5057 release_defs (stmt);
5061 /* Rewrites USE (definition of iv used in a nonlinear expression)
5062 using candidate CAND. */
5065 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5066 struct iv_use *use, struct iv_cand *cand)
5071 gimple_stmt_iterator bsi;
5073 /* An important special case -- if we are asked to express value of
5074 the original iv by itself, just exit; there is no need to
5075 introduce a new computation (that might also need casting the
5076 variable to unsigned and back). */
5077 if (cand->pos == IP_ORIGINAL
5078 && cand->incremented_at == use->stmt)
5080 tree step, ctype, utype;
5081 enum tree_code incr_code = PLUS_EXPR, old_code;
5083 gcc_assert (is_gimple_assign (use->stmt));
5084 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5086 step = cand->iv->step;
5087 ctype = TREE_TYPE (step);
5088 utype = TREE_TYPE (cand->var_after);
5089 if (TREE_CODE (step) == NEGATE_EXPR)
5091 incr_code = MINUS_EXPR;
5092 step = TREE_OPERAND (step, 0);
5095 /* Check whether we may leave the computation unchanged.
5096 This is the case only if it does not rely on other
5097 computations in the loop -- otherwise, the computation
5098 we rely upon may be removed in remove_unused_ivs,
5099 thus leading to ICE. */
5100 old_code = gimple_assign_rhs_code (use->stmt);
5101 if (old_code == PLUS_EXPR
5102 || old_code == MINUS_EXPR
5103 || old_code == POINTER_PLUS_EXPR)
5105 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5106 op = gimple_assign_rhs2 (use->stmt);
5107 else if (old_code != MINUS_EXPR
5108 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5109 op = gimple_assign_rhs1 (use->stmt);
5117 && (TREE_CODE (op) == INTEGER_CST
5118 || operand_equal_p (op, step, 0)))
5121 /* Otherwise, add the necessary computations to express
5123 op = fold_convert (ctype, cand->var_before);
5124 comp = fold_convert (utype,
5125 build2 (incr_code, ctype, op,
5126 unshare_expr (step)));
5130 comp = get_computation (data->current_loop, use, cand);
5131 gcc_assert (comp != NULL_TREE);
5134 switch (gimple_code (use->stmt))
5137 tgt = PHI_RESULT (use->stmt);
5139 /* If we should keep the biv, do not replace it. */
5140 if (name_info (data, tgt)->preserve_biv)
5143 bsi = gsi_after_labels (gimple_bb (use->stmt));
5147 tgt = gimple_assign_lhs (use->stmt);
5148 bsi = gsi_for_stmt (use->stmt);
5155 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5156 true, GSI_SAME_STMT);
5158 if (gimple_code (use->stmt) == GIMPLE_PHI)
5160 ass = gimple_build_assign (tgt, op);
5161 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5162 remove_statement (use->stmt, false);
5166 gimple_assign_set_rhs_from_tree (&bsi, op);
5167 use->stmt = gsi_stmt (bsi);
5171 /* Replaces ssa name in index IDX by its basic variable. Callback for
5175 idx_remove_ssa_names (tree base, tree *idx,
5176 void *data ATTRIBUTE_UNUSED)
5180 if (TREE_CODE (*idx) == SSA_NAME)
5181 *idx = SSA_NAME_VAR (*idx);
5183 if (TREE_CODE (base) == ARRAY_REF)
5185 op = &TREE_OPERAND (base, 2);
5187 && TREE_CODE (*op) == SSA_NAME)
5188 *op = SSA_NAME_VAR (*op);
5189 op = &TREE_OPERAND (base, 3);
5191 && TREE_CODE (*op) == SSA_NAME)
5192 *op = SSA_NAME_VAR (*op);
5198 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5201 unshare_and_remove_ssa_names (tree ref)
5203 ref = unshare_expr (ref);
5204 for_each_index (&ref, idx_remove_ssa_names, NULL);
5209 /* Extract the alias analysis info for the memory reference REF. There are
5210 several ways how this information may be stored and what precisely is
5211 its semantics depending on the type of the reference, but there always is
5212 somewhere hidden one _DECL node that is used to determine the set of
5213 virtual operands for the reference. The code below deciphers this jungle
5214 and extracts this single useful piece of information. */
5217 get_ref_tag (tree ref, tree orig)
5219 tree var = get_base_address (ref);
5220 tree aref = NULL_TREE, tag, sv;
5221 HOST_WIDE_INT offset, size, maxsize;
5223 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5225 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5233 if (TREE_CODE (var) == INDIRECT_REF)
5235 /* If the base is a dereference of a pointer, first check its name memory
5236 tag. If it does not have one, use its symbol memory tag. */
5237 var = TREE_OPERAND (var, 0);
5238 if (TREE_CODE (var) != SSA_NAME)
5241 if (SSA_NAME_PTR_INFO (var))
5243 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5248 var = SSA_NAME_VAR (var);
5249 tag = symbol_mem_tag (var);
5250 gcc_assert (tag != NULL_TREE);
5258 tag = symbol_mem_tag (var);
5266 /* Copies the reference information from OLD_REF to NEW_REF. */
5269 copy_ref_info (tree new_ref, tree old_ref)
5271 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5272 copy_mem_ref_info (new_ref, old_ref);
5275 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5276 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5280 /* Rewrites USE (address that is an iv) using candidate CAND. */
5283 rewrite_use_address (struct ivopts_data *data,
5284 struct iv_use *use, struct iv_cand *cand)
5287 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5291 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5293 unshare_aff_combination (&aff);
5295 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5296 copy_ref_info (ref, *use->op_p);
5300 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5304 rewrite_use_compare (struct ivopts_data *data,
5305 struct iv_use *use, struct iv_cand *cand)
5307 tree comp, *var_p, op, bound;
5308 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5309 enum tree_code compare;
5310 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5316 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5317 tree var_type = TREE_TYPE (var);
5319 compare = iv_elimination_compare (data, use);
5320 bound = unshare_expr (fold_convert (var_type, bound));
5321 op = force_gimple_operand_gsi (&bsi, bound, true, NULL_TREE,
5322 true, GSI_SAME_STMT);
5324 gimple_cond_set_lhs (use->stmt, var);
5325 gimple_cond_set_code (use->stmt, compare);
5326 gimple_cond_set_rhs (use->stmt, op);
5330 /* The induction variable elimination failed; just express the original
5332 comp = get_computation (data->current_loop, use, cand);
5333 gcc_assert (comp != NULL_TREE);
5335 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5338 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5339 true, GSI_SAME_STMT);
5342 /* Rewrites USE using candidate CAND. */
5345 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5347 push_stmt_changes (&use->stmt);
5351 case USE_NONLINEAR_EXPR:
5352 rewrite_use_nonlinear_expr (data, use, cand);
5356 rewrite_use_address (data, use, cand);
5360 rewrite_use_compare (data, use, cand);
5367 pop_stmt_changes (&use->stmt);
5370 /* Rewrite the uses using the selected induction variables. */
5373 rewrite_uses (struct ivopts_data *data)
5376 struct iv_cand *cand;
5379 for (i = 0; i < n_iv_uses (data); i++)
5381 use = iv_use (data, i);
5382 cand = use->selected;
5385 rewrite_use (data, use, cand);
5389 /* Removes the ivs that are not used after rewriting. */
5392 remove_unused_ivs (struct ivopts_data *data)
5397 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5399 struct version_info *info;
5401 info = ver_info (data, j);
5403 && !integer_zerop (info->iv->step)
5405 && !info->iv->have_use_for
5406 && !info->preserve_biv)
5407 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5411 /* Frees data allocated by the optimization of a single loop. */
5414 free_loop_data (struct ivopts_data *data)
5422 pointer_map_destroy (data->niters);
5423 data->niters = NULL;
5426 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5428 struct version_info *info;
5430 info = ver_info (data, i);
5434 info->has_nonlin_use = false;
5435 info->preserve_biv = false;
5438 bitmap_clear (data->relevant);
5439 bitmap_clear (data->important_candidates);
5441 for (i = 0; i < n_iv_uses (data); i++)
5443 struct iv_use *use = iv_use (data, i);
5446 BITMAP_FREE (use->related_cands);
5447 for (j = 0; j < use->n_map_members; j++)
5448 if (use->cost_map[j].depends_on)
5449 BITMAP_FREE (use->cost_map[j].depends_on);
5450 free (use->cost_map);
5453 VEC_truncate (iv_use_p, data->iv_uses, 0);
5455 for (i = 0; i < n_iv_cands (data); i++)
5457 struct iv_cand *cand = iv_cand (data, i);
5461 if (cand->depends_on)
5462 BITMAP_FREE (cand->depends_on);
5465 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5467 if (data->version_info_size < num_ssa_names)
5469 data->version_info_size = 2 * num_ssa_names;
5470 free (data->version_info);
5471 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5474 data->max_inv_id = 0;
5476 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5477 SET_DECL_RTL (obj, NULL_RTX);
5479 VEC_truncate (tree, decl_rtl_to_reset, 0);
5482 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5486 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5488 free_loop_data (data);
5489 free (data->version_info);
5490 BITMAP_FREE (data->relevant);
5491 BITMAP_FREE (data->important_candidates);
5493 VEC_free (tree, heap, decl_rtl_to_reset);
5494 VEC_free (iv_use_p, heap, data->iv_uses);
5495 VEC_free (iv_cand_p, heap, data->iv_candidates);
5498 /* Optimizes the LOOP. Returns true if anything changed. */
5501 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5503 bool changed = false;
5504 struct iv_ca *iv_ca;
5507 gcc_assert (!data->niters);
5508 data->current_loop = loop;
5509 data->speed = optimize_loop_for_speed_p (loop);
5511 if (dump_file && (dump_flags & TDF_DETAILS))
5513 fprintf (dump_file, "Processing loop %d\n", loop->num);
5515 exit = single_dom_exit (loop);
5518 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5519 exit->src->index, exit->dest->index);
5520 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5521 fprintf (dump_file, "\n");
5524 fprintf (dump_file, "\n");
5527 /* For each ssa name determines whether it behaves as an induction variable
5529 if (!find_induction_variables (data))
5532 /* Finds interesting uses (item 1). */
5533 find_interesting_uses (data);
5534 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5537 /* Finds candidates for the induction variables (item 2). */
5538 find_iv_candidates (data);
5540 /* Calculates the costs (item 3, part 1). */
5541 determine_use_iv_costs (data);
5542 determine_iv_costs (data);
5543 determine_set_costs (data);
5545 /* Find the optimal set of induction variables (item 3, part 2). */
5546 iv_ca = find_optimal_iv_set (data);
5551 /* Create the new induction variables (item 4, part 1). */
5552 create_new_ivs (data, iv_ca);
5553 iv_ca_free (&iv_ca);
5555 /* Rewrite the uses (item 4, part 2). */
5556 rewrite_uses (data);
5558 /* Remove the ivs that are unused after rewriting. */
5559 remove_unused_ivs (data);
5561 /* We have changed the structure of induction variables; it might happen
5562 that definitions in the scev database refer to some of them that were
5567 free_loop_data (data);
5572 /* Main entry point. Optimizes induction variables in loops. */
5575 tree_ssa_iv_optimize (void)
5578 struct ivopts_data data;
5581 tree_ssa_iv_optimize_init (&data);
5583 /* Optimize the loops starting with the innermost ones. */
5584 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5586 if (dump_file && (dump_flags & TDF_DETAILS))
5587 flow_loop_dump (loop, dump_file, NULL, 1);
5589 tree_ssa_iv_optimize_loop (&data, loop);
5592 tree_ssa_iv_optimize_finalize (&data);