/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
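
/* As a small illustrative sketch (added for exposition, not part of the
   original sources): in

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address &a[i] is an interesting use.  Strength reduction may replace
   it with a pointer candidate that starts at &a[0] and is incremented by
   sizeof (a[0]) each iteration; if the exit test can then be expressed
   using the new pointer, the variable i is eliminated entirely.  */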

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;
    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;
    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;
    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;
    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;
    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;
    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;
    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;
      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
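
/* For instance, for EXPR == &a[i] the base object determined above is the
   address of the array a, while for an integer constant casted to a pointer
   NULL is returned, since no memory object can be determined.  */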

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
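
/* For example, for the loop-header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>

   where the loop body contains i_2 = i_1 + 4, the returned step is 4.  */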

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    true))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
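
/* For example, for a reference a[i][j] in which i steps by 1 and j steps
   by 2, the invocations of this callback accumulate a total step of
   1 * sizeof (a[0]) + 2 * sizeof (a[0][0]) into DATA.  */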

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
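
/* For instance, for TOP = n * 8 and BOT = n this stores 8 to MUL; for
   TOP = 24 and BOT = 4 it stores 6; for TOP = n + 1 and BOT = n it fails,
   since n + 1 is not a constant multiple of n.  */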

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
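
/* For instance, applied to &a[4] this strips the index completely,
   returning the address of a with OFFSET set to 4 * sizeof (a[0]);
   applied to p + 16 it returns p with OFFSET set to 16.  */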

/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
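
/* For instance, on a target with HAVE_POST_INCREMENT, an IP_AFTER_USE
   candidate allows a load from *p followed by the increment p += 4 (for a
   4-byte mem_mode) to be emitted as a single post-increment access,
   *p++.  */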

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that taking the modulo by it
	     is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
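
/* For instance, a use with 5 related candidates gets a cost map of size 8;
   a candidate with id 11 then starts probing at slot 11 & (8 - 1) == 3.  */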

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2587 /* Gets cost of (USE, CANDIDATE) pair. */
2589 static struct cost_pair *
2590 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2591 struct iv_cand *cand)
2594 struct cost_pair *ret;
2599 if (data->consider_all_candidates)
2601 ret = use->cost_map + cand->id;
2608 /* n_map_members is a power of two, so this computes modulo. */
2609 s = cand->id & (use->n_map_members - 1);
2610 for (i = s; i < use->n_map_members; i++)
2611 if (use->cost_map[i].cand == cand)
2612 return use->cost_map + i;
2614 for (i = 0; i < s; i++)
2615 if (use->cost_map[i].cand == cand)
2616 return use->cost_map + i;
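/* For illustration: with n_map_members == 8, a candidate with id 11
   starts probing at slot 11 & 7 == 3, scans slots 3..7 and then wraps
   around to 0..2 -- the same order in which set_use_iv_cost inserted
   the pair, so a recorded entry is always found.  */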
2621 /* Returns an estimate of the cost of computing SEQ.  */
2624 seq_cost (rtx seq, bool speed)
2629 for (; seq; seq = NEXT_INSN (seq))
2631 set = single_set (seq);
2633 cost += rtx_cost (set, SET, speed);
2641 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
2643 produce_memory_decl_rtl (tree obj, int *regno)
2645 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2649 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2651 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2652 x = gen_rtx_SYMBOL_REF (Pmode, name);
2653 SET_SYMBOL_REF_DECL (x, obj);
2654 x = gen_rtx_MEM (DECL_MODE (obj), x);
2655 set_mem_addr_space (x, as);
2656 targetm.encode_section_info (obj, x, true);
2660 x = gen_raw_REG (Pmode, (*regno)++);
2661 x = gen_rtx_MEM (DECL_MODE (obj), x);
2662 set_mem_addr_space (x, as);
2668 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2669 walk_tree. DATA contains the actual fake register number. */
2672 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2674 tree obj = NULL_TREE;
2676 int *regno = (int *) data;
2678 switch (TREE_CODE (*expr_p))
2681 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2682 handled_component_p (*expr_p);
2683 expr_p = &TREE_OPERAND (*expr_p, 0))
2686 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2687 x = produce_memory_decl_rtl (obj, regno);
2692 obj = SSA_NAME_VAR (*expr_p);
2693 if (!DECL_RTL_SET_P (obj))
2694 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2703 if (DECL_RTL_SET_P (obj))
2706 if (DECL_MODE (obj) == BLKmode)
2707 x = produce_memory_decl_rtl (obj, regno);
2709 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2719 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2720 SET_DECL_RTL (obj, x);
2726 /* Determines cost of the computation of EXPR. */
2729 computation_cost (tree expr, bool speed)
2732 tree type = TREE_TYPE (expr);
2734 /* Avoid using hard regs in ways which may be unsupported. */
2735 int regno = LAST_VIRTUAL_REGISTER + 1;
2736 enum function_frequency real_frequency = cfun->function_frequency;
2738 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2739 crtl->maybe_hot_insn_p = speed;
2740 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2742 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2745 default_rtl_profile ();
2746 cfun->function_frequency = real_frequency;
2748 cost = seq_cost (seq, speed);
2750 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2751 TYPE_ADDR_SPACE (type), speed);
2756 /* Returns variable containing the value of candidate CAND at statement AT. */
2759 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2761 if (stmt_after_increment (loop, cand, stmt))
2762 return cand->var_after;
2764 return cand->var_before;
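/* For illustration: if CAND is incremented at the end of the latch,
   statements in the loop body execute before the increment and thus
   use var_before; only a statement placed after the increment itself
   sees var_after.  */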
2767 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2768 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2771 tree_int_cst_sign_bit (const_tree t)
2773 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2774 unsigned HOST_WIDE_INT w;
2776 if (bitno < HOST_BITS_PER_WIDE_INT)
2777 w = TREE_INT_CST_LOW (t);
2780 w = TREE_INT_CST_HIGH (t);
2781 bitno -= HOST_BITS_PER_WIDE_INT;
2784 return (w >> bitno) & 1;
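/* For illustration (assuming HOST_BITS_PER_WIDE_INT == 64): for a
   32-bit constant BITNO is 31, so the sign bit comes from the low
   word; for a 128-bit constant BITNO starts at 127, so the bit is
   taken from TREE_INT_CST_HIGH at position 63.  */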
2787 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2788 same precision that is at least as wide as the precision of TYPE, stores
2789 BA to A and BB to B, and returns the type of BA. Otherwise, returns the type of A.  */
2793 determine_common_wider_type (tree *a, tree *b)
2795 tree wider_type = NULL;
2797 tree atype = TREE_TYPE (*a);
2799 if (CONVERT_EXPR_P (*a))
2801 suba = TREE_OPERAND (*a, 0);
2802 wider_type = TREE_TYPE (suba);
2803 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2809 if (CONVERT_EXPR_P (*b))
2811 subb = TREE_OPERAND (*b, 0);
2812 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2823 /* Determines the expression by which USE is expressed from induction variable
2824 CAND at statement AT in LOOP. The expression is stored in a decomposed
2825 form into AFF. Returns false if USE cannot be expressed using CAND. */
2828 get_computation_aff (struct loop *loop,
2829 struct iv_use *use, struct iv_cand *cand, gimple at,
2830 struct affine_tree_combination *aff)
2832 tree ubase = use->iv->base;
2833 tree ustep = use->iv->step;
2834 tree cbase = cand->iv->base;
2835 tree cstep = cand->iv->step, cstep_common;
2836 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2837 tree common_type, var;
2839 aff_tree cbase_aff, var_aff;
2842 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2844 /* We do not have enough precision to express the values of USE.  */
2848 var = var_at_stmt (loop, cand, at);
2849 uutype = unsigned_type_for (utype);
2851 /* If the conversion is not a no-op, perform it.  */
2852 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2854 cstep = fold_convert (uutype, cstep);
2855 cbase = fold_convert (uutype, cbase);
2856 var = fold_convert (uutype, var);
2859 if (!constant_multiple_of (ustep, cstep, &rat))
2862 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2863 type, we achieve better folding by computing their difference in this
2864 wider type, and casting the result to UUTYPE. We do not need to worry about
2865 overflows, as all the arithmetic will in the end be performed in UUTYPE anyway.  */
2867 common_type = determine_common_wider_type (&ubase, &cbase);
2869 /* use = ubase - ratio * cbase + ratio * var. */
2870 tree_to_aff_combination (ubase, common_type, aff);
2871 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2872 tree_to_aff_combination (var, uutype, &var_aff);
2874 /* We need to shift the value if we are after the increment. */
2875 if (stmt_after_increment (loop, cand, at))
2879 if (common_type != uutype)
2880 cstep_common = fold_convert (common_type, cstep);
2882 cstep_common = cstep;
2884 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2885 aff_combination_add (&cbase_aff, &cstep_aff);
2888 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2889 aff_combination_add (aff, &cbase_aff);
2890 if (common_type != uutype)
2891 aff_combination_convert (aff, uutype);
2893 aff_combination_scale (&var_aff, rat);
2894 aff_combination_add (aff, &var_aff);
2899 /* Determines the expression by which USE is expressed from induction variable
2900 CAND at statement AT in LOOP. The computation is unshared. */
2903 get_computation_at (struct loop *loop,
2904 struct iv_use *use, struct iv_cand *cand, gimple at)
2907 tree type = TREE_TYPE (use->iv->base);
2909 if (!get_computation_aff (loop, use, cand, at, &aff))
2911 unshare_aff_combination (&aff);
2912 return fold_convert (type, aff_combination_to_tree (&aff));
2915 /* Determines the expression by which USE is expressed from induction variable
2916 CAND in LOOP. The computation is unshared. */
2919 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2921 return get_computation_at (loop, use, cand, use->stmt);
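/* For illustration (hypothetical ivs): if USE is the address iv
   {base &a[0], step 4} and CAND is the counter iv {base 0, step 1},
   then rat == 4 and the affine combination built above is
   &a[0] - 4 * 0 + 4 * var, i.e. the use is rewritten as
   &a[0] + 4 * var.  */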
2924 /* Returns cost of addition in MODE. */
2927 add_cost (enum machine_mode mode, bool speed)
2929 static unsigned costs[NUM_MACHINE_MODES];
2937 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2938 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2939 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2944 cost = seq_cost (seq, speed);
2950 if (dump_file && (dump_flags & TDF_DETAILS))
2951 fprintf (dump_file, "Addition in %s costs %d\n",
2952 GET_MODE_NAME (mode), cost);
2956 /* Entry in a hashtable of already known costs for multiplication. */
2959 HOST_WIDE_INT cst; /* The constant to multiply by. */
2960 enum machine_mode mode; /* In mode. */
2961 unsigned cost; /* The cost. */
2964 /* Computes the hash value for ENTRY.  */
2967 mbc_entry_hash (const void *entry)
2969 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2971 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2974 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2977 mbc_entry_eq (const void *entry1, const void *entry2)
2979 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2980 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2982 return (e1->mode == e2->mode
2983 && e1->cst == e2->cst);
2986 /* Returns cost of multiplication by constant CST in MODE. */
2989 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2991 static htab_t costs;
2992 struct mbc_entry **cached, act;
2997 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3001 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3003 return (*cached)->cost;
3005 *cached = XNEW (struct mbc_entry);
3006 (*cached)->mode = mode;
3007 (*cached)->cst = cst;
3010 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3011 gen_int_mode (cst, mode), NULL_RTX, 0);
3015 cost = seq_cost (seq, speed);
3017 if (dump_file && (dump_flags & TDF_DETAILS))
3018 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3019 (int) cst, GET_MODE_NAME (mode), cost);
3021 (*cached)->cost = cost;
3026 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3027 validity for a memory reference accessing memory of mode MODE in
3028 address space AS. */
3030 DEF_VEC_P (sbitmap);
3031 DEF_VEC_ALLOC_P (sbitmap, heap);
3034 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3037 #define MAX_RATIO 128
3038 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3039 static VEC (sbitmap, heap) *valid_mult_list;
3042 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3043 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3045 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3048 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3052 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3053 sbitmap_zero (valid_mult);
3054 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3055 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3057 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3058 if (memory_address_addr_space_p (mode, addr, as))
3059 SET_BIT (valid_mult, i + MAX_RATIO);
3062 if (dump_file && (dump_flags & TDF_DETAILS))
3064 fprintf (dump_file, " allowed multipliers:");
3065 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3066 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3067 fprintf (dump_file, " %d", (int) i);
3068 fprintf (dump_file, "\n");
3069 fprintf (dump_file, "\n");
3072 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3075 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3078 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
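/* For illustration: on an x86-like target only scales of 1, 2, 4 and 8
   fit the addressing modes, so only those bits of VALID_MULT end up
   set; e.g. ratio == 3 is rejected here and paid for as an explicit
   multiplication in get_address_cost.  */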
3081 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3082 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3083 variable is omitted. Compute the cost for a memory reference that accesses
3084 a memory location of mode MEM_MODE in address space AS.
3086 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3087 size of MEM_MODE / RATIO) is available. To make this determination, we
3088 look at the size of the increment to be made, which is given in CSTEP.
3089 CSTEP may be zero if the step is unknown.
3090 STMT_AFTER_INC is true iff the statement we're looking at is after the
3091 increment of the original biv.
3093 TODO -- there must be some better way. This all is quite crude. */
3097 HOST_WIDE_INT min_offset, max_offset;
3098 unsigned costs[2][2][2][2];
3099 } *address_cost_data;
3101 DEF_VEC_P (address_cost_data);
3102 DEF_VEC_ALLOC_P (address_cost_data, heap);
3105 get_address_cost (bool symbol_present, bool var_present,
3106 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3107 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3108 addr_space_t as, bool speed,
3109 bool stmt_after_inc, bool *may_autoinc)
3111 static VEC(address_cost_data, heap) *address_cost_data_list;
3112 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3113 address_cost_data data;
3114 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3115 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3116 unsigned cost, acost, complexity;
3117 bool offset_p, ratio_p, autoinc;
3118 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3119 unsigned HOST_WIDE_INT mask;
3122 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3123 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3126 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3130 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3131 HOST_WIDE_INT rat, off;
3132 int old_cse_not_expected;
3133 unsigned sym_p, var_p, off_p, rat_p, add_c;
3134 rtx seq, addr, base;
3137 data = (address_cost_data) xcalloc (1, sizeof (*data));
3139 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3141 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3142 for (i = start; i <= 1 << 20; i <<= 1)
3144 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3145 if (!memory_address_addr_space_p (mem_mode, addr, as))
3148 data->max_offset = i == start ? 0 : i >> 1;
3149 off = data->max_offset;
3151 for (i = start; i <= 1 << 20; i <<= 1)
3153 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3154 if (!memory_address_addr_space_p (mem_mode, addr, as))
3157 data->min_offset = i == start ? 0 : -(i >> 1);
3159 if (dump_file && (dump_flags & TDF_DETAILS))
3161 fprintf (dump_file, "get_address_cost:\n");
3162 fprintf (dump_file, " min offset %s %d\n",
3163 GET_MODE_NAME (mem_mode),
3164 (int) data->min_offset);
3165 fprintf (dump_file, " max offset %s %d\n",
3166 GET_MODE_NAME (mem_mode),
3167 (int) data->max_offset);
3171 for (i = 2; i <= MAX_RATIO; i++)
3172 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3178 /* Compute the cost of various addressing modes. */
3180 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3181 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3183 if (HAVE_PRE_DECREMENT)
3185 addr = gen_rtx_PRE_DEC (Pmode, reg0);
3186 has_predec[mem_mode]
3187 = memory_address_addr_space_p (mem_mode, addr, as);
3189 if (HAVE_POST_DECREMENT)
3191 addr = gen_rtx_POST_DEC (Pmode, reg0);
3192 has_postdec[mem_mode]
3193 = memory_address_addr_space_p (mem_mode, addr, as);
3195 if (HAVE_PRE_INCREMENT)
3197 addr = gen_rtx_PRE_INC (Pmode, reg0);
3198 has_preinc[mem_mode]
3199 = memory_address_addr_space_p (mem_mode, addr, as);
3201 if (HAVE_POST_INCREMENT)
3203 addr = gen_rtx_POST_INC (Pmode, reg0);
3204 has_postinc[mem_mode]
3205 = memory_address_addr_space_p (mem_mode, addr, as);
3207 for (i = 0; i < 16; i++)
3210 var_p = (i >> 1) & 1;
3211 off_p = (i >> 2) & 1;
3212 rat_p = (i >> 3) & 1;
3216 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3217 gen_int_mode (rat, Pmode));
3220 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3224 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3225 /* ??? We can run into trouble with some backends by presenting
3226 it with symbols which haven't been properly passed through
3227 targetm.encode_section_info. By setting the local bit, we
3228 enhance the probability of things working. */
3229 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3232 base = gen_rtx_fmt_e (CONST, Pmode,
3235 gen_int_mode (off, Pmode)));
3238 base = gen_int_mode (off, Pmode);
3243 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3246 /* To avoid splitting addressing modes, pretend that no cse will follow.  */
3248 old_cse_not_expected = cse_not_expected;
3249 cse_not_expected = true;
3250 addr = memory_address_addr_space (mem_mode, addr, as);
3251 cse_not_expected = old_cse_not_expected;
3255 acost = seq_cost (seq, speed);
3256 acost += address_cost (addr, mem_mode, as, speed);
3260 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3263 /* On some targets, it is quite expensive to load symbol to a register,
3264 which makes addresses that contain symbols look much more expensive.
3265 However, the symbol will have to be loaded in any case before the
3266 loop (and quite likely we have it in register already), so it does not
3267 make much sense to penalize them too heavily. So make some final
3268 tweaks for the SYMBOL_PRESENT modes:
3270 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3271 var is cheaper, use this mode with small penalty.
3272 If VAR_PRESENT is true, check whether the mode with
3273 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3274 if this is the case, use it. */
3275 add_c = add_cost (Pmode, speed);
3276 for (i = 0; i < 8; i++)
3279 off_p = (i >> 1) & 1;
3280 rat_p = (i >> 2) & 1;
3282 acost = data->costs[0][1][off_p][rat_p] + 1;
3286 if (acost < data->costs[1][var_p][off_p][rat_p])
3287 data->costs[1][var_p][off_p][rat_p] = acost;
3290 if (dump_file && (dump_flags & TDF_DETAILS))
3292 fprintf (dump_file, "Address costs:\n");
3294 for (i = 0; i < 16; i++)
3297 var_p = (i >> 1) & 1;
3298 off_p = (i >> 2) & 1;
3299 rat_p = (i >> 3) & 1;
3301 fprintf (dump_file, " ");
3303 fprintf (dump_file, "sym + ");
3305 fprintf (dump_file, "var + ");
3307 fprintf (dump_file, "cst + ");
3309 fprintf (dump_file, "rat * ");
3311 acost = data->costs[sym_p][var_p][off_p][rat_p];
3312 fprintf (dump_file, "index costs %d\n", acost);
3314 if (has_predec[mem_mode] || has_postdec[mem_mode]
3315 || has_preinc[mem_mode] || has_postinc[mem_mode])
3316 fprintf (dump_file, " May include autoinc/dec\n");
3317 fprintf (dump_file, "\n");
3320 VEC_replace (address_cost_data, address_cost_data_list,
3324 bits = GET_MODE_BITSIZE (Pmode);
3325 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3327 if ((offset >> (bits - 1) & 1))
3332 msize = GET_MODE_SIZE (mem_mode);
3333 autoinc_offset = offset;
3335 autoinc_offset += ratio * cstep;
3336 if (symbol_present || var_present || ratio != 1)
3338 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3340 || (has_postdec[mem_mode] && autoinc_offset == 0
3342 || (has_preinc[mem_mode] && autoinc_offset == msize
3344 || (has_predec[mem_mode] && autoinc_offset == -msize
3345 && msize == -cstep))
3349 offset_p = (s_offset != 0
3350 && data->min_offset <= s_offset
3351 && s_offset <= data->max_offset);
3352 ratio_p = (ratio != 1
3353 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3355 if (ratio != 1 && !ratio_p)
3356 cost += multiply_by_cost (ratio, Pmode, speed);
3358 if (s_offset && !offset_p && !symbol_present)
3359 cost += add_cost (Pmode, speed);
3362 *may_autoinc = autoinc;
3363 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3364 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3365 return new_cost (cost + acost, complexity);
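/* For illustration (hypothetical address): var + 16 + 4 * index with no
   symbol looks up data->costs[0][1][1][1] and has complexity 3; were 16
   outside [min_offset, max_offset], offset_p would be false and an
   extra addition would have been charged above.  */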
3368 /* Estimates cost of forcing expression EXPR into a variable. */
3371 force_expr_to_var_cost (tree expr, bool speed)
3373 static bool costs_initialized = false;
3374 static unsigned integer_cost [2];
3375 static unsigned symbol_cost [2];
3376 static unsigned address_cost [2];
3378 comp_cost cost0, cost1, cost;
3379 enum machine_mode mode;
3381 if (!costs_initialized)
3383 tree type = build_pointer_type (integer_type_node);
3388 var = create_tmp_var_raw (integer_type_node, "test_var");
3389 TREE_STATIC (var) = 1;
3390 x = produce_memory_decl_rtl (var, NULL);
3391 SET_DECL_RTL (var, x);
3393 addr = build1 (ADDR_EXPR, type, var);
3396 for (i = 0; i < 2; i++)
3398 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3401 symbol_cost[i] = computation_cost (addr, i) + 1;
3404 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3406 build_int_cst (sizetype, 2000)), i) + 1;
3407 if (dump_file && (dump_flags & TDF_DETAILS))
3409 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3410 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3411 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3412 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3413 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3414 fprintf (dump_file, "\n");
3418 costs_initialized = true;
3423 if (SSA_VAR_P (expr))
3426 if (is_gimple_min_invariant (expr))
3428 if (TREE_CODE (expr) == INTEGER_CST)
3429 return new_cost (integer_cost [speed], 0);
3431 if (TREE_CODE (expr) == ADDR_EXPR)
3433 tree obj = TREE_OPERAND (expr, 0);
3435 if (TREE_CODE (obj) == VAR_DECL
3436 || TREE_CODE (obj) == PARM_DECL
3437 || TREE_CODE (obj) == RESULT_DECL)
3438 return new_cost (symbol_cost [speed], 0);
3441 return new_cost (address_cost [speed], 0);
3444 switch (TREE_CODE (expr))
3446 case POINTER_PLUS_EXPR:
3450 op0 = TREE_OPERAND (expr, 0);
3451 op1 = TREE_OPERAND (expr, 1);
3455 if (is_gimple_val (op0))
3458 cost0 = force_expr_to_var_cost (op0, speed);
3460 if (is_gimple_val (op1))
3463 cost1 = force_expr_to_var_cost (op1, speed);
3468 op0 = TREE_OPERAND (expr, 0);
3472 if (is_gimple_val (op0))
3475 cost0 = force_expr_to_var_cost (op0, speed);
3481 /* Just an arbitrary value, FIXME. */
3482 return new_cost (target_spill_cost[speed], 0);
3485 mode = TYPE_MODE (TREE_TYPE (expr));
3486 switch (TREE_CODE (expr))
3488 case POINTER_PLUS_EXPR:
3492 cost = new_cost (add_cost (mode, speed), 0);
3496 if (cst_and_fits_in_hwi (op0))
3497 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3498 else if (cst_and_fits_in_hwi (op1))
3499 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3501 return new_cost (target_spill_cost [speed], 0);
3508 cost = add_costs (cost, cost0);
3509 cost = add_costs (cost, cost1);
3511 /* Bound the cost by target_spill_cost. The parts of complicated
3512 computations often are either loop invariant or at least can
3513 be shared between several iv uses, so letting this grow without
3514 limits would not give reasonable results. */
3515 if (cost.cost > (int) target_spill_cost [speed])
3516 cost.cost = target_spill_cost [speed];
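/* For illustration (hypothetical expression): for a + 4 * b the
   recursion charges multiply_by_cost (4, ...) for the inner MULT_EXPR,
   add_cost for the outer addition and the costs of the operands, with
   the total capped by target_spill_cost just above.  */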
3521 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3522 invariants the computation depends on. */
3525 force_var_cost (struct ivopts_data *data,
3526 tree expr, bitmap *depends_on)
3530 fd_ivopts_data = data;
3531 walk_tree (&expr, find_depends, depends_on, NULL);
3534 return force_expr_to_var_cost (expr, data->speed);
3537 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3538 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3539 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3540 invariants the computation depends on. */
3543 split_address_cost (struct ivopts_data *data,
3544 tree addr, bool *symbol_present, bool *var_present,
3545 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3548 HOST_WIDE_INT bitsize;
3549 HOST_WIDE_INT bitpos;
3551 enum machine_mode mode;
3552 int unsignedp, volatilep;
3554 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3555 &unsignedp, &volatilep, false);
3558 || bitpos % BITS_PER_UNIT != 0
3559 || TREE_CODE (core) != VAR_DECL)
3561 *symbol_present = false;
3562 *var_present = true;
3563 fd_ivopts_data = data;
3564 walk_tree (&addr, find_depends, depends_on, NULL);
3565 return new_cost (target_spill_cost[data->speed], 0);
3568 *offset += bitpos / BITS_PER_UNIT;
3569 if (TREE_STATIC (core)
3570 || DECL_EXTERNAL (core))
3572 *symbol_present = true;
3573 *var_present = false;
3577 *symbol_present = false;
3578 *var_present = true;
3582 /* Estimates cost of expressing difference of addresses E1 - E2 as
3583 var + symbol + offset. The value of offset is added to OFFSET,
3584 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3585 part is missing. DEPENDS_ON is a set of the invariants the computation depends on.  */
3589 ptr_difference_cost (struct ivopts_data *data,
3590 tree e1, tree e2, bool *symbol_present, bool *var_present,
3591 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3593 HOST_WIDE_INT diff = 0;
3594 aff_tree aff_e1, aff_e2;
3597 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3599 if (ptr_difference_const (e1, e2, &diff))
3602 *symbol_present = false;
3603 *var_present = false;
3607 if (integer_zerop (e2))
3608 return split_address_cost (data, TREE_OPERAND (e1, 0),
3609 symbol_present, var_present, offset, depends_on);
3611 *symbol_present = false;
3612 *var_present = true;
3614 type = signed_type_for (TREE_TYPE (e1));
3615 tree_to_aff_combination (e1, type, &aff_e1);
3616 tree_to_aff_combination (e2, type, &aff_e2);
3617 aff_combination_scale (&aff_e2, double_int_minus_one);
3618 aff_combination_add (&aff_e1, &aff_e2);
3620 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3623 /* Estimates cost of expressing difference E1 - E2 as
3624 var + symbol + offset. The value of offset is added to OFFSET,
3625 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3626 part is missing. DEPENDS_ON is a set of the invariants the computation depends on.  */
3630 difference_cost (struct ivopts_data *data,
3631 tree e1, tree e2, bool *symbol_present, bool *var_present,
3632 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3634 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3635 unsigned HOST_WIDE_INT off1, off2;
3636 aff_tree aff_e1, aff_e2;
3639 e1 = strip_offset (e1, &off1);
3640 e2 = strip_offset (e2, &off2);
3641 *offset += off1 - off2;
3646 if (TREE_CODE (e1) == ADDR_EXPR)
3647 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3648 offset, depends_on);
3649 *symbol_present = false;
3651 if (operand_equal_p (e1, e2, 0))
3653 *var_present = false;
3657 *var_present = true;
3659 if (integer_zerop (e2))
3660 return force_var_cost (data, e1, depends_on);
3662 if (integer_zerop (e1))
3664 comp_cost cost = force_var_cost (data, e2, depends_on);
3665 cost.cost += multiply_by_cost (-1, mode, data->speed);
3669 type = signed_type_for (TREE_TYPE (e1));
3670 tree_to_aff_combination (e1, type, &aff_e1);
3671 tree_to_aff_combination (e2, type, &aff_e2);
3672 aff_combination_scale (&aff_e2, double_int_minus_one);
3673 aff_combination_add (&aff_e1, &aff_e2);
3675 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
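/* For illustration: for E1 == &x with a file-scope X and E2 == 0,
   ptr_difference_cost falls through to split_address_cost and reports
   SYMBOL_PRESENT; for E1 == p + 8 and E2 == p, stripping offsets leaves
   equal operands, so only *OFFSET is adjusted by 8 and no variable part
   remains.  */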
3678 /* Determines the cost of the computation by which USE is expressed
3679 from induction variable CAND. If ADDRESS_P is true, we just need
3680 to create an address from it, otherwise we want to get it into a
3681 register. A set of invariants we depend on is stored in
3682 DEPENDS_ON. AT is the statement at which the value is computed.
3683 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3684 addressing is likely. */
3687 get_computation_cost_at (struct ivopts_data *data,
3688 struct iv_use *use, struct iv_cand *cand,
3689 bool address_p, bitmap *depends_on, gimple at,
3692 tree ubase = use->iv->base, ustep = use->iv->step;
3694 tree utype = TREE_TYPE (ubase), ctype;
3695 unsigned HOST_WIDE_INT cstepi, offset = 0;
3696 HOST_WIDE_INT ratio, aratio;
3697 bool var_present, symbol_present, stmt_is_after_inc;
3700 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3704 /* Only consider real candidates. */
3706 return infinite_cost;
3708 cbase = cand->iv->base;
3709 cstep = cand->iv->step;
3710 ctype = TREE_TYPE (cbase);
3712 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3714 /* We do not have enough precision to express the values of USE.  */
3715 return infinite_cost;
3720 /* Do not try to express address of an object with computation based
3721 on address of a different object. This may cause problems in rtl
3722 level alias analysis (which does not expect this to happen,
3723 as it is illegal in C), and would be unlikely to be useful anyway.  */
3725 if (use->iv->base_object
3726 && cand->iv->base_object
3727 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3728 return infinite_cost;
3731 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3733 /* TODO -- add direct handling of this case. */
3737 /* CSTEPI is removed from the offset in case the statement is after the
3738 increment. If the step is not constant, we use zero instead.
3739 This is a bit imprecise (there is the extra addition), but
3740 redundancy elimination is likely to transform the code so that
3741 it uses the value of the variable before the increment anyway,
3742 so it is not all that unrealistic. */
3743 if (cst_and_fits_in_hwi (cstep))
3744 cstepi = int_cst_value (cstep);
3748 if (!constant_multiple_of (ustep, cstep, &rat))
3749 return infinite_cost;
3751 if (double_int_fits_in_shwi_p (rat))
3752 ratio = double_int_to_shwi (rat);
3754 return infinite_cost;
3757 ctype = TREE_TYPE (cbase);
3759 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3760 or ratio == 1, it is better to handle this like
3762 ubase - ratio * cbase + ratio * var
3764 (this also holds in the case ratio == -1, TODO).
3766 if (cst_and_fits_in_hwi (cbase))
3768 offset = - ratio * int_cst_value (cbase);
3769 cost = difference_cost (data,
3770 ubase, build_int_cst (utype, 0),
3771 &symbol_present, &var_present, &offset,
3774 else if (ratio == 1)
3776 cost = difference_cost (data,
3778 &symbol_present, &var_present, &offset,
3782 && !POINTER_TYPE_P (ctype)
3783 && multiplier_allowed_in_address_p
3784 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3785 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3788 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3789 cost = difference_cost (data,
3791 &symbol_present, &var_present, &offset,
3796 cost = force_var_cost (data, cbase, depends_on);
3797 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3798 cost = add_costs (cost,
3799 difference_cost (data,
3800 ubase, build_int_cst (utype, 0),
3801 &symbol_present, &var_present,
3802 &offset, depends_on));
3805 /* If we are after the increment, the value of the candidate is higher by one step, so we compensate in the offset.  */
3807 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3808 if (stmt_is_after_inc)
3809 offset -= ratio * cstepi;
3811 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3812 (symbol/var1/const parts may be omitted). If we are looking for an
3813 address, find the cost of addressing this. */
3815 return add_costs (cost,
3816 get_address_cost (symbol_present, var_present,
3817 offset, ratio, cstepi,
3818 TYPE_MODE (TREE_TYPE (utype)),
3819 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3820 speed, stmt_is_after_inc,
3823 /* Otherwise estimate the costs for computing the expression. */
3824 if (!symbol_present && !var_present && !offset)
3827 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3831 /* Symbol + offset should be compile-time computable so consider that they
3832 are added once to the variable, if present. */
3833 if (var_present && (symbol_present || offset))
3834 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3835 / AVG_LOOP_NITER (data->current_loop);
3837 /* Having an offset does not affect runtime cost in case it is added to a
3838 symbol, but it increases complexity. */
3842 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3844 aratio = ratio > 0 ? ratio : -ratio;
3846 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3850 *can_autoinc = false;
3853 /* Just get the expression, expand it and measure the cost. */
3854 tree comp = get_computation_at (data->current_loop, use, cand, at);
3857 return infinite_cost;
3860 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3862 return new_cost (computation_cost (comp, speed), 0);
3866 /* Determines the cost of the computation by which USE is expressed
3867 from induction variable CAND. If ADDRESS_P is true, we just need
3868 to create an address from it, otherwise we want to get it into a
3869 register. A set of invariants we depend on is stored in
3870 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3871 autoinc addressing is likely. */
3874 get_computation_cost (struct ivopts_data *data,
3875 struct iv_use *use, struct iv_cand *cand,
3876 bool address_p, bitmap *depends_on, bool *can_autoinc)
3878 return get_computation_cost_at (data,
3879 use, cand, address_p, depends_on, use->stmt,
3883 /* Determines cost of basing replacement of USE on CAND in a generic
3887 determine_use_iv_cost_generic (struct ivopts_data *data,
3888 struct iv_use *use, struct iv_cand *cand)
3893 /* The simple case first -- if we need to express value of the preserved
3894 original biv, the cost is 0. This also prevents us from counting the
3895 cost of increment twice -- once at this use and once in the cost of the candidate.  */
3897 if (cand->pos == IP_ORIGINAL
3898 && cand->incremented_at == use->stmt)
3900 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3904 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3905 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3907 return !infinite_cost_p (cost);
3910 /* Determines cost of basing replacement of USE on CAND in an address. */
3913 determine_use_iv_cost_address (struct ivopts_data *data,
3914 struct iv_use *use, struct iv_cand *cand)
3918 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3921 if (cand->ainc_use == use)
3924 cost.cost -= cand->cost_step;
3925 /* If we generated the candidate solely for exploiting autoincrement
3926 opportunities, and it turns out it can't be used, set the cost to
3927 infinity to make sure we ignore it. */
3928 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3929 cost = infinite_cost;
3931 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3933 return !infinite_cost_p (cost);
3936 /* Computes value of candidate CAND at position AT in iteration NITER, and
3937 stores it to VAL. */
3940 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3943 aff_tree step, delta, nit;
3944 struct iv *iv = cand->iv;
3945 tree type = TREE_TYPE (iv->base);
3946 tree steptype = type;
3947 if (POINTER_TYPE_P (type))
3948 steptype = sizetype;
3950 tree_to_aff_combination (iv->step, steptype, &step);
3951 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3952 aff_combination_convert (&nit, steptype);
3953 aff_combination_mult (&nit, &step, &delta);
3954 if (stmt_after_increment (loop, cand, at))
3955 aff_combination_add (&delta, &step);
3957 tree_to_aff_combination (iv->base, type, val);
3958 aff_combination_add (val, &delta);
3961 /* Returns the period of induction variable IV.  */
3964 iv_period (struct iv *iv)
3966 tree step = iv->step, period, type;
3969 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3971 /* The period of the iv is the type range divided by gcd (step, type range).
3972 Since the type range is a power of two, it suffices to determine the maximum power of two that divides the step.  */
3974 pow2div = num_ending_zeros (step);
3975 type = unsigned_type_for (TREE_TYPE (step));
3977 period = build_low_bits_mask (type,
3978 (TYPE_PRECISION (type)
3979 - tree_low_cst (pow2div, 1)));
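/* For illustration: for a 32-bit iv with step 4, num_ending_zeros
   yields 2 and the mask built above has the low 30 bits set
   (0x3fffffff) -- the iv takes 2^30 distinct values before it wraps.  */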
3984 /* Returns the comparison operator used when eliminating the iv USE. */
3986 static enum tree_code
3987 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3989 struct loop *loop = data->current_loop;
3993 ex_bb = gimple_bb (use->stmt);
3994 exit = EDGE_SUCC (ex_bb, 0);
3995 if (flow_bb_inside_loop_p (loop, exit->dest))
3996 exit = EDGE_SUCC (ex_bb, 1);
3998 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
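/* For illustration: for an exit test "if (i < n)" whose false edge
   leaves the loop, the exit edge does not carry EDGE_TRUE_VALUE, so the
   function returns NE_EXPR: the rewritten condition keeps looping while
   the candidate differs from the bound computed by cand_value_at.  */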
4001 /* Check whether it is possible to express the condition in USE by comparison
4002 of candidate CAND. If so, store in BOUND the value that it is compared with.  */
4005 may_eliminate_iv (struct ivopts_data *data,
4006 struct iv_use *use, struct iv_cand *cand, tree *bound)
4011 struct loop *loop = data->current_loop;
4014 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4017 /* For now this works only for exits that dominate the loop latch.
4018 TODO: extend to other conditions inside loop body. */
4019 ex_bb = gimple_bb (use->stmt);
4020 if (use->stmt != last_stmt (ex_bb)
4021 || gimple_code (use->stmt) != GIMPLE_COND
4022 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4025 exit = EDGE_SUCC (ex_bb, 0);
4026 if (flow_bb_inside_loop_p (loop, exit->dest))
4027 exit = EDGE_SUCC (ex_bb, 1);
4028 if (flow_bb_inside_loop_p (loop, exit->dest))
4031 nit = niter_for_exit (data, exit);
4035 /* Determine whether we can use the variable to test the exit condition.
4036 This is the case iff the period of the induction variable is greater
4037 than the number of iterations for which the exit condition is true. */
4038 period = iv_period (cand->iv);
4040 /* If the number of iterations is constant, compare against it directly. */
4041 if (TREE_CODE (nit) == INTEGER_CST)
4043 if (!tree_int_cst_lt (nit, period))
4047 /* If not, and if this is the only possible exit of the loop, see whether
4048 we can get a conservative estimate on the number of iterations of the
4049 entire loop and compare against that instead. */
4050 else if (loop_only_exit_p (loop, exit))
4052 double_int period_value, max_niter;
4053 if (!estimated_loop_iterations (loop, true, &max_niter))
4055 period_value = tree_to_double_int (period);
4056 if (double_int_ucmp (max_niter, period_value) >= 0)
4060 /* Otherwise, punt. */
4064 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4066 *bound = aff_combination_to_tree (&bnd);
4067 /* It is unlikely that computing the number of iterations using division
4068 would be more profitable than keeping the original induction variable. */
4069 if (expression_expensive_p (*bound))
4074 /* Determines cost of basing replacement of USE on CAND in a condition. */
4077 determine_use_iv_cost_condition (struct ivopts_data *data,
4078 struct iv_use *use, struct iv_cand *cand)
4080 tree bound = NULL_TREE;
4082 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4083 comp_cost elim_cost, express_cost, cost;
4086 /* Only consider real candidates. */
4089 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4093 /* Try iv elimination. */
4094 if (may_eliminate_iv (data, use, cand, &bound))
4096 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4097 /* The bound is a loop invariant, so it will be computed only once.  */
4099 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4102 elim_cost = infinite_cost;
4104 /* Try expressing the original giv. If it is compared with an invariant,
4105 note that we cannot get rid of it. */
4106 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
4109 express_cost = get_computation_cost (data, use, cand, false,
4110 &depends_on_express, NULL);
4111 fd_ivopts_data = data;
4112 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4114 /* Choose the better approach, preferring the eliminated IV. */
4115 if (compare_costs (elim_cost, express_cost) <= 0)
4118 depends_on = depends_on_elim;
4119 depends_on_elim = NULL;
4123 cost = express_cost;
4124 depends_on = depends_on_express;
4125 depends_on_express = NULL;
4129 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4131 if (depends_on_elim)
4132 BITMAP_FREE (depends_on_elim);
4133 if (depends_on_express)
4134 BITMAP_FREE (depends_on_express);
4136 return !infinite_cost_p (cost);
4139 /* Determines cost of basing replacement of USE on CAND. Returns false
4140 if USE cannot be based on CAND. */
4143 determine_use_iv_cost (struct ivopts_data *data,
4144 struct iv_use *use, struct iv_cand *cand)
4148 case USE_NONLINEAR_EXPR:
4149 return determine_use_iv_cost_generic (data, use, cand);
4152 return determine_use_iv_cost_address (data, use, cand);
4155 return determine_use_iv_cost_condition (data, use, cand);
4162 /* Return true if get_computation_cost indicates that autoincrement is
4163 a possibility for the pair of USE and CAND, false otherwise. */
4166 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4167 struct iv_cand *cand)
4173 if (use->type != USE_ADDRESS)
4176 cost = get_computation_cost (data, use, cand, true, &depends_on,
4179 BITMAP_FREE (depends_on);
4181 return !infinite_cost_p (cost) && can_autoinc;
4184 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4185 use that allows autoincrement, and set their AINC_USE if possible. */
4188 set_autoinc_for_original_candidates (struct ivopts_data *data)
4192 for (i = 0; i < n_iv_cands (data); i++)
4194 struct iv_cand *cand = iv_cand (data, i);
4195 struct iv_use *closest = NULL;
4196 if (cand->pos != IP_ORIGINAL)
4198 for (j = 0; j < n_iv_uses (data); j++)
4200 struct iv_use *use = iv_use (data, j);
4201 unsigned uid = gimple_uid (use->stmt);
4202 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4203 || uid > gimple_uid (cand->incremented_at))
4205 if (closest == NULL || uid > gimple_uid (closest->stmt))
4208 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4210 cand->ainc_use = closest;
4214 /* Finds the candidates for the induction variables. */
4217 find_iv_candidates (struct ivopts_data *data)
4219 /* Add commonly used ivs. */
4220 add_standard_iv_candidates (data);
4222 /* Add old induction variables. */
4223 add_old_ivs_candidates (data);
4225 /* Add induction variables derived from uses. */
4226 add_derived_ivs_candidates (data);
4228 set_autoinc_for_original_candidates (data);
4230 /* Record the important candidates. */
4231 record_important_candidates (data);
4234 /* Determines costs of basing the use of the iv on an iv candidate. */
4237 determine_use_iv_costs (struct ivopts_data *data)
4241 struct iv_cand *cand;
4242 bitmap to_clear = BITMAP_ALLOC (NULL);
4244 alloc_use_cost_map (data);
4246 for (i = 0; i < n_iv_uses (data); i++)
4248 use = iv_use (data, i);
4250 if (data->consider_all_candidates)
4252 for (j = 0; j < n_iv_cands (data); j++)
4254 cand = iv_cand (data, j);
4255 determine_use_iv_cost (data, use, cand);
4262 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4264 cand = iv_cand (data, j);
4265 if (!determine_use_iv_cost (data, use, cand))
4266 bitmap_set_bit (to_clear, j);
4269 /* Remove the candidates for which the cost is infinite from
4270 the list of related candidates. */
4271 bitmap_and_compl_into (use->related_cands, to_clear);
4272 bitmap_clear (to_clear);
4276 BITMAP_FREE (to_clear);
4278 if (dump_file && (dump_flags & TDF_DETAILS))
4280 fprintf (dump_file, "Use-candidate costs:\n");
4282 for (i = 0; i < n_iv_uses (data); i++)
4284 use = iv_use (data, i);
4286 fprintf (dump_file, "Use %d:\n", i);
4287 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4288 for (j = 0; j < use->n_map_members; j++)
4290 if (!use->cost_map[j].cand
4291 || infinite_cost_p (use->cost_map[j].cost))
4294 fprintf (dump_file, " %d\t%d\t%d\t",
4295 use->cost_map[j].cand->id,
4296 use->cost_map[j].cost.cost,
4297 use->cost_map[j].cost.complexity);
4298 if (use->cost_map[j].depends_on)
4299 bitmap_print (dump_file,
4300 use->cost_map[j].depends_on, "","");
4301 fprintf (dump_file, "\n");
4304 fprintf (dump_file, "\n");
4306 fprintf (dump_file, "\n");
4310 /* Determines cost of the candidate CAND. */
4313 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4315 comp_cost cost_base;
4316 unsigned cost, cost_step;
4325 /* There are two costs associated with the candidate -- its increment
4326 and its initialization. The second is almost negligible for any loop
4327 that rolls enough, so we take it into account only a little.  */
4329 base = cand->iv->base;
4330 cost_base = force_var_cost (data, base, NULL);
4331 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4333 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
4335 /* Prefer the original ivs unless we may gain something by replacing them.
4336 The reason is to make debugging simpler; so this is not relevant for
4337 artificial ivs created by other optimization passes. */
4338 if (cand->pos != IP_ORIGINAL
4339 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4342 /* Prefer not to insert statements into the latch unless there are some
4343 already (so that we do not create unnecessary jumps). */
4344 if (cand->pos == IP_END
4345 && empty_block_p (ip_end_pos (data->current_loop)))
4349 cand->cost_step = cost_step;
4352 /* Determines costs of computation of the candidates. */
4355 determine_iv_costs (struct ivopts_data *data)
4359 if (dump_file && (dump_flags & TDF_DETAILS))
4361 fprintf (dump_file, "Candidate costs:\n");
4362 fprintf (dump_file, " cand\tcost\n");
4365 for (i = 0; i < n_iv_cands (data); i++)
4367 struct iv_cand *cand = iv_cand (data, i);
4369 determine_iv_cost (data, cand);
4371 if (dump_file && (dump_flags & TDF_DETAILS))
4372 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4375 if (dump_file && (dump_flags & TDF_DETAILS))
4376 fprintf (dump_file, "\n");
4379 /* Calculates cost for having SIZE induction variables. */
4382 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4384 /* We add size to the cost, so that we prefer eliminating ivs if possible.  */
4386 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4389 /* For each size of the induction variable set determine the penalty. */
4392 determine_set_costs (struct ivopts_data *data)
4396 gimple_stmt_iterator psi;
4398 struct loop *loop = data->current_loop;
4401 /* We use the following model (definitely improvable, especially the
4402 cost function -- TODO):
4404 We estimate the number of registers available (using MD data), name it A.
4406 We estimate the number of registers used by the loop, name it U. This
4407 number is obtained as the number of loop phi nodes (not counting virtual
4408 registers and bivs) + the number of variables from outside of the loop.
4410 We set a reserve R (free regs that are used for temporary computations,
4411 etc.). For now the reserve is a constant 3.
4413 Let I be the number of induction variables.
4415 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4416 encourage making a lot of ivs without a reason).
4417 -- if A - R < U + I <= A, the cost is I * PRES_COST
4418 -- if U + I > A, the cost is I * PRES_COST and
4419 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
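/* A worked example of the model (hypothetical numbers): with A == 8,
   R == 3 and U == 4, a set of I == 2 ivs has U + I + R == 9 > A but
   U + I == 6 <= A, so it lands in the middle band and costs
   2 * PRES_COST; growing the set to I == 5 pushes U + I to 9 > A and
   adds the spill term as well.  */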
4421 if (dump_file && (dump_flags & TDF_DETAILS))
4423 fprintf (dump_file, "Global costs:\n");
4424 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4425 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4426 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4430 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4432 phi = gsi_stmt (psi);
4433 op = PHI_RESULT (phi);
4435 if (!is_gimple_reg (op))
4438 if (get_iv (data, op))
4444 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4446 struct version_info *info = ver_info (data, j);
4448 if (info->inv_id && info->has_nonlin_use)
4452 data->regs_used = n;
4453 if (dump_file && (dump_flags & TDF_DETAILS))
4454 fprintf (dump_file, " regs_used %d\n", n);
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4458 fprintf (dump_file, " cost for size:\n");
4459 fprintf (dump_file, " ivs\tcost\n");
4460 for (j = 0; j <= 2 * target_avail_regs; j++)
4461 fprintf (dump_file, " %d\t%d\n", j,
4462 ivopts_global_cost_for_size (data, j));
4463 fprintf (dump_file, "\n");
4467 /* Returns true if A is a cheaper cost pair than B. */
4470 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4480 cmp = compare_costs (a->cost, b->cost);
4487 /* In case the costs are the same, prefer the cheaper candidate. */
4488 if (a->cand->cost < b->cand->cost)
4494 /* Computes the cost field of IVS structure. */
4497 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4499 comp_cost cost = ivs->cand_use_cost;
4500 cost.cost += ivs->cand_cost;
4501 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4506 /* Remove the invariants in set INVS from set IVS.  */
4509 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4517 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4519 ivs->n_invariant_uses[iid]--;
4520 if (ivs->n_invariant_uses[iid] == 0)
4525 /* Set USE not to be expressed by any candidate in IVS. */
4528 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4531 unsigned uid = use->id, cid;
4532 struct cost_pair *cp;
4534 cp = ivs->cand_for_use[uid];
4540 ivs->cand_for_use[uid] = NULL;
4541 ivs->n_cand_uses[cid]--;
4543 if (ivs->n_cand_uses[cid] == 0)
4545 bitmap_clear_bit (ivs->cands, cid);
4546 /* Do not count the pseudocandidates. */
4550 ivs->cand_cost -= cp->cand->cost;
4552 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4555 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4557 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4558 iv_ca_recount_cost (data, ivs);
4561 /* Add invariants in set INVS to set IVS. */
4564 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4572 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4574 ivs->n_invariant_uses[iid]++;
4575 if (ivs->n_invariant_uses[iid] == 1)
4580 /* Set cost pair for USE in set IVS to CP. */
4583 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4584 struct iv_use *use, struct cost_pair *cp)
4586 unsigned uid = use->id, cid;
4588 if (ivs->cand_for_use[uid] == cp)
4591 if (ivs->cand_for_use[uid])
4592 iv_ca_set_no_cp (data, ivs, use);
4599 ivs->cand_for_use[uid] = cp;
4600 ivs->n_cand_uses[cid]++;
4601 if (ivs->n_cand_uses[cid] == 1)
4603 bitmap_set_bit (ivs->cands, cid);
4604 /* Do not count the pseudocandidates. */
4608 ivs->cand_cost += cp->cand->cost;
4610 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4613 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4614 iv_ca_set_add_invariants (ivs, cp->depends_on);
4615 iv_ca_recount_cost (data, ivs);
4619 /* Extend set IVS by expressing USE by some of the candidates in it if possible.  */
4623 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4626 struct cost_pair *best_cp = NULL, *cp;
4630 gcc_assert (ivs->upto >= use->id);
4632 if (ivs->upto == use->id)
4638 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4640 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4642 if (cheaper_cost_pair (cp, best_cp))
4646 iv_ca_set_cp (data, ivs, use, best_cp);
4649 /* Get cost for assignment IVS. */
4652 iv_ca_cost (struct iv_ca *ivs)
4654 /* This was a conditional expression but it triggered a bug in Sun C 5.5.  */
4657 return infinite_cost;
4662 /* Returns true if all dependences of CP are among invariants in IVS. */
4665 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4670 if (!cp->depends_on)
4673 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4675 if (ivs->n_invariant_uses[i] == 0)
4682 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
4683 it before NEXT_CHANGE. */
4685 static struct iv_ca_delta *
4686 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4687 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4689 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4692 change->old_cp = old_cp;
4693 change->new_cp = new_cp;
4694 change->next_change = next_change;
4699 /* Joins two lists of changes L1 and L2. Destructive -- old lists are rewritten.  */
4702 static struct iv_ca_delta *
4703 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4705 struct iv_ca_delta *last;
4713 for (last = l1; last->next_change; last = last->next_change)
4715 last->next_change = l2;
4720 /* Returns candidate by that USE is expressed in IVS. */
4722 static struct cost_pair *
4723 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4725 return ivs->cand_for_use[use->id];
4728 /* Reverse the list of changes DELTA, forming the inverse to it. */
4730 static struct iv_ca_delta *
4731 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4733 struct iv_ca_delta *act, *next, *prev = NULL;
4734 struct cost_pair *tmp;
4736 for (act = delta; act; act = next)
4738 next = act->next_change;
4739 act->next_change = prev;
4743 act->old_cp = act->new_cp;
4750 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4751 reverted instead. */
4754 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4755 struct iv_ca_delta *delta, bool forward)
4757 struct cost_pair *from, *to;
4758 struct iv_ca_delta *act;
4761 delta = iv_ca_delta_reverse (delta);
4763 for (act = delta; act; act = act->next_change)
4767 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4768 iv_ca_set_cp (data, ivs, act->use, to);
4772 iv_ca_delta_reverse (delta);
4775 /* Returns true if CAND is used in IVS. */
4778 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4780 return ivs->n_cand_uses[cand->id] > 0;
4783 /* Returns number of induction variable candidates in the set IVS. */
4786 iv_ca_n_cands (struct iv_ca *ivs)
4788 return ivs->n_cands;
4791 /* Free the list of changes DELTA. */
4794 iv_ca_delta_free (struct iv_ca_delta **delta)
4796 struct iv_ca_delta *act, *next;
4798 for (act = *delta; act; act = next)
4800 next = act->next_change;
4807 /* Allocates new iv candidates assignment. */
4809 static struct iv_ca *
4810 iv_ca_new (struct ivopts_data *data)
4812 struct iv_ca *nw = XNEW (struct iv_ca);
4816 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4817 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4818 nw->cands = BITMAP_ALLOC (NULL);
4821 nw->cand_use_cost = zero_cost;
4823 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4824 nw->cost = zero_cost;
4829 /* Free memory occupied by the set IVS. */
4832 iv_ca_free (struct iv_ca **ivs)
4834 free ((*ivs)->cand_for_use);
4835 free ((*ivs)->n_cand_uses);
4836 BITMAP_FREE ((*ivs)->cands);
4837 free ((*ivs)->n_invariant_uses);
4842 /* Dumps IVS to FILE. */
4845 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4847 const char *pref = " invariants ";
4849 comp_cost cost = iv_ca_cost (ivs);
4851 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4852 bitmap_print (file, ivs->cands, " candidates ","\n");
4854 for (i = 1; i <= data->max_inv_id; i++)
4855 if (ivs->n_invariant_uses[i])
4857 fprintf (file, "%s%d", pref, i);
4860 fprintf (file, "\n");
4863 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4864 new set, and store differences in DELTA. Number of induction variables
4865 in the new set is stored in N_IVS.  */
4868 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4869 struct iv_cand *cand, struct iv_ca_delta **delta,
4875 struct cost_pair *old_cp, *new_cp;
4878 for (i = 0; i < ivs->upto; i++)
4880 use = iv_use (data, i);
4881 old_cp = iv_ca_cand_for_use (ivs, use);
4884 && old_cp->cand == cand)
4887 new_cp = get_use_iv_cost (data, use, cand);
4891 if (!iv_ca_has_deps (ivs, new_cp))
4894 if (!cheaper_cost_pair (new_cp, old_cp))
4897 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4900 iv_ca_delta_commit (data, ivs, *delta, true);
4901 cost = iv_ca_cost (ivs);
4903 *n_ivs = iv_ca_n_cands (ivs);
4904 iv_ca_delta_commit (data, ivs, *delta, false);
4909 /* Try narrowing set IVS by removing CAND. Return the cost of
4910 the new set and store the differences in DELTA. */
4913 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4914 struct iv_cand *cand, struct iv_ca_delta **delta)
4918 struct cost_pair *old_cp, *new_cp, *cp;
4920 struct iv_cand *cnd;
4924 for (i = 0; i < n_iv_uses (data); i++)
4926 use = iv_use (data, i);
4928 old_cp = iv_ca_cand_for_use (ivs, use);
4929 if (old_cp->cand != cand)
4934 if (data->consider_all_candidates)
4936 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4941 cnd = iv_cand (data, ci);
4943 cp = get_use_iv_cost (data, use, cnd);
4946 if (!iv_ca_has_deps (ivs, cp))
4949 if (!cheaper_cost_pair (cp, new_cp))
4957 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4962 cnd = iv_cand (data, ci);
4964 cp = get_use_iv_cost (data, use, cnd);
4967 if (!iv_ca_has_deps (ivs, cp))
4970 if (!cheaper_cost_pair (cp, new_cp))
4979 iv_ca_delta_free (delta);
4980 return infinite_cost;
4983 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4986 iv_ca_delta_commit (data, ivs, *delta, true);
4987 cost = iv_ca_cost (ivs);
4988 iv_ca_delta_commit (data, ivs, *delta, false);
4993 /* Try optimizing the set of candidates IVS by removing candidates different
4994 from EXCEPT_CAND from it. Return cost of the new set, and store
4995 differences in DELTA. */
static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the use-specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added many use-specific ivs at this point, the optimization algorithm
     would later be likely to get stuck in a local minimum, causing us to
     create too many ivs.  Going from few ivs to more seems more likely to
     be successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
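  /* For instance, in a loop such as

       for (i = 0; i < n; i++)
         {
           a[i] = ...;
           b[i] = ...;
         }

     expressing both addresses in terms of the single counter I is usually
     cheaper than introducing a separate pointer iv for each array.  */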
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important && cand->iv->base_object == NULL_TREE)
            continue;

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve the set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
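      /* ALWAYS_PRUNE_CAND_SET_BOUND is derived from the
         iv-always-prune-cand-set-bound --param; pruning examines every
         candidate in the set, so it is done eagerly only while the set
         stays small.  */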
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
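/* Schematically, the search below amounts to (a sketch of the control flow,
   not additional functionality):

     set = get_initial_solution (data);
     while (try_improve_iv_set (data, set))
       ;

   Since every accepted step strictly decreases the cost of SET, the loop is
   guaranteed to terminate.  */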
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */
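/* For a candidate placed at IP_ORIGINAL the variable already exists in the
   loop and only needs to be preserved; for the other positions a fresh
   variable is materialized by create_iv at the chosen increment point.  */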
static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
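  /* E.g. if the statement is the original biv increment

       i_7 = i_4 + 1

     and CAND is that very iv, the statement can often be kept as it is
     (the SSA names here are illustrative).  */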
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
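      /* E.g. an increment by a constant or directly by STEP may stay as it
         is; an increment by some other SSA name may not, since the
         definition of that name could be eliminated later.  */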
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
                                 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
                      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
          && TREE_CODE (*op) == SSA_NAME)
        *op = SSA_NAME_VAR (*op);
    }

  return true;
}
/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */
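/* The address is first expressed as an affine combination in terms of CAND
   (see get_computation_aff) and then lowered by create_mem_ref to the most
   profitable TARGET_MEM_REF the target's addressing modes allow.  */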
static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
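/* If CAND allows eliminating the exit test, the comparison is replaced by
   one against a bound computed in the loop preheader; schematically
   (illustrative SSA names)

     if (i_4 < n_6)    becomes    if (p_8 != bound_9)

   Otherwise the compared value is merely re-expressed in terms of CAND.  */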
static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_use *use;
  struct iv_cand *cand;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
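  /* An iv can be removed if it has a nonzero step (i.e. it is not a loop
     invariant), it was not assigned an invariant id, no rewritten use still
     needs it, and it is not a biv we decided to preserve.  */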
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}