/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is chosen.
   The cost function assigns a cost to sets of induction variables and consists
   of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
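
/* As a simple illustration of the overall effect, a loop such as

     for (i = 0; i < 100; i++)
       a[i] = 0;

   may be strength-reduced so that the array address itself becomes the
   induction variable and the exit test is rewritten to use it:

     for (p = a; p < a + 100; p++)
       *p = 0;

   eliminating the original counter i entirely (assuming its final value
   is not needed after the loop).  */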
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if so.  */
};
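
/* For instance, in a loop "for (i = 0; i < n; i++)" the iv describing i
   has base 0 and step 1; for a pointer iv advanced by "p += 4" inside
   the loop, base_object records the memory object P points into, so that
   ivs based on different objects are never mixed.  */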
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};
/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;   /* Number of registers used.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_OUTER,            /* The induction variable is used outside the loop.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};
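
/* E.g. with IP_NORMAL the increment "i' = i + 1" is emitted just before
   the exit test "if (i' < n)" at the end of the loop body, while with
   IP_END it is emitted in the latch block, after the exit test has
   already been performed on the old value of i.  */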
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The data for LOOP.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_OUTER:
      fprintf (file, "  outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists a number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
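
/* For example, with BITS == 8, divide (8, 6, 3, &x) sets x to 2, since
   2 * 3 == 6 (mod 256), while divide (8, 253, 3, &x) sets x to -1, since
   -1 * 3 == -3 == 253 (mod 256).  For divide (8, 1, 2, &x) there is no
   solution, as an even number times anything cannot be odd modulo a
   power of two.  */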
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};
/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}
/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
                                                     exit, &nfe_desc->niter,
                                                     true);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
/* Returns structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  LOOPS is the loop tree.  */

static void
tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);

  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));

  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
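
/* So e.g. for EXPR == &a.x + 4 * i the object &a is returned, while for
   a pointer whose value cannot be traced back to a particular object
   (say, one loaded from memory), the expression itself is used.  */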
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          fprintf (dump_file, "\n");

          fprintf (dump_file, "  may be zero if ");
          print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing the ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it
   as TYPE.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
                                       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
                  || use->type == USE_OUTER);

      if (type == USE_NONLINEAR_EXPR)
        use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}

/* Records a definition of induction variable OP that is used outside of the
   loop.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any sensible
         induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
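
/* E.g. for the exit test "if (i < n)" with i an iv and n a loop
   invariant, a USE_COMPARE use based on i is recorded; this is what
   later allows the test to be replaced by an equivalent one on another
   iv (induction variable elimination).  */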
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* FIXME: convert_step should not be used outside chrec_convert: fix
     this by calling chrec_convert.  */
  iv_step = convert_step (dta->ivopts_data->current_loop,
                          sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_outer (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
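
/* For example, for "int arr[10]" strip_offset (&arr[3], &off) returns
   &arr[0] and sets off to 12 (assuming a 4-byte int), so that candidates
   without the constant displacement can be formed.  */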
/* Returns a variant of TYPE that can be used as a base for different uses.
   For integer types, we return the unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
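
/* Thus e.g. "int" becomes "unsigned int" and any pointer type becomes
   "void *"; candidates with the same base and step in these generic
   types can then be shared between uses of different original types.  */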
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough that many uses may be based on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
/* Possibly adds pseudocandidate for replacing the final value of USE by
   a direct computation.  */

static void
add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
{
  struct tree_niter_desc *niter;

  /* We must know where we exit the loop and how many times it iterates.  */
  niter = niter_for_single_dom_exit (data);
  if (!niter
      || !zero_p (niter->may_be_zero))
    return;

  add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        case USE_OUTER:
          add_iv_value_candidates (data, use->iv, use);

          /* Additionally, add the pseudocandidate for the possibility to
             replace the final value by a direct computation.  */
          add_iv_outer_candidates (data, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that taking the modulus by it
             is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand, unsigned cost,
                 bitmap depends_on, tree value)
{
  unsigned i, s;

  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
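
/* Since n_map_members is a power of two, "cand->id & (n_map_members - 1)"
   equals "cand->id % n_map_members"; e.g. with 8 map members, candidates
   11 and 19 both start probing at slot 3.  set_use_iv_cost above fills the
   first free slot found by the same linear probing, so the lookup here is
   guaranteed to visit the slot where the pair was stored.  */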
/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement STMT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
        return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
                                      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
        return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
        return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    default:
      break;
    }

  if (TREE_CODE (bot) != INTEGER_CST)
    return NULL_TREE;

  bot = fold_convert (type, bot);
  top = fold_convert (type, top);

  /* If BOT seems to be negative, try dividing by -BOT instead, and negate
     the result afterwards.  */
  if (tree_int_cst_sign_bit (bot))
    {
      negate = true;
      bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
    }
  else
    negate = false;

  /* Ditto for TOP.  */
  if (tree_int_cst_sign_bit (top))
    {
      negate = !negate;
      top = fold_unary_to_constant (NEGATE_EXPR, type, top);
    }

  if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
    return NULL_TREE;

  res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
  if (negate)
    res = fold_unary_to_constant (NEGATE_EXPR, type, res);

  return res;
}
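
/* E.g. constant_multiple_of (type, x * 12, x) returns 12, and
   constant_multiple_of (type, 12, -4) returns -3, while
   constant_multiple_of (type, 12, 5) returns NULL_TREE since 12 is not
   a constant multiple of 5.  */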
/* Sets COMB to CST.  */

static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
                       unsigned HOST_WIDE_INT cst)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 0;
  comb->rest = NULL_TREE;
  comb->offset = cst & comb->mask;
}
/* Sets COMB to single element ELT.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}
/* Scales COMB by SCALE.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
                       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
        j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      if (comb->n < MAX_AFF_ELTS)
        {
          comb->coefs[comb->n] = scale;
          comb->elts[comb->n] = comb->rest;
          comb->rest = NULL_TREE;
          comb->n++;
        }
      else
        comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
                                  build_int_cst_type (comb->type, scale));
    }
}
/* Adds ELT * SCALE to COMB.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
                         unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
        comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
        if (comb->coefs[i])
          return;

        comb->n--;
        comb->coefs[i] = comb->coefs[comb->n];
        comb->elts[i] = comb->elts[comb->n];

        if (comb->rest)
          {
            gcc_assert (comb->n == MAX_AFF_ELTS - 1);
            comb->coefs[comb->n] = 1;
            comb->elts[comb->n] = comb->rest;
            comb->rest = NULL_TREE;
            comb->n++;
          }
        return;
      }
  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
                       fold_convert (comb->type, elt),
                       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
/* Adds COMB2 to COMB1.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
		     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}

/* Splits EXPR into an affine combination of parts.  */

static void
tree_to_aff_combination (tree expr, tree type,
			 struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
	aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
	break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
				  &toffset, &mode, &unsignedp, &volatilep,
				  false);
      if (bitpos % BITS_PER_UNIT != 0)
	break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
	aff_combination_add_elt (comb, core, 1);
      else
	{
	  tree_to_aff_combination (core, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      if (toffset)
	{
	  tree_to_aff_combination (toffset, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      return;

    default:
      break;
    }

  aff_combination_elt (comb, type, expr);
}

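/* For example, (x + 3) - 2*y decomposes into offset 3 and elements
   {x, y} with coefficients {1, -2 modulo 2^prec}; for &a[i] the
   ADDR_EXPR case records &a as an element and recurses on the byte
   offset i * element_size returned by get_inner_reference.  */
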
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
		 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
	return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  if (scale == mask)
    {
      if (!expr)
	return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
			build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
		     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}

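/* The test (scale | (mask >> 1)) == mask above checks whether the top
   bit of SCALE within the precision is set, i.e. whether the coefficient
   is negative; e.g. with a 32-bit mask, scale 0xfffffffe (-2) is emitted
   as a subtraction of elt * 2.  */
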
/* Copies the tree elements of COMB to ensure that they are not shared.  */

static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
  unsigned i;

  for (i = 0; i < comb->n; i++)
    comb->elts[i] = unshare_expr (comb->elts[i]);
  if (comb->rest)
    comb->rest = unshare_expr (comb->rest);
}

/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
			    comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
			  comb->mask);
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, tree at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not noop, we must take it into account when
	 considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
	cstep_orig = cstep;
    }

  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
	{
	  /* TODO maybe consider case when ustep divides cstep and the ratio is
	     a power of 2 (so that the division is fast to execute)?  We would
	     need to be much more careful with overflows etc. then.  */
	  return false;
	}

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
      if (!ratio)
	return false;

      /* Ratioi is only used to detect special cases when the multiplicative
	 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
	 we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
	 to integer_onep/integer_all_onesp, since the former ignores
	 TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
	ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
	ratioi = 1;
      else if (integer_all_onesp (ratio))
	ratioi = -1;
      else
	ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
	 should not be very common, and fold itself is not that bad either,
	 so making the aff. functions more complicated to handle this case
	 is not that urgent.  */
      if (ratioi == 1)
	{
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
	}
      else if (ratioi == -1)
	{
	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
	}
      else
	{
	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
	}

      aff->type = uutype;
      aff->n = 0;
      aff->offset = 0;
      aff->mask = 0;
      aff->rest = expr;
      return true;
    }

  /* If we got here, the type fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}

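/* As a concrete example of the rewrite, expressing a use with base 0 and
   step 4 using a candidate with base 0 and step 1 gives ratio 4, so the
   value of the use at AT becomes 0 - 4*0 + 4*var, i.e. just 4*var.  */
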
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  struct affine_tree_combination aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}

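/* The register numbers just past LAST_VIRTUAL_REGISTER stand in for
   arbitrary pseudos here, so the generated sequence measures what a
   generic reg + reg addition costs through the target's expanders; the
   result is cached per mode in COSTS.  */
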
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = entry1;
  const struct mbc_entry *e2 = entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

static unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in address.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (Pmode, addr))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}

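/* On i386, for instance, scaled indexing supports multipliers 1, 2, 4
   and 8 only, so just those bits of VALID_MULT end up set, and e.g.
   RATIO == 3 forces the multiplication to be carried out by separate
   arithmetic whose cost is accounted for elsewhere.  */
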
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i))
	  {
	    rat = i;
	    break;
	  }
    }

  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      int old_cse_not_expected;
      acost = 0;

      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  gen_int_mode (off, Pmode)));
	}
      else if (offset_p)
	base = gen_int_mode (off, Pmode);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      start_sequence ();
      /* To avoid splitting addressing modes, pretend that no cse will
	 follow.  */
      old_cse_not_expected = cse_not_expected;
      cse_not_expected = true;
      addr = memory_address (Pmode, addr);
      cse_not_expected = old_cse_not_expected;
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}

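/* The cached ACOST covers the addressing-mode shapes the target can
   legitimize directly (e.g. sym + off + reg + index * rat), while the
   COST accumulated above pays for the parts that do not fit: a
   disallowed ratio costs an explicit multiplication and an out-of-range
   offset an explicit addition.  */
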
/* Estimates cost of forcing expression EXPR into a variable.  */

static unsigned
force_expr_to_var_cost (tree expr)
{
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							   2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_expr_to_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}

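/* For instance, forcing (a_1 + 16) * 5 costs multiply_by_cost (5, mode)
   plus add_cost (mode) for the inner addition, the sum clamped to
   target_spill_cost; a multiplication of two non-constant operands falls
   back to target_spill_cost directly.  */
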
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static unsigned
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return target_spill_cost;
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return 0;
    }

  *symbol_present = false;
  *var_present = true;
  return 0;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */