1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-pass.h"
42 /* TODO: Support for predicated code motion. I.e.
53 Where COND and INV are invariants, but evaluating INV may trap or be
54 invalid from some other reason if !COND. This may be transformed to
64 /* A type for the list of statements that have to be moved in order to be able
65 to hoist an invariant computation. */
73 /* The auxiliary data kept for each statement. */
/* Fields of struct lim_aux_data, the per-statement bookkeeping used by
   the invariant-motion pass, plus the LIM_DATA accessor macro.
   NOTE(review): this listing is truncated -- the struct header, braces
   and some lines are not shown; verify against the complete file.  */
77 struct loop *max_loop; /* The outermost loop in that the statement
80 struct loop *tgt_loop; /* The loop out of that we want to move the
83 struct loop *always_executed_in;
84 /* The outermost loop for that we are sure
85 the statement is executed if the loop
88 bool sm_done; /* True iff the store motion for a memory
89 reference in the statement has already
92 unsigned cost; /* Cost of the computation performed by the
95 struct depend *depends; /* List of statements that must be also hoisted
96 out of the loop when this statement is
97 hoisted; i.e. those that define the operands
98 of the statement and are inside of the
/* PHI nodes keep their aux data elsewhere; other statements store a
   pointer to struct lim_aux_data in stmt_ann ()->common.aux.  */
102 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
106 /* Description of a memory reference location for store motion. */
/* One occurrence of a memory reference: a pointer into the statement
   where it appears, linked into a singly-linked list.  */
110 tree *ref; /* The reference itself. */
111 tree stmt; /* The statement in that it occurs. */
112 struct mem_ref_loc *next; /* Next use in the chain. */
115 /* Description of a memory reference for store motion. */
/* A memory reference candidate for store motion, with its hash, all of
   its occurrences, and the virtual operands it touches.
   NOTE(review): struct header/braces not visible in this truncated
   listing.  */
119 tree mem; /* The memory itself. */
120 hashval_t hash; /* Its hash value. */
121 bool is_stored; /* True if there is a store to the location
123 struct mem_ref_loc *locs; /* The locations where it is found. */
124 bitmap vops; /* Vops corresponding to this memory
126 struct mem_ref *next; /* Next memory reference in the list.
127 Memory references are stored in a hash
128 table, but the hash function depends
129 on values of pointers. Thus we cannot use
130 htab_traverse, since then we would get
131 miscompares during bootstrap (although the
132 produced code would be correct). */
135 /* Minimum cost of an expensive expression. */
/* Controlled by --param lim-expensive; statements at or above this cost
   are always hoisted to the outermost profitable level.  */
136 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
138 /* The outermost loop for that execution of the header guarantees that the
139 block will be executed. */
/* Stored in the basic block's aux field by the pass setup code.  */
140 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
142 /* Calls CBCK for each index in memory reference ADDR_P. There are two
143 kinds of situations handled; in each of these cases, the memory reference
144 and DATA are passed to the callback:
146 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
147 pass the pointer to the index to the callback.
149 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
150 pointer to addr to the callback.
152 If the callback returns false, the whole search stops and false is returned.
153 Otherwise the function returns true after traversing through the whole
154 reference *ADDR_P. */
/* Walks reference *ADDR_P from the outside in, invoking CBCK on each
   index position (array indices, dereferenced pointers, TMR base/index).
   NOTE(review): truncated listing -- case labels, braces, returns and
   the default case are missing between the numbered lines; do not edit
   without the complete file.  */
157 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
161 for (; ; addr_p = nxt)
163 switch (TREE_CODE (*addr_p))
166 return cbck (*addr_p, addr_p, data);
168 case MISALIGNED_INDIRECT_REF:
169 case ALIGN_INDIRECT_REF:
171 nxt = &TREE_OPERAND (*addr_p, 0);
/* For pointer dereferences, the "index" passed to the callback is the
   address operand itself.  */
172 return cbck (*addr_p, nxt, data);
175 case VIEW_CONVERT_EXPR:
178 nxt = &TREE_OPERAND (*addr_p, 0);
182 /* If the component has varying offset, it behaves like index
184 idx = &TREE_OPERAND (*addr_p, 2);
186 && !cbck (*addr_p, idx, data))
189 nxt = &TREE_OPERAND (*addr_p, 0);
193 case ARRAY_RANGE_REF:
194 nxt = &TREE_OPERAND (*addr_p, 0);
195 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
/* TARGET_MEM_REF: visit the base and the index separately.  */
210 idx = &TMR_BASE (*addr_p);
212 && !cbck (*addr_p, idx, data))
214 idx = &TMR_INDEX (*addr_p);
216 && !cbck (*addr_p, idx, data))
226 /* If it is possible to hoist the statement STMT unconditionally,
227 returns MOVE_POSSIBLE.
228 If it is possible to hoist the statement STMT, but we must avoid making
229 it executed if it would not be executed in the original program (e.g.
230 because it may trap), return MOVE_PRESERVE_EXECUTION.
231 Otherwise return MOVE_IMPOSSIBLE. */
/* Classifies STMT for hoisting: MOVE_POSSIBLE, MOVE_PRESERVE_EXECUTION,
   or MOVE_IMPOSSIBLE.  See the comment above for the exact contract.
   NOTE(review): truncated listing -- braces and some lines are missing
   between the numbered lines.  */
234 movement_possibility (tree stmt)
238 if (flag_unswitch_loops
239 && TREE_CODE (stmt) == COND_EXPR)
241 /* If we perform unswitching, force the operands of the invariant
242 condition to be moved out of the loop. */
243 return MOVE_POSSIBLE;
/* Only plain assignments are candidates from here on.  */
246 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
247 return MOVE_IMPOSSIBLE;
249 if (stmt_ends_bb_p (stmt))
250 return MOVE_IMPOSSIBLE;
252 if (stmt_ann (stmt)->has_volatile_ops)
253 return MOVE_IMPOSSIBLE;
255 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
256 if (TREE_CODE (lhs) == SSA_NAME
257 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
258 return MOVE_IMPOSSIBLE;
260 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
262 if (TREE_SIDE_EFFECTS (rhs))
263 return MOVE_IMPOSSIBLE;
/* Stores and possibly-trapping computations may move only if their
   execution is preserved.  */
265 if (TREE_CODE (lhs) != SSA_NAME
266 || tree_could_trap_p (rhs))
267 return MOVE_PRESERVE_EXECUTION;
269 if (get_call_expr_in (stmt))
271 /* While pure or const call is guaranteed to have no side effects, we
272 cannot move it arbitrarily. Consider code like
274 char *s = something ();
284 Here the strlen call cannot be moved out of the loop, even though
285 s is invariant. In addition to possibly creating a call with
286 invalid arguments, moving out a function call that is not executed
287 may cause performance regressions in case the call is costly and
288 not executed at all. */
289 return MOVE_PRESERVE_EXECUTION;
291 return MOVE_POSSIBLE;
294 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
295 loop to that we could move the expression using DEF if it did not have
296 other operands, i.e. the outermost loop enclosing LOOP in that the value
297 of DEF is invariant. */
/* Returns the outermost superloop of LOOP in which the value of DEF is
   invariant, combining the invariance level of DEF's defining statement
   when LIM data for it exists.
   NOTE(review): truncated listing -- the NULL def_bb check and the
   final return are only partially visible.  */
300 outermost_invariant_loop (tree def, struct loop *loop)
304 struct loop *max_loop;
/* Non-SSA operands (constants) are invariant everywhere.  */
306 if (TREE_CODE (def) != SSA_NAME)
307 return superloop_at_depth (loop, 1);
309 def_stmt = SSA_NAME_DEF_STMT (def);
310 def_bb = bb_for_stmt (def_stmt);
312 return superloop_at_depth (loop, 1);
314 max_loop = find_common_loop (loop, def_bb->loop_father);
/* If the defining statement itself can be hoisted, DEF is invariant one
   level further out than the statement's own limit.  */
316 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
317 max_loop = find_common_loop (max_loop,
318 loop_outer (LIM_DATA (def_stmt)->max_loop));
319 if (max_loop == loop)
321 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
326 /* Returns the outermost superloop of LOOP in that the expression EXPR is
/* Recursive variant of outermost_invariant_loop for whole expressions:
   intersects the invariance levels of all operands.
   NOTE(review): truncated listing -- the failure returns and the final
   return of max_loop are not visible.  */
330 outermost_invariant_loop_expr (tree expr, struct loop *loop)
332 enum tree_code_class codeclass = TREE_CODE_CLASS (TREE_CODE (expr));
334 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
336 if (TREE_CODE (expr) == SSA_NAME
337 || TREE_CODE (expr) == INTEGER_CST
338 || is_gimple_min_invariant (expr))
339 return outermost_invariant_loop (expr, loop);
/* Only ordinary expression classes are analyzed; anything else is
   treated as non-invariant.  */
341 if (codeclass != tcc_unary
342 && codeclass != tcc_binary
343 && codeclass != tcc_expression
344 && codeclass != tcc_vl_exp
345 && codeclass != tcc_comparison)
348 nops = TREE_OPERAND_LENGTH (expr);
349 for (i = 0; i < nops; i++)
351 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
/* Keep the innermost (most constraining) of the operand levels.  */
355 if (flow_loop_nested_p (max_loop, aloop))
362 /* DATA is a structure containing information associated with a statement
363 inside LOOP. DEF is one of the operands of this statement.
365 Find the outermost loop enclosing LOOP in that value of DEF is invariant
366 and record this in DATA->max_loop field. If DEF itself is defined inside
367 this loop as well (i.e. we need to hoist it out of the loop if we want
368 to hoist the statement represented by DATA), record the statement in that
369 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
370 add the cost of the computation of DEF to the DATA->cost.
372 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
/* Records that the statement described by DATA depends on operand DEF;
   tightens DATA->max_loop, optionally accumulates cost, and links DEF's
   defining statement onto DATA->depends.
   NOTE(review): truncated listing -- the ADD_COST parameter line, some
   early returns and the final return true are not visible.  */
375 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
378 tree def_stmt = SSA_NAME_DEF_STMT (def);
379 basic_block def_bb = bb_for_stmt (def_stmt);
380 struct loop *max_loop;
386 max_loop = outermost_invariant_loop (def, loop);
/* Narrow the hoisting level to the innermost of the two.  */
390 if (flow_loop_nested_p (data->max_loop, max_loop))
391 data->max_loop = max_loop;
393 if (!LIM_DATA (def_stmt))
397 /* Only add the cost if the statement defining DEF is inside LOOP,
398 i.e. if it is likely that by moving the invariants dependent
399 on it, we will be able to avoid creating a new register for
400 it (since it will be only used in these dependent invariants). */
401 && def_bb->loop_father == loop)
402 data->cost += LIM_DATA (def_stmt)->cost;
/* Prepend DEF's definition to the dependency list so it is hoisted
   together with the dependent statement.  */
404 dep = XNEW (struct depend);
405 dep->stmt = def_stmt;
406 dep->next = data->depends;
412 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
413 are just ad-hoc constants. The estimates should be based on target-specific
/* Heuristic cost estimate for STMT; values are ad-hoc constants as noted
   above.
   NOTE(review): truncated listing -- most switch cases and returns are
   missing here.  */
417 stmt_cost (tree stmt)
422 /* Always try to create possibilities for unswitching. */
423 if (TREE_CODE (stmt) == COND_EXPR)
424 return LIM_EXPENSIVE;
426 rhs = GENERIC_TREE_OPERAND (stmt, 1);
428 /* Hoisting memory references out should almost surely be a win. */
429 if (stmt_references_memory_p (stmt))
432 switch (TREE_CODE (rhs))
435 /* We should be hoisting calls if possible. */
437 /* Unless the call is a builtin_constant_p; this always folds to a
438 constant, so moving it is useless. */
439 rhs = get_callee_fndecl (rhs);
440 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
441 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
458 /* Division and multiplication are usually expensive. */
474 /* Determine the outermost loop to that it is possible to hoist a statement
475 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
476 the outermost loop in that the value computed by STMT is invariant.
477 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
478 we preserve the fact whether STMT is executed. It also fills other related
479 information to LIM_DATA (STMT).
481 The function returns false if STMT cannot be hoisted outside of the loop it
482 is defined in, and true otherwise. */
/* Computes LIM_DATA (STMT)->max_loop and cost by intersecting the
   invariance levels of all (real and virtual) SSA uses of STMT.
   NOTE(review): truncated listing -- the failure paths and the final
   return true are not visible.  */
485 determine_max_movement (tree stmt, bool must_preserve_exec)
487 basic_block bb = bb_for_stmt (stmt);
488 struct loop *loop = bb->loop_father;
490 struct lim_aux_data *lim_data = LIM_DATA (stmt);
/* When execution must be preserved, the statement may only move to a
   loop that is guaranteed to execute this block.  */
494 if (must_preserve_exec)
495 level = ALWAYS_EXECUTED_IN (bb);
497 level = superloop_at_depth (loop, 1);
498 lim_data->max_loop = level;
500 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
501 if (!add_dependency (val, lim_data, loop, true))
/* Virtual uses constrain the level but contribute no cost.  */
504 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES)
505 if (!add_dependency (val, lim_data, loop, false))
508 lim_data->cost += stmt_cost (stmt);
513 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
514 and that one of the operands of this statement is computed by STMT.
515 Ensure that STMT (together with all the statements that define its
516 operands) is hoisted at least out of the loop LEVEL. */
/* Propagates the target hoisting level LEVEL to STMT and, recursively,
   to every statement STMT depends on.  */
519 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
521 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
524 stmt_loop = find_common_loop (orig_loop, stmt_loop);
525 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
526 stmt_loop = find_common_loop (stmt_loop,
527 loop_outer (LIM_DATA (stmt)->tgt_loop));
/* Nothing to do if STMT is already scheduled out at least to LEVEL.  */
528 if (flow_loop_nested_p (stmt_loop, level))
531 gcc_assert (LIM_DATA (stmt));
532 gcc_assert (level == LIM_DATA (stmt)->max_loop
533 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
535 LIM_DATA (stmt)->tgt_loop = level;
536 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
537 set_level (dep->stmt, orig_loop, level);
540 /* Determines an outermost loop from that we want to hoist the statement STMT.
541 For now we choose the outermost possible loop. TODO -- use profiling
542 information to set it more sanely. */
/* Chooses the hoisting target for STMT: currently simply its outermost
   possible loop, LIM_DATA (stmt)->max_loop.  */
545 set_profitable_level (tree stmt)
547 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
550 /* Returns true if STMT is not a pure call. */
/* Returns true iff STMT contains a call with side effects.
   NOTE(review): truncated listing -- the handling when STMT contains no
   call (get_call_expr_in returning NULL) is not visible here.  */
553 nonpure_call_p (tree stmt)
555 tree call = get_call_expr_in (stmt);
560 return TREE_SIDE_EFFECTS (call) != 0;
563 /* Releases the memory occupied by DATA. */
/* Frees DATA together with its dependency list.
   NOTE(review): loop body (freeing each dep and DATA itself) is not
   visible in this truncated listing.  */
566 free_lim_aux_data (struct lim_aux_data *data)
568 struct depend *dep, *next;
570 for (dep = data->depends; dep; dep = next)
578 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
/* Rewrites the division at *BSI as t = 1/b; lhs = a * t, so that the
   (invariant) reciprocal can be hoisted.  Returns the reciprocal
   statement for further processing.  */
581 rewrite_reciprocal (block_stmt_iterator *bsi)
583 tree stmt, lhs, rhs, stmt1, stmt2, var, name, tmp;
585 stmt = bsi_stmt (*bsi);
586 lhs = GENERIC_TREE_OPERAND (stmt, 0);
587 rhs = GENERIC_TREE_OPERAND (stmt, 1);
589 /* stmt must be GIMPLE_MODIFY_STMT. */
590 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
591 add_referenced_var (var);
/* Build reciptmp = 1.0 / b.  */
593 tmp = build2 (RDIV_EXPR, TREE_TYPE (rhs),
594 build_real (TREE_TYPE (rhs), dconst1),
595 TREE_OPERAND (rhs, 1));
596 stmt1 = build_gimple_modify_stmt (var, tmp);
597 name = make_ssa_name (var, stmt1);
598 GIMPLE_STMT_OPERAND (stmt1, 0) = name;
/* Build lhs = a * reciptmp.  */
599 tmp = build2 (MULT_EXPR, TREE_TYPE (rhs),
600 name, TREE_OPERAND (rhs, 0));
601 stmt2 = build_gimple_modify_stmt (lhs, tmp);
603 /* Replace division stmt with reciprocal and multiply stmts.
604 The multiply stmt is not invariant, so update iterator
605 and avoid rescanning. */
606 bsi_replace (bsi, stmt1, true);
607 bsi_insert_after (bsi, stmt2, BSI_NEW_STMT);
608 SSA_NAME_DEF_STMT (lhs) = stmt2;
610 /* Continue processing with invariant reciprocal statement. */
614 /* Check if the pattern at *BSI is a bittest of the form
615 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
/* Rewrites (A >> B) & 1 != 0 at *BSI as A & (1 << B) != 0 when B is loop
   invariant and A is not, so the mask computation can be hoisted.
   NOTE(review): truncated listing -- several early-return paths, the
   COND_EXPR operand update and the final return are not visible.  */
618 rewrite_bittest (block_stmt_iterator *bsi)
620 tree stmt, lhs, rhs, var, name, use_stmt, stmt1, stmt2, t;
623 stmt = bsi_stmt (*bsi);
624 lhs = GENERIC_TREE_OPERAND (stmt, 0);
625 rhs = GENERIC_TREE_OPERAND (stmt, 1);
627 /* Verify that the single use of lhs is a comparison against zero. */
628 if (TREE_CODE (lhs) != SSA_NAME
629 || !single_imm_use (lhs, &use, &use_stmt)
630 || TREE_CODE (use_stmt) != COND_EXPR)
632 t = COND_EXPR_COND (use_stmt);
633 if (TREE_OPERAND (t, 0) != lhs
634 || (TREE_CODE (t) != NE_EXPR
635 && TREE_CODE (t) != EQ_EXPR)
636 || !integer_zerop (TREE_OPERAND (t, 1)))
639 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
640 stmt1 = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
641 if (TREE_CODE (stmt1) != GIMPLE_MODIFY_STMT)
644 /* There is a conversion in between possibly inserted by fold. */
645 t = GIMPLE_STMT_OPERAND (stmt1, 1);
646 if (TREE_CODE (t) == NOP_EXPR
647 || TREE_CODE (t) == CONVERT_EXPR)
649 t = TREE_OPERAND (t, 0);
650 if (TREE_CODE (t) != SSA_NAME
651 || !has_single_use (t))
653 stmt1 = SSA_NAME_DEF_STMT (t);
654 if (TREE_CODE (stmt1) != GIMPLE_MODIFY_STMT)
656 t = GIMPLE_STMT_OPERAND (stmt1, 1);
659 /* Verify that B is loop invariant but A is not. Verify that with
660 all the stmt walking we are still in the same loop. */
661 if (TREE_CODE (t) == RSHIFT_EXPR
662 && loop_containing_stmt (stmt1) == loop_containing_stmt (stmt)
663 && outermost_invariant_loop_expr (TREE_OPERAND (t, 1),
664 loop_containing_stmt (stmt1)) != NULL
665 && outermost_invariant_loop_expr (TREE_OPERAND (t, 0),
666 loop_containing_stmt (stmt1)) == NULL)
668 tree a = TREE_OPERAND (t, 0);
669 tree b = TREE_OPERAND (t, 1);
/* Build shifttmp = 1 << b (the invariant part).  */
672 var = create_tmp_var (TREE_TYPE (a), "shifttmp");
673 add_referenced_var (var);
674 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
675 build_int_cst (TREE_TYPE (a), 1), b);
676 stmt1 = build_gimple_modify_stmt (var, t);
677 name = make_ssa_name (var, stmt1);
678 GIMPLE_STMT_OPERAND (stmt1, 0) = name;
/* Build name2 = a & shifttmp (the variant part, replacing *BSI).  */
681 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
682 stmt2 = build_gimple_modify_stmt (var, t);
683 name = make_ssa_name (var, stmt2);
684 GIMPLE_STMT_OPERAND (stmt2, 0) = name;
687 bsi_insert_before (bsi, stmt1, BSI_SAME_STMT);
688 bsi_replace (bsi, stmt2, true);
697 /* Determine the outermost loops in that statements in basic block BB are
698 invariant, and record them to the LIM_DATA associated with the statements.
699 Callback for walk_dominator_tree. */
/* Dominator-walk callback: classifies every statement of BB, applies the
   reciprocal and bittest rewrites, allocates LIM_DATA and computes the
   hoisting level for each movable statement.
   NOTE(review): truncated listing -- continue/early-exit paths and some
   braces are not visible between the numbered lines.  */
702 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
706 block_stmt_iterator bsi;
708 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
709 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
/* Blocks outside of any real loop need no processing.  */
711 if (!loop_outer (bb->loop_father))
714 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
716 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
718 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
720 stmt = bsi_stmt (bsi);
722 pos = movement_possibility (stmt);
723 if (pos == MOVE_IMPOSSIBLE)
725 if (nonpure_call_p (stmt))
733 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
735 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
737 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
738 to be hoisted out of loop, saving expensive divide. */
739 if (pos == MOVE_POSSIBLE
740 && TREE_CODE (rhs) == RDIV_EXPR
741 && flag_unsafe_math_optimizations
742 && !flag_trapping_math
743 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
744 loop_containing_stmt (stmt)) != NULL
745 && outermost_invariant_loop_expr (rhs,
746 loop_containing_stmt (stmt)) == NULL)
747 stmt = rewrite_reciprocal (&bsi);
749 /* If the shift count is invariant, convert (A >> B) & 1 to
750 A & (1 << B) allowing the bit mask to be hoisted out of the loop
751 saving an expensive shift. */
752 if (pos == MOVE_POSSIBLE
753 && TREE_CODE (rhs) == BIT_AND_EXPR
754 && integer_onep (TREE_OPERAND (rhs, 1))
755 && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME
756 && has_single_use (TREE_OPERAND (rhs, 0)))
757 stmt = rewrite_bittest (&bsi);
/* Attach fresh, zeroed LIM aux data to the (possibly rewritten)
   statement.  */
760 stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
761 LIM_DATA (stmt)->always_executed_in = outermost;
763 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
766 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
768 LIM_DATA (stmt)->max_loop = NULL;
772 if (dump_file && (dump_flags & TDF_DETAILS))
774 print_generic_stmt_indented (dump_file, stmt, 0, 2);
775 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
776 loop_depth (LIM_DATA (stmt)->max_loop),
777 LIM_DATA (stmt)->cost);
/* Expensive statements are hoisted eagerly to their outermost level.  */
780 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
781 set_profitable_level (stmt);
785 /* For each statement determines the outermost loop in that it is invariant,
786 statements on whose motion it depends and the cost of the computation.
787 This information is stored to the LIM_DATA structure associated with
/* Driver: walks the dominator tree running determine_invariantness_stmt
   on every basic block.  */
791 determine_invariantness (void)
793 struct dom_walk_data walk_data;
795 memset (&walk_data, 0, sizeof (struct dom_walk_data));
796 walk_data.dom_direction = CDI_DOMINATORS;
797 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
799 init_walk_dominator_tree (&walk_data);
800 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
801 fini_walk_dominator_tree (&walk_data);
804 /* Hoist the statements in basic block BB out of the loops prescribed by
805 data stored in LIM_DATA structures associated with each statement. Callback
806 for walk_dominator_tree. */
/* Dominator-walk callback: moves each statement of BB with a target loop
   recorded in its LIM_DATA onto that loop's preheader edge.
   NOTE(review): truncated listing -- iterator advances, the check for a
   NULL target level and some braces are not visible.  */
809 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
813 block_stmt_iterator bsi;
817 if (!loop_outer (bb->loop_father))
820 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
822 stmt = bsi_stmt (bsi);
824 if (!LIM_DATA (stmt))
/* Extract the schedule, then release the aux data before moving.  */
830 cost = LIM_DATA (stmt)->cost;
831 level = LIM_DATA (stmt)->tgt_loop;
832 free_lim_aux_data (LIM_DATA (stmt));
833 stmt_ann (stmt)->common.aux = NULL;
841 /* We do not really want to move conditionals out of the loop; we just
842 placed it here to force its operands to be moved if necessary. */
843 if (TREE_CODE (stmt) == COND_EXPR)
846 if (dump_file && (dump_flags & TDF_DETAILS))
848 fprintf (dump_file, "Moving statement\n");
849 print_generic_stmt (dump_file, stmt, 0);
850 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
/* Queue the statement on the preheader edge; commits happen later in
   move_computations.  */
853 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
854 bsi_remove (&bsi, false);
858 /* Hoist the statements out of the loops prescribed by data stored in
859 LIM_DATA structures associated with each statement.*/
/* Driver: walks the dominator tree moving scheduled statements, commits
   the queued edge insertions and updates loop-closed SSA if needed.  */
862 move_computations (void)
864 struct dom_walk_data walk_data;
866 memset (&walk_data, 0, sizeof (struct dom_walk_data));
867 walk_data.dom_direction = CDI_DOMINATORS;
868 walk_data.before_dom_children_before_stmts = move_computations_stmt;
870 init_walk_dominator_tree (&walk_data);
871 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
872 fini_walk_dominator_tree (&walk_data);
874 bsi_commit_edge_inserts ();
875 if (need_ssa_update_p ())
876 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
879 /* Checks whether the statement defining variable *INDEX can be hoisted
880 out of the loop passed in DATA. Callback for for_each_index. */
/* for_each_index callback: checks whether the definition of *INDEX (and,
   for ARRAY_REFs, the element size and low bound of REF) is invariant in
   the loop passed through DATA.
   NOTE(review): truncated listing -- the failure returns after each
   check and the final return are not visible.  */
883 may_move_till (tree ref, tree *index, void *data)
885 struct loop *loop = (struct loop*) data, *max_loop;
887 /* If REF is an array reference, check also that the step and the lower
888 bound is invariant in LOOP. */
889 if (TREE_CODE (ref) == ARRAY_REF)
891 tree step = array_ref_element_size (ref);
892 tree lbound = array_ref_low_bound (ref);
894 max_loop = outermost_invariant_loop_expr (step, loop);
898 max_loop = outermost_invariant_loop_expr (lbound, loop);
903 max_loop = outermost_invariant_loop (*index, loop);
910 /* Forces statements defining (invariant) SSA names in expression EXPR to be
911 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
/* Recursively forces the definitions of all SSA names occurring in EXPR
   to be hoisted at least out of LOOP (via set_level).  */
914 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
916 enum tree_code_class codeclass = TREE_CODE_CLASS (TREE_CODE (expr));
919 if (TREE_CODE (expr) == SSA_NAME)
921 tree stmt = SSA_NAME_DEF_STMT (expr);
/* Default definitions have no statement to move.  */
922 if (IS_EMPTY_STMT (stmt))
925 set_level (stmt, orig_loop, loop);
929 if (codeclass != tcc_unary
930 && codeclass != tcc_binary
931 && codeclass != tcc_expression
932 && codeclass != tcc_vl_exp
933 && codeclass != tcc_comparison)
936 nops = TREE_OPERAND_LENGTH (expr);
937 for (i = 0; i < nops; i++)
938 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
941 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
942 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
/* struct fmt_data carries the target loop and the loop in which the
   reference is used; force_move_till is the for_each_index callback that
   schedules all index definitions for hoisting.
   NOTE(review): truncated listing -- struct fmt_data's other field(s)
   and braces, plus the function's return, are not visible.  */
948 struct loop *orig_loop;
952 force_move_till (tree ref, tree *index, void *data)
955 struct fmt_data *fmt_data = (struct fmt_data *) data;
957 if (TREE_CODE (ref) == ARRAY_REF)
959 tree step = array_ref_element_size (ref);
960 tree lbound = array_ref_low_bound (ref);
962 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
963 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
966 if (TREE_CODE (*index) != SSA_NAME)
969 stmt = SSA_NAME_DEF_STMT (*index);
970 if (IS_EMPTY_STMT (stmt))
973 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
978 /* Records memory reference location *REF to the list MEM_REFS. The reference
979 occurs in statement STMT. */
/* Prepends a new mem_ref_loc for (STMT, REF) onto the *MEM_REFS list.
   NOTE(review): assignments of aref->stmt and aref->ref are not visible
   in this truncated listing.  */
982 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
984 struct mem_ref_loc *aref = XNEW (struct mem_ref_loc);
989 aref->next = *mem_refs;
993 /* Releases list of memory reference locations MEM_REFS. */
/* Walks the MEM_REFS list freeing every node.
   NOTE(review): the free call on each node is not visible in this
   truncated listing.  */
996 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
998 struct mem_ref_loc *act;
1003 mem_refs = mem_refs->next;
1008 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
/* Replaces every recorded occurrence in MEM_REFS by TMP_VAR, marking the
   affected virtual operands' symbols for SSA renaming.  */
1011 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
1016 for (; mem_refs; mem_refs = mem_refs->next)
1018 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
1019 mark_sym_for_renaming (SSA_NAME_VAR (var));
1021 *mem_refs->ref = tmp_var;
1022 update_stmt (mem_refs->stmt);
1026 /* The name and the length of the currently generated variable
/* Scratch buffer used to accumulate the name of the store-motion
   temporary variable; lsm_tmp_name_length tracks the used prefix.  */
1028 #define MAX_LSM_NAME_LENGTH 40
1029 static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
1030 static int lsm_tmp_name_length;
1032 /* Adds S to lsm_tmp_name. */
/* Appends S to lsm_tmp_name, silently dropping the suffix once the
   MAX_LSM_NAME_LENGTH budget would be exceeded.
   NOTE(review): the early return on overflow is not visible in this
   truncated listing.  */
1035 lsm_tmp_name_add (const char *s)
1037 int l = strlen (s) + lsm_tmp_name_length;
1038 if (l > MAX_LSM_NAME_LENGTH)
1041 strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
1042 lsm_tmp_name_length = l;
1045 /* Stores the name for temporary variable that replaces REF to
/* Recursively builds a human-readable name for REF in lsm_tmp_name,
   appending suffixes per tree code (_RE/_IM for complex parts, _I for
   array indexing, field names for components, S/R fallbacks).
   NOTE(review): truncated listing -- several case labels, breaks and
   the default handling are not visible.  */
1049 gen_lsm_tmp_name (tree ref)
1053 switch (TREE_CODE (ref))
1055 case MISALIGNED_INDIRECT_REF:
1056 case ALIGN_INDIRECT_REF:
1058 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1059 lsm_tmp_name_add ("_");
1063 case VIEW_CONVERT_EXPR:
1064 case ARRAY_RANGE_REF:
1065 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1069 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1070 lsm_tmp_name_add ("_RE");
1074 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1075 lsm_tmp_name_add ("_IM");
1079 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1080 lsm_tmp_name_add ("_");
1081 name = get_name (TREE_OPERAND (ref, 1));
1084 lsm_tmp_name_add ("_");
1085 lsm_tmp_name_add (name);
1088 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1089 lsm_tmp_name_add ("_I");
/* SSA names are named after their underlying variable.  */
1093 ref = SSA_NAME_VAR (ref);
1098 name = get_name (ref);
1101 lsm_tmp_name_add (name);
1105 lsm_tmp_name_add ("S");
1109 lsm_tmp_name_add ("R");
1117 /* Determines name for temporary variable that replaces REF.
1118 The name is accumulated into the lsm_tmp_name variable.
1119 N is added to the name of the temporary. */
/* Resets the name buffer, generates a name for REF, appends "_lsm" and
   the index N, and returns the static buffer (valid until next call).
   NOTE(review): formatting of N into NS is not visible in this
   truncated listing.  */
1122 get_lsm_tmp_name (tree ref, unsigned n)
1126 lsm_tmp_name_length = 0;
1127 gen_lsm_tmp_name (ref);
1128 lsm_tmp_name_add ("_lsm");
1133 lsm_tmp_name_add (ns);
1135 return lsm_tmp_name;
1138 /* Records request for store motion of memory reference REF from LOOP.
1139 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1140 these references are rewritten by a new temporary variable.
1141 Exits from the LOOP are stored in EXITS. The initialization of the
1142 temporary variable is put to the preheader of the loop, and assignments
1143 to the reference from the temporary variable are emitted to exits. */
/* Performs store motion of REF out of LOOP: rewrites all occurrences in
   MEM_REFS by a fresh temporary, loads the value on the latch edge and
   stores it back on every exit edge in EXITS.  */
1146 schedule_sm (struct loop *loop, VEC (edge, heap) *exits, tree ref,
1147 struct mem_ref_loc *mem_refs)
1149 struct mem_ref_loc *aref;
1153 struct fmt_data fmt_data;
1156 if (dump_file && (dump_flags & TDF_DETAILS))
1158 fprintf (dump_file, "Executing store motion of ");
1159 print_generic_expr (dump_file, ref, 0);
1160 fprintf (dump_file, " from loop %d\n", loop->num);
1163 tmp_var = make_rename_temp (TREE_TYPE (ref),
1164 get_lsm_tmp_name (ref, ~0));
/* Force all index computations of REF out of LOOP first.  */
1166 fmt_data.loop = loop;
1167 fmt_data.orig_loop = loop;
1168 for_each_index (&ref, force_move_till, &fmt_data);
1170 rewrite_mem_refs (tmp_var, mem_refs);
1171 for (aref = mem_refs; aref; aref = aref->next)
1172 if (LIM_DATA (aref->stmt))
1173 LIM_DATA (aref->stmt)->sm_done = true;
1175 /* Emit the load & stores. */
1176 load = build_gimple_modify_stmt (tmp_var, ref);
1177 get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
1178 LIM_DATA (load)->max_loop = loop;
1179 LIM_DATA (load)->tgt_loop = loop;
1181 /* Put this into the latch, so that we are sure it will be processed after
1182 all dependencies. */
1183 bsi_insert_on_edge (loop_latch_edge (loop), load);
1185 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
1187 store = build_gimple_modify_stmt (unshare_expr (ref), tmp_var);
1188 bsi_insert_on_edge (ex, store);
1192 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
1193 is true, prepare the statements that load the value of the memory reference
1194 to a temporary variable in the loop preheader, store it back on the loop
1195 exits, and replace all the references inside LOOP by this temporary variable.
1196 EXITS is the list of exits of LOOP. CLOBBERED_VOPS is the bitmap of virtual
1197 operands that are clobbered by a call or accessed through multiple references
/* Decides whether store motion of REF out of LOOP is legal (stored to,
   not aliased with clobbered vops, and -- if it may trap -- guaranteed
   to execute) and, if so, performs it via schedule_sm.
   NOTE(review): truncated listing -- the early returns and loop braces
   are not visible between the numbered lines.  */
1201 determine_lsm_ref (struct loop *loop, VEC (edge, heap) *exits,
1202 bitmap clobbered_vops, struct mem_ref *ref)
1204 struct mem_ref_loc *aref;
1205 struct loop *must_exec;
1207 /* In case the memory is not stored to, there is nothing for SM to do. */
1208 if (!ref->is_stored)
1211 /* If the reference is aliased with any different ref, or killed by call
1212 in function, then fail. */
1213 if (bitmap_intersect_p (ref->vops, clobbered_vops))
1216 if (tree_could_trap_p (ref->mem))
1218 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1219 of the statements in that it occurs is always executed when the loop
1220 is entered. This way we know that by moving the load from the
1221 reference out of the loop we will not cause the error that would not
1224 TODO -- in fact we would like to check for anticipability of the
1225 reference, i.e. that on each path from loop entry to loop exit at
1226 least one of the statements containing the memory reference is
1229 for (aref = ref->locs; aref; aref = aref->next)
1231 if (!LIM_DATA (aref->stmt))
1234 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1238 if (must_exec == loop
1239 || flow_loop_nested_p (must_exec, loop))
1247 schedule_sm (loop, exits, ref->mem, ref->locs);
1250 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1251 of vops clobbered by call in loop or accessed by multiple memory references.
1252 EXITS is the list of exit edges of the LOOP. */
/* Runs determine_lsm_ref on every reference in the MEM_REFS list.  */
1255 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1256 bitmap clobbered_vops, VEC (edge, heap) *exits)
1258 struct mem_ref *ref;
1260 for (ref = mem_refs; ref; ref = ref->next)
1261 determine_lsm_ref (loop, exits, clobbered_vops, ref);
1264 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
1265 for a store motion optimization (i.e. whether we can insert statement
/* Rejects loops with abnormal exit edges, where statements cannot be
   inserted on the exits.
   NOTE(review): the returns are not visible in this truncated
   listing.  */
1269 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
1270 VEC (edge, heap) *exits)
1275 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
1276 if (ex->flags & EDGE_ABNORMAL)
1282 /* A hash function for struct mem_ref object OBJ. */
/* htab hash callback: returns the precomputed hash of a mem_ref.  */
1285 memref_hash (const void *obj)
1287 return ((const struct mem_ref *) obj)->hash;
1290 /* An equality function for struct mem_ref object OBJ1 with
1291 memory reference OBJ2. */
/* htab equality callback: compares a stored mem_ref (OBJ1) against a
   bare tree reference (OBJ2) with operand_equal_p.  */
1294 memref_eq (const void *obj1, const void *obj2)
1296 const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
1298 return operand_equal_p (mem1->mem, (tree) obj2, 0);
1301 /* Gathers memory references in statement STMT in LOOP, storing the
1302 information about them in MEM_REFS hash table. Note vops accessed through
1303 unrecognized statements in CLOBBERED_VOPS. The newly created references
1304 are also stored to MEM_REF_LIST. */
/* Examines STMT for a recognizable load/store pattern; records the
   reference in MEM_REFS / MEM_REF_LIST, or notes its vops in
   CLOBBERED_VOPS when the statement is not a simple load or store.
   NOTE(review): truncated listing -- the fail label, is_stored
   assignments, hash-table insertion details and several braces are not
   visible.  */
1307 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1308 bitmap clobbered_vops, tree stmt,
1309 struct mem_ref **mem_ref_list)
1311 tree *lhs, *rhs, *mem = NULL;
1314 struct mem_ref *ref = NULL;
/* Statements without virtual operands touch no memory.  */
1319 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1322 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1323 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1326 lhs = &GIMPLE_STMT_OPERAND (stmt, 0);
1327 rhs = &GIMPLE_STMT_OPERAND (stmt, 1);
1329 if (TREE_CODE (*lhs) == SSA_NAME)
1331 if (!is_gimple_addressable (*rhs))
1337 else if (TREE_CODE (*rhs) == SSA_NAME
1338 || is_gimple_min_invariant (*rhs))
1346 /* If we cannot create an SSA name for the result, give up. */
1347 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1348 || TREE_THIS_VOLATILE (*mem))
1351 /* If we cannot move the reference out of the loop, fail. */
1352 if (!for_each_index (mem, may_move_till, loop))
1355 hash = iterative_hash_expr (*mem, 0);
1356 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1359 ref = (struct mem_ref *) *slot;
/* First occurrence: allocate and link a new mem_ref.  */
1362 ref = XNEW (struct mem_ref);
1366 ref->is_stored = false;
1367 ref->vops = BITMAP_ALLOC (NULL);
1368 ref->next = *mem_ref_list;
1369 *mem_ref_list = ref;
1372 ref->is_stored |= is_stored;
1374 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi, SSA_OP_VIRTUAL_USES)
1375 bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)))_;
1376 record_mem_ref_loc (&ref->locs, stmt, mem);
/* Unrecognized statement: all its vops are considered clobbered.  */
1380 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi, SSA_OP_VIRTUAL_USES)
1381 bitmap_set_bit (clobbered_vops, DECL_UID (SSA_NAME_VAR (vname)));
1384 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1385 statements in CLOBBERED_VOPS. The list of the references found by
1386 the function is returned. */
1388 static struct mem_ref *
1389 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1391 basic_block *body = get_loop_body (loop);
1392 block_stmt_iterator bsi;
1394 struct mem_ref *mem_ref_list = NULL;
/* The hash table is only used for duplicate detection during the walk;
   the result is returned as the MEM_REF_LIST linked list.  */
1395 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
/* Walk every statement of every block in the loop body.  */
1397 for (i = 0; i < loop->num_nodes; i++)
1399 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1400 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
/* The table can go away; ownership of the mem_refs stays with the list.  */
1406 htab_delete (mem_refs);
1407 return mem_ref_list;
1410 /* Finds the vops accessed by more than one of the memory references described
1411 in MEM_REFS and marks them in CLOBBERED_VOPS. */
/* A vop shared by two distinct references means the references may
   alias, so neither can be safely promoted -- mark it clobbered.  */
1414 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1416 bitmap_head tmp, all_vops;
1417 struct mem_ref *ref;
1419 bitmap_initialize (&tmp, &bitmap_default_obstack);
1420 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1422 for (ref = mem_refs; ref; ref = ref->next)
1424 /* The vops that are already in all_vops are accessed by more than
1425 one memory reference. */
/* tmp = all_vops & ref->vops : vops seen before this reference.  */
1426 bitmap_and (&tmp, &all_vops, ref->vops)
1427 bitmap_ior_into (clobbered_vops, &tmp);
1428 bitmap_clear (&tmp);
/* Accumulate this reference's vops into the running union.  */
1430 bitmap_ior_into (&all_vops, ref->vops);
1433 bitmap_clear (&all_vops);
1436 /* Releases the memory occupied by REF. */
/* Frees the location list and the vop bitmap owned by REF.  The
   struct itself is freed too (the free() call is elided in this
   fragment -- confirm against the full source).  */
1439 free_mem_ref (struct mem_ref *ref)
1441 free_mem_ref_locs (ref->locs);
1442 BITMAP_FREE (ref->vops);
1446 /* Releases the memory occupied by REFS. */
/* Walks the singly-linked list, saving NEXT before each node is
   released so the traversal survives the free.  */
1449 free_mem_refs (struct mem_ref *refs)
1451 struct mem_ref *ref, *next;
1453 for (ref = refs; ref; ref = next)
1460 /* Try to perform store motion for all memory references modified inside
/* Driver for a single LOOP: collect references, rule out aliased ones,
   then hoist what remains.  All temporaries are released on exit.  */
1464 determine_lsm_loop (struct loop *loop)
1466 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1467 bitmap clobbered_vops;
1468 struct mem_ref *mem_refs;
/* Bail out early (freeing EXITS) if any exit edge cannot take code.  */
1470 if (!loop_suitable_for_sm (loop, exits))
1472 VEC_free (edge, heap, exits);
1476 /* Find the memory references in LOOP. */
1477 clobbered_vops = BITMAP_ALLOC (NULL);
1478 mem_refs = gather_mem_refs (loop, clobbered_vops);
1480 /* Find the vops that are used for more than one reference. */
1481 find_more_ref_vops (mem_refs, clobbered_vops);
1483 /* Hoist all suitable memory references. */
1484 hoist_memory_references (loop, mem_refs, clobbered_vops, exits);
1486 free_mem_refs (mem_refs);
1487 VEC_free (edge, heap, exits);
1488 BITMAP_FREE (clobbered_vops);
1491 /* Try to perform store motion for all memory references modified inside
/* Top-level store-motion driver: visit every loop and commit the
   edge insertions made by the per-loop pass at the end.  */
1495 determine_lsm (void)
1500 /* Pass the loops from the outermost and perform the store motion as
1503 FOR_EACH_LOOP (li, loop, 0)
1505 determine_lsm_loop (loop);
/* Materialize all statements queued on edges by the loop walk above.  */
1508 bsi_commit_edge_inserts ();
1511 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1512 for each such basic block bb records the outermost loop for that execution
1513 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1514 blocks that contain a nonpure call. */
/* Recursive over the loop tree; results are stored in bb->aux.
   NOTE(review): several interior lines (aux assignments, loop exits)
   are elided from this fragment; comments describe the visible flow.  */
1517 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1519 basic_block bb = NULL, *bbs, last = NULL;
1522 struct loop *inn_loop = loop;
/* Skip loops already annotated (header->aux set).  */
1524 if (!loop->header->aux)
/* Dominator order guarantees we see a block before its dominatees.  */
1526 bbs = get_loop_body_in_dom_order (loop);
1528 for (i = 0; i < loop->num_nodes; i++)
/* A block dominating the latch is executed on every iteration.  */
1533 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
/* A nonpure call may not return; stop extending the region.  */
1536 if (TEST_BIT (contains_call, bb->index))
/* An exit edge out of LOOP likewise ends the always-executed prefix.  */
1539 FOR_EACH_EDGE (e, ei, bb->succs)
1540 if (!flow_bb_inside_loop_p (loop, e->dest))
1545 /* A loop might be infinite (TODO use simple loop analysis
1546 to disprove this if possible). */
1547 if (bb->flags & BB_IRREDUCIBLE_LOOP)
/* Once we left the entered inner loop, stop.  */
1550 if (!flow_bb_inside_loop_p (inn_loop, bb))
1553 if (bb->loop_father->header == bb)
1555 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1558 /* In a loop that is always entered we may proceed anyway.
1559 But record that we entered it and stop once we leave it. */
1560 inn_loop = bb->loop_father;
/* Walk up the dominator chain from the last always-executed block,
   marking each block's outermost always-executed loop.  */
1567 if (last == loop->header)
1569 last = get_immediate_dominator (CDI_DOMINATORS, last);
/* Recurse into the subloops.  */
1575 for (loop = loop->inner; loop; loop = loop->next)
1576 fill_always_executed_in (loop, contains_call);
1579 /* Compute the global information needed by the loop invariant motion pass. */
/* Builds the CONTAINS_CALL bitmap (blocks with a nonpure call) and uses
   it to seed the always-executed-in annotations for every loop nest.  */
1582 tree_ssa_lim_initialize (void)
1584 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1585 block_stmt_iterator bsi;
1589 sbitmap_zero (contains_call);
/* Scan each block; stop at the first nonpure call found.  */
1592 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1594 if (nonpure_call_p (bsi_stmt (bsi)))
/* The iterator stopped early iff a nonpure call was seen.  */
1598 if (!bsi_end_p (bsi))
1599 SET_BIT (contains_call, bb->index);
/* Annotate each top-level loop nest with always-executed info.  */
1602 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
1603 fill_always_executed_in (loop, contains_call);
1605 sbitmap_free (contains_call);
1608 /* Cleans up after the invariant motion pass. */
/* Releases the per-block aux data set up during the pass.
   NOTE(review): the function body is elided in this fragment;
   see the full source for the exact teardown performed.  */
1611 tree_ssa_lim_finalize (void)
1621 /* Moves invariants from loops. Only "expensive" invariants are moved out --
1622 i.e. those that are likely to be win regardless of the register pressure. */
1627 tree_ssa_lim_initialize ();
1629 /* For each statement determine the outermost loop in that it is
1630 invariant and cost for computing the invariant. */
1631 determine_invariantness ();
1633 /* For each memory reference determine whether it is possible to hoist it
1634 out of the loop. Force the necessary invariants to be moved out of the
1638 /* Move the expressions that are expensive enough. */
1639 move_computations ();
1641 tree_ssa_lim_finalize ();