1 /* Loop invariant motion.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-pass.h"
42 /* TODO: Support for predicated code motion. I.e.
53 Where COND and INV are is invariants, but evaluating INV may trap or be
54 invalid from some other reason if !COND. This may be transformed to
64 /* A type for the list of statements that have to be moved in order to be able
65 to hoist an invariant computation. */
/* NOTE(review): this extract elides interior lines (the embedded original
   line numbers jump), so the struct declarations below are incomplete as
   shown — struct headers, closing braces, and comment terminators are
   missing.  Documented from the visible lines only.  */
73 /* The auxiliary data kept for each statement. */
77 struct loop *max_loop;	/* The outermost loop in that the statement
80 struct loop *tgt_loop;	/* The loop out of that we want to move the
83 struct loop *always_executed_in;
84 /* The outermost loop for that we are sure
85 the statement is executed if the loop
88 bool sm_done;	/* True iff the store motion for a memory
89 reference in the statement has already
92 unsigned cost;	/* Cost of the computation performed by the
95 struct depend *depends;	/* List of statements that must be also hoisted
96 out of the loop when this statement is
97 hoisted; i.e. those that define the operands
98 of the statement and are inside of the
102 #define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
104 : (struct lim_aux_data *) (stmt_ann (STMT)->common.aux))
106 /* Description of a memory reference location for store motion. */
110 tree *ref;			/* The reference itself. */
111 tree stmt;			/* The statement in that it occurs. */
112 struct mem_ref_loc *next;	/* Next use in the chain. */
115 /* Description of a memory reference for store motion. */
119 tree mem;			/* The memory itself. */
120 hashval_t hash;		/* Its hash value. */
121 bool is_stored;		/* True if there is a store to the location
123 struct mem_ref_loc *locs;	/* The locations where it is found. */
124 bitmap vops;			/* Vops corresponding to this memory
126 struct mem_ref *next;	/* Next memory reference in the list.
127 Memory references are stored in a hash
128 table, but the hash function depends
129 on values of pointers. Thus we cannot use
130 htab_traverse, since then we would get
131 miscompares during bootstrap (although the
132 produced code would be correct). */
135 /* Minimum cost of an expensive expression. */
136 #define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))
138 /* The outermost loop for that execution of the header guarantees that the
139 block will be executed. */
140 #define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)
/* NOTE(review): interior lines of this function are elided from the extract
   (the embedded original line numbers jump from 157 to 161, 163 to 166,
   etc.) — case labels, return statements, and closing braces are missing.
   The visible code walks a memory reference from the outermost tree node
   inward, invoking CBCK on each index-like operand.  */
142 /* Calls CBCK for each index in memory reference ADDR_P. There are two
143 kinds situations handled; in each of these cases, the memory reference
144 and DATA are passed to the callback:
146 Access to an array: ARRAY_{RANGE_}REF (base, index). In this case we also
147 pass the pointer to the index to the callback.
149 Pointer dereference: INDIRECT_REF (addr). In this case we also pass the
150 pointer to addr to the callback.
152 If the callback returns false, the whole search stops and false is returned.
153 Otherwise the function returns true after traversing through the whole
154 reference *ADDR_P. */
157 for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
161 for (; ; addr_p = nxt)
163 switch (TREE_CODE (*addr_p))
166 return cbck (*addr_p, addr_p, data);
168 case MISALIGNED_INDIRECT_REF:
169 case ALIGN_INDIRECT_REF:
171 nxt = &TREE_OPERAND (*addr_p, 0);
172 return cbck (*addr_p, nxt, data);
175 case VIEW_CONVERT_EXPR:
178 nxt = &TREE_OPERAND (*addr_p, 0);
182 /* If the component has varying offset, it behaves like index
184 idx = &TREE_OPERAND (*addr_p, 2);
186 && !cbck (*addr_p, idx, data))
189 nxt = &TREE_OPERAND (*addr_p, 0);
193 case ARRAY_RANGE_REF:
194 nxt = &TREE_OPERAND (*addr_p, 0);
195 if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
212 gcc_assert (is_gimple_min_invariant (*addr_p));
216 idx = &TMR_BASE (*addr_p);
218 && !cbck (*addr_p, idx, data))
220 idx = &TMR_INDEX (*addr_p);
222 && !cbck (*addr_p, idx, data))
/* NOTE(review): interior lines are elided from this extract; the visible
   code classifies STMT into MOVE_POSSIBLE / MOVE_PRESERVE_EXECUTION /
   MOVE_IMPOSSIBLE based on statement kind, volatility, abnormal PHI uses,
   side effects, trapping, and call presence.  */
232 /* If it is possible to hoist the statement STMT unconditionally,
233 returns MOVE_POSSIBLE.
234 If it is possible to hoist the statement STMT, but we must avoid making
235 it executed if it would not be executed in the original program (e.g.
236 because it may trap), return MOVE_PRESERVE_EXECUTION.
237 Otherwise return MOVE_IMPOSSIBLE. */
240 movement_possibility (tree stmt)
244 if (flag_unswitch_loops
245 && TREE_CODE (stmt) == COND_EXPR)
247 /* If we perform unswitching, force the operands of the invariant
248 condition to be moved out of the loop. */
249 return MOVE_POSSIBLE;
252 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
253 return MOVE_IMPOSSIBLE;
255 if (stmt_ends_bb_p (stmt))
256 return MOVE_IMPOSSIBLE;
258 if (stmt_ann (stmt)->has_volatile_ops)
259 return MOVE_IMPOSSIBLE;
261 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
262 if (TREE_CODE (lhs) == SSA_NAME
263 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
264 return MOVE_IMPOSSIBLE;
266 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
268 if (TREE_SIDE_EFFECTS (rhs)
269 || tree_could_throw_p (rhs))
270 return MOVE_IMPOSSIBLE;
272 if (TREE_CODE (lhs) != SSA_NAME
273 || tree_could_trap_p (rhs))
274 return MOVE_PRESERVE_EXECUTION;
276 if (get_call_expr_in (stmt))
278 /* While pure or const call is guaranteed to have no side effects, we
279 cannot move it arbitrarily. Consider code like
281 char *s = something ();
291 Here the strlen call cannot be moved out of the loop, even though
292 s is invariant. In addition to possibly creating a call with
293 invalid arguments, moving out a function call that is not executed
294 may cause performance regressions in case the call is costly and
295 not executed at all. */
296 return MOVE_PRESERVE_EXECUTION;
298 return MOVE_POSSIBLE;
/* NOTE(review): interior lines are elided from this extract (e.g. the
   guard between lines 317 and 319 that presumably checks def_bb — confirm
   against the full source).  */
301 /* Suppose that operand DEF is used inside the LOOP. Returns the outermost
302 loop to that we could move the expression using DEF if it did not have
303 other operands, i.e. the outermost loop enclosing LOOP in that the value
304 of DEF is invariant. */
307 outermost_invariant_loop (tree def, struct loop *loop)
311 struct loop *max_loop;
313 if (TREE_CODE (def) != SSA_NAME)
314 return superloop_at_depth (loop, 1);
316 def_stmt = SSA_NAME_DEF_STMT (def);
317 def_bb = bb_for_stmt (def_stmt);
319 return superloop_at_depth (loop, 1);
321 max_loop = find_common_loop (loop, def_bb->loop_father);
323 if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
324 max_loop = find_common_loop (max_loop,
325 loop_outer (LIM_DATA (def_stmt)->max_loop));
326 if (max_loop == loop)
328 max_loop = superloop_at_depth (loop, loop_depth (max_loop) + 1);
/* NOTE(review): interior lines are elided from this extract; the visible
   code recurses over operands of unary/binary/expression/comparison nodes
   and narrows max_loop via flow_loop_nested_p.  */
333 /* Returns the outermost superloop of LOOP in that the expression EXPR is
337 outermost_invariant_loop_expr (tree expr, struct loop *loop)
339 enum tree_code_class codeclass = TREE_CODE_CLASS (TREE_CODE (expr));
341 struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;
343 if (TREE_CODE (expr) == SSA_NAME
344 || TREE_CODE (expr) == INTEGER_CST
345 || is_gimple_min_invariant (expr))
346 return outermost_invariant_loop (expr, loop);
348 if (codeclass != tcc_unary
349 && codeclass != tcc_binary
350 && codeclass != tcc_expression
351 && codeclass != tcc_vl_exp
352 && codeclass != tcc_comparison)
355 nops = TREE_OPERAND_LENGTH (expr);
356 for (i = 0; i < nops; i++)
358 aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
362 if (flow_loop_nested_p (max_loop, aloop))
/* NOTE(review): interior lines are elided from this extract (including the
   early-return conditions between lines 387 and 393 and the tail of the
   function after line 413).  */
369 /* DATA is a structure containing information associated with a statement
370 inside LOOP. DEF is one of the operands of this statement.
372 Find the outermost loop enclosing LOOP in that value of DEF is invariant
373 and record this in DATA->max_loop field. If DEF itself is defined inside
374 this loop as well (i.e. we need to hoist it out of the loop if we want
375 to hoist the statement represented by DATA), record the statement in that
376 DEF is defined to the DATA->depends list. Additionally if ADD_COST is true,
377 add the cost of the computation of DEF to the DATA->cost.
379 If DEF is not invariant in LOOP, return false. Otherwise return TRUE. */
382 add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
385 tree def_stmt = SSA_NAME_DEF_STMT (def);
386 basic_block def_bb = bb_for_stmt (def_stmt);
387 struct loop *max_loop;
393 max_loop = outermost_invariant_loop (def, loop);
397 if (flow_loop_nested_p (data->max_loop, max_loop))
398 data->max_loop = max_loop;
400 if (!LIM_DATA (def_stmt))
404 /* Only add the cost if the statement defining DEF is inside LOOP,
405 i.e. if it is likely that by moving the invariants dependent
406 on it, we will be able to avoid creating a new register for
407 it (since it will be only used in these dependent invariants). */
408 && def_bb->loop_father == loop)
409 data->cost += LIM_DATA (def_stmt)->cost;
411 dep = XNEW (struct depend);
412 dep->stmt = def_stmt;
413 dep->next = data->depends;
/* NOTE(review): interior lines are elided from this extract — the switch
   over TREE_CODE (rhs) is missing most of its case labels and the default
   cost returns.  */
419 /* Returns an estimate for a cost of statement STMT. TODO -- the values here
420 are just ad-hoc constants. The estimates should be based on target-specific
424 stmt_cost (tree stmt)
429 /* Always try to create possibilities for unswitching. */
430 if (TREE_CODE (stmt) == COND_EXPR)
431 return LIM_EXPENSIVE;
433 rhs = GENERIC_TREE_OPERAND (stmt, 1);
435 /* Hoisting memory references out should almost surely be a win. */
436 if (stmt_references_memory_p (stmt))
439 switch (TREE_CODE (rhs))
442 /* We should be hoisting calls if possible. */
444 /* Unless the call is a builtin_constant_p; this always folds to a
445 constant, so moving it is useless. */
446 rhs = get_callee_fndecl (rhs);
447 if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
448 && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
465 /* Division and multiplication are usually expensive. */
/* NOTE(review): interior lines are elided from this extract (the failure
   returns after each add_dependency call and the final return are
   missing).  */
481 /* Determine the outermost loop to that it is possible to hoist a statement
482 STMT and store it to LIM_DATA (STMT)->max_loop. To do this we determine
483 the outermost loop in that the value computed by STMT is invariant.
484 If MUST_PRESERVE_EXEC is true, additionally choose such a loop that
485 we preserve the fact whether STMT is executed. It also fills other related
486 information to LIM_DATA (STMT).
488 The function returns false if STMT cannot be hoisted outside of the loop it
489 is defined in, and true otherwise. */
492 determine_max_movement (tree stmt, bool must_preserve_exec)
494 basic_block bb = bb_for_stmt (stmt);
495 struct loop *loop = bb->loop_father;
497 struct lim_aux_data *lim_data = LIM_DATA (stmt);
501 if (must_preserve_exec)
502 level = ALWAYS_EXECUTED_IN (bb);
504 level = superloop_at_depth (loop, 1);
505 lim_data->max_loop = level;
507 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
508 if (!add_dependency (val, lim_data, loop, true))
511 FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_VIRTUAL_USES)
512 if (!add_dependency (val, lim_data, loop, false))
515 lim_data->cost += stmt_cost (stmt);
/* NOTE(review): interior lines are elided from this extract; the visible
   code recursively propagates the hoist level through the depends list.  */
520 /* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
521 and that one of the operands of this statement is computed by STMT.
522 Ensure that STMT (together with all the statements that define its
523 operands) is hoisted at least out of the loop LEVEL. */
526 set_level (tree stmt, struct loop *orig_loop, struct loop *level)
528 struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
531 stmt_loop = find_common_loop (orig_loop, stmt_loop);
532 if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
533 stmt_loop = find_common_loop (stmt_loop,
534 loop_outer (LIM_DATA (stmt)->tgt_loop));
535 if (flow_loop_nested_p (stmt_loop, level))
538 gcc_assert (LIM_DATA (stmt));
539 gcc_assert (level == LIM_DATA (stmt)->max_loop
540 || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));
542 LIM_DATA (stmt)->tgt_loop = level;
543 for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
544 set_level (dep->stmt, orig_loop, level);
/* NOTE(review): thin wrapper — delegates to set_level with the statement's
   own loop as origin and its max_loop as target.  Interior lines (return
   type, braces) are elided from this extract.  */
547 /* Determines an outermost loop from that we want to hoist the statement STMT.
548 For now we chose the outermost possible loop. TODO -- use profiling
549 information to set it more sanely. */
552 set_profitable_level (tree stmt)
554 set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
/* NOTE(review): the null-check on CALL between lines 562 and 567 is elided
   from this extract — confirm against the full source.  */
557 /* Returns true if STMT is not a pure call. */
560 nonpure_call_p (tree stmt)
562 tree call = get_call_expr_in (stmt);
567 return TREE_SIDE_EFFECTS (call) != 0;
/* NOTE(review): the loop body freeing each dependency node and the final
   free of DATA are elided from this extract.  */
570 /* Releases the memory occupied by DATA. */
573 free_lim_aux_data (struct lim_aux_data *data)
575 struct depend *dep, *next;
577 for (dep = data->depends; dep; dep = next)
/* NOTE(review): interior lines are elided from this extract (the final
   return of stmt1 after line 617 is missing).  The visible code splits the
   division into a reciprocal (stmt1, invariant candidate) and a multiply
   (stmt2, left in place).  */
585 /* Rewrite a/b to a*(1/b). Return the invariant stmt to process. */
588 rewrite_reciprocal (block_stmt_iterator *bsi)
590 tree stmt, lhs, rhs, stmt1, stmt2, var, name, tmp;
592 stmt = bsi_stmt (*bsi);
593 lhs = GENERIC_TREE_OPERAND (stmt, 0);
594 rhs = GENERIC_TREE_OPERAND (stmt, 1);
596 /* stmt must be GIMPLE_MODIFY_STMT. */
597 var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
598 add_referenced_var (var);
600 tmp = build2 (RDIV_EXPR, TREE_TYPE (rhs),
601 build_real (TREE_TYPE (rhs), dconst1),
602 TREE_OPERAND (rhs, 1));
603 stmt1 = build_gimple_modify_stmt (var, tmp);
604 name = make_ssa_name (var, stmt1);
605 GIMPLE_STMT_OPERAND (stmt1, 0) = name;
606 tmp = build2 (MULT_EXPR, TREE_TYPE (rhs),
607 name, TREE_OPERAND (rhs, 0));
608 stmt2 = build_gimple_modify_stmt (lhs, tmp);
610 /* Replace division stmt with reciprocal and multiply stmts.
611 The multiply stmt is not invariant, so update iterator
612 and avoid rescanning. */
613 bsi_replace (bsi, stmt1, true);
614 bsi_insert_after (bsi, stmt2, BSI_NEW_STMT);
615 SSA_NAME_DEF_STMT (lhs) = stmt2;
617 /* Continue processing with invariant reciprocal statement. */
/* NOTE(review): interior lines are elided from this extract (early-return
   paths that give back the original statement, and the tail returning the
   new shift statement).  The visible code pattern-matches
   lhs = (A >> B) & 1 followed by a compare-against-zero and rewrites it to
   test A & (1 << B), making the mask computation loop-invariant.  */
621 /* Check if the pattern at *BSI is a bittest of the form
622 (A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. */
625 rewrite_bittest (block_stmt_iterator *bsi)
627 tree stmt, lhs, rhs, var, name, use_stmt, stmt1, stmt2, t;
630 stmt = bsi_stmt (*bsi);
631 lhs = GENERIC_TREE_OPERAND (stmt, 0);
632 rhs = GENERIC_TREE_OPERAND (stmt, 1);
634 /* Verify that the single use of lhs is a comparison against zero. */
635 if (TREE_CODE (lhs) != SSA_NAME
636 || !single_imm_use (lhs, &use, &use_stmt)
637 || TREE_CODE (use_stmt) != COND_EXPR)
639 t = COND_EXPR_COND (use_stmt);
640 if (TREE_OPERAND (t, 0) != lhs
641 || (TREE_CODE (t) != NE_EXPR
642 && TREE_CODE (t) != EQ_EXPR)
643 || !integer_zerop (TREE_OPERAND (t, 1)))
646 /* Get at the operands of the shift. The rhs is TMP1 & 1. */
647 stmt1 = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs, 0));
648 if (TREE_CODE (stmt1) != GIMPLE_MODIFY_STMT)
651 /* There is a conversion in between possibly inserted by fold. */
652 t = GIMPLE_STMT_OPERAND (stmt1, 1);
653 if (TREE_CODE (t) == NOP_EXPR
654 || TREE_CODE (t) == CONVERT_EXPR)
656 t = TREE_OPERAND (t, 0);
657 if (TREE_CODE (t) != SSA_NAME
658 || !has_single_use (t))
660 stmt1 = SSA_NAME_DEF_STMT (t);
661 if (TREE_CODE (stmt1) != GIMPLE_MODIFY_STMT)
663 t = GIMPLE_STMT_OPERAND (stmt1, 1);
666 /* Verify that B is loop invariant but A is not. Verify that with
667 all the stmt walking we are still in the same loop. */
668 if (TREE_CODE (t) == RSHIFT_EXPR
669 && loop_containing_stmt (stmt1) == loop_containing_stmt (stmt)
670 && outermost_invariant_loop_expr (TREE_OPERAND (t, 1),
671 loop_containing_stmt (stmt1)) != NULL
672 && outermost_invariant_loop_expr (TREE_OPERAND (t, 0),
673 loop_containing_stmt (stmt1)) == NULL)
675 tree a = TREE_OPERAND (t, 0);
676 tree b = TREE_OPERAND (t, 1);
679 var = create_tmp_var (TREE_TYPE (a), "shifttmp");
680 add_referenced_var (var);
681 t = fold_build2 (LSHIFT_EXPR, TREE_TYPE (a),
682 build_int_cst (TREE_TYPE (a), 1), b);
683 stmt1 = build_gimple_modify_stmt (var, t);
684 name = make_ssa_name (var, stmt1);
685 GIMPLE_STMT_OPERAND (stmt1, 0) = name;
688 t = fold_build2 (BIT_AND_EXPR, TREE_TYPE (a), a, name);
689 stmt2 = build_gimple_modify_stmt (var, t);
690 name = make_ssa_name (var, stmt2);
691 GIMPLE_STMT_OPERAND (stmt2, 0) = name;
693 /* Replace the SSA_NAME we compare against zero. Adjust
694 the type of zero accordingly. */
696 TREE_OPERAND (COND_EXPR_COND (use_stmt), 1)
697 = build_int_cst_type (TREE_TYPE (name), 0);
699 bsi_insert_before (bsi, stmt1, BSI_SAME_STMT);
700 bsi_replace (bsi, stmt2, true);
/* NOTE(review): interior lines are elided from this extract (declarations
   of stmt/pos/rhs, the MOVE_IMPOSSIBLE continue path, and several closing
   braces).  The visible code is the per-basic-block dominator-walk callback:
   it classifies each statement, optionally rewrites reciprocal and bittest
   patterns, allocates LIM_DATA, and computes max movement.  */
709 /* Determine the outermost loops in that statements in basic block BB are
710 invariant, and record them to the LIM_DATA associated with the statements.
711 Callback for walk_dominator_tree. */
714 determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
718 block_stmt_iterator bsi;
720 bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
721 struct loop *outermost = ALWAYS_EXECUTED_IN (bb);
723 if (!loop_outer (bb->loop_father))
726 if (dump_file && (dump_flags & TDF_DETAILS))
727 fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
728 bb->index, bb->loop_father->num, loop_depth (bb->loop_father));
730 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
732 stmt = bsi_stmt (bsi);
734 pos = movement_possibility (stmt);
735 if (pos == MOVE_IMPOSSIBLE)
737 if (nonpure_call_p (stmt))
745 if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
747 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
749 /* If divisor is invariant, convert a/b to a*(1/b), allowing reciprocal
750 to be hoisted out of loop, saving expensive divide. */
751 if (pos == MOVE_POSSIBLE
752 && TREE_CODE (rhs) == RDIV_EXPR
753 && flag_unsafe_math_optimizations
754 && !flag_trapping_math
755 && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
756 loop_containing_stmt (stmt)) != NULL
757 && outermost_invariant_loop_expr (rhs,
758 loop_containing_stmt (stmt)) == NULL)
759 stmt = rewrite_reciprocal (&bsi);
761 /* If the shift count is invariant, convert (A >> B) & 1 to
762 A & (1 << B) allowing the bit mask to be hoisted out of the loop
763 saving an expensive shift. */
764 if (pos == MOVE_POSSIBLE
765 && TREE_CODE (rhs) == BIT_AND_EXPR
766 && integer_onep (TREE_OPERAND (rhs, 1))
767 && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME
768 && has_single_use (TREE_OPERAND (rhs, 0)))
769 stmt = rewrite_bittest (&bsi);
772 stmt_ann (stmt)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
773 LIM_DATA (stmt)->always_executed_in = outermost;
775 if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
778 if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
780 LIM_DATA (stmt)->max_loop = NULL;
784 if (dump_file && (dump_flags & TDF_DETAILS))
786 print_generic_stmt_indented (dump_file, stmt, 0, 2);
787 fprintf (dump_file, " invariant up to level %d, cost %d.\n\n",
788 loop_depth (LIM_DATA (stmt)->max_loop),
789 LIM_DATA (stmt)->cost);
792 if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
793 set_profitable_level (stmt);
/* NOTE(review): driver that runs determine_invariantness_stmt over the
   dominator tree starting at ENTRY_BLOCK_PTR.  Minor elisions in this
   extract (return type line, braces).  */
797 /* For each statement determines the outermost loop in that it is invariant,
798 statements on whose motion it depends and the cost of the computation.
799 This information is stored to the LIM_DATA structure associated with
803 determine_invariantness (void)
805 struct dom_walk_data walk_data;
807 memset (&walk_data, 0, sizeof (struct dom_walk_data));
808 walk_data.dom_direction = CDI_DOMINATORS;
809 walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;
811 init_walk_dominator_tree (&walk_data);
812 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
813 fini_walk_dominator_tree (&walk_data);
/* NOTE(review): interior lines are elided from this extract (the bsi_next
   continue paths, the check whether tgt_loop was set, and the cost
   argument of the dump fprintf).  The visible code frees each statement's
   LIM_DATA and moves hoistable statements onto the target loop's preheader
   edge.  */
816 /* Hoist the statements in basic block BB out of the loops prescribed by
817 data stored in LIM_DATA structures associated with each statement. Callback
818 for walk_dominator_tree. */
821 move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
825 block_stmt_iterator bsi;
829 if (!loop_outer (bb->loop_father))
832 for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
834 stmt = bsi_stmt (bsi);
836 if (!LIM_DATA (stmt))
842 cost = LIM_DATA (stmt)->cost;
843 level = LIM_DATA (stmt)->tgt_loop;
844 free_lim_aux_data (LIM_DATA (stmt));
845 stmt_ann (stmt)->common.aux = NULL;
853 /* We do not really want to move conditionals out of the loop; we just
854 placed it here to force its operands to be moved if necessary. */
855 if (TREE_CODE (stmt) == COND_EXPR)
858 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, "Moving statement\n");
861 print_generic_stmt (dump_file, stmt, 0);
862 fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
865 bsi_insert_on_edge (loop_preheader_edge (level), stmt);
866 bsi_remove (&bsi, false);
/* NOTE(review): driver for the motion phase — walks the dominator tree
   with move_computations_stmt, commits the queued edge insertions, and
   updates loop-closed SSA if needed.  Minor elisions in this extract.  */
870 /* Hoist the statements out of the loops prescribed by data stored in
871 LIM_DATA structures associated with each statement.*/
874 move_computations (void)
876 struct dom_walk_data walk_data;
878 memset (&walk_data, 0, sizeof (struct dom_walk_data));
879 walk_data.dom_direction = CDI_DOMINATORS;
880 walk_data.before_dom_children_before_stmts = move_computations_stmt;
882 init_walk_dominator_tree (&walk_data);
883 walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
884 fini_walk_dominator_tree (&walk_data);
886 bsi_commit_edge_inserts ();
887 if (need_ssa_update_p ())
888 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
/* NOTE(review): the failure returns after each outermost_invariant_loop*
   call and the final return are elided from this extract.  */
891 /* Checks whether the statement defining variable *INDEX can be hoisted
892 out of the loop passed in DATA. Callback for for_each_index. */
895 may_move_till (tree ref, tree *index, void *data)
897 struct loop *loop = (struct loop*) data, *max_loop;
899 /* If REF is an array reference, check also that the step and the lower
900 bound is invariant in LOOP. */
901 if (TREE_CODE (ref) == ARRAY_REF)
903 tree step = array_ref_element_size (ref);
904 tree lbound = array_ref_low_bound (ref);
906 max_loop = outermost_invariant_loop_expr (step, loop);
910 max_loop = outermost_invariant_loop_expr (lbound, loop);
915 max_loop = outermost_invariant_loop (*index, loop);
/* NOTE(review): interior lines are elided from this extract (returns after
   the SSA_NAME branch and after the class check).  */
922 /* Forces statements defining (invariant) SSA names in expression EXPR to be
923 moved out of the LOOP. ORIG_LOOP is the loop in that EXPR is used. */
926 force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
928 enum tree_code_class codeclass = TREE_CODE_CLASS (TREE_CODE (expr));
931 if (TREE_CODE (expr) == SSA_NAME)
933 tree stmt = SSA_NAME_DEF_STMT (expr);
934 if (IS_EMPTY_STMT (stmt))
937 set_level (stmt, orig_loop, loop);
941 if (codeclass != tcc_unary
942 && codeclass != tcc_binary
943 && codeclass != tcc_expression
944 && codeclass != tcc_vl_exp
945 && codeclass != tcc_comparison)
948 nops = TREE_OPERAND_LENGTH (expr);
949 for (i = 0; i < nops; i++)
950 force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
/* NOTE(review): the struct fmt_data definition is only partially visible
   here (it appears to carry orig_loop and a loop field used below —
   confirm against the full source); interior lines of the function are
   likewise elided.  */
953 /* Forces statement defining invariants in REF (and *INDEX) to be moved out of
954 the LOOP. The reference REF is used in the loop ORIG_LOOP. Callback for
960 struct loop *orig_loop;
964 force_move_till (tree ref, tree *index, void *data)
967 struct fmt_data *fmt_data = (struct fmt_data *) data;
969 if (TREE_CODE (ref) == ARRAY_REF)
971 tree step = array_ref_element_size (ref);
972 tree lbound = array_ref_low_bound (ref);
974 force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
975 force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
978 if (TREE_CODE (*index) != SSA_NAME)
981 stmt = SSA_NAME_DEF_STMT (*index);
982 if (IS_EMPTY_STMT (stmt))
985 set_level (stmt, fmt_data->orig_loop, fmt_data->loop);
/* NOTE(review): the assignments filling aref->stmt and aref->ref and the
   final store to *MEM_REFS are elided from this extract — the visible
   lines allocate the node and link it at the head of the list.  */
990 /* Records memory reference location *REF to the list MEM_REFS. The reference
991 occurs in statement STMT. */
994 record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
996 struct mem_ref_loc *aref = XNEW (struct mem_ref_loc);
1001 aref->next = *mem_refs;
/* NOTE(review): the loop structure and the free of each node are elided
   from this extract.  */
1008 /* Releases list of memory reference locations MEM_REFS. */
1010 free_mem_ref_locs (struct mem_ref_loc *mem_refs)
1012 struct mem_ref_loc *act;
1015 mem_refs = mem_refs->next;
/* NOTE(review): declarations of VAR and ITER are elided from this extract.
   Each occurrence's virtual operands are marked for renaming, then the
   reference itself is replaced by TMP_VAR.  */
1020 /* Rewrites memory references in list MEM_REFS by variable TMP_VAR. */
1023 rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
1028 for (; mem_refs; mem_refs = mem_refs->next)
1030 FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter, SSA_OP_ALL_VIRTUALS)
1031 mark_sym_for_renaming (SSA_NAME_VAR (var));
1033 *mem_refs->ref = tmp_var;
1034 update_stmt (mem_refs->stmt);
/* NOTE(review): the early return when the appended string would overflow
   MAX_LSM_NAME_LENGTH is elided from this extract (the `if (l > ...)` body
   is missing); as shown, the visible strcpy is bounded only by that elided
   check — confirm against the full source.  */
1038 /* The name and the length of the currently generated variable
1040 #define MAX_LSM_NAME_LENGTH 40
1041 static char lsm_tmp_name[MAX_LSM_NAME_LENGTH + 1];
1042 static int lsm_tmp_name_length;
1044 /* Adds S to lsm_tmp_name. */
1047 lsm_tmp_name_add (const char *s)
1049 int l = strlen (s) + lsm_tmp_name_length;
1050 if (l > MAX_LSM_NAME_LENGTH)
1053 strcpy (lsm_tmp_name + lsm_tmp_name_length, s);
1054 lsm_tmp_name_length = l;
/* NOTE(review): most case labels of the switch are elided from this
   extract, so several of the recursive branches below cannot be attributed
   to a specific TREE_CODE from the visible text.  The function builds a
   mnemonic temporary-variable name by recursing over the reference tree
   and appending suffixes via lsm_tmp_name_add.  */
1057 /* Stores the name for temporary variable that replaces REF to
1061 gen_lsm_tmp_name (tree ref)
1065 switch (TREE_CODE (ref))
1067 case MISALIGNED_INDIRECT_REF:
1068 case ALIGN_INDIRECT_REF:
1070 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1071 lsm_tmp_name_add ("_");
1075 case VIEW_CONVERT_EXPR:
1076 case ARRAY_RANGE_REF:
1077 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1081 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1082 lsm_tmp_name_add ("_RE");
1086 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1087 lsm_tmp_name_add ("_IM");
1091 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1092 lsm_tmp_name_add ("_");
1093 name = get_name (TREE_OPERAND (ref, 1));
1096 lsm_tmp_name_add ("_");
1097 lsm_tmp_name_add (name);
1100 gen_lsm_tmp_name (TREE_OPERAND (ref, 0));
1101 lsm_tmp_name_add ("_I");
1105 ref = SSA_NAME_VAR (ref);
1110 name = get_name (ref);
1113 lsm_tmp_name_add (name);
1117 lsm_tmp_name_add ("S");
1121 lsm_tmp_name_add ("R");
/* NOTE(review): the declaration of NS and the sprintf-style formatting of
   N into it are elided from this extract.  */
1129 /* Determines name for temporary variable that replaces REF.
1130 The name is accumulated into the lsm_tmp_name variable.
1131 N is added to the name of the temporary. */
1134 get_lsm_tmp_name (tree ref, unsigned n)
1138 lsm_tmp_name_length = 0;
1139 gen_lsm_tmp_name (ref);
1140 lsm_tmp_name_add ("_lsm");
1145 lsm_tmp_name_add (ns);
1147 return lsm_tmp_name;
/* NOTE(review): interior lines are elided from this extract (declarations
   of tmp_var/load/store/ex/i and several braces).  The visible code
   creates the LSM temporary, forces index computations out of the loop,
   rewrites the in-loop references, inserts the load on the latch edge, and
   stores back on every exit edge.  */
1150 /* Records request for store motion of memory reference REF from LOOP.
1151 MEM_REFS is the list of occurrences of the reference REF inside LOOP;
1152 these references are rewritten by a new temporary variable.
1153 Exits from the LOOP are stored in EXITS. The initialization of the
1154 temporary variable is put to the preheader of the loop, and assignments
1155 to the reference from the temporary variable are emitted to exits. */
1158 schedule_sm (struct loop *loop, VEC (edge, heap) *exits, tree ref,
1159 struct mem_ref_loc *mem_refs)
1161 struct mem_ref_loc *aref;
1165 struct fmt_data fmt_data;
1168 if (dump_file && (dump_flags & TDF_DETAILS))
1170 fprintf (dump_file, "Executing store motion of ");
1171 print_generic_expr (dump_file, ref, 0);
1172 fprintf (dump_file, " from loop %d\n", loop->num);
1175 tmp_var = make_rename_temp (TREE_TYPE (ref),
1176 get_lsm_tmp_name (ref, ~0));
1178 fmt_data.loop = loop;
1179 fmt_data.orig_loop = loop;
1180 for_each_index (&ref, force_move_till, &fmt_data);
1182 rewrite_mem_refs (tmp_var, mem_refs);
1183 for (aref = mem_refs; aref; aref = aref->next)
1184 if (LIM_DATA (aref->stmt))
1185 LIM_DATA (aref->stmt)->sm_done = true;
1187 /* Emit the load & stores. */
1188 load = build_gimple_modify_stmt (tmp_var, ref);
1189 get_stmt_ann (load)->common.aux = xcalloc (1, sizeof (struct lim_aux_data));
1190 LIM_DATA (load)->max_loop = loop;
1191 LIM_DATA (load)->tgt_loop = loop;
1193 /* Put this into the latch, so that we are sure it will be processed after
1194 all dependencies. */
1195 bsi_insert_on_edge (loop_latch_edge (loop), load);
1197 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
1199 store = build_gimple_modify_stmt (unshare_expr (ref), tmp_var);
1200 bsi_insert_on_edge (ex, store);
/* NOTE(review): interior lines are elided from this extract (the early
   returns for each rejection, the check on must_exec being NULL, and the
   goto/flag logic around the "executed always" test).  */
1204 /* Check whether memory reference REF can be hoisted out of the LOOP. If this
1205 is true, prepare the statements that load the value of the memory reference
1206 to a temporary variable in the loop preheader, store it back on the loop
1207 exits, and replace all the references inside LOOP by this temporary variable.
1208 EXITS is the list of exits of LOOP. CLOBBERED_VOPS is the bitmap of virtual
1209 operands that are clobbered by a call or accessed through multiple references
1213 determine_lsm_ref (struct loop *loop, VEC (edge, heap) *exits,
1214 bitmap clobbered_vops, struct mem_ref *ref)
1216 struct mem_ref_loc *aref;
1217 struct loop *must_exec;
1219 /* In case the memory is not stored to, there is nothing for SM to do. */
1220 if (!ref->is_stored)
1223 /* If the reference is aliased with any different ref, or killed by call
1224 in function, then fail. */
1225 if (bitmap_intersect_p (ref->vops, clobbered_vops))
1228 if (tree_could_trap_p (ref->mem))
1230 /* If the memory access is unsafe (i.e. it might trap), ensure that some
1231 of the statements in that it occurs is always executed when the loop
1232 is entered. This way we know that by moving the load from the
1233 reference out of the loop we will not cause the error that would not
1236 TODO -- in fact we would like to check for anticipability of the
1237 reference, i.e. that on each path from loop entry to loop exit at
1238 least one of the statements containing the memory reference is
1241 for (aref = ref->locs; aref; aref = aref->next)
1243 if (!LIM_DATA (aref->stmt))
1246 must_exec = LIM_DATA (aref->stmt)->always_executed_in;
1250 if (must_exec == loop
1251 || flow_loop_nested_p (must_exec, loop))
1259 schedule_sm (loop, exits, ref->mem, ref->locs);
/* NOTE(review): simple dispatcher — tries store motion for every gathered
   reference in turn.  Return type line and braces are elided from this
   extract.  */
1262 /* Hoists memory references MEM_REFS out of LOOP. CLOBBERED_VOPS is the list
1263 of vops clobbered by call in loop or accessed by multiple memory references.
1264 EXITS is the list of exit edges of the LOOP. */
1267 hoist_memory_references (struct loop *loop, struct mem_ref *mem_refs,
1268 bitmap clobbered_vops, VEC (edge, heap) *exits)
1270 struct mem_ref *ref;
1272 for (ref = mem_refs; ref; ref = ref->next)
1273 determine_lsm_ref (loop, exits, clobbered_vops, ref);
/* NOTE(review): the false/true returns are elided from this extract — the
   visible code rejects the loop when any exit edge is abnormal (statements
   cannot be inserted on such edges).  */
1276 /* Checks whether LOOP (with exits stored in EXITS array) is suitable
1277 for a store motion optimization (i.e. whether we can insert statement
1281 loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED,
1282 VEC (edge, heap) *exits)
1287 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
1288 if (ex->flags & EDGE_ABNORMAL)
/* NOTE(review): returns the precomputed hash stored in the mem_ref node
   (computed via iterative_hash_expr at insertion time — see
   gather_mem_refs_stmt).  */
1294 /* A hash function for struct mem_ref object OBJ. */
1297 memref_hash (const void *obj)
1299 return ((const struct mem_ref *) obj)->hash;
/* NOTE(review): asymmetric comparator for htab lookup — OBJ1 is a stored
   struct mem_ref, OBJ2 is the raw tree being looked up.  */
1302 /* An equality function for struct mem_ref object OBJ1 with
1303 memory reference OBJ2. */
1306 memref_eq (const void *obj1, const void *obj2)
1308 const struct mem_ref *const mem1 = (const struct mem_ref *) obj1;
1310 return operand_equal_p (mem1->mem, (const_tree) obj2, 0);
/* NOTE(review): interior lines are elided from this extract (the setting
   of MEM and IS_STORED in each branch, the goto fail path, and the filling
   of ref->mem/hash/locs on creation).  The visible code recognizes
   MEM = value and value = MEM assignments, interns the reference in the
   hash table keyed by iterative_hash_expr, accumulates its virtual
   operands, and records unrecognized statements' vops as clobbered.  */
1313 /* Gathers memory references in statement STMT in LOOP, storing the
1314 information about them in MEM_REFS hash table. Note vops accessed through
1315 unrecognized statements in CLOBBERED_VOPS. The newly created references
1316 are also stored to MEM_REF_LIST. */
1319 gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
1320 bitmap clobbered_vops, tree stmt,
1321 struct mem_ref **mem_ref_list)
1323 tree *lhs, *rhs, *mem = NULL;
1326 struct mem_ref *ref = NULL;
1331 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
1334 /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns. */
1335 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
1338 lhs = &GIMPLE_STMT_OPERAND (stmt, 0);
1339 rhs = &GIMPLE_STMT_OPERAND (stmt, 1);
1341 if (TREE_CODE (*lhs) == SSA_NAME)
1343 if (!is_gimple_addressable (*rhs))
1349 else if (TREE_CODE (*rhs) == SSA_NAME
1350 || is_gimple_min_invariant (*rhs))
1358 /* If we cannot create an SSA name for the result, give up. */
1359 if (!is_gimple_reg_type (TREE_TYPE (*mem))
1360 || TREE_THIS_VOLATILE (*mem))
1363 /* If we cannot move the reference out of the loop, fail. */
1364 if (!for_each_index (mem, may_move_till, loop))
1367 hash = iterative_hash_expr (*mem, 0);
1368 slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);
1371 ref = (struct mem_ref *) *slot;
1374 ref = XNEW (struct mem_ref);
1378 ref->is_stored = false;
1379 ref->vops = BITMAP_ALLOC (NULL);
1380 ref->next = *mem_ref_list;
1381 *mem_ref_list = ref;
1384 ref->is_stored |= is_stored;
1386 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi, SSA_OP_VIRTUAL_USES)
1387 bitmap_set_bit (ref->vops, DECL_UID (SSA_NAME_VAR (vname)));
1388 record_mem_ref_loc (&ref->locs, stmt, mem);
1392 FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi, SSA_OP_VIRTUAL_USES)
1393 bitmap_set_bit (clobbered_vops, DECL_UID (SSA_NAME_VAR (vname)));
1396 /* Gathers memory references in LOOP. Notes vops accessed through unrecognized
1397 statements in CLOBBERED_VOPS. The list of the references found by
1398 the function is returned. */
/* Walks every statement of every block of LOOP, delegating recognition to
   gather_mem_refs_stmt.  The hash table is only used for duplicate
   detection during the walk and is deleted before returning; the
   resulting references survive on the returned linked list.
   NOTE(review): the free of BODY and some brace lines are not visible in
   this excerpt.  */
1400 static struct mem_ref *
1401 gather_mem_refs (struct loop *loop, bitmap clobbered_vops)
1403 basic_block *body = get_loop_body (loop);
1404 block_stmt_iterator bsi;
1406 struct mem_ref *mem_ref_list = NULL;
1407 htab_t mem_refs = htab_create (100, memref_hash, memref_eq, NULL);
1409 for (i = 0; i < loop->num_nodes; i++)
1411 for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
1412 gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi),
1418 htab_delete (mem_refs);
1419 return mem_ref_list;
1422 /* Finds the vops accessed by more than one of the memory references described
1423 in MEM_REFS and marks them in CLOBBERED_VOPS. */
/* Classic one-pass duplicate detection with two scratch bitmaps:
   ALL_VOPS accumulates every vop seen so far; intersecting it with the
   current reference's vops (into TMP) yields exactly the vops seen at
   least twice, which are OR-ed into CLOBBERED_VOPS.  */
1426 find_more_ref_vops (struct mem_ref *mem_refs, bitmap clobbered_vops)
1428 bitmap_head tmp, all_vops;
1429 struct mem_ref *ref;
1431 bitmap_initialize (&tmp, &bitmap_default_obstack);
1432 bitmap_initialize (&all_vops, &bitmap_default_obstack);
1434 for (ref = mem_refs; ref; ref = ref->next)
1436 /* The vops that are already in all_vops are accessed by more than
1437 one memory reference. */
1438 bitmap_and (&tmp, &all_vops, ref->vops)
1439 bitmap_ior_into (clobbered_vops, &tmp);
1440 bitmap_clear (&tmp);
1442 bitmap_ior_into (&all_vops, ref->vops);
/* Release the scratch accumulator (TMP was cleared on each iteration).  */
1445 bitmap_clear (&all_vops);
1448 /* Releases the memory occupied by REF. */
/* Frees the location list and the vop bitmap owned by REF.
   NOTE(review): the `free (ref)' of the struct itself is presumably on a
   line not visible in this excerpt — confirm against the full file.  */
1451 free_mem_ref (struct mem_ref *ref)
1453 free_mem_ref_locs (ref->locs);
1454 BITMAP_FREE (ref->vops);
1458 /* Releases the memory occupied by REFS. */
/* Walks the linked list, caching `next' before each node is released
   (loop body with the free_mem_ref call is not visible in this excerpt).  */
1461 free_mem_refs (struct mem_ref *refs)
1463 struct mem_ref *ref, *next;
1465 for (ref = refs; ref; ref = next)
1472 /* Try to perform store motion for all memory references modified inside
/* Driver for store motion on a single LOOP:
   1. bail out early if any exit edge is unsuitable (abnormal),
   2. collect the loop's memory references and the conservatively
      clobbered vops,
   3. additionally clobber vops shared by several references,
   4. hoist whatever remains hoistable,
   5. release all per-loop resources.
   NOTE(review): braces and the early `return' after VEC_free are on
   lines not visible in this excerpt.  */
1476 determine_lsm_loop (struct loop *loop)
1478 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
1479 bitmap clobbered_vops;
1480 struct mem_ref *mem_refs;
1482 if (!loop_suitable_for_sm (loop, exits))
1484 VEC_free (edge, heap, exits);
1488 /* Find the memory references in LOOP. */
1489 clobbered_vops = BITMAP_ALLOC (NULL);
1490 mem_refs = gather_mem_refs (loop, clobbered_vops);
1492 /* Find the vops that are used for more than one reference. */
1493 find_more_ref_vops (mem_refs, clobbered_vops);
1495 /* Hoist all suitable memory references. */
1496 hoist_memory_references (loop, mem_refs, clobbered_vops, exits);
1498 free_mem_refs (mem_refs);
1499 VEC_free (edge, heap, exits);
1500 BITMAP_FREE (clobbered_vops);
1503 /* Try to perform store motion for all memory references modified inside
/* Top-level store-motion driver: visits every loop in the current
   function via FOR_EACH_LOOP and commits all edge insertions (the
   stores placed on exit edges) at the end in one batch.
   NOTE(review): the loop-iterator declarations are on lines not visible
   in this excerpt.  */
1507 determine_lsm (void)
1512 /* Pass the loops from the outermost and perform the store motion as
1515 FOR_EACH_LOOP (li, loop, 0)
1517 determine_lsm_loop (loop);
1520 bsi_commit_edge_inserts ();
1523 /* Fills ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
1524 for each such basic block bb records the outermost loop for that execution
1525 of its header implies execution of bb. CONTAINS_CALL is the bitmap of
1526 blocks that contain a nonpure call. */
/* NOTE(review): this excerpt omits many lines of this function (braces,
   `break' statements, the assignment of LAST, the inner dominator walk
   that stores the result into bb->aux).  The visible logic: walk the
   loop body in dominator order, advancing LAST while the current block
   both dominates the latch and cannot leave the loop early; a nonpure
   call, an in-loop exit edge, an irreducible region or leaving an inner
   loop all stop the scan.  Recurses over the loop tree at the end.  */
1529 fill_always_executed_in (struct loop *loop, sbitmap contains_call)
1531 basic_block bb = NULL, *bbs, last = NULL;
1534 struct loop *inn_loop = loop;
/* header->aux non-NULL means this loop was already processed.  */
1536 if (!loop->header->aux)
1538 bbs = get_loop_body_in_dom_order (loop);
1540 for (i = 0; i < loop->num_nodes; i++)
/* Only blocks dominating the latch are executed on every iteration.  */
1545 if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
/* A nonpure call may not return (exit/longjmp), so stop here.  */
1548 if (TEST_BIT (contains_call, bb->index))
/* An exit edge inside the scanned prefix also ends the guarantee.  */
1551 FOR_EACH_EDGE (e, ei, bb->succs)
1552 if (!flow_bb_inside_loop_p (loop, e->dest))
1557 /* A loop might be infinite (TODO use simple loop analysis
1558 to disprove this if possible). */
1559 if (bb->flags & BB_IRREDUCIBLE_LOOP)
/* Once we entered an inner loop, leaving it ends the scan.  */
1562 if (!flow_bb_inside_loop_p (inn_loop, bb))
1565 if (bb->loop_father->header == bb)
1567 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1570 /* In a loop that is always entered we may proceed anyway.
1571 But record that we entered it and stop once we leave it. */
1572 inn_loop = bb->loop_father;
/* Propagate the result along the dominator chain from LAST up to the
   loop header (each dominator of LAST is also always executed).  */
1579 if (last == loop->header)
1581 last = get_immediate_dominator (CDI_DOMINATORS, last);
/* Process the subloops recursively.  */
1587 for (loop = loop->inner; loop; loop = loop->next)
1588 fill_always_executed_in (loop, contains_call);
1591 /* Compute the global information needed by the loop invariant motion pass. */
/* Builds a per-basic-block bitmap of blocks containing a call that is not
   pure/const (nonpure_call_p), then uses it to compute the
   always-executed-in information for every outermost loop.
   NOTE(review): the FOR_EACH_BB iteration over basic blocks and some
   declarations are on lines not visible in this excerpt.  */
1594 tree_ssa_lim_initialize (void)
1596 sbitmap contains_call = sbitmap_alloc (last_basic_block);
1597 block_stmt_iterator bsi;
1601 sbitmap_zero (contains_call);
/* Scan the block's statements; stop at the first nonpure call.  */
1604 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1606 if (nonpure_call_p (bsi_stmt (bsi)))
/* Iterator not at end => the scan stopped at a nonpure call.  */
1610 if (!bsi_end_p (bsi))
1611 SET_BIT (contains_call, bb->index);
1614 for (loop = current_loops->tree_root->inner; loop; loop = loop->next)
1615 fill_always_executed_in (loop, contains_call);
/* The bitmap is only needed during fill_always_executed_in.  */
1617 sbitmap_free (contains_call);
1620 /* Cleans up after the invariant motion pass. */
/* NOTE(review): the body of this function (presumably clearing the
   per-block aux fields set during the pass) is entirely on lines not
   visible in this excerpt.  */
1623 tree_ssa_lim_finalize (void)
1633 /* Moves invariants from loops. Only "expensive" invariants are moved out --
1634 i.e. those that are likely to be win regardless of the register pressure. */
/* Pass entry point: initialize -> determine invariantness -> store motion
   (the determine_lsm call is on a line not visible in this excerpt) ->
   move computations -> finalize.
   NOTE(review): the function's signature line is also not visible here.  */
1639 tree_ssa_lim_initialize ();
1641 /* For each statement determine the outermost loop in that it is
1642 invariant and cost for computing the invariant. */
1643 determine_invariantness ();
1645 /* For each memory reference determine whether it is possible to hoist it
1646 out of the loop. Force the necessary invariants to be moved out of the
1650 /* Move the expressions that are expensive enough. */
1651 move_computations ();
1653 tree_ssa_lim_finalize ();