/* If-conversion support.
- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010,
+ 2011
Free Software Foundation, Inc.
This file is part of GCC.
#include "output.h"
#include "optabs.h"
#include "diagnostic-core.h"
-#include "toplev.h"
#include "tm_p.h"
#include "cfgloop.h"
#include "target.h"
/* Forward references. */
static int count_bb_insns (const_basic_block);
-static bool cheap_bb_rtx_cost_p (const_basic_block, int);
+static bool cheap_bb_rtx_cost_p (const_basic_block, int, int);
static rtx first_active_insn (basic_block);
static rtx last_active_insn (basic_block, int);
static rtx find_active_insn_before (basic_block, rtx);
static int cond_exec_find_if_block (ce_if_block_t *);
static int find_if_case_1 (basic_block, edge, edge);
static int find_if_case_2 (basic_block, edge, edge);
-static int find_memory (rtx *, void *);
static int dead_or_predicable (basic_block, basic_block, basic_block,
- basic_block, int);
+ edge, int);
static void noce_emit_move_insn (rtx, rtx);
static rtx block_has_only_trap (basic_block);
\f
/* Determine whether the total insn_rtx_cost on non-jump insns in
basic block BB is less than MAX_COST. This function returns
- false if the cost of any instruction could not be estimated. */
+ false if the cost of any instruction could not be estimated.
+
+ The cost of the non-jump insns in BB is scaled by REG_BR_PROB_BASE
+ as those insns are being speculated. MAX_COST is scaled with SCALE
+ plus a small fudge factor. */
static bool
-cheap_bb_rtx_cost_p (const_basic_block bb, int max_cost)
+cheap_bb_rtx_cost_p (const_basic_block bb, int scale, int max_cost)
{
int count = 0;
rtx insn = BB_HEAD (bb);
bool speed = optimize_bb_for_speed_p (bb);
+ /* Our branch probability/scaling factors are just estimates and don't
+ account for cases where we can get speculation for free and other
+ secondary benefits. So we fudge the scale factor to make speculating
+ appear a little more profitable. */
+ scale += REG_BR_PROB_BASE / 8;
+ max_cost *= scale;
+
while (1)
{
if (NONJUMP_INSN_P (insn))
{
- int cost = insn_rtx_cost (PATTERN (insn), speed);
+ int cost = insn_rtx_cost (PATTERN (insn), speed) * REG_BR_PROB_BASE;
if (cost == 0)
return false;
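
The comparison above is pure fixed-point arithmetic: edge probabilities in RTL are integers scaled by REG_BR_PROB_BASE (10000), so the per-insn costs are multiplied by REG_BR_PROB_BASE while the budget is multiplied by the probability-derived SCALE plus the 1/8 fudge. A minimal standalone sketch, not part of the patch; cheap_enough and the toy costs are hypothetical:

#include <stdbool.h>
#include <stdio.h>

#define REG_BR_PROB_BASE 10000  /* GCC's fixed-point probability base */

/* Stand-in for the loop above: INSN_COSTS holds insn_rtx_cost for each
   non-jump insn, SCALE is a REG_BR_PROB_BASE-scaled probability, and
   MAX_COST is the unscaled budget.  */
static bool
cheap_enough (const int *insn_costs, int n, int scale, int max_cost)
{
  int count = 0;

  /* Fudge the scale so speculation looks slightly more profitable,
     mirroring "scale += REG_BR_PROB_BASE / 8" above.  */
  scale += REG_BR_PROB_BASE / 8;
  max_cost *= scale;

  for (int i = 0; i < n; i++)
    {
      if (insn_costs[i] == 0)   /* cost could not be estimated */
        return false;
      count += insn_costs[i] * REG_BR_PROB_BASE;
      if (count >= max_cost)
        return false;
    }
  return true;
}

int
main (void)
{
  int costs[3] = { 4, 4, 8 };
  /* A 50% branch with a budget of 12 cost units: the running total hits
     80000 at the second insn, exceeding 12 * (5000 + 1250) = 75000,
     so speculation is rejected.  */
  printf ("%s\n",
          cheap_enough (costs, 3, REG_BR_PROB_BASE / 2, 12)
          ? "speculate" : "keep the branch");
  return 0;
}
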
for (insn = start; ; insn = NEXT_INSN (insn))
{
+ /* dwarf2out can't cope with conditional prologues. */
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+ return FALSE;
+
if (NOTE_P (insn) || DEBUG_INSN_P (insn))
goto insn_done;
/* Look for matching sequences at the head and tail of the two blocks,
and limit the range of insns to be converted if possible. */
n_matching = flow_find_cross_jump (then_bb, else_bb,
- &then_first_tail, &else_first_tail);
+ &then_first_tail, &else_first_tail,
+ NULL);
if (then_first_tail == BB_HEAD (then_bb))
then_start = then_end = NULL_RTX;
if (else_first_tail == BB_HEAD (else_bb))
}
gcc_assert (start < (MEM_P (op) ? BITS_PER_UNIT : BITS_PER_WORD));
- store_bit_field (op, size, start, GET_MODE (x), y);
+ store_bit_field (op, size, start, 0, 0, GET_MODE (x), y);
return;
}
inner = XEXP (outer, 0);
outmode = GET_MODE (outer);
bitpos = SUBREG_BYTE (outer) * BITS_PER_UNIT;
- store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos, outmode, y);
+ store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos,
+ 0, 0, outmode, y);
}
/* Return sequence of instructions generated by if conversion. This
}
/* ??? We could handle this if we knew that a load from A or B could
- not fault. This is also true if we've already loaded
+ not trap or fault. This is also true if we've already loaded
from the address along the path from ENTRY. */
- else if (may_trap_p (a) || may_trap_p (b))
+ else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
return FALSE;
/* if (test) x = a + b; else x = c - d;
/* Copy over flags as appropriate. */
if (MEM_VOLATILE_P (if_info->a) || MEM_VOLATILE_P (if_info->b))
MEM_VOLATILE_P (tmp) = 1;
- if (MEM_IN_STRUCT_P (if_info->a) && MEM_IN_STRUCT_P (if_info->b))
- MEM_IN_STRUCT_P (tmp) = 1;
- if (MEM_SCALAR_P (if_info->a) && MEM_SCALAR_P (if_info->b))
- MEM_SCALAR_P (tmp) = 1;
if (MEM_ALIAS_SET (if_info->a) == MEM_ALIAS_SET (if_info->b))
set_mem_alias_set (tmp, MEM_ALIAS_SET (if_info->a));
set_mem_align (tmp,
&& (if_info->insn_b == NULL_RTX
|| BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb));
if (!(t_unconditional
- || (rtx_cost (t, SET, optimize_bb_for_speed_p (if_info->test_bb))
+ || (set_src_cost (t, optimize_bb_for_speed_p (if_info->test_bb))
< COSTS_N_INSNS (2))))
return FALSE;
cond = XEXP (SET_SRC (set), 0);
tmp = XEXP (cond, 0);
- if (REG_P (tmp) && GET_MODE_CLASS (GET_MODE (tmp)) == MODE_INT)
+ if (REG_P (tmp) && GET_MODE_CLASS (GET_MODE (tmp)) == MODE_INT
+ && (GET_MODE (tmp) != BImode
+ || !targetm.small_register_classes_for_mode_p (BImode)))
{
*earliest = jump;
static int
noce_operand_ok (const_rtx op)
{
+ if (side_effects_p (op))
+ return FALSE;
+
/* We special-case memories, so handle any of them with
no address side effects. */
if (MEM_P (op))
return ! side_effects_p (XEXP (op, 0));
- if (side_effects_p (op))
- return FALSE;
-
return ! may_trap_p (op);
}
basic_block then_bb = then_edge->dest;
basic_block else_bb = else_edge->dest;
basic_block new_bb;
- int then_bb_index;
+ int then_bb_index, then_prob;
+ rtx else_target = NULL_RTX;
/* If we are partitioning hot/cold basic blocks, we don't want to
mess up unconditional or indirect jumps that cross between hot
"\nIF-CASE-1 found, start %d, then %d\n",
test_bb->index, then_bb->index);
- /* THEN is small. */
- if (! cheap_bb_rtx_cost_p (then_bb,
+ if (then_edge->probability)
+ then_prob = REG_BR_PROB_BASE - then_edge->probability;
+ else
+ then_prob = REG_BR_PROB_BASE / 2;
+
+ /* We're speculating from the THEN path, so we want to make sure the cost
+ of speculation is within reason. */
+ if (! cheap_bb_rtx_cost_p (then_bb, then_prob,
COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
predictable_edge_p (then_edge)))))
return FALSE;
+ if (else_bb == EXIT_BLOCK_PTR)
+ {
+ rtx jump = BB_END (else_edge->src);
+ gcc_assert (JUMP_P (jump));
+ else_target = JUMP_LABEL (jump);
+ }
+
/* Registers set are dead, or are predicable. */
if (! dead_or_predicable (test_bb, then_bb, else_bb,
- single_succ (then_bb), 1))
+ single_succ_edge (then_bb), 1))
return FALSE;
/* Conversion went ok, including moving the insns and fixing up the
redirect_edge_succ (FALLTHRU_EDGE (test_bb), else_bb);
new_bb = 0;
}
+ else if (else_bb == EXIT_BLOCK_PTR)
+ new_bb = force_nonfallthru_and_redirect (FALLTHRU_EDGE (test_bb),
+ else_bb, else_target);
else
new_bb = redirect_edge_and_branch_force (FALLTHRU_EDGE (test_bb),
else_bb);
basic_block then_bb = then_edge->dest;
basic_block else_bb = else_edge->dest;
edge else_succ;
- rtx note;
+ int then_prob, else_prob;
/* If we are partitioning hot/cold basic blocks, we don't want to
mess up unconditional or indirect jumps that cross between hot
if (then_bb->index < NUM_FIXED_BLOCKS)
return FALSE;
+ if (else_edge->probability)
+ {
+ else_prob = else_edge->probability;
+ then_prob = REG_BR_PROB_BASE - else_prob;
+ }
+ else
+ {
+ else_prob = REG_BR_PROB_BASE / 2;
+ then_prob = REG_BR_PROB_BASE / 2;
+ }
+
/* ELSE is predicted or SUCC(ELSE) postdominates THEN. */
- note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX);
- if (note && INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2)
+ if (else_prob > then_prob)
;
else if (else_succ->dest->index < NUM_FIXED_BLOCKS
|| dominated_by_p (CDI_POST_DOMINATORS, then_bb,
"\nIF-CASE-2 found, start %d, else %d\n",
test_bb->index, else_bb->index);
- /* ELSE is small. */
- if (! cheap_bb_rtx_cost_p (else_bb,
+ /* We're speculating from the ELSE path, so we want to make sure the cost
+ of speculation is within reason. */
+ if (! cheap_bb_rtx_cost_p (else_bb, else_prob,
COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
predictable_edge_p (else_edge)))))
return FALSE;
/* Registers set are dead, or are predicable. */
- if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ->dest, 0))
+ if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ, 0))
return FALSE;
/* Conversion went ok, including moving the insns and fixing up the
return TRUE;
}
-/* A subroutine of dead_or_predicable called through for_each_rtx.
- Return 1 if a memory is found. */
-
-static int
-find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
- return MEM_P (*px);
-}
-
/* Used by the code above to perform the actual rtl transformations.
Return TRUE if successful.
TEST_BB is the block containing the conditional branch. MERGE_BB
- is the block containing the code to manipulate. NEW_DEST is the
- label TEST_BB should be branching to after the conversion.
+ is the block containing the code to manipulate. DEST_EDGE is an
+ edge representing a jump to the join block; after the conversion,
+ TEST_BB should be branching to its destination.
REVERSEP is true if the sense of the branch should be reversed. */
static int
dead_or_predicable (basic_block test_bb, basic_block merge_bb,
- basic_block other_bb, basic_block new_dest, int reversep)
+ basic_block other_bb, edge dest_edge, int reversep)
{
- rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX;
+ basic_block new_dest = dest_edge->dest;
+ rtx head, end, jump, earliest = NULL_RTX, old_dest;
+ bitmap merge_set = NULL;
/* Number of pending changes. */
int n_validated_changes = 0;
+ rtx new_dest_label = NULL_RTX;
jump = BB_END (test_bb);
earliest = jump;
}
#endif
+
+ /* If we allocated new pseudos (e.g. in the conditional move
+ expander called from noce_emit_cmove), we must resize the
+ array first. */
+ if (max_regno < max_reg_num ())
+ max_regno = max_reg_num ();
+
/* Try the NCE path if the CE path did not result in any changes. */
if (n_validated_changes == 0)
{
+ rtx cond, insn;
+ regset live;
+ bool success;
+
/* In the non-conditional execution case, we have to verify that there
are no trapping operations, no calls, no references to memory, and
that any registers modified are dead at the branch site. */
- rtx insn, cond, prev;
- bitmap merge_set, merge_set_noclobber, test_live, test_set;
- unsigned i, fail = 0;
- bitmap_iterator bi;
-
- /* Check for no calls or trapping operations. */
- for (insn = head; ; insn = NEXT_INSN (insn))
- {
- if (CALL_P (insn))
- return FALSE;
- if (NONDEBUG_INSN_P (insn))
- {
- if (may_trap_p (PATTERN (insn)))
- return FALSE;
-
- /* ??? Even non-trapping memories such as stack frame
- references must be avoided. For stores, we collect
- no lifetime info; for reads, we'd have to assert
- true_dependence false against every store in the
- TEST range. */
- if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
- return FALSE;
- }
- if (insn == end)
- break;
- }
-
- if (! any_condjump_p (jump))
+ if (!any_condjump_p (jump))
return FALSE;
/* Find the extent of the conditional. */
cond = noce_get_condition (jump, &earliest, false);
- if (! cond)
+ if (!cond)
return FALSE;
- /* Collect:
- MERGE_SET = set of registers set in MERGE_BB
- MERGE_SET_NOCLOBBER = like MERGE_SET, but only includes registers
- that are really set, not just clobbered.
- TEST_LIVE = set of registers live at EARLIEST
- TEST_SET = set of registers set between EARLIEST and the
- end of the block. */
+ live = BITMAP_ALLOC (&reg_obstack);
+ simulate_backwards_to_point (merge_bb, live, end);
+ success = can_move_insns_across (head, end, earliest, jump,
+ merge_bb, live,
+ df_get_live_in (other_bb), NULL);
+ BITMAP_FREE (live);
+ if (!success)
+ return FALSE;
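
The hand-rolled scan over head..end (deleted above) is replaced by simulate_backwards_to_point plus can_move_insns_across. The backwards simulation is the textbook liveness recurrence: walk insns in reverse, kill each insn's definitions, revive its uses. A self-contained toy rendition, with plain bitmasks standing in for df's regsets rather than GCC's actual API:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t regset;            /* bit i set => reg i is live */

struct insn { regset defs, uses; };

/* Given LIVE_OUT after INSNS[N-1], compute the set live just before
   INSNS[0]: a definition kills liveness above it, a use revives it.  */
static regset
simulate_backwards (const struct insn *insns, int n, regset live_out)
{
  regset live = live_out;
  for (int i = n - 1; i >= 0; i--)
    {
      live &= ~insns[i].defs;
      live |= insns[i].uses;
    }
  return live;
}

int
main (void)
{
  /* r2 = r0 + r1;  r3 = r2 * r2;  with r3 live after the block.  */
  struct insn body[2] = { { 1u << 2, (1u << 0) | (1u << 1) },
                          { 1u << 3, 1u << 2 } };
  /* Expect r0|r1, i.e. 0x3.  */
  printf ("live before: %#x\n",
          (unsigned) simulate_backwards (body, 2, 1u << 3));
  return 0;
}
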
+ /* Collect the set of registers set in MERGE_BB. */
merge_set = BITMAP_ALLOC (&reg_obstack);
- merge_set_noclobber = BITMAP_ALLOC (&reg_obstack);
- test_live = BITMAP_ALLOC (&reg_obstack);
- test_set = BITMAP_ALLOC (&reg_obstack);
-
- /* ??? bb->local_set is only valid during calculate_global_regs_live,
- so we must recompute usage for MERGE_BB. Not so bad, I suppose,
- since we've already asserted that MERGE_BB is small. */
- /* If we allocated new pseudos (e.g. in the conditional move
- expander called from noce_emit_cmove), we must resize the
- array first. */
- if (max_regno < max_reg_num ())
- max_regno = max_reg_num ();
FOR_BB_INSNS (merge_bb, insn)
+ if (NONDEBUG_INSN_P (insn))
+ df_simulate_find_defs (insn, merge_set);
+
+#ifdef HAVE_simple_return
+ /* If shrink-wrapping, disable this optimization when test_bb is
+ the first basic block and merge_bb exits. The idea is to not
+ move code setting up a return register as that may clobber a
+ register used to pass function parameters, which then must be
+ saved in callee-saved regs. A callee-saved reg requires the
+ prologue, killing a shrink-wrap opportunity. */
+ if ((flag_shrink_wrap && HAVE_simple_return && !epilogue_completed)
+ && ENTRY_BLOCK_PTR->next_bb == test_bb
+ && single_succ_p (new_dest)
+ && single_succ (new_dest) == EXIT_BLOCK_PTR
+ && bitmap_intersect_p (df_get_live_in (new_dest), merge_set))
{
- if (NONDEBUG_INSN_P (insn))
- {
- df_simulate_find_defs (insn, merge_set);
- df_simulate_find_noclobber_defs (insn, merge_set_noclobber);
- }
- }
+ regset return_regs;
+ unsigned int i;
- /* For small register class machines, don't lengthen lifetimes of
- hard registers before reload. */
- if (! reload_completed
- && targetm.small_register_classes_for_mode_p (VOIDmode))
- {
- EXECUTE_IF_SET_IN_BITMAP (merge_set_noclobber, 0, i, bi)
- {
- if (i < FIRST_PSEUDO_REGISTER
- && ! fixed_regs[i]
- && ! global_regs[i])
- fail = 1;
- }
- }
+ return_regs = BITMAP_ALLOC (&reg_obstack);
- /* For TEST, we're interested in a range of insns, not a whole block.
- Moreover, we're interested in the insns live from OTHER_BB. */
+ /* Start off with the intersection of regs used to pass
+ params and regs used to return values. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (FUNCTION_ARG_REGNO_P (i)
+ && targetm.calls.function_value_regno_p (i))
+ bitmap_set_bit (return_regs, INCOMING_REGNO (i));
- /* The loop below takes the set of live registers
- after JUMP, and calculates the live set before EARLIEST. */
- bitmap_copy (test_live, df_get_live_in (other_bb));
- df_simulate_initialize_backwards (test_bb, test_live);
- for (insn = jump; ; insn = prev)
- {
- if (INSN_P (insn))
+ bitmap_and_into (return_regs, df_get_live_out (ENTRY_BLOCK_PTR));
+ bitmap_and_into (return_regs, df_get_live_in (EXIT_BLOCK_PTR));
+ if (!bitmap_empty_p (return_regs))
{
- df_simulate_find_defs (insn, test_set);
- df_simulate_one_insn_backwards (test_bb, insn, test_live);
+ FOR_BB_INSNS_REVERSE (new_dest, insn)
+ if (NONDEBUG_INSN_P (insn))
+ {
+ df_ref *def_rec;
+ unsigned int uid = INSN_UID (insn);
+
+ /* If this insn sets any reg in return_regs... */
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref def = *def_rec;
+ unsigned r = DF_REF_REGNO (def);
+
+ if (bitmap_bit_p (return_regs, r))
+ break;
+ }
+ /* ...then add all reg uses to the set of regs
+ we're interested in. */
+ if (*def_rec)
+ df_simulate_uses (insn, return_regs);
+ }
+ if (bitmap_intersect_p (merge_set, return_regs))
+ {
+ BITMAP_FREE (return_regs);
+ BITMAP_FREE (merge_set);
+ return FALSE;
+ }
}
- prev = PREV_INSN (insn);
- if (insn == earliest)
- break;
+ BITMAP_FREE (return_regs);
}
-
- /* We can perform the transformation if
- MERGE_SET_NOCLOBBER & TEST_SET
- and
- MERGE_SET & TEST_LIVE)
- and
- TEST_SET & DF_LIVE_IN (merge_bb)
- are empty. */
-
- if (bitmap_intersect_p (test_set, merge_set_noclobber)
- || bitmap_intersect_p (test_live, merge_set)
- || bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
- fail = 1;
-
- BITMAP_FREE (merge_set_noclobber);
- BITMAP_FREE (merge_set);
- BITMAP_FREE (test_live);
- BITMAP_FREE (test_set);
-
- if (fail)
- return FALSE;
+#endif
}
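
The shrink-wrapping guard above works purely on register sets: seed return_regs with the hard regs that both pass parameters and return values, grow it backwards through the exit block so the inputs of insns computing those regs are tracked too, and refuse the move if merge_set touches anything tracked. A compressed sketch in the same toy regset style as above (clobbers_return_setup is hypothetical, and the live-in/live-out intersections the real code applies when seeding the set are omitted):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t regset;

struct insn { regset defs, uses; };

/* ARG_REGS: hard regs used to pass parameters.  VALUE_REGS: hard regs
   used to return values.  EXIT_PATH[0..N-1]: the insns of the block
   falling into exit.  MERGE_SET: regs written by the block being
   hoisted.  Return true if hoisting would clobber return setup.  */
static bool
clobbers_return_setup (regset arg_regs, regset value_regs,
                       const struct insn *exit_path, int n,
                       regset merge_set)
{
  regset return_regs = arg_regs & value_regs;

  /* Walk the exit path in reverse; when an insn writes a tracked reg,
     its inputs become interesting as well.  */
  for (int i = n - 1; i >= 0; i--)
    if (exit_path[i].defs & return_regs)
      return_regs |= exit_path[i].uses;

  return (merge_set & return_regs) != 0;
}

int
main (void)
{
  /* Toy scenario: r0 both passes arg 0 and carries the return value,
     and the exit block computes r0 = r1 + r2.  The hoisted block
     writes r2, an input of the return value, so the move is refused.  */
  struct insn exit_path[1] = { { 1u << 0, (1u << 1) | (1u << 2) } };
  printf ("%s\n",
          clobbers_return_setup (1u << 0, 1u << 0, exit_path, 1, 1u << 2)
          ? "reject hoist" : "safe");
  return 0;
}
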
no_body:
old_dest = JUMP_LABEL (jump);
if (other_bb != new_dest)
{
- new_label = block_label (new_dest);
+ if (JUMP_P (BB_END (dest_edge->src)))
+ new_dest_label = JUMP_LABEL (BB_END (dest_edge->src));
+ else if (new_dest == EXIT_BLOCK_PTR)
+ new_dest_label = ret_rtx;
+ else
+ new_dest_label = block_label (new_dest);
+
if (reversep
- ? ! invert_jump_1 (jump, new_label)
- : ! redirect_jump_1 (jump, new_label))
+ ? ! invert_jump_1 (jump, new_dest_label)
+ : ! redirect_jump_1 (jump, new_dest_label))
goto cancel;
}
if (other_bb != new_dest)
{
- redirect_jump_2 (jump, old_dest, new_label, 0, reversep);
+ redirect_jump_2 (jump, old_dest, new_dest_label, 0, reversep);
redirect_edge_succ (BRANCH_EDGE (test_bb), new_dest);
if (reversep)
if (end == BB_END (merge_bb))
BB_END (merge_bb) = PREV_INSN (head);
- /* PR 21767: When moving insns above a conditional branch, REG_EQUAL
- notes might become invalid. */
+ /* PR 21767: when moving insns above a conditional branch, the REG_EQUAL
+ notes being moved might become invalid. */
insn = head;
do
{
remove_note (insn, note);
} while (insn != end && (insn = NEXT_INSN (insn)));
+ /* PR 46315: when moving insns above a conditional branch, the REG_EQUAL
+ notes referring to the registers being set might become invalid. */
+ if (merge_set)
+ {
+ unsigned i;
+ bitmap_iterator bi;
+
+ EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
+ remove_reg_equal_equiv_notes_for_regno (i);
+
+ BITMAP_FREE (merge_set);
+ }
+
reorder_insns (head, end, PREV_INSN (earliest));
}
cancel:
cancel_changes (0);
+
+ if (merge_set)
+ BITMAP_FREE (merge_set);
+
return FALSE;
}
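
Why the REG_EQUAL cleanup in this function is needed at all: a REG_EQUAL note can encode a fact that was only true under the guarding condition, and hoisting the insn makes it execute unconditionally. A C-level analogy, with a hypothetical function rather than RTL:

/* Analogy for PR 21767 / PR 46315, in source form.  */
int
h (int a)
{
  int t;
  if (a != 0)
    return -1;
  /* Here a == 0 is known, so an optimizer could annotate the copy
     below with the equivalent of a REG_EQUAL (const_int 0) note.  */
  t = a;
  /* If if-conversion hoists "t = a" above the branch, the insn itself
     stays correct, but the recorded "t == 0" becomes false whenever
     a != 0; hence the patch drops notes on the moved insns and, via
     merge_set, on any insn whose note mentions a register they set.  */
  return t + 1;
}
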
\f
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_df_finish | TODO_verify_rtl_sharing |
- TODO_dump_func /* todo_flags_finish */
+ 0 /* todo_flags_finish */
}
};
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_df_finish | TODO_verify_rtl_sharing |
- TODO_dump_func |
TODO_ggc_collect /* todo_flags_finish */
}
};
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_df_finish | TODO_verify_rtl_sharing |
- TODO_dump_func |
TODO_ggc_collect /* todo_flags_finish */
}
};