/* If-conversion support.
- Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
+ Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010,
+ 2011
Free Software Foundation, Inc.
This file is part of GCC.
#include "hard-reg-set.h"
#include "basic-block.h"
#include "expr.h"
-#include "real.h"
#include "output.h"
#include "optabs.h"
-#include "toplev.h"
+#include "diagnostic-core.h"
#include "tm_p.h"
#include "cfgloop.h"
#include "target.h"
#include "df.h"
#include "vec.h"
#include "vecprim.h"
+#include "dbgcnt.h"
-#ifndef HAVE_conditional_execution
-#define HAVE_conditional_execution 0
-#endif
#ifndef HAVE_conditional_move
#define HAVE_conditional_move 0
#endif
#ifndef HAVE_trap
#define HAVE_trap 0
#endif
-#ifndef HAVE_conditional_trap
-#define HAVE_conditional_trap 0
-#endif
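+/* By default, allow converting at most one insn more than the cost of
+   the branch being removed.  */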
#ifndef MAX_CONDITIONAL_EXECUTE
-#define MAX_CONDITIONAL_EXECUTE (BRANCH_COST + 1)
+#define MAX_CONDITIONAL_EXECUTE \
+ (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
+ + 1)
#endif
#define IFCVT_MULTIPLE_DUMPS 1
static int cond_exec_changed_p;
/* Forward references. */
-static int count_bb_insns (basic_block);
-static bool cheap_bb_rtx_cost_p (basic_block, int);
+static int count_bb_insns (const_basic_block);
+static bool cheap_bb_rtx_cost_p (const_basic_block, int, int);
static rtx first_active_insn (basic_block);
static rtx last_active_insn (basic_block, int);
+static rtx find_active_insn_before (basic_block, rtx);
+static rtx find_active_insn_after (basic_block, rtx);
static basic_block block_fallthru (basic_block);
static int cond_exec_process_insns (ce_if_block_t *, rtx, rtx, rtx, rtx, int);
static rtx cond_exec_get_condition (rtx);
static rtx noce_get_condition (rtx, rtx *, bool);
-static int noce_operand_ok (rtx);
+static int noce_operand_ok (const_rtx);
static void merge_if_block (ce_if_block_t *);
static int find_cond_trap (basic_block, edge, edge);
static basic_block find_if_header (basic_block, int);
static int cond_exec_find_if_block (ce_if_block_t *);
static int find_if_case_1 (basic_block, edge, edge);
static int find_if_case_2 (basic_block, edge, edge);
-static int find_memory (rtx *, void *);
static int dead_or_predicable (basic_block, basic_block, basic_block,
- basic_block, int);
+ edge, int);
static void noce_emit_move_insn (rtx, rtx);
static rtx block_has_only_trap (basic_block);
\f
/* Count the number of non-jump active insns in BB. */
static int
-count_bb_insns (basic_block bb)
+count_bb_insns (const_basic_block bb)
{
int count = 0;
rtx insn = BB_HEAD (bb);
/* Determine whether the total insn_rtx_cost on non-jump insns in
basic block BB is less than MAX_COST. This function returns
- false if the cost of any instruction could not be estimated. */
+ false if the cost of any instruction could not be estimated.
+
+ The cost of the non-jump insns in BB is scaled by REG_BR_PROB_BASE
+ as those insns are being speculated. MAX_COST is scaled with SCALE
+ plus a small fudge factor. */
static bool
-cheap_bb_rtx_cost_p (basic_block bb, int max_cost)
+cheap_bb_rtx_cost_p (const_basic_block bb, int scale, int max_cost)
{
int count = 0;
rtx insn = BB_HEAD (bb);
+ bool speed = optimize_bb_for_speed_p (bb);
+
+ /* Our branch probability/scaling factors are just estimates and don't
+ account for cases where we can get speculation for free and other
+ secondary benefits. So we fudge the scale factor to make speculating
+ appear a little more profitable. */
+ scale += REG_BR_PROB_BASE / 8;
+ max_cost *= scale;
while (1)
{
if (NONJUMP_INSN_P (insn))
{
- int cost = insn_rtx_cost (PATTERN (insn));
+ int cost = insn_rtx_cost (PATTERN (insn), speed) * REG_BR_PROB_BASE;
if (cost == 0)
return false;
insn = NEXT_INSN (insn);
}
- while (NOTE_P (insn))
+ while (NOTE_P (insn) || DEBUG_INSN_P (insn))
{
if (insn == BB_END (bb))
return NULL_RTX;
while (NOTE_P (insn)
|| JUMP_P (insn)
+ || DEBUG_INSN_P (insn)
|| (skip_use_p
&& NONJUMP_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) == USE))
return insn;
}
+/* Return the active insn before INSN inside basic block CURR_BB. */
+
+static rtx
+find_active_insn_before (basic_block curr_bb, rtx insn)
+{
+ if (!insn || insn == BB_HEAD (curr_bb))
+ return NULL_RTX;
+
+ while ((insn = PREV_INSN (insn)) != NULL_RTX)
+ {
+ if (NONJUMP_INSN_P (insn) || JUMP_P (insn) || CALL_P (insn))
+ break;
+
+ /* No other active insn all the way to the start of the basic block. */
+ if (insn == BB_HEAD (curr_bb))
+ return NULL_RTX;
+ }
+
+ return insn;
+}
+
+/* Return the active insn after INSN inside basic block CURR_BB. */
+
+static rtx
+find_active_insn_after (basic_block curr_bb, rtx insn)
+{
+ if (!insn || insn == BB_END (curr_bb))
+ return NULL_RTX;
+
+ while ((insn = NEXT_INSN (insn)) != NULL_RTX)
+ {
+ if (NONJUMP_INSN_P (insn) || JUMP_P (insn) || CALL_P (insn))
+ break;
+
+ /* No other active insn all the way to the end of the basic block. */
+ if (insn == BB_END (curr_bb))
+ return NULL_RTX;
+ }
+
+ return insn;
+}
+
/* Return the basic block reached by falling through the basic block BB. */
static basic_block
block_fallthru (basic_block bb)
{
- edge e;
- edge_iterator ei;
-
- FOR_EACH_EDGE (e, ei, bb->succs)
- if (e->flags & EDGE_FALLTHRU)
- break;
+ edge e = find_fallthru_edge (bb->succs);
return (e) ? e->dest : NULL_BLOCK;
}
for (insn = start; ; insn = NEXT_INSN (insn))
{
- if (NOTE_P (insn))
+ /* dwarf2out can't cope with conditional prologues. */
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END)
+ return FALSE;
+
+ if (NOTE_P (insn) || DEBUG_INSN_P (insn))
goto insn_done;
gcc_assert (NONJUMP_INSN_P (insn) || CALL_P (insn));
rtx false_expr; /* test for then block insns */
rtx true_prob_val; /* probability of else block */
rtx false_prob_val; /* probability of then block */
- int n_insns;
+ rtx then_last_head = NULL_RTX; /* Last match at the head of THEN */
+ rtx else_last_head = NULL_RTX; /* Last match at the head of ELSE */
+ rtx then_first_tail = NULL_RTX; /* First match at the tail of THEN */
+ rtx else_first_tail = NULL_RTX; /* First match at the tail of ELSE */
+ int then_n_insns, else_n_insns, n_insns;
enum rtx_code false_code;
/* If test is comprised of && or || elements, and we've failed at handling
number of insns and see if it is small enough to convert. */
then_start = first_active_insn (then_bb);
then_end = last_active_insn (then_bb, TRUE);
- n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+ then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb);
+ n_insns = then_n_insns;
max = MAX_CONDITIONAL_EXECUTE;
if (else_bb)
{
+ int n_matching;
+
max *= 2;
else_start = first_active_insn (else_bb);
else_end = last_active_insn (else_bb, TRUE);
- n_insns += ce_info->num_else_insns = count_bb_insns (else_bb);
+ else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb);
+ n_insns += else_n_insns;
+
+ /* Look for matching sequences at the head and tail of the two blocks,
+ and limit the range of insns to be converted if possible. */
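+      /* For example, for
+	     if (c) { x = a; y = 1; } else { x = b; y = 1; }
+	 the shared "y = 1" tail need only be kept once and does not
+	 have to be predicated.  */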
+ n_matching = flow_find_cross_jump (then_bb, else_bb,
+ &then_first_tail, &else_first_tail,
+ NULL);
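+      /* If the matched tail reaches all the way back to the head of a
+	 block, that entire block matched.  */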
+ if (then_first_tail == BB_HEAD (then_bb))
+ then_start = then_end = NULL_RTX;
+ if (else_first_tail == BB_HEAD (else_bb))
+ else_start = else_end = NULL_RTX;
+
+ if (n_matching > 0)
+ {
+ if (then_end)
+ then_end = find_active_insn_before (then_bb, then_first_tail);
+ if (else_end)
+ else_end = find_active_insn_before (else_bb, else_first_tail);
+ n_insns -= 2 * n_matching;
+ }
+
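+      /* Now look for a matching sequence at the heads as well, but no
+	 longer than what remains after removing the matched tails, so
+	 the two matched ranges cannot overlap.  */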
+ if (then_start && else_start)
+ {
+ int longest_match = MIN (then_n_insns - n_matching,
+ else_n_insns - n_matching);
+ n_matching
+ = flow_find_head_matching_sequence (then_bb, else_bb,
+ &then_last_head,
+ &else_last_head,
+ longest_match);
+
+ if (n_matching > 0)
+ {
+ rtx insn;
+
+ /* We won't pass the insns in the head sequence to
+ cond_exec_process_insns, so we need to test them here
+ to make sure that they don't clobber the condition. */
+ for (insn = BB_HEAD (then_bb);
+ insn != NEXT_INSN (then_last_head);
+ insn = NEXT_INSN (insn))
+ if (!LABEL_P (insn) && !NOTE_P (insn)
+ && !DEBUG_INSN_P (insn)
+ && modified_in_p (test_expr, insn))
+ return FALSE;
+ }
+
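+	  /* If the head sequence extends to the last active insn,
+	     everything matched and there is nothing left to convert.  */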
+ if (then_last_head == then_end)
+ then_start = then_end = NULL_RTX;
+ if (else_last_head == else_end)
+ else_start = else_end = NULL_RTX;
+
+ if (n_matching > 0)
+ {
+ if (then_start)
+ then_start = find_active_insn_after (then_bb, then_last_head);
+ if (else_start)
+ else_start = find_active_insn_after (else_bb, else_last_head);
+ n_insns -= 2 * n_matching;
+ }
+ }
}
if (n_insns > max)
fprintf (dump_file, "%d insn%s converted to conditional execution.\n",
n_insns, (n_insns == 1) ? " was" : "s were");
- /* Merge the blocks! */
+ /* Merge the blocks! If we had matching sequences, make sure to delete one
+ copy at the appropriate location first: delete the copy in the THEN branch
+ for a tail sequence so that the remaining one is executed last for both
+ branches, and delete the copy in the ELSE branch for a head sequence so
+ that the remaining one is executed first for both branches. */
+ if (then_first_tail)
+ {
+ rtx from = then_first_tail;
+ if (!INSN_P (from))
+ from = find_active_insn_after (then_bb, from);
+ delete_insn_chain (from, BB_END (then_bb), false);
+ }
+ if (else_last_head)
+ delete_insn_chain (first_active_insn (else_bb), else_last_head, false);
+
merge_if_block (ce_info);
cond_exec_changed_p = TRUE;
return TRUE;
from TEST_BB. For the noce transformations, we allow the symmetric
form as well. */
bool then_else_reversed;
+
+ /* Estimated cost of the particular branch instruction. */
+ int branch_cost;
};
static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int);
build the store_flag insn directly. */
if (cond_complex)
- cond = XEXP (SET_SRC (pc_set (if_info->jump)), 0);
+ {
+ rtx set = pc_set (if_info->jump);
+ cond = XEXP (SET_SRC (set), 0);
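+      /* The jump takes the branch when the IF_THEN_ELSE condition is
+	 false if its label sits in the else arm; account for that, and
+	 for the caller having swapped the THEN and ELSE blocks.  */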
+ if (GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
+ && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (if_info->jump))
+ reversep = !reversep;
+ if (if_info->then_else_reversed)
+ reversep = !reversep;
+ }
if (reversep)
code = reversed_comparison_code (cond, if_info->jump);
}
gcc_assert (start < (MEM_P (op) ? BITS_PER_UNIT : BITS_PER_WORD));
- store_bit_field (op, size, start, GET_MODE (x), y);
+ store_bit_field (op, size, start, 0, 0, GET_MODE (x), y);
return;
}
inner = XEXP (outer, 0);
outmode = GET_MODE (outer);
bitpos = SUBREG_BYTE (outer) * BITS_PER_UNIT;
- store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos, outmode, y);
+ store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos,
+ 0, 0, outmode, y);
}
/* Return sequence of instructions generated by if conversion. This
int reversep;
rtx target, seq;
- if (GET_CODE (if_info->b) == CONST_INT
+ if (CONST_INT_P (if_info->b)
&& INTVAL (if_info->b) == STORE_FLAG_VALUE
&& if_info->a == const0_rtx)
reversep = 0;
else if (if_info->b == const0_rtx
- && GET_CODE (if_info->a) == CONST_INT
+ && CONST_INT_P (if_info->a)
&& INTVAL (if_info->a) == STORE_FLAG_VALUE
&& (reversed_comparison_code (if_info->cond, if_info->jump)
!= UNKNOWN))
int normalize, can_reverse;
enum machine_mode mode;
- if (GET_CODE (if_info->a) == CONST_INT
- && GET_CODE (if_info->b) == CONST_INT)
+ if (CONST_INT_P (if_info->a)
+ && CONST_INT_P (if_info->b))
{
mode = GET_MODE (if_info->x);
ifalse = INTVAL (if_info->a);
normalize = 0;
else if (ifalse == 0 && exact_log2 (itrue) >= 0
&& (STORE_FLAG_VALUE == 1
- || BRANCH_COST >= 2))
+ || if_info->branch_cost >= 2))
normalize = 1;
else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse
- && (STORE_FLAG_VALUE == 1 || BRANCH_COST >= 2))
+ && (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2))
normalize = 1, reversep = 1;
else if (itrue == -1
&& (STORE_FLAG_VALUE == -1
- || BRANCH_COST >= 2))
+ || if_info->branch_cost >= 2))
normalize = -1;
else if (ifalse == -1 && can_reverse
- && (STORE_FLAG_VALUE == -1 || BRANCH_COST >= 2))
+ && (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2))
normalize = -1, reversep = 1;
- else if ((BRANCH_COST >= 2 && STORE_FLAG_VALUE == -1)
- || BRANCH_COST >= 3)
+ else if ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1)
+ || if_info->branch_cost >= 3)
normalize = -1;
else
return FALSE;
/* If that fails, construct conditional increment or decrement using
setcc. */
- if (BRANCH_COST >= 2
+ if (if_info->branch_cost >= 2
&& (XEXP (if_info->a, 1) == const1_rtx
|| XEXP (if_info->a, 1) == constm1_rtx))
{
int reversep;
reversep = 0;
- if ((BRANCH_COST >= 2
+ if ((if_info->branch_cost >= 2
|| STORE_FLAG_VALUE == -1)
&& ((if_info->a == const0_rtx
&& rtx_equal_p (if_info->b, if_info->x))
noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue)
{
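+  /* TARGET and UNSIGNEDP are only used in the HAVE_conditional_move
+     block below, hence ATTRIBUTE_UNUSED.  */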
+ rtx target ATTRIBUTE_UNUSED;
+ int unsignedp ATTRIBUTE_UNUSED;
+
/* If earliest == jump, try to build the cmove insn directly.
This is helpful when combine has created some complex condition
(like for alpha's cmovlbs) that we can't hope to regenerate
return NULL_RTX;
#if HAVE_conditional_move
- return emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
- vtrue, vfalse, GET_MODE (x),
- (code == LTU || code == GEU
- || code == LEU || code == GTU));
+ unsignedp = (code == LTU || code == GEU
+ || code == LEU || code == GTU);
+
+ target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode,
+ vtrue, vfalse, GET_MODE (x),
+ unsignedp);
+ if (target)
+ return target;
+
+ /* We might be faced with a situation like:
+
+ x = (reg:M TARGET)
+ vtrue = (subreg:M (reg:N VTRUE) BYTE)
+ vfalse = (subreg:M (reg:N VFALSE) BYTE)
+
+ We can't do a conditional move in mode M, but it's possible that we
+ could do a conditional move in mode N instead and take a subreg of
+ the result.
+
+ If we can't create new pseudos, though, don't bother. */
+ if (reload_completed)
+ return NULL_RTX;
+
+ if (GET_CODE (vtrue) == SUBREG && GET_CODE (vfalse) == SUBREG)
+ {
+ rtx reg_vtrue = SUBREG_REG (vtrue);
+ rtx reg_vfalse = SUBREG_REG (vfalse);
+ unsigned int byte_vtrue = SUBREG_BYTE (vtrue);
+ unsigned int byte_vfalse = SUBREG_BYTE (vfalse);
+ rtx promoted_target;
+
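+      /* A conditional move in the inner mode only works if both
+	 subregs select the same part of registers of the same mode and
+	 agree about promotion.  */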
+ if (GET_MODE (reg_vtrue) != GET_MODE (reg_vfalse)
+ || byte_vtrue != byte_vfalse
+ || (SUBREG_PROMOTED_VAR_P (vtrue)
+ != SUBREG_PROMOTED_VAR_P (vfalse))
+ || (SUBREG_PROMOTED_UNSIGNED_P (vtrue)
+ != SUBREG_PROMOTED_UNSIGNED_P (vfalse)))
+ return NULL_RTX;
+
+ promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue));
+
+ target = emit_conditional_move (promoted_target, code, cmp_a, cmp_b,
+ VOIDmode, reg_vtrue, reg_vfalse,
+ GET_MODE (reg_vtrue), unsignedp);
+ /* Nope, couldn't do it in that mode either. */
+ if (!target)
+ return NULL_RTX;
+
+ target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue);
+ SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue);
+ SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue));
+ emit_move_insn (x, target);
+ return x;
+ }
+ else
+ return NULL_RTX;
#else
/* We'll never get here, as noce_process_if_block doesn't call the
functions involved. Ifdef code, however, should be discouraged
/* ??? FIXME: Magic number 5. */
if (cse_not_expected
&& MEM_P (a) && MEM_P (b)
- && BRANCH_COST >= 5)
+ && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b)
+ && if_info->branch_cost >= 5)
{
+ enum machine_mode address_mode
+ = targetm.addr_space.address_mode (MEM_ADDR_SPACE (a));
+
a = XEXP (a, 0);
b = XEXP (b, 0);
- x = gen_reg_rtx (Pmode);
+ x = gen_reg_rtx (address_mode);
is_mem = 1;
}
/* ??? We could handle this if we knew that a load from A or B could
- not fault. This is also true if we've already loaded
+ not trap or fault. This is also true if we've already loaded
from the address along the path from ENTRY. */
- else if (may_trap_p (a) || may_trap_p (b))
+ else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
return FALSE;
/* if (test) x = a + b; else x = c - d;
if insn_rtx_cost can't be estimated. */
if (insn_a)
{
- insn_cost = insn_rtx_cost (PATTERN (insn_a));
- if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST))
+ insn_cost
+ = insn_rtx_cost (PATTERN (insn_a),
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_a)));
+ if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
else
if (insn_b)
{
- insn_cost += insn_rtx_cost (PATTERN (insn_b));
- if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST))
+ insn_cost
+ += insn_rtx_cost (PATTERN (insn_b),
+ optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_b)));
+ if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost))
return FALSE;
}
/* Copy over flags as appropriate. */
if (MEM_VOLATILE_P (if_info->a) || MEM_VOLATILE_P (if_info->b))
MEM_VOLATILE_P (tmp) = 1;
- if (MEM_IN_STRUCT_P (if_info->a) && MEM_IN_STRUCT_P (if_info->b))
- MEM_IN_STRUCT_P (tmp) = 1;
- if (MEM_SCALAR_P (if_info->a) && MEM_SCALAR_P (if_info->b))
- MEM_SCALAR_P (tmp) = 1;
if (MEM_ALIAS_SET (if_info->a) == MEM_ALIAS_SET (if_info->b))
set_mem_alias_set (tmp, MEM_ALIAS_SET (if_info->a));
set_mem_align (tmp,
MIN (MEM_ALIGN (if_info->a), MEM_ALIGN (if_info->b)));
+ gcc_assert (MEM_ADDR_SPACE (if_info->a) == MEM_ADDR_SPACE (if_info->b));
+ set_mem_addr_space (tmp, MEM_ADDR_SPACE (if_info->a));
+
noce_emit_move_insn (if_info->x, tmp);
}
else if (target != x)
make equivalent types of changes) to get the constants we need
if they're off by one in the right direction. */
- if (GET_CODE (target) == CONST_INT)
+ if (CONST_INT_P (target))
{
enum rtx_code code = GET_CODE (if_info->cond);
rtx op_a = XEXP (if_info->cond, 0);
/* First, look to see if we put a constant in a register. */
prev_insn = prev_nonnote_insn (if_info->cond_earliest);
if (prev_insn
+ && BLOCK_FOR_INSN (prev_insn)
+ == BLOCK_FOR_INSN (if_info->cond_earliest)
&& INSN_P (prev_insn)
&& GET_CODE (PATTERN (prev_insn)) == SET)
{
rtx src = find_reg_equal_equiv_note (prev_insn);
if (!src)
src = SET_SRC (PATTERN (prev_insn));
- if (GET_CODE (src) == CONST_INT)
+ if (CONST_INT_P (src))
{
if (rtx_equal_p (op_a, SET_DEST (PATTERN (prev_insn))))
op_a = src;
else if (rtx_equal_p (op_b, SET_DEST (PATTERN (prev_insn))))
op_b = src;
- if (GET_CODE (op_a) == CONST_INT)
+ if (CONST_INT_P (op_a))
{
rtx tmp = op_a;
op_a = op_b;
/* Now, look to see if we can get the right constant by
adjusting the conditional. */
- if (GET_CODE (op_b) == CONST_INT)
+ if (CONST_INT_P (op_b))
{
HOST_WIDE_INT desired_val = INTVAL (target);
HOST_WIDE_INT actual_val = INTVAL (op_b);
return TRUE;
}
-/* Convert "if (a < 0) x = -a; else x = a;" to "x = abs(a);", etc. */
+/* Convert "if (a < 0) x = -a; else x = a;" to "x = abs(a);",
+ "if (a < 0) x = ~a; else x = a;" to "x = one_cmpl_abs(a);",
+ etc. */
static int
noce_try_abs (struct noce_if_info *if_info)
{
rtx cond, earliest, target, seq, a, b, c;
int negate;
+ bool one_cmpl = false;
+
+ /* Reject modes with signed zeros. */
+ if (HONOR_SIGNED_ZEROS (GET_MODE (if_info->x)))
+ return FALSE;
/* Recognize A and B as constituting an ABS or NABS. The canonical
form is a branch around the negation, taken when the object is the
c = a; a = b; b = c;
negate = 1;
}
+ else if (GET_CODE (a) == NOT && rtx_equal_p (XEXP (a, 0), b))
+ {
+ negate = 0;
+ one_cmpl = true;
+ }
+ else if (GET_CODE (b) == NOT && rtx_equal_p (XEXP (b, 0), a))
+ {
+ c = a; a = b; b = c;
+ negate = 1;
+ one_cmpl = true;
+ }
else
return FALSE;
{
rtx set, insn = prev_nonnote_insn (earliest);
if (insn
+ && BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (earliest)
&& (set = single_set (insn))
&& rtx_equal_p (SET_DEST (set), c))
{
}
start_sequence ();
-
- target = expand_abs_nojump (GET_MODE (if_info->x), b, if_info->x, 1);
+ if (one_cmpl)
+ target = expand_one_cmpl_abs_nojump (GET_MODE (if_info->x), b,
+ if_info->x);
+ else
+ target = expand_abs_nojump (GET_MODE (if_info->x), b, if_info->x, 1);
/* ??? It's a quandary whether cmove would be better here, especially
for integers. Perhaps combine will clean things up. */
if (target && negate)
- target = expand_simple_unop (GET_MODE (target), NEG, target, if_info->x, 0);
+ {
+ if (one_cmpl)
+ target = expand_simple_unop (GET_MODE (target), NOT, target,
+ if_info->x, 0);
+ else
+ target = expand_simple_unop (GET_MODE (target), NEG, target,
+ if_info->x, 0);
+ }
if (! target)
{
rtx cond, t, m, c, seq;
enum machine_mode mode;
enum rtx_code code;
- bool b_unconditional;
+ bool t_unconditional;
cond = if_info->cond;
code = GET_CODE (cond);
if (GET_MODE (m) != mode)
return FALSE;
- /* This is only profitable if T is cheap, or T is unconditionally
- executed/evaluated in the original insn sequence. The latter
- happens if INSN_B was taken from TEST_BB, or if there was no
- INSN_B which can happen for e.g. conditional stores to memory. */
- b_unconditional = (if_info->insn_b == NULL_RTX
- || BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb);
- if (rtx_cost (t, SET) >= COSTS_N_INSNS (2)
- && (!b_unconditional
- || t != if_info->b))
+ /* This is only profitable if T is unconditionally executed/evaluated in the
+ original insn sequence or T is cheap. The former happens if B is the
+     non-zero (T) value and if INSN_B was taken from TEST_BB, or there was no
+     INSN_B, which can happen for e.g. conditional stores to memory.  For the
+ cost computation use the block TEST_BB where the evaluation will end up
+ after the transformation. */
+ t_unconditional =
+ (t == if_info->b
+ && (if_info->insn_b == NULL_RTX
+ || BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb));
+ if (!(t_unconditional
+ || (set_src_cost (t, optimize_bb_for_speed_p (if_info->test_bb))
+ < COSTS_N_INSNS (2))))
return FALSE;
start_sequence ();
if (GET_CODE (cond) == ZERO_EXTRACT)
{
if (XEXP (cond, 1) != const1_rtx
- || GET_CODE (XEXP (cond, 2)) != CONST_INT
+ || !CONST_INT_P (XEXP (cond, 2))
|| ! rtx_equal_p (x, XEXP (cond, 0)))
return FALSE;
bitnum = INTVAL (XEXP (cond, 2));
{
/* Check for "if (X & C) x = x op C". */
if (! rtx_equal_p (x, XEXP (a, 0))
- || GET_CODE (XEXP (a, 1)) != CONST_INT
+ || !CONST_INT_P (XEXP (a, 1))
|| (INTVAL (XEXP (a, 1)) & GET_MODE_MASK (mode))
!= (unsigned HOST_WIDE_INT) 1 << bitnum)
return FALSE;
{
/* Check for "if (X & C) x &= ~C". */
if (! rtx_equal_p (x, XEXP (a, 0))
- || GET_CODE (XEXP (a, 1)) != CONST_INT
+ || !CONST_INT_P (XEXP (a, 1))
|| (INTVAL (XEXP (a, 1)) & GET_MODE_MASK (mode))
!= (~((HOST_WIDE_INT) 1 << bitnum) & GET_MODE_MASK (mode)))
return FALSE;
/* Otherwise, fall back on canonicalize_condition to do the dirty
work of manipulating MODE_CC values and COMPARE rtx codes. */
- return canonicalize_condition (jump, cond, reverse, earliest,
- NULL_RTX, false, true);
+ tmp = canonicalize_condition (jump, cond, reverse, earliest,
+ NULL_RTX, false, true);
+
+  /* We don't handle conditions with side-effects, since that would
+     require handling REG_INC notes and making sure no duplicate
+     conditions are emitted.  */
+ if (tmp != NULL_RTX && side_effects_p (tmp))
+ return NULL_RTX;
+
+ return tmp;
}
/* Return true if OP is ok for if-then-else processing. */
static int
-noce_operand_ok (rtx op)
+noce_operand_ok (const_rtx op)
{
+ if (side_effects_p (op))
+ return FALSE;
+
/* We special-case memories, so handle any of them with
no address side effects. */
if (MEM_P (op))
return ! side_effects_p (XEXP (op, 0));
- if (side_effects_p (op))
- return FALSE;
-
return ! may_trap_p (op);
}
/* Return true if a write into MEM may trap or fault. */
static bool
-noce_mem_write_may_trap_or_fault_p (rtx mem)
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
{
rtx addr;
addr = XEXP (addr, 1);
break;
case PLUS:
- if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
+ if (CONST_INT_P (XEXP (addr, 1)))
addr = XEXP (addr, 0);
else
return false;
return false;
}
+/* Return whether we can use store speculation for MEM. TOP_BB is the
+ basic block above the conditional block where we are considering
+   doing the speculative store.  We check whether MEM is set
+   unconditionally later in the function.  */
+
+static bool
+noce_can_store_speculate_p (basic_block top_bb, const_rtx mem)
+{
+ basic_block dominator;
+
+ for (dominator = get_immediate_dominator (CDI_POST_DOMINATORS, top_bb);
+ dominator != NULL;
+ dominator = get_immediate_dominator (CDI_POST_DOMINATORS, dominator))
+ {
+ rtx insn;
+
+ FOR_BB_INSNS (dominator, insn)
+ {
+ /* If we see something that might be a memory barrier, we
+ have to stop looking. Even if the MEM is set later in
+ the function, we still don't want to set it
+ unconditionally before the barrier. */
+ if (INSN_P (insn)
+ && (volatile_insn_p (PATTERN (insn))
+ || (CALL_P (insn) && (!RTL_CONST_CALL_P (insn)))))
+ return false;
+
+ if (memory_modified_in_insn_p (mem, insn))
+ return true;
+ if (modified_in_p (XEXP (mem, 0), insn))
+ return false;
+ }
+ }
+
+ return false;
+}
+
/* Given a simple IF-THEN-JOIN or IF-THEN-ELSE-JOIN block, attempt to convert
it without using conditional execution. Return TRUE if we were successful
at converting the block. */
}
else
{
- insn_b = prev_nonnote_insn (if_info->cond_earliest);
+ insn_b = prev_nonnote_nondebug_insn (if_info->cond_earliest);
/* We're going to be moving the evaluation of B down from above
COND_EARLIEST to JUMP. Make sure the relevant data is still
intact. */
if (! insn_b
+ || BLOCK_FOR_INSN (insn_b) != BLOCK_FOR_INSN (if_info->cond_earliest)
|| !NONJUMP_INSN_P (insn_b)
|| (set_b = single_set (insn_b)) == NULL_RTX
|| ! rtx_equal_p (x, SET_DEST (set_b))
+ || ! noce_operand_ok (SET_SRC (set_b))
|| reg_overlap_mentioned_p (x, SET_SRC (set_b))
- || modified_between_p (SET_SRC (set_b),
- PREV_INSN (if_info->cond_earliest), jump)
+ || modified_between_p (SET_SRC (set_b), insn_b, jump)
/* Likewise with X. In particular this can happen when
noce_get_condition looks farther back in the instruction
stream than one might expect. */
|| reg_overlap_mentioned_p (x, cond)
|| reg_overlap_mentioned_p (x, a)
- || modified_between_p (x, PREV_INSN (if_info->cond_earliest), jump))
+ || modified_between_p (x, insn_b, jump))
insn_b = set_b = NULL_RTX;
}
the lifetime of hard registers on small register class machines. */
orig_x = x;
if (!REG_P (x)
- || (SMALL_REGISTER_CLASSES
- && REGNO (x) < FIRST_PSEUDO_REGISTER))
+ || (HARD_REGISTER_P (x)
+ && targetm.small_register_classes_for_mode_p (GET_MODE (x))))
{
if (GET_MODE (x) == BLKmode)
return FALSE;
- if (GET_MODE (x) == ZERO_EXTRACT
- && (GET_CODE (XEXP (x, 1)) != CONST_INT
- || GET_CODE (XEXP (x, 2)) != CONST_INT))
+ if (GET_CODE (x) == ZERO_EXTRACT
+ && (!CONST_INT_P (XEXP (x, 1))
+ || !CONST_INT_P (XEXP (x, 2))))
return FALSE;
x = gen_reg_rtx (GET_MODE (GET_CODE (x) == STRICT_LOW_PART
if (! noce_operand_ok (a) || ! noce_operand_ok (b))
return FALSE;
+ retry:
/* Set up the info block for our subroutines. */
if_info->insn_a = insn_a;
if_info->insn_b = insn_b;
goto success;
}
- /* Disallow the "if (...) x = a;" form (with an implicit "else x = x;")
- for optimizations if writing to x may trap or fault, i.e. it's a memory
- other than a static var or a stack slot, is misaligned on strict
- aligned machines or is read-only.
- If x is a read-only memory, then the program is valid only if we
- avoid the store into it. If there are stores on both the THEN and
- ELSE arms, then we can go ahead with the conversion; either the
- program is broken, or the condition is always false such that the
- other memory is selected. */
- if (!set_b && MEM_P (orig_x) && noce_mem_write_may_trap_or_fault_p (orig_x))
- return FALSE;
+ if (!set_b && MEM_P (orig_x))
+ {
+ /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
+ for optimizations if writing to x may trap or fault,
+ i.e. it's a memory other than a static var or a stack slot,
+ is misaligned on strict aligned machines or is read-only. If
+ x is a read-only memory, then the program is valid only if we
+ avoid the store into it. If there are stores on both the
+ THEN and ELSE arms, then we can go ahead with the conversion;
+ either the program is broken, or the condition is always
+ false such that the other memory is selected. */
+ if (noce_mem_write_may_trap_or_fault_p (orig_x))
+ return FALSE;
+
+ /* Avoid store speculation: given "if (...) x = a" where x is a
+ MEM, we only want to do the store if x is always set
+ somewhere in the function. This avoids cases like
+ if (pthread_mutex_trylock(mutex))
+ ++global_variable;
+ where we only want global_variable to be changed if the mutex
+ is held. FIXME: This should ideally be expressed directly in
+ RTL somehow. */
+ if (!noce_can_store_speculate_p (test_bb, orig_x))
+ return FALSE;
+ }
if (noce_try_move (if_info))
goto success;
if (HAVE_conditional_move
&& noce_try_cmove (if_info))
goto success;
- if (! HAVE_conditional_execution)
+ if (! targetm.have_conditional_execution ())
{
if (noce_try_store_flag_constants (if_info))
goto success;
goto success;
}
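+  /* If B was taken from an insn found above the condition, all the
+     transformations failed with it; retry with the implicit
+     "else x = x" instead.  */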
+ if (!else_bb && set_b)
+ {
+ insn_b = set_b = NULL_RTX;
+ b = orig_x;
+ goto retry;
+ }
+
return FALSE;
success:
REGS. COND is the condition we will test. */
static int
-check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) *regs, rtx cond)
+check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs,
+ rtx cond)
{
rtx insn;
{
rtx set, dest, src;
- if (!INSN_P (insn) || JUMP_P (insn))
+ if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
continue;
set = single_set (insn);
if (!set)
dest = SET_DEST (set);
src = SET_SRC (set);
if (!REG_P (dest)
- || (SMALL_REGISTER_CLASSES && HARD_REGISTER_P (dest)))
+ || (HARD_REGISTER_P (dest)
+ && targetm.small_register_classes_for_mode_p (GET_MODE (dest))))
return FALSE;
if (!CONSTANT_P (src) && !register_operand (src, VOIDmode))
vals[REGNO (dest)] = src;
- VEC_safe_push (int, heap, regs, REGNO (dest));
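+      /* Push through the pointer: VEC_safe_push may reallocate the
+	 vector, and the caller must see the new copy.  */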
+ VEC_safe_push (int, heap, *regs, REGNO (dest));
}
return TRUE;
rtx set, target, dest, t, e;
unsigned int regno;
- if (!INSN_P (insn) || JUMP_P (insn))
+ /* ??? Maybe emit conditional debug insn? */
+ if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
continue;
set = single_set (insn);
gcc_assert (set && REG_P (SET_DEST (set)));
memset (else_vals, 0, size);
/* Make sure the blocks are suitable. */
- if (!check_cond_move_block (then_bb, then_vals, then_regs, cond)
- || (else_bb && !check_cond_move_block (else_bb, else_vals, else_regs, cond)))
- return FALSE;
+ if (!check_cond_move_block (then_bb, then_vals, &then_regs, cond)
+ || (else_bb
+ && !check_cond_move_block (else_bb, else_vals, &else_regs, cond)))
+ {
+ VEC_free (int, heap, then_regs);
+ VEC_free (int, heap, else_regs);
+ return FALSE;
+ }
/* Make sure the blocks can be used together. If the same register
is set in both blocks, and is not set to a constant in both
source register does not change after the assignment. Also count
the number of registers set in only one of the blocks. */
c = 0;
- for (i = 0; VEC_iterate (int, then_regs, i, reg); i++)
+ FOR_EACH_VEC_ELT (int, then_regs, i, reg)
{
if (!then_vals[reg] && !else_vals[reg])
continue;
if (!CONSTANT_P (then_vals[reg])
&& !CONSTANT_P (else_vals[reg])
&& !rtx_equal_p (then_vals[reg], else_vals[reg]))
- return FALSE;
+ {
+ VEC_free (int, heap, then_regs);
+ VEC_free (int, heap, else_regs);
+ return FALSE;
+ }
}
}
/* Finish off c for MAX_CONDITIONAL_EXECUTE. */
- for (i = 0; VEC_iterate (int, else_regs, i, reg); ++i)
+ FOR_EACH_VEC_ELT (int, else_regs, i, reg)
if (!then_vals[reg])
++c;
branches, since if we convert we are going to always execute
them. */
if (c > MAX_CONDITIONAL_EXECUTE)
- return FALSE;
+ {
+ VEC_free (int, heap, then_regs);
+ VEC_free (int, heap, else_regs);
+ return FALSE;
+ }
/* Try to emit the conditional moves. First do the then block,
then do anything left in the else blocks. */
then_vals, else_vals, true)))
{
end_sequence ();
+ VEC_free (int, heap, then_regs);
+ VEC_free (int, heap, else_regs);
return FALSE;
}
seq = end_ifcvt_sequence (if_info);
if (!seq)
- return FALSE;
+ {
+ VEC_free (int, heap, then_regs);
+ VEC_free (int, heap, else_regs);
+ return FALSE;
+ }
loc_insn = first_active_insn (then_bb);
if (!loc_insn)
VEC_free (int, heap, then_regs);
VEC_free (int, heap, else_regs);
-
return TRUE;
}
Return TRUE if we were successful at converting the block. */
static int
-noce_find_if_block (basic_block test_bb,
- edge then_edge, edge else_edge,
+noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge,
int pass)
{
basic_block then_bb, else_bb, join_bb;
bool then_else_reversed = false;
rtx jump, cond;
+ rtx cond_earliest;
struct noce_if_info if_info;
/* We only ever should get here before reload. */
return FALSE;
/* If this is not a standard conditional jump, we can't parse it. */
- cond = noce_get_condition (jump,
- &if_info.cond_earliest,
- then_else_reversed);
+ cond = noce_get_condition (jump, &cond_earliest, then_else_reversed);
if (!cond)
return FALSE;
if_info.else_bb = else_bb;
if_info.join_bb = join_bb;
if_info.cond = cond;
+ if_info.cond_earliest = cond_earliest;
if_info.jump = jump;
if_info.then_else_reversed = then_else_reversed;
+ if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb),
+ predictable_edge_p (then_edge));
/* Do the real work. */
/* Otherwise this must be a multiway branch of some sort. */
return NULL;
- memset (&ce_info, '\0', sizeof (ce_info));
+ memset (&ce_info, 0, sizeof (ce_info));
ce_info.test_bb = test_bb;
ce_info.then_bb = then_edge->dest;
ce_info.else_bb = else_edge->dest;
IFCVT_INIT_EXTRA_FIELDS (&ce_info);
#endif
- if (! reload_completed
+ if (!reload_completed
&& noce_find_if_block (test_bb, then_edge, else_edge, pass))
goto success;
- if (HAVE_conditional_execution && reload_completed
+ if (reload_completed
+ && targetm.have_conditional_execution ()
&& cond_exec_find_if_block (&ce_info))
goto success;
- if (HAVE_trap && HAVE_conditional_trap
+ if (HAVE_trap
+ && optab_handler (ctrap_optab, word_mode) != CODE_FOR_nothing
&& find_cond_trap (test_bb, then_edge, else_edge))
goto success;
if (dom_info_state (CDI_POST_DOMINATORS) >= DOM_NO_FAST_QUERY
- && (! HAVE_conditional_execution || reload_completed))
+ && (reload_completed || !targetm.have_conditional_execution ()))
{
if (find_if_case_1 (test_bb, then_edge, else_edge))
goto success;
if (INSN_P (insn)
&& !JUMP_P (insn)
+ && !DEBUG_INSN_P (insn)
&& GET_CODE (PATTERN (insn)) != USE
&& GET_CODE (PATTERN (insn)) != CLOBBER)
n_insns++;
ce_info->last_test_bb = test_bb;
/* We only ever should get here after reload,
- and only if we have conditional execution. */
- gcc_assert (HAVE_conditional_execution && reload_completed);
+ and if we have conditional execution. */
+ gcc_assert (reload_completed && targetm.have_conditional_execution ());
/* Discover if any fall through predecessors of the current test basic block
were && tests (which jump to the else block) or || tests (which jump to
if (EDGE_COUNT (then_bb->succs) > 0
&& (!single_succ_p (then_bb)
|| (single_succ_edge (then_bb)->flags & EDGE_COMPLEX)
- || (epilogue_completed && tablejump_p (BB_END (then_bb), NULL, NULL))))
+ || (epilogue_completed
+ && tablejump_p (BB_END (then_bb), NULL, NULL))))
return FALSE;
/* If the THEN block has no successors, conditional execution can still
else if (single_succ_p (else_bb)
&& single_succ (then_bb) == single_succ (else_bb)
&& single_pred_p (else_bb)
- && ! (single_succ_edge (else_bb)->flags & EDGE_COMPLEX)
- && ! (epilogue_completed && tablejump_p (BB_END (else_bb), NULL, NULL)))
+ && !(single_succ_edge (else_bb)->flags & EDGE_COMPLEX)
+ && !(epilogue_completed
+ && tablejump_p (BB_END (else_bb), NULL, NULL)))
join_bb = single_succ (else_bb);
/* Otherwise it is not an IF-THEN or IF-THEN-ELSE combination. */
basic_block then_bb = then_edge->dest;
basic_block else_bb = else_edge->dest;
basic_block new_bb;
- int then_bb_index;
+ int then_bb_index, then_prob;
+ rtx else_target = NULL_RTX;
/* If we are partitioning hot/cold basic blocks, we don't want to
mess up unconditional or indirect jumps that cross between hot
"\nIF-CASE-1 found, start %d, then %d\n",
test_bb->index, then_bb->index);
- /* THEN is small. */
- if (! cheap_bb_rtx_cost_p (then_bb, COSTS_N_INSNS (BRANCH_COST)))
+ if (then_edge->probability)
+ then_prob = REG_BR_PROB_BASE - then_edge->probability;
+ else
+ then_prob = REG_BR_PROB_BASE / 2;
+
+  /* We're speculating from the THEN path; make sure the cost of
+     speculation is within reason.  */
+ if (! cheap_bb_rtx_cost_p (then_bb, then_prob,
+ COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
+ predictable_edge_p (then_edge)))))
return FALSE;
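+  /* If the ELSE path leaves the function, remember the label its jump
+     targets (e.g. a return); the exit block itself has no label we
+     could redirect to.  */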
+ if (else_bb == EXIT_BLOCK_PTR)
+ {
+ rtx jump = BB_END (else_edge->src);
+ gcc_assert (JUMP_P (jump));
+ else_target = JUMP_LABEL (jump);
+ }
+
/* Registers set are dead, or are predicable. */
if (! dead_or_predicable (test_bb, then_bb, else_bb,
- single_succ (then_bb), 1))
+ single_succ_edge (then_bb), 1))
return FALSE;
/* Conversion went ok, including moving the insns and fixing up the
redirect_edge_succ (FALLTHRU_EDGE (test_bb), else_bb);
new_bb = 0;
}
+ else if (else_bb == EXIT_BLOCK_PTR)
+ new_bb = force_nonfallthru_and_redirect (FALLTHRU_EDGE (test_bb),
+ else_bb, else_target);
else
new_bb = redirect_edge_and_branch_force (FALLTHRU_EDGE (test_bb),
else_bb);
basic_block then_bb = then_edge->dest;
basic_block else_bb = else_edge->dest;
edge else_succ;
- rtx note;
+ int then_prob, else_prob;
/* If we are partitioning hot/cold basic blocks, we don't want to
mess up unconditional or indirect jumps that cross between hot
if (then_bb->index < NUM_FIXED_BLOCKS)
return FALSE;
+ if (else_edge->probability)
+ {
+ else_prob = else_edge->probability;
+ then_prob = REG_BR_PROB_BASE - else_prob;
+ }
+ else
+ {
+ else_prob = REG_BR_PROB_BASE / 2;
+ then_prob = REG_BR_PROB_BASE / 2;
+ }
+
/* ELSE is predicted or SUCC(ELSE) postdominates THEN. */
- note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX);
- if (note && INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2)
+ if (else_prob > then_prob)
;
else if (else_succ->dest->index < NUM_FIXED_BLOCKS
|| dominated_by_p (CDI_POST_DOMINATORS, then_bb,
"\nIF-CASE-2 found, start %d, else %d\n",
test_bb->index, else_bb->index);
- /* ELSE is small. */
- if (! cheap_bb_rtx_cost_p (else_bb, COSTS_N_INSNS (BRANCH_COST)))
+  /* We're speculating from the ELSE path; make sure the cost of
+     speculation is within reason.  */
+ if (! cheap_bb_rtx_cost_p (else_bb, else_prob,
+ COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
+ predictable_edge_p (else_edge)))))
return FALSE;
/* Registers set are dead, or are predicable. */
- if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ->dest, 0))
+ if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ, 0))
return FALSE;
/* Conversion went ok, including moving the insns and fixing up the
return TRUE;
}
-/* A subroutine of dead_or_predicable called through for_each_rtx.
- Return 1 if a memory is found. */
-
-static int
-find_memory (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
- return MEM_P (*px);
-}
-
/* Used by the code above to perform the actual rtl transformations.
Return TRUE if successful.
TEST_BB is the block containing the conditional branch. MERGE_BB
- is the block containing the code to manipulate. NEW_DEST is the
- label TEST_BB should be branching to after the conversion.
+ is the block containing the code to manipulate. DEST_EDGE is an
+ edge representing a jump to the join block; after the conversion,
+ TEST_BB should be branching to its destination.
REVERSEP is true if the sense of the branch should be reversed. */
static int
dead_or_predicable (basic_block test_bb, basic_block merge_bb,
- basic_block other_bb, basic_block new_dest, int reversep)
+ basic_block other_bb, edge dest_edge, int reversep)
{
- rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX;
+ basic_block new_dest = dest_edge->dest;
+ rtx head, end, jump, earliest = NULL_RTX, old_dest;
+ bitmap merge_set = NULL;
+ /* Number of pending changes. */
+ int n_validated_changes = 0;
+ rtx new_dest_label = NULL_RTX;
jump = BB_END (test_bb);
head = BB_HEAD (merge_bb);
end = BB_END (merge_bb);
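+  /* Debug insns take no part in the conversion; step END back past
+     any that end MERGE_BB.  */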
+ while (DEBUG_INSN_P (end) && end != head)
+ end = PREV_INSN (end);
+
  /* If merge_bb ends with a tablejump, predicating/moving insns
into test_bb and then deleting merge_bb will result in the jumptable
that follows merge_bb being removed along with merge_bb and then we
if (LABEL_P (head))
head = NEXT_INSN (head);
+ while (DEBUG_INSN_P (head) && head != end)
+ head = NEXT_INSN (head);
if (NOTE_P (head))
{
if (head == end)
goto no_body;
}
head = NEXT_INSN (head);
+ while (DEBUG_INSN_P (head) && head != end)
+ head = NEXT_INSN (head);
}
if (JUMP_P (end))
goto no_body;
}
end = PREV_INSN (end);
+ while (DEBUG_INSN_P (end) && end != head)
+ end = PREV_INSN (end);
}
/* Disable handling dead code by conditional execution if the machine needs
to do anything funny with the tests, etc. */
#ifndef IFCVT_MODIFY_TESTS
- if (HAVE_conditional_execution)
+ if (targetm.have_conditional_execution ())
{
/* In the conditional execution case, we have things easy. We know
the condition is reversible. We don't have to check life info
prob_val = GEN_INT (REG_BR_PROB_BASE - INTVAL (prob_val));
}
- if (! cond_exec_process_insns ((ce_if_block_t *)0, head, end, cond,
- prob_val, 0))
- goto cancel;
+ if (cond_exec_process_insns (NULL, head, end, cond, prob_val, 0)
+ && verify_changes (0))
+ n_validated_changes = num_validated_changes ();
+ else
+ cancel_changes (0);
earliest = jump;
}
- else
#endif
+
+ /* If we allocated new pseudos (e.g. in the conditional move
+ expander called from noce_emit_cmove), we must resize the
+ array first. */
+ if (max_regno < max_reg_num ())
+ max_regno = max_reg_num ();
+
+ /* Try the NCE path if the CE path did not result in any changes. */
+ if (n_validated_changes == 0)
{
+ rtx cond, insn;
+ regset live;
+ bool success;
+
/* In the non-conditional execution case, we have to verify that there
are no trapping operations, no calls, no references to memory, and
that any registers modified are dead at the branch site. */
- rtx insn, cond, prev;
- bitmap merge_set, test_live, test_set;
- unsigned i, fail = 0;
- bitmap_iterator bi;
-
- /* Check for no calls or trapping operations. */
- for (insn = head; ; insn = NEXT_INSN (insn))
- {
- if (CALL_P (insn))
- return FALSE;
- if (INSN_P (insn))
- {
- if (may_trap_p (PATTERN (insn)))
- return FALSE;
-
- /* ??? Even non-trapping memories such as stack frame
- references must be avoided. For stores, we collect
- no lifetime info; for reads, we'd have to assert
- true_dependence false against every store in the
- TEST range. */
- if (for_each_rtx (&PATTERN (insn), find_memory, NULL))
- return FALSE;
- }
- if (insn == end)
- break;
- }
-
- if (! any_condjump_p (jump))
+ if (!any_condjump_p (jump))
return FALSE;
/* Find the extent of the conditional. */
cond = noce_get_condition (jump, &earliest, false);
- if (! cond)
+ if (!cond)
return FALSE;
- /* Collect:
- MERGE_SET = set of registers set in MERGE_BB
- TEST_LIVE = set of registers live at EARLIEST
- TEST_SET = set of registers set between EARLIEST and the
- end of the block. */
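+      /* Check that the insns from HEAD to END can be moved backwards
+	 across the insns from EARLIEST to JUMP, given the registers
+	 live on the other path (OTHER_BB).  */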
+ live = BITMAP_ALLOC (®_obstack);
+ simulate_backwards_to_point (merge_bb, live, end);
+ success = can_move_insns_across (head, end, earliest, jump,
+ merge_bb, live,
+ df_get_live_in (other_bb), NULL);
+ BITMAP_FREE (live);
+ if (!success)
+ return FALSE;
+ /* Collect the set of registers set in MERGE_BB. */
merge_set = BITMAP_ALLOC (®_obstack);
- test_live = BITMAP_ALLOC (®_obstack);
- test_set = BITMAP_ALLOC (®_obstack);
-
- /* ??? bb->local_set is only valid during calculate_global_regs_live,
- so we must recompute usage for MERGE_BB. Not so bad, I suppose,
- since we've already asserted that MERGE_BB is small. */
- /* If we allocated new pseudos (e.g. in the conditional move
- expander called from noce_emit_cmove), we must resize the
- array first. */
- if (max_regno < max_reg_num ())
- max_regno = max_reg_num ();
FOR_BB_INSNS (merge_bb, insn)
+ if (NONDEBUG_INSN_P (insn))
+ df_simulate_find_defs (insn, merge_set);
+
+#ifdef HAVE_simple_return
+ /* If shrink-wrapping, disable this optimization when test_bb is
+ the first basic block and merge_bb exits. The idea is to not
+ move code setting up a return register as that may clobber a
+ register used to pass function parameters, which then must be
+ saved in caller-saved regs. A caller-saved reg requires the
+ prologue, killing a shrink-wrap opportunity. */
+ if ((flag_shrink_wrap && HAVE_simple_return && !epilogue_completed)
+ && ENTRY_BLOCK_PTR->next_bb == test_bb
+ && single_succ_p (new_dest)
+ && single_succ (new_dest) == EXIT_BLOCK_PTR
+ && bitmap_intersect_p (df_get_live_in (new_dest), merge_set))
{
- if (INSN_P (insn))
+ regset return_regs;
+ unsigned int i;
+
+ return_regs = BITMAP_ALLOC (®_obstack);
+
+ /* Start off with the intersection of regs used to pass
+ params and regs used to return values. */
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (FUNCTION_ARG_REGNO_P (i)
+ && targetm.calls.function_value_regno_p (i))
+ bitmap_set_bit (return_regs, INCOMING_REGNO (i));
+
+ bitmap_and_into (return_regs, df_get_live_out (ENTRY_BLOCK_PTR));
+ bitmap_and_into (return_regs, df_get_live_in (EXIT_BLOCK_PTR));
+ if (!bitmap_empty_p (return_regs))
{
- unsigned int uid = INSN_UID (insn);
- struct df_ref **def_rec;
- for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ FOR_BB_INSNS_REVERSE (new_dest, insn)
+ if (NONDEBUG_INSN_P (insn))
+ {
+ df_ref *def_rec;
+ unsigned int uid = INSN_UID (insn);
+
+ /* If this insn sets any reg in return_regs.. */
+ for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+ {
+ df_ref def = *def_rec;
+ unsigned r = DF_REF_REGNO (def);
+
+ if (bitmap_bit_p (return_regs, r))
+ break;
+ }
+ /* ..then add all reg uses to the set of regs
+ we're interested in. */
+ if (*def_rec)
+ df_simulate_uses (insn, return_regs);
+ }
+ if (bitmap_intersect_p (merge_set, return_regs))
{
- struct df_ref *def = *def_rec;
- bitmap_set_bit (merge_set, DF_REF_REGNO (def));
+ BITMAP_FREE (return_regs);
+ BITMAP_FREE (merge_set);
+ return FALSE;
}
}
+ BITMAP_FREE (return_regs);
}
-
- /* For small register class machines, don't lengthen lifetimes of
- hard registers before reload. */
- if (SMALL_REGISTER_CLASSES && ! reload_completed)
- {
- EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
- {
- if (i < FIRST_PSEUDO_REGISTER
- && ! fixed_regs[i]
- && ! global_regs[i])
- fail = 1;
- }
- }
-
- /* For TEST, we're interested in a range of insns, not a whole block.
- Moreover, we're interested in the insns live from OTHER_BB. */
-
- /* The loop below takes the set of live registers
- after JUMP, and calculates the live set before EARLIEST. */
- bitmap_copy (test_live, df_get_live_in (other_bb));
- df_simulate_artificial_refs_at_end (test_bb, test_live);
- for (insn = jump; ; insn = prev)
- {
- if (INSN_P (insn))
- {
- df_simulate_find_defs (insn, test_set);
- df_simulate_one_insn_backwards (test_bb, insn, test_live);
- }
- prev = PREV_INSN (insn);
- if (insn == earliest)
- break;
- }
-
- /* We can perform the transformation if
- MERGE_SET & (TEST_SET | TEST_LIVE)
- and
- TEST_SET & DF_LIVE_IN (merge_bb)
- are empty. */
-
- if (bitmap_intersect_p (test_set, merge_set)
- || bitmap_intersect_p (test_live, merge_set)
- || bitmap_intersect_p (test_set, df_get_live_in (merge_bb)))
- fail = 1;
-
- BITMAP_FREE (merge_set);
- BITMAP_FREE (test_live);
- BITMAP_FREE (test_set);
-
- if (fail)
- return FALSE;
+#endif
}
no_body:
old_dest = JUMP_LABEL (jump);
if (other_bb != new_dest)
{
- new_label = block_label (new_dest);
+ if (JUMP_P (BB_END (dest_edge->src)))
+ new_dest_label = JUMP_LABEL (BB_END (dest_edge->src));
+ else if (new_dest == EXIT_BLOCK_PTR)
+ new_dest_label = ret_rtx;
+ else
+ new_dest_label = block_label (new_dest);
+
if (reversep
- ? ! invert_jump_1 (jump, new_label)
- : ! redirect_jump_1 (jump, new_label))
+ ? ! invert_jump_1 (jump, new_dest_label)
+ : ! redirect_jump_1 (jump, new_dest_label))
goto cancel;
}
- if (! apply_change_group ())
- return FALSE;
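+  /* The conditional-execution changes (if any) have already been
+     verified; verify only the jump changes made after them, then
+     commit the whole group at once.  */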
+ if (verify_changes (n_validated_changes))
+ confirm_change_group ();
+ else
+ goto cancel;
if (other_bb != new_dest)
{
- redirect_jump_2 (jump, old_dest, new_label, 0, reversep);
+ redirect_jump_2 (jump, old_dest, new_dest_label, 0, reversep);
redirect_edge_succ (BRANCH_EDGE (test_bb), new_dest);
if (reversep)
if (end == BB_END (merge_bb))
BB_END (merge_bb) = PREV_INSN (head);
- /* PR 21767: When moving insns above a conditional branch, REG_EQUAL
- notes might become invalid. */
+ /* PR 21767: when moving insns above a conditional branch, the REG_EQUAL
+ notes being moved might become invalid. */
insn = head;
do
{
if (! note)
continue;
set = single_set (insn);
- if (!set || !function_invariant_p (SET_SRC (set)))
+ if (!set || !function_invariant_p (SET_SRC (set))
+ || !function_invariant_p (XEXP (note, 0)))
remove_note (insn, note);
} while (insn != end && (insn = NEXT_INSN (insn)));
+ /* PR46315: when moving insns above a conditional branch, the REG_EQUAL
+ notes referring to the registers being set might become invalid. */
+ if (merge_set)
+ {
+ unsigned i;
+ bitmap_iterator bi;
+
+ EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi)
+ remove_reg_equal_equiv_notes_for_regno (i);
+
+ BITMAP_FREE (merge_set);
+ }
+
reorder_insns (head, end, PREV_INSN (earliest));
}
cancel:
cancel_changes (0);
+
+ if (merge_set)
+ BITMAP_FREE (merge_set);
+
return FALSE;
}
\f
/* Main entry point for all if-conversion. */
static void
-if_convert (bool recompute_dominance)
+if_convert (void)
{
basic_block bb;
int pass;
loop_optimizer_finalize ();
free_dominance_info (CDI_DOMINATORS);
- /* Compute postdominators if we think we'll use them. */
- if (HAVE_conditional_execution || recompute_dominance)
- calculate_dominance_info (CDI_POST_DOMINATORS);
+ /* Compute postdominators. */
+ calculate_dominance_info (CDI_POST_DOMINATORS);
df_set_flags (DF_LR_RUN_DCE);
FOR_EACH_BB (bb)
{
basic_block new_bb;
- while (!df_get_bb_dirty (bb)
+ while (!df_get_bb_dirty (bb)
&& (new_bb = find_if_header (bb, pass)) != NULL)
bb = new_bb;
}
#ifdef IFCVT_MULTIPLE_DUMPS
if (dump_file && cond_exec_changed_p)
- print_rtl_with_bb (dump_file, get_insns ());
+ {
+ if (dump_flags & TDF_SLIM)
+ print_rtl_slim_with_bb (dump_file, get_insns (), dump_flags);
+ else
+ print_rtl_with_bb (dump_file, get_insns ());
+ }
#endif
}
while (cond_exec_changed_p);
static bool
gate_handle_if_conversion (void)
{
- return (optimize > 0);
+ return (optimize > 0)
+ && dbg_cnt (if_conversion);
}
/* If-conversion and CFG cleanup. */
if (dump_file)
dump_flow_info (dump_file, dump_flags);
cleanup_cfg (CLEANUP_EXPENSIVE);
- if_convert (false);
+ if_convert ();
}
cleanup_cfg (0);
return 0;
}
-struct tree_opt_pass pass_rtl_ifcvt =
+struct rtl_opt_pass pass_rtl_ifcvt =
{
+ {
+ RTL_PASS,
"ce1", /* name */
gate_handle_if_conversion, /* gate */
rest_of_handle_if_conversion, /* execute */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- TODO_df_finish |
- TODO_dump_func, /* todo_flags_finish */
- 'C' /* letter */
+ TODO_df_finish | TODO_verify_rtl_sharing |
+ 0 /* todo_flags_finish */
+ }
};
static bool
gate_handle_if_after_combine (void)
{
- return (optimize > 0 && flag_if_conversion);
+ return optimize > 0 && flag_if_conversion
+ && dbg_cnt (if_after_combine);
}
static unsigned int
rest_of_handle_if_after_combine (void)
{
- if_convert (true);
+ if_convert ();
return 0;
}
-struct tree_opt_pass pass_if_after_combine =
+struct rtl_opt_pass pass_if_after_combine =
{
+ {
+ RTL_PASS,
"ce2", /* name */
gate_handle_if_after_combine, /* gate */
rest_of_handle_if_after_combine, /* execute */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- TODO_df_finish |
- TODO_dump_func |
- TODO_ggc_collect, /* todo_flags_finish */
- 'C' /* letter */
+ TODO_df_finish | TODO_verify_rtl_sharing |
+ TODO_ggc_collect /* todo_flags_finish */
+ }
};
static bool
gate_handle_if_after_reload (void)
{
- return (optimize > 0 && flag_if_conversion2);
+ return optimize > 0 && flag_if_conversion2
+ && dbg_cnt (if_after_reload);
}
static unsigned int
rest_of_handle_if_after_reload (void)
{
- if_convert (true);
+ if_convert ();
return 0;
}
-struct tree_opt_pass pass_if_after_reload =
+struct rtl_opt_pass pass_if_after_reload =
{
+ {
+ RTL_PASS,
"ce3", /* name */
gate_handle_if_after_reload, /* gate */
rest_of_handle_if_after_reload, /* execute */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
- TODO_df_finish |
- TODO_dump_func |
- TODO_ggc_collect, /* todo_flags_finish */
- 'E' /* letter */
+ TODO_df_finish | TODO_verify_rtl_sharing |
+ TODO_ggc_collect /* todo_flags_finish */
+ }
};