X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fifcvt.c;h=e4e13abe0aa29830de89c0e2a8269e0487eee342;hp=4fd98d010d37e37bf0ca28a13b80096940900f0c;hb=54f730e7ae5f1ad587c6f6a4a6be590d22efee1a;hpb=ba0b5cfcbe928c833e113105e1caf18d56f481d4 diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index 4fd98d010d3..e4e13abe0aa 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -1,5 +1,6 @@ /* If-conversion support. - Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 + Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010, + 2011 Free Software Foundation, Inc. This file is part of GCC. @@ -33,10 +34,9 @@ #include "hard-reg-set.h" #include "basic-block.h" #include "expr.h" -#include "real.h" #include "output.h" #include "optabs.h" -#include "toplev.h" +#include "diagnostic-core.h" #include "tm_p.h" #include "cfgloop.h" #include "target.h" @@ -47,9 +47,6 @@ #include "vecprim.h" #include "dbgcnt.h" -#ifndef HAVE_conditional_execution -#define HAVE_conditional_execution 0 -#endif #ifndef HAVE_conditional_move #define HAVE_conditional_move 0 #endif @@ -62,12 +59,11 @@ #ifndef HAVE_trap #define HAVE_trap 0 #endif -#ifndef HAVE_conditional_trap -#define HAVE_conditional_trap 0 -#endif #ifndef MAX_CONDITIONAL_EXECUTE -#define MAX_CONDITIONAL_EXECUTE (BRANCH_COST + 1) +#define MAX_CONDITIONAL_EXECUTE \ + (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \ + + 1) #endif #define IFCVT_MULTIPLE_DUMPS 1 @@ -89,9 +85,11 @@ static int cond_exec_changed_p; /* Forward references. */ static int count_bb_insns (const_basic_block); -static bool cheap_bb_rtx_cost_p (const_basic_block, int); +static bool cheap_bb_rtx_cost_p (const_basic_block, int, int); static rtx first_active_insn (basic_block); static rtx last_active_insn (basic_block, int); +static rtx find_active_insn_before (basic_block, rtx); +static rtx find_active_insn_after (basic_block, rtx); static basic_block block_fallthru (basic_block); static int cond_exec_process_insns (ce_if_block_t *, rtx, rtx, rtx, rtx, int); static rtx cond_exec_get_condition (rtx); @@ -105,9 +103,8 @@ static int noce_find_if_block (basic_block, edge, edge, int); static int cond_exec_find_if_block (ce_if_block_t *); static int find_if_case_1 (basic_block, edge, edge); static int find_if_case_2 (basic_block, edge, edge); -static int find_memory (rtx *, void *); static int dead_or_predicable (basic_block, basic_block, basic_block, - basic_block, int); + edge, int); static void noce_emit_move_insn (rtx, rtx); static rtx block_has_only_trap (basic_block); @@ -134,19 +131,31 @@ count_bb_insns (const_basic_block bb) /* Determine whether the total insn_rtx_cost on non-jump insns in basic block BB is less than MAX_COST. This function returns - false if the cost of any instruction could not be estimated. */ + false if the cost of any instruction could not be estimated. + + The cost of the non-jump insns in BB is scaled by REG_BR_PROB_BASE + as those insns are being speculated. MAX_COST is scaled with SCALE + plus a small fudge factor. */ static bool -cheap_bb_rtx_cost_p (const_basic_block bb, int max_cost) +cheap_bb_rtx_cost_p (const_basic_block bb, int scale, int max_cost) { int count = 0; rtx insn = BB_HEAD (bb); + bool speed = optimize_bb_for_speed_p (bb); + + /* Our branch probability/scaling factors are just estimates and don't + account for cases where we can get speculation for free and other + secondary benefits. So we fudge the scale factor to make speculating + appear a little more profitable. 
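+     For instance, assuming REG_BR_PROB_BASE is 10000, a block reached
+     with probability 1/2 is passed SCALE == 5000; the fudge below
+     raises that to 6250, so the effective budget grows from
+     0.5 * MAX_COST to 0.625 * MAX_COST and slightly more speculation
+     is accepted.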
*/ + scale += REG_BR_PROB_BASE / 8; + max_cost *= scale; while (1) { if (NONJUMP_INSN_P (insn)) { - int cost = insn_rtx_cost (PATTERN (insn)); + int cost = insn_rtx_cost (PATTERN (insn), speed) * REG_BR_PROB_BASE; if (cost == 0) return false; @@ -194,7 +203,7 @@ first_active_insn (basic_block bb) insn = NEXT_INSN (insn); } - while (NOTE_P (insn)) + while (NOTE_P (insn) || DEBUG_INSN_P (insn)) { if (insn == BB_END (bb)) return NULL_RTX; @@ -217,6 +226,7 @@ last_active_insn (basic_block bb, int skip_use_p) while (NOTE_P (insn) || JUMP_P (insn) + || DEBUG_INSN_P (insn) || (skip_use_p && NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE)) @@ -232,17 +242,54 @@ last_active_insn (basic_block bb, int skip_use_p) return insn; } +/* Return the active insn before INSN inside basic block CURR_BB. */ + +static rtx +find_active_insn_before (basic_block curr_bb, rtx insn) +{ + if (!insn || insn == BB_HEAD (curr_bb)) + return NULL_RTX; + + while ((insn = PREV_INSN (insn)) != NULL_RTX) + { + if (NONJUMP_INSN_P (insn) || JUMP_P (insn) || CALL_P (insn)) + break; + + /* No other active insn all the way to the start of the basic block. */ + if (insn == BB_HEAD (curr_bb)) + return NULL_RTX; + } + + return insn; +} + +/* Return the active insn after INSN inside basic block CURR_BB. */ + +static rtx +find_active_insn_after (basic_block curr_bb, rtx insn) +{ + if (!insn || insn == BB_END (curr_bb)) + return NULL_RTX; + + while ((insn = NEXT_INSN (insn)) != NULL_RTX) + { + if (NONJUMP_INSN_P (insn) || JUMP_P (insn) || CALL_P (insn)) + break; + + /* No other active insn all the way to the end of the basic block. */ + if (insn == BB_END (curr_bb)) + return NULL_RTX; + } + + return insn; +} + /* Return the basic block reached by falling though the basic block BB. */ static basic_block block_fallthru (basic_block bb) { - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, bb->succs) - if (e->flags & EDGE_FALLTHRU) - break; + edge e = find_fallthru_edge (bb->succs); return (e) ? e->dest : NULL_BLOCK; } @@ -269,7 +316,11 @@ cond_exec_process_insns (ce_if_block_t *ce_info ATTRIBUTE_UNUSED, for (insn = start; ; insn = NEXT_INSN (insn)) { - if (NOTE_P (insn)) + /* dwarf2out can't cope with conditional prologues. */ + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END) + return FALSE; + + if (NOTE_P (insn) || DEBUG_INSN_P (insn)) goto insn_done; gcc_assert(NONJUMP_INSN_P (insn) || CALL_P (insn)); @@ -387,7 +438,11 @@ cond_exec_process_if_block (ce_if_block_t * ce_info, rtx false_expr; /* test for then block insns */ rtx true_prob_val; /* probability of else block */ rtx false_prob_val; /* probability of then block */ - int n_insns; + rtx then_last_head = NULL_RTX; /* Last match at the head of THEN */ + rtx else_last_head = NULL_RTX; /* Last match at the head of ELSE */ + rtx then_first_tail = NULL_RTX; /* First match at the tail of THEN */ + rtx else_first_tail = NULL_RTX; /* First match at the tail of ELSE */ + int then_n_insns, else_n_insns, n_insns; enum rtx_code false_code; /* If test is comprised of && or || elements, and we've failed at handling @@ -420,15 +475,79 @@ cond_exec_process_if_block (ce_if_block_t * ce_info, number of insns and see if it is small enough to convert. 
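     With the default definition above, a branch cost of 2 gives
     MAX_CONDITIONAL_EXECUTE == 3; the limit is doubled below when an
     ELSE block is present.  Insns that match at the heads or tails of
     the two blocks are excluded from the count, since one copy of them
     can remain unconditional.  E.g. in

	if (test) { x = a; y = 1; z = b; }
	else      { x = a; y = 2; z = b; }

     only the two assignments to y need to be predicated.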
*/ then_start = first_active_insn (then_bb); then_end = last_active_insn (then_bb, TRUE); - n_insns = ce_info->num_then_insns = count_bb_insns (then_bb); + then_n_insns = ce_info->num_then_insns = count_bb_insns (then_bb); + n_insns = then_n_insns; max = MAX_CONDITIONAL_EXECUTE; if (else_bb) { + int n_matching; + max *= 2; else_start = first_active_insn (else_bb); else_end = last_active_insn (else_bb, TRUE); - n_insns += ce_info->num_else_insns = count_bb_insns (else_bb); + else_n_insns = ce_info->num_else_insns = count_bb_insns (else_bb); + n_insns += else_n_insns; + + /* Look for matching sequences at the head and tail of the two blocks, + and limit the range of insns to be converted if possible. */ + n_matching = flow_find_cross_jump (then_bb, else_bb, + &then_first_tail, &else_first_tail, + NULL); + if (then_first_tail == BB_HEAD (then_bb)) + then_start = then_end = NULL_RTX; + if (else_first_tail == BB_HEAD (else_bb)) + else_start = else_end = NULL_RTX; + + if (n_matching > 0) + { + if (then_end) + then_end = find_active_insn_before (then_bb, then_first_tail); + if (else_end) + else_end = find_active_insn_before (else_bb, else_first_tail); + n_insns -= 2 * n_matching; + } + + if (then_start && else_start) + { + int longest_match = MIN (then_n_insns - n_matching, + else_n_insns - n_matching); + n_matching + = flow_find_head_matching_sequence (then_bb, else_bb, + &then_last_head, + &else_last_head, + longest_match); + + if (n_matching > 0) + { + rtx insn; + + /* We won't pass the insns in the head sequence to + cond_exec_process_insns, so we need to test them here + to make sure that they don't clobber the condition. */ + for (insn = BB_HEAD (then_bb); + insn != NEXT_INSN (then_last_head); + insn = NEXT_INSN (insn)) + if (!LABEL_P (insn) && !NOTE_P (insn) + && !DEBUG_INSN_P (insn) + && modified_in_p (test_expr, insn)) + return FALSE; + } + + if (then_last_head == then_end) + then_start = then_end = NULL_RTX; + if (else_last_head == else_end) + else_start = else_end = NULL_RTX; + + if (n_matching > 0) + { + if (then_start) + then_start = find_active_insn_after (then_bb, then_last_head); + if (else_start) + else_start = find_active_insn_after (else_bb, else_last_head); + n_insns -= 2 * n_matching; + } + } } if (n_insns > max) @@ -572,7 +691,21 @@ cond_exec_process_if_block (ce_if_block_t * ce_info, fprintf (dump_file, "%d insn%s converted to conditional execution.\n", n_insns, (n_insns == 1) ? " was" : "s were"); - /* Merge the blocks! */ + /* Merge the blocks! If we had matching sequences, make sure to delete one + copy at the appropriate location first: delete the copy in the THEN branch + for a tail sequence so that the remaining one is executed last for both + branches, and delete the copy in the ELSE branch for a head sequence so + that the remaining one is executed first for both branches. */ + if (then_first_tail) + { + rtx from = then_first_tail; + if (!INSN_P (from)) + from = find_active_insn_after (then_bb, from); + delete_insn_chain (from, BB_END (then_bb), false); + } + if (else_last_head) + delete_insn_chain (first_active_insn (else_bb), else_last_head, false); + merge_if_block (ce_info); cond_exec_changed_p = TRUE; return TRUE; @@ -626,6 +759,9 @@ struct noce_if_info from TEST_BB. For the noce transformations, we allow the symmetric form as well. */ bool then_else_reversed; + + /* Estimated cost of the particular branch instruction. 
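+     Set by noce_find_if_block from BRANCH_COST, using the speed
+     setting of TEST_BB and the predictability of the jump edge.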
*/ + int branch_cost; }; static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int); @@ -660,7 +796,15 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep, build the store_flag insn directly. */ if (cond_complex) - cond = XEXP (SET_SRC (pc_set (if_info->jump)), 0); + { + rtx set = pc_set (if_info->jump); + cond = XEXP (SET_SRC (set), 0); + if (GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF + && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (if_info->jump)) + reversep = !reversep; + if (if_info->then_else_reversed) + reversep = !reversep; + } if (reversep) code = reversed_comparison_code (cond, if_info->jump); @@ -752,7 +896,7 @@ noce_emit_move_insn (rtx x, rtx y) } gcc_assert (start < (MEM_P (op) ? BITS_PER_UNIT : BITS_PER_WORD)); - store_bit_field (op, size, start, GET_MODE (x), y); + store_bit_field (op, size, start, 0, 0, GET_MODE (x), y); return; } @@ -806,7 +950,8 @@ noce_emit_move_insn (rtx x, rtx y) inner = XEXP (outer, 0); outmode = GET_MODE (outer); bitpos = SUBREG_BYTE (outer) * BITS_PER_UNIT; - store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos, outmode, y); + store_bit_field (inner, GET_MODE_BITSIZE (outmode), bitpos, + 0, 0, outmode, y); } /* Return sequence of instructions generated by if conversion. This @@ -894,12 +1039,12 @@ noce_try_store_flag (struct noce_if_info *if_info) int reversep; rtx target, seq; - if (GET_CODE (if_info->b) == CONST_INT + if (CONST_INT_P (if_info->b) && INTVAL (if_info->b) == STORE_FLAG_VALUE && if_info->a == const0_rtx) reversep = 0; else if (if_info->b == const0_rtx - && GET_CODE (if_info->a) == CONST_INT + && CONST_INT_P (if_info->a) && INTVAL (if_info->a) == STORE_FLAG_VALUE && (reversed_comparison_code (if_info->cond, if_info->jump) != UNKNOWN)) @@ -941,8 +1086,8 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) int normalize, can_reverse; enum machine_mode mode; - if (GET_CODE (if_info->a) == CONST_INT - && GET_CODE (if_info->b) == CONST_INT) + if (CONST_INT_P (if_info->a) + && CONST_INT_P (if_info->b)) { mode = GET_MODE (if_info->x); ifalse = INTVAL (if_info->a); @@ -963,20 +1108,20 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) normalize = 0; else if (ifalse == 0 && exact_log2 (itrue) >= 0 && (STORE_FLAG_VALUE == 1 - || BRANCH_COST >= 2)) + || if_info->branch_cost >= 2)) normalize = 1; else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse - && (STORE_FLAG_VALUE == 1 || BRANCH_COST >= 2)) + && (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2)) normalize = 1, reversep = 1; else if (itrue == -1 && (STORE_FLAG_VALUE == -1 - || BRANCH_COST >= 2)) + || if_info->branch_cost >= 2)) normalize = -1; else if (ifalse == -1 && can_reverse - && (STORE_FLAG_VALUE == -1 || BRANCH_COST >= 2)) + && (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2)) normalize = -1, reversep = 1; - else if ((BRANCH_COST >= 2 && STORE_FLAG_VALUE == -1) - || BRANCH_COST >= 3) + else if ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1) + || if_info->branch_cost >= 3) normalize = -1; else return FALSE; @@ -1107,7 +1252,7 @@ noce_try_addcc (struct noce_if_info *if_info) /* If that fails, construct conditional increment or decrement using setcc. 
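     For example, with STORE_FLAG_VALUE == 1,

	if (test) x = x + 1;

     can become

	t = (test != 0);
	x = x + t;

     (a sketch only; the exact sequence depends on the target's
     store-flag and add patterns).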
*/ - if (BRANCH_COST >= 2 + if (if_info->branch_cost >= 2 && (XEXP (if_info->a, 1) == const1_rtx || XEXP (if_info->a, 1) == constm1_rtx)) { @@ -1158,7 +1303,7 @@ noce_try_store_flag_mask (struct noce_if_info *if_info) int reversep; reversep = 0; - if ((BRANCH_COST >= 2 + if ((if_info->branch_cost >= 2 || STORE_FLAG_VALUE == -1) && ((if_info->a == const0_rtx && rtx_equal_p (if_info->b, if_info->x)) @@ -1204,6 +1349,9 @@ static rtx noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code, rtx cmp_a, rtx cmp_b, rtx vfalse, rtx vtrue) { + rtx target ATTRIBUTE_UNUSED; + int unsignedp ATTRIBUTE_UNUSED; + /* If earliest == jump, try to build the cmove insn directly. This is helpful when combine has created some complex condition (like for alpha's cmovlbs) that we can't hope to regenerate @@ -1238,10 +1386,62 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code, return NULL_RTX; #if HAVE_conditional_move - return emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode, - vtrue, vfalse, GET_MODE (x), - (code == LTU || code == GEU - || code == LEU || code == GTU)); + unsignedp = (code == LTU || code == GEU + || code == LEU || code == GTU); + + target = emit_conditional_move (x, code, cmp_a, cmp_b, VOIDmode, + vtrue, vfalse, GET_MODE (x), + unsignedp); + if (target) + return target; + + /* We might be faced with a situation like: + + x = (reg:M TARGET) + vtrue = (subreg:M (reg:N VTRUE) BYTE) + vfalse = (subreg:M (reg:N VFALSE) BYTE) + + We can't do a conditional move in mode M, but it's possible that we + could do a conditional move in mode N instead and take a subreg of + the result. + + If we can't create new pseudos, though, don't bother. */ + if (reload_completed) + return NULL_RTX; + + if (GET_CODE (vtrue) == SUBREG && GET_CODE (vfalse) == SUBREG) + { + rtx reg_vtrue = SUBREG_REG (vtrue); + rtx reg_vfalse = SUBREG_REG (vfalse); + unsigned int byte_vtrue = SUBREG_BYTE (vtrue); + unsigned int byte_vfalse = SUBREG_BYTE (vfalse); + rtx promoted_target; + + if (GET_MODE (reg_vtrue) != GET_MODE (reg_vfalse) + || byte_vtrue != byte_vfalse + || (SUBREG_PROMOTED_VAR_P (vtrue) + != SUBREG_PROMOTED_VAR_P (vfalse)) + || (SUBREG_PROMOTED_UNSIGNED_P (vtrue) + != SUBREG_PROMOTED_UNSIGNED_P (vfalse))) + return NULL_RTX; + + promoted_target = gen_reg_rtx (GET_MODE (reg_vtrue)); + + target = emit_conditional_move (promoted_target, code, cmp_a, cmp_b, + VOIDmode, reg_vtrue, reg_vfalse, + GET_MODE (reg_vtrue), unsignedp); + /* Nope, couldn't do it in that mode either. */ + if (!target) + return NULL_RTX; + + target = gen_rtx_SUBREG (GET_MODE (vtrue), promoted_target, byte_vtrue); + SUBREG_PROMOTED_VAR_P (target) = SUBREG_PROMOTED_VAR_P (vtrue); + SUBREG_PROMOTED_UNSIGNED_SET (target, SUBREG_PROMOTED_UNSIGNED_P (vtrue)); + emit_move_insn (x, target); + return x; + } + else + return NULL_RTX; #else /* We'll never get here, as noce_process_if_block doesn't call the functions involved. Ifdef code, however, should be discouraged @@ -1317,18 +1517,22 @@ noce_try_cmove_arith (struct noce_if_info *if_info) /* ??? FIXME: Magic number 5. */ if (cse_not_expected && MEM_P (a) && MEM_P (b) - && BRANCH_COST >= 5) + && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b) + && if_info->branch_cost >= 5) { + enum machine_mode address_mode + = targetm.addr_space.address_mode (MEM_ADDR_SPACE (a)); + a = XEXP (a, 0); b = XEXP (b, 0); - x = gen_reg_rtx (Pmode); + x = gen_reg_rtx (address_mode); is_mem = 1; } /* ??? We could handle this if we knew that a load from A or B could - not fault. 
This is also true if we've already loaded + not trap or fault. This is also true if we've already loaded from the address along the path from ENTRY. */ - else if (may_trap_p (a) || may_trap_p (b)) + else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b)) return FALSE; /* if (test) x = a + b; else x = c - d; @@ -1346,8 +1550,10 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if insn_rtx_cost can't be estimated. */ if (insn_a) { - insn_cost = insn_rtx_cost (PATTERN (insn_a)); - if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST)) + insn_cost + = insn_rtx_cost (PATTERN (insn_a), + optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_a))); + if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost)) return FALSE; } else @@ -1355,8 +1561,10 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if (insn_b) { - insn_cost += insn_rtx_cost (PATTERN (insn_b)); - if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (BRANCH_COST)) + insn_cost + += insn_rtx_cost (PATTERN (insn_b), + optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn_b))); + if (insn_cost == 0 || insn_cost > COSTS_N_INSNS (if_info->branch_cost)) return FALSE; } @@ -1459,15 +1667,14 @@ noce_try_cmove_arith (struct noce_if_info *if_info) /* Copy over flags as appropriate. */ if (MEM_VOLATILE_P (if_info->a) || MEM_VOLATILE_P (if_info->b)) MEM_VOLATILE_P (tmp) = 1; - if (MEM_IN_STRUCT_P (if_info->a) && MEM_IN_STRUCT_P (if_info->b)) - MEM_IN_STRUCT_P (tmp) = 1; - if (MEM_SCALAR_P (if_info->a) && MEM_SCALAR_P (if_info->b)) - MEM_SCALAR_P (tmp) = 1; if (MEM_ALIAS_SET (if_info->a) == MEM_ALIAS_SET (if_info->b)) set_mem_alias_set (tmp, MEM_ALIAS_SET (if_info->a)); set_mem_align (tmp, MIN (MEM_ALIGN (if_info->a), MEM_ALIGN (if_info->b))); + gcc_assert (MEM_ADDR_SPACE (if_info->a) == MEM_ADDR_SPACE (if_info->b)); + set_mem_addr_space (tmp, MEM_ADDR_SPACE (if_info->a)); + noce_emit_move_insn (if_info->x, tmp); } else if (target != x) @@ -1525,7 +1732,7 @@ noce_get_alt_condition (struct noce_if_info *if_info, rtx target, make equivalent types of changes) to get the constants we need if they're off by one in the right direction. */ - if (GET_CODE (target) == CONST_INT) + if (CONST_INT_P (target)) { enum rtx_code code = GET_CODE (if_info->cond); rtx op_a = XEXP (if_info->cond, 0); @@ -1535,21 +1742,22 @@ noce_get_alt_condition (struct noce_if_info *if_info, rtx target, /* First, look to see if we put a constant in a register. */ prev_insn = prev_nonnote_insn (if_info->cond_earliest); if (prev_insn - && BLOCK_NUM (prev_insn) == BLOCK_NUM (if_info->cond_earliest) + && BLOCK_FOR_INSN (prev_insn) + == BLOCK_FOR_INSN (if_info->cond_earliest) && INSN_P (prev_insn) && GET_CODE (PATTERN (prev_insn)) == SET) { rtx src = find_reg_equal_equiv_note (prev_insn); if (!src) src = SET_SRC (PATTERN (prev_insn)); - if (GET_CODE (src) == CONST_INT) + if (CONST_INT_P (src)) { if (rtx_equal_p (op_a, SET_DEST (PATTERN (prev_insn)))) op_a = src; else if (rtx_equal_p (op_b, SET_DEST (PATTERN (prev_insn)))) op_b = src; - if (GET_CODE (op_a) == CONST_INT) + if (CONST_INT_P (op_a)) { rtx tmp = op_a; op_a = op_b; @@ -1561,7 +1769,7 @@ noce_get_alt_condition (struct noce_if_info *if_info, rtx target, /* Now, look to see if we can get the right constant by adjusting the conditional. 
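	 For example, if the condition is "x < 4" and the constant we
	 need is 3, treat the comparison as "x <= 3".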
*/ - if (GET_CODE (op_b) == CONST_INT) + if (CONST_INT_P (op_b)) { HOST_WIDE_INT desired_val = INTVAL (target); HOST_WIDE_INT actual_val = INTVAL (op_b); @@ -1730,13 +1938,20 @@ noce_try_minmax (struct noce_if_info *if_info) return TRUE; } -/* Convert "if (a < 0) x = -a; else x = a;" to "x = abs(a);", etc. */ +/* Convert "if (a < 0) x = -a; else x = a;" to "x = abs(a);", + "if (a < 0) x = ~a; else x = a;" to "x = one_cmpl_abs(a);", + etc. */ static int noce_try_abs (struct noce_if_info *if_info) { rtx cond, earliest, target, seq, a, b, c; int negate; + bool one_cmpl = false; + + /* Reject modes with signed zeros. */ + if (HONOR_SIGNED_ZEROS (GET_MODE (if_info->x))) + return FALSE; /* Recognize A and B as constituting an ABS or NABS. The canonical form is a branch around the negation, taken when the object is the @@ -1750,6 +1965,17 @@ noce_try_abs (struct noce_if_info *if_info) c = a; a = b; b = c; negate = 1; } + else if (GET_CODE (a) == NOT && rtx_equal_p (XEXP (a, 0), b)) + { + negate = 0; + one_cmpl = true; + } + else if (GET_CODE (b) == NOT && rtx_equal_p (XEXP (b, 0), a)) + { + c = a; a = b; b = c; + negate = 1; + one_cmpl = true; + } else return FALSE; @@ -1774,7 +2000,7 @@ noce_try_abs (struct noce_if_info *if_info) { rtx set, insn = prev_nonnote_insn (earliest); if (insn - && BLOCK_NUM (insn) == BLOCK_NUM (earliest) + && BLOCK_FOR_INSN (insn) == BLOCK_FOR_INSN (earliest) && (set = single_set (insn)) && rtx_equal_p (SET_DEST (set), c)) { @@ -1821,13 +2047,23 @@ noce_try_abs (struct noce_if_info *if_info) } start_sequence (); - - target = expand_abs_nojump (GET_MODE (if_info->x), b, if_info->x, 1); + if (one_cmpl) + target = expand_one_cmpl_abs_nojump (GET_MODE (if_info->x), b, + if_info->x); + else + target = expand_abs_nojump (GET_MODE (if_info->x), b, if_info->x, 1); /* ??? It's a quandary whether cmove would be better here, especially for integers. Perhaps combine will clean things up. */ if (target && negate) - target = expand_simple_unop (GET_MODE (target), NEG, target, if_info->x, 0); + { + if (one_cmpl) + target = expand_simple_unop (GET_MODE (target), NOT, target, + if_info->x, 0); + else + target = expand_simple_unop (GET_MODE (target), NEG, target, + if_info->x, 0); + } if (! target) { @@ -1857,7 +2093,7 @@ noce_try_sign_mask (struct noce_if_info *if_info) rtx cond, t, m, c, seq; enum machine_mode mode; enum rtx_code code; - bool b_unconditional; + bool t_unconditional; cond = if_info->cond; code = GET_CODE (cond); @@ -1886,15 +2122,19 @@ noce_try_sign_mask (struct noce_if_info *if_info) if (GET_MODE (m) != mode) return FALSE; - /* This is only profitable if T is cheap, or T is unconditionally - executed/evaluated in the original insn sequence. The latter - happens if INSN_B was taken from TEST_BB, or if there was no - INSN_B which can happen for e.g. conditional stores to memory. */ - b_unconditional = (if_info->insn_b == NULL_RTX - || BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb); - if (rtx_cost (t, SET) >= COSTS_N_INSNS (2) - && (!b_unconditional - || t != if_info->b)) + /* This is only profitable if T is unconditionally executed/evaluated in the + original insn sequence or T is cheap. The former happens if B is the + non-zero (T) value and if INSN_B was taken from TEST_BB, or there was no + INSN_B which can happen for e.g. conditional stores to memory. For the + cost computation use the block TEST_BB where the evaluation will end up + after the transformation. 
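+     For reference, the transformation being priced here is roughly
+
+	if (c < 0) x = t; else x = 0;
+     =>
+	x = t & (c >> (GET_MODE_BITSIZE (mode) - 1));
+
+     so T ends up being evaluated unconditionally.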
*/ + t_unconditional = + (t == if_info->b + && (if_info->insn_b == NULL_RTX + || BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb)); + if (!(t_unconditional + || (set_src_cost (t, optimize_bb_for_speed_p (if_info->test_bb)) + < COSTS_N_INSNS (2)))) return FALSE; start_sequence (); @@ -1952,7 +2192,7 @@ noce_try_bitop (struct noce_if_info *if_info) if (GET_CODE (cond) == ZERO_EXTRACT) { if (XEXP (cond, 1) != const1_rtx - || GET_CODE (XEXP (cond, 2)) != CONST_INT + || !CONST_INT_P (XEXP (cond, 2)) || ! rtx_equal_p (x, XEXP (cond, 0))) return FALSE; bitnum = INTVAL (XEXP (cond, 2)); @@ -1970,7 +2210,7 @@ noce_try_bitop (struct noce_if_info *if_info) { /* Check for "if (X & C) x = x op C". */ if (! rtx_equal_p (x, XEXP (a, 0)) - || GET_CODE (XEXP (a, 1)) != CONST_INT + || !CONST_INT_P (XEXP (a, 1)) || (INTVAL (XEXP (a, 1)) & GET_MODE_MASK (mode)) != (unsigned HOST_WIDE_INT) 1 << bitnum) return FALSE; @@ -1996,7 +2236,7 @@ noce_try_bitop (struct noce_if_info *if_info) { /* Check for "if (X & C) x &= ~C". */ if (! rtx_equal_p (x, XEXP (a, 0)) - || GET_CODE (XEXP (a, 1)) != CONST_INT + || !CONST_INT_P (XEXP (a, 1)) || (INTVAL (XEXP (a, 1)) & GET_MODE_MASK (mode)) != (~((HOST_WIDE_INT) 1 << bitnum) & GET_MODE_MASK (mode))) return FALSE; @@ -2055,7 +2295,9 @@ noce_get_condition (rtx jump, rtx *earliest, bool then_else_reversed) cond = XEXP (SET_SRC (set), 0); tmp = XEXP (cond, 0); - if (REG_P (tmp) && GET_MODE_CLASS (GET_MODE (tmp)) == MODE_INT) + if (REG_P (tmp) && GET_MODE_CLASS (GET_MODE (tmp)) == MODE_INT + && (GET_MODE (tmp) != BImode + || !targetm.small_register_classes_for_mode_p (BImode))) { *earliest = jump; @@ -2067,8 +2309,15 @@ noce_get_condition (rtx jump, rtx *earliest, bool then_else_reversed) /* Otherwise, fall back on canonicalize_condition to do the dirty work of manipulating MODE_CC values and COMPARE rtx codes. */ - return canonicalize_condition (jump, cond, reverse, earliest, - NULL_RTX, false, true); + tmp = canonicalize_condition (jump, cond, reverse, earliest, + NULL_RTX, false, true); + + /* We don't handle side-effects in the condition, like handling + REG_INC notes and making sure no duplicate conditions are emitted. */ + if (tmp != NULL_RTX && side_effects_p (tmp)) + return NULL_RTX; + + return tmp; } /* Return true if OP is ok for if-then-else processing. */ @@ -2076,14 +2325,14 @@ noce_get_condition (rtx jump, rtx *earliest, bool then_else_reversed) static int noce_operand_ok (const_rtx op) { + if (side_effects_p (op)) + return FALSE; + /* We special-case memories, so handle any of them with no address side effects. */ if (MEM_P (op)) return ! side_effects_p (XEXP (op, 0)); - if (side_effects_p (op)) - return FALSE; - return ! may_trap_p (op); } @@ -2121,7 +2370,7 @@ noce_mem_write_may_trap_or_fault_p (const_rtx mem) addr = XEXP (addr, 1); break; case PLUS: - if (GET_CODE (XEXP (addr, 1)) == CONST_INT) + if (CONST_INT_P (XEXP (addr, 1))) addr = XEXP (addr, 0); else return false; @@ -2164,9 +2413,7 @@ noce_can_store_speculate_p (basic_block top_bb, const_rtx mem) unconditionally before the barrier. 
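	 For example, given

	    if (test) *p = a;
	    ...
	    *p = b;

	 the conditional store can be performed unconditionally, because
	 *p is overwritten on every path before the difference could be
	 observed (the loop below gives up if a volatile access or a
	 non-const call intervenes).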
*/ if (INSN_P (insn) && (volatile_insn_p (PATTERN (insn)) - || (CALL_P (insn) - && (!CONST_OR_PURE_CALL_P (insn) - || pure_call_p (insn))))) + || (CALL_P (insn) && (!RTL_CONST_CALL_P (insn))))) return false; if (memory_modified_in_insn_p (mem, insn)) @@ -2236,25 +2483,24 @@ noce_process_if_block (struct noce_if_info *if_info) } else { - insn_b = prev_nonnote_insn (if_info->cond_earliest); + insn_b = prev_nonnote_nondebug_insn (if_info->cond_earliest); /* We're going to be moving the evaluation of B down from above COND_EARLIEST to JUMP. Make sure the relevant data is still intact. */ if (! insn_b - || BLOCK_NUM (insn_b) != BLOCK_NUM (if_info->cond_earliest) + || BLOCK_FOR_INSN (insn_b) != BLOCK_FOR_INSN (if_info->cond_earliest) || !NONJUMP_INSN_P (insn_b) || (set_b = single_set (insn_b)) == NULL_RTX || ! rtx_equal_p (x, SET_DEST (set_b)) || ! noce_operand_ok (SET_SRC (set_b)) || reg_overlap_mentioned_p (x, SET_SRC (set_b)) - || modified_between_p (SET_SRC (set_b), - PREV_INSN (if_info->cond_earliest), jump) + || modified_between_p (SET_SRC (set_b), insn_b, jump) /* Likewise with X. In particular this can happen when noce_get_condition looks farther back in the instruction stream than one might expect. */ || reg_overlap_mentioned_p (x, cond) || reg_overlap_mentioned_p (x, a) - || modified_between_p (x, PREV_INSN (if_info->cond_earliest), jump)) + || modified_between_p (x, insn_b, jump)) insn_b = set_b = NULL_RTX; } @@ -2272,15 +2518,15 @@ noce_process_if_block (struct noce_if_info *if_info) the lifetime of hard registers on small register class machines. */ orig_x = x; if (!REG_P (x) - || (SMALL_REGISTER_CLASSES - && REGNO (x) < FIRST_PSEUDO_REGISTER)) + || (HARD_REGISTER_P (x) + && targetm.small_register_classes_for_mode_p (GET_MODE (x)))) { if (GET_MODE (x) == BLKmode) return FALSE; - if (GET_MODE (x) == ZERO_EXTRACT - && (GET_CODE (XEXP (x, 1)) != CONST_INT - || GET_CODE (XEXP (x, 2)) != CONST_INT)) + if (GET_CODE (x) == ZERO_EXTRACT + && (!CONST_INT_P (XEXP (x, 1)) + || !CONST_INT_P (XEXP (x, 2)))) return FALSE; x = gen_reg_rtx (GET_MODE (GET_CODE (x) == STRICT_LOW_PART @@ -2374,7 +2620,7 @@ noce_process_if_block (struct noce_if_info *if_info) if (HAVE_conditional_move && noce_try_cmove (if_info)) goto success; - if (! HAVE_conditional_execution) + if (! targetm.have_conditional_execution ()) { if (noce_try_store_flag_constants (if_info)) goto success; @@ -2448,7 +2694,8 @@ noce_process_if_block (struct noce_if_info *if_info) REGS. COND is the condition we will test. 
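   For example, a block consisting only of

	x = a;
	y = 7;

   qualifies: each insn is a simple set of a register from a register
   or a constant, and no source is modified by COND, by an earlier set
   in the block, or later in the block.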
*/ static int -check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) *regs, rtx cond) +check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs, + rtx cond) { rtx insn; @@ -2462,7 +2709,7 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) *regs, rtx con { rtx set, dest, src; - if (!INSN_P (insn) || JUMP_P (insn)) + if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn)) continue; set = single_set (insn); if (!set) @@ -2471,7 +2718,8 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) *regs, rtx con dest = SET_DEST (set); src = SET_SRC (set); if (!REG_P (dest) - || (SMALL_REGISTER_CLASSES && HARD_REGISTER_P (dest))) + || (HARD_REGISTER_P (dest) + && targetm.small_register_classes_for_mode_p (GET_MODE (dest)))) return FALSE; if (!CONSTANT_P (src) && !register_operand (src, VOIDmode)) @@ -2509,7 +2757,7 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) *regs, rtx con vals[REGNO (dest)] = src; - VEC_safe_push (int, heap, regs, REGNO (dest)); + VEC_safe_push (int, heap, *regs, REGNO (dest)); } return TRUE; @@ -2540,7 +2788,8 @@ cond_move_convert_if_block (struct noce_if_info *if_infop, rtx set, target, dest, t, e; unsigned int regno; - if (!INSN_P (insn) || JUMP_P (insn)) + /* ??? Maybe emit conditional debug insn? */ + if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn)) continue; set = single_set (insn); gcc_assert (set && REG_P (SET_DEST (set))); @@ -2610,9 +2859,14 @@ cond_move_process_if_block (struct noce_if_info *if_info) memset (else_vals, 0, size); /* Make sure the blocks are suitable. */ - if (!check_cond_move_block (then_bb, then_vals, then_regs, cond) - || (else_bb && !check_cond_move_block (else_bb, else_vals, else_regs, cond))) - return FALSE; + if (!check_cond_move_block (then_bb, then_vals, &then_regs, cond) + || (else_bb + && !check_cond_move_block (else_bb, else_vals, &else_regs, cond))) + { + VEC_free (int, heap, then_regs); + VEC_free (int, heap, else_regs); + return FALSE; + } /* Make sure the blocks can be used together. If the same register is set in both blocks, and is not set to a constant in both @@ -2621,7 +2875,7 @@ cond_move_process_if_block (struct noce_if_info *if_info) source register does not change after the assignment. Also count the number of registers set in only one of the blocks. */ c = 0; - for (i = 0; VEC_iterate (int, then_regs, i, reg); i++) + FOR_EACH_VEC_ELT (int, then_regs, i, reg) { if (!then_vals[reg] && !else_vals[reg]) continue; @@ -2633,12 +2887,16 @@ cond_move_process_if_block (struct noce_if_info *if_info) if (!CONSTANT_P (then_vals[reg]) && !CONSTANT_P (else_vals[reg]) && !rtx_equal_p (then_vals[reg], else_vals[reg])) - return FALSE; + { + VEC_free (int, heap, then_regs); + VEC_free (int, heap, else_regs); + return FALSE; + } } } /* Finish off c for MAX_CONDITIONAL_EXECUTE. */ - for (i = 0; VEC_iterate (int, else_regs, i, reg); ++i) + FOR_EACH_VEC_ELT (int, else_regs, i, reg) if (!then_vals[reg]) ++c; @@ -2647,7 +2905,11 @@ cond_move_process_if_block (struct noce_if_info *if_info) branches, since if we convert we are going to always execute them. */ if (c > MAX_CONDITIONAL_EXECUTE) - return FALSE; + { + VEC_free (int, heap, then_regs); + VEC_free (int, heap, else_regs); + return FALSE; + } /* Try to emit the conditional moves. First do the then block, then do anything left in the else blocks. 
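     E.g. for "if (test) { x = a; y = b; } else { x = c; }" the THEN
     pass emits x = (test ? a : c) and y = (test ? b : y), and the
     ELSE pass then has nothing left to do.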
*/ @@ -2659,11 +2921,17 @@ cond_move_process_if_block (struct noce_if_info *if_info) then_vals, else_vals, true))) { end_sequence (); + VEC_free (int, heap, then_regs); + VEC_free (int, heap, else_regs); return FALSE; } seq = end_ifcvt_sequence (if_info); if (!seq) - return FALSE; + { + VEC_free (int, heap, then_regs); + VEC_free (int, heap, else_regs); + return FALSE; + } loc_insn = first_active_insn (then_bb); if (!loc_insn) @@ -2696,7 +2964,6 @@ cond_move_process_if_block (struct noce_if_info *if_info) VEC_free (int, heap, then_regs); VEC_free (int, heap, else_regs); - return TRUE; } @@ -2710,8 +2977,7 @@ cond_move_process_if_block (struct noce_if_info *if_info) Return TRUE if we were successful at converting the block. */ static int -noce_find_if_block (basic_block test_bb, - edge then_edge, edge else_edge, +noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge, int pass) { basic_block then_bb, else_bb, join_bb; @@ -2792,9 +3058,7 @@ noce_find_if_block (basic_block test_bb, return FALSE; /* If this is not a standard conditional jump, we can't parse it. */ - cond = noce_get_condition (jump, - &cond_earliest, - then_else_reversed); + cond = noce_get_condition (jump, &cond_earliest, then_else_reversed); if (!cond) return FALSE; @@ -2812,6 +3076,8 @@ noce_find_if_block (basic_block test_bb, if_info.cond_earliest = cond_earliest; if_info.jump = jump; if_info.then_else_reversed = then_else_reversed; + if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb), + predictable_edge_p (then_edge)); /* Do the real work. */ @@ -2984,7 +3250,7 @@ find_if_header (basic_block test_bb, int pass) /* Otherwise this must be a multiway branch of some sort. */ return NULL; - memset (&ce_info, '\0', sizeof (ce_info)); + memset (&ce_info, 0, sizeof (ce_info)); ce_info.test_bb = test_bb; ce_info.then_bb = then_edge->dest; ce_info.else_bb = else_edge->dest; @@ -2994,20 +3260,22 @@ find_if_header (basic_block test_bb, int pass) IFCVT_INIT_EXTRA_FIELDS (&ce_info); #endif - if (! reload_completed + if (!reload_completed && noce_find_if_block (test_bb, then_edge, else_edge, pass)) goto success; - if (HAVE_conditional_execution && reload_completed + if (reload_completed + && targetm.have_conditional_execution () && cond_exec_find_if_block (&ce_info)) goto success; - if (HAVE_trap && HAVE_conditional_trap + if (HAVE_trap + && optab_handler (ctrap_optab, word_mode) != CODE_FOR_nothing && find_cond_trap (test_bb, then_edge, else_edge)) goto success; if (dom_info_state (CDI_POST_DOMINATORS) >= DOM_NO_FAST_QUERY - && (! HAVE_conditional_execution || reload_completed)) + && (reload_completed || !targetm.have_conditional_execution ())) { if (find_if_case_1 (test_bb, then_edge, else_edge)) goto success; @@ -3081,6 +3349,7 @@ block_jumps_and_fallthru_p (basic_block cur_bb, basic_block target_bb) if (INSN_P (insn) && !JUMP_P (insn) + && !DEBUG_INSN_P (insn) && GET_CODE (PATTERN (insn)) != USE && GET_CODE (PATTERN (insn)) != CLOBBER) n_insns++; @@ -3112,8 +3381,8 @@ cond_exec_find_if_block (struct ce_if_block * ce_info) ce_info->last_test_bb = test_bb; /* We only ever should get here after reload, - and only if we have conditional execution. */ - gcc_assert (HAVE_conditional_execution && reload_completed); + and if we have conditional execution. 
*/ + gcc_assert (reload_completed && targetm.have_conditional_execution ()); /* Discover if any fall through predecessors of the current test basic block were && tests (which jump to the else block) or || tests (which jump to @@ -3194,7 +3463,8 @@ cond_exec_find_if_block (struct ce_if_block * ce_info) if (EDGE_COUNT (then_bb->succs) > 0 && (!single_succ_p (then_bb) || (single_succ_edge (then_bb)->flags & EDGE_COMPLEX) - || (epilogue_completed && tablejump_p (BB_END (then_bb), NULL, NULL)))) + || (epilogue_completed + && tablejump_p (BB_END (then_bb), NULL, NULL)))) return FALSE; /* If the THEN block has no successors, conditional execution can still @@ -3240,8 +3510,9 @@ cond_exec_find_if_block (struct ce_if_block * ce_info) else if (single_succ_p (else_bb) && single_succ (then_bb) == single_succ (else_bb) && single_pred_p (else_bb) - && ! (single_succ_edge (else_bb)->flags & EDGE_COMPLEX) - && ! (epilogue_completed && tablejump_p (BB_END (else_bb), NULL, NULL))) + && !(single_succ_edge (else_bb)->flags & EDGE_COMPLEX) + && !(epilogue_completed + && tablejump_p (BB_END (else_bb), NULL, NULL))) join_bb = single_succ (else_bb); /* Otherwise it is not an IF-THEN or IF-THEN-ELSE combination. */ @@ -3534,7 +3805,8 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge) basic_block then_bb = then_edge->dest; basic_block else_bb = else_edge->dest; basic_block new_bb; - int then_bb_index; + int then_bb_index, then_prob; + rtx else_target = NULL_RTX; /* If we are partitioning hot/cold basic blocks, we don't want to mess up unconditional or indirect jumps that cross between hot @@ -3577,13 +3849,28 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge) "\nIF-CASE-1 found, start %d, then %d\n", test_bb->index, then_bb->index); - /* THEN is small. */ - if (! cheap_bb_rtx_cost_p (then_bb, COSTS_N_INSNS (BRANCH_COST))) + if (then_edge->probability) + then_prob = REG_BR_PROB_BASE - then_edge->probability; + else + then_prob = REG_BR_PROB_BASE / 2; + + /* We're speculating from the THEN path, we want to make sure the cost + of speculation is within reason. */ + if (! cheap_bb_rtx_cost_p (then_bb, then_prob, + COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src), + predictable_edge_p (then_edge))))) return FALSE; + if (else_bb == EXIT_BLOCK_PTR) + { + rtx jump = BB_END (else_edge->src); + gcc_assert (JUMP_P (jump)); + else_target = JUMP_LABEL (jump); + } + /* Registers set are dead, or are predicable. */ if (! 
dead_or_predicable (test_bb, then_bb, else_bb, - single_succ (then_bb), 1)) + single_succ_edge (then_bb), 1)) return FALSE; /* Conversion went ok, including moving the insns and fixing up the @@ -3600,6 +3887,9 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge) redirect_edge_succ (FALLTHRU_EDGE (test_bb), else_bb); new_bb = 0; } + else if (else_bb == EXIT_BLOCK_PTR) + new_bb = force_nonfallthru_and_redirect (FALLTHRU_EDGE (test_bb), + else_bb, else_target); else new_bb = redirect_edge_and_branch_force (FALLTHRU_EDGE (test_bb), else_bb); @@ -3635,7 +3925,7 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge) basic_block then_bb = then_edge->dest; basic_block else_bb = else_edge->dest; edge else_succ; - rtx note; + int then_prob, else_prob; /* If we are partitioning hot/cold basic blocks, we don't want to mess up unconditional or indirect jumps that cross between hot @@ -3674,9 +3964,19 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge) if (then_bb->index < NUM_FIXED_BLOCKS) return FALSE; + if (else_edge->probability) + { + else_prob = else_edge->probability; + then_prob = REG_BR_PROB_BASE - else_prob; + } + else + { + else_prob = REG_BR_PROB_BASE / 2; + then_prob = REG_BR_PROB_BASE / 2; + } + /* ELSE is predicted or SUCC(ELSE) postdominates THEN. */ - note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX); - if (note && INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) + if (else_prob > then_prob) ; else if (else_succ->dest->index < NUM_FIXED_BLOCKS || dominated_by_p (CDI_POST_DOMINATORS, then_bb, @@ -3691,12 +3991,15 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge) "\nIF-CASE-2 found, start %d, else %d\n", test_bb->index, else_bb->index); - /* ELSE is small. */ - if (! cheap_bb_rtx_cost_p (else_bb, COSTS_N_INSNS (BRANCH_COST))) + /* We're speculating from the ELSE path, we want to make sure the cost + of speculation is within reason. */ + if (! cheap_bb_rtx_cost_p (else_bb, else_prob, + COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src), + predictable_edge_p (else_edge))))) return FALSE; /* Registers set are dead, or are predicable. */ - if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ->dest, 0)) + if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ, 0)) return FALSE; /* Conversion went ok, including moving the insns and fixing up the @@ -3715,28 +4018,25 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge) return TRUE; } -/* A subroutine of dead_or_predicable called through for_each_rtx. - Return 1 if a memory is found. */ - -static int -find_memory (rtx *px, void *data ATTRIBUTE_UNUSED) -{ - return MEM_P (*px); -} - /* Used by the code above to perform the actual rtl transformations. Return TRUE if successful. TEST_BB is the block containing the conditional branch. MERGE_BB - is the block containing the code to manipulate. NEW_DEST is the - label TEST_BB should be branching to after the conversion. + is the block containing the code to manipulate. DEST_EDGE is an + edge representing a jump to the join block; after the conversion, + TEST_BB should be branching to its destination. REVERSEP is true if the sense of the branch should be reversed. 
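   For example, when MERGE_BB contains just "x = a", the assignment
   can be hoisted above the branch in TEST_BB if X is dead on the
   path through OTHER_BB, or, on targets with conditional execution,
   predicated on the branch condition instead.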
*/ static int dead_or_predicable (basic_block test_bb, basic_block merge_bb, - basic_block other_bb, basic_block new_dest, int reversep) + basic_block other_bb, edge dest_edge, int reversep) { - rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX; + basic_block new_dest = dest_edge->dest; + rtx head, end, jump, earliest = NULL_RTX, old_dest; + bitmap merge_set = NULL; + /* Number of pending changes. */ + int n_validated_changes = 0; + rtx new_dest_label = NULL_RTX; jump = BB_END (test_bb); @@ -3744,6 +4044,9 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, head = BB_HEAD (merge_bb); end = BB_END (merge_bb); + while (DEBUG_INSN_P (end) && end != head) + end = PREV_INSN (end); + /* If merge_bb ends with a tablejump, predicating/moving insn's into test_bb and then deleting merge_bb will result in the jumptable that follows merge_bb being removed along with merge_bb and then we @@ -3753,6 +4056,8 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, if (LABEL_P (head)) head = NEXT_INSN (head); + while (DEBUG_INSN_P (head) && head != end) + head = NEXT_INSN (head); if (NOTE_P (head)) { if (head == end) @@ -3761,6 +4066,8 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, goto no_body; } head = NEXT_INSN (head); + while (DEBUG_INSN_P (head) && head != end) + head = NEXT_INSN (head); } if (JUMP_P (end)) @@ -3771,12 +4078,14 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, goto no_body; } end = PREV_INSN (end); + while (DEBUG_INSN_P (end) && end != head) + end = PREV_INSN (end); } /* Disable handling dead code by conditional execution if the machine needs to do anything funny with the tests, etc. */ #ifndef IFCVT_MODIFY_TESTS - if (HAVE_conditional_execution) + if (targetm.have_conditional_execution ()) { /* In the conditional execution case, we have things easy. We know the condition is reversible. We don't have to check life info @@ -3805,136 +4114,116 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, prob_val = GEN_INT (REG_BR_PROB_BASE - INTVAL (prob_val)); } - if (! cond_exec_process_insns ((ce_if_block_t *)0, head, end, cond, - prob_val, 0)) - goto cancel; + if (cond_exec_process_insns (NULL, head, end, cond, prob_val, 0) + && verify_changes (0)) + n_validated_changes = num_validated_changes (); + else + cancel_changes (0); earliest = jump; } - else #endif + + /* If we allocated new pseudos (e.g. in the conditional move + expander called from noce_emit_cmove), we must resize the + array first. */ + if (max_regno < max_reg_num ()) + max_regno = max_reg_num (); + + /* Try the NCE path if the CE path did not result in any changes. */ + if (n_validated_changes == 0) { + rtx cond, insn; + regset live; + bool success; + /* In the non-conditional execution case, we have to verify that there are no trapping operations, no calls, no references to memory, and that any registers modified are dead at the branch site. */ - rtx insn, cond, prev; - bitmap merge_set, test_live, test_set; - unsigned i, fail = 0; - bitmap_iterator bi; - - /* Check for no calls or trapping operations. */ - for (insn = head; ; insn = NEXT_INSN (insn)) - { - if (CALL_P (insn)) - return FALSE; - if (INSN_P (insn)) - { - if (may_trap_p (PATTERN (insn))) - return FALSE; - - /* ??? Even non-trapping memories such as stack frame - references must be avoided. For stores, we collect - no lifetime info; for reads, we'd have to assert - true_dependence false against every store in the - TEST range. 
*/ - if (for_each_rtx (&PATTERN (insn), find_memory, NULL)) - return FALSE; - } - if (insn == end) - break; - } - - if (! any_condjump_p (jump)) + if (!any_condjump_p (jump)) return FALSE; /* Find the extent of the conditional. */ cond = noce_get_condition (jump, &earliest, false); - if (! cond) + if (!cond) return FALSE; - /* Collect: - MERGE_SET = set of registers set in MERGE_BB - TEST_LIVE = set of registers live at EARLIEST - TEST_SET = set of registers set between EARLIEST and the - end of the block. */ + live = BITMAP_ALLOC (®_obstack); + simulate_backwards_to_point (merge_bb, live, end); + success = can_move_insns_across (head, end, earliest, jump, + merge_bb, live, + df_get_live_in (other_bb), NULL); + BITMAP_FREE (live); + if (!success) + return FALSE; + /* Collect the set of registers set in MERGE_BB. */ merge_set = BITMAP_ALLOC (®_obstack); - test_live = BITMAP_ALLOC (®_obstack); - test_set = BITMAP_ALLOC (®_obstack); - - /* ??? bb->local_set is only valid during calculate_global_regs_live, - so we must recompute usage for MERGE_BB. Not so bad, I suppose, - since we've already asserted that MERGE_BB is small. */ - /* If we allocated new pseudos (e.g. in the conditional move - expander called from noce_emit_cmove), we must resize the - array first. */ - if (max_regno < max_reg_num ()) - max_regno = max_reg_num (); FOR_BB_INSNS (merge_bb, insn) + if (NONDEBUG_INSN_P (insn)) + df_simulate_find_defs (insn, merge_set); + +#ifdef HAVE_simple_return + /* If shrink-wrapping, disable this optimization when test_bb is + the first basic block and merge_bb exits. The idea is to not + move code setting up a return register as that may clobber a + register used to pass function parameters, which then must be + saved in caller-saved regs. A caller-saved reg requires the + prologue, killing a shrink-wrap opportunity. */ + if ((flag_shrink_wrap && HAVE_simple_return && !epilogue_completed) + && ENTRY_BLOCK_PTR->next_bb == test_bb + && single_succ_p (new_dest) + && single_succ (new_dest) == EXIT_BLOCK_PTR + && bitmap_intersect_p (df_get_live_in (new_dest), merge_set)) { - if (INSN_P (insn)) + regset return_regs; + unsigned int i; + + return_regs = BITMAP_ALLOC (®_obstack); + + /* Start off with the intersection of regs used to pass + params and regs used to return values. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (FUNCTION_ARG_REGNO_P (i) + && targetm.calls.function_value_regno_p (i)) + bitmap_set_bit (return_regs, INCOMING_REGNO (i)); + + bitmap_and_into (return_regs, df_get_live_out (ENTRY_BLOCK_PTR)); + bitmap_and_into (return_regs, df_get_live_in (EXIT_BLOCK_PTR)); + if (!bitmap_empty_p (return_regs)) { - unsigned int uid = INSN_UID (insn); - struct df_ref **def_rec; - for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + FOR_BB_INSNS_REVERSE (new_dest, insn) + if (NONDEBUG_INSN_P (insn)) + { + df_ref *def_rec; + unsigned int uid = INSN_UID (insn); + + /* If this insn sets any reg in return_regs.. */ + for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) + { + df_ref def = *def_rec; + unsigned r = DF_REF_REGNO (def); + + if (bitmap_bit_p (return_regs, r)) + break; + } + /* ..then add all reg uses to the set of regs + we're interested in. 
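+		     This walks the use-def chain backwards: if the
+		     return value is built as, say, r0 = r1 + r2, then
+		     r1 and r2 become interesting as well, and a
+		     MERGE_BB clobbering them also blocks this
+		     transformation (preserving the shrink-wrap
+		     opportunity).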
*/ + if (*def_rec) + df_simulate_uses (insn, return_regs); + } + if (bitmap_intersect_p (merge_set, return_regs)) { - struct df_ref *def = *def_rec; - bitmap_set_bit (merge_set, DF_REF_REGNO (def)); + BITMAP_FREE (return_regs); + BITMAP_FREE (merge_set); + return FALSE; } } + BITMAP_FREE (return_regs); } - - /* For small register class machines, don't lengthen lifetimes of - hard registers before reload. */ - if (SMALL_REGISTER_CLASSES && ! reload_completed) - { - EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi) - { - if (i < FIRST_PSEUDO_REGISTER - && ! fixed_regs[i] - && ! global_regs[i]) - fail = 1; - } - } - - /* For TEST, we're interested in a range of insns, not a whole block. - Moreover, we're interested in the insns live from OTHER_BB. */ - - /* The loop below takes the set of live registers - after JUMP, and calculates the live set before EARLIEST. */ - bitmap_copy (test_live, df_get_live_in (other_bb)); - df_simulate_artificial_refs_at_end (test_bb, test_live); - for (insn = jump; ; insn = prev) - { - if (INSN_P (insn)) - { - df_simulate_find_defs (insn, test_set); - df_simulate_one_insn_backwards (test_bb, insn, test_live); - } - prev = PREV_INSN (insn); - if (insn == earliest) - break; - } - - /* We can perform the transformation if - MERGE_SET & (TEST_SET | TEST_LIVE) - and - TEST_SET & DF_LIVE_IN (merge_bb) - are empty. */ - - if (bitmap_intersect_p (test_set, merge_set) - || bitmap_intersect_p (test_live, merge_set) - || bitmap_intersect_p (test_set, df_get_live_in (merge_bb))) - fail = 1; - - BITMAP_FREE (merge_set); - BITMAP_FREE (test_live); - BITMAP_FREE (test_set); - - if (fail) - return FALSE; +#endif } no_body: @@ -3945,19 +4234,27 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, old_dest = JUMP_LABEL (jump); if (other_bb != new_dest) { - new_label = block_label (new_dest); + if (JUMP_P (BB_END (dest_edge->src))) + new_dest_label = JUMP_LABEL (BB_END (dest_edge->src)); + else if (new_dest == EXIT_BLOCK_PTR) + new_dest_label = ret_rtx; + else + new_dest_label = block_label (new_dest); + if (reversep - ? ! invert_jump_1 (jump, new_label) - : ! redirect_jump_1 (jump, new_label)) + ? ! invert_jump_1 (jump, new_dest_label) + : ! redirect_jump_1 (jump, new_dest_label)) goto cancel; } - if (! apply_change_group ()) - return FALSE; + if (verify_changes (n_validated_changes)) + confirm_change_group (); + else + goto cancel; if (other_bb != new_dest) { - redirect_jump_2 (jump, old_dest, new_label, 0, reversep); + redirect_jump_2 (jump, old_dest, new_dest_label, 0, reversep); redirect_edge_succ (BRANCH_EDGE (test_bb), new_dest); if (reversep) @@ -3982,8 +4279,8 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, if (end == BB_END (merge_bb)) BB_END (merge_bb) = PREV_INSN (head); - /* PR 21767: When moving insns above a conditional branch, REG_EQUAL - notes might become invalid. */ + /* PR 21767: when moving insns above a conditional branch, the REG_EQUAL + notes being moved might become invalid. */ insn = head; do { @@ -3995,10 +4292,24 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, if (! note) continue; set = single_set (insn); - if (!set || !function_invariant_p (SET_SRC (set))) + if (!set || !function_invariant_p (SET_SRC (set)) + || !function_invariant_p (XEXP (note, 0))) remove_note (insn, note); } while (insn != end && (insn = NEXT_INSN (insn))); + /* PR46315: when moving insns above a conditional branch, the REG_EQUAL + notes referring to the registers being set might become invalid. 
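+     After the move those registers are set on paths where they
+     previously were not, so an equivalence recorded for one of them
+     elsewhere in the function may no longer hold; drop all such
+     notes.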
*/ + if (merge_set) + { + unsigned i; + bitmap_iterator bi; + + EXECUTE_IF_SET_IN_BITMAP (merge_set, 0, i, bi) + remove_reg_equal_equiv_notes_for_regno (i); + + BITMAP_FREE (merge_set); + } + reorder_insns (head, end, PREV_INSN (earliest)); } @@ -4015,6 +4326,10 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb, cancel: cancel_changes (0); + + if (merge_set) + BITMAP_FREE (merge_set); + return FALSE; } @@ -4066,14 +4381,19 @@ if_convert (void) FOR_EACH_BB (bb) { basic_block new_bb; - while (!df_get_bb_dirty (bb) + while (!df_get_bb_dirty (bb) && (new_bb = find_if_header (bb, pass)) != NULL) bb = new_bb; } #ifdef IFCVT_MULTIPLE_DUMPS if (dump_file && cond_exec_changed_p) - print_rtl_with_bb (dump_file, get_insns ()); + { + if (dump_flags & TDF_SLIM) + print_rtl_slim_with_bb (dump_file, get_insns (), dump_flags); + else + print_rtl_with_bb (dump_file, get_insns ()); + } #endif } while (cond_exec_changed_p); @@ -4139,8 +4459,10 @@ rest_of_handle_if_conversion (void) return 0; } -struct tree_opt_pass pass_rtl_ifcvt = +struct rtl_opt_pass pass_rtl_ifcvt = { + { + RTL_PASS, "ce1", /* name */ gate_handle_if_conversion, /* gate */ rest_of_handle_if_conversion, /* execute */ @@ -4153,8 +4475,8 @@ struct tree_opt_pass pass_rtl_ifcvt = 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_df_finish | TODO_verify_rtl_sharing | - TODO_dump_func, /* todo_flags_finish */ - 'C' /* letter */ + 0 /* todo_flags_finish */ + } }; static bool @@ -4174,8 +4496,10 @@ rest_of_handle_if_after_combine (void) return 0; } -struct tree_opt_pass pass_if_after_combine = +struct rtl_opt_pass pass_if_after_combine = { + { + RTL_PASS, "ce2", /* name */ gate_handle_if_after_combine, /* gate */ rest_of_handle_if_after_combine, /* execute */ @@ -4188,9 +4512,8 @@ struct tree_opt_pass pass_if_after_combine = 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_df_finish | TODO_verify_rtl_sharing | - TODO_dump_func | - TODO_ggc_collect, /* todo_flags_finish */ - 'C' /* letter */ + TODO_ggc_collect /* todo_flags_finish */ + } }; @@ -4209,8 +4532,10 @@ rest_of_handle_if_after_reload (void) } -struct tree_opt_pass pass_if_after_reload = +struct rtl_opt_pass pass_if_after_reload = { + { + RTL_PASS, "ce3", /* name */ gate_handle_if_after_reload, /* gate */ rest_of_handle_if_after_reload, /* execute */ @@ -4223,7 +4548,6 @@ struct tree_opt_pass pass_if_after_reload = 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_df_finish | TODO_verify_rtl_sharing | - TODO_dump_func | - TODO_ggc_collect, /* todo_flags_finish */ - 'E' /* letter */ + TODO_ggc_collect /* todo_flags_finish */ + } };