PR c++/54325

[pf3gnuchains/gcc-fork.git] / gcc / ifcvt.c
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c

index cf3d245..49b489e 100644 (file)
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -44,6 +44,7 @@
  #include "tree-pass.h"
  #include "df.h"
  #include "vec.h"
+#include "pointer-set.h"
  #include "vecprim.h"
  #include "dbgcnt.h"
  
@@ -85,7 +86,7 @@ static int cond_exec_changed_p;
  
  /* Forward references.  */
  static int count_bb_insns (const_basic_block);
-static bool cheap_bb_rtx_cost_p (const_basic_block, int);
+static bool cheap_bb_rtx_cost_p (const_basic_block, int, int);
  static rtx first_active_insn (basic_block);
  static rtx last_active_insn (basic_block, int);
  static rtx find_active_insn_before (basic_block, rtx);
@@ -131,20 +132,31 @@ count_bb_insns (const_basic_block bb)
  
  /* Determine whether the total insn_rtx_cost on non-jump insns in
     basic block BB is less than MAX_COST.  This function returns
-   false if the cost of any instruction could not be estimated.  */
+   false if the cost of any instruction could not be estimated. 
+
+   The cost of the non-jump insns in BB is scaled by REG_BR_PROB_BASE
+   as those insns are being speculated.  MAX_COST is scaled with SCALE
+   plus a small fudge factor.  */
  
  static bool
-cheap_bb_rtx_cost_p (const_basic_block bb, int max_cost)
+cheap_bb_rtx_cost_p (const_basic_block bb, int scale, int max_cost)
  {
    int count = 0;
    rtx insn = BB_HEAD (bb);
    bool speed = optimize_bb_for_speed_p (bb);
  
+  /* Our branch probability/scaling factors are just estimates and don't
+     account for cases where we can get speculation for free and other
+     secondary benefits.  So we fudge the scale factor to make speculating
+     appear a little more profitable.  */
+  scale += REG_BR_PROB_BASE / 8;
+  max_cost *= scale;
+
    while (1)
      {
        if (NONJUMP_INSN_P (insn))
         {
-         int cost = insn_rtx_cost (PATTERN (insn), speed);
+         int cost = insn_rtx_cost (PATTERN (insn), speed) * REG_BR_PROB_BASE;
           if (cost == 0)
             return false;
  
@@ -1519,9 +1531,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
      }
  
    /* ??? We could handle this if we knew that a load from A or B could
-     not fault.  This is also true if we've already loaded
+     not trap or fault.  This is also true if we've already loaded
       from the address along the path from ENTRY.  */
-  else if (may_trap_p (a) || may_trap_p (b))
+  else if (may_trap_or_fault_p (a) || may_trap_or_fault_p (b))
      return FALSE;
  
    /* if (test) x = a + b; else x = c - d;
@@ -1656,10 +1668,6 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
        /* Copy over flags as appropriate.  */
        if (MEM_VOLATILE_P (if_info->a) || MEM_VOLATILE_P (if_info->b))
         MEM_VOLATILE_P (tmp) = 1;
-      if (MEM_IN_STRUCT_P (if_info->a) && MEM_IN_STRUCT_P (if_info->b))
-       MEM_IN_STRUCT_P (tmp) = 1;
-      if (MEM_SCALAR_P (if_info->a) && MEM_SCALAR_P (if_info->b))
-       MEM_SCALAR_P (tmp) = 1;
        if (MEM_ALIAS_SET (if_info->a) == MEM_ALIAS_SET (if_info->b))
         set_mem_alias_set (tmp, MEM_ALIAS_SET (if_info->a));
        set_mem_align (tmp,
@@ -2288,7 +2296,9 @@ noce_get_condition (rtx jump, rtx *earliest, bool then_else_reversed)
  
    cond = XEXP (SET_SRC (set), 0);
    tmp = XEXP (cond, 0);
-  if (REG_P (tmp) && GET_MODE_CLASS (GET_MODE (tmp)) == MODE_INT)
+  if (REG_P (tmp) && GET_MODE_CLASS (GET_MODE (tmp)) == MODE_INT
+      && (GET_MODE (tmp) != BImode
+          || !targetm.small_register_classes_for_mode_p (BImode)))
      {
        *earliest = jump;
  
@@ -2316,14 +2326,14 @@ noce_get_condition (rtx jump, rtx *earliest, bool then_else_reversed)
  static int
  noce_operand_ok (const_rtx op)
  {
+  if (side_effects_p (op))
+    return FALSE;
+
    /* We special-case memories, so handle any of them with
       no address side effects.  */
    if (MEM_P (op))
      return ! side_effects_p (XEXP (op, 0));
  
-  if (side_effects_p (op))
-    return FALSE;
-
    return ! may_trap_p (op);
  }
  
@@ -2680,12 +2690,14 @@ noce_process_if_block (struct noce_if_info *if_info)
  
  /* Check whether a block is suitable for conditional move conversion.
     Every insn must be a simple set of a register to a constant or a
-   register.  For each assignment, store the value in the array VALS,
-   indexed by register number, then store the register number in
-   REGS.  COND is the condition we will test.  */
+   register.  For each assignment, store the value in the pointer map
+   VALS, keyed indexed by register pointer, then store the register
+   pointer in REGS.  COND is the condition we will test.  */
  
  static int
-check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs,
+check_cond_move_block (basic_block bb,
+                      struct pointer_map_t *vals,
+                      VEC (rtx, heap) **regs,
                        rtx cond)
  {
    rtx insn;
@@ -2699,6 +2711,7 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs,
    FOR_BB_INSNS (bb, insn)
      {
        rtx set, dest, src;
+      void **slot;
  
        if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
         continue;
@@ -2725,14 +2738,14 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs,
        /* Don't try to handle this if the source register was
          modified earlier in the block.  */
        if ((REG_P (src)
-          && vals[REGNO (src)] != NULL)
+          && pointer_map_contains (vals, src))
           || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src))
-             && vals[REGNO (SUBREG_REG (src))] != NULL))
+             && pointer_map_contains (vals, SUBREG_REG (src))))
         return FALSE;
  
        /* Don't try to handle this if the destination register was
          modified earlier in the block.  */
-      if (vals[REGNO (dest)] != NULL)
+      if (pointer_map_contains (vals, dest))
         return FALSE;
  
        /* Don't try to handle this if the condition uses the
@@ -2746,17 +2759,18 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs,
           && modified_between_p (src, insn, NEXT_INSN (BB_END (bb))))
         return FALSE;
  
-      vals[REGNO (dest)] = src;
+      slot = pointer_map_insert (vals, (void *) dest);
+      *slot = (void *) src;
  
-      VEC_safe_push (int, heap, *regs, REGNO (dest));
+      VEC_safe_push (rtx, heap, *regs, dest);
      }
  
    return TRUE;
  }
  
  /* Given a basic block BB suitable for conditional move conversion,
-   a condition COND, and arrays THEN_VALS and ELSE_VALS containing the
-   register values depending on COND, emit the insns in the block as
+   a condition COND, and pointer maps THEN_VALS and ELSE_VALS containing
+   the register values depending on COND, emit the insns in the block as
     conditional moves.  If ELSE_BLOCK is true, THEN_BB was already
     processed.  The caller has started a sequence for the conversion.
     Return true if successful, false if something goes wrong.  */
@@ -2764,7 +2778,8 @@ check_cond_move_block (basic_block bb, rtx *vals, VEC (int, heap) **regs,
  static bool
  cond_move_convert_if_block (struct noce_if_info *if_infop,
                             basic_block bb, rtx cond,
-                           rtx *then_vals, rtx *else_vals,
+                           struct pointer_map_t *then_vals,
+                           struct pointer_map_t *else_vals,
                             bool else_block_p)
  {
    enum rtx_code code;
@@ -2777,7 +2792,7 @@ cond_move_convert_if_block (struct noce_if_info *if_infop,
    FOR_BB_INSNS (bb, insn)
      {
        rtx set, target, dest, t, e;
-      unsigned int regno;
+      void **then_slot, **else_slot;
  
        /* ??? Maybe emit conditional debug insn?  */
        if (!NONDEBUG_INSN_P (insn) || JUMP_P (insn))
@@ -2786,10 +2801,11 @@ cond_move_convert_if_block (struct noce_if_info *if_infop,
        gcc_assert (set && REG_P (SET_DEST (set)));
  
        dest = SET_DEST (set);
-      regno = REGNO (dest);
  
-      t = then_vals[regno];
-      e = else_vals[regno];
+      then_slot = pointer_map_contains (then_vals, dest);
+      else_slot = pointer_map_contains (else_vals, dest);
+      t = then_slot ? (rtx) *then_slot : NULL_RTX;
+      e = else_slot ? (rtx) *else_slot : NULL_RTX;
  
        if (else_block_p)
         {
@@ -2833,31 +2849,25 @@ cond_move_process_if_block (struct noce_if_info *if_info)
    rtx jump = if_info->jump;
    rtx cond = if_info->cond;
    rtx seq, loc_insn;
-  int max_reg, size, c, reg;
-  rtx *then_vals;
-  rtx *else_vals;
-  VEC (int, heap) *then_regs = NULL;
-  VEC (int, heap) *else_regs = NULL;
+  rtx reg;
+  int c;
+  struct pointer_map_t *then_vals;
+  struct pointer_map_t *else_vals;
+  VEC (rtx, heap) *then_regs = NULL;
+  VEC (rtx, heap) *else_regs = NULL;
    unsigned int i;
+  int success_p = FALSE;
  
    /* Build a mapping for each block to the value used for each
       register.  */
-  max_reg = max_reg_num ();
-  size = (max_reg + 1) * sizeof (rtx);
-  then_vals = (rtx *) alloca (size);
-  else_vals = (rtx *) alloca (size);
-  memset (then_vals, 0, size);
-  memset (else_vals, 0, size);
+  then_vals = pointer_map_create ();
+  else_vals = pointer_map_create ();
  
    /* Make sure the blocks are suitable.  */
    if (!check_cond_move_block (then_bb, then_vals, &then_regs, cond)
        || (else_bb
           && !check_cond_move_block (else_bb, else_vals, &else_regs, cond)))
-    {
-      VEC_free (int, heap, then_regs);
-      VEC_free (int, heap, else_regs);
-      return FALSE;
-    }
+    goto done;
  
    /* Make sure the blocks can be used together.  If the same register
       is set in both blocks, and is not set to a constant in both
@@ -2866,41 +2876,38 @@ cond_move_process_if_block (struct noce_if_info *if_info)
       source register does not change after the assignment.  Also count
       the number of registers set in only one of the blocks.  */
    c = 0;
-  FOR_EACH_VEC_ELT (int, then_regs, i, reg)
+  FOR_EACH_VEC_ELT (rtx, then_regs, i, reg)
      {
-      if (!then_vals[reg] && !else_vals[reg])
-       continue;
+      void **then_slot = pointer_map_contains (then_vals, reg);
+      void **else_slot = pointer_map_contains (else_vals, reg);
  
-      if (!else_vals[reg])
+      gcc_checking_assert (then_slot);
+      if (!else_slot)
         ++c;
        else
         {
-         if (!CONSTANT_P (then_vals[reg])
-             && !CONSTANT_P (else_vals[reg])
-             && !rtx_equal_p (then_vals[reg], else_vals[reg]))
-           {
-             VEC_free (int, heap, then_regs);
-             VEC_free (int, heap, else_regs);
-             return FALSE;
-           }
+         rtx then_val = (rtx) *then_slot;
+         rtx else_val = (rtx) *else_slot;
+         if (!CONSTANT_P (then_val) && !CONSTANT_P (else_val)
+             && !rtx_equal_p (then_val, else_val))
+           goto done;
         }
      }
  
    /* Finish off c for MAX_CONDITIONAL_EXECUTE.  */
-  FOR_EACH_VEC_ELT (int, else_regs, i, reg)
-    if (!then_vals[reg])
-      ++c;
+  FOR_EACH_VEC_ELT (rtx, else_regs, i, reg)
+    {
+      gcc_checking_assert (pointer_map_contains (else_vals, reg));
+      if (!pointer_map_contains (then_vals, reg))
+       ++c;
+    }
  
    /* Make sure it is reasonable to convert this block.  What matters
       is the number of assignments currently made in only one of the
       branches, since if we convert we are going to always execute
       them.  */
    if (c > MAX_CONDITIONAL_EXECUTE)
-    {
-      VEC_free (int, heap, then_regs);
-      VEC_free (int, heap, else_regs);
-      return FALSE;
-    }
+    goto done;
  
    /* Try to emit the conditional moves.  First do the then block,
       then do anything left in the else blocks.  */
@@ -2912,17 +2919,11 @@ cond_move_process_if_block (struct noce_if_info *if_info)
                                           then_vals, else_vals, true)))
      {
        end_sequence ();
-      VEC_free (int, heap, then_regs);
-      VEC_free (int, heap, else_regs);
-      return FALSE;
+      goto done;
      }
    seq = end_ifcvt_sequence (if_info);
    if (!seq)
-    {
-      VEC_free (int, heap, then_regs);
-      VEC_free (int, heap, else_regs);
-      return FALSE;
-    }
+    goto done;
  
    loc_insn = first_active_insn (then_bb);
    if (!loc_insn)
@@ -2953,9 +2954,14 @@ cond_move_process_if_block (struct noce_if_info *if_info)
  
    num_updated_if_blocks++;
  
-  VEC_free (int, heap, then_regs);
-  VEC_free (int, heap, else_regs);
-  return TRUE;
+  success_p = TRUE;
+
+done:
+  pointer_map_destroy (then_vals);
+  pointer_map_destroy (else_vals);
+  VEC_free (rtx, heap, then_regs);
+  VEC_free (rtx, heap, else_regs);
+  return success_p;
  }
  
  \f
@@ -3796,7 +3802,8 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge)
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest;
    basic_block new_bb;
-  int then_bb_index;
+  int then_bb_index, then_prob;
+  rtx else_target = NULL_RTX;
  
    /* If we are partitioning hot/cold basic blocks, we don't want to
       mess up unconditional or indirect jumps that cross between hot
@@ -3839,12 +3846,25 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge)
              "\nIF-CASE-1 found, start %d, then %d\n",
              test_bb->index, then_bb->index);
  
-  /* THEN is small.  */
-  if (! cheap_bb_rtx_cost_p (then_bb,
+  if (then_edge->probability)
+    then_prob = REG_BR_PROB_BASE - then_edge->probability;
+  else
+    then_prob = REG_BR_PROB_BASE / 2;
+
+  /* We're speculating from the THEN path, we want to make sure the cost
+     of speculation is within reason.  */
+  if (! cheap_bb_rtx_cost_p (then_bb, then_prob,
         COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
                                     predictable_edge_p (then_edge)))))
      return FALSE;
  
+  if (else_bb == EXIT_BLOCK_PTR)
+    {
+      rtx jump = BB_END (else_edge->src);
+      gcc_assert (JUMP_P (jump));
+      else_target = JUMP_LABEL (jump);
+    }
+
    /* Registers set are dead, or are predicable.  */
    if (! dead_or_predicable (test_bb, then_bb, else_bb,
                             single_succ_edge (then_bb), 1))
@@ -3864,6 +3884,9 @@ find_if_case_1 (basic_block test_bb, edge then_edge, edge else_edge)
        redirect_edge_succ (FALLTHRU_EDGE (test_bb), else_bb);
        new_bb = 0;
      }
+  else if (else_bb == EXIT_BLOCK_PTR)
+    new_bb = force_nonfallthru_and_redirect (FALLTHRU_EDGE (test_bb),
+                                            else_bb, else_target);
    else
      new_bb = redirect_edge_and_branch_force (FALLTHRU_EDGE (test_bb),
                                              else_bb);
@@ -3899,7 +3922,7 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest;
    edge else_succ;
-  rtx note;
+  int then_prob, else_prob;
  
    /* If we are partitioning hot/cold basic blocks, we don't want to
       mess up unconditional or indirect jumps that cross between hot
@@ -3938,9 +3961,19 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
    if (then_bb->index < NUM_FIXED_BLOCKS)
      return FALSE;
  
+  if (else_edge->probability)
+    {
+      else_prob = else_edge->probability;
+      then_prob = REG_BR_PROB_BASE - else_prob;
+    }
+  else
+    {
+      else_prob = REG_BR_PROB_BASE / 2;
+      then_prob = REG_BR_PROB_BASE / 2;
+    }
+
    /* ELSE is predicted or SUCC(ELSE) postdominates THEN.  */
-  note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX);
-  if (note && INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2)
+  if (else_prob > then_prob)
      ;
    else if (else_succ->dest->index < NUM_FIXED_BLOCKS
            || dominated_by_p (CDI_POST_DOMINATORS, then_bb,
@@ -3955,8 +3988,9 @@ find_if_case_2 (basic_block test_bb, edge then_edge, edge else_edge)
              "\nIF-CASE-2 found, start %d, else %d\n",
              test_bb->index, else_bb->index);
  
-  /* ELSE is small.  */
-  if (! cheap_bb_rtx_cost_p (else_bb,
+  /* We're speculating from the ELSE path, we want to make sure the cost
+     of speculation is within reason.  */
+  if (! cheap_bb_rtx_cost_p (else_bb, else_prob,
         COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
                                     predictable_edge_p (else_edge)))))
      return FALSE;
@@ -4127,6 +4161,66 @@ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
        FOR_BB_INSNS (merge_bb, insn)
         if (NONDEBUG_INSN_P (insn))
           df_simulate_find_defs (insn, merge_set);
+
+#ifdef HAVE_simple_return
+      /* If shrink-wrapping, disable this optimization when test_bb is
+        the first basic block and merge_bb exits.  The idea is to not
+        move code setting up a return register as that may clobber a
+        register used to pass function parameters, which then must be
+        saved in caller-saved regs.  A caller-saved reg requires the
+        prologue, killing a shrink-wrap opportunity.  */
+      if ((flag_shrink_wrap && HAVE_simple_return && !epilogue_completed)
+         && ENTRY_BLOCK_PTR->next_bb == test_bb
+         && single_succ_p (new_dest)
+         && single_succ (new_dest) == EXIT_BLOCK_PTR
+         && bitmap_intersect_p (df_get_live_in (new_dest), merge_set))
+       {
+         regset return_regs;
+         unsigned int i;
+
+         return_regs = BITMAP_ALLOC (&reg_obstack);
+
+         /* Start off with the intersection of regs used to pass
+            params and regs used to return values.  */
+         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+           if (FUNCTION_ARG_REGNO_P (i)
+               && targetm.calls.function_value_regno_p (i))
+             bitmap_set_bit (return_regs, INCOMING_REGNO (i));
+
+         bitmap_and_into (return_regs, df_get_live_out (ENTRY_BLOCK_PTR));
+         bitmap_and_into (return_regs, df_get_live_in (EXIT_BLOCK_PTR));
+         if (!bitmap_empty_p (return_regs))
+           {
+             FOR_BB_INSNS_REVERSE (new_dest, insn)
+               if (NONDEBUG_INSN_P (insn))
+                 {
+                   df_ref *def_rec;
+                   unsigned int uid = INSN_UID (insn);
+
+                   /* If this insn sets any reg in return_regs..  */
+                   for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
+                     {
+                       df_ref def = *def_rec;
+                       unsigned r = DF_REF_REGNO (def);
+
+                       if (bitmap_bit_p (return_regs, r))
+                         break;
+                     }
+                   /* ..then add all reg uses to the set of regs
+                      we're interested in.  */
+                   if (*def_rec)
+                     df_simulate_uses (insn, return_regs);
+                 }
+             if (bitmap_intersect_p (merge_set, return_regs))
+               {
+                 BITMAP_FREE (return_regs);
+                 BITMAP_FREE (merge_set);
+                 return FALSE;
+               }
+           }
+         BITMAP_FREE (return_regs);
+       }
+#endif
      }
  
   no_body: