compiler: Use backend interface for type sizes and alignments.

[pf3gnuchains/gcc-fork.git] / gcc / function.c
diff --git a/gcc/function.c b/gcc/function.c

index 5c31955..cd82da4 100644 (file)
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -1,7 +1,7 @@
  /* Expands front end tree to back end RTL for GCC.
     Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
     1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
-   2010  Free Software Foundation, Inc.
+   2010, 2011  Free Software Foundation, Inc.
  
  This file is part of GCC.
  
@@ -37,7 +37,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "system.h"
  #include "coretypes.h"
  #include "tm.h"
-#include "rtl.h"
+#include "rtl-error.h"
  #include "tree.h"
  #include "flags.h"
  #include "except.h"
@@ -51,13 +51,13 @@ along with GCC; see the file COPYING3.  If not see
  #include "recog.h"
  #include "output.h"
  #include "basic-block.h"
-#include "toplev.h"
  #include "hashtab.h"
  #include "ggc.h"
  #include "tm_p.h"
  #include "integrate.h"
  #include "langhooks.h"
  #include "target.h"
+#include "common/common-target.h"
  #include "cfglayout.h"
  #include "gimple.h"
  #include "tree-pass.h"
@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
  #include "df.h"
  #include "timevar.h"
  #include "vecprim.h"
+#include "params.h"
+#include "bb-reorder.h"
  
  /* So we can assign to cfun in this file.  */
  #undef cfun
@@ -147,9 +149,6 @@ extern tree debug_find_var_in_block_tree (tree, tree);
     can always export `prologue_epilogue_contains'.  */
  static void record_insns (rtx, rtx, htab_t *) ATTRIBUTE_UNUSED;
  static bool contains (const_rtx, htab_t);
-#ifdef HAVE_return
-static void emit_return_into_block (basic_block);
-#endif
  static void prepare_function_start (void);
  static void do_clobber_return_reg (rtx, void *);
  static void do_use_return_reg (rtx, void *);
@@ -212,8 +211,7 @@ free_after_compilation (struct function *f)
    prologue_insn_hash = NULL;
    epilogue_insn_hash = NULL;
  
-  if (crtl->emit.regno_pointer_align)
-    free (crtl->emit.regno_pointer_align);
+  free (crtl->emit.regno_pointer_align);
  
    memset (crtl, 0, sizeof (struct rtl_data));
    f->eh = NULL;
@@ -356,14 +354,17 @@ add_frame_space (HOST_WIDE_INT start, HOST_WIDE_INT end)
     -2 means use BITS_PER_UNIT,
     positive specifies alignment boundary in bits.
  
-   If REDUCE_ALIGNMENT_OK is true, it is OK to reduce alignment.
+   KIND has the ASLK_REDUCE_ALIGN bit set if it is OK to reduce
+   alignment, and the ASLK_RECORD_PAD bit set if we should remember
+   extra space we allocated for alignment purposes.  When we are called
+   from assign_stack_temp_for_type, ASLK_RECORD_PAD is not set so we
+   don't track the same stack slot in two independent lists.
  
     We do not round to stack_boundary here.  */
  
  rtx
  assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
-                     int align,
-                     bool reduce_alignment_ok ATTRIBUTE_UNUSED)
+                     int align, int kind)
  {
    rtx x, addr;
    int bigend_correction = 0;
@@ -413,7 +414,7 @@ assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
                   /* It is OK to reduce the alignment as long as the
                      requested size is 0 or the estimated stack
                      alignment >= mode alignment.  */
-                 gcc_assert (reduce_alignment_ok
+                 gcc_assert ((kind & ASLK_REDUCE_ALIGN)
                               || size == 0
                               || (crtl->stack_alignment_estimated
                                   >= GET_MODE_ALIGNMENT (mode)));
@@ -431,21 +432,24 @@ assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
  
    if (mode != BLKmode || size != 0)
      {
-      struct frame_space **psp;
-
-      for (psp = &crtl->frame_space_list; *psp; psp = &(*psp)->next)
+      if (kind & ASLK_RECORD_PAD)
         {
-         struct frame_space *space = *psp;
-         if (!try_fit_stack_local (space->start, space->length, size,
-                                   alignment, &slot_offset))
-           continue;
-         *psp = space->next;
-         if (slot_offset > space->start)
-           add_frame_space (space->start, slot_offset);
-         if (slot_offset + size < space->start + space->length)
-           add_frame_space (slot_offset + size,
-                            space->start + space->length);
-         goto found_space;
+         struct frame_space **psp;
+
+         for (psp = &crtl->frame_space_list; *psp; psp = &(*psp)->next)
+           {
+             struct frame_space *space = *psp;
+             if (!try_fit_stack_local (space->start, space->length, size,
+                                       alignment, &slot_offset))
+               continue;
+             *psp = space->next;
+             if (slot_offset > space->start)
+               add_frame_space (space->start, slot_offset);
+             if (slot_offset + size < space->start + space->length)
+               add_frame_space (slot_offset + size,
+                                space->start + space->length);
+             goto found_space;
+           }
         }
      }
    else if (!STACK_ALIGNMENT_NEEDED)
@@ -461,20 +465,26 @@ assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
        frame_offset -= size;
        try_fit_stack_local (frame_offset, size, size, alignment, &slot_offset);
  
-      if (slot_offset > frame_offset)
-       add_frame_space (frame_offset, slot_offset);
-      if (slot_offset + size < old_frame_offset)
-       add_frame_space (slot_offset + size, old_frame_offset);
+      if (kind & ASLK_RECORD_PAD)
+       {
+         if (slot_offset > frame_offset)
+           add_frame_space (frame_offset, slot_offset);
+         if (slot_offset + size < old_frame_offset)
+           add_frame_space (slot_offset + size, old_frame_offset);
+       }
      }
    else
      {
        frame_offset += size;
        try_fit_stack_local (old_frame_offset, size, size, alignment, &slot_offset);
  
-      if (slot_offset > old_frame_offset)
-       add_frame_space (old_frame_offset, slot_offset);
-      if (slot_offset + size < frame_offset)
-       add_frame_space (slot_offset + size, frame_offset);
+      if (kind & ASLK_RECORD_PAD)
+       {
+         if (slot_offset > old_frame_offset)
+           add_frame_space (old_frame_offset, slot_offset);
+         if (slot_offset + size < frame_offset)
+           add_frame_space (slot_offset + size, frame_offset);
+       }
      }
  
   found_space:
@@ -514,7 +524,7 @@ assign_stack_local_1 (enum machine_mode mode, HOST_WIDE_INT size,
  rtx
  assign_stack_local (enum machine_mode mode, HOST_WIDE_INT size, int align)
  {
-  return assign_stack_local_1 (mode, size, align, false);
+  return assign_stack_local_1 (mode, size, align, ASLK_RECORD_PAD);
  }
  \f
  \f
@@ -869,11 +879,13 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size,
          and round it now.  We also make sure ALIGNMENT is at least
          BIGGEST_ALIGNMENT.  */
        gcc_assert (mode != BLKmode || align == BIGGEST_ALIGNMENT);
-      p->slot = assign_stack_local (mode,
-                                   (mode == BLKmode
-                                    ? CEIL_ROUND (size, (int) align / BITS_PER_UNIT)
-                                    : size),
-                                   align);
+      p->slot = assign_stack_local_1 (mode,
+                                     (mode == BLKmode
+                                      ? CEIL_ROUND (size,
+                                                    (int) align
+                                                    / BITS_PER_UNIT)
+                                      : size),
+                                     align, 0);
  
        p->align = align;
  
@@ -929,8 +941,11 @@ assign_stack_temp_for_type (enum machine_mode mode, HOST_WIDE_INT size,
    if (type != 0)
      {
        MEM_VOLATILE_P (slot) = TYPE_VOLATILE (type);
-      MEM_SET_IN_STRUCT_P (slot, (AGGREGATE_TYPE_P (type)
-                                 || TREE_CODE (type) == COMPLEX_TYPE));
+      gcc_checking_assert (!MEM_SCALAR_P (slot) && !MEM_IN_STRUCT_P (slot));
+      if (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
+       MEM_IN_STRUCT_P (slot) = 1;
+      else
+       MEM_SCALAR_P (slot) = 1;
      }
    MEM_NOTRAP_P (slot) = 1;
  
@@ -1406,6 +1421,11 @@ instantiate_new_reg (rtx x, HOST_WIDE_INT *poffset)
  #endif
        offset = cfa_offset;
      }
+  else if (x == virtual_preferred_stack_boundary_rtx)
+    {
+      new_rtx = GEN_INT (crtl->preferred_stack_boundary / BITS_PER_UNIT);
+      offset = 0;
+    }
    else
      return NULL_RTX;
  
@@ -1472,16 +1492,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc, void *data)
  static int
  safe_insn_predicate (int code, int operand, rtx x)
  {
-  const struct insn_operand_data *op_data;
-
-  if (code < 0)
-    return true;
-
-  op_data = &insn_data[code].operand[operand];
-  if (op_data->predicate == NULL)
-    return true;
-
-  return op_data->predicate (x, op_data->mode);
+  return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x);
  }
  
  /* A subroutine of instantiate_virtual_regs.  Instantiate any virtual
@@ -1780,8 +1791,21 @@ instantiate_expr (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
    if (! EXPR_P (t))
      {
        *walk_subtrees = 0;
-      if (DECL_P (t) && DECL_RTL_SET_P (t))
-       instantiate_decl_rtl (DECL_RTL (t));
+      if (DECL_P (t))
+       {
+         if (DECL_RTL_SET_P (t))
+           instantiate_decl_rtl (DECL_RTL (t));
+         if (TREE_CODE (t) == PARM_DECL && DECL_NAMELESS (t)
+             && DECL_INCOMING_RTL (t))
+           instantiate_decl_rtl (DECL_INCOMING_RTL (t));
+         if ((TREE_CODE (t) == VAR_DECL
+              || TREE_CODE (t) == RESULT_DECL)
+             && DECL_HAS_VALUE_EXPR_P (t))
+           {
+             tree v = DECL_VALUE_EXPR (t);
+             walk_tree (&v, instantiate_expr, NULL, NULL);
+           }
+       }
      }
    return NULL;
  }
@@ -1794,7 +1818,7 @@ instantiate_decls_1 (tree let)
  {
    tree t;
  
-  for (t = BLOCK_VARS (let); t; t = TREE_CHAIN (t))
+  for (t = BLOCK_VARS (let); t; t = DECL_CHAIN (t))
      {
        if (DECL_RTL_SET_P (t))
         instantiate_decl_rtl (DECL_RTL (t));
@@ -1816,10 +1840,11 @@ instantiate_decls_1 (tree let)
  static void
  instantiate_decls (tree fndecl)
  {
-  tree decl, t, next;
+  tree decl;
+  unsigned ix;
  
    /* Process all parameters of the function.  */
-  for (decl = DECL_ARGUMENTS (fndecl); decl; decl = TREE_CHAIN (decl))
+  for (decl = DECL_ARGUMENTS (fndecl); decl; decl = DECL_CHAIN (decl))
      {
        instantiate_decl_rtl (DECL_RTL (decl));
        instantiate_decl_rtl (DECL_INCOMING_RTL (decl));
@@ -1830,19 +1855,25 @@ instantiate_decls (tree fndecl)
         }
      }
  
-  /* Now process all variables defined in the function or its subblocks.  */
-  instantiate_decls_1 (DECL_INITIAL (fndecl));
-
-  t = cfun->local_decls;
-  cfun->local_decls = NULL_TREE;
-  for (; t; t = next)
+  if ((decl = DECL_RESULT (fndecl))
+      && TREE_CODE (decl) == RESULT_DECL)
      {
-      next = TREE_CHAIN (t);
-      decl = TREE_VALUE (t);
        if (DECL_RTL_SET_P (decl))
         instantiate_decl_rtl (DECL_RTL (decl));
-      ggc_free (t);
+      if (DECL_HAS_VALUE_EXPR_P (decl))
+       {
+         tree v = DECL_VALUE_EXPR (decl);
+         walk_tree (&v, instantiate_expr, NULL, NULL);
+       }
      }
+
+  /* Now process all variables defined in the function or its subblocks.  */
+  instantiate_decls_1 (DECL_INITIAL (fndecl));
+
+  FOR_EACH_LOCAL_DECL (cfun, ix, decl)
+    if (DECL_RTL_SET_P (decl))
+      instantiate_decl_rtl (DECL_RTL (decl));
+  VEC_free (tree, gc, cfun->local_decls);
  }
  
  /* Pass through the INSNS of function FNDECL and convert virtual register
@@ -1905,6 +1936,7 @@ instantiate_virtual_regs (void)
    /* Indicate that, from now on, assign_stack_local should use
       frame_pointer_rtx.  */
    virtuals_instantiated = 1;
+
    return 0;
  }
  
@@ -1923,7 +1955,7 @@ struct rtl_opt_pass pass_instantiate_virtual_regs =
    0,                                    /* properties_provided */
    0,                                    /* properties_destroyed */
    0,                                    /* todo_flags_start */
-  TODO_dump_func                        /* todo_flags_finish */
+  0                                     /* todo_flags_finish */
   }
  };
  
@@ -2095,7 +2127,8 @@ pass_by_reference (CUMULATIVE_ARGS *ca, enum machine_mode mode,
         }
      }
  
-  return targetm.calls.pass_by_reference (ca, mode, type, named_arg);
+  return targetm.calls.pass_by_reference (pack_cumulative_args (ca), mode,
+                                         type, named_arg);
  }
  
  /* Return true if TYPE, which is passed by reference, should be callee
@@ -2107,7 +2140,8 @@ reference_callee_copied (CUMULATIVE_ARGS *ca, enum machine_mode mode,
  {
    if (type && TREE_ADDRESSABLE (type))
      return false;
-  return targetm.calls.callee_copies (ca, mode, type, named_arg);
+  return targetm.calls.callee_copies (pack_cumulative_args (ca), mode, type,
+                                     named_arg);
  }
  
  /* Structures to communicate between the subroutines of assign_parms.
@@ -2116,7 +2150,10 @@ reference_callee_copied (CUMULATIVE_ARGS *ca, enum machine_mode mode,
  
  struct assign_parm_data_all
  {
-  CUMULATIVE_ARGS args_so_far;
+  /* When INIT_CUMULATIVE_ARGS gets revamped, allocating CUMULATIVE_ARGS
+     should become a job of the target or otherwise encapsulated.  */
+  CUMULATIVE_ARGS args_so_far_v;
+  cumulative_args_t args_so_far;
    struct args_size stack_args_size;
    tree function_result_decl;
    tree orig_fnargs;
@@ -2156,11 +2193,12 @@ assign_parms_initialize_all (struct assign_parm_data_all *all)
    fntype = TREE_TYPE (current_function_decl);
  
  #ifdef INIT_CUMULATIVE_INCOMING_ARGS
-  INIT_CUMULATIVE_INCOMING_ARGS (all->args_so_far, fntype, NULL_RTX);
+  INIT_CUMULATIVE_INCOMING_ARGS (all->args_so_far_v, fntype, NULL_RTX);
  #else
-  INIT_CUMULATIVE_ARGS (all->args_so_far, fntype, NULL_RTX,
+  INIT_CUMULATIVE_ARGS (all->args_so_far_v, fntype, NULL_RTX,
                         current_function_decl, -1);
  #endif
+  all->args_so_far = pack_cumulative_args (&all->args_so_far_v);
  
  #ifdef REG_PARM_STACK_SPACE
    all->reg_parm_stack_space = REG_PARM_STACK_SPACE (current_function_decl);
@@ -2177,7 +2215,7 @@ split_complex_args (VEC(tree, heap) **args)
    unsigned i;
    tree p;
  
-  for (i = 0; VEC_iterate (tree, *args, i, p); ++i)
+  FOR_EACH_VEC_ELT (tree, *args, i, p)
      {
        tree type = TREE_TYPE (p);
        if (TREE_CODE (type) == COMPLEX_TYPE
@@ -2228,7 +2266,7 @@ assign_parms_augmented_arg_list (struct assign_parm_data_all *all)
    VEC(tree, heap) *fnargs = NULL;
    tree arg;
  
-  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
+  for (arg = DECL_ARGUMENTS (fndecl); arg; arg = DECL_CHAIN (arg))
      VEC_safe_push (tree, heap, fnargs, arg);
  
    all->orig_fnargs = DECL_ARGUMENTS (fndecl);
@@ -2242,12 +2280,13 @@ assign_parms_augmented_arg_list (struct assign_parm_data_all *all)
        tree decl;
  
        decl = build_decl (DECL_SOURCE_LOCATION (fndecl),
-                        PARM_DECL, NULL_TREE, type);
+                        PARM_DECL, get_identifier (".result_ptr"), type);
        DECL_ARG_TYPE (decl) = type;
        DECL_ARTIFICIAL (decl) = 1;
-      DECL_IGNORED_P (decl) = 1;
+      DECL_NAMELESS (decl) = 1;
+      TREE_CONSTANT (decl) = 1;
  
-      TREE_CHAIN (decl) = all->orig_fnargs;
+      DECL_CHAIN (decl) = all->orig_fnargs;
        all->orig_fnargs = decl;
        VEC_safe_insert (tree, heap, fnargs, 0, decl);
  
@@ -2278,9 +2317,9 @@ assign_parm_find_data_types (struct assign_parm_data_all *all, tree parm,
    /* NAMED_ARG is a misnomer.  We really mean 'non-variadic'. */
    if (!cfun->stdarg)
      data->named_arg = 1;  /* No variadic parms.  */
-  else if (TREE_CHAIN (parm))
+  else if (DECL_CHAIN (parm))
      data->named_arg = 1;  /* Not the last non-variadic parm. */
-  else if (targetm.calls.strict_argument_naming (&all->args_so_far))
+  else if (targetm.calls.strict_argument_naming (all->args_so_far))
      data->named_arg = 1;  /* Only variadic ones are unnamed.  */
    else
      data->named_arg = 0;  /* Treat as variadic.  */
@@ -2316,7 +2355,7 @@ assign_parm_find_data_types (struct assign_parm_data_all *all, tree parm,
      passed_type = TREE_TYPE (first_field (passed_type));
  
    /* See if this arg was passed by invisible reference.  */
-  if (pass_by_reference (&all->args_so_far, passed_mode,
+  if (pass_by_reference (&all->args_so_far_v, passed_mode,
                          passed_type, data->named_arg))
      {
        passed_type = nominal_type = build_pointer_type (passed_type);
@@ -2345,7 +2384,7 @@ assign_parms_setup_varargs (struct assign_parm_data_all *all,
  {
    int varargs_pretend_bytes = 0;
  
-  targetm.calls.setup_incoming_varargs (&all->args_so_far,
+  targetm.calls.setup_incoming_varargs (all->args_so_far,
                                         data->promoted_mode,
                                         data->passed_type,
                                         &varargs_pretend_bytes, no_rtl);
@@ -2374,13 +2413,10 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all,
        return;
      }
  
-#ifdef FUNCTION_INCOMING_ARG
-  entry_parm = FUNCTION_INCOMING_ARG (all->args_so_far, data->promoted_mode,
-                                     data->passed_type, data->named_arg);
-#else
-  entry_parm = FUNCTION_ARG (all->args_so_far, data->promoted_mode,
-                            data->passed_type, data->named_arg);
-#endif
+  entry_parm = targetm.calls.function_incoming_arg (all->args_so_far,
+                                                   data->promoted_mode,
+                                                   data->passed_type,
+                                                   data->named_arg);
  
    if (entry_parm == 0)
      data->promoted_mode = data->passed_mode;
@@ -2401,16 +2437,12 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all,
  #endif
    if (!in_regs && !data->named_arg)
      {
-      if (targetm.calls.pretend_outgoing_varargs_named (&all->args_so_far))
+      if (targetm.calls.pretend_outgoing_varargs_named (all->args_so_far))
         {
           rtx tem;
-#ifdef FUNCTION_INCOMING_ARG
-         tem = FUNCTION_INCOMING_ARG (all->args_so_far, data->promoted_mode,
-                                      data->passed_type, true);
-#else
-         tem = FUNCTION_ARG (all->args_so_far, data->promoted_mode,
-                             data->passed_type, true);
-#endif
+         tem = targetm.calls.function_incoming_arg (all->args_so_far,
+                                                    data->promoted_mode,
+                                                    data->passed_type, true);
           in_regs = tem != NULL;
         }
      }
@@ -2425,7 +2457,7 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all,
      {
        int partial;
  
-      partial = targetm.calls.arg_partial_bytes (&all->args_so_far,
+      partial = targetm.calls.arg_partial_bytes (all->args_so_far,
                                                  data->promoted_mode,
                                                  data->passed_type,
                                                  data->named_arg);
@@ -2543,16 +2575,13 @@ assign_parm_find_stack_rtl (tree parm, struct assign_parm_data_one *data)
        if (data->promoted_mode != BLKmode
           && data->promoted_mode != DECL_MODE (parm))
         {
-         set_mem_size (stack_parm,
-                       GEN_INT (GET_MODE_SIZE (data->promoted_mode)));
-         if (MEM_EXPR (stack_parm) && MEM_OFFSET (stack_parm))
+         set_mem_size (stack_parm, GET_MODE_SIZE (data->promoted_mode));
+         if (MEM_EXPR (stack_parm) && MEM_OFFSET_KNOWN_P (stack_parm))
             {
               int offset = subreg_lowpart_offset (DECL_MODE (parm),
                                                   data->promoted_mode);
               if (offset)
-               set_mem_offset (stack_parm,
-                               plus_constant (MEM_OFFSET (stack_parm),
-                                              -offset));
+               set_mem_offset (stack_parm, MEM_OFFSET (stack_parm) - offset);
             }
         }
      }
@@ -2561,7 +2590,7 @@ assign_parm_find_stack_rtl (tree parm, struct assign_parm_data_one *data)
    align = BITS_PER_UNIT;
  
    /* If we're padding upward, we know that the alignment of the slot
-     is FUNCTION_ARG_BOUNDARY.  If we're using slot_offset, we're
+     is TARGET_FUNCTION_ARG_BOUNDARY.  If we're using slot_offset, we're
       intentionally forcing upward padding.  Otherwise we have to come
       up with a guess at the alignment based on OFFSET_RTX.  */
    if (data->locate.where_pad != downward || data->entry_parm)
@@ -2839,9 +2868,7 @@ assign_parm_setup_block (struct assign_parm_data_all *all,
               int by = (UNITS_PER_WORD - size) * BITS_PER_UNIT;
               rtx reg = gen_rtx_REG (word_mode, REGNO (entry_parm));
  
-             x = expand_shift (LSHIFT_EXPR, word_mode, reg,
-                               build_int_cst (NULL_TREE, by),
-                               NULL_RTX, 1);
+             x = expand_shift (LSHIFT_EXPR, word_mode, reg, by, NULL_RTX, 1);
               tem = change_address (mem, word_mode, 0);
               emit_move_insn (tem, x);
             }
@@ -2874,10 +2901,12 @@ static void
  assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
                        struct assign_parm_data_one *data)
  {
-  rtx parmreg;
+  rtx parmreg, validated_mem;
+  rtx equiv_stack_parm;
    enum machine_mode promoted_nominal_mode;
    int unsignedp = TYPE_UNSIGNED (TREE_TYPE (parm));
    bool did_conversion = false;
+  bool need_conversion, moved;
  
    /* Store the parm in a pseudoregister during the function, but we may
       need to do it in a wider mode.  Using 2 here makes the result
@@ -2906,11 +2935,19 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
  
    /* Copy the value into the register, thus bridging between
       assign_parm_find_data_types and expand_expr_real_1.  */
-  if (data->nominal_mode != data->passed_mode
-      || promoted_nominal_mode != data->promoted_mode)
-    {
-      int save_tree_used;
  
+  equiv_stack_parm = data->stack_parm;
+  validated_mem = validize_mem (data->entry_parm);
+
+  need_conversion = (data->nominal_mode != data->passed_mode
+                    || promoted_nominal_mode != data->promoted_mode);
+  moved = false;
+
+  if (need_conversion
+      && GET_MODE_CLASS (data->nominal_mode) == MODE_INT
+      && data->nominal_mode == data->passed_mode
+      && data->nominal_mode == GET_MODE (data->entry_parm))
+    {
        /* ENTRY_PARM has been converted to PROMOTED_MODE, its
          mode, by the caller.  We now have to convert it to
          NOMINAL_MODE, if different.  However, PARMREG may be in
@@ -2926,13 +2963,71 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
  
          In addition, the conversion may involve a call, which could
          clobber parameters which haven't been copied to pseudo
-        registers yet.  Therefore, we must first copy the parm to
-        a pseudo reg here, and save the conversion until after all
+        registers yet.
+
+        First, we try to emit an insn which performs the necessary
+        conversion.  We verify that this insn does not clobber any
+        hard registers.  */
+
+      enum insn_code icode;
+      rtx op0, op1;
+
+      icode = can_extend_p (promoted_nominal_mode, data->passed_mode,
+                           unsignedp);
+
+      op0 = parmreg;
+      op1 = validated_mem;
+      if (icode != CODE_FOR_nothing
+         && insn_operand_matches (icode, 0, op0)
+         && insn_operand_matches (icode, 1, op1))
+       {
+         enum rtx_code code = unsignedp ? ZERO_EXTEND : SIGN_EXTEND;
+         rtx insn, insns;
+         HARD_REG_SET hardregs;
+
+         start_sequence ();
+         insn = gen_extend_insn (op0, op1, promoted_nominal_mode,
+                                 data->passed_mode, unsignedp);
+         emit_insn (insn);
+         insns = get_insns ();
+
+         moved = true;
+         CLEAR_HARD_REG_SET (hardregs);
+         for (insn = insns; insn && moved; insn = NEXT_INSN (insn))
+           {
+             if (INSN_P (insn))
+               note_stores (PATTERN (insn), record_hard_reg_sets,
+                            &hardregs);
+             if (!hard_reg_set_empty_p (hardregs))
+               moved = false;
+           }
+
+         end_sequence ();
+
+         if (moved)
+           {
+             emit_insn (insns);
+             if (equiv_stack_parm != NULL_RTX)
+               equiv_stack_parm = gen_rtx_fmt_e (code, GET_MODE (parmreg),
+                                                 equiv_stack_parm);
+           }
+       }
+    }
+
+  if (moved)
+    /* Nothing to do.  */
+    ;
+  else if (need_conversion)
+    {
+      /* We did not have an insn to convert directly, or the sequence
+        generated appeared unsafe.  We must first copy the parm to a
+        pseudo reg, and save the conversion until after all
          parameters have been moved.  */
  
+      int save_tree_used;
        rtx tempreg = gen_reg_rtx (GET_MODE (data->entry_parm));
  
-      emit_move_insn (tempreg, validize_mem (data->entry_parm));
+      emit_move_insn (tempreg, validated_mem);
  
        push_to_sequence2 (all->first_conversion_insn, all->last_conversion_insn);
        tempreg = convert_to_mode (data->nominal_mode, tempreg, unsignedp);
@@ -2962,7 +3057,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
        did_conversion = true;
      }
    else
-    emit_move_insn (parmreg, validize_mem (data->entry_parm));
+    emit_move_insn (parmreg, validated_mem);
  
    /* If we were passed a pointer but the actual value can safely live
       in a register, put it in one.  */
@@ -3045,9 +3140,8 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm,
                 set_unique_reg_note (sinsn, REG_EQUIV, stackr);
             }
         }
-      else if ((set = single_set (linsn)) != 0
-              && SET_DEST (set) == parmreg)
-       set_unique_reg_note (linsn, REG_EQUIV, data->stack_parm);
+      else 
+       set_dst_reg_note (linsn, REG_EQUIV, equiv_stack_parm, parmreg);
      }
  
    /* For pointer data type, suggest pointer register.  */
@@ -3089,10 +3183,9 @@ assign_parm_setup_stack (struct assign_parm_data_all *all, tree parm,
           /* ??? This may need a big-endian conversion on sparc64.  */
           data->stack_parm
             = adjust_address (data->stack_parm, data->nominal_mode, 0);
-         if (offset && MEM_OFFSET (data->stack_parm))
+         if (offset && MEM_OFFSET_KNOWN_P (data->stack_parm))
             set_mem_offset (data->stack_parm,
-                           plus_constant (MEM_OFFSET (data->stack_parm),
-                                          offset));
+                           MEM_OFFSET (data->stack_parm) + offset);
         }
      }
  
@@ -3225,7 +3318,7 @@ assign_parms (tree fndecl)
    assign_parms_initialize_all (&all);
    fnargs = assign_parms_augmented_arg_list (&all);
  
-  for (i = 0; VEC_iterate (tree, fnargs, i, parm); ++i)
+  FOR_EACH_VEC_ELT (tree, fnargs, i, parm)
      {
        struct assign_parm_data_one data;
  
@@ -3243,8 +3336,9 @@ assign_parms (tree fndecl)
        /* Estimate stack alignment from parameter alignment.  */
        if (SUPPORTS_STACK_ALIGNMENT)
          {
-          unsigned int align = FUNCTION_ARG_BOUNDARY (data.promoted_mode,
-                                                     data.passed_type);
+          unsigned int align
+           = targetm.calls.function_arg_boundary (data.promoted_mode,
+                                                  data.passed_type);
           align = MINIMUM_ALIGNMENT (data.passed_type, data.promoted_mode,
                                      align);
           if (TYPE_ALIGN (data.nominal_type) > align)
@@ -3258,7 +3352,7 @@ assign_parms (tree fndecl)
             }
         }
  
-      if (cfun->stdarg && !TREE_CHAIN (parm))
+      if (cfun->stdarg && !DECL_CHAIN (parm))
         assign_parms_setup_varargs (&all, &data, false);
  
        /* Find out where the parameter arrives in this function.  */
@@ -3272,11 +3366,19 @@ assign_parms (tree fndecl)
         }
  
        /* Record permanently how this parm was passed.  */
-      set_decl_incoming_rtl (parm, data.entry_parm, data.passed_pointer);
+      if (data.passed_pointer)
+       {
+         rtx incoming_rtl
+           = gen_rtx_MEM (TYPE_MODE (TREE_TYPE (data.passed_type)),
+                          data.entry_parm);
+         set_decl_incoming_rtl (parm, incoming_rtl, true);
+       }
+      else
+       set_decl_incoming_rtl (parm, data.entry_parm, false);
  
        /* Update info on where next arg arrives in registers.  */
-      FUNCTION_ARG_ADVANCE (all.args_so_far, data.promoted_mode,
-                           data.passed_type, data.named_arg);
+      targetm.calls.function_arg_advance (all.args_so_far, data.promoted_mode,
+                                         data.passed_type, data.named_arg);
  
        assign_parm_adjust_stack_rtl (&data);
  
@@ -3330,13 +3432,22 @@ assign_parms (tree fndecl)
        rtx x;
  
        if (DECL_BY_REFERENCE (result))
-       x = addr;
+       {
+         SET_DECL_VALUE_EXPR (result, all.function_result_decl);
+         x = addr;
+       }
        else
         {
+         SET_DECL_VALUE_EXPR (result,
+                              build1 (INDIRECT_REF, TREE_TYPE (result),
+                                      all.function_result_decl));
           addr = convert_memory_address (Pmode, addr);
           x = gen_rtx_MEM (DECL_MODE (result), addr);
           set_mem_attributes (x, result, 1);
         }
+
+      DECL_HAS_VALUE_EXPR_P (result) = 1;
+
        SET_DECL_RTL (result, x);
      }
  
@@ -3369,13 +3480,14 @@ assign_parms (tree fndecl)
    /* See how many bytes, if any, of its args a function should try to pop
       on return.  */
  
-  crtl->args.pops_args = RETURN_POPS_ARGS (fndecl, TREE_TYPE (fndecl),
-                                                crtl->args.size);
+  crtl->args.pops_args = targetm.calls.return_pops_args (fndecl,
+                                                        TREE_TYPE (fndecl),
+                                                        crtl->args.size);
  
    /* For stdarg.h function, save info about
       regs and stack space used by the named args.  */
  
-  crtl->args.info = all.args_so_far;
+  crtl->args.info = all.args_so_far_v;
  
    /* Set the rtx used for the function return value.  Put this in its
       own variable so any optimizers that need this information don't have
@@ -3452,7 +3564,7 @@ gimplify_parameters (void)
    assign_parms_initialize_all (&all);
    fnargs = assign_parms_augmented_arg_list (&all);
  
-  for (i = 0; VEC_iterate (tree, fnargs, i, parm); ++i)
+  FOR_EACH_VEC_ELT (tree, fnargs, i, parm)
      {
        struct assign_parm_data_one data;
  
@@ -3464,8 +3576,8 @@ gimplify_parameters (void)
         continue;
  
        /* Update info on where next arg arrives in registers.  */
-      FUNCTION_ARG_ADVANCE (all.args_so_far, data.promoted_mode,
-                           data.passed_type, data.named_arg);
+      targetm.calls.function_arg_advance (all.args_so_far, data.promoted_mode,
+                                         data.passed_type, data.named_arg);
  
        /* ??? Once upon a time variable_size stuffed parameter list
          SAVE_EXPRs (amongst others) onto a pending sizes list.  This
@@ -3483,7 +3595,7 @@ gimplify_parameters (void)
        if (data.passed_pointer)
         {
            tree type = TREE_TYPE (data.passed_type);
-         if (reference_callee_copied (&all.args_so_far, TYPE_MODE (type),
+         if (reference_callee_copied (&all.args_so_far_v, TYPE_MODE (type),
                                        type, data.named_arg))
             {
               tree local, t;
@@ -3499,24 +3611,29 @@ gimplify_parameters (void)
                   DECL_IGNORED_P (local) = 0;
                   /* If PARM was addressable, move that flag over
                      to the local copy, as its address will be taken,
-                    not the PARMs.  */
+                    not the PARM's.  Keep PARM's address-taken flag
+                    set, as we'll query that flag during gimplification.  */
                   if (TREE_ADDRESSABLE (parm))
-                   {
-                     TREE_ADDRESSABLE (parm) = 0;
-                     TREE_ADDRESSABLE (local) = 1;
-                   }
+                   TREE_ADDRESSABLE (local) = 1;
+                 else if (TREE_CODE (type) == COMPLEX_TYPE
+                          || TREE_CODE (type) == VECTOR_TYPE)
+                   DECL_GIMPLE_REG_P (local) = 1;
                 }
               else
                 {
                   tree ptr_type, addr;
  
                   ptr_type = build_pointer_type (type);
-                 addr = create_tmp_var (ptr_type, get_name (parm));
+                 addr = create_tmp_reg (ptr_type, get_name (parm));
                   DECL_IGNORED_P (addr) = 0;
                   local = build_fold_indirect_ref (addr);
  
-                 t = built_in_decls[BUILT_IN_ALLOCA];
-                 t = build_call_expr (t, 1, DECL_SIZE_UNIT (parm));
+                 t = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
+                 t = build_call_expr (t, 2, DECL_SIZE_UNIT (parm),
+                                      size_int (DECL_ALIGN (parm)));
+
+                 /* The call has been built for a variable-sized object.  */
+                 CALL_ALLOCA_FOR_VAR_P (t) = 1;
                   t = fold_convert (ptr_type, t);
                   t = build2 (MODIFY_EXPR, TREE_TYPE (addr), addr, t);
                   gimplify_and_add (t, &stmts);
@@ -3553,9 +3670,10 @@ gimplify_parameters (void)
     FNDECL is the function in which the argument was defined.
  
     There are two types of rounding that are done.  The first, controlled by
-   FUNCTION_ARG_BOUNDARY, forces the offset from the start of the argument
-   list to be aligned to the specific boundary (in bits).  This rounding
-   affects the initial and starting offsets, but not the argument size.
+   TARGET_FUNCTION_ARG_BOUNDARY, forces the offset from the start of the
+   argument list to be aligned to the specific boundary (in bits).  This
+   rounding affects the initial and starting offsets, but not the argument
+   size.
  
     The second, controlled by FUNCTION_ARG_PADDING and PARM_BOUNDARY,
     optionally rounds the size of the parm to PARM_BOUNDARY.  The
@@ -3575,7 +3693,7 @@ locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs,
  {
    tree sizetree;
    enum direction where_pad;
-  unsigned int boundary;
+  unsigned int boundary, round_boundary;
    int reg_parm_stack_space = 0;
    int part_size_in_regs;
  
@@ -3606,7 +3724,9 @@ locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs,
    sizetree
      = type ? size_in_bytes (type) : size_int (GET_MODE_SIZE (passed_mode));
    where_pad = FUNCTION_ARG_PADDING (passed_mode, type);
-  boundary = FUNCTION_ARG_BOUNDARY (passed_mode, type);
+  boundary = targetm.calls.function_arg_boundary (passed_mode, type);
+  round_boundary = targetm.calls.function_arg_round_boundary (passed_mode,
+                                                             type);
    locate->where_pad = where_pad;
  
    /* Alignment can't exceed MAX_SUPPORTED_STACK_ALIGNMENT.  */
@@ -3653,8 +3773,8 @@ locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs,
      tree s2 = sizetree;
      if (where_pad != none
         && (!host_integerp (sizetree, 1)
-           || (tree_low_cst (sizetree, 1) * BITS_PER_UNIT) % PARM_BOUNDARY))
-      s2 = round_up (s2, PARM_BOUNDARY / BITS_PER_UNIT);
+           || (tree_low_cst (sizetree, 1) * BITS_PER_UNIT) % round_boundary))
+      s2 = round_up (s2, round_boundary / BITS_PER_UNIT);
      SUB_PARM_SIZE (locate->slot_offset, s2);
    }
  
@@ -3706,8 +3826,8 @@ locate_and_pad_parm (enum machine_mode passed_mode, tree type, int in_regs,
  
    if (where_pad != none
        && (!host_integerp (sizetree, 1)
-         || (tree_low_cst (sizetree, 1) * BITS_PER_UNIT) % PARM_BOUNDARY))
-    sizetree = round_up (sizetree, PARM_BOUNDARY / BITS_PER_UNIT);
+         || (tree_low_cst (sizetree, 1) * BITS_PER_UNIT) % round_boundary))
+    sizetree = round_up (sizetree, round_boundary / BITS_PER_UNIT);
  
    ADD_PARM_SIZE (locate->size, sizetree);
  
@@ -3837,7 +3957,7 @@ setjmp_vars_warning (bitmap setjmp_crosses, tree block)
  {
    tree decl, sub;
  
-  for (decl = BLOCK_VARS (block); decl; decl = TREE_CHAIN (decl))
+  for (decl = BLOCK_VARS (block); decl; decl = DECL_CHAIN (decl))
      {
        if (TREE_CODE (decl) == VAR_DECL
           && DECL_RTL_SET_P (decl)
@@ -3859,7 +3979,7 @@ setjmp_args_warning (bitmap setjmp_crosses)
  {
    tree decl;
    for (decl = DECL_ARGUMENTS (current_function_decl);
-       decl; decl = TREE_CHAIN (decl))
+       decl; decl = DECL_CHAIN (decl))
      if (DECL_RTL (decl) != 0
         && REG_P (DECL_RTL (decl))
         && regno_clobbered_at_setjmp (setjmp_crosses, REGNO (DECL_RTL (decl))))
@@ -3884,6 +4004,46 @@ generate_setjmp_warnings (void)
  }
  
  \f
+/* Reverse the order of elements in the fragment chain T of blocks,
+   and return the new head of the chain (old last element).  The
+   reversal is done in place by rewriting each BLOCK_FRAGMENT_CHAIN
+   link, so the old first element ends up last with a null link.
+   Returns null when T is null.  */
+
+static tree
+block_fragments_nreverse (tree t)
+{
+  tree prev = 0, block, next;
+  for (block = t; block; block = next)
+    {
+      next = BLOCK_FRAGMENT_CHAIN (block);
+      BLOCK_FRAGMENT_CHAIN (block) = prev;
+      prev = block;
+    }
+  return prev;
+}
+
+/* Reverse the order of elements in the chain T of blocks,
+   and return the new head of the chain (old last element).
+   Also do the same on subblocks and reverse the order of elements
+   in BLOCK_FRAGMENT_CHAIN as well.
+
+   The BLOCK_CHAIN links are rewritten in place and the function
+   recurses into the BLOCK_SUBBLOCKS of every block on the chain.  A
+   fragment chain is reversed only when visiting a block that has a
+   non-null BLOCK_FRAGMENT_CHAIN and a null BLOCK_FRAGMENT_ORIGIN;
+   NOTE(review): presumably that is the origin block of the fragments,
+   so that the chain is not reversed again when the fragments
+   themselves are visited -- confirm.  Returns null when T is null.  */
+
+static tree
+blocks_nreverse_all (tree t)
+{
+  tree prev = 0, block, next;
+  for (block = t; block; block = next)
+    {
+      next = BLOCK_CHAIN (block);
+      BLOCK_CHAIN (block) = prev;
+      BLOCK_SUBBLOCKS (block) = blocks_nreverse_all (BLOCK_SUBBLOCKS (block));
+      if (BLOCK_FRAGMENT_CHAIN (block)
+         && BLOCK_FRAGMENT_ORIGIN (block) == NULL_TREE)
+       BLOCK_FRAGMENT_CHAIN (block)
+         = block_fragments_nreverse (BLOCK_FRAGMENT_CHAIN (block));
+      prev = block;
+    }
+  return prev;
+}
+
+
  /* Identify BLOCKs referenced by more than one NOTE_INSN_BLOCK_{BEG,END},
     and create duplicate blocks.  */
  /* ??? Need an option to either create block fragments or to create
@@ -3910,7 +4070,7 @@ reorder_blocks (void)
  
    /* Recreate the block tree from the note nesting.  */
    reorder_blocks_1 (get_insns (), block, &block_stack);
-  BLOCK_SUBBLOCKS (block) = blocks_nreverse (BLOCK_SUBBLOCKS (block));
+  BLOCK_SUBBLOCKS (block) = blocks_nreverse_all (BLOCK_SUBBLOCKS (block));
  
    VEC_free (tree, heap, block_stack);
  }
@@ -3942,9 +4102,8 @@ reorder_blocks_1 (rtx insns, tree current_block, VEC(tree,heap) **p_block_stack)
               tree block = NOTE_BLOCK (insn);
               tree origin;
  
-             origin = (BLOCK_FRAGMENT_ORIGIN (block)
-                       ? BLOCK_FRAGMENT_ORIGIN (block)
-                       : block);
+             gcc_assert (BLOCK_FRAGMENT_ORIGIN (block) == NULL_TREE);
+             origin = block;
  
               /* If we have seen this block before, that means it now
                  spans multiple address regions.  Create a new fragment.  */
@@ -3981,8 +4140,6 @@ reorder_blocks_1 (rtx insns, tree current_block, VEC(tree,heap) **p_block_stack)
           else if (NOTE_KIND (insn) == NOTE_INSN_BLOCK_END)
             {
               NOTE_BLOCK (insn) = VEC_pop (tree, *p_block_stack);
-             BLOCK_SUBBLOCKS (current_block)
-               = blocks_nreverse (BLOCK_SUBBLOCKS (current_block));
               current_block = BLOCK_SUPERCONTEXT (current_block);
             }
         }
@@ -3995,16 +4152,44 @@ reorder_blocks_1 (rtx insns, tree current_block, VEC(tree,heap) **p_block_stack)
  tree
  blocks_nreverse (tree t)
  {
-  tree prev = 0, decl, next;
-  for (decl = t; decl; decl = next)
+  tree prev = 0, block, next;
+  for (block = t; block; block = next)
      {
-      next = BLOCK_CHAIN (decl);
-      BLOCK_CHAIN (decl) = prev;
-      prev = decl;
+      next = BLOCK_CHAIN (block);
+      BLOCK_CHAIN (block) = prev;
+      prev = block;
      }
    return prev;
  }
  
+/* Concatenate two chains of blocks (chained through BLOCK_CHAIN)
+   by modifying the last node in chain 1 to point to chain 2.
+   Return OP2 if OP1 is null and OP1 otherwise; when either chain is
+   null nothing is modified.  With ENABLE_TREE_CHECKING, additionally
+   assert that the last node of chain 1 is not also a node of chain 2,
+   since linking it to OP2 would then make the chain circular.  */
+
+tree
+block_chainon (tree op1, tree op2)
+{
+  tree t1;
+
+  if (!op1)
+    return op2;
+  if (!op2)
+    return op1;
+
+  for (t1 = op1; BLOCK_CHAIN (t1); t1 = BLOCK_CHAIN (t1))
+    continue;
+  BLOCK_CHAIN (t1) = op2;
+
+#ifdef ENABLE_TREE_CHECKING
+  {
+    tree t2;
+    for (t2 = op2; t2; t2 = BLOCK_CHAIN (t2))
+      gcc_assert (t2 != t1);
+  }
+#endif
+
+  return op1;
+}
+
  /* Count the subblocks of the list starting with BLOCK.  If VECTOR is
     non-NULL, list them all into VECTOR, in a depth-first preorder
     traversal of the block tree.  Also clear TREE_ASM_WRITTEN in all
@@ -4129,7 +4314,7 @@ invoke_set_current_function_hook (tree fndecl)
        if (optimization_current_node != opts)
         {
           optimization_current_node = opts;
-         cl_optimization_restore (TREE_OPTIMIZATION (opts));
+         cl_optimization_restore (&global_options, TREE_OPTIMIZATION (opts));
         }
  
        targetm.set_current_function (fndecl);
@@ -4177,6 +4362,13 @@ get_next_funcdef_no (void)
    return funcdef_no++;
  }
  
+/* Return the current value of funcdef_no without modifying it.  This
+   is the number that the next call to get_next_funcdef_no will hand
+   out, since that function returns funcdef_no and then increments
+   it.  */
+int
+get_last_funcdef_no (void)
+{
+  return funcdef_no;
+}
+
  /* Allocate a function structure for FNDECL and set its contents
     to the defaults.  Set cfun to the newly-allocated object.
     Some of the helper functions invoked during initialization assume
@@ -4224,11 +4416,7 @@ allocate_struct_function (tree fndecl, bool abstract_p)
           cfun->returns_struct = 1;
         }
  
-      cfun->stdarg
-       = (fntype
-          && TYPE_ARG_TYPES (fntype) != 0
-          && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
-              != void_type_node));
+      cfun->stdarg = stdarg_p (fntype);
  
        /* Assume all registers in stdarg functions need to be saved.  */
        cfun->va_list_gpr_size = VA_LIST_MAX_GPR_SIZE;
@@ -4263,6 +4451,12 @@ prepare_function_start (void)
    init_expr ();
    default_rtl_profile ();
  
+  if (flag_stack_usage_info)
+    {
+      cfun->su = ggc_alloc_cleared_stack_usage ();
+      cfun->su->static_stack_size = -1;
+    }
+
    cse_not_expected = ! optimize;
  
    /* Caller save not needed yet.  */
@@ -4307,6 +4501,7 @@ init_function_start (tree subr)
    else
      allocate_struct_function (subr, false);
    prepare_function_start ();
+  decide_function_section (subr);
  
    /* Warn if this value is an aggregate type,
       regardless of which calling convention we are using for it.  */
@@ -4561,7 +4756,7 @@ expand_function_start (tree subr)
        /* Mark the register as eliminable, similar to parameters.  */
        if (MEM_P (chain)
           && reg_mentioned_p (arg_pointer_rtx, XEXP (chain, 0)))
-       set_unique_reg_note (insn, REG_EQUIV, chain);
+       set_dst_reg_note (insn, REG_EQUIV, chain, local);
      }
  
    /* If the function receives a non-local goto, then store the
@@ -4577,11 +4772,12 @@ expand_function_start (tree subr)
        if (!DECL_RTL_SET_P (var))
         expand_decl (var);
  
-      t_save = build4 (ARRAY_REF, ptr_type_node,
+      t_save = build4 (ARRAY_REF,
+                      TREE_TYPE (TREE_TYPE (cfun->nonlocal_goto_save_area)),
                        cfun->nonlocal_goto_save_area,
                        integer_zero_node, NULL_TREE, NULL_TREE);
        r_save = expand_expr (t_save, NULL_RTX, VOIDmode, EXPAND_WRITE);
-      r_save = convert_memory_address (Pmode, r_save);
+      gcc_assert (GET_MODE (r_save) == Pmode);
  
        emit_move_insn (r_save, targetm.builtin_setjmp_frame_value ());
        update_nonlocal_goto_save_area ();
@@ -4604,9 +4800,8 @@ expand_function_start (tree subr)
  #endif
      }
  
-  /* After the display initializations is where the stack checking
-     probe should go.  */
-  if(flag_stack_check)
+  /* If we are doing generic stack checking, the probe should go here.  */
+  if (flag_stack_check == GENERIC_STACK_CHECK)
      stack_check_probe_note = emit_note (NOTE_INSN_DELETED);
  
    /* Make sure there is a line number after the function entry setup code.  */
@@ -4701,7 +4896,7 @@ do_warn_unused_parameter (tree fn)
    tree decl;
  
    for (decl = DECL_ARGUMENTS (fn);
-       decl; decl = TREE_CHAIN (decl))
+       decl; decl = DECL_CHAIN (decl))
      if (!TREE_USED (decl) && TREE_CODE (decl) == PARM_DECL
         && DECL_NAME (decl) && !DECL_ARTIFICIAL (decl)
         && !TREE_NO_WARNING (decl))
@@ -4740,6 +4935,7 @@ expand_function_end (void)
               probe_stack_range (STACK_OLD_CHECK_PROTECT, max_frame_size);
             seq = get_insns ();
             end_sequence ();
+           set_insn_locators (seq, prologue_locator);
             emit_insn_before (seq, stack_check_probe_note);
             break;
           }
@@ -4770,7 +4966,7 @@ expand_function_end (void)
    /* Output the label for the actual return from the function.  */
    emit_label (return_label);
  
-  if (USING_SJLJ_EXCEPTIONS)
+  if (targetm_common.except_unwind_info (&global_options) == UI_SJLJ)
      {
        /* Let except.c know where it should emit the call to unregister
          the function context for sjlj exceptions.  */
@@ -4928,7 +5124,8 @@ expand_function_end (void)
    /* @@@ This is a kludge.  We want to ensure that instructions that
       may trap are not moved into the epilogue by scheduling, because
       we don't always emit unwind information for the epilogue.  */
-  if (!USING_SJLJ_EXCEPTIONS && cfun->can_throw_non_call_exceptions)
+  if (cfun->can_throw_non_call_exceptions
+      && targetm_common.except_unwind_info (&global_options) != UI_SJLJ)
      emit_insn (gen_blockage ());
  
    /* If stack protection is enabled for this function, check the guard.  */
@@ -4941,10 +5138,15 @@ expand_function_end (void)
    if (! EXIT_IGNORE_STACK
        && cfun->calls_alloca)
      {
-      rtx tem = 0;
+      rtx tem = 0, seq;
+
+      start_sequence ();
+      emit_stack_save (SAVE_FUNCTION, &tem);
+      seq = get_insns ();
+      end_sequence ();
+      emit_insn_before (seq, parm_birth_insn);
  
-      emit_stack_save (SAVE_FUNCTION, &tem, parm_birth_insn);
-      emit_stack_restore (SAVE_FUNCTION, tem, NULL_RTX);
+      emit_stack_restore (SAVE_FUNCTION, tem);
      }
  
    /* ??? This should no longer be necessary since stupid is no longer with
@@ -4981,6 +5183,8 @@ get_arg_pointer_save_area (void)
        push_topmost_sequence ();
        emit_insn_after (seq, entry_of_function ());
        pop_topmost_sequence ();
+
+      crtl->arg_pointer_save_area_init = true;
      }
  
    return ret;
@@ -5007,19 +5211,25 @@ record_insns (rtx insns, rtx end, htab_t *hashp)
      }
  }
  
-/* INSN has been duplicated as COPY, as part of duping a basic block.
-   If INSN is an epilogue insn, then record COPY as epilogue as well.  */
+/* INSN has been duplicated or replaced by COPY, perhaps by duplicating a
+   basic block, splitting or peepholes.  If INSN is a prologue or epilogue
+   insn, then record COPY as well.  */
  
  void
-maybe_copy_epilogue_insn (rtx insn, rtx copy)
+maybe_copy_prologue_epilogue_insn (rtx insn, rtx copy)
  {
+  htab_t hash;
    void **slot;
  
-  if (epilogue_insn_hash == NULL
-      || htab_find (epilogue_insn_hash, insn) == NULL)
-    return;
+  hash = epilogue_insn_hash;
+  if (!hash || !htab_find (hash, insn))
+    {
+      hash = prologue_insn_hash;
+      if (!hash || !htab_find (hash, insn))
+       return;
+    }
  
-  slot = htab_find_slot (epilogue_insn_hash, copy, INSERT);
+  slot = htab_find_slot (hash, copy, INSERT);
    gcc_assert (*slot == NULL);
    *slot = copy;
  }
@@ -5067,35 +5277,580 @@ prologue_epilogue_contains (const_rtx insn)
    return 0;
  }
  
+#ifdef HAVE_simple_return
+
+/* Return true if INSN requires the stack frame to be set up.
+   PROLOGUE_USED contains the hard registers used in the function
+   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
+   prologue to set up for the function.
+
+   A call insn requires a frame unless it is a sibling call.  Any
+   other insn requires a frame if one of the following holds:
+   - a register it defines overlaps PROLOGUE_USED;
+   - it defines a hard register that is not in call_used_reg_set and
+     for which df_regs_ever_live_p is true;
+   - a register it uses overlaps SET_UP_BY_PROLOGUE.
+   NOTE(review): the used registers are accumulated into the same set
+   that still holds the defined registers left over from the second
+   check, so those definitions are tested against SET_UP_BY_PROLOGUE
+   as well -- confirm that this is intended.  */
+bool
+requires_stack_frame_p (rtx insn, HARD_REG_SET prologue_used,
+                       HARD_REG_SET set_up_by_prologue)
+{
+  df_ref *df_rec;
+  HARD_REG_SET hardregs;
+  unsigned regno;
+
+  if (CALL_P (insn))
+    return !SIBLING_CALL_P (insn);
+
+  CLEAR_HARD_REG_SET (hardregs);
+  for (df_rec = DF_INSN_DEFS (insn); *df_rec; df_rec++)
+    {
+      rtx dreg = DF_REF_REG (*df_rec);
+
+      if (!REG_P (dreg))
+       continue;
+
+      add_to_hard_reg_set (&hardregs, GET_MODE (dreg),
+                          REGNO (dreg));
+    }
+  if (hard_reg_set_intersect_p (hardregs, prologue_used))
+    return true;
+  AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
+  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+    if (TEST_HARD_REG_BIT (hardregs, regno)
+       && df_regs_ever_live_p (regno))
+      return true;
+
+  for (df_rec = DF_INSN_USES (insn); *df_rec; df_rec++)
+    {
+      rtx reg = DF_REF_REG (*df_rec);
+
+      if (!REG_P (reg))
+       continue;
+
+      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
+                          REGNO (reg));
+    }
+  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
+    return true;
+
+  return false;
+}
+
+/* See whether BB has a single successor that uses [REGNO, END_REGNO),
+   and if BB is its only predecessor.  Return that block if so,
+   otherwise return null.
+
+   A successor counts as using the range when any register in it is
+   in the successor's df_get_live_in set.  Null is returned when no
+   successor edge or more than one successor edge has the range live,
+   when the live edge leads to EXIT_BLOCK_PTR or is flagged
+   EDGE_ABNORMAL, and when the destination has more than one
+   predecessor edge.  */
+
+static basic_block
+next_block_for_reg (basic_block bb, int regno, int end_regno)
+{
+  edge e, live_edge;
+  edge_iterator ei;
+  bitmap live;
+  int i;
+
+  live_edge = NULL;
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    {
+      live = df_get_live_in (e->dest);
+      for (i = regno; i < end_regno; i++)
+       if (REGNO_REG_SET_P (live, i))
+         {
+           if (live_edge && live_edge != e)
+             return NULL;
+           live_edge = e;
+         }
+    }
+
+  /* We can sometimes encounter dead code.  Don't try to move it
+     into the exit block.  */
+  if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR)
+    return NULL;
+
+  /* Reject targets of abnormal edges.  This is needed for correctness
+     on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
+     exception edges even though it is generally treated as call-saved
+     for the majority of the compilation.  Moving across abnormal edges
+     isn't going to be interesting for shrink-wrap usage anyway.  */
+  if (live_edge->flags & EDGE_ABNORMAL)
+    return NULL;
+
+  if (EDGE_COUNT (live_edge->dest->preds) > 1)
+    return NULL;
+
+  return live_edge->dest;
+}
+
+/* Try to move INSN from BB to a successor.  Return true on success.
+   USES and DEFS are the set of registers that are used and defined
+   after INSN in BB.
+
+   Only a single_set whose source and destination are both registers
+   is considered.  On success a new insn with INSN's pattern is
+   emitted after the bb_note of the block finally chosen and INSN is
+   deleted.  The live-out/live-in bitmaps along the path and the
+   DF_LR use/def sets of the final block are adjusted to match.  */
+
+static bool
+move_insn_for_shrink_wrap (basic_block bb, rtx insn,
+                          const HARD_REG_SET uses,
+                          const HARD_REG_SET defs)
+{
+  rtx set, src, dest;
+  bitmap live_out, live_in, bb_uses, bb_defs;
+  unsigned int i, dregno, end_dregno, sregno, end_sregno;
+  basic_block next_block;
+
+  /* Look for a simple register copy.  */
+  set = single_set (insn);
+  if (!set)
+    return false;
+  src = SET_SRC (set);
+  dest = SET_DEST (set);
+  if (!REG_P (dest) || !REG_P (src))
+    return false;
+
+  /* Make sure that the source register isn't defined later in BB.  */
+  sregno = REGNO (src);
+  end_sregno = END_REGNO (src);
+  if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
+    return false;
+
+  /* Make sure that the destination register isn't referenced later in BB.  */
+  dregno = REGNO (dest);
+  end_dregno = END_REGNO (dest);
+  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
+      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
+    return false;
+
+  /* See whether there is a successor block to which we could move INSN.  */
+  next_block = next_block_for_reg (bb, dregno, end_dregno);
+  if (!next_block)
+    return false;
+
+  /* At this point we are committed to moving INSN, but let's try to
+     move it as far as we can.  */
+  do
+    {
+      live_out = df_get_live_out (bb);
+      live_in = df_get_live_in (next_block);
+      bb = next_block;
+
+      /* Check whether BB uses DEST or clobbers DEST.  We need to add
+        INSN to BB if so.  Either way, DEST is no longer live on entry,
+        except for any part that overlaps SRC (next loop).  */
+      bb_uses = &DF_LR_BB_INFO (bb)->use;
+      bb_defs = &DF_LR_BB_INFO (bb)->def;
+      for (i = dregno; i < end_dregno; i++)
+       {
+         if (REGNO_REG_SET_P (bb_uses, i) || REGNO_REG_SET_P (bb_defs, i))
+           next_block = NULL;
+         CLEAR_REGNO_REG_SET (live_out, i);
+         CLEAR_REGNO_REG_SET (live_in, i);
+       }
+
+      /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
+        Either way, SRC is now live on entry.  */
+      for (i = sregno; i < end_sregno; i++)
+       {
+         if (REGNO_REG_SET_P (bb_defs, i))
+           next_block = NULL;
+         SET_REGNO_REG_SET (live_out, i);
+         SET_REGNO_REG_SET (live_in, i);
+       }
+
+      /* If we don't need to add the move to BB, look for a single
+        successor block.  */
+      if (next_block)
+       next_block = next_block_for_reg (next_block, dregno, end_dregno);
+    }
+  while (next_block);
+
+  /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
+     (next loop).  */
+  for (i = dregno; i < end_dregno; i++)
+    {
+      CLEAR_REGNO_REG_SET (bb_uses, i);
+      SET_REGNO_REG_SET (bb_defs, i);
+    }
+
+  /* BB now uses SRC.  */
+  for (i = sregno; i < end_sregno; i++)
+    SET_REGNO_REG_SET (bb_uses, i);
+
+  emit_insn_after (PATTERN (insn), bb_note (bb));
+  delete_insn (insn);
+  return true;
+}
+
+/* Look for register copies in the first block of the function, and move
+   them down into successor blocks if the register is used only on one
+   path.  This exposes more opportunities for shrink-wrapping.  These
+   kinds of sets often occur when incoming argument registers are moved
+   to call-saved registers because their values are live across one or
+   more calls during the function.
+
+   The insns of ENTRY_BLOCK are walked with FOR_BB_INSNS_REVERSE_SAFE.
+   Every non-debug insn that could not be moved contributes its
+   hard-register definitions and uses to DEFS and USES, which are the
+   sets handed to move_insn_for_shrink_wrap for the insns visited after
+   it.  */
+
+static void
+prepare_shrink_wrap (basic_block entry_block)
+{
+  rtx insn, curr, x;
+  HARD_REG_SET uses, defs;
+  df_ref *ref;
+
+  CLEAR_HARD_REG_SET (uses);
+  CLEAR_HARD_REG_SET (defs);
+  FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
+    if (NONDEBUG_INSN_P (insn)
+       && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs))
+      {
+       /* Add all defined registers to DEFS.  */
+       for (ref = DF_INSN_DEFS (insn); *ref; ref++)
+         {
+           x = DF_REF_REG (*ref);
+           if (REG_P (x) && HARD_REGISTER_P (x))
+             SET_HARD_REG_BIT (defs, REGNO (x));
+         }
+
+       /* Add all used registers to USES.  */
+       for (ref = DF_INSN_USES (insn); *ref; ref++)
+         {
+           x = DF_REF_REG (*ref);
+           if (REG_P (x) && HARD_REGISTER_P (x))
+             SET_HARD_REG_BIT (uses, REGNO (x));
+         }
+      }
+}
+
+#endif
+
  #ifdef HAVE_return
-/* Insert gen_return at the end of block BB.  This also means updating
-   block_for_insn appropriately.  */
+/* Insert use of return register before the end of BB.  */
  
  static void
-emit_return_into_block (basic_block bb)
+emit_use_return_register_into_block (basic_block bb)
+{
+  rtx seq;
+  start_sequence ();
+  use_return_register ();
+  seq = get_insns ();
+  end_sequence ();
+  emit_insn_before (seq, BB_END (bb));
+}
+
+
+/* Create a return pattern, either simple_return or return, depending on
+   simple_p.  When HAVE_simple_return is not defined only the plain
+   return pattern is generated, and SIMPLE_P is asserted to be false.  */
+
+static rtx
+gen_return_pattern (bool simple_p)
+{
+#ifdef HAVE_simple_return
+  return simple_p ? gen_simple_return () : gen_return ();
+#else
+  gcc_assert (!simple_p);
+  return gen_return ();
+#endif
+}
+
+/* Insert an appropriate return pattern at the end of block BB.  This
+   also means updating block_for_insn appropriately.  SIMPLE_P is
+   the same as in gen_return_pattern and passed to it.  The JUMP_LABEL
+   of the new jump insn is set to its return pattern (the first element
+   when the pattern is a PARALLEL), which is asserted to satisfy
+   ANY_RETURN_P.  */
+
+static void
+emit_return_into_block (bool simple_p, basic_block bb)
+{
+  rtx jump, pat;
+  jump = emit_jump_insn_after (gen_return_pattern (simple_p), BB_END (bb));
+  pat = PATTERN (jump);
+  if (GET_CODE (pat) == PARALLEL)
+    pat = XVECEXP (pat, 0, 0);
+  gcc_assert (ANY_RETURN_P (pat));
+  JUMP_LABEL (jump) = pat;
+}
+#endif
+
+/* Set JUMP_LABEL for a return insn.  RETURNJUMP's pattern, or the first
+   element of it when it is a PARALLEL, becomes the label if it satisfies
+   ANY_RETURN_P; otherwise the label is set to ret_rtx.  */
+
+void
+set_return_jump_label (rtx returnjump)
+{
+  rtx pat = PATTERN (returnjump);
+  if (GET_CODE (pat) == PARALLEL)
+    pat = XVECEXP (pat, 0, 0);
+  if (ANY_RETURN_P (pat))
+    JUMP_LABEL (returnjump) = pat;
+  else
+    JUMP_LABEL (returnjump) = ret_rtx;
+}
+
+#ifdef HAVE_simple_return
+/* Create a copy of BB instructions and insert at BEFORE.  Redirect
+   preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE.
+   NEED_PROLOGUE is tested with the basic block index of each
+   predecessor.  A simple jump ending BB is not copied.  When dump_file
+   is set, the number of active insns in the copy is reported there.  */
+static void
+dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx before,
+                       bitmap_head *need_prologue)
+{
+  edge_iterator ei;
+  edge e;
+  rtx insn = BB_END (bb);
+
+  /* We know BB has a single successor, so there is no need to copy a
+     simple jump at the end of BB.  */
+  if (simplejump_p (insn))
+    insn = PREV_INSN (insn);
+
+  start_sequence ();
+  duplicate_insn_chain (BB_HEAD (bb), insn);
+  if (dump_file)
+    {
+      unsigned count = 0;
+      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+       if (active_insn_p (insn))
+         ++count;
+      fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n",
+              bb->index, copy_bb->index, count);
+    }
+  insn = get_insns ();
+  end_sequence ();
+  emit_insn_before (insn, before);
+
+  /* Redirect all the paths that need no prologue into copy_bb.  The
+     iterator is advanced only when the edge is kept; NOTE(review):
+     presumably redirecting removes the edge from BB's predecessor
+     list, so the next edge moves into the current position.  */
+  for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
+    if (!bitmap_bit_p (need_prologue, e->src->index))
+      {
+       redirect_edge_and_branch_force (e, copy_bb);
+       continue;
+      }
+    else
+      ei_next (&ei);
+}
+#endif
+
+#if defined (HAVE_return) || defined (HAVE_simple_return)
+/* Return true if there are any active insns between HEAD and TAIL.
+   The scan walks backwards from TAIL with PREV_INSN and includes both
+   end points; it also returns false if the insn chain ends before HEAD
+   is reached.  */
+static bool
+active_insn_between (rtx head, rtx tail)
+{
+  while (tail)
+    {
+      if (active_insn_p (tail))
+       return true;
+      if (tail == head)
+       return false;
+      tail = PREV_INSN (tail);
+    }
+  return false;
+}
+
+/* LAST_BB is a block that exits, and empty of active instructions.
+   Examine its predecessors for jumps that can be converted to
+   (conditional) returns.
+
+   SIMPLE_P selects simple_return rather than return as the pattern.
+   Only predecessors ending in a jump whose JUMP_LABEL is the head of
+   LAST_BB are considered, and the entry block is skipped.  When
+   HAVE_simple_return is defined and SIMPLE_P is true, the edges of
+   such jumps that could not be converted are pushed onto UNCONVERTED.
+   Return UNCONVERTED, as updated by those pushes.  */
+static VEC (edge, heap) *
+convert_jumps_to_returns (basic_block last_bb, bool simple_p,
+                         VEC (edge, heap) *unconverted ATTRIBUTE_UNUSED)
+{
+  int i;
+  basic_block bb;
+  rtx label;
+  edge_iterator ei;
+  edge e;
+  VEC(basic_block,heap) *src_bbs;
+
+  src_bbs = VEC_alloc (basic_block, heap, EDGE_COUNT (last_bb->preds));
+  FOR_EACH_EDGE (e, ei, last_bb->preds)
+    if (e->src != ENTRY_BLOCK_PTR)
+      VEC_quick_push (basic_block, src_bbs, e->src);
+
+  label = BB_HEAD (last_bb);
+
+  FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb)
+    {
+      rtx jump = BB_END (bb);
+
+      if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
+       continue;
+
+      e = find_edge (bb, last_bb);
+
+      /* If we have an unconditional jump, we can replace that
+        with a simple return instruction.  */
+      if (simplejump_p (jump))
+       {
+         /* The use of the return register might be present in the exit
+            fallthru block.  Either:
+            - removing the use is safe, and we should remove the use in
+            the exit fallthru block, or
+            - removing the use is not safe, and we should add it here.
+            For now, we conservatively choose the latter.  Either of the
+            2 helps in crossjumping.  */
+         emit_use_return_register_into_block (bb);
+
+         emit_return_into_block (simple_p, bb);
+         delete_insn (jump);
+       }
+
+      /* If we have a conditional jump branching to the last
+        block, we can try to replace that with a conditional
+        return instruction.  */
+      else if (condjump_p (jump))
+       {
+         rtx dest;
+
+         if (simple_p)
+           dest = simple_return_rtx;
+         else
+           dest = ret_rtx;
+         if (!redirect_jump (jump, dest, 0))
+           {
+#ifdef HAVE_simple_return
+             if (simple_p)
+               {
+                 if (dump_file)
+                   fprintf (dump_file,
+                            "Failed to redirect bb %d branch.\n", bb->index);
+                 VEC_safe_push (edge, heap, unconverted, e);
+               }
+#endif
+             continue;
+           }
+
+         /* See comment in simplejump_p case above.  */
+         emit_use_return_register_into_block (bb);
+
+         /* If this block has only one successor, it both jumps
+            and falls through to the fallthru block, so we can't
+            delete the edge.  */
+         if (single_succ_p (bb))
+           continue;
+       }
+      else
+       {
+#ifdef HAVE_simple_return
+         if (simple_p)
+           {
+             if (dump_file)
+               fprintf (dump_file,
+                        "Failed to redirect bb %d branch.\n", bb->index);
+             VEC_safe_push (edge, heap, unconverted, e);
+           }
+#endif
+         continue;
+       }
+
+      /* Fix up the CFG for the successful change we just made.  */
+      redirect_edge_succ (e, EXIT_BLOCK_PTR);
+      e->flags &= ~EDGE_CROSSING;
+    }
+  VEC_free (basic_block, heap, src_bbs);
+  return unconverted;
+}
+
+/* Emit a return insn for the exit fallthru block.  */
+static basic_block
+emit_return_for_exit (edge exit_fallthru_edge, bool simple_p)
  {
-  emit_jump_insn_after (gen_return (), BB_END (bb));
+  basic_block last_bb = exit_fallthru_edge->src;
+
+  if (JUMP_P (BB_END (last_bb)))
+    {
+      last_bb = split_edge (exit_fallthru_edge);
+      exit_fallthru_edge = single_succ_edge (last_bb);
+    }
+  emit_barrier_after (BB_END (last_bb));
+  emit_return_into_block (simple_p, last_bb);
+  exit_fallthru_edge->flags &= ~EDGE_FALLTHRU;
+  return last_bb;
  }
-#endif /* HAVE_return */
+#endif
+
  
  /* Generate the prologue and epilogue RTL if the machine supports it.  Thread
     this into place with notes indicating where the prologue ends and where
-   the epilogue begins.  Update the basic block information when possible.  */
+   the epilogue begins.  Update the basic block information when possible.
+
+   Notes on epilogue placement:
+   There are several kinds of edges to the exit block:
+   * a single fallthru edge from LAST_BB
+   * possibly, edges from blocks containing sibcalls
+   * possibly, fake edges from infinite loops
+
+   The epilogue is always emitted on the fallthru edge from the last basic
+   block in the function, LAST_BB, into the exit block.
+
+   If LAST_BB is empty except for a label, it is the target of every
+   other basic block in the function that ends in a return.  If a
+   target has a return or simple_return pattern (possibly with
+   conditional variants), these basic blocks can be changed so that a
+   return insn is emitted into them, and their target is adjusted to
+   the real exit block.
+
+   Notes on shrink wrapping: We implement a fairly conservative
+   version of shrink-wrapping rather than the textbook one.  We only
+   generate a single prologue and a single epilogue.  This is
+   sufficient to catch a number of interesting cases involving early
+   exits.
+
+   First, we identify the blocks that require the prologue to occur before
+   them.  These are the ones that modify a call-saved register, or reference
+   any of the stack or frame pointer registers.  To simplify things, we then
+   mark everything reachable from these blocks as also requiring a prologue.
+   This takes care of loops automatically, and avoids the need to examine
+   whether MEMs reference the frame, since it is sufficient to check for
+   occurrences of the stack or frame pointer.
+
+   We then compute the set of blocks for which the need for a prologue
+   is anticipatable (borrowing terminology from the shrink-wrapping
+   description in Muchnick's book).  These are the blocks which either
+   require a prologue themselves, or those that have only successors
+   where the prologue is anticipatable.  The prologue needs to be
+   inserted on all edges from BB1->BB2 where BB2 is in ANTIC and BB1
+   is not.  For the moment, we ensure that only one such edge exists.
+
+   The epilogue is placed as described above, but we make a
+   distinction between inserting return and simple_return patterns
+   when modifying other blocks that end in a return.  Blocks that end
+   in a sibcall omit the sibcall_epilogue if the block is not in
+   ANTIC.  */
  
  static void
  thread_prologue_and_epilogue_insns (void)
  {
-  int inserted = 0;
-  edge e;
-#if defined (HAVE_sibcall_epilogue) || defined (HAVE_epilogue) || defined (HAVE_return) || defined (HAVE_prologue)
-  rtx seq;
-#endif
-#if defined (HAVE_epilogue) || defined(HAVE_return)
-  rtx epilogue_end = NULL_RTX;
+  bool inserted;
+#ifdef HAVE_simple_return
+  VEC (edge, heap) *unconverted_simple_returns = NULL;
+  bool nonempty_prologue;
+  bitmap_head bb_flags;
+  unsigned max_grow_size;
  #endif
+  rtx returnjump;
+  rtx seq ATTRIBUTE_UNUSED, epilogue_end ATTRIBUTE_UNUSED;
+  rtx prologue_seq ATTRIBUTE_UNUSED, split_prologue_seq ATTRIBUTE_UNUSED;
+  edge e, entry_edge, orig_entry_edge, exit_fallthru_edge;
    edge_iterator ei;
  
+  df_analyze ();
+
    rtl_profile_for_bb (ENTRY_BLOCK_PTR);
+
+  inserted = false;
+  seq = NULL_RTX;
+  epilogue_end = NULL_RTX;
+  returnjump = NULL_RTX;
+
+  /* Can't deal with multiple successors of the entry block at the
+     moment.  Function should always have at least one entry
+     point.  */
+  gcc_assert (single_succ_p (ENTRY_BLOCK_PTR));
+  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR);
+  orig_entry_edge = entry_edge;
+
+  split_prologue_seq = NULL_RTX;
+  if (flag_split_stack
+      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
+         == NULL))
+    {
+#ifndef HAVE_split_stack_prologue
+      gcc_unreachable ();
+#else
+      gcc_assert (HAVE_split_stack_prologue);
+
+      start_sequence ();
+      emit_insn (gen_split_stack_prologue ());
+      split_prologue_seq = get_insns ();
+      end_sequence ();
+
+      record_insns (split_prologue_seq, NULL, &prologue_insn_hash);
+      set_insn_locators (split_prologue_seq, prologue_locator);
+#endif
+    }
+
+  prologue_seq = NULL_RTX;
  #ifdef HAVE_prologue
    if (HAVE_prologue)
      {
@@ -5112,132 +5867,454 @@ thread_prologue_and_epilogue_insns (void)
        record_insns (seq, NULL, &prologue_insn_hash);
        emit_note (NOTE_INSN_PROLOGUE_END);
  
-#ifndef PROFILE_BEFORE_PROLOGUE
        /* Ensure that instructions are not moved into the prologue when
          profiling is on.  The call to the profiling routine can be
          emitted within the live range of a call-clobbered register.  */
-      if (crtl->profile)
+      if (!targetm.profile_before_prologue () && crtl->profile)
          emit_insn (gen_blockage ());
-#endif
  
-      seq = get_insns ();
+      prologue_seq = get_insns ();
        end_sequence ();
-      set_insn_locators (seq, prologue_locator);
-
-      /* Can't deal with multiple successors of the entry block
-         at the moment.  Function should always have at least one
-         entry point.  */
-      gcc_assert (single_succ_p (ENTRY_BLOCK_PTR));
-
-      insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
-      inserted = 1;
+      set_insn_locators (prologue_seq, prologue_locator);
      }
  #endif
  
-  /* If the exit block has no non-fake predecessors, we don't need
-     an epilogue.  */
-  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
-    if ((e->flags & EDGE_FAKE) == 0)
-      break;
-  if (e == NULL)
-    goto epilogue_done;
+#ifdef HAVE_simple_return
+  bitmap_initialize (&bb_flags, &bitmap_default_obstack);
  
-  rtl_profile_for_bb (EXIT_BLOCK_PTR);
-#ifdef HAVE_return
-  if (optimize && HAVE_return)
+  /* Try to perform a kind of shrink-wrapping, making sure the
+     prologue/epilogue is emitted only around those parts of the
+     function that require it.  */
+
+  nonempty_prologue = false;
+  for (seq = prologue_seq; seq; seq = NEXT_INSN (seq))
+    if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END)
+      {
+       nonempty_prologue = true;
+       break;
+      }
+      
+  if (flag_shrink_wrap && HAVE_simple_return
+      && (targetm.profile_before_prologue () || !crtl->profile)
+      && nonempty_prologue && !crtl->calls_eh_return)
      {
-      /* If we're allowed to generate a simple return instruction,
-        then by definition we don't need a full epilogue.  Examine
-        the block that falls through to EXIT.   If it does not
-        contain any code, examine its predecessors and try to
-        emit (conditional) return instructions.  */
+      HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge;
+      HARD_REG_SET set_up_by_prologue;
+      rtx p_insn;
+      VEC(basic_block, heap) *vec;
+      basic_block bb;
+      bitmap_head bb_antic_flags;
+      bitmap_head bb_on_list;
+      bitmap_head bb_tail;
+
+      if (dump_file)
+       fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
+
+      /* Compute the registers set and used in the prologue.  */
+      CLEAR_HARD_REG_SET (prologue_clobbered);
+      CLEAR_HARD_REG_SET (prologue_used);
+      for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
+       {
+         HARD_REG_SET this_used;
+         if (!NONDEBUG_INSN_P (p_insn))
+           continue;
  
-      basic_block last;
-      rtx label;
+         CLEAR_HARD_REG_SET (this_used);
+         note_uses (&PATTERN (p_insn), record_hard_reg_uses,
+                    &this_used);
+         AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered);
+         IOR_HARD_REG_SET (prologue_used, this_used);
+         note_stores (PATTERN (p_insn), record_hard_reg_sets,
+                      &prologue_clobbered);
+       }
  
-      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
-       if (e->flags & EDGE_FALLTHRU)
-         break;
-      if (e == NULL)
-       goto epilogue_done;
-      last = e->src;
+      prepare_shrink_wrap (entry_edge->dest);
+
+      bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
+      bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
+      bitmap_initialize (&bb_tail, &bitmap_default_obstack);
+
+      /* Find the set of basic blocks that require a stack frame,
+        and blocks that are too big to be duplicated.  */
+
+      vec = VEC_alloc (basic_block, heap, n_basic_blocks);
+
+      CLEAR_HARD_REG_SET (set_up_by_prologue);
+      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
+      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
+      if (frame_pointer_needed)
+       add_to_hard_reg_set (&set_up_by_prologue, Pmode,
+                            HARD_FRAME_POINTER_REGNUM);
+      if (pic_offset_table_rtx)
+       add_to_hard_reg_set (&set_up_by_prologue, Pmode,
+                            PIC_OFFSET_TABLE_REGNUM);
+      if (stack_realign_drap && crtl->drap_reg)
+       add_to_hard_reg_set (&set_up_by_prologue, GET_MODE (crtl->drap_reg),
+                            REGNO (crtl->drap_reg));
+
+      /* We don't use a different max size depending on
+        optimize_bb_for_speed_p because increasing shrink-wrapping
+        opportunities by duplicating tail blocks can actually result
+        in an overall decrease in code size.  */
+      max_grow_size = get_uncond_jump_length ();
+      max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
+
+      FOR_EACH_BB (bb)
+       {
+         rtx insn;
+         unsigned size = 0;
+
+         FOR_BB_INSNS (bb, insn)
+           if (NONDEBUG_INSN_P (insn))
+             {
+               if (requires_stack_frame_p (insn, prologue_used,
+                                           set_up_by_prologue))
+                 {
+                   if (bb == entry_edge->dest)
+                     goto fail_shrinkwrap;
+                   bitmap_set_bit (&bb_flags, bb->index);
+                   VEC_quick_push (basic_block, vec, bb);
+                   break;
+                 }
+               else if (size <= max_grow_size)
+                 {
+                   size += get_attr_min_length (insn);
+                   if (size > max_grow_size)
+                     bitmap_set_bit (&bb_on_list, bb->index);
+                 }
+             }
+       }
+
+      /* Blocks that really need a prologue, or are too big for tails.  */
+      bitmap_ior_into (&bb_on_list, &bb_flags);
  
-      /* Verify that there are no active instructions in the last block.  */
-      label = BB_END (last);
-      while (label && !LABEL_P (label))
+      /* For every basic block that needs a prologue, mark all blocks
+        reachable from it, so as to ensure they are also seen as
+        requiring a prologue.  */
+      while (!VEC_empty (basic_block, vec))
         {
-         if (active_insn_p (label))
-           break;
-         label = PREV_INSN (label);
+         basic_block tmp_bb = VEC_pop (basic_block, vec);
+
+         FOR_EACH_EDGE (e, ei, tmp_bb->succs)
+           if (e->dest != EXIT_BLOCK_PTR
+               && bitmap_set_bit (&bb_flags, e->dest->index))
+             VEC_quick_push (basic_block, vec, e->dest);
         }
  
-      if (BB_HEAD (last) == label && LABEL_P (label))
+      /* Find the set of basic blocks that need no prologue, have a
+        single successor, can be duplicated, meet a max size
+        requirement, and go to the exit via like blocks.  */
+      VEC_quick_push (basic_block, vec, EXIT_BLOCK_PTR);
+      while (!VEC_empty (basic_block, vec))
         {
-         edge_iterator ei2;
+         basic_block tmp_bb = VEC_pop (basic_block, vec);
  
-         for (ei2 = ei_start (last->preds); (e = ei_safe_edge (ei2)); )
-           {
-             basic_block bb = e->src;
-             rtx jump;
+         FOR_EACH_EDGE (e, ei, tmp_bb->preds)
+           if (single_succ_p (e->src)
+               && !bitmap_bit_p (&bb_on_list, e->src->index)
+               && can_duplicate_block_p (e->src))
+             {
+               edge pe;
+               edge_iterator pei;
+
+               /* If there is a predecessor of e->src which doesn't
+                  need a prologue and the edge is complex,
+                  we might not be able to redirect the branch
+                  to a copy of e->src.  */
+               FOR_EACH_EDGE (pe, pei, e->src->preds)
+                 if ((pe->flags & EDGE_COMPLEX) != 0
+                     && !bitmap_bit_p (&bb_flags, pe->src->index))
+                   break;
+               if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index))
+                 VEC_quick_push (basic_block, vec, e->src);
+             }
+       }
+
+      /* Now walk backwards from every block that is marked as needing
+        a prologue to compute the bb_antic_flags bitmap.  Exclude
+        tail blocks; they can be duplicated to be used on paths not
+        needing a prologue.  */
+      bitmap_clear (&bb_on_list);
+      bitmap_and_compl (&bb_antic_flags, &bb_flags, &bb_tail);
+      FOR_EACH_BB (bb)
+       {
+         if (!bitmap_bit_p (&bb_antic_flags, bb->index))
+           continue;
+         FOR_EACH_EDGE (e, ei, bb->preds)
+           if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
+               && bitmap_set_bit (&bb_on_list, e->src->index))
+             VEC_quick_push (basic_block, vec, e->src);
+       }
+      while (!VEC_empty (basic_block, vec))
+       {
+         basic_block tmp_bb = VEC_pop (basic_block, vec);
+         bool all_set = true;
  
-             if (bb == ENTRY_BLOCK_PTR)
+         bitmap_clear_bit (&bb_on_list, tmp_bb->index);
+         FOR_EACH_EDGE (e, ei, tmp_bb->succs)
+           if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
+             {
+               all_set = false;
+               break;
+             }
+
+         if (all_set)
+           {
+             bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
+             FOR_EACH_EDGE (e, ei, tmp_bb->preds)
+               if (!bitmap_bit_p (&bb_antic_flags, e->src->index)
+                   && bitmap_set_bit (&bb_on_list, e->src->index))
+                 VEC_quick_push (basic_block, vec, e->src);
+           }
+       }
+      /* Find exactly one edge that leads to a block in ANTIC from
+        a block that isn't.  */
+      if (!bitmap_bit_p (&bb_antic_flags, entry_edge->dest->index))
+       FOR_EACH_BB (bb)
+         {
+           if (!bitmap_bit_p (&bb_antic_flags, bb->index))
+             continue;
+           FOR_EACH_EDGE (e, ei, bb->preds)
+             if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
                 {
-                 ei_next (&ei2);
-                 continue;
+                 if (entry_edge != orig_entry_edge)
+                   {
+                     entry_edge = orig_entry_edge;
+                     if (dump_file)
+                       fprintf (dump_file, "More than one candidate edge.\n");
+                     goto fail_shrinkwrap;
+                   }
+                 if (dump_file)
+                   fprintf (dump_file, "Found candidate edge for "
+                            "shrink-wrapping, %d->%d.\n", e->src->index,
+                            e->dest->index);
+                 entry_edge = e;
                 }
+         }
  
-             jump = BB_END (bb);
-             if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
-               {
-                 ei_next (&ei2);
+      if (entry_edge != orig_entry_edge)
+       {
+         /* Test whether the prologue is known to clobber any registers
+            (other than FP or SP) which are live on the edge.  */
+         CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
+         if (frame_pointer_needed)
+           CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
+         CLEAR_HARD_REG_SET (live_on_edge);
+         reg_set_to_hard_reg_set (&live_on_edge,
+                                  df_get_live_in (entry_edge->dest));
+         if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
+           {
+             entry_edge = orig_entry_edge;
+             if (dump_file)
+               fprintf (dump_file,
+                        "Shrink-wrapping aborted due to clobber.\n");
+           }
+       }
+      if (entry_edge != orig_entry_edge)
+       {
+         crtl->shrink_wrapped = true;
+         if (dump_file)
+           fprintf (dump_file, "Performing shrink-wrapping.\n");
+
+         /* Find tail blocks reachable from both blocks needing a
+            prologue and blocks not needing a prologue.  */
+         if (!bitmap_empty_p (&bb_tail))
+           FOR_EACH_BB (bb)
+             {
+               bool some_pro, some_no_pro;
+               if (!bitmap_bit_p (&bb_tail, bb->index))
                   continue;
-               }
+               some_pro = some_no_pro = false;
+               FOR_EACH_EDGE (e, ei, bb->preds)
+                 {
+                   if (bitmap_bit_p (&bb_flags, e->src->index))
+                     some_pro = true;
+                   else
+                     some_no_pro = true;
+                 }
+               if (some_pro && some_no_pro)
+                 VEC_quick_push (basic_block, vec, bb);
+               else
+                 bitmap_clear_bit (&bb_tail, bb->index);
+             }
+         /* Find the head of each tail.  */
+         while (!VEC_empty (basic_block, vec))
+           {
+             basic_block tbb = VEC_pop (basic_block, vec);
  
-             /* If we have an unconditional jump, we can replace that
-                with a simple return instruction.  */
-             if (simplejump_p (jump))
-               {
-                 emit_return_into_block (bb);
-                 delete_insn (jump);
-               }
+             if (!bitmap_bit_p (&bb_tail, tbb->index))
+               continue;
  
-             /* If we have a conditional jump, we can try to replace
-                that with a conditional return instruction.  */
-             else if (condjump_p (jump))
+             while (single_succ_p (tbb))
                 {
-                 if (! redirect_jump (jump, 0, 0))
-                   {
-                     ei_next (&ei2);
-                     continue;
-                   }
-
-                 /* If this block has only one successor, it both jumps
-                    and falls through to the fallthru block, so we can't
-                    delete the edge.  */
-                 if (single_succ_p (bb))
-                   {
-                     ei_next (&ei2);
-                     continue;
-                   }
+                 tbb = single_succ (tbb);
+                 bitmap_clear_bit (&bb_tail, tbb->index);
                 }
-             else
-               {
-                 ei_next (&ei2);
+           }
+         /* Now duplicate the tails.  */
+         if (!bitmap_empty_p (&bb_tail))
+           FOR_EACH_BB_REVERSE (bb)
+             {
+               basic_block copy_bb, tbb;
+               rtx insert_point;
+               int eflags;
+
+               if (!bitmap_clear_bit (&bb_tail, bb->index))
                   continue;
-               }
  
-             /* Fix up the CFG for the successful change we just made.  */
-             redirect_edge_succ (e, EXIT_BLOCK_PTR);
+               /* Create a copy of BB, instructions and all, for
+                  use on paths that don't need a prologue.
+                  Ideal placement of the copy is on a fall-thru edge
+                  or after a block that would jump to the copy.  */ 
+               FOR_EACH_EDGE (e, ei, bb->preds)
+                 if (!bitmap_bit_p (&bb_flags, e->src->index)
+                     && single_succ_p (e->src))
+                   break;
+               if (e)
+                 {
+                   copy_bb = create_basic_block (NEXT_INSN (BB_END (e->src)),
+                                                 NULL_RTX, e->src);
+                   BB_COPY_PARTITION (copy_bb, e->src);
+                 }
+               else
+                 {
+                   /* Otherwise put the copy at the end of the function.  */
+                   copy_bb = create_basic_block (NULL_RTX, NULL_RTX,
+                                                 EXIT_BLOCK_PTR->prev_bb);
+                   BB_COPY_PARTITION (copy_bb, bb);
+                 }
+
+               insert_point = emit_note_after (NOTE_INSN_DELETED,
+                                               BB_END (copy_bb));
+               emit_barrier_after (BB_END (copy_bb));
+
+               tbb = bb;
+               while (1)
+                 {
+                   dup_block_and_redirect (tbb, copy_bb, insert_point,
+                                           &bb_flags);
+                   tbb = single_succ (tbb);
+                   if (tbb == EXIT_BLOCK_PTR)
+                     break;
+                   e = split_block (copy_bb, PREV_INSN (insert_point));
+                   copy_bb = e->dest;
+                 }
+
+               /* Quiet verify_flow_info by (ab)using EDGE_FAKE.
+                  We have yet to add a simple_return to the tails,
+                  as we'd like to first convert_jumps_to_returns in
+                  case the block is no longer used after that.  */
+               eflags = EDGE_FAKE;
+               if (CALL_P (PREV_INSN (insert_point))
+                   && SIBLING_CALL_P (PREV_INSN (insert_point)))
+                 eflags = EDGE_SIBCALL | EDGE_ABNORMAL;
+               make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR, eflags);
+
+               /* verify_flow_info doesn't like a note after a
+                  sibling call.  */
+               delete_insn (insert_point);
+               if (bitmap_empty_p (&bb_tail))
+                 break;
+             }
+       }
+
+    fail_shrinkwrap:
+      bitmap_clear (&bb_tail);
+      bitmap_clear (&bb_antic_flags);
+      bitmap_clear (&bb_on_list);
+      VEC_free (basic_block, heap, vec);
+    }
+#endif
+
+  if (split_prologue_seq != NULL_RTX)
+    {
+      insert_insn_on_edge (split_prologue_seq, orig_entry_edge);
+      inserted = true;
+    }
+  if (prologue_seq != NULL_RTX)
+    {
+      insert_insn_on_edge (prologue_seq, entry_edge);
+      inserted = true;
+    }
+
+  /* If the exit block has no non-fake predecessors, we don't need
+     an epilogue.  */
+  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+    if ((e->flags & EDGE_FAKE) == 0)
+      break;
+  if (e == NULL)
+    goto epilogue_done;
+
+  rtl_profile_for_bb (EXIT_BLOCK_PTR);
+
+  exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
+
+  /* If we're allowed to generate a simple return instruction, then by
+     definition we don't need a full epilogue.  If the last basic
+     block before the exit block does not contain active instructions,
+     examine its predecessors and try to emit (conditional) return
+     instructions.  */
+#ifdef HAVE_simple_return
+  if (entry_edge != orig_entry_edge)
+    {
+      if (optimize)
+       {
+         unsigned i, last;
+
+         /* convert_jumps_to_returns may add to EXIT_BLOCK_PTR->preds
+            (but won't remove).  Stop at end of current preds.  */
+         last = EDGE_COUNT (EXIT_BLOCK_PTR->preds);
+         for (i = 0; i < last; i++)
+           {
+             e = EDGE_I (EXIT_BLOCK_PTR->preds, i);
+             if (LABEL_P (BB_HEAD (e->src))
+                 && !bitmap_bit_p (&bb_flags, e->src->index)
+                 && !active_insn_between (BB_HEAD (e->src), BB_END (e->src)))
+               unconverted_simple_returns
+                 = convert_jumps_to_returns (e->src, true,
+                                             unconverted_simple_returns);
             }
+       }
+
+      if (exit_fallthru_edge != NULL
+         && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0
+         && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index))
+       {
+         basic_block last_bb;
+
+         last_bb = emit_return_for_exit (exit_fallthru_edge, true);
+         returnjump = BB_END (last_bb);
+         exit_fallthru_edge = NULL;
+       }
+    }
+#endif
+#ifdef HAVE_return
+  if (HAVE_return)
+    {
+      if (exit_fallthru_edge == NULL)
+       goto epilogue_done;
+
+      if (optimize)
+       {
+         basic_block last_bb = exit_fallthru_edge->src;
  
-         /* Emit a return insn for the exit fallthru block.  Whether
-            this is still reachable will be determined later.  */
+         if (LABEL_P (BB_HEAD (last_bb))
+             && !active_insn_between (BB_HEAD (last_bb), BB_END (last_bb)))
+           convert_jumps_to_returns (last_bb, false, NULL);
  
-         emit_barrier_after (BB_END (last));
-         emit_return_into_block (last);
-         epilogue_end = BB_END (last);
-         single_succ_edge (last)->flags &= ~EDGE_FALLTHRU;
-         goto epilogue_done;
+         if (EDGE_COUNT (last_bb->preds) != 0
+             && single_succ_p (last_bb))
+           {
+             last_bb = emit_return_for_exit (exit_fallthru_edge, false);
+             epilogue_end = returnjump = BB_END (last_bb);
+#ifdef HAVE_simple_return
+             /* Emitting the return may add a basic block.
+                Fix bb_flags for the added block.  */
+             if (last_bb != exit_fallthru_edge->src)
+               bitmap_set_bit (&bb_flags, last_bb->index);
+#endif
+             goto epilogue_done;
+           }
         }
      }
  #endif
@@ -5273,15 +6350,10 @@ thread_prologue_and_epilogue_insns (void)
      }
  #endif
  
-  /* Find the edge that falls through to EXIT.  Other edges may exist
-     due to RETURN instructions, but those don't need epilogues.
-     There really shouldn't be a mixture -- either all should have
-     been converted or none, however...  */
+  /* If nothing falls through into the exit block, we don't need an
+     epilogue.  */
  
-  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
-    if (e->flags & EDGE_FALLTHRU)
-      break;
-  if (e == NULL)
+  if (exit_fallthru_edge == NULL)
      goto epilogue_done;
  
  #ifdef HAVE_epilogue
@@ -5290,32 +6362,37 @@ thread_prologue_and_epilogue_insns (void)
        start_sequence ();
        epilogue_end = emit_note (NOTE_INSN_EPILOGUE_BEG);
        seq = gen_epilogue ();
-      emit_jump_insn (seq);
+      if (seq)
+       emit_jump_insn (seq);
  
        /* Retain a map of the epilogue insns.  */
        record_insns (seq, NULL, &epilogue_insn_hash);
        set_insn_locators (seq, epilogue_locator);
  
        seq = get_insns ();
+      returnjump = get_last_insn ();
        end_sequence ();
  
-      insert_insn_on_edge (seq, e);
-      inserted = 1;
+      insert_insn_on_edge (seq, exit_fallthru_edge);
+      inserted = true;
+
+      if (JUMP_P (returnjump))
+       set_return_jump_label (returnjump);
      }
    else
  #endif
      {
        basic_block cur_bb;
  
-      if (! next_active_insn (BB_END (e->src)))
+      if (! next_active_insn (BB_END (exit_fallthru_edge->src)))
         goto epilogue_done;
        /* We have a fall-through edge to the exit block, the source is not
           at the end of the function, and there will be an assembler epilogue
           at the end of the function.
           We can't use force_nonfallthru here, because that would try to
-         use return.  Inserting a jump 'by hand' is extremely messy, so
+        use return.  Inserting a jump 'by hand' is extremely messy, so
          we take advantage of cfg_layout_finalize using
-       fixup_fallthru_exit_predecessor.  */
+        fixup_fallthru_exit_predecessor.  */
        cfg_layout_initialize (0);
        FOR_EACH_BB (cur_bb)
         if (cur_bb->index >= NUM_FIXED_BLOCKS
@@ -5323,13 +6400,25 @@ thread_prologue_and_epilogue_insns (void)
           cur_bb->aux = cur_bb->next_bb;
        cfg_layout_finalize ();
      }
+
  epilogue_done:
+
    default_rtl_profile ();
  
    if (inserted)
      {
+      sbitmap blocks;
+
        commit_edge_insertions ();
  
+      /* Look for basic blocks within the prologue insns.  */
+      blocks = sbitmap_alloc (last_basic_block);
+      sbitmap_zero (blocks);
+      SET_BIT (blocks, entry_edge->dest->index);
+      SET_BIT (blocks, orig_entry_edge->dest->index);
+      find_many_sub_basic_blocks (blocks);
+      sbitmap_free (blocks);
+
        /* The epilogue insns we inserted may cause the exit edge to no longer
          be fallthru.  */
        FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
@@ -5340,33 +6429,138 @@ epilogue_done:
         }
      }
  
+#ifdef HAVE_simple_return
+  /* If there were branches to an empty LAST_BB which we tried to
+     convert to conditional simple_returns, but couldn't for some
+     reason, create a block to hold a simple_return insn and redirect
+     those remaining edges.  */
+  if (!VEC_empty (edge, unconverted_simple_returns))
+    {
+      basic_block simple_return_block_hot = NULL;
+      basic_block simple_return_block_cold = NULL;
+      edge pending_edge_hot = NULL;
+      edge pending_edge_cold = NULL;
+      basic_block exit_pred = EXIT_BLOCK_PTR->prev_bb;
+      int i;
+
+      gcc_assert (entry_edge != orig_entry_edge);
+
+      /* See if we can reuse the last insn that was emitted for the
+        epilogue.  */
+      if (returnjump != NULL_RTX
+         && JUMP_LABEL (returnjump) == simple_return_rtx)
+       {
+         e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump));
+         if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
+           simple_return_block_hot = e->dest;
+         else
+           simple_return_block_cold = e->dest;
+       }
+
+      /* Also check returns we might need to add to tail blocks.  */
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+       if (EDGE_COUNT (e->src->preds) != 0
+           && (e->flags & EDGE_FAKE) != 0
+           && !bitmap_bit_p (&bb_flags, e->src->index))
+         {
+           if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
+             pending_edge_hot = e;
+           else
+             pending_edge_cold = e;
+         }
+
+      FOR_EACH_VEC_ELT (edge, unconverted_simple_returns, i, e)
+       {
+         basic_block *pdest_bb;
+         edge pending;
+
+         if (BB_PARTITION (e->src) == BB_HOT_PARTITION)
+           {
+             pdest_bb = &simple_return_block_hot;
+             pending = pending_edge_hot;
+           }
+         else
+           {
+             pdest_bb = &simple_return_block_cold;
+             pending = pending_edge_cold;
+           }
+
+         if (*pdest_bb == NULL && pending != NULL)
+           {
+             emit_return_into_block (true, pending->src);
+             pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
+             *pdest_bb = pending->src;
+           }
+         else if (*pdest_bb == NULL)
+           {
+             basic_block bb;
+             rtx start;
+
+             bb = create_basic_block (NULL, NULL, exit_pred);
+             BB_COPY_PARTITION (bb, e->src);
+             start = emit_jump_insn_after (gen_simple_return (),
+                                           BB_END (bb));
+             JUMP_LABEL (start) = simple_return_rtx;
+             emit_barrier_after (start);
+
+             *pdest_bb = bb;
+             make_edge (bb, EXIT_BLOCK_PTR, 0);
+           }
+         redirect_edge_and_branch_force (e, *pdest_bb);
+       }
+      VEC_free (edge, heap, unconverted_simple_returns);
+    }
+
+  if (entry_edge != orig_entry_edge)
+    {
+      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+       if (EDGE_COUNT (e->src->preds) != 0
+           && (e->flags & EDGE_FAKE) != 0
+           && !bitmap_bit_p (&bb_flags, e->src->index))
+         {
+           emit_return_into_block (true, e->src);
+           e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE);
+         }
+    }
+#endif
+
  #ifdef HAVE_sibcall_epilogue
    /* Emit sibling epilogues before any sibling call sites.  */
    for (ei = ei_start (EXIT_BLOCK_PTR->preds); (e = ei_safe_edge (ei)); )
      {
        basic_block bb = e->src;
        rtx insn = BB_END (bb);
+      rtx ep_seq;
  
        if (!CALL_P (insn)
-         || ! SIBLING_CALL_P (insn))
+         || ! SIBLING_CALL_P (insn)
+#ifdef HAVE_simple_return
+         || (entry_edge != orig_entry_edge
+             && !bitmap_bit_p (&bb_flags, bb->index))
+#endif
+         )
         {
           ei_next (&ei);
           continue;
         }
  
-      start_sequence ();
-      emit_note (NOTE_INSN_EPILOGUE_BEG);
-      emit_insn (gen_sibcall_epilogue ());
-      seq = get_insns ();
-      end_sequence ();
+      ep_seq = gen_sibcall_epilogue ();
+      if (ep_seq)
+       {
+         start_sequence ();
+         emit_note (NOTE_INSN_EPILOGUE_BEG);
+         emit_insn (ep_seq);
+         seq = get_insns ();
+         end_sequence ();
  
-      /* Retain a map of the epilogue insns.  Used in life analysis to
-        avoid getting rid of sibcall epilogue insns.  Do this before we
-        actually emit the sequence.  */
-      record_insns (seq, NULL, &epilogue_insn_hash);
-      set_insn_locators (seq, epilogue_locator);
+         /* Retain a map of the epilogue insns.  Used in life analysis to
+            avoid getting rid of sibcall epilogue insns.  Do this before we
+            actually emit the sequence.  */
+         record_insns (seq, NULL, &epilogue_insn_hash);
+         set_insn_locators (seq, epilogue_locator);
  
-      emit_insn_before (seq, insn);
+         emit_insn_before (seq, insn);
+       }
        ei_next (&ei);
      }
  #endif
@@ -5391,6 +6585,10 @@ epilogue_done:
      }
  #endif
  
+#ifdef HAVE_simple_return
+  bitmap_clear (&bb_flags);
+#endif
+
    /* Threading the prologue and epilogue changes the artificial refs
       in the entry and exit blocks.  */
    epilogue_completed = 1;
@@ -5551,6 +6749,8 @@ used_types_insert (tree t)
        break;
      else
        t = TREE_TYPE (t);
+  if (TREE_CODE (t) == ERROR_MARK)
+    return;
    if (TYPE_NAME (t) == NULL_TREE
        || TYPE_NAME (t) == TYPE_NAME (TYPE_MAIN_VARIANT (t)))
      t = TYPE_MAIN_VARIANT (t);
@@ -5653,12 +6853,17 @@ rest_of_handle_thread_prologue_and_epilogue (void)
  {
    if (optimize)
      cleanup_cfg (CLEANUP_EXPENSIVE);
+
    /* On some machines, the prologue and epilogue code, or parts thereof,
       can be represented as RTL.  Doing so lets us schedule insns between
       it and the rest of the code and also allows delayed branch
       scheduling to operate in the epilogue.  */
-
    thread_prologue_and_epilogue_insns ();
+
+  /* The stack usage info is finalized during prologue expansion.  */
+  if (flag_stack_usage_info)
+    output_stack_usage ();
+
    return 0;
  }
  
@@ -5677,7 +6882,6 @@ struct rtl_opt_pass pass_thread_prologue_and_epilogue =
    0,                                    /* properties_provided */
    0,                                    /* properties_destroyed */
    TODO_verify_flow,                     /* todo_flags_start */
-  TODO_dump_func |
    TODO_df_verify |
    TODO_df_finish | TODO_verify_rtl_sharing |
    TODO_ggc_collect                      /* todo_flags_finish */
@@ -5879,7 +7083,7 @@ struct rtl_opt_pass pass_match_asm_constraints =
    0,                                    /* properties_provided */
    0,                                    /* properties_destroyed */
    0,                                   /* todo_flags_start */
-  TODO_dump_func                       /* todo_flags_finish */
+  0                                     /* todo_flags_finish */
   }
  };