PR target/6512, PR target/5628

[pf3gnuchains/gcc-fork.git] / gcc / config / sparc / sparc.h
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h

index d48b8dd..d70f542 100644 (file)
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -122,9 +122,11 @@ extern enum cmodel sparc_cmodel;
  #define TARGET_CPU_sparcv9     7       /* alias */
  #define TARGET_CPU_sparc64     7       /* alias */
  #define TARGET_CPU_ultrasparc  8
+#define TARGET_CPU_ultrasparc3 9
  
  #if TARGET_CPU_DEFAULT == TARGET_CPU_v9 \
- || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc \
+ || TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
  
  #define CPP_CPU32_DEFAULT_SPEC ""
  #define ASM_CPU32_DEFAULT_SPEC ""
@@ -141,6 +143,10 @@ extern enum cmodel sparc_cmodel;
  #define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
  #define ASM_CPU64_DEFAULT_SPEC "-Av9a"
  #endif
+#if TARGET_CPU_DEFAULT == TARGET_CPU_ultrasparc3
+#define CPP_CPU64_DEFAULT_SPEC "-D__sparc_v9__"
+#define ASM_CPU64_DEFAULT_SPEC "-Av9b"
+#endif
  
  #else
  
@@ -230,6 +236,7 @@ Unrecognized value in TARGET_CPU_DEFAULT.
  %{mcpu=sparclite86x:-D__sparclite86x__} \
  %{mcpu=v9:-D__sparc_v9__} \
  %{mcpu=ultrasparc:-D__sparc_v9__} \
+%{mcpu=ultrasparc3:-D__sparc_v9__} \
  %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(cpp_cpu_default)}}}}}}} \
  "
  
@@ -296,6 +303,7 @@ Unrecognized value in TARGET_CPU_DEFAULT.
  %{mv8plus:-Av8plus} \
  %{mcpu=v9:-Av9} \
  %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
+%{mcpu=ultrasparc3:%{!mv8plus:-Av9b}} \
  %{!mcpu*:%{!mcypress:%{!msparclite:%{!mf930:%{!mf934:%{!mv8:%{!msupersparc:%(asm_cpu_default)}}}}}}} \
  "
  
@@ -623,7 +631,8 @@ enum processor_type {
    PROCESSOR_SPARCLET,
    PROCESSOR_TSC701,
    PROCESSOR_V9,
-  PROCESSOR_ULTRASPARC
+  PROCESSOR_ULTRASPARC,
+  PROCESSOR_ULTRASPARC3
  };
  
  /* This is set from -m{cpu,tune}=xxx.  */
@@ -1414,7 +1423,10 @@ extern const char leaf_reg_remap[];
  
     We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
-   a paradoxical subreg in a float/fix conversion insn.  */
+   a paradoxical subreg in a float/fix conversion insn.
+
+   We need a temporary when loading/storing a DFmode value between
+   unaligned memory and the upper FPU registers.  */
  
  #define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, IN)          \
    ((FP_REG_CLASS_P (CLASS)                                     \
@@ -1423,28 +1435,36 @@ extern const char leaf_reg_remap[];
          || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG)  \
              && true_regnum (IN) == -1)))                       \
     ? GENERAL_REGS                                              \
-   : (((TARGET_CM_MEDANY                                       \
-        && symbolic_operand ((IN), (MODE)))                    \
-       || (TARGET_CM_EMBMEDANY                                 \
-           && text_segment_operand ((IN), (MODE))))            \
-      && !flag_pic)                                            \
-     ? GENERAL_REGS                                            \
-     : NO_REGS)
+   : ((CLASS) == EXTRA_FP_REGS && (MODE) == DFmode             \
+      && GET_CODE (IN) == MEM && TARGET_ARCH32                 \
+      && ! mem_min_alignment ((IN), 8))                                \
+     ? FP_REGS                                                 \
+     : (((TARGET_CM_MEDANY                                     \
+         && symbolic_operand ((IN), (MODE)))                   \
+        || (TARGET_CM_EMBMEDANY                                \
+            && text_segment_operand ((IN), (MODE))))           \
+       && !flag_pic)                                           \
+       ? GENERAL_REGS                                          \
+       : NO_REGS)
  
  #define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, IN)         \
-   ((FP_REG_CLASS_P (CLASS)                                    \
+  ((FP_REG_CLASS_P (CLASS)                                     \
       && ((MODE) == HImode || (MODE) == QImode)                 \
       && (GET_CODE (IN) == MEM                                  \
           || ((GET_CODE (IN) == REG || GET_CODE (IN) == SUBREG) \
               && true_regnum (IN) == -1)))                      \
-    ? GENERAL_REGS                                             \
-   : (((TARGET_CM_MEDANY                                       \
-        && symbolic_operand ((IN), (MODE)))                    \
-       || (TARGET_CM_EMBMEDANY                                 \
-           && text_segment_operand ((IN), (MODE))))            \
-      && !flag_pic)                                            \
-     ? GENERAL_REGS                                            \
-     : NO_REGS)
+   ? GENERAL_REGS                                              \
+   : ((CLASS) == EXTRA_FP_REGS && (MODE) == DFmode             \
+      && GET_CODE (IN) == MEM && TARGET_ARCH32                 \
+      && ! mem_min_alignment ((IN), 8))                                \
+     ? FP_REGS                                                 \
+     : (((TARGET_CM_MEDANY                                     \
+         && symbolic_operand ((IN), (MODE)))                   \
+        || (TARGET_CM_EMBMEDANY                                \
+            && text_segment_operand ((IN), (MODE))))           \
+       && !flag_pic)                                           \
+       ? GENERAL_REGS                                          \
+       : NO_REGS)
  
  /* On SPARC it is not possible to directly move data between 
     GENERAL_REGS and FP_REGS.  */
@@ -1902,8 +1922,25 @@ do {                                                                     \
  #define STRICT_ARGUMENT_NAMING TARGET_V9
  
  /* We do not allow sibling calls if -mflat, nor
-   we do not allow indirect calls to be optimized into sibling calls.  */
-#define FUNCTION_OK_FOR_SIBCALL(DECL) (DECL && ! TARGET_FLAT)
+   do we allow indirect calls to be optimized into sibling calls.
+
+   Also, on sparc 32-bit we cannot emit a sibling call when the
+   current function returns a structure.  This is because the "unimp
+   after call" convention would cause the callee to return to the
+   wrong place.  The generic code already disallows cases where the
+   function being called returns a structure.
+
+   It may seem strange how this last case could occur.  Usually there
+   is code after the call which jumps to epilogue code which dumps the
+   return value into the struct return area.  That ought to invalidate
+   the sibling call, right?  Well, in the C++ case we can end up passing
+   the pointer to the struct return area to a constructor (which returns
+   void) and then nothing else happens.  Such a sibling call would look
+   valid without the added check here.  */
+#define FUNCTION_OK_FOR_SIBCALL(DECL) \
+       (DECL \
+        && ! TARGET_FLAT \
+        && (TARGET_ARCH64 || ! current_function_returns_struct))
  
  /* Generate RTL to flush the register windows so as to make arbitrary frames
     available.  */
@@ -2605,7 +2642,25 @@ do {                                                                    \
    (((FP_REG_CLASS_P (CLASS1) && GENERAL_OR_I64 (CLASS2)) \
      || (GENERAL_OR_I64 (CLASS1) && FP_REG_CLASS_P (CLASS2)) \
      || (CLASS1) == FPCC_REGS || (CLASS2) == FPCC_REGS)         \
-   ? (sparc_cpu == PROCESSOR_ULTRASPARC ? 12 : 6) : 2)
+   ? ((sparc_cpu == PROCESSOR_ULTRASPARC \
+       || sparc_cpu == PROCESSOR_ULTRASPARC3) ? 12 : 6) : 2)
+
+/* Provide the cost of a branch.  For pre-v9 processors we use
+   a value of 3 to take into account the potential annulling of
+   the delay slot (which ends up being a bubble in the pipeline slot)
+   plus a cycle to take into consideration the instruction cache
+   effects.
+
+   On v9 and later, which have branch prediction facilities, we set
+   it to the depth of the pipeline as that is the cost of a
+   mispredicted branch.  */
+
+#define BRANCH_COST \
+       ((sparc_cpu == PROCESSOR_V9 \
+         || sparc_cpu == PROCESSOR_ULTRASPARC) \
+        ? 7 \
+         : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
+            ? 9 : 3))
  
  /* Provide the costs of a rtl expression.  This is in the body of a
     switch on CODE.  The purpose for the cost of MULT is to encourage
@@ -2619,6 +2674,8 @@ do {                                                                    \
      if (sparc_cpu == PROCESSOR_ULTRASPARC)             \
        return (GET_MODE (X) == DImode ?                 \
                COSTS_N_INSNS (34) : COSTS_N_INSNS (19));        \
+    if (sparc_cpu == PROCESSOR_ULTRASPARC3)            \
+      return COSTS_N_INSNS (6);                                \
      return TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25); \
    case DIV:                                            \
    case UDIV:                                           \
@@ -2627,12 +2684,26 @@ do {                                                                    \
      if (sparc_cpu == PROCESSOR_ULTRASPARC)             \
        return (GET_MODE (X) == DImode ?                 \
                COSTS_N_INSNS (68) : COSTS_N_INSNS (37));        \
+    if (sparc_cpu == PROCESSOR_ULTRASPARC3)            \
+      return (GET_MODE (X) == DImode ?                 \
+              COSTS_N_INSNS (71) : COSTS_N_INSNS (40));        \
      return COSTS_N_INSNS (25);                         \
    /* Make FLOAT and FIX more expensive than CONST_DOUBLE,\
       so that cse will favor the latter.  */            \
    case FLOAT:                                          \
    case FIX:                                            \
      return 19;
+
+#define PREFETCH_BLOCK \
+       ((sparc_cpu == PROCESSOR_ULTRASPARC \
+          || sparc_cpu == PROCESSOR_ULTRASPARC3) \
+         ? 64 : 32)
+
+#define SIMULTANEOUS_PREFETCHES \
+       ((sparc_cpu == PROCESSOR_ULTRASPARC) \
+         ? 2 \
+         : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
+            ? 8 : 3))
  \f
  /* Control the assembler format that we output.  */
  
@@ -2947,6 +3018,7 @@ do {                                                                      \
  #define PREDICATE_CODES                                                        \
  {"reg_or_0_operand", {SUBREG, REG, CONST_INT, CONST_DOUBLE}},          \
  {"fp_zero_operand", {CONST_DOUBLE}},                                   \
+{"fp_register_operand", {SUBREG, REG}},                                        \
  {"intreg_operand", {SUBREG, REG}},                                     \
  {"fcc_reg_operand", {REG}},                                            \
  {"fcc0_reg_operand", {REG}},                                           \