OSDN Git Service

Add -mveclibabi=mass
authormeissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 23 Aug 2010 16:41:10 +0000 (16:41 +0000)
committermeissner <meissner@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 23 Aug 2010 16:41:10 +0000 (16:41 +0000)
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@163470 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/rs6000/rs6000.c
gcc/config/rs6000/rs6000.opt
gcc/doc/invoke.texi
gcc/testsuite/ChangeLog

index 13eb21b..406909b 100644 (file)
@@ -1,3 +1,16 @@
+2010-08-23  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * config/rs6000/rs6000.opt (-mmass): New option to enable the
+       compiler to autovectorize mathematical functions for power7 using
+       the Mathematical Acceleration Subsystem library.
+
+       * config/rs6000/rs6000.c (rs6000_builtin_vectorized_libmass): New
+       function to handle auto vectorizing math functions that are in the
+       MASS library.
+       (rs6000_builtin_vectorized_function): Call it.
+
+       * doc/invoke.texi (RS/6000 and PowerPC Options): Document -mmass.
+
 2010-08-22  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
 
        PR boehm-gc/34544
index 3928901..fc15198 100644 (file)
@@ -989,6 +989,7 @@ static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
 static rtx rs6000_make_savres_rtx (rs6000_stack_t *, rtx, int,
                                   enum machine_mode, bool, bool, bool);
 static bool rs6000_reg_live_or_pic_offset_p (int);
+static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
 static tree rs6000_builtin_vectorized_function (tree, tree, tree);
 static int rs6000_savres_strategy (rs6000_stack_t *, bool, int, int);
 static void rs6000_restore_saved_cr (rtx, int);
@@ -3602,6 +3603,145 @@ rs6000_parse_fpu_option (const char *option)
   return FPU_NONE;
 }
 
+
+/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
+   library with vectorized intrinsics.
+
+   FNDECL is the scalar math builtin being vectorized; TYPE_OUT and TYPE_IN
+   are the result and argument vector types.  On success return a new
+   external FUNCTION_DECL whose name is the builtin's name with the
+   "__builtin_" prefix removed and a vector suffix appended ("d2" for two
+   doubles, "4" for four floats, so pow becomes powd2 and powf becomes
+   powf4).  Return NULL_TREE when the call cannot be mapped.  */
+
+static tree
+rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
+{
+  char name[32];
+  const char *suffix = NULL;
+  tree fntype, new_fndecl, bdecl = NULL_TREE;
+  int n_args = 1;
+  const char *bname;
+  enum machine_mode el_mode, in_mode;
+  int n, in_n;
+  enum built_in_function fn;
+
+  /* Libmass is suitable for unsafe math only as it does not correctly support
+     parts of IEEE with the required precision such as denormals.  Only support
+     it if we have VSX to use the simd d2 or f4 functions.
+     XXX: Add variable length support.  */
+  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
+    return NULL_TREE;
+
+  /* The argument and result vectors must have the same element mode and the
+     same number of elements.  */
+  el_mode = TYPE_MODE (TREE_TYPE (type_out));
+  n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  if (el_mode != in_mode || n != in_n)
+    return NULL_TREE;
+
+  /* Only BUILT_IN_NORMAL builtins are candidates.  */
+  if (DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
+    return NULL_TREE;
+
+  fn = DECL_FUNCTION_CODE (fndecl);
+  switch (fn)
+    {
+    /* Double precision, two arguments.  */
+    case BUILT_IN_ATAN2:
+    case BUILT_IN_HYPOT:
+    case BUILT_IN_POW:
+      n_args = 2;
+      /* fall through */
+
+    /* Double precision, one argument.  */
+    case BUILT_IN_ACOS:
+    case BUILT_IN_ACOSH:
+    case BUILT_IN_ASIN:
+    case BUILT_IN_ASINH:
+    case BUILT_IN_ATAN:
+    case BUILT_IN_ATANH:
+    case BUILT_IN_CBRT:
+    case BUILT_IN_COS:
+    case BUILT_IN_COSH:
+    case BUILT_IN_ERF:
+    case BUILT_IN_ERFC:
+    case BUILT_IN_EXP2:
+    case BUILT_IN_EXP:
+    case BUILT_IN_EXPM1:
+    case BUILT_IN_LGAMMA:
+    case BUILT_IN_LOG10:
+    case BUILT_IN_LOG1P:
+    case BUILT_IN_LOG2:
+    case BUILT_IN_LOG:
+    case BUILT_IN_SIN:
+    case BUILT_IN_SINH:
+    case BUILT_IN_SQRT:
+    case BUILT_IN_TAN:
+    case BUILT_IN_TANH:
+      bdecl = implicit_built_in_decls[fn];
+      suffix = "d2";                            /* pow -> powd2 */
+      if (el_mode != DFmode || n != 2)
+        return NULL_TREE;
+      break;
+
+    /* Single precision, two arguments.  */
+    case BUILT_IN_ATAN2F:
+    case BUILT_IN_HYPOTF:
+    case BUILT_IN_POWF:
+      n_args = 2;
+      /* fall through */
+
+    /* Single precision, one argument.  */
+    case BUILT_IN_ACOSF:
+    case BUILT_IN_ACOSHF:
+    case BUILT_IN_ASINF:
+    case BUILT_IN_ASINHF:
+    case BUILT_IN_ATANF:
+    case BUILT_IN_ATANHF:
+    case BUILT_IN_CBRTF:
+    case BUILT_IN_COSF:
+    case BUILT_IN_COSHF:
+    case BUILT_IN_ERFF:
+    case BUILT_IN_ERFCF:
+    case BUILT_IN_EXP2F:
+    case BUILT_IN_EXPF:
+    case BUILT_IN_EXPM1F:
+    case BUILT_IN_LGAMMAF:
+    case BUILT_IN_LOG10F:
+    case BUILT_IN_LOG1PF:
+    case BUILT_IN_LOG2F:
+    case BUILT_IN_LOGF:
+    case BUILT_IN_SINF:
+    case BUILT_IN_SINHF:
+    case BUILT_IN_SQRTF:
+    case BUILT_IN_TANF:
+    case BUILT_IN_TANHF:
+      bdecl = implicit_built_in_decls[fn];
+      suffix = "4";                             /* powf -> powf4 */
+      if (el_mode != SFmode || n != 4)
+        return NULL_TREE;
+      break;
+
+    default:
+      return NULL_TREE;
+    }
+
+  /* implicit_built_in_decls[] may hold NULL_TREE for FN (a builtin that is
+     not available for implicit use).  Without a decl there is no name to
+     derive the MASS entry point from, so decline rather than dereference
+     it.  */
+  if (bdecl == NULL_TREE)
+    return NULL_TREE;
+
+  gcc_assert (suffix != NULL);
+
+  /* Skip the "__builtin_" prefix of the builtin's name.  */
+  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl)) + sizeof ("__builtin_") - 1;
+
+  /* NAME is a fixed-size buffer; refuse anything that would overflow it
+     instead of relying on the builtin names staying short.  */
+  if (strlen (bname) + strlen (suffix) >= sizeof (name))
+    return NULL_TREE;
+  strcpy (name, bname);
+  strcat (name, suffix);
+
+  /* The vector routine takes N_ARGS vectors of TYPE_IN and returns
+     TYPE_OUT.  */
+  if (n_args == 1)
+    fntype = build_function_type_list (type_out, type_in, NULL);
+  else if (n_args == 2)
+    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+  else
+    gcc_unreachable ();
+
+  /* Build a function declaration for the vectorized function.  It is a
+     public symbol defined outside this translation unit, and is flagged
+     NOVOPS and READONLY.  */
+  new_fndecl = build_decl (BUILTINS_LOCATION,
+                          FUNCTION_DECL, get_identifier (name), fntype);
+  TREE_PUBLIC (new_fndecl) = 1;
+  DECL_EXTERNAL (new_fndecl) = 1;
+  DECL_IS_NOVOPS (new_fndecl) = 1;
+  TREE_READONLY (new_fndecl) = 1;
+
+  return new_fndecl;
+}
+
 /* Returns a function decl for a vectorized version of the builtin function
    with builtin function code FN and the result vector type TYPE, or NULL_TREE
    if it is not available.  */
@@ -3768,6 +3908,10 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
        }
     }
 
+  /* Generate calls to libmass if appropriate.  */
+  if (TARGET_MASS)
+    return rs6000_builtin_vectorized_libmass (fndecl, type_out, type_in);
+
   return NULL_TREE;
 }
 
index 30f9b39..dbd7580 100644 (file)
@@ -115,6 +115,10 @@ mpopcntd
 Target Report Mask(POPCNTD)
 Use PowerPC V2.06 popcntd instruction
 
+mmass
+Target Report Var(TARGET_MASS) Init(0)
+Use the Mathematical Acceleration Subsystem library high performance math libraries.
+
 mvsx
 Target Report Mask(VSX)
 Use vector/scalar (VSX) instructions
index b2d72d9..4cdda3d 100644 (file)
@@ -786,7 +786,9 @@ See RS/6000 and PowerPC Options.
 -mprototype  -mno-prototype @gol
 -msim  -mmvme  -mads  -myellowknife  -memb  -msdata @gol
 -msdata=@var{opt}  -mvxworks  -G @var{num}  -pthread @gol
--mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision -mno-recip-precision}
+-mrecip -mrecip=@var{opt} -mno-recip -mrecip-precision @gol
+-mno-recip-precision @gol
+-mmass}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -15847,6 +15849,29 @@ automatically selects @option{-mrecip-precision}.  The double
 precision square root estimate instructions are not generated by
 default on low precision machines, since they do not provide an
 estimate that converges after three steps.
+
+@item -mmass
+@itemx -mno-mass
+@opindex mmass
+Use IBM's Mathematical Acceleration Subsystem (MASS) libraries to
+vectorize calls to math intrinsics through an external library.  GCC
+will currently emit calls to @code{acosd2}, @code{acosf4},
+@code{acoshd2}, @code{acoshf4}, @code{asind2}, @code{asinf4},
+@code{asinhd2}, @code{asinhf4}, @code{atan2d2}, @code{atan2f4},
+@code{atand2}, @code{atanf4}, @code{atanhd2}, @code{atanhf4},
+@code{cbrtd2}, @code{cbrtf4}, @code{cosd2}, @code{cosf4},
+@code{coshd2}, @code{coshf4}, @code{erfcd2}, @code{erfcf4},
+@code{erfd2}, @code{erff4}, @code{exp2d2}, @code{exp2f4},
+@code{expd2}, @code{expf4}, @code{expm1d2}, @code{expm1f4},
+@code{hypotd2}, @code{hypotf4}, @code{lgammad2}, @code{lgammaf4},
+@code{log10d2}, @code{log10f4}, @code{log1pd2}, @code{log1pf4},
+@code{log2d2}, @code{log2f4}, @code{logd2}, @code{logf4},
+@code{powd2}, @code{powf4}, @code{sind2}, @code{sinf4}, @code{sinhd2},
+@code{sinhf4}, @code{sqrtd2}, @code{sqrtf4}, @code{tand2},
+@code{tanf4}, @code{tanhd2}, and @code{tanhf4} when generating code
+for power7.  Both @option{-ftree-vectorize} and
+@option{-funsafe-math-optimizations} have to be enabled.  The MASS
+libraries will have to be specified at link time.
 @end table
 
 @node RX Options
index f117080..d705cf4 100644 (file)
@@ -1,3 +1,7 @@
+2010-08-23  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+       * gcc.target/powerpc/vsx-mass-1.c: New file, test -mmass.
+
 2010-08-23  Janus Weil  <janus@gcc.gnu.org>
 
        PR fortran/45366