gcc/
2010-09-13 H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386-protos.h (ix86_units_per_simd_word): New.
* config/i386/i386.c (initial_ix86_tune_features): Add
X86_TUNE_VECTORIZE_DOUBLE.
(ix86_units_per_simd_word): New.
* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_VECTORIZE_DOUBLE.
(TARGET_VECTORIZE_DOUBLE): New.
(UNITS_PER_SIMD_WORD): Defined with ix86_units_per_simd_word.
gcc/testsuite/
2010-09-13 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/fma4-256-vector.c: Add -mtune=generic.
* gcc.target/i386/fma4-vector.c: Likewise.
* gcc.target/i386/vectorize2.c: Likewise.
* gcc.target/i386/vectorize4.c: Likewise.
* gcc.target/i386/vectorize5.c: Likewise.
* gcc.target/i386/vectorize6.c: Likewise.
* gcc.target/i386/vectorize8.c: Likewise.
* gcc.target/i386/vect-double-1.c: New.
* gcc.target/i386/vect-double-1a.c: Likewise.
* gcc.target/i386/vect-double-2.c: Likewise.
* gcc.target/i386/vect-double-2a.c: Likewise.
* lib/target-supports.exp (check_effective_target_vect_double):
Set et_vect_double_saved to 0 when tuning for Atom.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@164255 138bc75d-0d04-0410-961f-82ee72b054a4
+2010-09-13 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386-protos.h (ix86_units_per_simd_word): New.
+
+ * config/i386/i386.c (initial_ix86_tune_features): Add
+ X86_TUNE_VECTORIZE_DOUBLE.
+ (ix86_units_per_simd_word): New.
+
+ * config/i386/i386.h (ix86_tune_indices): Add
+ X86_TUNE_VECTORIZE_DOUBLE.
+ (TARGET_VECTORIZE_DOUBLE): New.
+ (UNITS_PER_SIMD_WORD): Defined with ix86_units_per_simd_word.
+
2010-09-13 Pat Haugen <pthaugen@us.ibm.com>
* tree-ssa-ter.c (temp_expr_table_d): Add call_cnt field.
extern bool x86_extended_reg_mentioned_p (rtx);
extern bool x86_maybe_negate_const_int (rtx *, enum machine_mode);
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
+extern unsigned int ix86_units_per_simd_word (enum machine_mode);
extern int avx_vpermilp_parallel (rtx par, enum machine_mode mode);
extern int avx_vperm2f128_parallel (rtx par, enum machine_mode mode);
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
m_ATOM,
+
+ /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
+ instructions. */
+ ~m_ATOM,
};
/* Feature tests against the various architecture variations. */
return false;
}
+/* Return the SIMD word size, in bytes, to use for MODE.  The
+ UNITS_PER_SIMD_WORD macro in i386.h expands to a call to this
+ function.
+
+ ??? No autovectorization into MMX or 3DNOW until we can reliably
+ place emms and femms instructions. */
+
+unsigned int
+ix86_units_per_simd_word (enum machine_mode mode)
+{
+ /* Disable double precision vectorizer if needed: when the tuning
+ feature TARGET_VECTORIZE_DOUBLE is off, DFmode gets UNITS_PER_WORD
+ rather than the SSE size returned below. */
+ if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
+ return UNITS_PER_WORD;
+
+#if 0
+ /* FIXME: AVX has 32byte floating point vector operations and 16byte
+ integer vector operations. But vectorizer doesn't support
+ different sizes for integer and floating point vectors. We limit
+ vector size to 16byte. This AVX branch, including its trailing
+ else, is compiled out by the surrounding #if 0, so the return
+ below is what every target reaches. */
+ if (TARGET_AVX)
+ return (mode == DFmode || mode == SFmode) ? 32 : 16;
+ else
+#endif
+ return TARGET_SSE ? 16 : UNITS_PER_WORD;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
X86_TUNE_USE_VECTOR_CONVERTS,
X86_TUNE_FUSE_CMP_AND_BRANCH,
X86_TUNE_OPT_AGU,
+ X86_TUNE_VECTORIZE_DOUBLE,
X86_TUNE_LAST
};
#define TARGET_FUSE_CMP_AND_BRANCH \
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
+#define TARGET_VECTORIZE_DOUBLE \
+ ix86_tune_features[X86_TUNE_VECTORIZE_DOUBLE]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
|| (MODE) == V2SImode || (MODE) == SImode \
|| (MODE) == V4HImode || (MODE) == V8QImode)
-/* ??? No autovectorization into MMX or 3DNOW until we can reliably
- place emms and femms instructions.
- FIXME: AVX has 32byte floating point vector operations and 16byte
- integer vector operations. But vectorizer doesn't support
- different sizes for integer and floating point vectors. We limit
- vector size to 16byte. */
-#define UNITS_PER_SIMD_WORD(MODE) \
- (TARGET_AVX ? (((MODE) == DFmode || (MODE) == SFmode) ? 16 : 16) \
- : (TARGET_SSE ? 16 : UNITS_PER_WORD))
+#define UNITS_PER_SIMD_WORD(MODE) ix86_units_per_simd_word (MODE)
#define VALID_DFP_MODE_P(MODE) \
((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
+2010-09-13 H.J. Lu <hongjiu.lu@intel.com>
+
+ * gcc.target/i386/fma4-256-vector.c: Add -mtune=generic.
+ * gcc.target/i386/fma4-vector.c: Likewise.
+ * gcc.target/i386/vectorize2.c: Likewise.
+ * gcc.target/i386/vectorize4.c: Likewise.
+ * gcc.target/i386/vectorize5.c: Likewise.
+ * gcc.target/i386/vectorize6.c: Likewise.
+ * gcc.target/i386/vectorize8.c: Likewise.
+
+ * gcc.target/i386/vect-double-1.c: New.
+ * gcc.target/i386/vect-double-1a.c: Likewise.
+ * gcc.target/i386/vect-double-2.c: Likewise.
+ * gcc.target/i386/vect-double-2a.c: Likewise.
+
+ * lib/target-supports.exp (check_effective_target_vect_double):
+ Set et_vect_double_saved to 0 when tuning for Atom.
+
2010-09-13 Jan Hubicka <jh@suse.cz>
* gcc.dg/torture/pr23821.c: Drop static keyword.
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2 -mfma4 -ftree-vectorize" } */
+/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
extern void exit (int);
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2 -mfma4 -ftree-vectorize" } */
+/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */
extern void exit (int);
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -march=core2 -fdump-tree-vect-stats" } */
+
+extern void abort (void);
+
+#ifndef STATIC
+#define STATIC
+#endif
+
+#define N 16
+
+double cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+double ca[N];
+/* Copy the N doubles of cb into ca, then abort if any element differs.
+ The function is marked noinline; STATIC expands to nothing unless
+ the including file defines it first.  The scan-tree-dump-times
+ directive at the end of this file expects the vect dump to report
+ "vectorized 1 loops" exactly once. */
+STATIC void
+__attribute__ ((noinline))
+sse2_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ ca[i] = cb[i];
+ }
+
+ /* check results: every ca element must compare equal to the cb
+ element it was copied from, otherwise abort. */
+ for (i = 0; i < N; i++)
+ {
+ if (ca[i] != cb[i])
+ abort ();
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -msse2 -mtune=core2" } */
+
+#define STATIC static
+
+#include "vect-double-1.c"
+#include "sse2-check.h"
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */
+
+extern void abort (void);
+
+#ifndef STATIC
+#define STATIC
+#endif
+
+#define N 16
+
+double cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+double ca[N];
+/* Copy the N doubles of cb into ca, then abort if any element differs.
+ The function is marked noinline; STATIC expands to nothing unless
+ the including file defines it first.  This file is compiled with
+ -mtune=atom, and the scan-tree-dump-not directive at the end expects
+ the vect dump not to contain "vectorized 1 loops". */
+STATIC void
+__attribute__ ((noinline))
+sse2_test (void)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ ca[i] = cb[i];
+ }
+
+ /* check results: every ca element must compare equal to the cb
+ element it was copied from, otherwise abort. */
+ for (i = 0; i < N; i++)
+ {
+ if (ca[i] != cb[i])
+ abort ();
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "vectorized 1 loops" "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target sse2 } */
+/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -msse2 -mtune=atom" } */
+
+#define STATIC static
+
+#include "vect-double-2.c"
+#include "sse2-check.h"
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse" } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse -mtune=generic" } */
double a[256];
int b[256];
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
-/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 --param ggc-min-expand=0 --param ggc-min-heapsize=0" } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mtune=generic --param ggc-min-expand=0 --param ggc-min-heapsize=0" } */
/* This test, tests two thing, we vectorize square root and also we don't crash due to a GC issue. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O2 -ftree-vectorize -mveclibabi=acml -ffast-math" } */
+/* { dg-options "-O2 -ftree-vectorize -mveclibabi=acml -ffast-math -mtune=generic" } */
double x[256];
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2 -ftree-vectorize -mveclibabi=svml -ffast-math" } */
+/* { dg-options "-O2 -msse2 -ftree-vectorize -mveclibabi=svml -ffast-math -mtune=generic" } */
double x[256];
/* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-options "-O2 -ftree-vectorize -msse2 -mtune=generic" } */
unsigned int a[256];
double b[256];
} else {
set et_vect_double_saved 0
if { [istarget i?86-*-*]
- || [istarget x86_64-*-*]
- || [istarget spu-*-*] } {
+ || [istarget x86_64-*-*] } {
+ if { [check_no_compiler_messages vect_double assembly {
+ #ifdef __tune_atom__
+ # error No double vectorizer support.
+ #endif
+ }] } {
+ set et_vect_double_saved 1
+ } else {
+ set et_vect_double_saved 0
+ }
+ } elseif { [istarget spu-*-*] } {
set et_vect_double_saved 1
}
}