OSDN Git Service

gcc:
authorbonzini <bonzini@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 27 Apr 2005 09:12:05 +0000 (09:12 +0000)
committerbonzini <bonzini@138bc75d-0d04-0410-961f-82ee72b054a4>
Wed, 27 Apr 2005 09:12:05 +0000 (09:12 +0000)
2005-04-27  Paolo Bonzini  <bonzini@gnu.org>

* tree-complex.c (expand_vector_operation): New, extracted from
expand_vector_operations_1.
(tree_vec_extract): Build a NOP_EXPR.
(expand_vector_parallel): Do not care about returning the correct type.
(expand_vector_operations_1): Call expand_vector_operation.
Build the VIEW_CONVERT_EXPR on the left side of MODIFY_EXPRs.

* tree-complex.c (gate_expand_vector_operations): New.
(pass_lower_vector_ssa): Use it.
* tree-optimize.c (init_tree_optimization_passes): Include
pass_lower_vector_ssa.
* tree-vect-transform.c (vect_min_worthwhile_factor): New.
(vectorizable_operation): Use it.
* tree-vectorizer.c (get_vectype_for_scalar_type): Accept
integer modes for the vector type.

* defaults.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD.
* tree-vect-analyze.c (vect_enhance_data_refs_alignment):
Do not cope with UNITS_PER_SIMD_WORD == 0.
* tree-vectorizer.c (get_vectype_for_scalar_type): Check
if the scalar type is not bigger than UNITS_PER_SIMD_WORD.
(vectorize_loops): Do not check that UNITS_PER_SIMD_WORD > 0.
* config/i386/i386.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD.
* config/mips/mips.h (UNITS_PER_SIMD_WORD): Likewise.
* config/rs6000/rs6000.h (UNITS_PER_SIMD_WORD): Likewise.
* config/sparc/sparc.h (UNITS_PER_SIMD_WORD): Likewise.

* config/alpha/alpha.h (UNITS_PER_SIMD_WORD): Remove.
* config/bfin/bfin.h (UNITS_PER_SIMD_WORD): Remove.
* config/ia64/ia64.h (UNITS_PER_SIMD_WORD): Remove.

* doc/tm.texi (UNITS_PER_WORD): Rephrase more accurately.
(UNITS_PER_SIMD_WORD): New.

gcc/testsuite:
2005-04-27  Paolo Bonzini  <bonzini@gnu.org>

        * gcc.dg/tree-ssa/gen-vect-11.c, gcc.dg/tree-ssa/gen-vect-11a.c,
        gcc.dg/tree-ssa/gen-vect-11b.c, gcc.dg/tree-ssa/gen-vect-11c.c,
        gcc.dg/tree-ssa/gen-vect-2.c, gcc.dg/tree-ssa/gen-vect-25.c,
        gcc.dg/tree-ssa/gen-vect-26.c, gcc.dg/tree-ssa/gen-vect-28.c,
        gcc.dg/tree-ssa/gen-vect-32.c: New.
        * gcc.dg/vect/vect-82.c, gcc.dg/vect/vect-83.c: Fix dg-final.
        * gcc.dg/vect/vect-82_64.c, gcc.dg/vect/vect-83_64.c: Remove xfail,
        don't run on PPC32.

CVS: Enter Log.  Lines beginning with `CVS:' are removed automatically
CVS:
CVS: Committing in .
CVS:
CVS: Modified Files:
CVS:  tree-complex.c tree-optimize.c tree-vect-analyze.c defaults.h
CVS:  tree-vect-transform.c tree-vectorizer.c ChangeLog
CVS:  testsuite/ChangeLog config/alpha/alpha.h config/bfin/bfin.h
CVS:  config/i386/i386.h config/ia64/ia64.h config/mips/mips.h
CVS:  config/rs6000/rs6000.h
CVS:  config/sparc/sparc.h doc/tm.texi
CVS:  testsuite/gcc.dg/vect/vect-82.c
CVS:  testsuite/gcc.dg/vect/vect-82_64.c
CVS:  testsuite/gcc.dg/vect/vect-83.c
CVS:  testsuite/gcc.dg/vect/vect-83_64.c
CVS: Added Files:
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-11.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-11a.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-11b.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-2.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-25.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-26.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-28.c
CVS:  testsuite/gcc.dg/tree-ssa/gen-vect-32.c
CVS: ----------------------------------------------------------------------

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@98818 138bc75d-0d04-0410-961f-82ee72b054a4

29 files changed:
gcc/ChangeLog
gcc/config/alpha/alpha.h
gcc/config/bfin/bfin.h
gcc/config/i386/i386.h
gcc/config/ia64/ia64.h
gcc/config/mips/mips.h
gcc/config/rs6000/rs6000.h
gcc/config/sparc/sparc.h
gcc/defaults.h
gcc/doc/tm.texi
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/vect-82.c
gcc/testsuite/gcc.dg/vect/vect-82_64.c
gcc/testsuite/gcc.dg/vect/vect-83.c
gcc/testsuite/gcc.dg/vect/vect-83_64.c
gcc/tree-complex.c
gcc/tree-optimize.c
gcc/tree-vect-analyze.c
gcc/tree-vect-transform.c
gcc/tree-vectorizer.c

index 368698a..5a99ffa 100644 (file)
@@ -1,3 +1,39 @@
+2005-04-27  Paolo Bonzini  <bonzini@gnu.org>
+
+       * tree-complex.c (expand_vector_operation): New, extracted from
+       expand_vector_operations_1.
+       (tree_vec_extract): Build a NOP_EXPR.
+       (expand_vector_parallel): Do not care about returning the correct type.
+       (expand_vector_operations_1): Call expand_vector_operation.
+       Build the VIEW_CONVERT_EXPR on the left side of MODIFY_EXPRs.
+
+       * tree-complex.c (gate_expand_vector_operations): New.
+       (pass_lower_vector_ssa): Use it.
+       * tree-optimize.c (init_tree_optimization_passes): Include
+       pass_lower_vector_ssa.
+       * tree-vect-transform.c (vect_min_worthwhile_factor): New.
+       (vectorizable_operation): Use it.
+       * tree-vectorizer.c (get_vectype_for_scalar_type): Accept
+       integer modes for the vector type.
+
+       * defaults.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD.
+       * tree-vect-analyze.c (vect_enhance_data_refs_alignment):
+       Do not cope with UNITS_PER_SIMD_WORD == 0.
+       * tree-vectorizer.c (get_vectype_for_scalar_type): Check
+       if the scalar type is not bigger than UNITS_PER_SIMD_WORD.
+       (vectorize_loops): Do not check that UNITS_PER_SIMD_WORD > 0.
+       * config/i386/i386.h (UNITS_PER_SIMD_WORD): Default to UNITS_PER_WORD.
+       * config/mips/mips.h (UNITS_PER_SIMD_WORD): Likewise.
+       * config/rs6000/rs6000.h (UNITS_PER_SIMD_WORD): Likewise.
+       * config/sparc/sparc.h (UNITS_PER_SIMD_WORD): Likewise.
+
+       * config/alpha/alpha.h (UNITS_PER_SIMD_WORD): Remove.
+       * config/bfin/bfin.h (UNITS_PER_SIMD_WORD): Remove.
+       * config/ia64/ia64.h (UNITS_PER_SIMD_WORD): Remove.
+
+       * doc/tm.texi (UNITS_PER_WORD): Rephrase more accurately.
+       (UNITS_PER_SIMD_WORD): New.
+
 2005-04-27  Nathan Sidwell  <nathan@codesourcery.com>
 
        * config/ia64/ia64.c (ia64_encode_addr_area): Use gcc_assert and
        (INDEX_REGISTER_P): New.
        (BASE_REGISTER_P): New.
        (indirectable_constant_address_p): New.  Adapted from
+<<<<<<< ChangeLog
+       INDIRECTABLE_CONSTANT_ADDRESS_P in vax.h.
+       Use SYMBOL_REF_LOCAL_P.
+=======
        INDIRECTABLE_CONSTANT_ADDRESS_P in vax.h. Use SYMBOL_REF_LOCAL_P.
+>>>>>>> 2.8478
        (indirectable_address_p): New.  Adapted from
        INDIRECTABLE_ADDRESS_P in vax.h.
        (nonindexed_address_p): New.  Adapted from
+<<<<<<< ChangeLog
+       GO_IF_NONINDEXED_ADDRESS in vax.h.
+       (index_temp_p): New.  Adapted from
+       INDEX_TERM_P in vax.h.
+       (reg_plus_index_p): New.  Adapted from
+       GO_IF_REG_PLUS_INDEX in vax.h.
+=======
        GO_IF_NONINDEXED_ADDRESS in vax.h.
        (index_temp_p): New.  Adapted from INDEX_TERM_P in vax.h.
        (reg_plus_index_p): New.  Adapted from GO_IF_REG_PLUS_INDEX in vax.h.
+>>>>>>> 2.8478
        (legitimate_address_p): New.  Adapted from
+<<<<<<< ChangeLog
+       GO_IF_LEGITIMATE_ADDRESS in vax.h
+=======
        GO_IF_LEGITIMATE_ADDRESS in vax.h.
+>>>>>>> 2.8478
        (vax_mode_dependent_address_p): New.  Adapted from
+<<<<<<< ChangeLog
+       GO_IF_MODE_DEPENDENT_ADDRESS in vax.h
+=======
        GO_IF_MODE_DEPENDENT_ADDRESS in vax.h.
+>>>>>>> 2.8478
        * config/vax/vax.h (CONSTANT_ADDRESS_P): Use
        legitimate_constant_address_p.
        (CONSTANT_P): Use legitimate_constant_p.
        (GO_IF_NONINDEXED_ADDRESS): Removed.
        (INDEX_TEMP_P): Removed.
        (GO_IF_REG_PLUS_INDEX): Removed.
+<<<<<<< ChangeLog
+       (GO_IF_LEGITIMATE_ADDRESS): Use legitimate_address_p.
+       Two definitions, depending on whether REG_OK_STRICT is defined.
+=======
        (GO_IF_LEGITIMATE_ADDRESS): Use legitimate_address_p. Two
        definitions, depending on whether REG_OK_STRICT is defined.
+>>>>>>> 2.8478
        (GO_IF_MODE_DEPENDENT_ADDRESS): Use vax_mode_dependent_address_p.
        Two definitions, depending on whether REG_OK_STRICT is defined.
        * config/vax/vax-protos.h (legitimate_constant_address_p): Prototype
index 780cf4a..97bd9ef 100644 (file)
@@ -394,9 +394,6 @@ extern int alpha_tls_size;
 
 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
 
-/* Our SIMD is all done on single integer registers.  */
-#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
-\f
 /* Standard register usage.  */
 
 /* Number of actual hardware registers.
index 512525a..608188a 100644 (file)
@@ -773,9 +773,6 @@ do {                                              \
 /* Width of a word, in units (bytes).  */
 #define UNITS_PER_WORD 4
 
-/* Size of a vector for autovectorization.  */
-#define UNITS_PER_SIMD_WORD 4
-
 /* Width in bits of a pointer.
    See also the macro `Pmode1' defined below.  */
 #define POINTER_SIZE 32
index fc0596b..9b04fad 100644 (file)
@@ -836,7 +836,7 @@ do {                                                                        \
 
 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
    place emms and femms instructions.  */
-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : 0)
+#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
 
 #define VALID_FP_MODE_P(MODE)                                          \
     ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode          \
index 362064a..4aca024 100644 (file)
@@ -168,8 +168,6 @@ extern enum processor_type ia64_tune;
 
 #define UNITS_PER_WORD 8
 
-#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
-
 #define POINTER_SIZE (TARGET_ILP32 ? 32 : 64)
 
 /* A C expression whose value is zero if pointers that need to be extended
index e2118e5..f81e7e9 100644 (file)
@@ -966,7 +966,7 @@ extern const struct mips_cpu_info *mips_tune_info;
 /* The number of bytes in a double.  */
 #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
 
-#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : 0)
+#define UNITS_PER_SIMD_WORD (TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD)
 
 /* Set the sizes of the core types.  */
 #define SHORT_TYPE_SIZE 16
index 137e6dd..c2f78a5 100644 (file)
@@ -1043,8 +1043,9 @@ extern const char *rs6000_warn_altivec_long_switch;
          || (MODE) == V1DImode          \
          || (MODE) == V2SImode)
 
-#define UNITS_PER_SIMD_WORD     \
-        (TARGET_ALTIVEC ? 16 : (TARGET_SPE ? 8 : 0) )
+#define UNITS_PER_SIMD_WORD                                    \
+        (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD               \
+        : (TARGET_SPE ? UNITS_PER_SPE_WORD : UNITS_PER_WORD))
 
 /* Value is TRUE if hard register REGNO can hold a value of
    machine-mode MODE.  */
index dbad1b9..fddb2e0 100644 (file)
@@ -588,7 +588,7 @@ extern struct sparc_cpu_select sparc_select[];
 #define MIN_UNITS_PER_WORD     4
 #endif
 
-#define UNITS_PER_SIMD_WORD    (TARGET_VIS ? 8 : 0)
+#define UNITS_PER_SIMD_WORD    (TARGET_VIS ? 8 : UNITS_PER_WORD)
 
 /* Now define the sizes of the C data types.  */
 
index 28a0d83..e4ef7f3 100644 (file)
@@ -702,8 +702,10 @@ do { fputs (integer_asm_op (POINTER_SIZE / BITS_PER_UNIT, TRUE), FILE); \
 #define HAS_LONG_UNCOND_BRANCH 0
 #endif
 
+/* By default, only attempt to parallelize bitwise operations, and
+   possibly adds/subtracts using bit-twiddling.  */
 #ifndef UNITS_PER_SIMD_WORD
-#define UNITS_PER_SIMD_WORD 0
+#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
 #endif
 
 /* Determine whether __cxa_atexit, rather than atexit, is used to
index 7714616..514f59d 100644 (file)
@@ -1076,7 +1076,8 @@ largest value that @code{BITS_PER_WORD} can have at run-time.
 @end defmac
 
 @defmac UNITS_PER_WORD
-Number of storage units in a word; normally 4.
+Number of storage units in a word; normally the size of a general-purpose
+register, a power of two from 1 to 8.
 @end defmac
 
 @defmac MIN_UNITS_PER_WORD
@@ -1085,6 +1086,13 @@ Minimum number of units in a word.  If this is undefined, the default is
 smallest value that @code{UNITS_PER_WORD} can have at run-time.
 @end defmac
 
+@defmac UNITS_PER_SIMD_WORD
+Number of units in the vectors that the vectorizer can produce.
+The default is equal to @code{UNITS_PER_WORD}, because the vectorizer
+can do some transformations even in absence of specialized @acronym{SIMD}
+hardware.
+@end defmac
+
 @defmac POINTER_SIZE
 Width of a pointer, in bits.  You must specify a value no wider than the
 width of @code{Pmode}.  If it is not equal to the width of @code{Pmode},
index bd88d35..0f69c32 100644 (file)
@@ -1,3 +1,14 @@
+2005-04-27  Paolo Bonzini  <bonzini@gnu.org>
+
+        * gcc.dg/tree-ssa/gen-vect-11.c, gcc.dg/tree-ssa/gen-vect-11a.c,
+        gcc.dg/tree-ssa/gen-vect-11b.c, gcc.dg/tree-ssa/gen-vect-11c.c,
+        gcc.dg/tree-ssa/gen-vect-2.c, gcc.dg/tree-ssa/gen-vect-25.c,
+        gcc.dg/tree-ssa/gen-vect-26.c, gcc.dg/tree-ssa/gen-vect-28.c,
+        gcc.dg/tree-ssa/gen-vect-32.c: New.
+        * gcc.dg/vect/vect-82.c, gcc.dg/vect/vect-83.c: Fix dg-final.
+        * gcc.dg/vect/vect-82_64.c, gcc.dg/vect/vect-83_64.c: Remove xfail,
+        don't run on PPC32.
+        
 2005-04-27  Joseph S. Myers  <joseph@codesourcery.com>
 
        PR c/21213
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11.c
new file mode 100644 (file)
index 0000000..bc6c286
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+int main ()
+{
+  int i;
+  char ia[N];
+  char ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+  char ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = ib[i] + ic[i];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i] != ib[i] + ic[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11a.c
new file mode 100644 (file)
index 0000000..75ec7ce
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+int main ()
+{
+  int i;
+  half_word ia[N];
+  half_word ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+  half_word ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = ib[i] & ic[i];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i] != (ib[i] & ic[i]))
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11b.c
new file mode 100644 (file)
index 0000000..2083353
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+int main ()
+{
+  int i;
+  char ia[N];
+  char ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+  char ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+  /* Not vectorizable, multiplication */
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = ib[i] * ic[i];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i] != (char) (ib[i] * ic[i]))
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-11c.c
new file mode 100644 (file)
index 0000000..8632ae4
--- /dev/null
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+int main ()
+{
+  int i;
+  half_word ia[N];
+  half_word ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+  half_word ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+
+  /* Not worthwhile, only 2 parts per int */
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = ib[i] + ic[i];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i] != ib[i] + ic[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-2.c
new file mode 100644 (file)
index 0000000..be89c26
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+int main ()
+{
+  half_word cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+  half_word ca[N];
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      ca[i] = cb[i];
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ca[i] != cb[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-25.c
new file mode 100644 (file)
index 0000000..1e0c2c1
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 128
+
+#if __LONG_MAX__ == 2147483647
+typedef short half_word;
+#else
+typedef int half_word;
+#endif
+
+int main (int n, int *p)
+{
+  int i;
+  half_word ib[N];
+  half_word ia[N];
+  int k;
+
+  for (i = 0; i < N; i++)
+    {
+      ia[i] = n;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i] != n)
+        abort ();
+    }
+
+  k = *p;
+  for (i = 0; i < N; i++)
+    {
+      ib[i] = k;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ib[i] != k)
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-26.c
new file mode 100644 (file)
index 0000000..b90413a
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 128
+
+/* unaligned store.  */
+
+int main ()
+{
+  int i;
+  char ia[N+1];
+
+  for (i = 1; i <= N; i++)
+    {
+      ia[i] = 5;
+    }
+
+  /* check results:  */
+  for (i = 1; i <= N; i++)
+    {
+      if (ia[i] != 5)
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-28.c
new file mode 100644 (file)
index 0000000..0d01752
--- /dev/null
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 128
+#define OFF 3
+
+/* unaligned store.  */
+
+int main (int off)
+{
+  int i;
+  char ia[N+OFF];
+
+  for (i = 0; i < N; i++)
+    {
+      ia[i+off] = 5;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (ia[i+off] != 5)
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c b/gcc/testsuite/gcc.dg/tree-ssa/gen-vect-32.c
new file mode 100644 (file)
index 0000000..681c707
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -ftree-vectorizer-verbose=3 -fdump-tree-vect-stats" } */
+
+#include <stdlib.h>
+
+#define N 16
+int main ()
+{  
+  struct {
+    char ca[N];
+  } s;
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      s.ca[i] = 5;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (s.ca[i] != 5)
+        abort ();
+    }
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
index ac682c7..aecd619 100644 (file)
@@ -32,5 +32,5 @@ int main (void)
   return main1 ();
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
index d6e9752..a21ed12 100644 (file)
@@ -1,4 +1,5 @@
-/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
 /* { dg-options "-O2 -ftree-vectorize -mpowerpc64 -fdump-tree-vect-stats -maltivec" } */
 
 #include <stdarg.h>
@@ -33,5 +34,5 @@ int main (void)
   return main1 ();
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
index 7a23f5b..5c21cbb 100644 (file)
@@ -32,5 +32,5 @@ int main (void)
   return main1 ();
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
index ea99a9e..b5f6f6f 100644 (file)
@@ -1,4 +1,5 @@
-/* { dg-do run { target powerpc*-*-* } } */
+/* { dg-do run { target { powerpc*-*-* && lp64 } } } */
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
 /* { dg-options "-O2 -ftree-vectorize -mpowerpc64 -fdump-tree-vect-stats -maltivec" } */
 
 #include <stdarg.h>
@@ -33,5 +34,5 @@ int main (void)
   return main1 ();
 } 
 
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
index 18582c1..98b6c56 100644 (file)
@@ -632,8 +632,13 @@ tree_vec_extract (block_stmt_iterator *bsi, tree type,
 {
   if (bitpos)
     return gimplify_build3 (bsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
-  else
+
+  /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
+     anyway be stored in memory, so prefer NOP_EXPR.  */
+  else if (TYPE_MODE (type) == BLKmode)
     return gimplify_build1 (bsi, VIEW_CONVERT_EXPR, type, t);
+  else
+    return gimplify_build1 (bsi, NOP_EXPR, type, t);
 }
 
 static tree
@@ -783,7 +788,7 @@ expand_vector_parallel (block_stmt_iterator *bsi, elem_op_func f, tree type,
       result = f (bsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
     }
 
-  return build1 (VIEW_CONVERT_EXPR, type, result);
+  return result;
 }
 
 /* Expand a vector operation to scalars; for integer types we can use
@@ -810,6 +815,60 @@ expand_vector_addition (block_stmt_iterator *bsi,
                                    a, b, code);
 }
 
+static tree
+expand_vector_operation (block_stmt_iterator *bsi, tree type, tree compute_type,
+                        tree rhs, enum tree_code code)
+{
+  enum machine_mode compute_mode = TYPE_MODE (compute_type);
+
+  /* If the compute mode is not a vector mode (hence we are not decomposing
+     a BLKmode vector to smaller, hardware-supported vectors), we may want
+     to expand the operations in parallel.  */
+  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
+      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
+    switch (code)
+      {
+      case PLUS_EXPR:
+      case MINUS_EXPR:
+        if (!TYPE_TRAP_SIGNED (type))
+          return expand_vector_addition (bsi, do_binop, do_plus_minus, type,
+                                        TREE_OPERAND (rhs, 0),
+                                        TREE_OPERAND (rhs, 1), code);
+       break;
+
+      case NEGATE_EXPR:
+        if (!TYPE_TRAP_SIGNED (type))
+          return expand_vector_addition (bsi, do_unop, do_negate, type,
+                                        TREE_OPERAND (rhs, 0),
+                                        NULL_TREE, code);
+       break;
+
+      case BIT_AND_EXPR:
+      case BIT_IOR_EXPR:
+      case BIT_XOR_EXPR:
+        return expand_vector_parallel (bsi, do_binop, type,
+                                      TREE_OPERAND (rhs, 0),
+                                      TREE_OPERAND (rhs, 1), code);
+
+      case BIT_NOT_EXPR:
+        return expand_vector_parallel (bsi, do_unop, type,
+                                      TREE_OPERAND (rhs, 0),
+                                      NULL_TREE, code);
+
+      default:
+       break;
+      }
+
+  if (TREE_CODE_CLASS (code) == tcc_unary)
+    return expand_vector_piecewise (bsi, do_unop, type, compute_type,
+                                   TREE_OPERAND (rhs, 0),
+                                   NULL_TREE, code);
+  else
+    return expand_vector_piecewise (bsi, do_binop, type, compute_type,
+                                   TREE_OPERAND (rhs, 0),
+                                   TREE_OPERAND (rhs, 1), code);
+}
+\f
 /* Return a type for the widest vector mode whose components are of mode
    INNER_MODE, or NULL_TREE if none is found.  */
 static tree
@@ -841,7 +900,7 @@ static void
 expand_vector_operations_1 (block_stmt_iterator *bsi)
 {
   tree stmt = bsi_stmt (*bsi);
-  tree *p_rhs, rhs, type, compute_type;
+  tree *p_lhs, *p_rhs, lhs, rhs, type, compute_type;
   enum tree_code code;
   enum machine_mode compute_mode;
   optab op;
@@ -856,7 +915,9 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
       /* FALLTHRU */
 
     case MODIFY_EXPR:
+      p_lhs = &TREE_OPERAND (stmt, 0);
       p_rhs = &TREE_OPERAND (stmt, 1);
+      lhs = *p_lhs;
       rhs = *p_rhs;
       break;
 
@@ -897,86 +958,48 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
         compute_type = vector_compute_type;
     }
 
-  compute_mode = TYPE_MODE (compute_type);
-
   /* If we are breaking a BLKmode vector into smaller pieces,
      type_for_widest_vector_mode has already looked into the optab,
      so skip these checks.  */
   if (compute_type == type)
     {
+      compute_mode = TYPE_MODE (compute_type);
       if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
           || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT)
           && op != NULL
          && op->handlers[compute_mode].insn_code != CODE_FOR_nothing)
        return;
       else
-       {
-         /* There is no operation in hardware, so fall back to scalars.  */
-         compute_type = TREE_TYPE (type);
-         compute_mode = TYPE_MODE (compute_type);
-       }
+       /* There is no operation in hardware, so fall back to scalars.  */
+       compute_type = TREE_TYPE (type);
     }
 
-  /* If the compute mode is not a vector mode (hence we are decomposing
-     a BLKmode vector to smaller, hardware-supported vectors), we may
-     want to expand the operations in parallel.  */
-  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
-      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT)
-    switch (code)
-      {
-      case PLUS_EXPR:
-      case MINUS_EXPR:
-        if (TYPE_TRAP_SIGNED (type))
-         break;
-
-        *p_rhs = expand_vector_addition (bsi, do_binop, do_plus_minus, type,
-                                        TREE_OPERAND (rhs, 0),
-                                        TREE_OPERAND (rhs, 1), code);
-       mark_stmt_modified (bsi_stmt (*bsi));
-        return;
-
-      case NEGATE_EXPR:
-        if (TYPE_TRAP_SIGNED (type))
-         break;
-
-        *p_rhs = expand_vector_addition (bsi, do_unop, do_negate, type,
-                                        TREE_OPERAND (rhs, 0),
-                                        NULL_TREE, code);
-       mark_stmt_modified (bsi_stmt (*bsi));
-        return;
-
-      case BIT_AND_EXPR:
-      case BIT_IOR_EXPR:
-      case BIT_XOR_EXPR:
-        *p_rhs = expand_vector_parallel (bsi, do_binop, type,
-                                        TREE_OPERAND (rhs, 0),
-                                        TREE_OPERAND (rhs, 1), code);
-       mark_stmt_modified (bsi_stmt (*bsi));
-        return;
-
-      case BIT_NOT_EXPR:
-        *p_rhs = expand_vector_parallel (bsi, do_unop, type,
-                                        TREE_OPERAND (rhs, 0),
-                                        NULL_TREE, code);
-       mark_stmt_modified (bsi_stmt (*bsi));
-        return;
-
-      default:
-       break;
-      }
-
-  if (TREE_CODE_CLASS (code) == tcc_unary)
-    *p_rhs = expand_vector_piecewise (bsi, do_unop, type, compute_type,
-                                     TREE_OPERAND (rhs, 0),
-                                     NULL_TREE, code);
+  rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
+  if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+    *p_rhs = rhs;
   else
-    *p_rhs = expand_vector_piecewise (bsi, do_binop, type, compute_type,
-                                     TREE_OPERAND (rhs, 0),
-                                     TREE_OPERAND (rhs, 1), code);
+    {
+      /* Build a conversion; VIEW_CONVERT_EXPR is very expensive unless T will
+         be stored in memory anyway, so prefer NOP_EXPR.  Also, perform the
+        VIEW_CONVERT_EXPR on the left side of the assignment.  */
+      if (TYPE_MODE (TREE_TYPE (rhs)) == BLKmode)
+        *p_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (rhs), lhs);
+      else
+       *p_rhs = gimplify_build1 (bsi, NOP_EXPR, TREE_TYPE (lhs), rhs);
+    }
 
   mark_stmt_modified (bsi_stmt (*bsi));
 }
 \f
+/* Use this to lower vector operations introduced by the vectorizer,
+   if it may need the bit-twiddling tricks implemented in this file.  */
+
+static bool
+gate_expand_vector_operations (void)
+{
+  return flag_tree_vectorize != 0;
+}
+
 static void
 expand_vector_operations (void)
 {
@@ -1015,8 +1038,8 @@ tree_lower_operations (void)
 
 struct tree_opt_pass pass_lower_vector_ssa = 
 {
-  "vector",                            /* name */
-  NULL,                                        /* gate */
+  "veclower",                          /* name */
+  gate_expand_vector_operations,       /* gate */
   expand_vector_operations,            /* execute */
   NULL,                                        /* sub */
   NULL,                                        /* next */
index 40020ac..e77c29c 100644 (file)
@@ -437,6 +437,7 @@ init_tree_optimization_passes (void)
   NEXT_PASS (pass_iv_canon);
   NEXT_PASS (pass_if_conversion);
   NEXT_PASS (pass_vectorize);
+  NEXT_PASS (pass_lower_vector_ssa);
   NEXT_PASS (pass_complete_unroll);
   NEXT_PASS (pass_iv_optimize);
   NEXT_PASS (pass_loop_done);
index 71f7663..1416bc4 100644 (file)
@@ -1076,13 +1076,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
   struct data_reference *dr0 = NULL;
   unsigned int i, j;
 
-  /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD
-     bootstrap.  Copy UNITS_PER_SIMD_WORD to a local variable to avoid a
-     "division by zero" error.  This error would be issued because we
-     we do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD
-     defaults to 0 if it is not defined by the target.  */
-  int units_per_simd_word = UNITS_PER_SIMD_WORD;
-
   /*
      This pass will require a cost model to guide it whether to apply peeling 
      or versioning or a combination of the two. For example, the scheme that
@@ -1237,7 +1230,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
                  int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
 
                  DR_MISALIGNMENT (dr) += npeel * drsize;
-                 DR_MISALIGNMENT (dr) %= units_per_simd_word;
+                 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
                }
              else
                DR_MISALIGNMENT (dr) = -1;
index e51d56f..68222e9 100644 (file)
@@ -720,6 +720,32 @@ vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 }
 
 
+/* Function vect_min_worthwhile_factor.
+
+   Return the minimum vectorization factor at which vectorizing operation
+   CODE with generic vectors is worthwhile: 4 for add, subtract and negate,
+   2 for the bitwise operations, and INT_MAX (never) for any other CODE.  */
+static int
+vect_min_worthwhile_factor (enum tree_code code)
+{
+  switch (code)
+    {
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case NEGATE_EXPR:
+      return 4;
+
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+    case BIT_NOT_EXPR:
+      return 2;
+
+    default:
+      return INT_MAX;
+    }
+}
+
 /* Function vectorizable_operation.
 
    Check if STMT performs a binary or unary operation that can be vectorized. 
@@ -792,6 +818,16 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
       return false;
     }
 
+  /* Worthwhile without SIMD support?  */
+  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+        < vect_min_worthwhile_factor (code))
+    {
+      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
+       fprintf (vect_dump, "not worthwhile without SIMD support.");
+      return false;
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
index ed95eba..2285bb1 100644 (file)
@@ -1525,7 +1525,7 @@ get_vectype_for_scalar_type (tree scalar_type)
   int nunits;
   tree vectype;
 
-  if (nbytes == 0)
+  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
     return NULL_TREE;
 
   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
@@ -1548,11 +1548,9 @@ get_vectype_for_scalar_type (tree scalar_type)
       print_generic_expr (vect_dump, vectype, TDF_SLIM);
     }
 
-  if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
+  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
+      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
     {
-      /* TODO: tree-complex.c sometimes can parallelize operations
-         on generic vectors.  We can vectorize the loop in that case,
-         but then we should re-run the lowering pass.  */
       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
         fprintf (vect_dump, "mode not supported by target.");
       return NULL_TREE;
@@ -1733,15 +1731,6 @@ vectorize_loops (struct loops *loops)
   /* Fix the verbosity level if not defined explicitly by the user.  */
   vect_set_dump_settings ();
 
-  /* Does the target support SIMD?  */
-  /* FORNOW: until more sophisticated machine modelling is in place.  */
-  if (!UNITS_PER_SIMD_WORD)
-    {
-      if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
-       fprintf (vect_dump, "vectorizer: target vector size is not defined.");
-      return;
-    }
-
   /*  ----------- Analyze loops. -----------  */
 
   /* If some loop was duplicated, it gets bigger number