* config.gcc: Add an extra_header for ARM targets.
author    nickc <nickc@138bc75d-0d04-0410-961f-82ee72b054a4>
          Wed, 18 Jun 2003 16:36:13 +0000 (16:36 +0000)
committer nickc <nickc@138bc75d-0d04-0410-961f-82ee72b054a4>
          Wed, 18 Jun 2003 16:36:13 +0000 (16:36 +0000)
        Support configuring with --with-cpu=iwmmxt.
* doc/invoke.texi: Document new value for -mcpu= ARM switch.
        * config/arm/aof.h (REGISTER_NAMES): Add iwmmxt register
names.  Fix formatting.
        * config/arm/aout.h (REGISTER_NAMES): Add iwmmxt register
names.
        * config/arm/arm-protos.h (arm_emit_vector_const): New
prototype.
(arm_output_load_gr): New prototype.
* config/arm/arm.c (extra_reg_names1): Delete.
        (TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN, FL_IWMMXT,
arm_arch_iwmmxt): Define.
(all_cores, all_architectures): Add entry for iwmmxt.
(arm_override_options): Add support for iwmmxt.
(use_return_insn, arm_function_arg, arm_legitimate_index_p,
arm_print_value, arm_rtx_costs_1, output_move_double,
arm_compute_save_reg_mask, arm_output_epilogue,
arm_get_frame_size, arm_expand_prologue, arm_print_operand,
arm_assemble_integer, arm_hard_regno_ok, arm_regno_class):
Likewise.
(arm_init_cumulative_args): Count iwmmxt registers.
(arm_function_ok_for_sibcall): Return false if sibcall_blocked
has been set.
(struct minipool_node): Add fix_size field.
(add_minipool_forward_ref): Add support for 8-byte aligning of
the pool.
(add_minipool_backward_ref, assign_minipool_offsets,
dump_minipool, push_minipool_fix): Likewise.
(struct builtin_description): New struct.
        (builtin_description): New array of iwmmxt builtin functions.
        (arm_init_iwmmxt_builtins): New function.
        (arm_init_builtins): New function.
        (safe_vector_operand): New function.
        (arm_expand_binop_builtin): New function.
        (arm_expand_unop_builtin): New function.
        (arm_expand_builtin): New function.
        (arm_emit_vector_const): New function.
        (arm_output_load_gr): New function.
        * config/arm/arm.h (TARGET_CPU_iwmmxt, TARGET_IWMMXT,
TARGET_REALLY_IWMMXT, arm_arch_iwmmxt, IWMMXT_ALIGNMENT,
TYPE_NEEDS_IWMMXT_ALIGNMENT, ADJUST_FIELD_ALIGN,
DATA_ALIGNMENT, LOCAL_ALIGNMENT, VECTOR_MODE_SUPPORTED_P): Define.
        (BIGGEST_ALIGNMENT): Set to 64 if ATPCS support is enabled.
        (CPP_CPU_ARCH_SPEC): Add entries for iwmmxt.
        (FIXED_REGISTERS, CALL_USED_REGISTERS, REG_ALLOC_ORDER,
reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS,
REG_CLASS_FOR_LETTER): Add iwmmxt registers.
        (SUBTARGET_CONDITIONAL_REGISTER_USAGE): Disable iwmmxt
registers unless the iwmmxt target is selected.
        (FIRST_IWMMXT_GR_REGNUM, LAST_IWMMXT_GR_REGNUM,
FIRST_IWMMXT_REGNUM, LAST_IWMMXT_REGNUM, IS_IWMMXT_REGNUM,
IS_IWMMXT_GR_REGNUM): Define.
        (FIRST_PSEUDO_REGISTER): Bump to 63.
        (struct machine_function): Add sibcall_blocked field.
        (struct CUMULATIVE_ARGS): Add iwmmxt_nregs, named_count and
nargs fields.
        (enum arm_builtins): New enum list.
        * config/arm/arm.md (UNSPEC_WSHUFH, UNSPEC_WACC,
UNSPEC_TMOVMSK, UNSPEC_WSAD, UNSPEC_WSADZ, UNSPEC_WMACS,
UNSPEC_WMACU, UNSPEC_WMACSZ, UNSPEC_WMACUZ, UNSPEC_CLRDI,
UNSPEC_WMADDS, UNSPEC_WMADDU): New unspecs.
(VUNSPEC_TMRC, VUNSPEC_TMCR, VUNSPEC_ALIGN8, VUNSPEC_WCMP_EQ,
VUNSPEC_WCMP_GTU, VUNSPEC_WCMP_GT): New vunspecs.
        (movv2si, movv4hi, movv8qi): New expands for vector moves.
        Include iwmmxt.md.
* config/arm/t-xscale-elf (MULTILIB_OPTIONS): Add iwmmxt
multilib.
        (MULTILIB_DIRNAMES, MULTILIB_REDUNDANT_DIRS): Likewise.
        * config/arm/mmintrin.h: New ARM specific header file.
        * config/arm/iwmmxt.md: New iWMMXt specific machine patterns.
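
The new builtins follow the "__builtin_arm_" plus mnemonic naming used by
the bdesc_2arg table in arm.c below.  A minimal usage sketch, with the
caveat that the vector typedef is an assumption (the actual argument types
are registered in arm_init_iwmmxt_builtins, which is truncated below), and
that the code only compiles with a GCC configured for iwmmxt:

    /* Sketch only: assumes V4HImode corresponds to a vector_size (8)
       short vector and that the builtin takes and returns that type.  */
    typedef short v4hi __attribute__ ((vector_size (8)));

    v4hi
    saturating_add (v4hi a, v4hi b)
    {
      /* ssaddv4hi3 / "waddhss": signed saturating halfword add.  */
      return __builtin_arm_waddhss (a, b);
    }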

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@68157 138bc75d-0d04-0410-961f-82ee72b054a4

13 files changed:
gcc/ChangeLog
gcc/config.gcc
gcc/config/arm/aof.h
gcc/config/arm/aout.h
gcc/config/arm/arm-protos.h
gcc/config/arm/arm.c
gcc/config/arm/arm.h
gcc/config/arm/arm.md
gcc/config/arm/iwmmxt.md [new file with mode: 0644]
gcc/config/arm/lib1funcs.asm
gcc/config/arm/mmintrin.h [new file with mode: 0644]
gcc/config/arm/t-xscale-elf
gcc/doc/invoke.texi

gcc/ChangeLog
index 142d4fe..e1a63b4 100644
@@ -1,3 +1,77 @@
+2003-06-18  Nick Clifton  <nickc@redhat.com>
+
+       * config.gcc: Add an extra_header for ARM targets.
+        Support configuring with --with-cpu=iwmmxt.
+       * doc/invoke.texi: Document new value for -mcpu= ARM switch.
+        * config/arm/aof.h (REGISTER_NAMES): Add iwmmxt register
+       names.  Fix formatting.        
+        * config/arm/aout.h (REGISTER_NAMES): Add iwmmxt register
+       names.
+        * config/arm/arm-protos.h (arm_emit_vector_const): New
+       prototype.
+       (arm_output_load_gr): New prototype.
+       * config/arm/arm.c (extra_reg_names1): Delete.
+        (TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN, FL_IWMMXT,
+       arm_arch_iwmmxt): Define.
+       (all_cores, all_architectures): Add entry for iwmmxt.
+       (arm_override_options): Add support for iwmmxt.
+       (use_return_insn, arm_function_arg, arm_legitimate_index_p,
+       arm_print_value, arm_rtx_costs_1, output_move_double,
+       arm_compute_save_reg_mask, arm_output_epilogue,
+       arm_get_frame_size, arm_expand_prologue, arm_print_operand,
+       arm_assemble_integer, arm_hard_regno_ok, arm_regno_class):
+       Likewise.
+       (arm_init_cumulative_args): Count iwmmxt registers.
+       (arm_function_ok_for_sibcall): Return false if sibcall_blocked
+       has been set.
+       (struct minipool_node): Add fix_size field.
+       (add_minipool_forward_ref): Add support for 8-byte aligning of
+       the pool.
+       (add_minipool_backward_ref, assign_minipool_offsets,
+       dump_minipool, push_minipool_fix): Likewise.
+       (struct builtin_description): New struct.
+        (builtin_description): New array of iwmmxt builtin functions.
+        (arm_init_iwmmxt_builtins): New function.
+        (arm_init_builtins): New function.
+        (safe_vector_operand): New function.
+        (arm_expand_binop_builtin): New function.
+        (arm_expand_unop_builtin): New function.
+        (arm_expand_builtin): New function.
+        (arm_emit_vector_const): New function.
+        (arm_output_load_gr): New function.
+        * config/arm/arm.h (TARGET_CPU_iwmmxt, TARGET_IWMMXT,
+       TARGET_REALLY_IWMMXT, arm_arch_iwmmxt, IWMMXT_ALIGNMENT,
+       TYPE_NEEDS_IWMMXT_ALIGNMENT, ADJUST_FIELD_ALIGN,
+       DATA_ALIGNMENT, LOCAL_ALIGNMENT, VECTOR_MODE_SUPPORTED_P): Define.
+        (BIGGEST_ALIGNMENT): Set to 64 if ATPCS support is enabled.
+        (CPP_CPU_ARCH_SPEC): Add entries for iwmmxt.
+        (FIXED_REGISTERS, CALL_USED_REGISTERS, REG_ALLOC_ORDER,
+       reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS,
+       REG_CLASS_FOR_LETTER): Add iwmmxt registers.
+        (SUBTARGET_CONDITIONAL_REGISTER_USAGE): Disable iwmmxt
+       registers unless the iwmmxt target is selected.
+        (FIRST_IWMMXT_GR_REGNUM, LAST_IWMMXT_GR_REGNUM,
+       FIRST_IWMMXT_REGNUM, LAST_IWMMXT_REGNUM, IS_IWMMXT_REGNUM,
+       IS_IWMMXT_GR_REGNUM): Define.
+        (FIRST_PSEUDO_REGISTER): Bump to 63.
+        (struct machine_function): Add sibcall_blocked field.
+        (struct CUMULATIVE_ARGS): Add iwmmxt_nregs, named_count and
+       nargs fields.
+        (enum arm_builtins): New enum list.
+        * config/arm/arm.md (UNSPEC_WSHUFH, UNSPEC_WACC,
+       UNSPEC_TMOVMSK, UNSPEC_WSAD, UNSPEC_WSADZ, UNSPEC_WMACS,
+       UNSPEC_WMACU, UNSPEC_WMACSZ, UNSPEC_WMACUZ, UNSPEC_CLRDI,
+       UNSPEC_WMADDS, UNSPEC_WMADDU): New unspecs.
+       (VUNSPEC_TMRC, VUNSPEC_TMCR, VUNSPEC_ALIGN8, VUNSPEC_WCMP_EQ,
+       VUNSPEC_WCMP_GTU, VUNSPEC_WCMP_GT): New vunspecs.
+        (movv2si, movv4hi, movv8qi): New expands for vector moves.
+        Include iwmmxt.md.
+       * config/arm/t-xscale-elf (MULTILIB_OPTIONS): Add iwmmxt
+       multilib.
+        (MULTILIB_DIRNAMES, MULTILIB_REDUNDANT_DIRS): Likewise.
+        * config/arm/mmintrin.h: New ARM specific header file.
+        * config/arm/iwmmxt.md: New iWMMXt specific machine patterns.
+
 2003-06-18  J"orn Rennecke <joern.rennecke@superh.com>
 
        * toplev.c (Remaining -d letters summary): Update.
gcc/config.gcc
index e7e5313..d517d1e 100644
@@ -281,12 +281,14 @@ strongarm*-*-*)
        ;;
 arm*-*-*)
        cpu_type=arm
+       extra_headers="mmintrin.h"
        ;;
 ep9312*-*-*)
        cpu_type=arm
        ;;
 xscale-*-*)
        cpu_type=arm
+       extra_headers="mmintrin.h"
        ;;
 i[34567]86-*-*)
        cpu_type=i386
@@ -2303,7 +2305,7 @@ then
                        | xarm920t | xarm940t | xarm9e | xarm10tdmi \
                        | xarm7100 | xarm7500 | xarm7500fe | xarm810 \
                        | xarm1020t \
-                       | xxscale \
+                       | xxscale | xiwmmxt \
                        | xep9312 \
                        | xstrongarm | xstrongarm110 | xstrongarm11[01]0)
                                # OK
gcc/config/arm/aof.h
index 82749ff..39f2cc3 100644
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler, for Advanced RISC Machines
    ARM compilation, AOF Assembler.
-   Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1995, 1996, 1997, 2000, 2003 Free Software Foundation, Inc.
    Contributed by Richard Earnshaw (rearnsha@armltd.co.uk)
 
    This file is part of GCC.
 #define EXTRA_SECTIONS in_zero_init, in_common
 
 #define EXTRA_SECTION_FUNCTIONS        \
-ZERO_INIT_SECTION              \
-COMMON_SECTION
+  ZERO_INIT_SECTION            \
+  COMMON_SECTION
 
 #define ZERO_INIT_SECTION                                      \
-void                                                           \
-zero_init_section ()                                           \
-{                                                              \
-  static int zero_init_count = 1;                              \
-  if (in_section != in_zero_init)                              \
-    {                                                          \
-      fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n",  \
-              zero_init_count++);                              \
-      in_section = in_zero_init;                               \
-    }                                                          \
-}
+  void                                                         \
+  zero_init_section ()                                         \
+  {                                                            \
+    static int zero_init_count = 1;                            \
+                                                               \
+    if (in_section != in_zero_init)                            \
+      {                                                                \
+        fprintf (asm_out_file, "\tAREA |C$$zidata%d|,NOINIT\n",        \
+                zero_init_count++);                            \
+        in_section = in_zero_init;                             \
+      }                                                                \
+  }
 
 /* Used by ASM_OUTPUT_COMMON (below) to tell varasm.c that we've
    changed areas.  */
 #define COMMON_SECTION                                         \
-void                                                           \
-common_section ()                                              \
-{                                                              \
-  if (in_section != in_common)                                 \
-    {                                                          \
+  void                                                         \
+  common_section ()                                            \
+  {                                                            \
+    if (in_section != in_common)                               \
       in_section = in_common;                                  \
-    }                                                          \
-}
-#define CTOR_LIST_BEGIN                                        \
-asm (CTORS_SECTION_ASM_OP);                            \
-extern func_ptr __CTOR_END__[1];                       \
-func_ptr __CTOR_LIST__[1] = {__CTOR_END__};
+  }
 
-#define CTOR_LIST_END                                  \
-asm (CTORS_SECTION_ASM_OP);                            \
-func_ptr __CTOR_END__[1] = { (func_ptr) 0 };
+#define CTOR_LIST_BEGIN                                \
+  asm (CTORS_SECTION_ASM_OP);                  \
+  extern func_ptr __CTOR_END__[1];             \
+  func_ptr __CTOR_LIST__[1] = {__CTOR_END__};
 
-#define DO_GLOBAL_CTORS_BODY           \
-do {                                   \
-  func_ptr *ptr = __CTOR_LIST__ + 1;   \
-  while (*ptr)                         \
-    (*ptr++) ();                       \
-} while (0)
+#define CTOR_LIST_END                          \
+  asm (CTORS_SECTION_ASM_OP);                  \
+  func_ptr __CTOR_END__[1] = { (func_ptr) 0 };
 
-#define DTOR_LIST_BEGIN                                        \
-asm (DTORS_SECTION_ASM_OP);                            \
-extern func_ptr __DTOR_END__[1];                       \
-func_ptr __DTOR_LIST__[1] = {__DTOR_END__};
+#define DO_GLOBAL_CTORS_BODY                   \
+  do                                           \
+    {                                          \
+      func_ptr *ptr = __CTOR_LIST__ + 1;       \
+                                               \
+      while (*ptr)                             \
+        (*ptr++) ();                           \
+    }                                          \
+  while (0)
 
-#define DTOR_LIST_END                                  \
-asm (DTORS_SECTION_ASM_OP);                            \
-func_ptr __DTOR_END__[1] = { (func_ptr) 0 };
+#define DTOR_LIST_BEGIN                                \
+  asm (DTORS_SECTION_ASM_OP);                  \
+  extern func_ptr __DTOR_END__[1];             \
+  func_ptr __DTOR_LIST__[1] = {__DTOR_END__};
 
-#define DO_GLOBAL_DTORS_BODY           \
-do {                                   \
-  func_ptr *ptr = __DTOR_LIST__ + 1;   \
-  while (*ptr)                         \
-    (*ptr++) ();                       \
-} while (0)
+#define DTOR_LIST_END                          \
+  asm (DTORS_SECTION_ASM_OP);                  \
+  func_ptr __DTOR_END__[1] = { (func_ptr) 0 };
+
+#define DO_GLOBAL_DTORS_BODY                   \
+  do                                           \
+    {                                          \
+      func_ptr *ptr = __DTOR_LIST__ + 1;       \
+                                               \
+      while (*ptr)                             \
+        (*ptr++) ();                           \
+    }                                          \
+  while (0)
 
 /* We really want to put Thumb tables in a read-only data section, but
    switching to another section during function output is not
@@ -171,12 +177,10 @@ do {                                      \
 #define SYMBOL__MAIN __gccmain
 
 #define ASM_COMMENT_START ";"
+#define ASM_APP_ON        ""
+#define ASM_APP_OFF       ""
 
-#define ASM_APP_ON ""
-
-#define ASM_APP_OFF ""
-
-#define ASM_OUTPUT_ASCII(STREAM,PTR,LEN)               \
+#define ASM_OUTPUT_ASCII(STREAM, PTR, LEN)             \
 {                                                      \
   int i;                                               \
   const char *ptr = (PTR);                             \
@@ -184,23 +188,23 @@ do {                                      \
   for (i = 0; i < (long)(LEN); i++)                    \
     fprintf ((STREAM), " &%02x%s",                     \
             (unsigned ) *(ptr++),                      \
-            (i + 1 < (long)(LEN)                               \
+            (i + 1 < (long)(LEN)                       \
              ? ((i & 3) == 3 ? "\n\tDCB" : ",")        \
              : "\n"));                                 \
 }
 
 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == '\n')
 
-/* Output of Uninitialized Variables */
+/* Output of Uninitialized Variables */
 
-#define ASM_OUTPUT_COMMON(STREAM,NAME,SIZE,ROUNDED)            \
+#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED)         \
   (common_section (),                                          \
    fprintf ((STREAM), "\tAREA "),                              \
    assemble_name ((STREAM), (NAME)),                           \
    fprintf ((STREAM), ", DATA, COMMON\n\t%% %d\t%s size=%d\n", \
            (int)(ROUNDED), ASM_COMMENT_START, (int)(SIZE)))
 
-#define ASM_OUTPUT_LOCAL(STREAM,NAME,SIZE,ROUNDED)     \
+#define ASM_OUTPUT_LOCAL(STREAM, NAME, SIZE, ROUNDED)  \
    (zero_init_section (),                              \
     assemble_name ((STREAM), (NAME)),                  \
     fprintf ((STREAM), "\n"),                          \
@@ -208,7 +212,6 @@ do {                                        \
             (int)(ROUNDED), ASM_COMMENT_START, (int)(SIZE)))
 
 /* Output and Generation of Labels */
-
 extern int arm_main_function;
 
 /* Globalizing directive for a label.  */
@@ -258,22 +261,31 @@ do {                                      \
 #define ASM_GENERATE_INTERNAL_LABEL(STRING,PREFIX,NUM) \
   sprintf ((STRING), "*|%s..%ld|", (PREFIX), (long)(NUM))
 
-/* How initialization functions are handled */
+/* How initialization functions are handled */
 
 #define CTORS_SECTION_ASM_OP "\tAREA\t|C$$gnu_ctorsvec|, DATA, READONLY"
 #define DTORS_SECTION_ASM_OP "\tAREA\t|C$$gnu_dtorsvec|, DATA, READONLY"
 
-/* Output of Assembler Instructions */
-
-#define REGISTER_NAMES                 \
-{                                      \
-  "a1", "a2", "a3", "a4",      \
-  "v1", "v2", "v3", "v4",      \
-  "v5", "v6", "sl", "fp",      \
-  "ip", "sp", "lr", "pc",      \
-  "f0", "f1", "f2", "f3",      \
-  "f4", "f5", "f6", "f7",      \
-  "cc", "sfp", "afp"           \
+/* Output of Assembler Instructions.  */
+
+#define REGISTER_NAMES                         \
+{                                              \
+  "a1", "a2", "a3", "a4",                      \
+  "v1", "v2", "v3", "v4",                      \
+  "v5", "v6", "sl", "fp",                      \
+  "ip", "sp", "lr", "pc",                      \
+  "f0", "f1", "f2", "f3",                      \
+  "f4", "f5", "f6", "f7",                      \
+  "cc", "sfp", "afp",                          \
+  "mv0",   "mv1",   "mv2",   "mv3",            \
+  "mv4",   "mv5",   "mv6",   "mv7",            \
+  "mv8",   "mv9",   "mv10",  "mv11",           \
+  "mv12",  "mv13",  "mv14",  "mv15",           \
+  "wcgr0", "wcgr1", "wcgr2", "wcgr3",          \
+  "wr0",   "wr1",   "wr2",   "wr3",            \
+  "wr4",   "wr5",   "wr6",   "wr7",            \
+  "wr8",   "wr9",   "wr10",  "wr11",           \
+  "wr12",  "wr13",  "wr14",  "wr15"            \
 }
 
 #define ADDITIONAL_REGISTER_NAMES              \
@@ -303,37 +315,40 @@ do {                                      \
 /* AOF does not prefix user function names with an underscore.  */
 #define ARM_MCOUNT_NAME "_mcount"
 
-/* Output of Dispatch Tables */
-
-#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM,BODY,VALUE,REL)                        \
-  do {                                                                 \
-    if (TARGET_ARM)                                                    \
-      fprintf ((STREAM), "\tb\t|L..%d|\n", (VALUE));                   \
-    else                                                               \
-      fprintf ((STREAM), "\tDCD\t|L..%d| - |L..%d|\n", (VALUE), (REL));        \
-  } while (0)
-
-#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE)  \
+/* Output of Dispatch Tables.  */
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL)                     \
+  do                                                                           \
+    {                                                                          \
+      if (TARGET_ARM)                                                          \
+        fprintf ((STREAM), "\tb\t|L..%d|\n", (VALUE));                         \
+      else                                                                     \
+        fprintf ((STREAM), "\tDCD\t|L..%d| - |L..%d|\n", (VALUE), (REL));      \
+    }                                                                          \
+  while (0)
+
+#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
   fprintf ((STREAM), "\tDCD\t|L..%d|\n", (VALUE))
 
 /* A label marking the start of a jump table is a data label. */
-#define ASM_OUTPUT_CASE_LABEL(STREAM,PREFIX,NUM,TABLE) \
+#define ASM_OUTPUT_CASE_LABEL(STREAM, PREFIX, NUM, TABLE)      \
   fprintf ((STREAM), "\tALIGN\n|%s..%d|\n", (PREFIX), (NUM))
 
-/* Assembler Commands for Alignment */
-
-#define ASM_OUTPUT_SKIP(STREAM,NBYTES)         \
- fprintf ((STREAM), "\t%%\t%d\n", (int)(NBYTES))
-
-#define ASM_OUTPUT_ALIGN(STREAM,POWER)                 \
-do {                                                   \
-  register int amount = 1 << (POWER);                  \
-  if (amount == 2)                                     \
-    fprintf ((STREAM), "\tALIGN 2\n");                 \
-  else if (amount == 4)                                        \
-    fprintf ((STREAM), "\tALIGN\n");                   \
-  else                                                 \
-    fprintf ((STREAM), "\tALIGN %d\n", amount);                \
-} while (0)
+/* Assembler Commands for Alignment.  */
+#define ASM_OUTPUT_SKIP(STREAM, NBYTES)                \
+ fprintf ((STREAM), "\t%%\t%d\n", (int) (NBYTES))
+
+#define ASM_OUTPUT_ALIGN(STREAM, POWER)                        \
+  do                                                   \
+    {                                                  \
+      int amount = 1 << (POWER);                       \
+                                                       \
+      if (amount == 2)                                 \
+        fprintf ((STREAM), "\tALIGN 2\n");             \
+      else if (amount == 4)                            \
+        fprintf ((STREAM), "\tALIGN\n");               \
+      else                                             \
+        fprintf ((STREAM), "\tALIGN %d\n", amount);    \
+    }                                                  \
+  while (0)
 
 #undef DBX_DEBUGGING_INFO
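
The rewritten ASM_OUTPUT_ALIGN preserves the old behaviour: 2-byte
alignment emits "ALIGN 2", 4-byte alignment emits the assembler's bare
"ALIGN" default, and anything larger is spelled out.  A standalone model
of that choice (plain C; the buffer stands in for the output stream, so
this is an illustration rather than GCC code):

    #include <assert.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirror the ASM_OUTPUT_ALIGN selection above for a given
       power-of-two alignment request.  */
    static void
    aof_align_directive (char *buf, size_t len, int power)
    {
      int amount = 1 << power;

      if (amount == 2)
        snprintf (buf, len, "\tALIGN 2\n");
      else if (amount == 4)
        snprintf (buf, len, "\tALIGN\n");
      else
        snprintf (buf, len, "\tALIGN %d\n", amount);
    }

    int
    main (void)
    {
      char buf[32];

      aof_align_directive (buf, sizeof buf, 3);  /* 2^3 = 8-byte alignment */
      assert (strcmp (buf, "\tALIGN 8\n") == 0);
      return 0;
    }
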
gcc/config/arm/aout.h
index 263c334..23679eb 100644
@@ -65,7 +65,6 @@
 #define LOCAL_LABEL_PREFIX     ""
 #endif
 
-
 /* The assembler's names for the registers.  */
 #ifndef REGISTER_NAMES
 #define REGISTER_NAMES                            \
   "r8", "r9", "sl", "fp", "ip", "sp", "lr", "pc",  \
   "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",  \
   "cc", "sfp", "afp",                             \
-  "mv0",  "mv1",   "mv2",  "mv3",  "mv4",   "mv5", \
-  "mv6",  "mv7",   "mv8",  "mv9",  "mv10",  "mv11",\
-  "mv12", "mv13",  "mv14", "mv15"                 \
+  "mv0",   "mv1",   "mv2",   "mv3",               \
+  "mv4",   "mv5",   "mv6",   "mv7",               \
+  "mv8",   "mv9",   "mv10",  "mv11",              \
+  "mv12",  "mv13",  "mv14",  "mv15",              \
+  "wcgr0", "wcgr1", "wcgr2", "wcgr3",             \
+  "wr0",   "wr1",   "wr2",   "wr3",               \
+  "wr4",   "wr5",   "wr6",   "wr7",               \
+  "wr8",   "wr9",   "wr10",  "wr11",              \
+  "wr12",  "wr13",  "wr14",  "wr15"               \
 }
 #endif
 
 
 #undef  ASM_OUTPUT_ASCII
 #define ASM_OUTPUT_ASCII(STREAM, PTR, LEN)  \
-  output_ascii_pseudo_op (STREAM, (const unsigned char *)(PTR), LEN)
+  output_ascii_pseudo_op (STREAM, (const unsigned char *) (PTR), LEN)
 
 /* Output a gap.  In fact we fill it with nulls.  */
 #undef  ASM_OUTPUT_SKIP
 #define ASM_OUTPUT_SKIP(STREAM, NBYTES)        \
-  fprintf (STREAM, "\t.space\t%d\n", (int)(NBYTES))
+  fprintf (STREAM, "\t.space\t%d\n", (int) (NBYTES))
 
 /* Align output to a power of two.  Horrible /bin/as.  */
 #ifndef ASM_OUTPUT_ALIGN  
gcc/config/arm/arm-protos.h
index 62674bc..fe424d7 100644
@@ -135,6 +135,8 @@ extern void arm_final_prescan_insn (rtx);
 extern int arm_go_if_legitimate_address (enum machine_mode, rtx);
 extern int arm_debugger_arg_offset (int, rtx);
 extern int arm_is_longcall_p (rtx, int, int);
+extern int    arm_emit_vector_const (FILE *, rtx);
+extern const char * arm_output_load_gr (rtx *);
 
 #if defined TREE_CODE
 extern rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
gcc/config/arm/arm.c
index cd2b149..e5b1324 100644
 typedef struct minipool_node    Mnode;
 typedef struct minipool_fixup   Mfix;
 
-const char extra_reg_names1[][16] =
-{ "mv0", "mv1", "mv2",  "mv3",  "mv4",  "mv5",  "mv6",  "mv7",
-  "mv8", "mv9", "mv10", "mv11", "mv12", "mv13", "mv14", "mv15"
-};
-#define extra_reg_names1 bogus1_regnames
-
 const struct attribute_spec arm_attribute_table[];
 
 /* Forward function declarations.  */
@@ -135,6 +129,14 @@ static int arm_address_cost (rtx);
 static bool arm_memory_load_p (rtx);
 static bool arm_cirrus_insn_p (rtx);
 static void cirrus_reorg (rtx);
+static void arm_init_builtins (void);
+static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static void arm_init_iwmmxt_builtins (void);
+static rtx safe_vector_operand (rtx, enum machine_mode);
+static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
+static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
+static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+
 #ifdef OBJECT_FORMAT_ELF
 static void arm_elf_asm_named_section (const char *, unsigned int);
 #endif
@@ -201,28 +203,33 @@ static void aof_file_end (void);
 #define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
 #endif
 
-#undef TARGET_STRIP_NAME_ENCODING
+#undef  TARGET_STRIP_NAME_ENCODING
 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
 
-#undef TARGET_ASM_INTERNAL_LABEL
+#undef  TARGET_ASM_INTERNAL_LABEL
 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
 
-#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
 
-#undef TARGET_ASM_OUTPUT_MI_THUNK
+#undef  TARGET_ASM_OUTPUT_MI_THUNK
 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
-#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
 
-#undef TARGET_RTX_COSTS
+#undef  TARGET_RTX_COSTS
 #define TARGET_RTX_COSTS arm_rtx_costs
-#undef TARGET_ADDRESS_COST
+#undef  TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST arm_address_cost
 
-#undef TARGET_MACHINE_DEPENDENT_REORG
+#undef  TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
 
+#undef  TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS  arm_init_builtins
+#undef  TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN arm_expand_builtin
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 \f
 /* Obstack for minipool constant handling.  */
@@ -271,6 +278,7 @@ int    arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
 #define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
 #define FL_XSCALE     (1 << 10)              /* XScale */
 #define FL_CIRRUS     (1 << 11)              /* Cirrus/DSP.  */
+#define FL_IWMMXT     (1 << 29)              /* XScale v2 or "Intel Wireless MMX technology".  */
 
 /* The bits in this mask specify which
    instructions we are allowed to generate.  */
@@ -303,6 +311,9 @@ int arm_ld_sched = 0;
 /* Nonzero if this chip is a StrongARM.  */
 int arm_is_strong = 0;
 
+/* Nonzero if this chip supports Intel Wireless MMX technology.  */
+int arm_arch_iwmmxt = 0;
+
 /* Nonzero if this chip is an XScale.  */
 int arm_arch_xscale = 0;
 
@@ -413,6 +424,7 @@ static const struct processors all_cores[] =
   {"arm10tdmi",                                 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5 },
   {"arm1020t",                          FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5 },
   {"xscale",                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE },
+  {"iwmmxt",                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE | FL_IWMMXT },
 
   {NULL, 0}
 };
@@ -433,6 +445,7 @@ static const struct processors all_architectures[] =
   { "armv5t",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
   { "armv5te",   FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E },
   { "ep9312",                            FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_CIRRUS },
+  {"iwmmxt",                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE | FL_IWMMXT },
   { NULL, 0 }
 };
 
@@ -530,6 +543,7 @@ arm_override_options (void)
        { TARGET_CPU_strongarm, "strongarm" },
        { TARGET_CPU_xscale,    "xscale" },
        { TARGET_CPU_ep9312,    "ep9312" },
+       { TARGET_CPU_iwmmxt,    "iwmmxt" },
        { TARGET_CPU_generic,   "arm" },
        { 0, 0 }
       };
@@ -730,6 +744,10 @@ arm_override_options (void)
                       && !(tune_flags & FL_ARCH4))) != 0;
   arm_tune_xscale       = (tune_flags & FL_XSCALE) != 0;
   arm_is_cirrus            = (tune_flags & FL_CIRRUS) != 0;
+  arm_arch_iwmmxt   = (insn_flags & FL_IWMMXT) != 0;
+
+  if (TARGET_IWMMXT && (! TARGET_ATPCS))
+    target_flags |= ARM_FLAG_ATPCS;    
 
   if (arm_is_cirrus)
     {
@@ -888,7 +906,7 @@ arm_isr_value (tree argument)
   arg = TREE_STRING_POINTER (TREE_VALUE (argument));
 
   /* Check it against the list of known arguments.  */
-  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr ++)
+  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
     if (streq (arg, ptr->arg))
       return ptr->return_value;
 
@@ -1020,6 +1038,11 @@ use_return_insn (int iscond)
       if (regs_ever_live[regno] && !call_used_regs[regno])
        return 0;
 
+  if (TARGET_REALLY_IWMMXT)
+    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
+      if (regs_ever_live[regno] && ! call_used_regs [regno])
+       return 0;
+
   return 1;
 }
 
@@ -1943,6 +1966,7 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
 {
   /* On the ARM, the offset starts at 0.  */
   pcum->nregs = ((fntype && aggregate_value_p (TREE_TYPE (fntype))) ? 1 : 0);
+  pcum->iwmmxt_nregs = 0;
   
   pcum->call_cookie = CALL_NORMAL;
 
@@ -1958,6 +1982,24 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
       else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
        pcum->call_cookie = CALL_LONG;
     }
+
+  /* Varargs vectors are treated the same as long long.
+     named_count avoids having to change the way arm handles 'named' */
+  pcum->named_count = 0;
+  pcum->nargs = 0;
+
+  if (TARGET_REALLY_IWMMXT && fntype)
+    {
+      tree fn_arg;
+
+      for (fn_arg = TYPE_ARG_TYPES (fntype);
+          fn_arg;
+          fn_arg = TREE_CHAIN (fn_arg))
+       pcum->named_count += 1;
+
+      if (! pcum->named_count)
+       pcum->named_count = INT_MAX;
+    }
 }
 
 /* Determine where to put an argument to a function.
@@ -1977,6 +2019,30 @@ rtx
 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                  tree type ATTRIBUTE_UNUSED, int named)
 {
+  if (TARGET_REALLY_IWMMXT)
+    {
+      if (VECTOR_MODE_SUPPORTED_P (mode))
+       {
+         /* varargs vectors are treated the same as long long.
+            named_count avoids having to change the way arm handles 'named' */
+         if (pcum->named_count <= pcum->nargs + 1)
+           {
+             if (pcum->nregs == 1)
+               pcum->nregs += 1;
+             if (pcum->nregs <= 2)
+               return gen_rtx_REG (mode, pcum->nregs);
+             else
+               return NULL_RTX;
+           }
+         else if (pcum->iwmmxt_nregs <= 9)
+           return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
+         else
+           return NULL_RTX;
+       }
+      else if ((mode == DImode || mode == DFmode) && pcum->nregs & 1)
+       pcum->nregs += 1;
+    }
+
   if (mode == VOIDmode)
     /* Compute operand 2 of the call insn.  */
     return GEN_INT (pcum->call_cookie);
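
The argument-passing rules added to arm_function_arg above can be
summarised: a named vector argument goes in an iWMMXt register (up to ten
of them), while an anonymous one falls back to the long-long convention in
the core registers.  A standalone model of that decision, using plain C
stand-ins rather than GCC internals:

    #include <stdio.h>

    struct cum { int nregs, iwmmxt_nregs, named_count, nargs; };

    /* Simplified model of the vector-argument branch above; it does
       not advance iwmmxt_nregs, only classifies the argument.  */
    static const char *
    vector_arg_location (struct cum *c)
    {
      if (c->named_count <= c->nargs + 1)
        {
          /* Anonymous vector: treated like long long in r0-r3.  */
          if (c->nregs == 1)
            c->nregs += 1;              /* keep the register pair even */
          return c->nregs <= 2 ? "core register pair" : "stack";
        }
      else if (c->iwmmxt_nregs <= 9)
        return "iWMMXt register";       /* wr0 upwards */
      else
        return "stack";
    }

    int
    main (void)
    {
      struct cum named = { 0, 0, 2, 0 };    /* first of two named args */
      struct cum anon  = { 0, 0, 1, 1 };    /* past the named arguments */

      printf ("named: %s\n", vector_arg_location (&named));
      printf ("anon:  %s\n", vector_arg_location (&anon));
      return 0;
    }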
@@ -2010,6 +2076,26 @@ arm_va_arg (tree valist, tree type)
       return gen_rtx_MEM (ptr_mode, force_reg (Pmode, addr));
     }
 
+  if (FUNCTION_ARG_BOUNDARY (TYPE_MODE (type), NULL) == IWMMXT_ALIGNMENT)
+    {
+      tree minus_eight;
+      tree t;
+
+      /* Maintain 64-bit alignment of the valist pointer by
+        constructing:  valist = ((valist + (8 - 1)) & -8).  */
+      minus_eight = build_int_2 (- (IWMMXT_ALIGNMENT / BITS_PER_UNIT), -1);
+      t = build_int_2 ((IWMMXT_ALIGNMENT / BITS_PER_UNIT) - 1, 0);
+      t = build (PLUS_EXPR,    TREE_TYPE (valist), valist, t);
+      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, minus_eight);
+      t = build (MODIFY_EXPR,  TREE_TYPE (valist), valist, t);
+      TREE_SIDE_EFFECTS (t) = 1;
+      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+      /* This is to stop the combine pass optimising
+        away the alignment adjustment.  */
+      mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
+    }
+
   return std_expand_builtin_va_arg (valist, type);
 }
 \f
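
The tree built in arm_va_arg above implements the usual
round-up-to-multiple idiom, valist = (valist + 7) & -8.  A self-contained
check of that arithmetic, in plain C with an integer standing in for the
va_list tree:

    #include <assert.h>
    #include <stdint.h>

    /* Round p up to the next 8-byte boundary, as arm_va_arg does for
       the va_list pointer before fetching a 64-bit-aligned argument.  */
    static uintptr_t
    align8 (uintptr_t p)
    {
      return (p + (8 - 1)) & (uintptr_t) -8;
    }

    int
    main (void)
    {
      assert (align8 (0)  == 0);
      assert (align8 (1)  == 8);
      assert (align8 (8)  == 8);
      assert (align8 (13) == 16);
      return 0;
    }
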
@@ -2316,6 +2402,9 @@ arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
 {
   int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
 
+  if (cfun->machine->sibcall_blocked)
+    return false;
+
   /* Never tailcall something for which we have no decl, or if we
      are in Thumb mode.  */
   if (decl == NULL || TARGET_THUMB)
@@ -2661,6 +2750,11 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
       && GET_MODE_SIZE (mode) <= 4)
     return 1;
 
+  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
+    return (code == CONST_INT
+           && INTVAL (index) < 256
+           && INTVAL (index) > -256);
+
   /* XXX What about ldrsb?  */
   if (GET_MODE_SIZE (mode) <= 4  && code == MULT
       && (!arm_arch4 || (mode) != HImode))
@@ -3298,6 +3392,13 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
        case SImode:
          return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
 
+       case V8QImode:
+       case V4HImode:
+       case V2SImode:
+       case V4QImode:
+       case V2HImode:
+           return 1;
+
        default:
          break;
        }
@@ -5820,6 +5921,21 @@ arm_print_value (FILE *f, rtx x)
       fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
       return;
 
+    case CONST_VECTOR:
+      {
+       int i;
+
+       fprintf (f, "<");
+       for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
+         {
+           fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
+           if (i < (CONST_VECTOR_NUNITS (x) - 1))
+             fputc (',', f);
+         }
+       fprintf (f, ">");
+      }
+      return;
+
     case CONST_STRING:
       fprintf (f, "\"%s\"", XSTR (x, 0));
       return;
@@ -5940,6 +6056,8 @@ struct minipool_node
   rtx value;
   /* The mode of value.  */
   enum machine_mode mode;
+  /* The size of the value.  With iWMMXt enabled
+     sizes > 4 also imply an alignment of 8-bytes.  */
   int fix_size;
 };
 
@@ -6115,6 +6233,19 @@ add_minipool_forward_ref (Mfix *fix)
       if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;
+
+      /* If we are inserting an 8-byte aligned quantity and
+        we have not already found an insertion point, then
+        make sure that all such 8-byte aligned quantities are
+        placed at the start of the pool.  */
+      if (TARGET_REALLY_IWMMXT
+         && max_mp == NULL
+         && fix->fix_size == 8
+         && mp->fix_size != 8)
+       {
+         max_mp = mp;
+         max_address = mp->max_address;
+       }
     }
 
   /* The value is not currently in the minipool, so we need to create
@@ -6288,7 +6419,14 @@ add_minipool_backward_ref (Mfix *fix)
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
-           min_mp = mp;
+           {
+             /* For now, we do not allow the insertion of 8-byte alignment
+                requiring nodes anywhere but at the start of the pool.  */
+             if (TARGET_REALLY_IWMMXT && fix->fix_size == 8 && mp->fix_size != 8)
+               return NULL;
+             else
+               min_mp = mp;
+           }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
@@ -6299,6 +6437,18 @@ add_minipool_backward_ref (Mfix *fix)
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
+         /* If we are inserting an 8-byte aligned quantity and
+            we have not already found an insertion point, then
+            make sure that all such 8-byte aligned quantities are
+            placed at the start of the pool.  */
+         else if (TARGET_REALLY_IWMMXT
+                  && min_mp == NULL
+                  && fix->fix_size == 8
+                  && mp->fix_size < 8)
+           {
+             min_mp = mp;
+             min_address = mp->min_address + fix->fix_size;
+           }
        }
     }
 
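Both minipool hunks above enforce the same invariant: every 8-byte entry
is kept ahead of every 4-byte entry, so a single 8-byte alignment of the
pool head suffices for all of them.  A small self-contained check of that
layout rule, on an assumed example pool rather than GCC's data structures:

    #include <assert.h>

    int
    main (void)
    {
      /* 8-byte entries first, then 4-byte ones, as the minipool
         insertion code arranges; every 8-byte entry then starts on
         an 8-byte offset with no per-entry padding.  */
      int sizes[] = { 8, 8, 4, 4, 4 };
      int offset = 0;
      unsigned int i;

      for (i = 0; i < sizeof sizes / sizeof sizes[0]; i++)
        {
          if (sizes[i] == 8)
            assert (offset % 8 == 0);
          offset += sizes[i];
        }
      return 0;
    }
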
@@ -6385,16 +6535,25 @@ assign_minipool_offsets (Mfix *barrier)
 static void
 dump_minipool (rtx scan)
 {
-  Mnode *mp;
-  Mnode *nmp;
+  Mnode * mp;
+  Mnode * nmp;
+  int align64 = 0;
+
+  if (TARGET_REALLY_IWMMXT)
+    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+      if (mp->refcount > 0 && mp->fix_size == 8)
+       {
+         align64 = 1;
+         break;
+       }
 
   if (rtl_dump_file)
     fprintf (rtl_dump_file,
-            ";; Emitting minipool after insn %u; address %ld\n",
-            INSN_UID (scan), (unsigned long) minipool_barrier->address);
+            ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
+            INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
 
   scan = emit_label_after (gen_label_rtx (), scan);
-  scan = emit_insn_after (gen_align_4 (), scan);
+  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
   scan = emit_label_after (minipool_vector_label, scan);
 
   for (mp = minipool_vector_head; mp != NULL; mp = nmp)
@@ -6622,6 +6781,13 @@ push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
   if (fix->forwards == 0 && fix->backwards == 0)
     abort ();
 
+  /* With iWMMXt enabled, the pool is aligned to an 8-byte boundary.
+     So there might be an empty word before the start of the pool.
+     Hence we reduce the forward range by 4 to allow for this
+     possibility.  */
+  if (TARGET_REALLY_IWMMXT && fix->fix_size == 8)
+    fix->forwards -= 4;
+
   if (rtl_dump_file)
     {
       fprintf (rtl_dump_file,
@@ -7137,6 +7303,105 @@ output_move_double (rtx *operands)
          else
            output_asm_insn ("mov%?\t%R0, %R1\n\tmov%?\t%Q0, %Q1", operands);
        }
+      else if (code1 == CONST_VECTOR)
+       {
+         HOST_WIDE_INT hint = 0;
+
+         switch (GET_MODE (operands[1]))
+           {
+           case V2SImode:
+             otherops[1] = GEN_INT (INTVAL (CONST_VECTOR_ELT (operands[1], 1)));
+             operands[1] = GEN_INT (INTVAL (CONST_VECTOR_ELT (operands[1], 0)));
+             break;
+
+           case V4HImode:
+             if (BYTES_BIG_ENDIAN)
+               {
+                 hint = INTVAL (CONST_VECTOR_ELT (operands[1], 2));
+                 hint <<= 16;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 3));
+               }
+             else
+               {
+                 hint = INTVAL (CONST_VECTOR_ELT (operands[1], 3));
+                 hint <<= 16;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 2));
+               }
+
+             otherops[1] = GEN_INT (hint);
+             hint = 0;
+
+             if (BYTES_BIG_ENDIAN)
+               {
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 0));
+                 hint <<= 16;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 1));
+               }
+             else
+               {
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 1));
+                 hint <<= 16;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 0));
+               }
+
+             operands[1] = GEN_INT (hint);
+             break;
+
+           case V8QImode:
+             if (BYTES_BIG_ENDIAN)
+               {
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 4));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 5));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 6));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 7));
+               }
+             else
+               {
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 7));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 6));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 5));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 4));
+               }
+
+             otherops[1] = GEN_INT (hint);
+             hint = 0;
+
+             if (BYTES_BIG_ENDIAN)
+               {
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 0));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 1));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 2));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 3));
+               }
+             else
+               {
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 3));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 2));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 1));
+                 hint <<= 8;
+                 hint |= INTVAL (CONST_VECTOR_ELT (operands[1], 0));
+               }
+
+             operands[1] = GEN_INT (hint);
+             break;
+             
+           default:
+             abort ();
+           }
+         output_mov_immediate (operands);
+         output_mov_immediate (otherops);
+       }
       else if (code1 == CONST_DOUBLE)
        {
          if (GET_MODE (operands[1]) == DFmode)
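
The CONST_VECTOR handling above splits a 64-bit vector constant into two
32-bit immediates so output_mov_immediate can emit them, with the element
order depending on endianness.  A standalone model of the little-endian
V4HImode packing:

    #include <assert.h>
    #include <stdint.h>

    /* Combine two 16-bit lanes into one 32-bit immediate with the
       lower-numbered lane in the low half, matching the little-endian
       branch of the V4HImode case above.  */
    static uint32_t
    pack_halves (uint16_t low_lane, uint16_t high_lane)
    {
      return ((uint32_t) high_lane << 16) | low_lane;
    }

    int
    main (void)
    {
      /* V4HI {0x1111, 0x2222, 0x3333, 0x4444}: lanes 0/1 form the
         first word moved, lanes 2/3 the second.  */
      assert (pack_halves (0x1111, 0x2222) == 0x22221111);
      assert (pack_halves (0x3333, 0x4444) == 0x44443333);
      return 0;
    }
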
@@ -7367,9 +7632,9 @@ output_mov_immediate (rtx *operands)
       int i;
 
       /* If all else fails, make it out of ORRs or BICs as appropriate.  */
-      for (i = 0; i < 32; i ++)
+      for (i = 0; i < 32; i++)
        if (n & 1 << i)
-         n_ones ++;
+         n_ones++;
 
       if (n_ones > 16)  /* Shorter to use MVN with BIC in this case.  */
        output_multi_immediate (operands, "mvn%?\t%0, %1", "bic%?\t%0, %0, %1", 1, ~ n);
@@ -7751,6 +8016,34 @@ arm_compute_save_reg_mask (void)
   if (cfun->machine->lr_save_eliminated)
     save_reg_mask &= ~ (1 << LR_REGNUM);
 
+  if (TARGET_REALLY_IWMMXT
+      && ((bit_count (save_reg_mask)
+          + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
+    {
+      unsigned int reg;
+
+      /* The total number of registers that are going to be pushed
+        onto the stack is odd.  We need to ensure that the stack
+        is 64-bit aligned before we start to save iWMMXt registers,
+        and also before we start to create locals.  (A local variable
+        might be a double or long long which we will load/store using
+        an iWMMXt instruction).  Therefore we need to push another
+        ARM register, so that the stack will be 64-bit aligned.  We
+        try to avoid using the arg registers (r0 -r3) as they might be
+        used to pass values in a tail call.  */
+      for (reg = 4; reg <= 12; reg++)
+       if ((save_reg_mask & (1 << reg)) == 0)
+         break;
+
+      if (reg <= 12)
+       save_reg_mask |= (1 << reg);
+      else
+       {
+         cfun->machine->sibcall_blocked = 1;
+         save_reg_mask |= (1 << 3);
+       }
+    }
+
   return save_reg_mask;
 }
 
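The new block in arm_compute_save_reg_mask reduces to a parity check: if
the count of 4-byte words already due to be pushed is odd, saving one more
register restores 8-byte stack alignment.  A self-contained illustration,
where a local bit_count stands in for GCC's helper of the same name:

    #include <assert.h>

    /* Population count, as GCC's bit_count helper computes.  */
    static int
    bit_count (unsigned long mask)
    {
      int n = 0;

      for (; mask != 0; mask &= mask - 1)
        n++;
      return n;
    }

    int
    main (void)
    {
      unsigned long save_reg_mask = 0x70;   /* r4, r5, r6 */
      int pretend_words = 1;                /* one pretend-args word */

      /* Three saved registers plus one pretend word is even, so no
         padding register is needed; without the pretend word the
         count is odd and the prologue must push one extra register.  */
      assert ((bit_count (save_reg_mask) + pretend_words) % 2 == 0);
      assert ((bit_count (save_reg_mask) + 0) % 2 != 0);
      return 0;
    }
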
@@ -8068,6 +8361,7 @@ arm_output_epilogue (int really_return)
   int frame_size = arm_get_frame_size ();
   FILE * f = asm_out_file;
   rtx eh_ofs = cfun->machine->eh_epilogue_sp_ofs;
+  unsigned int lrm_count = 0;
 
   /* If we have already generated the return instruction
      then it is futile to generate anything else.  */
@@ -8099,12 +8393,15 @@ arm_output_epilogue (int really_return)
     abort ();
   
   saved_regs_mask = arm_compute_save_reg_mask ();
-  
+
+  if (TARGET_IWMMXT)
+    lrm_count = bit_count (saved_regs_mask);
+
   /* XXX We should adjust floats_offset for any anonymous args, and then
      re-adjust vfp_offset below to compensate.  */
 
   /* Compute how far away the floats will be.  */
-  for (reg = 0; reg <= LAST_ARM_REGNUM; reg ++)
+  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
     if (saved_regs_mask & (1 << reg))
       floats_offset += 4;
   
@@ -8157,6 +8454,26 @@ arm_output_epilogue (int really_return)
                         FP_REGNUM, floats_offset - vfp_offset);
        }
 
+      if (TARGET_IWMMXT)
+       {
+         /* The frame pointer is guaranteed to be non-double-word aligned.
+            This is because it is set to (old_stack_pointer - 4) and the
+            old_stack_pointer was double word aligned.  Thus the offset to
+            the iWMMXt registers to be loaded must also be non-double-word
+            sized, so that the resultant address *is* double-word aligned.
+            We can ignore floats_offset since that was already included in
+            the live_regs_mask.  */
+         lrm_count += (lrm_count % 2 ? 2 : 1);
+             
+         for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
+           if (regs_ever_live[reg] && !call_used_regs[reg])
+             {
+               asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n", 
+                            reg, FP_REGNUM, lrm_count * 4);
+               lrm_count += 2; 
+             }
+       }
+
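
The lrm_count adjustment above always produces an odd word count: an even
count gains 1 and an odd count gains 2.  Since the frame pointer sits 4
bytes below a double-word boundary, subtracting an odd multiple of 4 from
it yields an 8-byte-aligned load address for wldrd.  A quick
self-contained check:

    #include <assert.h>

    int
    main (void)
    {
      int lrm_count;

      for (lrm_count = 0; lrm_count < 8; lrm_count++)
        {
          /* Same adjustment as the epilogue code above.  */
          int adjusted = lrm_count + (lrm_count % 2 ? 2 : 1);

          /* An odd word count puts the offset at 4 mod 8 below FP,
             which is itself 4 bytes below a double-word boundary.  */
          assert (adjusted % 2 == 1);
        }
      return 0;
    }
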
       /* saved_regs_mask should contain the IP, which at the time of stack
         frame generation actually contains the old stack pointer.  So a
         quick way to unwind the stack is just pop the IP register directly
@@ -8234,6 +8551,11 @@ arm_output_epilogue (int really_return)
                         start_reg, reg - start_reg, SP_REGNUM);
        }
 
+      if (TARGET_IWMMXT)
+       for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
+         if (regs_ever_live[reg] && !call_used_regs[reg])
+           asm_fprintf (f, "\twldrd\t%r, [%r, #+8]!\n", reg, SP_REGNUM);
+
       /* If we can, restore the LR into the PC.  */
       if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && really_return
@@ -8602,34 +8924,32 @@ arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
 
       /* Make sure that we compute which registers will be saved
         on the stack using the same algorithm that is used by
-        arm_compute_save_reg_mask().  */
-      reg_mask = arm_compute_save_reg0_reg12_mask ();
+        the prologue creation code.  */
+      reg_mask = arm_compute_save_reg_mask ();
 
       /* Now count the number of bits set in save_reg_mask.
-        For each set bit we need 4 bytes of stack space.  */
-      while (reg_mask)
-       {
-         call_saved_registers += 4;
-         reg_mask = reg_mask & ~ (reg_mask & - reg_mask);
-       }
-
-      if ((regs_ever_live[LR_REGNUM]
-          /* If optimizing for size, then we save the link register if
-             any other integer register is saved.  This gives a smaller
-             return sequence.  */
-          || (optimize_size && call_saved_registers > 0))
-         /* But if a stack frame is going to be created, the LR will
-            be saved as part of that, so we do not need to allow for
-            it here.  */
-         && ! frame_pointer_needed)
-       call_saved_registers += 4;
+        If we have already counted the registers in the stack
+        frame, do not count them again.  Non call-saved registers
+        might be saved in the call-save area of the stack, if
+        doing so will preserve the stack's alignment.  Hence we
+        must count them here.  For each set bit we need 4 bytes
+        of stack space.  */
+      if (frame_pointer_needed)
+       reg_mask &= 0x07ff;
+      call_saved_registers += 4 * bit_count (reg_mask);
 
       /* If the hard floating point registers are going to be
         used then they must be saved on the stack as well.
          Each register occupies 12 bytes of stack space.  */
-      for (reg = FIRST_ARM_FP_REGNUM; reg <= LAST_ARM_FP_REGNUM; reg ++)
+      for (reg = FIRST_ARM_FP_REGNUM; reg <= LAST_ARM_FP_REGNUM; reg++)
        if (regs_ever_live[reg] && ! call_used_regs[reg])
          call_saved_registers += 12;
+
+      if (TARGET_REALLY_IWMMXT)
+       /* Check for the call-saved iWMMXt registers.  */
+       for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
+         if (regs_ever_live[reg] && ! call_used_regs [reg])
+           call_saved_registers += 8;
     }
 
   /* The stack frame contains 4 registers - the old frame pointer,
@@ -8771,6 +9091,14 @@ arm_get_frame_size (void)
        entry_size += 12;
     }
 
+  if (TARGET_REALLY_IWMMXT)
+    {
+      /* Check for the call-saved iWMMXt registers.  */
+      for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
+       if (regs_ever_live [regno] && ! call_used_regs [regno])
+         entry_size += 8;
+    }
+
   if ((entry_size + base_size + current_function_outgoing_args_size) & 7)
     base_size += 4;
   if ((entry_size + base_size + current_function_outgoing_args_size) & 7)
@@ -8942,6 +9270,17 @@ arm_expand_prologue (void)
       RTX_FRAME_RELATED_P (insn) = 1;
     }
 
+  if (TARGET_IWMMXT)
+    for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
+      if (regs_ever_live[reg] && ! call_used_regs [reg])
+       {
+         insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
+         insn = gen_rtx_MEM (V2SImode, insn);
+         insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
+                                        gen_rtx_REG (V2SImode, reg)));
+         RTX_FRAME_RELATED_P (insn) = 1;
+       }
+
   if (! IS_VOLATILE (func_type))
     {
       /* Save any floating point call-saved registers used by this
@@ -9248,7 +9587,6 @@ arm_print_operand (FILE *stream, rtx x, int code)
        fputs (thumb_condition_code (x, 1), stream);
       return;
 
-
     /* Cirrus registers can be accessed in a variety of ways:
          single floating point (f)
         double floating point (d)
@@ -9285,6 +9623,37 @@ arm_print_operand (FILE *stream, rtx x, int code)
        return;
       }
 
+    case 'U':
+      if (GET_CODE (x) != REG
+         || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
+         || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
+       /* Bad value for wCG register number.  */
+       abort ();
+      else
+       fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
+      return;
+
+      /* Print an iWMMXt control register name.  */
+    case 'w':
+      if (GET_CODE (x) != CONST_INT
+         || INTVAL (x) < 0
+         || INTVAL (x) >= 16)
+       /* Bad value for wC register number.  */
+       abort ();
+      else
+       {
+         static const char * wc_reg_names [16] =
+           {
+             "wCID",  "wCon",  "wCSSF", "wCASF",
+             "wC4",   "wC5",   "wC6",   "wC7",
+             "wCGR0", "wCGR1", "wCGR2", "wCGR3",
+             "wC12",  "wC13",  "wC14",  "wC15"
+           };
+         
+         fprintf (stream, wc_reg_names [INTVAL (x)]);
+       }
+      return;
+
     default:
       if (x == 0)
        abort ();
@@ -9337,6 +9706,36 @@ arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
       return true;
     }
 
+  if (VECTOR_MODE_SUPPORTED_P (GET_MODE (x)))
+    {
+      int i, units;
+
+      if (GET_CODE (x) != CONST_VECTOR)
+       abort ();
+
+      units = CONST_VECTOR_NUNITS (x);
+
+      switch (GET_MODE (x))
+       {
+       case V2SImode: size = 4; break;
+       case V4HImode: size = 2; break;
+       case V8QImode: size = 1; break;
+       default:
+         abort ();
+       }
+
+      for (i = 0; i < units; i++)
+       {
+         rtx elt;
+
+         elt = CONST_VECTOR_ELT (x, i);
+         assemble_integer
+           (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
+       }
+
+      return true;
+    }
+
   return default_assemble_integer (x, size, aligned_p);
 }
 #endif
@@ -9869,6 +10268,12 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
        get sign extended to 64bits-- aldyh.  */
     return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
 
+  if (IS_IWMMXT_GR_REGNUM (regno))
+    return mode == SImode;
+
+  if (IS_IWMMXT_REGNUM (regno))
+    return VALID_IWMMXT_REG_MODE (mode);
+
   if (regno <= LAST_ARM_REGNUM)
     /* We allow any value to be stored in the general registers.  */
     return 1;
@@ -9910,6 +10315,9 @@ arm_regno_class (int regno)
   if (IS_CIRRUS_REGNUM (regno))
     return CIRRUS_REGS;
 
+  if (IS_IWMMXT_REGNUM (regno))
+    return IWMMXT_REGS;
+
   return FPA_REGS;
 }
 
@@ -9996,6 +10404,796 @@ arm_debugger_arg_offset (int value, rtx addr)
   return value;
 }
 \f
+#define def_mbuiltin(MASK, NAME, TYPE, CODE)                           \
+  do                                                                   \
+    {                                                                  \
+      if ((MASK) & insn_flags)                                         \
+        builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE);       \
+    }                                                                  \
+  while (0)
+
+struct builtin_description
+{
+  const unsigned int       mask;
+  const enum insn_code     icode;
+  const char * const       name;
+  const enum arm_builtins  code;
+  const enum rtx_code      comparison;
+  const unsigned int       flag;
+};
+
+static const struct builtin_description bdesc_2arg[] =
+{
+#define IWMMXT_BUILTIN(code, string, builtin) \
+  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
+    ARM_BUILTIN_##builtin, 0, 0 },
+
+  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
+  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
+  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
+  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
+  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
+  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
+  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
+  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
+  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
+  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
+  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
+  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
+  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
+  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
+  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
+  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
+  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
+  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
+  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
+  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsh", WMULSH)
+  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmuluh", WMULUH)
+  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
+  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
+  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
+  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
+  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
+  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
+  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
+  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
+  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
+  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
+  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
+  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
+  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
+  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
+  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
+  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
+  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
+  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
+  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
+  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
+  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
+  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
+  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
+  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
+  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
+  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
+  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
+  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
+  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
+  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
+  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
+  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
+
+#define IWMMXT_BUILTIN2(code, builtin) \
+  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
+  
+  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
+  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
+  IWMMXT_BUILTIN2 (ashlv4hi3_di,    WSLLH)
+  IWMMXT_BUILTIN2 (ashlv4hi3,       WSLLHI)
+  IWMMXT_BUILTIN2 (ashlv2si3_di,    WSLLW)
+  IWMMXT_BUILTIN2 (ashlv2si3,       WSLLWI)
+  IWMMXT_BUILTIN2 (ashldi3_di,      WSLLD)
+  IWMMXT_BUILTIN2 (ashldi3_iwmmxt,  WSLLDI)
+  IWMMXT_BUILTIN2 (lshrv4hi3_di,    WSRLH)
+  IWMMXT_BUILTIN2 (lshrv4hi3,       WSRLHI)
+  IWMMXT_BUILTIN2 (lshrv2si3_di,    WSRLW)
+  IWMMXT_BUILTIN2 (lshrv2si3,       WSRLWI)
+  IWMMXT_BUILTIN2 (lshrdi3_di,      WSRLD)
+  IWMMXT_BUILTIN2 (lshrdi3,         WSRLDI)
+  IWMMXT_BUILTIN2 (ashrv4hi3_di,    WSRAH)
+  IWMMXT_BUILTIN2 (ashrv4hi3,       WSRAHI)
+  IWMMXT_BUILTIN2 (ashrv2si3_di,    WSRAW)
+  IWMMXT_BUILTIN2 (ashrv2si3,       WSRAWI)
+  IWMMXT_BUILTIN2 (ashrdi3_di,      WSRAD)
+  IWMMXT_BUILTIN2 (ashrdi3,         WSRADI)
+  IWMMXT_BUILTIN2 (rorv4hi3_di,     WRORH)
+  IWMMXT_BUILTIN2 (rorv4hi3,        WRORHI)
+  IWMMXT_BUILTIN2 (rorv2si3_di,     WRORW)
+  IWMMXT_BUILTIN2 (rorv2si3,        WRORWI)
+  IWMMXT_BUILTIN2 (rordi3_di,       WRORD)
+  IWMMXT_BUILTIN2 (rordi3,          WRORDI)
+  IWMMXT_BUILTIN2 (iwmmxt_wmacuz,   WMACUZ)
+  IWMMXT_BUILTIN2 (iwmmxt_wmacsz,   WMACSZ)
+};
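+
+/* Illustrative note (not part of the generated tables): each
+   IWMMXT_BUILTIN invocation above expands to one initializer, e.g.
+
+     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
+
+   becomes
+
+     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
+       ARM_BUILTIN_WADDB, 0, 0 },
+
+   which lets arm_init_iwmmxt_builtins register the whole table in a
+   single walk.  */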
+
+static const struct builtin_description bdesc_1arg[] =
+{
+  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
+  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
+  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
+  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
+  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
+  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
+  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
+};
+
+/* Set up all the iWMMXt builtins.  This is
+   not called if TARGET_IWMMXT is zero.  */
+
+static void
+arm_init_iwmmxt_builtins (void)
+{
+  const struct builtin_description * d;
+  size_t i;
+  tree endlink = void_list_node;
+
+  tree int_ftype_int
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, integer_type_node, endlink));
+  tree v8qi_ftype_v8qi_v8qi_int
+    = build_function_type (V8QI_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     tree_cons (NULL_TREE, V8QI_type_node,
+                                                tree_cons (NULL_TREE,
+                                                           integer_type_node,
+                                                           endlink))));
+  tree v4hi_ftype_v4hi_int
+    = build_function_type (V4HI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree v2si_ftype_v2si_int
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree v2si_ftype_di_di
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, long_long_integer_type_node,
+                                     tree_cons (NULL_TREE, long_long_integer_type_node,
+                                                endlink)));
+  tree di_ftype_di_int
+    = build_function_type (long_long_integer_type_node,
+                          tree_cons (NULL_TREE, long_long_integer_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree di_ftype_di_int_int
+    = build_function_type (long_long_integer_type_node,
+                          tree_cons (NULL_TREE, long_long_integer_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                tree_cons (NULL_TREE,
+                                                           integer_type_node,
+                                                           endlink))));
+  tree int_ftype_v8qi
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     endlink));
+  tree int_ftype_v4hi
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     endlink));
+  tree int_ftype_v2si
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     endlink));
+  tree int_ftype_v8qi_int
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree int_ftype_v4hi_int
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree int_ftype_v2si_int
+    = build_function_type (integer_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree v8qi_ftype_v8qi_int_int
+    = build_function_type (V8QI_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                tree_cons (NULL_TREE,
+                                                           integer_type_node,
+                                                           endlink))));
+  tree v4hi_ftype_v4hi_int_int
+    = build_function_type (V4HI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                tree_cons (NULL_TREE,
+                                                           integer_type_node,
+                                                           endlink))));
+  tree v2si_ftype_v2si_int_int
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                tree_cons (NULL_TREE,
+                                                           integer_type_node,
+                                                           endlink))));
+  /* Miscellaneous.  */
+  tree v8qi_ftype_v4hi_v4hi
+    = build_function_type (V8QI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, V4HI_type_node,
+                                                endlink)));
+  tree v4hi_ftype_v2si_v2si
+    = build_function_type (V4HI_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     tree_cons (NULL_TREE, V2SI_type_node,
+                                                endlink)));
+  tree v2si_ftype_v4hi_v4hi
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, V4HI_type_node,
+                                                endlink)));
+  tree v2si_ftype_v8qi_v8qi
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     tree_cons (NULL_TREE, V8QI_type_node,
+                                                endlink)));
+  tree v4hi_ftype_v4hi_di
+    = build_function_type (V4HI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE,
+                                                long_long_integer_type_node,
+                                                endlink)));
+  tree v2si_ftype_v2si_di
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     tree_cons (NULL_TREE,
+                                                long_long_integer_type_node,
+                                                endlink)));
+  tree void_ftype_int_int
+    = build_function_type (void_type_node,
+                          tree_cons (NULL_TREE, integer_type_node,
+                                     tree_cons (NULL_TREE, integer_type_node,
+                                                endlink)));
+  tree di_ftype_void
+    = build_function_type (long_long_unsigned_type_node, endlink);
+  tree di_ftype_v8qi
+    = build_function_type (long_long_integer_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     endlink));
+  tree di_ftype_v4hi
+    = build_function_type (long_long_integer_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     endlink));
+  tree di_ftype_v2si
+    = build_function_type (long_long_integer_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     endlink));
+  tree v2si_ftype_v4hi
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     endlink));
+  tree v4hi_ftype_v8qi
+    = build_function_type (V4HI_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     endlink));
+
+  tree di_ftype_di_v4hi_v4hi
+    = build_function_type (long_long_unsigned_type_node,
+                          tree_cons (NULL_TREE,
+                                     long_long_unsigned_type_node,
+                                     tree_cons (NULL_TREE, V4HI_type_node,
+                                                tree_cons (NULL_TREE,
+                                                           V4HI_type_node,
+                                                           endlink))));
+
+  tree di_ftype_v4hi_v4hi
+    = build_function_type (long_long_unsigned_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, V4HI_type_node,
+                                                endlink)));
+
+  /* Normal vector binops.  */
+  tree v8qi_ftype_v8qi_v8qi
+    = build_function_type (V8QI_type_node,
+                          tree_cons (NULL_TREE, V8QI_type_node,
+                                     tree_cons (NULL_TREE, V8QI_type_node,
+                                                endlink)));
+  tree v4hi_ftype_v4hi_v4hi
+    = build_function_type (V4HI_type_node,
+                          tree_cons (NULL_TREE, V4HI_type_node,
+                                     tree_cons (NULL_TREE, V4HI_type_node,
+                                                endlink)));
+  tree v2si_ftype_v2si_v2si
+    = build_function_type (V2SI_type_node,
+                          tree_cons (NULL_TREE, V2SI_type_node,
+                                     tree_cons (NULL_TREE, V2SI_type_node,
+                                                endlink)));
+  tree di_ftype_di_di
+    = build_function_type (long_long_unsigned_type_node,
+                          tree_cons (NULL_TREE, long_long_unsigned_type_node,
+                                     tree_cons (NULL_TREE,
+                                                long_long_unsigned_type_node,
+                                                endlink)));
+
+  /* Add all builtins that are more or less simple operations on two
+     operands.  */
+  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
+    {
+      /* Use one of the operands; the target can have a different mode for
+        mask-generating compares.  */
+      enum machine_mode mode;
+      tree type;
+
+      if (d->name == 0)
+       continue;
+
+      mode = insn_data[d->icode].operand[1].mode;
+
+      switch (mode)
+       {
+       case V8QImode:
+         type = v8qi_ftype_v8qi_v8qi;
+         break;
+       case V4HImode:
+         type = v4hi_ftype_v4hi_v4hi;
+         break;
+       case V2SImode:
+         type = v2si_ftype_v2si_v2si;
+         break;
+       case DImode:
+         type = di_ftype_di_di;
+         break;
+
+       default:
+         abort ();
+       }
+
+      def_mbuiltin (d->mask, d->name, type, d->code);
+    }
+
+  /* Add the remaining MMX insns with somewhat more complicated types.  */
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
+
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
+  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
+}
+
+static void
+arm_init_builtins (void)
+{
+  if (TARGET_REALLY_IWMMXT)
+    arm_init_iwmmxt_builtins ();
+}
+
+/* Errors in the source file can cause expand_expr to return const0_rtx
+   where we expect a vector.  To avoid crashing, use one of the vector
+   clear instructions.  */
+
+static rtx
+safe_vector_operand (rtx x, enum machine_mode mode)
+{
+  if (x != const0_rtx)
+    return x;
+  x = gen_reg_rtx (mode);
+
+  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
+                              : gen_rtx_SUBREG (DImode, x, 0)));
+  return x;
+}
+
+/* Subroutine of arm_expand_builtin to take care of binop insns.  */
+
+static rtx
+arm_expand_binop_builtin (enum insn_code icode,
+                         tree arglist, rtx target)
+{
+  rtx pat;
+  tree arg0 = TREE_VALUE (arglist);
+  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+  enum machine_mode tmode = insn_data[icode].operand[0].mode;
+  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
+
+  if (VECTOR_MODE_P (mode0))
+    op0 = safe_vector_operand (op0, mode0);
+  if (VECTOR_MODE_P (mode1))
+    op1 = safe_vector_operand (op1, mode1);
+
+  if (! target
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  /* In case the insn wants input operands in modes different from
+     the result, abort.  */
+  if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
+    abort ();
+
+  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+    op1 = copy_to_mode_reg (mode1, op1);
+
+  pat = GEN_FCN (icode) (target, op0, op1);
+  if (! pat)
+    return 0;
+  emit_insn (pat);
+  return target;
+}
+
+/* Subroutine of arm_expand_builtin to take care of unop insns.  */
+
+static rtx
+arm_expand_unop_builtin (enum insn_code icode,
+                        tree arglist, rtx target, int do_load)
+{
+  rtx pat;
+  tree arg0 = TREE_VALUE (arglist);
+  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+  enum machine_mode tmode = insn_data[icode].operand[0].mode;
+  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
+
+  if (! target
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+  if (do_load)
+    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
+  else
+    {
+      if (VECTOR_MODE_P (mode0))
+       op0 = safe_vector_operand (op0, mode0);
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+       op0 = copy_to_mode_reg (mode0, op0);
+    }
+
+  pat = GEN_FCN (icode) (target, op0);
+  if (! pat)
+    return 0;
+  emit_insn (pat);
+  return target;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+   with result going to TARGET if that's convenient
+   (and in mode MODE if that's convenient).
+   SUBTARGET may be used as the target for computing one of EXP's operands.
+   IGNORE is nonzero if the value is to be ignored.  */
+
+static rtx
+arm_expand_builtin (tree exp,
+                   rtx target,
+                   rtx subtarget ATTRIBUTE_UNUSED,
+                   enum machine_mode mode ATTRIBUTE_UNUSED,
+                   int ignore ATTRIBUTE_UNUSED)
+{
+  const struct builtin_description * d;
+  enum insn_code    icode;
+  tree              fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+  tree              arglist = TREE_OPERAND (exp, 1);
+  tree              arg0;
+  tree              arg1;
+  tree              arg2;
+  rtx               op0;
+  rtx               op1;
+  rtx               op2;
+  rtx               pat;
+  int               fcode = DECL_FUNCTION_CODE (fndecl);
+  size_t            i;
+  enum machine_mode tmode;
+  enum machine_mode mode0;
+  enum machine_mode mode1;
+  enum machine_mode mode2;
+
+  switch (fcode)
+    {
+    case ARM_BUILTIN_TEXTRMSB:
+    case ARM_BUILTIN_TEXTRMUB:
+    case ARM_BUILTIN_TEXTRMSH:
+    case ARM_BUILTIN_TEXTRMUH:
+    case ARM_BUILTIN_TEXTRMSW:
+    case ARM_BUILTIN_TEXTRMUW:
+      icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
+              : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
+              : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
+              : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
+              : CODE_FOR_iwmmxt_textrmw);
+
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode0 = insn_data[icode].operand[1].mode;
+      mode1 = insn_data[icode].operand[2].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+       op0 = copy_to_mode_reg (mode0, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+       {
+         /* @@@ better error message */
+         error ("selector must be an immediate");
+         return gen_reg_rtx (tmode);
+       }
+      if (target == 0
+         || GET_MODE (target) != tmode
+         || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+       target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (! pat)
+       return 0;
+      emit_insn (pat);
+      return target;
+
+    case ARM_BUILTIN_TINSRB:
+    case ARM_BUILTIN_TINSRH:
+    case ARM_BUILTIN_TINSRW:
+      icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
+              : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
+              : CODE_FOR_iwmmxt_tinsrw);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode0 = insn_data[icode].operand[1].mode;
+      mode1 = insn_data[icode].operand[2].mode;
+      mode2 = insn_data[icode].operand[3].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+       op0 = copy_to_mode_reg (mode0, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+       op1 = copy_to_mode_reg (mode1, op1);
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+       {
+         /* @@@ better error message */
+         error ("selector must be an immediate");
+         return const0_rtx;
+       }
+      if (target == 0
+         || GET_MODE (target) != tmode
+         || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+       target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2);
+      if (! pat)
+       return 0;
+      emit_insn (pat);
+      return target;
+
+    case ARM_BUILTIN_SETWCX:
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      emit_insn (gen_iwmmxt_tmcr (op0, op1));
+      return 0;
+
+    case ARM_BUILTIN_GETWCX:
+      arg0 = TREE_VALUE (arglist);
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      target = gen_reg_rtx (SImode);
+      emit_insn (gen_iwmmxt_tmrc (target, op0));
+      return target;
+
+    case ARM_BUILTIN_WSHUFH:
+      icode = CODE_FOR_iwmmxt_wshufh;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+       op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+       {
+         /* @@@ better error message */
+         error ("mask must be an immediate");
+         return const0_rtx;
+       }
+      if (target == 0
+         || GET_MODE (target) != tmode
+         || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+       target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (! pat)
+       return 0;
+      emit_insn (pat);
+      return target;
+
+    case ARM_BUILTIN_WSADB:
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
+    case ARM_BUILTIN_WSADH:
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
+    case ARM_BUILTIN_WSADBZ:
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
+    case ARM_BUILTIN_WSADHZ:
+      return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);
+
+      /* Several three-argument builtins.  */
+    case ARM_BUILTIN_WMACS:
+    case ARM_BUILTIN_WMACU:
+    case ARM_BUILTIN_WALIGN:
+    case ARM_BUILTIN_TMIA:
+    case ARM_BUILTIN_TMIAPH:
+    case ARM_BUILTIN_TMIATT:
+    case ARM_BUILTIN_TMIATB:
+    case ARM_BUILTIN_TMIABT:
+    case ARM_BUILTIN_TMIABB:
+      icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
+              : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
+              : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
+              : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
+              : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
+              : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
+              : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
+              : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
+              : CODE_FOR_iwmmxt_walign);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode0 = insn_data[icode].operand[1].mode;
+      mode1 = insn_data[icode].operand[2].mode;
+      mode2 = insn_data[icode].operand[3].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+       op0 = copy_to_mode_reg (mode0, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+       op1 = copy_to_mode_reg (mode1, op1);
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+       op2 = copy_to_mode_reg (mode2, op2);
+      if (target == 0
+         || GET_MODE (target) != tmode
+         || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+       target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2);
+      if (! pat)
+       return 0;
+      emit_insn (pat);
+      return target;
+      
+    case ARM_BUILTIN_WZERO:
+      target = gen_reg_rtx (DImode);
+      emit_insn (gen_iwmmxt_clrdi (target));
+      return target;
+
+    default:
+      break;
+    }
+
+  for (i = 0, d = bdesc_2arg; i < sizeof (bdesc_2arg) / sizeof *d; i++, d++)
+    if (d->code == (const enum arm_builtins) fcode)
+      return arm_expand_binop_builtin (d->icode, arglist, target);
+
+  for (i = 0, d = bdesc_1arg; i < sizeof (bdesc_1arg) / sizeof *d; i++, d++)
+    if (d->code == (const enum arm_builtins) fcode)
+      return arm_expand_unop_builtin (d->icode, arglist, target, 0);
+
+  /* @@@ Should really do something sensible here.  */
+  return NULL_RTX;
+}
+\f
 /* Recursively search through all of the blocks in a function
    checking to see if any of the variables created in that
    function match the RTX called 'orig'.  If they do then
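
To make the new interface concrete, here is a minimal user-level sketch of
the builtins registered above.  It is an illustration only: the typedef
name and the -mcpu=iwmmxt command line are assumptions, since the patch
itself installs no user-visible header.

    /* A minimal sketch, assuming a compiler built with this patch and
       invoked with -mcpu=iwmmxt; uses GCC's vector_size extension.  */
    typedef char v8qi __attribute__ ((vector_size (8)));

    v8qi
    add_bytes (v8qi a, v8qi b)
    {
      /* Dispatched through bdesc_2arg to the iWMMXt waddb instruction.  */
      return __builtin_arm_waddb (a, b);
    }

The saturating variants (__builtin_arm_waddbss, __builtin_arm_waddbus and
so on) follow the same shape, differing only in the insn pattern named in
the table.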
@@ -11783,3 +12981,64 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
     fputs ("(PLT)", file);
   fputc ('\n', file);
 }
+
+int
+arm_emit_vector_const (FILE * file, rtx x)
+{
+  int i;
+  const char * pattern;
+
+  if (GET_CODE (x) != CONST_VECTOR)
+    abort ();
+
+  switch (GET_MODE (x))
+    {
+    case V2SImode: pattern = "%08x"; break;
+    case V4HImode: pattern = "%04x"; break;
+    case V8QImode: pattern = "%02x"; break;
+    default:       abort ();
+    }
+
+  fprintf (file, "0x");
+  for (i = CONST_VECTOR_NUNITS (x); i--;)
+    {
+      rtx element;
+
+      element = CONST_VECTOR_ELT (x, i);
+      fprintf (file, pattern, INTVAL (element));
+    }
+
+  return 1;
+}
+
+const char *
+arm_output_load_gr (rtx * operands)
+{
+  rtx reg;
+  rtx offset;
+  rtx wcgr;
+  rtx sum;
+  
+  if (GET_CODE (operands [1]) != MEM
+      || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
+      || GET_CODE (reg = XEXP (sum, 0)) != REG
+      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
+      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
+    return "wldrw%?\t%0, %1";
+  
+  /* Fix up an out-of-range load of a GR register.  */  
+  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
+  wcgr = operands[0];
+  operands[0] = reg;
+  output_asm_insn ("ldr%?\t%0, %1", operands);
+
+  operands[0] = wcgr;
+  operands[1] = reg;
+  output_asm_insn ("tmcr%?\t%0, %1", operands);
+  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
+
+  return "";
+}
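
A worked example of arm_emit_vector_const above (the values are
illustrative, not from the patch): given a V4HImode CONST_VECTOR holding
the elements 1, 2, 3 and 4, the loop walks the elements from the highest
index down and prints each with the "%04x" pattern, so the function emits
the single literal

    0x0004000300020001

i.e. the 64-bit image of the vector with element 0 in the least
significant halfword.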
index c923e01..0a3dcda 100644
@@ -97,6 +97,7 @@
 #define TARGET_CPU_arm9tdmi    0x0080
 #define TARGET_CPU_xscale       0x0100
 #define TARGET_CPU_ep9312      0x0200
+#define TARGET_CPU_iwmmxt      0x0400
 /* Configure didn't specify.  */
 #define TARGET_CPU_generic     0x8000
 
@@ -265,6 +266,9 @@ Unrecognized value in TARGET_CPU_DEFAULT.
  %{mcpu=xscale:-D__XSCALE__} \
  %{mcpu=ep9312:-D__ARM_ARCH_4T__} \
  %{mcpu=ep9312:-D__MAVERICK__} \
+ %{mcpu=iwmmxt:-D__ARM_ARCH_5TE__} \
+ %{mcpu=iwmmxt:-D__XSCALE__} \
+ %{mcpu=iwmmxt:-D__IWMMXT__} \
  %{!mcpu*:%(cpp_cpu_arch_default)}} \
 "
 
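The practical effect of the new spec lines: assuming an arm-elf compiler
built with this patch, -mcpu=iwmmxt now predefines __ARM_ARCH_5TE__,
__XSCALE__ and __IWMMXT__, so sources can guard iWMMXt-specific paths in
the usual way:

    #ifdef __IWMMXT__
      /* iWMMXt builtins and registers may be used here.  */
    #else
      /* Generic ARM fallback.  */
    #endif
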
@@ -406,6 +410,8 @@ Unrecognized value in TARGET_CPU_DEFAULT.
 #define TARGET_HARD_FLOAT              (! TARGET_SOFT_FLOAT)
 #define TARGET_CIRRUS                  (arm_is_cirrus)
 #define TARGET_ANY_HARD_FLOAT          (TARGET_HARD_FLOAT || TARGET_CIRRUS)
+#define TARGET_IWMMXT                  (arm_arch_iwmmxt)
+#define TARGET_REALLY_IWMMXT           (TARGET_IWMMXT && TARGET_ARM)
 #define TARGET_VFP                     (target_flags & ARM_FLAG_VFP)
 #define TARGET_BIG_END                 (target_flags & ARM_FLAG_BIG_END)
 #define TARGET_INTERWORK               (target_flags & ARM_FLAG_INTERWORK)
@@ -623,6 +629,9 @@ extern int arm_is_strong;
 /* Nonzero if this chip is a Cirrus variant.  */
 extern int arm_is_cirrus;
 
+/* Nonzero if this chip supports Intel XScale with Wireless MMX technology.  */
+extern int arm_arch_iwmmxt;
+
 /* Nonzero if this chip is an XScale.  */
 extern int arm_arch_xscale;
 
@@ -729,6 +738,8 @@ extern int arm_is_6_or_7;
 
 #define PARM_BOUNDARY          32
 
+#define IWMMXT_ALIGNMENT   64
+
 #define STACK_BOUNDARY  32
 
 #define PREFERRED_STACK_BOUNDARY (TARGET_ATPCS ? 64 : 32)
@@ -742,13 +753,46 @@ extern int arm_is_6_or_7;
 
 #define EMPTY_FIELD_BOUNDARY  32
 
-#define BIGGEST_ALIGNMENT  32
+#define BIGGEST_ALIGNMENT  (TARGET_ATPCS ? 64 : 32)
+
+#define TYPE_NEEDS_IWMMXT_ALIGNMENT(TYPE)      \
+ (TARGET_REALLY_IWMMXT                         \
+   && ((TREE_CODE (TYPE) == VECTOR_TYPE) || (TYPE_MODE (TYPE) == DImode) || (TYPE_MODE (TYPE) == DFmode)))
+
+/* An expression for the alignment of a structure field FIELD if the
+   alignment computed in the usual way is COMPUTED.  GCC uses this
+   value instead of the value in `BIGGEST_ALIGNMENT' or
+   `BIGGEST_FIELD_ALIGNMENT', if defined, for structure fields only.  */
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED)            \
+  (TYPE_NEEDS_IWMMXT_ALIGNMENT (TREE_TYPE (FIELD))     \
+   ? IWMMXT_ALIGNMENT                                  \
+   : (COMPUTED))
+
+/* If defined, a C expression to compute the alignment for a static variable.
+   TYPE is the data type, and ALIGN is the alignment that the object
+   would ordinarily have.  The value of this macro is used instead of that
+   alignment to align the object.
+
+   If this macro is not defined, then ALIGN is used.  */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+  (TYPE_NEEDS_IWMMXT_ALIGNMENT (TYPE) ? IWMMXT_ALIGNMENT : ALIGN)
+
+/* If defined, a C expression to compute the alignment for a
+   variable in the local store.  TYPE is the data type, and
+   BASIC-ALIGN is the alignment that the object would ordinarily
+   have.  The value of this macro is used instead of that alignment
+   to align the object.
+
+   If this macro is not defined, then BASIC-ALIGN is used.  */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+  (TYPE_NEEDS_IWMMXT_ALIGNMENT (TYPE) ? IWMMXT_ALIGNMENT : ALIGN)
 
 /* Make strings word-aligned so strcpy from constants will be faster.  */
 #define CONSTANT_ALIGNMENT_FACTOR (TARGET_THUMB || ! arm_arch_xscale ? 1 : 2)
     
 #define CONSTANT_ALIGNMENT(EXP, ALIGN)                         \
-  ((TREE_CODE (EXP) == STRING_CST                              \
+  ((TARGET_REALLY_IWMMXT && TREE_CODE (EXP) == VECTOR_TYPE) ? IWMMXT_ALIGNMENT : \
+   (TREE_CODE (EXP) == STRING_CST                              \
     && (ALIGN) < BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR)    \
    ? BITS_PER_WORD * CONSTANT_ALIGNMENT_FACTOR : (ALIGN))
 
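A short sketch of what the alignment macros above change, assuming
TARGET_REALLY_IWMMXT holds: TYPE_NEEDS_IWMMXT_ALIGNMENT is true for
vector, DImode and DFmode types, so both declarations below land on a
64-bit (IWMMXT_ALIGNMENT) boundary:

    long long counter;     /* Static data: DATA_ALIGNMENT raises the
                              alignment from 32 to 64 bits.  */
    struct sample
    {
      char tag;
      double value;        /* Field: ADJUST_FIELD_ALIGN aligns the
                              DFmode member to 64 bits.  */
    };
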
@@ -848,7 +892,10 @@ extern const char * structure_size_string;
   0,0,0,0,0,0,0,0,      \
   1,1,1,               \
   1,1,1,1,1,1,1,1,     \
-  1,1,1,1,1,1,1,1      \
+  1,1,1,1,1,1,1,1,      \
+  1,1,1,1,1,1,1,1,      \
+  1,1,1,1,1,1,1,1,      \
+  1,1,1,1               \
 }
 
 /* 1 for registers not available across function calls.
@@ -866,7 +913,10 @@ extern const char * structure_size_string;
   1,1,1,1,0,0,0,0,          \
   1,1,1,                    \
   1,1,1,1,1,1,1,1,          \
-  1,1,1,1,1,1,1,1           \
+  1,1,1,1,1,1,1,1,          \
+  1,1,1,1,1,1,1,1,          \
+  1,1,1,1,1,1,1,1,          \
+  1,1,1,1                   \
 }
 
 #ifndef SUBTARGET_CONDITIONAL_REGISTER_USAGE
@@ -897,6 +947,26 @@ extern const char * structure_size_string;
        }                                                       \
     }                                                          \
                                                                \
+  if (TARGET_REALLY_IWMMXT)                                    \
+    {                                                          \
+      regno = FIRST_IWMMXT_GR_REGNUM;                          \
+      /* The 2002/10/09 revision of the XScale ABI has wCG0     \
+         and wCG1 as call-preserved registers.  The 2002/11/21  \
+         revision changed this so that all wCG registers are    \
+         scratch registers.  */                                        \
+      for (regno = FIRST_IWMMXT_GR_REGNUM;                     \
+          regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)            \
+       fixed_regs[regno] = call_used_regs[regno] = 0;          \
+      /* The XScale ABI has wR0 - wR9 as scratch registers,     \
+        the rest as call-preserved registers.  */              \
+      for (regno = FIRST_IWMMXT_REGNUM;                                \
+          regno <= LAST_IWMMXT_REGNUM; ++ regno)               \
+       {                                                       \
+         fixed_regs[regno] = 0;                                \
+         call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10; \
+       }                                                       \
+    }                                                          \
+                                                               \
   if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)    \
     {                                                          \
       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;                 \
@@ -1014,6 +1084,15 @@ extern const char * structure_size_string;
 #define FIRST_ARM_FP_REGNUM    16
 #define LAST_ARM_FP_REGNUM     23
 
+#define FIRST_IWMMXT_GR_REGNUM 43
+#define LAST_IWMMXT_GR_REGNUM  46
+#define FIRST_IWMMXT_REGNUM    47
+#define LAST_IWMMXT_REGNUM     62
+#define IS_IWMMXT_REGNUM(REGNUM) \
+  (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM))
+#define IS_IWMMXT_GR_REGNUM(REGNUM) \
+  (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM))
+
 /* Base register for access to local variables of the function.  */
 #define FRAME_POINTER_REGNUM   25
 
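For orientation (a summary, not text from the patch): with these
definitions the hard register file is laid out as r0-r15 (0-15), FPA
f0-f7 (16-23), the condition-code register (24), the soft frame and
argument pointers (25-26), the sixteen Cirrus registers (27-42), the four
iWMMXt control registers wCGR0-wCGR3 (43-46) and the sixteen iWMMXt data
registers wR0-wR15 (47-62) -- hence 27 + 16 + 16 + 4 = 63 for
FIRST_PSEUDO_REGISTER in the hunk below.
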
@@ -1027,7 +1106,8 @@ extern const char * structure_size_string;
 
 /* The number of hard registers is 16 ARM + 8 FPA + 1 CC + 1 SFP + 1 AFP.  */
 /* + 16 Cirrus registers take us up to 43.  */
-#define FIRST_PSEUDO_REGISTER  43
+/* Intel Wireless MMX Technology registers add 16 + 4 more.  */
+#define FIRST_PSEUDO_REGISTER   63
 
 /* Value should be nonzero if functions must have frame pointers.
    Zero means the frame pointer need not be set up (and parms may be accessed
@@ -1064,6 +1144,12 @@ extern const char * structure_size_string;
 #define MODES_TIEABLE_P(MODE1, MODE2)  \
   (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2))
 
+#define VECTOR_MODE_SUPPORTED_P(MODE) \
+ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode)
+
+#define VALID_IWMMXT_REG_MODE(MODE) \
+ (VECTOR_MODE_SUPPORTED_P (MODE) || (MODE) == DImode)
+
 /* The order in which register should be allocated.  It is good to use ip
    since no saving is required (though calls clobber it) and it never contains
    function parameters.  It is quite good to use lr since other calls may
@@ -1077,6 +1163,9 @@ extern const char * structure_size_string;
     16, 17, 18, 19, 20, 21, 22, 23, \
     27, 28, 29, 30, 31, 32, 33, 34, \
     35, 36, 37, 38, 39, 40, 41, 42, \
+    43, 44, 45, 46, 47, 48, 49, 50, \
+    51, 52, 53, 54, 55, 56, 57, 58, \
+    59, 60, 61, 62,                \
     24, 25, 26                     \
 }
 
@@ -1096,6 +1185,8 @@ enum reg_class
   NO_REGS,
   FPA_REGS,
   CIRRUS_REGS,
+  IWMMXT_GR_REGS,
+  IWMMXT_REGS,
   LO_REGS,
   STACK_REG,
   BASE_REGS,
@@ -1114,6 +1205,8 @@ enum reg_class
   "NO_REGS",           \
   "FPA_REGS",          \
   "CIRRUS_REGS",       \
+  "IWMMXT_GR_REGS",    \
+  "IWMMXT_REGS",       \
   "LO_REGS",           \
   "STACK_REG",         \
   "BASE_REGS",         \
@@ -1131,13 +1224,15 @@ enum reg_class
   { 0x00000000, 0x0 },        /* NO_REGS  */   \
   { 0x00FF0000, 0x0 },        /* FPA_REGS */   \
   { 0xF8000000, 0x000007FF }, /* CIRRUS_REGS */        \
+  { 0x00000000, 0x00007800 }, /* IWMMXT_GR_REGS */\
+  { 0x00000000, 0x7FFF8000 }, /* IWMMXT_REGS */        \
   { 0x000000FF, 0x0 },        /* LO_REGS */    \
   { 0x00002000, 0x0 },        /* STACK_REG */  \
   { 0x000020FF, 0x0 },        /* BASE_REGS */  \
   { 0x0000FF00, 0x0 },        /* HI_REGS */    \
   { 0x01000000, 0x0 },        /* CC_REG */     \
   { 0x0200FFFF, 0x0 },        /* GENERAL_REGS */\
-  { 0xFAFFFFFF, 0x000007FF }  /* ALL_REGS */   \
+  { 0xFAFFFFFF, 0x7FFFFFFF }  /* ALL_REGS */   \
 }
 
 /* The same information, inverted:
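
A quick sanity check of the two new masks (worked out here, not in the
original): REG_CLASS_CONTENTS keeps registers 32-62 in the second word,
so wCGR0-wCGR3 (hard regs 43-46) occupy bits 11-14, i.e. 0x800 + 0x1000 +
0x2000 + 0x4000 = 0x00007800, and wR0-wR15 (hard regs 47-62) occupy bits
15-30, i.e. 0x7FFF8000.  Widening the second word of ALL_REGS to
0x7FFFFFFF makes it cover both new classes.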
@@ -1177,6 +1272,8 @@ enum reg_class
 #define REG_CLASS_FROM_LETTER(C)       \
   (  (C) == 'f' ? FPA_REGS             \
    : (C) == 'v' ? CIRRUS_REGS          \
+   : (C) == 'y' ? IWMMXT_REGS          \
+   : (C) == 'z' ? IWMMXT_GR_REGS       \
    : (C) == 'l' ? (TARGET_ARM ? GENERAL_REGS : LO_REGS)        \
    : TARGET_ARM ? NO_REGS              \
    : (C) == 'h' ? HI_REGS              \
@@ -1292,6 +1389,9 @@ enum reg_class
      && (CONSTANT_P (X) || GET_CODE (X) == SYMBOL_REF))                \
     ? GENERAL_REGS :                                           \
   (TARGET_ARM ?                                                        \
+   (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS)      \
+      && CONSTANT_P (X))                                       \
+   ? GENERAL_REGS :                                            \
    (((MODE) == HImode && ! arm_arch4 && TARGET_MMU_TRAPS       \
      && (GET_CODE (X) == MEM                                   \
         || ((GET_CODE (X) == REG || GET_CODE (X) == SUBREG)    \
@@ -1405,6 +1505,9 @@ enum reg_class
   (TARGET_ARM ?                                                \
    ((FROM) == FPA_REGS && (TO) != FPA_REGS ? 20 :      \
     (FROM) != FPA_REGS && (TO) == FPA_REGS ? 20 :      \
+    (FROM) == IWMMXT_REGS && (TO) != IWMMXT_REGS ? 4 :  \
+    (FROM) != IWMMXT_REGS && (TO) == IWMMXT_REGS ? 4 :  \
+    (FROM) == IWMMXT_GR_REGS || (TO) == IWMMXT_GR_REGS ? 20 :  \
     (FROM) == CIRRUS_REGS && (TO) != CIRRUS_REGS ? 20 :        \
     (FROM) != CIRRUS_REGS && (TO) == CIRRUS_REGS ? 20 :        \
    2)                                                  \
@@ -1461,6 +1564,8 @@ enum reg_class
    ? gen_rtx_REG (MODE, FIRST_ARM_FP_REGNUM) \
    : TARGET_ARM && TARGET_CIRRUS && GET_MODE_CLASS (MODE) == MODE_FLOAT \
    ? gen_rtx_REG (MODE, FIRST_CIRRUS_FP_REGNUM)                        \
+   : TARGET_REALLY_IWMMXT && VECTOR_MODE_SUPPORTED_P (MODE)            \
+   ? gen_rtx_REG (MODE, FIRST_IWMMXT_REGNUM)                           \
    : gen_rtx_REG (MODE, ARG_REGISTER (1)))
 
 /* Define how to find the value returned by a function.
@@ -1476,6 +1581,7 @@ enum reg_class
 #define FUNCTION_VALUE_REGNO_P(REGNO)  \
   ((REGNO) == ARG_REGISTER (1) \
    || (TARGET_ARM && ((REGNO) == FIRST_CIRRUS_FP_REGNUM) && TARGET_CIRRUS) \
+   || (TARGET_ARM && ((REGNO) == FIRST_IWMMXT_REGNUM) && TARGET_IWMMXT) \
    || (TARGET_ARM && ((REGNO) == FIRST_ARM_FP_REGNUM) && TARGET_HARD_FLOAT))
 
 /* How large values are returned */
@@ -1546,6 +1652,9 @@ typedef struct machine_function GTY(())
   unsigned long func_type;
   /* Record if the function has a variable argument list.  */
   int uses_anonymous_args;
+  /* Records if sibcalls are blocked because an argument
+     register is needed to preserve stack alignment.  */
+  int sibcall_blocked;
 }
 machine_function;
 
@@ -1556,6 +1665,10 @@ typedef struct
 {
   /* This is the number of registers of arguments scanned so far.  */
   int nregs;
+  /* This is the number of iWMMXt register arguments scanned so far.  */
+  int iwmmxt_nregs;
+  int named_count;
+  int nargs;
   /* One of CALL_NORMAL, CALL_LONG or CALL_SHORT . */
   int call_cookie;
 } CUMULATIVE_ARGS;
@@ -1585,7 +1698,8 @@ typedef struct
    this is the number of registers used.
    For args passed entirely in registers or entirely in memory, zero.  */
 #define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED)     \
-  (    NUM_ARG_REGS > (CUM).nregs                              \
+  (VECTOR_MODE_SUPPORTED_P (MODE) ? 0 :                                \
+       NUM_ARG_REGS > (CUM).nregs                              \
    && (NUM_ARG_REGS < ((CUM).nregs + ARM_NUM_REGS2 (MODE, TYPE)))      \
    ?   NUM_ARG_REGS - (CUM).nregs : 0)
 
@@ -1608,11 +1722,27 @@ typedef struct
    of mode MODE and data type TYPE.
    (TYPE is null for libcalls where that information may not be available.)  */
 #define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED)   \
+  (CUM).nargs += 1;                                    \
+  if (VECTOR_MODE_SUPPORTED_P (MODE))                  \
+     if ((CUM).named_count <= (CUM).nargs)             \
+        (CUM).nregs += 2;                              \
+     else                                              \
+        (CUM).iwmmxt_nregs += 1;                       \
+  else                                                 \
   (CUM).nregs += ARM_NUM_REGS2 (MODE, TYPE)
 
+/* If defined, a C expression that gives the alignment boundary, in bits, of an
+   argument with the specified mode and type.  If it is not defined,
+   `PARM_BOUNDARY' is used for all arguments.  */
+#define FUNCTION_ARG_BOUNDARY(MODE,TYPE) \
+  (TARGET_REALLY_IWMMXT && (VALID_IWMMXT_REG_MODE (MODE) || ((MODE) == DFmode)) \
+   ? IWMMXT_ALIGNMENT : PARM_BOUNDARY)
+
 /* 1 if N is a possible register number for function argument passing.
    On the ARM, r0-r3 are used to pass args.  */
-#define FUNCTION_ARG_REGNO_P(REGNO)    (IN_RANGE ((REGNO), 0, 3))
+#define FUNCTION_ARG_REGNO_P(REGNO)    \
+   (IN_RANGE ((REGNO), 0, 3)           \
+    || (TARGET_REALLY_IWMMXT && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
 
 /* Implement `va_arg'.  */
 #define EXPAND_BUILTIN_VA_ARG(valist, type) \
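
Two notes on the argument-passing changes above.  First,
FUNCTION_ARG_ADVANCE is now a multi-statement macro, so it must only be
invoked where a full statement is legal.  Second, the counting is easiest
to see against a concrete signature; a hedged sketch, assuming
TARGET_REALLY_IWMMXT and that named_count (set up by
arm_init_cumulative_args, per the ChangeLog) covers all prototyped
arguments:

    typedef short v4hi __attribute__ ((vector_size (8)));

    /* Named vector arguments are intended to take the
       (CUM).iwmmxt_nregs branch and travel in iWMMXt registers
       (wR0-wR9 are the candidates, per FUNCTION_ARG_REGNO_P).  */
    extern v4hi vmax2 (v4hi a, v4hi b);

    /* A v4hi passed through the ellipsis is beyond named_count, so it
       takes the (CUM).nregs += 2 branch and occupies two core
       registers instead.  */
    extern int vsum (int n, ...);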
@@ -2333,14 +2463,14 @@ extern int making_const_table;
 /* To support -falign-* switches we need to use .p2align so
    that alignment directives in code sections will be padded
    with no-op instructions, rather than zeroes.  */
-#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)           \
+#define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE, LOG, MAX_SKIP)         \
   if ((LOG) != 0)                                              \
     {                                                          \
       if ((MAX_SKIP) == 0)                                     \
-        fprintf ((FILE), "\t.p2align %d\n", (LOG));            \
+        fprintf ((FILE), "\t.p2align %d\n", (int) (LOG));      \
       else                                                     \
         fprintf ((FILE), "\t.p2align %d,,%d\n",                        \
-                 (LOG), (MAX_SKIP));                           \
+                 (int) (LOG), (int) (MAX_SKIP));               \
     }
 #endif
 \f
@@ -2491,7 +2621,12 @@ extern int making_const_table;
     ARM_PRINT_OPERAND_ADDRESS (STREAM, X)      \
   else                                         \
     THUMB_PRINT_OPERAND_ADDRESS (STREAM, X)
-     
+
+#define OUTPUT_ADDR_CONST_EXTRA(FILE, X, FAIL) \
+  if (GET_CODE (X) != CONST_VECTOR             \
+      || ! arm_emit_vector_const (FILE, X))    \
+    goto FAIL;
+
 /* A C expression whose value is RTL representing the value of the return
    address for the frame COUNT steps up from the current frame.  */
 
@@ -2567,4 +2702,168 @@ extern int making_const_table;
 #define SPECIAL_MODE_PREDICATES                        \
  "cc_register", "dominant_cc_register",
 
+enum arm_builtins
+{
+  ARM_BUILTIN_GETWCX,
+  ARM_BUILTIN_SETWCX,
+
+  ARM_BUILTIN_WZERO,
+
+  ARM_BUILTIN_WAVG2BR,
+  ARM_BUILTIN_WAVG2HR,
+  ARM_BUILTIN_WAVG2B,
+  ARM_BUILTIN_WAVG2H,
+
+  ARM_BUILTIN_WACCB,
+  ARM_BUILTIN_WACCH,
+  ARM_BUILTIN_WACCW,
+
+  ARM_BUILTIN_WMACS,
+  ARM_BUILTIN_WMACSZ,
+  ARM_BUILTIN_WMACU,
+  ARM_BUILTIN_WMACUZ,
+
+  ARM_BUILTIN_WSADB,
+  ARM_BUILTIN_WSADBZ,
+  ARM_BUILTIN_WSADH,
+  ARM_BUILTIN_WSADHZ,
+
+  ARM_BUILTIN_WALIGN,
+
+  ARM_BUILTIN_TMIA,
+  ARM_BUILTIN_TMIAPH,
+  ARM_BUILTIN_TMIABB,
+  ARM_BUILTIN_TMIABT,
+  ARM_BUILTIN_TMIATB,
+  ARM_BUILTIN_TMIATT,
+
+  ARM_BUILTIN_TMOVMSKB,
+  ARM_BUILTIN_TMOVMSKH,
+  ARM_BUILTIN_TMOVMSKW,
+
+  ARM_BUILTIN_TBCSTB,
+  ARM_BUILTIN_TBCSTH,
+  ARM_BUILTIN_TBCSTW,
+
+  ARM_BUILTIN_WMADDS,
+  ARM_BUILTIN_WMADDU,
+
+  ARM_BUILTIN_WPACKHSS,
+  ARM_BUILTIN_WPACKWSS,
+  ARM_BUILTIN_WPACKDSS,
+  ARM_BUILTIN_WPACKHUS,
+  ARM_BUILTIN_WPACKWUS,
+  ARM_BUILTIN_WPACKDUS,
+
+  ARM_BUILTIN_WADDB,
+  ARM_BUILTIN_WADDH,
+  ARM_BUILTIN_WADDW,
+  ARM_BUILTIN_WADDSSB,
+  ARM_BUILTIN_WADDSSH,
+  ARM_BUILTIN_WADDSSW,
+  ARM_BUILTIN_WADDUSB,
+  ARM_BUILTIN_WADDUSH,
+  ARM_BUILTIN_WADDUSW,
+  ARM_BUILTIN_WSUBB,
+  ARM_BUILTIN_WSUBH,
+  ARM_BUILTIN_WSUBW,
+  ARM_BUILTIN_WSUBSSB,
+  ARM_BUILTIN_WSUBSSH,
+  ARM_BUILTIN_WSUBSSW,
+  ARM_BUILTIN_WSUBUSB,
+  ARM_BUILTIN_WSUBUSH,
+  ARM_BUILTIN_WSUBUSW,
+
+  ARM_BUILTIN_WAND,
+  ARM_BUILTIN_WANDN,
+  ARM_BUILTIN_WOR,
+  ARM_BUILTIN_WXOR,
+
+  ARM_BUILTIN_WCMPEQB,
+  ARM_BUILTIN_WCMPEQH,
+  ARM_BUILTIN_WCMPEQW,
+  ARM_BUILTIN_WCMPGTUB,
+  ARM_BUILTIN_WCMPGTUH,
+  ARM_BUILTIN_WCMPGTUW,
+  ARM_BUILTIN_WCMPGTSB,
+  ARM_BUILTIN_WCMPGTSH,
+  ARM_BUILTIN_WCMPGTSW,
+
+  ARM_BUILTIN_TEXTRMSB,
+  ARM_BUILTIN_TEXTRMSH,
+  ARM_BUILTIN_TEXTRMSW,
+  ARM_BUILTIN_TEXTRMUB,
+  ARM_BUILTIN_TEXTRMUH,
+  ARM_BUILTIN_TEXTRMUW,
+  ARM_BUILTIN_TINSRB,
+  ARM_BUILTIN_TINSRH,
+  ARM_BUILTIN_TINSRW,
+
+  ARM_BUILTIN_WMAXSW,
+  ARM_BUILTIN_WMAXSH,
+  ARM_BUILTIN_WMAXSB,
+  ARM_BUILTIN_WMAXUW,
+  ARM_BUILTIN_WMAXUH,
+  ARM_BUILTIN_WMAXUB,
+  ARM_BUILTIN_WMINSW,
+  ARM_BUILTIN_WMINSH,
+  ARM_BUILTIN_WMINSB,
+  ARM_BUILTIN_WMINUW,
+  ARM_BUILTIN_WMINUH,
+  ARM_BUILTIN_WMINUB,
+
+  ARM_BUILTIN_WMULUH,
+  ARM_BUILTIN_WMULSH,
+  ARM_BUILTIN_WMULUL,
+
+  ARM_BUILTIN_PSADBH,
+  ARM_BUILTIN_WSHUFH,
+
+  ARM_BUILTIN_WSLLH,
+  ARM_BUILTIN_WSLLW,
+  ARM_BUILTIN_WSLLD,
+  ARM_BUILTIN_WSRAH,
+  ARM_BUILTIN_WSRAW,
+  ARM_BUILTIN_WSRAD,
+  ARM_BUILTIN_WSRLH,
+  ARM_BUILTIN_WSRLW,
+  ARM_BUILTIN_WSRLD,
+  ARM_BUILTIN_WRORH,
+  ARM_BUILTIN_WRORW,
+  ARM_BUILTIN_WRORD,
+  ARM_BUILTIN_WSLLHI,
+  ARM_BUILTIN_WSLLWI,
+  ARM_BUILTIN_WSLLDI,
+  ARM_BUILTIN_WSRAHI,
+  ARM_BUILTIN_WSRAWI,
+  ARM_BUILTIN_WSRADI,
+  ARM_BUILTIN_WSRLHI,
+  ARM_BUILTIN_WSRLWI,
+  ARM_BUILTIN_WSRLDI,
+  ARM_BUILTIN_WRORHI,
+  ARM_BUILTIN_WRORWI,
+  ARM_BUILTIN_WRORDI,
+
+  ARM_BUILTIN_WUNPCKIHB,
+  ARM_BUILTIN_WUNPCKIHH,
+  ARM_BUILTIN_WUNPCKIHW,
+  ARM_BUILTIN_WUNPCKILB,
+  ARM_BUILTIN_WUNPCKILH,
+  ARM_BUILTIN_WUNPCKILW,
+
+  ARM_BUILTIN_WUNPCKEHSB,
+  ARM_BUILTIN_WUNPCKEHSH,
+  ARM_BUILTIN_WUNPCKEHSW,
+  ARM_BUILTIN_WUNPCKEHUB,
+  ARM_BUILTIN_WUNPCKEHUH,
+  ARM_BUILTIN_WUNPCKEHUW,
+  ARM_BUILTIN_WUNPCKELSB,
+  ARM_BUILTIN_WUNPCKELSH,
+  ARM_BUILTIN_WUNPCKELSW,
+  ARM_BUILTIN_WUNPCKELUB,
+  ARM_BUILTIN_WUNPCKELUH,
+  ARM_BUILTIN_WUNPCKELUW,
+
+  ARM_BUILTIN_MAX
+};
 #endif /* ! GCC_ARM_H */
index d03c193..b3456c0 100644
                        ; and stack frame generation.  Operand 0 is the
                        ; register to "use".
    (UNSPEC_CHECK_ARCH 7); Set CCs to indicate 26-bit or 32-bit mode.
+   (UNSPEC_WSHUFH    8) ; Used by the intrinsic form of the iWMMXt WSHUFH instruction.
+   (UNSPEC_WACC      9) ; Used by the intrinsic form of the iWMMXt WACC instruction.
+   (UNSPEC_TMOVMSK  10) ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction.
+   (UNSPEC_WSAD     11) ; Used by the intrinsic form of the iWMMXt WSAD instruction.
+   (UNSPEC_WSADZ    12) ; Used by the intrinsic form of the iWMMXt WSADZ instruction.
+   (UNSPEC_WMACS    13) ; Used by the intrinsic form of the iWMMXt WMACS instruction.
+   (UNSPEC_WMACU    14) ; Used by the intrinsic form of the iWMMXt WMACU instruction.
+   (UNSPEC_WMACSZ   15) ; Used by the intrinsic form of the iWMMXt WMACSZ instruction.
+   (UNSPEC_WMACUZ   16) ; Used by the intrinsic form of the iWMMXt WMACUZ instruction.
+   (UNSPEC_CLRDI    17) ; Used by the intrinsic form of the iWMMXt CLRDI instruction.
+   (UNSPEC_WMADDS   18) ; Used by the intrinsic form of the iWMMXt WMADDS instruction.
+   (UNSPEC_WMADDU   19) ; Used by the intrinsic form of the iWMMXt WMADDU instruction.
   ]
 )
 
                        ;   a 32-bit object.
    (VUNSPEC_POOL_8   7) ; `pool-entry(8)'.  An entry in the constant pool for
                        ;   a 64-bit object.
+   (VUNSPEC_TMRC     8) ; Used by the iWMMXt TMRC instruction.
+   (VUNSPEC_TMCR     9) ; Used by the iWMMXt TMCR instruction.
+   (VUNSPEC_ALIGN8   10) ; 8-byte alignment version of VUNSPEC_ALIGN.
+   (VUNSPEC_WCMP_EQ  11) ; Used by the iWMMXt WCMPEQ instructions.
+   (VUNSPEC_WCMP_GTU 12) ; Used by the iWMMXt WCMPGTU instructions.
+   (VUNSPEC_WCMP_GT  13) ; Used by the iWMMXt WCMPGT instructions.
   ]
 )
 \f
        (match_operator:DI 6 "logical_binary_operator"
          [(match_operand:DI 1 "s_register_operand" "")
           (match_operand:DI 2 "s_register_operand" "")]))]
-  "TARGET_ARM && reload_completed"
+  "TARGET_ARM && reload_completed && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))"
   [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)]))
    (set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))]
   "
   [(set (match_operand:DI         0 "s_register_operand" "=&r,&r")
        (and:DI (match_operand:DI 1 "s_register_operand"  "%0,r")
                (match_operand:DI 2 "s_register_operand"   "r,r")))]
-  "TARGET_ARM"
+  "TARGET_ARM && ! TARGET_IWMMXT"
   "#"
   [(set_attr "length" "8")]
 )
                (match_operand:DI 2 "s_register_operand" "0,r")))]
   "TARGET_ARM"
   "#"
-  "TARGET_ARM && reload_completed"
+  "TARGET_ARM && reload_completed && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))"
   [(set (match_dup 0) (and:SI (not:SI (match_dup 1)) (match_dup 2)))
    (set (match_dup 3) (and:SI (not:SI (match_dup 4)) (match_dup 5)))]
   "
   [(set (match_operand:DI         0 "s_register_operand" "=&r,&r")
        (ior:DI (match_operand:DI 1 "s_register_operand"  "%0,r")
                (match_operand:DI 2 "s_register_operand"   "r,r")))]
-  "TARGET_ARM"
+  "TARGET_ARM && ! TARGET_IWMMXT"
   "#"
   [(set_attr "length" "8")
    (set_attr "predicable" "yes")]
   [(set (match_operand:DI         0 "s_register_operand" "=&r,&r")
        (xor:DI (match_operand:DI 1 "s_register_operand"  "%0,r")
                (match_operand:DI 2 "s_register_operand"   "r,r")))]
-  "TARGET_ARM"
+  "TARGET_ARM && !TARGET_IWMMXT"
   "#"
   [(set_attr "length" "8")
    (set_attr "predicable" "yes")]
   [(set (match_operand:DI            0 "s_register_operand" "")
        (ashift:DI (match_operand:DI 1 "general_operand"    "")
                   (match_operand:SI 2 "general_operand"    "")))]
-  "TARGET_ARM && (TARGET_CIRRUS)"
+  "TARGET_ARM && (TARGET_IWMMXT || TARGET_CIRRUS)"
   "
   if (! s_register_operand (operands[1], DImode))
     operands[1] = copy_to_mode_reg (DImode, operands[1]);
 (define_insn "*arm_movdi"
   [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, o<>")
        (match_operand:DI 1 "di_operand"              "rIK,mi,r"))]
-  "TARGET_ARM && !TARGET_CIRRUS"
+  "TARGET_ARM && !TARGET_CIRRUS && ! TARGET_IWMMXT"
   "*
   return (output_move_double (operands));
   "
 (define_insn "*arm_movsi_insn"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m")
        (match_operand:SI 1 "general_operand"      "rI,K,mi,r"))]
-  "TARGET_ARM
+  "TARGET_ARM && ! TARGET_IWMMXT
    && (   register_operand (operands[0], SImode)
        || register_operand (operands[1], SImode))"
   "@
    (set_attr "type" "*,load,store2,load,store2,*")
    (set_attr "pool_range" "*,*,*,1020,*,*")]
 )
+
+;; Vector Moves
+(define_expand "movv2si"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
+       (match_operand:V2SI 1 "general_operand" ""))]
+  "TARGET_REALLY_IWMMXT"
+{
+})
+
+(define_expand "movv4hi"
+  [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
+       (match_operand:V4HI 1 "general_operand" ""))]
+  "TARGET_REALLY_IWMMXT"
+{
+})
+
+(define_expand "movv8qi"
+  [(set (match_operand:V8QI 0 "nonimmediate_operand" "")
+       (match_operand:V8QI 1 "general_operand" ""))]
+  "TARGET_REALLY_IWMMXT"
+{
+})
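
These expanders deliberately have empty bodies; they exist so the middle end sees mov patterns for the three 64-bit vector modes, with the real work done by the movv*_internal insns in iwmmxt.md below. A minimal sketch of what they enable, assuming a vector_size typedef:

    typedef short v4hi __attribute__ ((vector_size (8)));

    void
    copy (v4hi *dst, const v4hi *src)
    {
      *dst = *src;  /* movv4hi: becomes wldrd + wstrd on an iWMMXt target */
    }
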
 \f
 
 ;; load- and store-multiple insns
 )
 
 (define_insn "*call_value_reg"
-  [(set (match_operand 0 "" "=r,f,v")
-        (call (mem:SI (match_operand:SI 1 "s_register_operand" "r,r,r"))
+  [(set (match_operand 0 "" "=ryfv")
+        (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
              (match_operand 2 "" "")))
    (use (match_operand 3 "" ""))
    (clobber (reg:SI LR_REGNUM))]
 )
 
 (define_insn "*call_value_mem"
-  [(set (match_operand 0 "" "=r,f,v")
-       (call (mem:SI (match_operand:SI 1 "memory_operand" "m,m,m"))
+  [(set (match_operand 0 "" "=ryfv")
+       (call (mem:SI (match_operand:SI 1 "memory_operand" "m"))
              (match_operand 2 "" "")))
    (use (match_operand 3 "" ""))
    (clobber (reg:SI LR_REGNUM))]
 )
 
 (define_insn "*call_value_symbol"
-  [(set (match_operand 0 "s_register_operand" "=r,f,v")
-       (call (mem:SI (match_operand:SI 1 "" "X,X,X"))
+  [(set (match_operand 0 "s_register_operand" "=ryfv")
+       (call (mem:SI (match_operand:SI 1 "" "X"))
        (match_operand:SI 2 "" "")))
    (use (match_operand 3 "" ""))
    (clobber (reg:SI LR_REGNUM))]
 )
 
 (define_insn "*sibcall_value_insn"
- [(set (match_operand 0 "s_register_operand" "=r,f,v")
-       (call (mem:SI (match_operand:SI 1 "" "X,X,X"))
+ [(set (match_operand 0 "s_register_operand" "=ryfv")
+       (call (mem:SI (match_operand:SI 1 "" "X"))
             (match_operand 2 "" "")))
   (return)
   (use (match_operand 3 "" ""))]
     return output_return_instruction (const_true_rtx, TRUE, FALSE);
   }"
   [(set_attr "type" "load")
+   (set_attr "length" "12")
    (set_attr "predicable" "yes")]
 )
 
     return output_return_instruction (operands[0], TRUE, FALSE);
   }"
   [(set_attr "conds" "use")
+   (set_attr "length" "12")
    (set_attr "type" "load")]
 )
 
                         (match_dup 0)
                         (match_operand 4 "" "")))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM && reload_completed"
+  ;; Note we have to suppress this split for the iwmmxt because it
+  ;; creates a conditional movsi and the iwmmxt_movsi_insn pattern
+  ;; is not predicable.  This sucks.
+  "TARGET_ARM && reload_completed && ! TARGET_IWMMXT"
   [(set (match_dup 5) (match_dup 6))
    (cond_exec (match_dup 7)
              (set (match_dup 0) (match_dup 4)))]
                         (match_operand 4 "" "")
                         (match_dup 0)))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM && reload_completed"
+  ;; Note we have to suppress this split for the iwmmxt because it
+  ;; creates a conditional movsi and the iwmmxt_movsi_insn pattern
+  ;; is not predicable.  This sucks.
+  "TARGET_ARM && reload_completed && ! TARGET_IWMMXT"
   [(set (match_dup 5) (match_dup 6))
    (cond_exec (match_op_dup 1 [(match_dup 5) (const_int 0)])
              (set (match_dup 0) (match_dup 4)))]
                         (match_operand 4 "" "")
                         (match_operand 5 "" "")))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM && reload_completed"
+  ;; Note we have to suppress this split for the iwmmxt because it
+  ;; creates a conditional movsi and the iwmmxt_movsi_insn pattern
+  ;; is not predicable.  This sucks.
+  "TARGET_ARM && reload_completed && ! TARGET_IWMMXT"
   [(set (match_dup 6) (match_dup 7))
    (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)])
              (set (match_dup 0) (match_dup 4)))
                         (not:SI
                          (match_operand:SI 5 "s_register_operand" ""))))
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_ARM && reload_completed"
+  ;; Note we have to suppress this split for the iwmmxt because it
+  ;; creates a conditional movsi and the iwmmxt_movsi_insn pattern
+  ;; is not predicable.  This sucks.
+  "TARGET_ARM && reload_completed && ! TARGET_IWMMXT"
   [(set (match_dup 6) (match_dup 7))
    (cond_exec (match_op_dup 1 [(match_dup 6) (const_int 0)])
              (set (match_dup 0) (match_dup 4)))
   "
 )
 
+(define_insn "align_8"
+  [(unspec_volatile [(const_int 0)] VUNSPEC_ALIGN8)]
+  "TARGET_REALLY_IWMMXT"
+  "*
+  assemble_align (64);
+  return \"\";
+  "
+)
+
 (define_insn "consttable_end"
   [(unspec_volatile [(const_int 0)] VUNSPEC_POOL_END)]
   "TARGET_EITHER"
 (include "fpa.md")
 ;; Load the Maverick co-processor patterns
 (include "cirrus.md")
+;; Load the Intel Wireless Multimedia Extension patterns
+(include "iwmmxt.md")
diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md
new file mode 100644
index 0000000..5f3fd7b
--- /dev/null
+++ b/gcc/config/arm/iwmmxt.md
@@ -0,0 +1,1495 @@
+;; Patterns for the Intel Wireless MMX technology architecture.
+;; Copyright (C) 2003 Free Software Foundation, Inc.
+;; Contributed by Red Hat.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 2, or (at your option) any later
+;; version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING.  If not, write to
+;; the Free Software Foundation, 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+(define_insn "iwmmxt_iordi3"
+  [(set (match_operand:DI         0 "register_operand" "=y,?&r,?&r")
+        (ior:DI (match_operand:DI 1 "register_operand" "%y,0,r")
+               (match_operand:DI 2 "register_operand"  "y,r,r")))]
+  "TARGET_REALLY_IWMMXT"
+  "@
+   wor%?\\t%0, %1, %2
+   #
+   #"
+  [(set_attr "predicable" "yes")
+   (set_attr "length" "4,8,8")])
+
+(define_insn "iwmmxt_xordi3"
+  [(set (match_operand:DI         0 "register_operand" "=y,?&r,?&r")
+        (xor:DI (match_operand:DI 1 "register_operand" "%y,0,r")
+               (match_operand:DI 2 "register_operand"  "y,r,r")))]
+  "TARGET_REALLY_IWMMXT"
+  "@
+   wxor%?\\t%0, %1, %2
+   #
+   #"
+  [(set_attr "predicable" "yes")
+   (set_attr "length" "4,8,8")])
+
+(define_insn "iwmmxt_anddi3"
+  [(set (match_operand:DI         0 "register_operand" "=y,?&r,?&r")
+        (and:DI (match_operand:DI 1 "register_operand" "%y,0,r")
+               (match_operand:DI 2 "register_operand"  "y,r,r")))]
+  "TARGET_REALLY_IWMMXT"
+  "@
+   wand%?\\t%0, %1, %2
+   #
+   #"
+  [(set_attr "predicable" "yes")
+   (set_attr "length" "4,8,8")])
+
+(define_insn "iwmmxt_nanddi3"
+  [(set (match_operand:DI                 0 "register_operand" "=y")
+        (and:DI (match_operand:DI         1 "register_operand"  "y")
+               (not:DI (match_operand:DI 2 "register_operand"  "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wandn%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "*iwmmxt_arm_movdi"
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, o<>,y,y,yr,y,yrm")
+       (match_operand:DI 1 "di_operand"              "rIK,mi,r  ,y,yr,y,yrm,y"))]
+  "TARGET_REALLY_IWMMXT"
+  "*
+{
+  switch (which_alternative)
+    {
+    default:
+      return output_move_double (operands);
+    case 3:
+      return \"wmov%?\\t%0,%1\";
+    case 4:
+      return \"tmcrr%?\\t%0,%Q1,%R1\";
+    case 5:
+      return \"tmrrc%?\\t%Q0,%R0,%1\";
+    case 6:
+      return \"wldrd%?\\t%0,%1\";
+    case 7:
+      return \"wstrd%?\\t%1,%0\";
+    }
+}"
+  [(set_attr "length"         "8,8,8,4,4,4,4,4")
+   (set_attr "type"           "*,load,store2,*,*,*,*,*")
+   (set_attr "pool_range"     "*,1020,*,*,*,*,*,*")
+   (set_attr "neg_pool_range" "*,1012,*,*,*,*,*,*")]
+)
+
+(define_insn "*iwmmxt_movsi_insn"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r,?z,m,z")
+       (match_operand:SI 1 "general_operand"      "rI,K,mi,r,r,z,m,z,z"))]
+  "TARGET_REALLY_IWMMXT
+   && (   register_operand (operands[0], SImode)
+       || register_operand (operands[1], SImode))"
+  "*
+   switch (which_alternative)
+   {
+   case 0: return \"mov\\t%0, %1\";
+   case 1: return \"mvn\\t%0, #%B1\";
+   case 2: return \"ldr\\t%0, %1\";
+   case 3: return \"str\\t%1, %0\";
+   case 4: return \"tmcr\\t%0, %1\";
+   case 5: return \"tmrc\\t%0, %1\";
+   case 6: return arm_output_load_gr (operands);
+   case 7: return \"wstrw\\t%1, %0\";
+   default:return \"wstrw\\t%1, [sp, #-4]!\;wldrw\\t%0, [sp], #4\\t@move CG reg\";
+  }"
+  [(set_attr "type"           "*,*,load,store1,*,*,load,store1,*")
+   (set_attr "length"         "*,*,*,        *,*,*,  16,     *,8")
+   (set_attr "pool_range"     "*,*,4096,     *,*,*,1024,     *,*")
+   (set_attr "neg_pool_range" "*,*,4084,     *,*,*,   *,  1012,*")
+   ;; Note - the "predicable" attribute is not allowed to have alternatives.
+   ;; Since the wSTRw wCx instruction is not predicable, we cannot support
+   ;; predicating any of the alternatives in this template.  This sucks.
+   (set_attr "predicable"     "no")
+   ;; Also - we have to pretend that these insns clobber the condition code
+   ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize
+   ;; them.
+   (set_attr "conds" "clob")]
+)
+
+(define_insn "movv8qi_internal"
+  [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r")
+       (match_operand:V8QI 1 "general_operand"       "y,y,m,y,r,i"))]
+  "TARGET_REALLY_IWMMXT"
+  "*
+   switch (which_alternative)
+   {
+   case 0: return \"wmov%?\\t%0, %1\";
+   case 1: return \"wstrd%?\\t%1, %0\";
+   case 2: return \"wldrd%?\\t%0, %1\";
+   case 3: return \"tmrrc%?\\t%Q0, %R0, %1\";
+   case 4: return \"tmcrr%?\\t%0, %Q1, %R1\";
+   default: return output_move_double (operands);
+   }"
+  [(set_attr "predicable" "yes")
+   (set_attr "length"         "4,     4,   4,4,4,   8")
+   (set_attr "type"           "*,store1,load,*,*,load")
+   (set_attr "pool_range"     "*,     *, 256,*,*, 256")
+   (set_attr "neg_pool_range" "*,     *, 244,*,*, 244")])
+
+(define_insn "movv4hi_internal"
+  [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r")
+       (match_operand:V4HI 1 "general_operand"       "y,y,m,y,r,i"))]
+  "TARGET_REALLY_IWMMXT"
+  "*
+   switch (which_alternative)
+   {
+   case 0: return \"wmov%?\\t%0, %1\";
+   case 1: return \"wstrd%?\\t%1, %0\";
+   case 2: return \"wldrd%?\\t%0, %1\";
+   case 3: return \"tmrrc%?\\t%Q0, %R0, %1\";
+   case 4: return \"tmcrr%?\\t%0, %Q1, %R1\";
+   default: return output_move_double (operands);
+   }"
+  [(set_attr "predicable" "yes")
+   (set_attr "length"         "4,     4,   4,4,4,   8")
+   (set_attr "type"           "*,store1,load,*,*,load")
+   (set_attr "pool_range"     "*,     *, 256,*,*, 256")
+   (set_attr "neg_pool_range" "*,     *, 244,*,*, 244")])
+
+(define_insn "movv2si_internal"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r")
+       (match_operand:V2SI 1 "general_operand"       "y,y,m,y,r,i"))]
+  "TARGET_REALLY_IWMMXT"
+  "*
+   switch (which_alternative)
+   {
+   case 0: return \"wmov%?\\t%0, %1\";
+   case 1: return \"wstrd%?\\t%1, %0\";
+   case 2: return \"wldrd%?\\t%0, %1\";
+   case 3: return \"tmrrc%?\\t%Q0, %R0, %1\";
+   case 4: return \"tmcrr%?\\t%0, %Q1, %R1\";
+   default: return output_move_double (operands);
+   }"
+  [(set_attr "predicable" "yes")
+   (set_attr "length"         "4,     4,   4,4,4,  24")
+   (set_attr "type"           "*,store1,load,*,*,load")
+   (set_attr "pool_range"     "*,     *, 256,*,*, 256")
+   (set_attr "neg_pool_range" "*,     *, 244,*,*, 244")])
+
+;; This pattern should not be needed.  It is to match a
+;; weird case generated by GCC when no optimisations are
+;; enabled.  (Try compiling gcc/testsuite/gcc.c-torture/
+;; compile/simd-5.c at -O0).  The mode for operands[1] is
+;; deliberately omitted.
+(define_insn "movv2si_internal_2"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=?r")
+       (match_operand      1 "immediate_operand"      "i"))]
+  "TARGET_REALLY_IWMMXT"
+  "* return output_move_double (operands);"
+  [(set_attr "predicable"     "yes")
+   (set_attr "length"         "8")
+   (set_attr "type"           "load")
+   (set_attr "pool_range"     "256")
+   (set_attr "neg_pool_range" "244")])
+
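For reference, the shape of source that produces this is roughly the following (a guess modelled on simd-5.c; the essential part is a constant vector being forced into a core-register pair at -O0):

    typedef int v2si __attribute__ ((vector_size (8)));

    v2si
    f (void)
    {
      v2si x = { 1, 2 };  /* at -O0 the CONST_VECTOR can reach a core
                             register pair with no mode on operand 1 */
      return x;
    }
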
+;; Vector add/subtract
+
+(define_insn "addv8qi3"
+  [(set (match_operand:V8QI            0 "register_operand" "=y")
+        (plus:V8QI (match_operand:V8QI 1 "register_operand"  "y")
+                  (match_operand:V8QI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "addv4hi3"
+  [(set (match_operand:V4HI            0 "register_operand" "=y")
+        (plus:V4HI (match_operand:V4HI 1 "register_operand"  "y")
+                  (match_operand:V4HI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "addv2si3"
+  [(set (match_operand:V2SI            0 "register_operand" "=y")
+        (plus:V2SI (match_operand:V2SI 1 "register_operand"  "y")
+                  (match_operand:V2SI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ssaddv8qi3"
+  [(set (match_operand:V8QI               0 "register_operand" "=y")
+        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand"  "y")
+                     (match_operand:V8QI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddbss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ssaddv4hi3"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand"  "y")
+                     (match_operand:V4HI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddhss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ssaddv2si3"
+  [(set (match_operand:V2SI               0 "register_operand" "=y")
+        (ss_plus:V2SI (match_operand:V2SI 1 "register_operand"  "y")
+                     (match_operand:V2SI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddwss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "usaddv8qi3"
+  [(set (match_operand:V8QI               0 "register_operand" "=y")
+        (us_plus:V8QI (match_operand:V8QI 1 "register_operand"  "y")
+                     (match_operand:V8QI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddbus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "usaddv4hi3"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (us_plus:V4HI (match_operand:V4HI 1 "register_operand"  "y")
+                     (match_operand:V4HI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddhus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "usaddv2si3"
+  [(set (match_operand:V2SI               0 "register_operand" "=y")
+        (us_plus:V2SI (match_operand:V2SI 1 "register_operand"  "y")
+                     (match_operand:V2SI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "waddwus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "subv8qi3"
+  [(set (match_operand:V8QI             0 "register_operand" "=y")
+        (minus:V8QI (match_operand:V8QI 1 "register_operand"  "y")
+                   (match_operand:V8QI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "subv4hi3"
+  [(set (match_operand:V4HI             0 "register_operand" "=y")
+        (minus:V4HI (match_operand:V4HI 1 "register_operand"  "y")
+                   (match_operand:V4HI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "subv2si3"
+  [(set (match_operand:V2SI             0 "register_operand" "=y")
+        (minus:V2SI (match_operand:V2SI 1 "register_operand"  "y")
+                   (match_operand:V2SI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "sssubv8qi3"
+  [(set (match_operand:V8QI                0 "register_operand" "=y")
+        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand"  "y")
+                      (match_operand:V8QI 2 "register_operand"  "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubbss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "sssubv4hi3"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubhss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "sssubv2si3"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:V2SI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubwss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ussubv8qi3"
+  [(set (match_operand:V8QI                0 "register_operand" "=y")
+        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                      (match_operand:V8QI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubbus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ussubv4hi3"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubhus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ussubv2si3"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:V2SI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsubwus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "mulv4hi3"
+  [(set (match_operand:V4HI            0 "register_operand" "=y")
+        (mult:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                  (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmulul%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "smulv4hi3_highpart"
+  [(set (match_operand:V4HI                                0 "register_operand" "=y")
+       (truncate:V4HI
+        (lshiftrt:V4SI
+         (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
+                    (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
+         (const_int 16))))]
+  "TARGET_REALLY_IWMMXT"
+  "wmulsm%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "umulv4hi3_highpart"
+  [(set (match_operand:V4HI                                0 "register_operand" "=y")
+       (truncate:V4HI
+        (lshiftrt:V4SI
+         (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
+                    (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
+         (const_int 16))))]
+  "TARGET_REALLY_IWMMXT"
+  "wmulum%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
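Per lane, the two highpart patterns compute the upper 16 bits of the full 32-bit product, exactly as the truncate/lshiftrt RTL spells out; in scalar terms:

    /* Lane semantics of wmulsm (signed) and wmulum (unsigned).  */
    short
    wmulsm_lane (short a, short b)
    {
      return (short) (((int) a * (int) b) >> 16);
    }

    unsigned short
    wmulum_lane (unsigned short a, unsigned short b)
    {
      return (unsigned short) (((unsigned int) a * (unsigned int) b) >> 16);
    }
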
+(define_insn "iwmmxt_wmacs"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:DI   1 "register_operand" "0")
+                   (match_operand:V4HI 2 "register_operand" "y")
+                   (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))]
+  "TARGET_REALLY_IWMMXT"
+  "wmacs%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacsz"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
+                   (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))]
+  "TARGET_REALLY_IWMMXT"
+  "wmacsz%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacu"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:DI   1 "register_operand" "0")
+                   (match_operand:V4HI 2 "register_operand" "y")
+                   (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))]
+  "TARGET_REALLY_IWMMXT"
+  "wmacu%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmacuz"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
+                   (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))]
+  "TARGET_REALLY_IWMMXT"
+  "wmacuz%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+;; Same as xordi3, but don't show input operands so that we don't think
+;; they are live.
+(define_insn "iwmmxt_clrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI [(const_int 0)] UNSPEC_CLRDI))]
+  "TARGET_REALLY_IWMMXT"
+  "wxor%?\\t%0, %0, %0"
+  [(set_attr "predicable" "yes")])
+
+;; Seems like cse likes to generate these, so we have to support them.
+
+(define_insn "*iwmmxt_clrv8qi"
+  [(set (match_operand:V8QI 0 "register_operand" "=y")
+        (const_vector:V8QI [(const_int 0) (const_int 0)
+                           (const_int 0) (const_int 0)
+                           (const_int 0) (const_int 0)
+                           (const_int 0) (const_int 0)]))]
+  "TARGET_REALLY_IWMMXT"
+  "wxor%?\\t%0, %0, %0"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "*iwmmxt_clrv4hi"
+  [(set (match_operand:V4HI 0 "register_operand" "=y")
+        (const_vector:V4HI [(const_int 0) (const_int 0)
+                           (const_int 0) (const_int 0)]))]
+  "TARGET_REALLY_IWMMXT"
+  "wxor%?\\t%0, %0, %0"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "*iwmmxt_clrv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=y")
+        (const_vector:V2SI [(const_int 0) (const_int 0)]))]
+  "TARGET_REALLY_IWMMXT"
+  "wxor%?\\t%0, %0, %0"
+  [(set_attr "predicable" "yes")])
+
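A plausible trigger, sketched in C with a vector_size typedef: any zeroed vector constant that survives to RTL as a CONST_VECTOR of zeros:

    typedef short v4hi __attribute__ ((vector_size (8)));

    v4hi
    zero (void)
    {
      v4hi z = { 0, 0, 0, 0 };  /* matched by *iwmmxt_clrv4hi: wxor %0, %0, %0 */
      return z;
    }
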
+;; Unsigned averages/sum of absolute differences
+
+(define_insn "iwmmxt_uavgrndv8qi3"
+  [(set (match_operand:V8QI              0 "register_operand" "=y")
+        (ashiftrt:V8QI
+        (plus:V8QI (plus:V8QI
+                    (match_operand:V8QI 1 "register_operand" "y")
+                    (match_operand:V8QI 2 "register_operand" "y"))
+                   (const_vector:V8QI [(const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)]))
+        (const_int 1)))]
+  "TARGET_REALLY_IWMMXT"
+  "wavg2br%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_uavgrndv4hi3"
+  [(set (match_operand:V4HI              0 "register_operand" "=y")
+        (ashiftrt:V4HI
+        (plus:V4HI (plus:V4HI
+                    (match_operand:V4HI 1 "register_operand" "y")
+                    (match_operand:V4HI 2 "register_operand" "y"))
+                   (const_vector:V4HI [(const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)
+                                       (const_int 1)]))
+        (const_int 1)))]
+  "TARGET_REALLY_IWMMXT"
+  "wavg2hr%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+
+(define_insn "iwmmxt_uavgv8qi3"
+  [(set (match_operand:V8QI                 0 "register_operand" "=y")
+        (ashiftrt:V8QI (plus:V8QI
+                       (match_operand:V8QI 1 "register_operand" "y")
+                       (match_operand:V8QI 2 "register_operand" "y"))
+                      (const_int 1)))]
+  "TARGET_REALLY_IWMMXT"
+  "wavg2b%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_uavgv4hi3"
+  [(set (match_operand:V4HI                 0 "register_operand" "=y")
+        (ashiftrt:V4HI (plus:V4HI
+                       (match_operand:V4HI 1 "register_operand" "y")
+                       (match_operand:V4HI 2 "register_operand" "y"))
+                      (const_int 1)))]
+  "TARGET_REALLY_IWMMXT"
+  "wavg2h%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
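In scalar terms the two averaging shapes differ only in rounding: the wavg2br/wavg2hr patterns add 1 before the shift, the wavg2b/wavg2h patterns do not. Per 8-bit lane:

    /* Lane semantics of the rounded and truncated unsigned averages.  */
    unsigned char
    avg_round (unsigned char a, unsigned char b)
    {
      return (unsigned char) ((a + b + 1) >> 1);  /* wavg2br */
    }

    unsigned char
    avg_trunc (unsigned char a, unsigned char b)
    {
      return (unsigned char) ((a + b) >> 1);      /* wavg2b */
    }
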
+(define_insn "iwmmxt_psadbw"
+  [(set (match_operand:V8QI                       0 "register_operand" "=y")
+        (abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                             (match_operand:V8QI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "psadbw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+
+;; Insert/extract/shuffle
+
+(define_insn "iwmmxt_tinsrb"
+  [(set (match_operand:V8QI                             0 "register_operand"    "=y")
+        (vec_merge:V8QI (match_operand:V8QI             1 "register_operand"     "0")
+                       (vec_duplicate:V8QI
+                        (truncate:QI (match_operand:SI 2 "nonimmediate_operand" "r")))
+                       (match_operand:SI               3 "immediate_operand"    "i")))]
+  "TARGET_REALLY_IWMMXT"
+  "tinsrb%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tinsrh"
+  [(set (match_operand:V4HI                             0 "register_operand"    "=y")
+        (vec_merge:V4HI (match_operand:V4HI             1 "register_operand"     "0")
+                       (vec_duplicate:V4HI
+                        (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r")))
+                       (match_operand:SI               3 "immediate_operand"    "i")))]
+  "TARGET_REALLY_IWMMXT"
+  "tinsrh%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tinsrw"
+  [(set (match_operand:V2SI                 0 "register_operand"    "=y")
+        (vec_merge:V2SI (match_operand:V2SI 1 "register_operand"     "0")
+                       (vec_duplicate:V2SI
+                        (match_operand:SI  2 "nonimmediate_operand" "r"))
+                       (match_operand:SI   3 "immediate_operand"    "i")))]
+  "TARGET_REALLY_IWMMXT"
+  "tinsrw%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmub"
+  [(set (match_operand:SI                                  0 "register_operand" "=r")
+        (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
+                                      (parallel
+                                       [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_REALLY_IWMMXT"
+  "textrmub%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmsb"
+  [(set (match_operand:SI                                  0 "register_operand" "=r")
+        (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
+                                      (parallel
+                                       [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_REALLY_IWMMXT"
+  "textrmsb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmuh"
+  [(set (match_operand:SI                                  0 "register_operand" "=r")
+        (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+                                      (parallel
+                                       [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_REALLY_IWMMXT"
+  "textrmuh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_textrmsh"
+  [(set (match_operand:SI                                  0 "register_operand" "=r")
+        (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
+                                      (parallel
+                                       [(match_operand:SI 2 "immediate_operand" "i")]))))]
+  "TARGET_REALLY_IWMMXT"
+  "textrmsh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+;; There are signed/unsigned variants of this instruction, but they are
+;; pointless.
+(define_insn "iwmmxt_textrmw"
+  [(set (match_operand:SI                           0 "register_operand" "=r")
+        (vec_select:SI (match_operand:V2SI          1 "register_operand" "y")
+                      (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
+  "TARGET_REALLY_IWMMXT"
+  "textrmsw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wshufh"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+                     (match_operand:SI   2 "immediate_operand" "i")] UNSPEC_WSHUFH))]
+  "TARGET_REALLY_IWMMXT"
+  "wshufh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+;; Mask-generating comparisons
+;;
+;; Note - you cannot use patterns like these here:
+;;
+;;   (set:<vector> (match:<vector>) (<comparator>:<vector> (match:<vector>) (match:<vector>)))
+;;
+;; Because GCC will assume that the truth value (1 or 0) is installed
+;; into the entire destination vector (with the '1' going into the least
+;; significant element of the vector).  This is not how these instructions
+;; behave.
+;;
+;; Unfortunately the current patterns are illegal.  They are SET insns
+;; without a SET in them.  They work in most cases for ordinary code
+;; generation, but there are circumstances where they can cause gcc to fail.
+;; XXX - FIXME.
+
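Concretely, each comparison writes a per-element mask of all ones or all zeroes, which is why the generic comparison RTL cannot describe them; sketched for the byte case:

    /* What wcmpeqb computes per lane: a mask, not a single truth value.  */
    void
    wcmpeqb_sem (unsigned char r[8],
                 const unsigned char a[8], const unsigned char b[8])
    {
      int i;

      for (i = 0; i < 8; i++)
        r[i] = (a[i] == b[i]) ? 0xff : 0x00;
    }
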
+(define_insn "eqv8qi3"
+  [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y")
+                    (match_operand:V8QI 1 "register_operand"  "y")
+                    (match_operand:V8QI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_EQ)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpeqb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "eqv4hi3"
+  [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y")
+                    (match_operand:V4HI 1 "register_operand"  "y")
+                    (match_operand:V4HI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_EQ)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpeqh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "eqv2si3"
+  [(unspec_volatile:V2SI [(match_operand:V2SI 0 "register_operand" "=y")
+                         (match_operand:V2SI 1 "register_operand"  "y")
+                         (match_operand:V2SI 2 "register_operand"  "y")]
+                        VUNSPEC_WCMP_EQ)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpeqw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "gtuv8qi3"
+  [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y")
+                    (match_operand:V8QI 1 "register_operand"  "y")
+                    (match_operand:V8QI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_GTU)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpgtub%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "gtuv4hi3"
+  [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y")
+                    (match_operand:V4HI 1 "register_operand"  "y")
+                    (match_operand:V4HI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_GTU)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpgtuh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "gtuv2si3"
+  [(unspec_volatile [(match_operand:V2SI 0 "register_operand" "=y")
+                    (match_operand:V2SI 1 "register_operand"  "y")
+                    (match_operand:V2SI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_GTU)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpgtuw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "gtv8qi3"
+  [(unspec_volatile [(match_operand:V8QI 0 "register_operand" "=y")
+                    (match_operand:V8QI 1 "register_operand"  "y")
+                    (match_operand:V8QI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_GT)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpgtsb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "gtv4hi3"
+  [(unspec_volatile [(match_operand:V4HI 0 "register_operand" "=y")
+                    (match_operand:V4HI 1 "register_operand"  "y")
+                    (match_operand:V4HI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_GT)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpgtsh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "gtv2si3"
+  [(unspec_volatile [(match_operand:V2SI 0 "register_operand" "=y")
+                    (match_operand:V2SI 1 "register_operand"  "y")
+                    (match_operand:V2SI 2 "register_operand"  "y")]
+                   VUNSPEC_WCMP_GT)]
+  "TARGET_REALLY_IWMMXT"
+  "wcmpgtsw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+;; Max/min insns
+
+(define_insn "smaxv8qi3"
+  [(set (match_operand:V8QI            0 "register_operand" "=y")
+        (smax:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                  (match_operand:V8QI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaxsb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "umaxv8qi3"
+  [(set (match_operand:V8QI            0 "register_operand" "=y")
+        (umax:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                  (match_operand:V8QI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaxub%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "smaxv4hi3"
+  [(set (match_operand:V4HI            0 "register_operand" "=y")
+        (smax:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                  (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaxsh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "umaxv4hi3"
+  [(set (match_operand:V4HI            0 "register_operand" "=y")
+        (umax:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                  (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaxuh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "smaxv2si3"
+  [(set (match_operand:V2SI            0 "register_operand" "=y")
+        (smax:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                  (match_operand:V2SI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaxsw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "umaxv2si3"
+  [(set (match_operand:V2SI            0 "register_operand" "=y")
+        (umax:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                  (match_operand:V2SI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaxuw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "sminv8qi3"
+  [(set (match_operand:V8QI            0 "register_operand" "=y")
+        (smin:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                  (match_operand:V8QI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wminsb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "uminv8qi3"
+  [(set (match_operand:V8QI            0 "register_operand" "=y")
+        (umin:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                  (match_operand:V8QI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wminub%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "sminv4hi3"
+  [(set (match_operand:V4HI            0 "register_operand" "=y")
+        (smin:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                  (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wminsh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "uminv4hi3"
+  [(set (match_operand:V4HI            0 "register_operand" "=y")
+        (umin:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                  (match_operand:V4HI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wminuh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "sminv2si3"
+  [(set (match_operand:V2SI            0 "register_operand" "=y")
+        (smin:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                  (match_operand:V2SI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wminsw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "uminv2si3"
+  [(set (match_operand:V2SI            0 "register_operand" "=y")
+        (umin:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                  (match_operand:V2SI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wminuw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+;; Pack/unpack insns.
+
+(define_insn "iwmmxt_wpackhss"
+  [(set (match_operand:V8QI                    0 "register_operand" "=y")
+       (vec_concat:V8QI
+        (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
+        (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wpackhss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackwss"
+  [(set (match_operand:V4HI                    0 "register_operand" "=y")
+       (vec_concat:V4HI
+        (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
+        (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wpackwss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackdss"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+       (vec_concat:V2SI
+        (ss_truncate:SI (match_operand:DI 1 "register_operand" "y"))
+        (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wpackdss%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackhus"
+  [(set (match_operand:V8QI                    0 "register_operand" "=y")
+       (vec_concat:V8QI
+        (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
+        (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wpackhus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackwus"
+  [(set (match_operand:V4HI                    0 "register_operand" "=y")
+       (vec_concat:V4HI
+        (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
+        (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wpackwus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wpackdus"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+       (vec_concat:V2SI
+        (us_truncate:SI (match_operand:DI 1 "register_operand" "y"))
+        (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
+  "TARGET_REALLY_IWMMXT"
+  "wpackdus%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
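All six pack patterns narrow each source element with saturation (signed for the ss forms, unsigned for the us forms) and concatenate the two halves; the per-lane narrowing done by wpackhss is just:

    /* ss_truncate of one 16-bit lane to 8 bits, as done by wpackhss.  */
    signed char
    pack_ss_lane (short x)
    {
      return x > 127 ? 127 : x < -128 ? -128 : (signed char) x;
    }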
+
+(define_insn "iwmmxt_wunpckihb"
+  [(set (match_operand:V8QI                   0 "register_operand" "=y")
+       (vec_merge:V8QI
+        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                         (parallel [(const_int 4)
+                                    (const_int 0)
+                                    (const_int 5)
+                                    (const_int 1)
+                                    (const_int 6)
+                                    (const_int 2)
+                                    (const_int 7)
+                                    (const_int 3)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 4)
+                                    (const_int 1)
+                                    (const_int 5)
+                                    (const_int 2)
+                                    (const_int 6)
+                                    (const_int 3)
+                                    (const_int 7)]))
+        (const_int 85)))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckihb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckihh"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (vec_merge:V4HI
+        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (const_int 5)))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckihh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckihw"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (vec_merge:V2SI
+        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 1)]))
+        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+                         (parallel [(const_int 1)
+                                    (const_int 0)]))
+        (const_int 1)))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckihw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckilb"
+  [(set (match_operand:V8QI                   0 "register_operand" "=y")
+       (vec_merge:V8QI
+        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 4)
+                                    (const_int 1)
+                                    (const_int 5)
+                                    (const_int 2)
+                                    (const_int 6)
+                                    (const_int 3)
+                                    (const_int 7)]))
+        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
+                         (parallel [(const_int 4)
+                                    (const_int 0)
+                                    (const_int 5)
+                                    (const_int 1)
+                                    (const_int 6)
+                                    (const_int 2)
+                                    (const_int 7)
+                                    (const_int 3)]))
+        (const_int 85)))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckilb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckilh"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (vec_merge:V4HI
+        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                         (parallel [(const_int 2)
+                                    (const_int 0)
+                                    (const_int 3)
+                                    (const_int 1)]))
+        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 2)
+                                    (const_int 1)
+                                    (const_int 3)]))
+        (const_int 5)))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckilh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckilw"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (vec_merge:V2SI
+        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                          (parallel [(const_int 1)
+                                     (const_int 0)]))
+        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
+                         (parallel [(const_int 0)
+                                    (const_int 1)]))
+        (const_int 1)))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckilw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
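The vec_merge/vec_select encoding above is dense, but the effect of the interleave-low forms is simple; for wunpckilb:

    /* wunpckilb: interleave the low four bytes of each operand.  */
    void
    wunpckilb_sem (unsigned char r[8],
                   const unsigned char a[8], const unsigned char b[8])
    {
      int i;

      for (i = 0; i < 4; i++)
        {
          r[2 * i]     = a[i];  /* even lanes from operand 1 */
          r[2 * i + 1] = b[i];  /* odd lanes from operand 2  */
        }
    }

The ih forms do the same with the high four elements of each operand.
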
+(define_insn "iwmmxt_wunpckehub"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (zero_extend:V4HI
+        (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+                         (parallel [(const_int 4) (const_int 5)
+                                    (const_int 6) (const_int 7)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckehub%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehuh"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (zero_extend:V2SI
+        (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+                         (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckehuh%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehuw"
+  [(set (match_operand:DI                   0 "register_operand" "=y")
+       (zero_extend:DI
+        (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+                       (parallel [(const_int 1)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckehuw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehsb"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (sign_extend:V4HI
+        (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+                         (parallel [(const_int 4) (const_int 5)
+                                    (const_int 6) (const_int 7)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckehsb%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehsh"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (sign_extend:V2SI
+        (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+                         (parallel [(const_int 2) (const_int 3)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckehsh%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckehsw"
+  [(set (match_operand:DI                   0 "register_operand" "=y")
+       (sign_extend:DI
+        (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+                       (parallel [(const_int 1)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckehsw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelub"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (zero_extend:V4HI
+        (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+                         (parallel [(const_int 0) (const_int 1)
+                                    (const_int 2) (const_int 3)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckelub%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckeluh"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (zero_extend:V2SI
+        (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+                         (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckeluh%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckeluw"
+  [(set (match_operand:DI                   0 "register_operand" "=y")
+       (zero_extend:DI
+        (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+                       (parallel [(const_int 0)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckeluw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelsb"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (sign_extend:V4HI
+        (vec_select:V4QI (match_operand:V8QI 1 "register_operand" "y")
+                         (parallel [(const_int 0) (const_int 1)
+                                    (const_int 2) (const_int 3)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckelsb%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelsh"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (sign_extend:V2SI
+        (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "y")
+                         (parallel [(const_int 0) (const_int 1)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckelsh%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wunpckelsw"
+  [(set (match_operand:DI                   0 "register_operand" "=y")
+       (sign_extend:DI
+        (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
+                       (parallel [(const_int 0)]))))]
+  "TARGET_REALLY_IWMMXT"
+  "wunpckelsw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+;; Shifts
+
+(define_insn "rorv4hi3"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (rotatert:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wrorhg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "rorv2si3"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (rotatert:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wrorwg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "rordi3"
+  [(set (match_operand:DI              0 "register_operand" "=y")
+       (rotatert:DI (match_operand:DI 1 "register_operand" "y")
+                  (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wrordg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashrv4hi3"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrahg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashrv2si3"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrawg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashrdi3"
+  [(set (match_operand:DI              0 "register_operand" "=y")
+       (ashiftrt:DI (match_operand:DI 1 "register_operand" "y")
+                  (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsradg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "lshrv4hi3"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrlhg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "lshrv2si3"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrlwg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "lshrdi3"
+  [(set (match_operand:DI              0 "register_operand" "=y")
+       (lshiftrt:DI (match_operand:DI 1 "register_operand" "y")
+                    (match_operand:SI 2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrldg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashlv4hi3"
+  [(set (match_operand:V4HI              0 "register_operand" "=y")
+        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                    (match_operand:SI   2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsllhg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashlv2si3"
+  [(set (match_operand:V2SI              0 "register_operand" "=y")
+        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:SI 2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsllwg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashldi3_iwmmxt"
+  [(set (match_operand:DI            0 "register_operand" "=y")
+       (ashift:DI (match_operand:DI 1 "register_operand" "y")
+                  (match_operand:SI 2 "register_operand" "z")))]
+  "TARGET_REALLY_IWMMXT"
+  "wslldg%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
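The shift patterns above take their count from one of the wCGR general-purpose coprocessor registers (the "z" constraint, hence the trailing g in the mnemonics); the _di variants below take the count from an ordinary wR register instead. Per lane both compute the usual C shift; sketched for the V4HI arithmetic right shift:

    /* Lane semantics of wsrahg / wsrah (a sketch; behaviour for counts
       >= 16 is left to the hardware definition).  */
    short
    wsrah_lane (short x, unsigned int n)
    {
      return (short) (x >> n);
    }
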
+(define_insn "rorv4hi3_di"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (rotatert:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wrorh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "rorv2si3_di"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (rotatert:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wrorw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "rordi3_di"
+  [(set (match_operand:DI              0 "register_operand" "=y")
+       (rotatert:DI (match_operand:DI 1 "register_operand" "y")
+                  (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wrord%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashrv4hi3_di"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrah%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashrv2si3_di"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsraw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashrdi3_di"
+  [(set (match_operand:DI              0 "register_operand" "=y")
+       (ashiftrt:DI (match_operand:DI 1 "register_operand" "y")
+                  (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrad%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "lshrv4hi3_di"
+  [(set (match_operand:V4HI                0 "register_operand" "=y")
+        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                      (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrlh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "lshrv2si3_di"
+  [(set (match_operand:V2SI                0 "register_operand" "=y")
+        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrlw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "lshrdi3_di"
+  [(set (match_operand:DI              0 "register_operand" "=y")
+       (lshiftrt:DI (match_operand:DI 1 "register_operand" "y")
+                    (match_operand:DI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsrld%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashlv4hi3_di"
+  [(set (match_operand:V4HI              0 "register_operand" "=y")
+        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "y")
+                    (match_operand:DI   2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsllh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashlv2si3_di"
+  [(set (match_operand:V2SI              0 "register_operand" "=y")
+        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "y")
+                      (match_operand:DI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wsllw%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "ashldi3_di"
+  [(set (match_operand:DI            0 "register_operand" "=y")
+       (ashift:DI (match_operand:DI 1 "register_operand" "y")
+                  (match_operand:DI 2 "register_operand" "y")))]
+  "TARGET_REALLY_IWMMXT"
+  "wslld%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmadds"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+                     (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMADDS))]
+  "TARGET_REALLY_IWMMXT"
+  "wmadds%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wmaddu"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+                     (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMADDU))]
+  "TARGET_REALLY_IWMMXT"
+  "wmaddu%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmia"
+  [(set (match_operand:DI                    0 "register_operand" "=y")
+       (plus:DI (match_operand:DI           1 "register_operand" "0")
+                (mult:DI (sign_extend:DI
+                          (match_operand:SI 2 "register_operand" "r"))
+                         (sign_extend:DI
+                          (match_operand:SI 3 "register_operand" "r")))))]
+  "TARGET_REALLY_IWMMXT"
+  "tmia%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiaph"
+  [(set (match_operand:DI          0 "register_operand" "=y")
+       (plus:DI (match_operand:DI 1 "register_operand" "0")
+                (plus:DI
+                 (mult:DI (sign_extend:DI
+                           (truncate:HI (match_operand:SI 2 "register_operand" "r")))
+                          (sign_extend:DI
+                           (truncate:HI (match_operand:SI 3 "register_operand" "r"))))
+                 (mult:DI (sign_extend:DI
+                           (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16))))
+                          (sign_extend:DI
+                           (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))]
+  "TARGET_REALLY_IWMMXT"
+  "tmiaph%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiabb"
+  [(set (match_operand:DI          0 "register_operand" "=y")
+       (plus:DI (match_operand:DI 1 "register_operand" "0")
+                (mult:DI (sign_extend:DI
+                          (truncate:HI (match_operand:SI 2 "register_operand" "r")))
+                         (sign_extend:DI
+                          (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))]
+  "TARGET_REALLY_IWMMXT"
+  "tmiabb%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiatb"
+  [(set (match_operand:DI          0 "register_operand" "=y")
+       (plus:DI (match_operand:DI 1 "register_operand" "0")
+                (mult:DI (sign_extend:DI
+                          (truncate:HI (ashiftrt:SI
+                                        (match_operand:SI 2 "register_operand" "r")
+                                        (const_int 16))))
+                         (sign_extend:DI
+                          (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))]
+  "TARGET_REALLY_IWMMXT"
+  "tmiatb%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiabt"
+  [(set (match_operand:DI          0 "register_operand" "=y")
+       (plus:DI (match_operand:DI 1 "register_operand" "0")
+                (mult:DI (sign_extend:DI
+                          (truncate:HI (match_operand:SI 2 "register_operand" "r")))
+                         (sign_extend:DI
+                          (truncate:HI (ashiftrt:SI
+                                        (match_operand:SI 3 "register_operand" "r")
+                                        (const_int 16)))))))]
+  "TARGET_REALLY_IWMMXT"
+  "tmiabt%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmiatt"
+  [(set (match_operand:DI          0 "register_operand" "=y")
+       (plus:DI (match_operand:DI 1 "register_operand" "0")
+                (mult:DI (sign_extend:DI
+                          (truncate:HI (ashiftrt:SI
+                                        (match_operand:SI 2 "register_operand" "r")
+                                        (const_int 16))))
+                         (sign_extend:DI
+                          (truncate:HI (ashiftrt:SI
+                                        (match_operand:SI 3 "register_operand" "r")
+                                        (const_int 16)))))))]
+  "TARGET_REALLY_IWMMXT"
+  "tmiatt%?\\t%0, %2, %3"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tbcstqi"
+  [(set (match_operand:V8QI                   0 "register_operand" "=y")
+       (vec_duplicate:V8QI (match_operand:QI 1 "register_operand" "r")))]
+  "TARGET_REALLY_IWMMXT"
+  "tbcstb%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tbcsthi"
+  [(set (match_operand:V4HI                   0 "register_operand" "=y")
+       (vec_duplicate:V4HI (match_operand:HI 1 "register_operand" "r")))]
+  "TARGET_REALLY_IWMMXT"
+  "tbcsth%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tbcstsi"
+  [(set (match_operand:V2SI                   0 "register_operand" "=y")
+       (vec_duplicate:V2SI (match_operand:SI 1 "register_operand" "r")))]
+  "TARGET_REALLY_IWMMXT"
+  "tbcstw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmovmskb"
+  [(set (match_operand:SI               0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
+  "TARGET_REALLY_IWMMXT"
+  "tmovmskb%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmovmskh"
+  [(set (match_operand:SI               0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
+  "TARGET_REALLY_IWMMXT"
+  "tmovmskh%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmovmskw"
+  [(set (match_operand:SI               0 "register_operand" "=r")
+       (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
+  "TARGET_REALLY_IWMMXT"
+  "tmovmskw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_waccb"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))]
+  "TARGET_REALLY_IWMMXT"
+  "waccb%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wacch"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))]
+  "TARGET_REALLY_IWMMXT"
+  "wacch%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_waccw"
+  [(set (match_operand:DI               0 "register_operand" "=y")
+       (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))]
+  "TARGET_REALLY_IWMMXT"
+  "waccw%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
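+;; Extract a 64-bit window from the 128-bit concatenation of operands 1
+;; (low half) and 2 (high half), shifted right by operand 3 bytes.  An
+;; immediate offset selects WALIGNI; an offset in a wCGR register
+;; selects WALIGNR.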
+(define_insn "iwmmxt_walign"
+  [(set (match_operand:V8QI                           0 "register_operand" "=y,y")
+       (subreg:V8QI (ashiftrt:TI
+                     (subreg:TI (vec_concat:V16QI
+                                 (match_operand:V8QI 1 "register_operand" "y,y")
+                                 (match_operand:V8QI 2 "register_operand" "y,y")) 0)
+                     (mult:SI
+                      (match_operand:SI              3 "nonmemory_operand" "i,z")
+                      (const_int 8))) 0))]
+  "TARGET_REALLY_IWMMXT"
+  "@
+   waligni%?\\t%0, %1, %2, %3
+   walignr%U3%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmrc"
+  [(set (match_operand:SI                      0 "register_operand" "=r")
+       (unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "i")]
+                           VUNSPEC_TMRC))]
+  "TARGET_REALLY_IWMMXT"
+  "tmrc%?\\t%0, %w1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_tmcr"
+  [(unspec_volatile:SI [(match_operand:SI 0 "immediate_operand" "i")
+                       (match_operand:SI 1 "register_operand"  "r")]
+                      VUNSPEC_TMCR)]
+  "TARGET_REALLY_IWMMXT"
+  "tmcr%?\\t%w0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadb"
+  [(set (match_operand:V8QI               0 "register_operand" "=y")
+        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+                     (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSAD))]
+  "TARGET_REALLY_IWMMXT"
+  "wsadb%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadh"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+                     (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSAD))]
+  "TARGET_REALLY_IWMMXT"
+  "wsadh%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadbz"
+  [(set (match_operand:V8QI               0 "register_operand" "=y")
+        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
+                     (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))]
+  "TARGET_REALLY_IWMMXT"
+  "wsadbz%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "iwmmxt_wsadhz"
+  [(set (match_operand:V4HI               0 "register_operand" "=y")
+        (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
+                     (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))]
+  "TARGET_REALLY_IWMMXT"
+  "wsadhz%?\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")])
+
index 03b71a8..f80af2d 100644
@@ -158,11 +158,12 @@ divisor           .req    r1
 overdone       .req    r2
 result         .req    r2
 curbit         .req    r3
+#if 0
 ip             .req    r12
 sp             .req    r13
 lr             .req    r14
 pc             .req    r15
-
+#endif
 /* ------------------------------------------------------------------------ */
 /*             Bodies of the division and modulo routines.                 */
 /* ------------------------------------------------------------------------ */ 
diff --git a/gcc/config/arm/mmintrin.h b/gcc/config/arm/mmintrin.h
new file mode 100644
index 0000000..4dc1d45
--- /dev/null
@@ -0,0 +1,1257 @@
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 2, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to the Free
+   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+   02111-1307, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+/* The data type intended for user use.  */
+typedef unsigned long long __m64, __int64;
+
+/* Internal data types for implementing the intrinsics.  */
+typedef int __v2si __attribute__ ((__mode__ (__V2SI__)));
+typedef int __v4hi __attribute__ ((__mode__ (__V4HI__)));
+typedef int __v8qi __attribute__ ((__mode__ (__V8QI__)));
+
+/* "Convert" __m64 and __int64 into each other.  */
+static __inline __m64 
+_mm_cvtsi64_m64 (__int64 __i)
+{
+  return __i;
+}
+
+static __inline __int64
+_mm_cvtm64_si64 (__m64 __i)
+{
+  return __i;
+}
+
+static __inline int
+_mm_cvtsi64_si32 (__int64 __i)
+{
+  return __i;
+}
+
+static __inline __int64
+_mm_cvtsi32_si64 (int __i)
+{
+  return __i;
+}
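+
+/* Usage sketch (illustrative only):
+
+     __m64 __v = _mm_cvtsi32_si64 (42);
+     int   __i = _mm_cvtsi64_si32 (__v);
+
+   leaves __i == 42; on this implementation the conversions are plain
+   value-preserving copies, so no iWMMXt instruction is required.  */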
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+   the result, and the four 16-bit values from M2 into the upper four 8-bit
+   values of the result, all with signed saturation.  */
+static __inline __m64
+_mm_packs_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wpackhss ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+   the result, and the two 32-bit values from M2 into the upper two 16-bit
+   values of the result, all with signed saturation.  */
+static __inline __m64
+_mm_packs_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wpackwss ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and
+   the 64-bit value from M2 into the upper 32-bits of the result, all with
+   signed saturation for values that do not fit exactly into 32-bits.  */
+static __inline __m64
+_mm_packs_pi64 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wpackdss ((long long)__m1, (long long)__m2);
+}
+
+/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
+   the result, and the four 16-bit values from M2 into the upper four 8-bit
+   values of the result, all with unsigned saturation.  */
+static __inline __m64
+_mm_packs_pu16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wpackhus ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
+   the result, and the two 32-bit values from M2 into the upper two 16-bit
+   values of the result, all with unsigned saturation.  */
+static __inline __m64
+_mm_packs_pu32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wpackwus ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Copy the 64-bit value from M1 into the lower 32-bits of the result, and
+   the 64-bit value from M2 into the upper 32-bits of the result, all with
+   unsigned saturation for values that do not fit exactly into 32-bits.  */
+static __inline __m64
+_mm_packs_pu64 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wpackdus ((long long)__m1, (long long)__m2);
+}
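+
+/* Usage sketch (illustrative only): if the four 16-bit lanes of __m1
+   hold 300, -300, 5, 6, then
+
+     __m64 __r = _mm_packs_pi16 (__m1, __m2);
+
+   puts 127, -128, 5, 6 in the low four byte lanes of __r (300 and -300
+   saturate to the signed 8-bit range) and the packed lanes of __m2 in
+   the high four.  */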
+
+/* Interleave the four 8-bit values from the high half of M1 with the four
+   8-bit values from the high half of M2.  */
+static __inline __m64
+_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wunpckihb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Interleave the two 16-bit values from the high half of M1 with the two
+   16-bit values from the high half of M2.  */
+static __inline __m64
+_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wunpckihh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Interleave the 32-bit value from the high half of M1 with the 32-bit
+   value from the high half of M2.  */
+static __inline __m64
+_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wunpckihw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Interleave the four 8-bit values from the low half of M1 with the four
+   8-bit values from the low half of M2.  */
+static __inline __m64
+_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wunpckilb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Interleave the two 16-bit values from the low half of M1 with the two
+   16-bit values from the low half of M2.  */
+static __inline __m64
+_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wunpckilh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Interleave the 32-bit value from the low half of M1 with the 32-bit
+   value from the low half of M2.  */
+static __inline __m64
+_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wunpckilw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Take the four 8-bit values from the low half of M1, sign extend them,
+   and return the result as a vector of four 16-bit quantities.  */
+static __inline __m64
+_mm_unpackel_pi8 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckelsb ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the low half of M1, sign extend them,
+   and return the result as a vector of two 32-bit quantities.  */
+static __inline __m64
+_mm_unpackel_pi16 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckelsh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the low half of M1, and return it sign extended
+  to 64 bits.  */
+static __inline __m64
+_mm_unpackel_pi32 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckelsw ((__v2si)__m1);
+}
+
+/* Take the four 8-bit values from the high half of M1, sign extend them,
+   and return the result as a vector of four 16-bit quantities.  */
+static __inline __m64
+_mm_unpackeh_pi8 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckehsb ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the high half of M1, sign extend them,
+   and return the result as a vector of two 32-bit quantities.  */
+static __inline __m64
+_mm_unpackeh_pi16 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckehsh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the high half of M1, and return it sign extended
+  to 64 bits.  */
+static __inline __m64
+_mm_unpackeh_pi32 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckehsw ((__v2si)__m1);
+}
+
+/* Take the four 8-bit values from the low half of M1, zero extend them,
+   and return the result as a vector of four 16-bit quantities.  */
+static __inline __m64
+_mm_unpackel_pu8 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckelub ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the low half of M1, zero extend them,
+   and return the result as a vector of two 32-bit quantities.  */
+static __inline __m64
+_mm_unpackel_pu16 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckeluh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the low half of M1, and return it zero extended
+  to 64 bits.  */
+static __inline __m64
+_mm_unpackel_pu32 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckeluw ((__v2si)__m1);
+}
+
+/* Take the four 8-bit values from the high half of M1, zero extend them,
+   and return the result as a vector of four 16-bit quantities.  */
+static __inline __m64
+_mm_unpackeh_pu8 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckehub ((__v8qi)__m1);
+}
+
+/* Take the two 16-bit values from the high half of M1, zero extend them,
+   and return the result as a vector of two 32-bit quantities.  */
+static __inline __m64
+_mm_unpackeh_pu16 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckehuh ((__v4hi)__m1);
+}
+
+/* Take the 32-bit value from the high half of M1, and return it zero extended
+  to 64 bits.  */
+static __inline __m64
+_mm_unpackeh_pu32 (__m64 __m1)
+{
+  return (__m64) __builtin_arm_wunpckehuw ((__v2si)__m1);
+}
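+
+/* Usage sketch (illustrative only): if the low four byte lanes of __b
+   hold 0x80, 1, 2, 3, then
+
+     __m64 __w = _mm_unpackel_pu8 (__b);
+
+   zero-extends them to the 16-bit lanes 128, 1, 2, 3, whereas the
+   signed form _mm_unpackel_pi8 would give -128, 1, 2, 3.  */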
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2.  */
+static __inline __m64
+_mm_add_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2.  */
+static __inline __m64
+_mm_add_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2.  */
+static __inline __m64
+_mm_add_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
+   saturated arithmetic.  */
+static __inline __m64
+_mm_adds_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddbss ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
+   saturated arithmetic.  */
+static __inline __m64
+_mm_adds_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddhss ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2 using signed
+   saturated arithmetic.  */
+static __inline __m64
+_mm_adds_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddwss ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
+   saturated arithmetic.  */
+static __inline __m64
+_mm_adds_pu8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddbus ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
+   saturated arithmetic.  */
+static __inline __m64
+_mm_adds_pu16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddhus ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Add the 32-bit values in M1 to the 32-bit values in M2 using unsigned
+   saturated arithmetic.  */
+static __inline __m64
+_mm_adds_pu32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_waddwus ((__v2si)__m1, (__v2si)__m2);
+}
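+
+/* Usage sketch (illustrative only): in a byte lane holding 250 in __a
+   and 10 in __b,
+
+     __m64 __r = _mm_adds_pu8 (__a, __b);
+
+   produces 255 (clamped to the unsigned 8-bit maximum), where the
+   modular _mm_add_pi8 would wrap around to 4.  */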
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
+static __inline __m64
+_mm_sub_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
+static __inline __m64
+_mm_sub_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
+static __inline __m64
+_mm_sub_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubw ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
+   saturating arithmetic.  */
+static __inline __m64
+_mm_subs_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubbss ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+   signed saturating arithmetic.  */
+static __inline __m64
+_mm_subs_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubhss ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using
+   signed saturating arithmetic.  */
+static __inline __m64
+_mm_subs_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubwss ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
+   unsigned saturating arithmetic.  */
+static __inline __m64
+_mm_subs_pu8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubbus ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
+   unsigned saturating arithmetic.  */
+static __inline __m64
+_mm_subs_pu16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubhus ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Subtract the 32-bit values in M2 from the 32-bit values in M1 using
+   unsigned saturating arithmetic.  */
+static __inline __m64
+_mm_subs_pu32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wsubwus ((__v2si)__m1, (__v2si)__m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
+   four 32-bit intermediate results, which are then summed by pairs to
+   produce two 32-bit results.  */
+static __inline __m64
+_mm_madd_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wmadds ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four unsigned 16-bit values in M1 by four unsigned 16-bit
+   values in M2 producing four 32-bit intermediate results, which are
+   then summed by pairs to produce two 32-bit results.  */
+static __inline __m64
+_mm_madd_pu16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wmaddu ((__v4hi)__m1, (__v4hi)__m2);
+}
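+
+/* Usage sketch (illustrative only): a four-element dot product,
+
+     __m64 __s = _mm_madd_pi16 (__x, __y);
+
+   leaves x0*y0 + x1*y1 in the low 32-bit lane of __s and
+   x2*y2 + x3*y3 in the high lane; one further 32-bit add of the two
+   lanes completes the dot product.  */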
+
+/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
+   M2 and produce the high 16 bits of the 32-bit results.  */
+static __inline __m64
+_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wmulsh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four unsigned 16-bit values in M1 by four unsigned 16-bit
+   values in M2 and produce the high 16 bits of the 32-bit results.  */
+static __inline __m64
+_mm_mulhi_pu16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wmuluh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
+   the low 16 bits of the results.  */
+static __inline __m64
+_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wmulul ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Shift four 16-bit values in M left by COUNT.  */
+static __inline __m64
+_mm_sll_pi16 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsllh ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_slli_pi16 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsllhi ((__v4hi)__m, __count);
+}
+
+/* Shift two 32-bit values in M left by COUNT.  */
+static __inline __m64
+_mm_sll_pi32 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsllw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_slli_pi32 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsllwi ((__v2si)__m, __count);
+}
+
+/* Shift the 64-bit value in M left by COUNT.  */
+static __inline __m64
+_mm_sll_si64 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wslld (__m, __count);
+}
+
+static __inline __m64
+_mm_slli_si64 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wslldi (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
+static __inline __m64
+_mm_sra_pi16 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsrah ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_srai_pi16 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsrahi ((__v4hi)__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
+static __inline __m64
+_mm_sra_pi32 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsraw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_srai_pi32 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsrawi ((__v2si)__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in the sign bit.  */
+static __inline __m64
+_mm_sra_si64 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsrad (__m, __count);
+}
+
+static __inline __m64
+_mm_srai_si64 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsradi (__m, __count);
+}
+
+/* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
+static __inline __m64
+_mm_srl_pi16 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsrlh ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_srli_pi16 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsrlhi ((__v4hi)__m, __count);
+}
+
+/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
+static __inline __m64
+_mm_srl_pi32 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsrlw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_srli_pi32 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsrlwi ((__v2si)__m, __count);
+}
+
+/* Shift the 64-bit value in M right by COUNT; shift in zeros.  */
+static __inline __m64
+_mm_srl_si64 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wsrld (__m, __count);
+}
+
+static __inline __m64
+_mm_srli_si64 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wsrldi (__m, __count);
+}
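+
+/* Usage sketch (illustrative only): on a 16-bit lane holding -4,
+   _mm_srai_pi16 (__m, 1) yields -2 because the sign bit is replicated,
+   while _mm_srli_pi16 (__m, 1) yields 0x7FFE because zeros are shifted
+   in.  */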
+
+/* Rotate four 16-bit values in M right by COUNT.  */
+static __inline __m64
+_mm_ror_pi16 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wrorh ((__v4hi)__m, __count);
+}
+
+static __inline __m64
+_mm_rori_pi16 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wrorhi ((__v4hi)__m, __count);
+}
+
+/* Rotate two 32-bit values in M right by COUNT.  */
+static __inline __m64
+_mm_ror_pi32 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wrorw ((__v2si)__m, __count);
+}
+
+static __inline __m64
+_mm_rori_pi32 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wrorwi ((__v2si)__m, __count);
+}
+
+/* Rotate the 64-bit value in M right by COUNT.  */
+static __inline __m64
+_mm_ror_si64 (__m64 __m, __m64 __count)
+{
+  return (__m64) __builtin_arm_wrord (__m, __count);
+}
+
+static __inline __m64
+_mm_rori_si64 (__m64 __m, int __count)
+{
+  return (__m64) __builtin_arm_wrordi (__m, __count);
+}
+
+/* Bit-wise AND the 64-bit values in M1 and M2.  */
+static __inline __m64
+_mm_and_si64 (__m64 __m1, __m64 __m2)
+{
+  return __builtin_arm_wand (__m1, __m2);
+}
+
+/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
+   64-bit value in M2.  */
+static __inline __m64
+_mm_andnot_si64 (__m64 __m1, __m64 __m2)
+{
+  return __builtin_arm_wandn (__m1, __m2);
+}
+
+/* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
+static __inline __m64
+_mm_or_si64 (__m64 __m1, __m64 __m2)
+{
+  return __builtin_arm_wor (__m1, __m2);
+}
+
+/* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
+static __inline __m64
+_mm_xor_si64 (__m64 __m1, __m64 __m2)
+{
+  return __builtin_arm_wxor (__m1, __m2);
+}
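+
+/* Usage sketch (illustrative only): note that _mm_andnot_si64
+   complements its FIRST argument, so
+
+     __m64 __r = _mm_andnot_si64 (__mask, __v);
+
+   clears exactly those bits of __v that are set in __mask.  */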
+
+/* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
+   test is true and zero if false.  */
+static __inline __m64
+_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpgtsb ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pu8 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpgtub ((__v8qi)__m1, (__v8qi)__m2);
+}
+
+/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
+   the test is true and zero if false.  */
+static __inline __m64
+_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpeqh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpgtsh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pu16 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpgtuh ((__v4hi)__m1, (__v4hi)__m2);
+}
+
+/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
+   the test is true and zero if false.  */
+static __inline __m64
+_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpeqw ((__v2si)__m1, (__v2si)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpgtsw ((__v2si)__m1, (__v2si)__m2);
+}
+
+static __inline __m64
+_mm_cmpgt_pu32 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_arm_wcmpgtuw ((__v2si)__m1, (__v2si)__m2);
+}
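+
+/* Usage sketch (illustrative only): the all-ones/all-zeros results can
+   be used as select masks, e.g. a branch-free signed byte maximum:
+
+     __m64 __gt = _mm_cmpgt_pi8 (__a, __b);
+     __m64 __r  = _mm_or_si64 (_mm_and_si64 (__gt, __a),
+                               _mm_andnot_si64 (__gt, __b));
+
+   although _mm_max_pi8 below does the same in a single instruction.  */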
+
+/* Element-wise multiplication of the unsigned 16-bit values in __B and
+   __C; the 32-bit products are summed across all elements and the sum
+   is added to __A.  */
+static __inline __m64
+_mm_mac_pu16 (__m64 __A, __m64 __B, __m64 __C)
+{
+  return __builtin_arm_wmacu (__A, (__v4hi)__B, (__v4hi)__C);
+}
+
+/* Element-wise multiplication of the signed 16-bit values in __B and
+   __C; the 32-bit products are summed across all elements and the sum
+   is added to __A.  */
+static __inline __m64
+_mm_mac_pi16 (__m64 __A, __m64 __B, __m64 __C)
+{
+  return __builtin_arm_wmacs (__A, (__v4hi)__B, (__v4hi)__C);
+}
+
+/* Element-wise multiplication of the unsigned 16-bit values in __A and
+   __B; the 32-bit products are summed across all elements, starting
+   from zero.  */
+static __inline __m64
+_mm_macz_pu16 (__m64 __A, __m64 __B)
+{
+  return __builtin_arm_wmacuz ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Element-wise multiplication of the signed 16-bit values in __A and
+   __B; the 32-bit products are summed across all elements, starting
+   from zero.  */
+static __inline __m64
+_mm_macz_pi16 (__m64 __A, __m64 __B)
+{
+  return __builtin_arm_wmacsz ((__v4hi)__A, (__v4hi)__B);
+}
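+
+/* Usage sketch (illustrative only): a running dot product over blocks
+   of four 16-bit samples,
+
+     __m64 __acc = _mm_macz_pi16 (__x0, __y0);
+     __acc = _mm_mac_pi16 (__acc, __x1, __y1);
+
+   where the "z" form starts the sum from zero and the plain form adds
+   the new products to the accumulator passed in __A.  */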
+
+/* Accumulate across all unsigned 8-bit values in __A.  */
+static __inline __m64
+_mm_acc_pu8 (__m64 __A)
+{
+  return __builtin_arm_waccb ((__v8qi)__A);
+}
+
+/* Accumulate across all unsigned 16-bit values in __A.  */
+static __inline __m64
+_mm_acc_pu16 (__m64 __A)
+{
+  return __builtin_arm_wacch ((__v4hi)__A);
+}
+
+/* Accumulate across all unsigned 32-bit values in __A.  */
+static __inline __m64
+_mm_acc_pu32 (__m64 __A)
+{
+  return __builtin_arm_waccw ((__v2si)__A);
+}
+
+static __inline __m64
+_mm_mia_si64 (__m64 __A, int __B, int __C)
+{
+  return __builtin_arm_tmia (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miaph_si64 (__m64 __A, int __B, int __C)
+{
+  return __builtin_arm_tmiaph (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miabb_si64 (__m64 __A, int __B, int __C)
+{
+  return __builtin_arm_tmiabb (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miabt_si64 (__m64 __A, int __B, int __C)
+{
+  return __builtin_arm_tmiabt (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miatb_si64 (__m64 __A, int __B, int __C)
+{
+  return __builtin_arm_tmiatb (__A, __B, __C);
+}
+
+static __inline __m64
+_mm_miatt_si64 (__m64 __A, int __B, int __C)
+{
+  return __builtin_arm_tmiatt (__A, __B, __C);
+}
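+
+/* Usage sketch (illustrative only): the tmia family multiplies ARM
+   core registers and accumulates into an iWMMXt register, so
+
+     __m64 __acc = 0;
+     __acc = _mm_mia_si64 (__acc, __x, __y);
+
+   leaves the full 64-bit product __x * __y in __acc; the bb/bt/tb/tt
+   forms multiply the selected 16-bit halves of the operands instead.  */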
+
+/* Extract one of the elements of A and sign extend.  The selector N must
+   be immediate.  */
+#define _mm_extract_pi8(A, N) __builtin_arm_textrmsb ((__v8qi)(A), (N))
+#define _mm_extract_pi16(A, N) __builtin_arm_textrmsh ((__v4hi)(A), (N))
+#define _mm_extract_pi32(A, N) __builtin_arm_textrmsw ((__v2si)(A), (N))
+
+/* Extract one of the elements of A and zero extend.  The selector N must
+   be immediate.  */
+#define _mm_extract_pu8(A, N) __builtin_arm_textrmub ((__v8qi)(A), (N))
+#define _mm_extract_pu16(A, N) __builtin_arm_textrmuh ((__v4hi)(A), (N))
+#define _mm_extract_pu32(A, N) __builtin_arm_textrmuw ((__v2si)(A), (N))
+
+/* Inserts word D into one of the elements of A.  The selector N must be
+   immediate.  */
+#define _mm_insert_pi8(A, D, N) \
+  ((__m64) __builtin_arm_tinsrb ((__v8qi)(A), (D), (N)))
+#define _mm_insert_pi16(A, D, N) \
+  ((__m64) __builtin_arm_tinsrh ((__v4hi)(A), (D), (N)))
+#define _mm_insert_pi32(A, D, N) \
+  ((__m64) __builtin_arm_tinsrw ((__v2si)(A), (D), (N)))
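+
+/* Usage sketch (illustrative only):
+
+     int   __x = _mm_extract_pi16 (__v, 2);
+     __m64 __w = _mm_insert_pi16 (__v, 7, 0);
+
+   reads the third 16-bit lane of __v sign-extended, then writes 7 into
+   the lowest lane; the selectors must be compile-time constants since
+   they are encoded in the instruction.  */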
+
+/* Compute the element-wise maximum of signed 8-bit values.  */
+static __inline __m64
+_mm_max_pi8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wmaxsb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise maximum of signed 16-bit values.  */
+static __inline __m64
+_mm_max_pi16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wmaxsh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise maximum of signed 32-bit values.  */
+static __inline __m64
+_mm_max_pi32 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wmaxsw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Compute the element-wise maximum of unsigned 8-bit values.  */
+static __inline __m64
+_mm_max_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wmaxub ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise maximum of unsigned 16-bit values.  */
+static __inline __m64
+_mm_max_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wmaxuh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise maximum of unsigned 32-bit values.  */
+static __inline __m64
+_mm_max_pu32 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wmaxuw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Compute the element-wise minimum of signed 8-bit values.  */
+static __inline __m64
+_mm_min_pi8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wminsb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise minimum of signed 16-bit values.  */
+static __inline __m64
+_mm_min_pi16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wminsh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise minimum of signed 32-bit values.  */
+static __inline __m64
+_mm_min_pi32 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wminsw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Compute the element-wise minimum of unsigned 8-bit values.  */
+static __inline __m64
+_mm_min_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wminub ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the element-wise minimum of unsigned 16-bit values.  */
+static __inline __m64
+_mm_min_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wminuh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the element-wise minimum of unsigned 32-bit values.  */
+static __inline __m64
+_mm_min_pu32 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wminuw ((__v2si)__A, (__v2si)__B);
+}
+
+/* Create an 8-bit mask of the signs of 8-bit values.  */
+static __inline int
+_mm_movemask_pi8 (__m64 __A)
+{
+  return __builtin_arm_tmovmskb ((__v8qi)__A);
+}
+
+/* Create a 4-bit mask from the signs of the four 16-bit values.  */
+static __inline int
+_mm_movemask_pi16 (__m64 __A)
+{
+  return __builtin_arm_tmovmskh ((__v4hi)__A);
+}
+
+/* Create a 2-bit mask from the signs of the two 32-bit values.  */
+static __inline int
+_mm_movemask_pi32 (__m64 __A)
+{
+  return __builtin_arm_tmovmskw ((__v2si)__A);
+}
+
+/* Return a combination of the four 16-bit values in A.  The selector
+   must be an immediate.  */
+#define _mm_shuffle_pi16(A, N) \
+  ((__m64) __builtin_arm_wshufh ((__v4hi)(A), (N)))
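+
+/* Usage sketch (illustrative only): each 2-bit field of the selector
+   picks the source lane for the corresponding result lane, so
+
+     __m64 __r = _mm_shuffle_pi16 (__v, 0x1B);
+
+   reverses the four 16-bit lanes (0x1B is 00 01 10 11 in binary).  */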
+
+
+/* Compute the rounded averages of the unsigned 8-bit values in A and B.  */
+static __inline __m64
+_mm_avg_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wavg2br ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the rounded averages of the unsigned 16-bit values in A and B.  */
+static __inline __m64
+_mm_avg_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wavg2hr ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the averages of the unsigned 8-bit values in A and B.  */
+static __inline __m64
+_mm_avg2_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wavg2b ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the averages of the unsigned 16-bit values in A and B.  */
+static __inline __m64
+_mm_avg2_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wavg2h ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+   values in A and B.  Return the value in the lower 16-bit word; the
+   upper words are cleared.  */
+static __inline __m64
+_mm_sad_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wsadb ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 16-bit
+   values in A and B.  Return the value in the lower 32-bit word; the
+   upper words are cleared.  */
+static __inline __m64
+_mm_sad_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wsadh ((__v4hi)__A, (__v4hi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 8-bit
+   values in A and B.  Return the value in the lower 16-bit word; the
+   upper words are cleared.  */
+static __inline __m64
+_mm_sadz_pu8 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wsadbz ((__v8qi)__A, (__v8qi)__B);
+}
+
+/* Compute the sum of the absolute differences of the unsigned 16-bit
+   values in A and B.  Return the value in the lower 32-bit word; the
+   upper words are cleared.  */
+static __inline __m64
+_mm_sadz_pu16 (__m64 __A, __m64 __B)
+{
+  return (__m64) __builtin_arm_wsadhz ((__v4hi)__A, (__v4hi)__B);
+}
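+
+/* Usage sketch (illustrative only): a motion-estimation style cost,
+
+     __m64 __d = _mm_sadz_pu8 (__row_a, __row_b);
+
+   sums |a[i] - b[i]| over the eight byte lanes into the low word of
+   __d with the remaining words cleared.  */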
+
+static __inline __m64
+_mm_align_si64 (__m64 __A, __m64 __B, int __C)
+{
+  return (__m64) __builtin_arm_walign ((__v8qi)__A, (__v8qi)__B, __C);
+}
+
+/* Creates a 64-bit zero.  */
+static __inline __m64
+_mm_setzero_si64 (void)
+{
+  return __builtin_arm_wzero ();
+}
+
+/* Set and Get arbitrary iWMMXt Control registers.
+   Note only registers 0-3 and 8-11 are currently defined,
+   the rest are reserved.  */
+
+static __inline void
+_mm_setwcx (const int __regno, const int __value)
+{
+  switch (__regno)
+    {
+    case 0:  __builtin_arm_setwcx (0, __value); break;
+    case 1:  __builtin_arm_setwcx (1, __value); break;
+    case 2:  __builtin_arm_setwcx (2, __value); break;
+    case 3:  __builtin_arm_setwcx (3, __value); break;
+    case 8:  __builtin_arm_setwcx (8, __value); break;
+    case 9:  __builtin_arm_setwcx (9, __value); break;
+    case 10: __builtin_arm_setwcx (10, __value); break;
+    case 11: __builtin_arm_setwcx (11, __value); break;
+    default: break;
+    }
+}
+
+static __inline int
+_mm_getwcx (const int __regno)
+{
+  switch (__regno)
+    {
+    case 0:  return __builtin_arm_getwcx (0);
+    case 1:  return __builtin_arm_getwcx (1);
+    case 2:  return __builtin_arm_getwcx (2);
+    case 3:  return __builtin_arm_getwcx (3);
+    case 8:  return __builtin_arm_getwcx (8);
+    case 9:  return __builtin_arm_getwcx (9);
+    case 10: return __builtin_arm_getwcx (10);
+    case 11: return __builtin_arm_getwcx (11);
+    default: return 0;
+    }
+}
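+
+/* The switch statements above look redundant, but they ensure that
+   __builtin_arm_setwcx and __builtin_arm_getwcx always see a literal
+   register number: TMCR and TMRC encode the control register in the
+   instruction itself, so it cannot come from a run-time value.  */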
+
+/* Creates a vector of two 32-bit values; I0 is least significant.  */
+static __inline __m64
+_mm_set_pi32 (int __i1, int __i0)
+{
+  union {
+    __m64 __q;
+    struct {
+      unsigned int __i0;
+      unsigned int __i1;
+    } __s;
+  } __u;
+
+  __u.__s.__i0 = __i0;
+  __u.__s.__i1 = __i1;
+
+  return __u.__q;
+}
+
+/* Creates a vector of four 16-bit values; W0 is least significant.  */
+static __inline __m64
+_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
+{
+  unsigned int __i1 = (unsigned short)__w3 << 16 | (unsigned short)__w2;
+  unsigned int __i0 = (unsigned short)__w1 << 16 | (unsigned short)__w0;
+  return _mm_set_pi32 (__i1, __i0);
+}
+
+/* Creates a vector of eight 8-bit values; B0 is least significant.  */
+static __inline __m64
+_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
+            char __b3, char __b2, char __b1, char __b0)
+{
+  unsigned int __i1, __i0;
+
+  __i1 = (unsigned char)__b7;
+  __i1 = __i1 << 8 | (unsigned char)__b6;
+  __i1 = __i1 << 8 | (unsigned char)__b5;
+  __i1 = __i1 << 8 | (unsigned char)__b4;
+
+  __i0 = (unsigned char)__b3;
+  __i0 = __i0 << 8 | (unsigned char)__b2;
+  __i0 = __i0 << 8 | (unsigned char)__b1;
+  __i0 = __i0 << 8 | (unsigned char)__b0;
+
+  return _mm_set_pi32 (__i1, __i0);
+}
+
+/* Similar, but with the arguments in reverse order.  */
+static __inline __m64
+_mm_setr_pi32 (int __i0, int __i1)
+{
+  return _mm_set_pi32 (__i1, __i0);
+}
+
+static __inline __m64
+_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
+{
+  return _mm_set_pi16 (__w3, __w2, __w1, __w0);
+}
+
+static __inline __m64
+_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
+             char __b4, char __b5, char __b6, char __b7)
+{
+  return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
+}
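+
+/* Usage sketch (illustrative only):
+
+     __m64 __v = _mm_set_pi16 (3, 2, 1, 0);
+
+   places 0 in the least significant 16-bit lane and 3 in the most
+   significant one; _mm_setr_pi16 (0, 1, 2, 3) builds the same vector
+   with the arguments listed low lane first.  */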
+
+/* Creates a vector of two 32-bit values, both elements containing I.  */
+static __inline __m64
+_mm_set1_pi32 (int __i)
+{
+  return _mm_set_pi32 (__i, __i);
+}
+
+/* Creates a vector of four 16-bit values, all elements containing W.  */
+static __inline __m64
+_mm_set1_pi16 (short __w)
+{
+  unsigned int __i = (unsigned short)__w << 16 | (unsigned short)__w;
+  return _mm_set1_pi32 (__i);
+}
+
+/* Creates a vector of eight 8-bit values, all elements containing B.  */
+static __inline __m64
+_mm_set1_pi8 (char __b)
+{
+  unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b;
+  unsigned int __i = __w << 16 | __w;
+  return _mm_set1_pi32 (__i);
+}
+
+/* Convert an integer to a __m64 object.  */
+static __inline __m64
+_m_from_int (int __a)
+{
+  return (__m64)__a;
+}
+
+#define _m_packsswb _mm_packs_pi16
+#define _m_packssdw _mm_packs_pi32
+#define _m_packuswb _mm_packs_pu16
+#define _m_packusdw _mm_packs_pu32
+#define _m_packssqd _mm_packs_pi64
+#define _m_packusqd _mm_packs_pu64
+#define _mm_packs_si64 _mm_packs_pi64
+#define _mm_packs_su64 _mm_packs_pu64
+#define _m_punpckhbw _mm_unpackhi_pi8
+#define _m_punpckhwd _mm_unpackhi_pi16
+#define _m_punpckhdq _mm_unpackhi_pi32
+#define _m_punpcklbw _mm_unpacklo_pi8
+#define _m_punpcklwd _mm_unpacklo_pi16
+#define _m_punpckldq _mm_unpacklo_pi32
+#define _m_punpckehsbw _mm_unpackeh_pi8
+#define _m_punpckehswd _mm_unpackeh_pi16
+#define _m_punpckehsdq _mm_unpackeh_pi32
+#define _m_punpckehubw _mm_unpackeh_pu8
+#define _m_punpckehuwd _mm_unpackeh_pu16
+#define _m_punpckehudq _mm_unpackeh_pu32
+#define _m_punpckelsbw _mm_unpackel_pi8
+#define _m_punpckelswd _mm_unpackel_pi16
+#define _m_punpckelsdq _mm_unpackel_pi32
+#define _m_punpckelubw _mm_unpackel_pu8
+#define _m_punpckeluwd _mm_unpackel_pu16
+#define _m_punpckeludq _mm_unpackel_pu32
+#define _m_paddb _mm_add_pi8
+#define _m_paddw _mm_add_pi16
+#define _m_paddd _mm_add_pi32
+#define _m_paddsb _mm_adds_pi8
+#define _m_paddsw _mm_adds_pi16
+#define _m_paddsd _mm_adds_pi32
+#define _m_paddusb _mm_adds_pu8
+#define _m_paddusw _mm_adds_pu16
+#define _m_paddusd _mm_adds_pu32
+#define _m_psubb _mm_sub_pi8
+#define _m_psubw _mm_sub_pi16
+#define _m_psubd _mm_sub_pi32
+#define _m_psubsb _mm_subs_pi8
+#define _m_psubsw _mm_subs_pi16
+#define _m_psubuw _mm_subs_pi32
+#define _m_psubusb _mm_subs_pu8
+#define _m_psubusw _mm_subs_pu16
+#define _m_psubusd _mm_subs_pu32
+#define _m_pmaddwd _mm_madd_pi16
+#define _m_pmadduwd _mm_madd_pu16
+#define _m_pmulhw _mm_mulhi_pi16
+#define _m_pmulhuw _mm_mulhi_pu16
+#define _m_pmullw _mm_mullo_pi16
+#define _m_pmacsw _mm_mac_pi16
+#define _m_pmacuw _mm_mac_pu16
+#define _m_pmacszw _mm_macz_pi16
+#define _m_pmacuzw _mm_macz_pu16
+#define _m_paccb _mm_acc_pu8
+#define _m_paccw _mm_acc_pu16
+#define _m_paccd _mm_acc_pu32
+#define _m_pmia _mm_mia_si64
+#define _m_pmiaph _mm_miaph_si64
+#define _m_pmiabb _mm_miabb_si64
+#define _m_pmiabt _mm_miabt_si64
+#define _m_pmiatb _mm_miatb_si64
+#define _m_pmiatt _mm_miatt_si64
+#define _m_psllw _mm_sll_pi16
+#define _m_psllwi _mm_slli_pi16
+#define _m_pslld _mm_sll_pi32
+#define _m_pslldi _mm_slli_pi32
+#define _m_psllq _mm_sll_si64
+#define _m_psllqi _mm_slli_si64
+#define _m_psraw _mm_sra_pi16
+#define _m_psrawi _mm_srai_pi16
+#define _m_psrad _mm_sra_pi32
+#define _m_psradi _mm_srai_pi32
+#define _m_psraq _mm_sra_si64
+#define _m_psraqi _mm_srai_si64
+#define _m_psrlw _mm_srl_pi16
+#define _m_psrlwi _mm_srli_pi16
+#define _m_psrld _mm_srl_pi32
+#define _m_psrldi _mm_srli_pi32
+#define _m_psrlq _mm_srl_si64
+#define _m_psrlqi _mm_srli_si64
+#define _m_prorw _mm_ror_pi16
+#define _m_prorwi _mm_rori_pi16
+#define _m_prord _mm_ror_pi32
+#define _m_prordi _mm_rori_pi32
+#define _m_prorq _mm_ror_si64
+#define _m_prorqi _mm_rori_si64
+#define _m_pand _mm_and_si64
+#define _m_pandn _mm_andnot_si64
+#define _m_por _mm_or_si64
+#define _m_pxor _mm_xor_si64
+#define _m_pcmpeqb _mm_cmpeq_pi8
+#define _m_pcmpeqw _mm_cmpeq_pi16
+#define _m_pcmpeqd _mm_cmpeq_pi32
+#define _m_pcmpgtb _mm_cmpgt_pi8
+#define _m_pcmpgtub _mm_cmpgt_pu8
+#define _m_pcmpgtw _mm_cmpgt_pi16
+#define _m_pcmpgtuw _mm_cmpgt_pu16
+#define _m_pcmpgtd _mm_cmpgt_pi32
+#define _m_pcmpgtud _mm_cmpgt_pu32
+#define _m_pextrb _mm_extract_pi8
+#define _m_pextrw _mm_extract_pi16
+#define _m_pextrd _mm_extract_pi32
+#define _m_pextrub _mm_extract_pu8
+#define _m_pextruw _mm_extract_pu16
+#define _m_pextrud _mm_extract_pu32
+#define _m_pinsrb _mm_insert_pi8
+#define _m_pinsrw _mm_insert_pi16
+#define _m_pinsrd _mm_insert_pi32
+#define _m_pmaxsb _mm_max_pi8
+#define _m_pmaxsw _mm_max_pi16
+#define _m_pmaxsd _mm_max_pi32
+#define _m_pmaxub _mm_max_pu8
+#define _m_pmaxuw _mm_max_pu16
+#define _m_pmaxud _mm_max_pu32
+#define _m_pminsb _mm_min_pi8
+#define _m_pminsw _mm_min_pi16
+#define _m_pminsd _mm_min_pi32
+#define _m_pminub _mm_min_pu8
+#define _m_pminuw _mm_min_pu16
+#define _m_pminud _mm_min_pu32
+#define _m_pmovmskb _mm_movemask_pi8
+#define _m_pmovmskw _mm_movemask_pi16
+#define _m_pmovmskd _mm_movemask_pi32
+#define _m_pshufw _mm_shuffle_pi16
+#define _m_pavgb _mm_avg_pu8
+#define _m_pavgw _mm_avg_pu16
+#define _m_pavg2b _mm_avg2_pu8
+#define _m_pavg2w _mm_avg2_pu16
+#define _m_psadbw _mm_sad_pu8
+#define _m_psadwd _mm_sad_pu16
+#define _m_psadzbw _mm_sadz_pu8
+#define _m_psadzwd _mm_sadz_pu16
+#define _m_paligniq _mm_align_si64
+#define _m_cvt_si2pi _mm_cvtsi64_m64
+#define _m_cvt_pi2si _mm_cvtm64_si64
+
+#endif /* _MMINTRIN_H_INCLUDED */
index 0efc1d5..b72c21c 100644
@@ -37,6 +37,10 @@ MULTILIB_EXCEPTIONS += *mhard-float/*mthumb*
 
 MULTILIB_REDUNDANT_DIRS = interwork/thumb=thumb
 
+MULTILIB_OPTIONS    += mcpu=iwmmxt
+MULTILIB_DIRNAMES   += iwmmxt
+MULTILIB_REDUNDANT_DIRS += interwork/thumb/iwmmxt=thumb
+
 EXTRA_MULTILIB_PARTS = crtbegin.o crtend.o crti.o crtn.o
 
 LIBGCC               = stmp-multilib
index 7ff3b2a..a8ea9a7 100644
@@ -6338,7 +6338,7 @@ assembly code.  Permissible names are: @samp{arm2}, @samp{arm250},
 @samp{strongarm}, @samp{strongarm110}, @samp{strongarm1100},
 @samp{arm8}, @samp{arm810}, @samp{arm9}, @samp{arm9e}, @samp{arm920},
 @samp{arm920t}, @samp{arm940t}, @samp{arm9tdmi}, @samp{arm10tdmi},
-@samp{arm1020t}, @samp{xscale}, @samp{ep9312}.
+@samp{arm1020t}, @samp{xscale}, @samp{iwmmxt}, @samp{ep9312}.
 
 @itemx -mtune=@var{name}
 @opindex mtune
@@ -6358,7 +6358,7 @@ name to determine what kind of instructions it can emit when generating
 assembly code.  This option can be used in conjunction with or instead
 of the @option{-mcpu=} option.  Permissible names are: @samp{armv2},
 @samp{armv2a}, @samp{armv3}, @samp{armv3m}, @samp{armv4}, @samp{armv4t},
-@samp{armv5}, @samp{armv5t}, @samp{armv5te}, @samp{ep9312}.
+@samp{armv5}, @samp{armv5t}, @samp{armv5te}, @samp{iwmmxt}, @samp{ep9312}.
 
 @item -mfpe=@var{number}
 @itemx -mfp=@var{number}