OSDN Git Service

* config/i386/sse.md (vec_interleave_highv16qi,
author rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 27 Nov 2009 15:58:50 +0000 (15:58 +0000)
committer rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4>
Fri, 27 Nov 2009 15:58:50 +0000 (15:58 +0000)
vec_interleave_lowv16qi, vec_interleave_highv8hi,
vec_interleave_lowv8hi, vec_interleave_highv4si,
vec_interleave_lowv4si, vec_interleave_highv2di,
vec_interleave_lowv2di, vec_interleave_highv4sf,
vec_interleave_lowv4sf, vec_interleave_highv2df,
vec_interleave_lowv2df): Remove expanders.
(sse_unpcklps, sse_unpckhps, sse2_unpckhpd_exp, sse2_unpcklpd_exp,
sse2_punpckhqdq, sse2_punpcklqdq, sse2_punpckhbw, sse2_punpcklbw,
sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq):
Rename to their proper vec_interleave equivalent.  Update all users
to the new name.
* config/i386/i386.c, config/i386/i386.md: Update all users.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@154711 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/i386.md
gcc/config/i386/sse.md

index 8165df5..c38e970 100644 (file)
@@ -1,3 +1,19 @@
+2009-11-27  Richard Henderson  <rth@redhat.com>
+
+       * config/i386/sse.md (vec_interleave_highv16qi,
+       vec_interleave_lowv16qi, vec_interleave_highv8hi,
+       vec_interleave_lowv8hi, vec_interleave_highv4si,
+       vec_interleave_lowv4si, vec_interleave_highv2di,
+       vec_interleave_lowv2di, vec_interleave_highv4sf,
+       vec_interleave_lowv4sf, vec_interleave_highv2df,
+       vec_interleave_lowv2df): Remove expanders.
+       (sse_unpcklps, sse_unpckhps, sse2_unpckhpd_exp, sse2_unpcklpd_exp,
+       sse2_punpckhqdq, sse2_punpcklqdq, sse2_punpckhbw, sse2_punpcklbw,
+       sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq):
+       Rename to their proper vec_interleave equivalent.  Update all users
+       to the new name.
+       * config/i386/i386.c, config/i386/i386.md: Update all users.
+
 2009-11-27  Michael Matz  <matz@suse.de>
 
        PR rtl-optimization/42084
index 6cbc2dc..443b4eb 100644 (file)
@@ -13944,7 +13944,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
   exponents = validize_mem (force_const_mem (V4SImode, x));
 
   /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
-  emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
+  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
 
   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
@@ -13970,7 +13970,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
   else
     {
       x = copy_to_mode_reg (V2DFmode, fp_xmm);
-      emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
+      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
     }
 
@@ -21690,8 +21690,8 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
-  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
 
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
@@ -21799,8 +21799,8 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3,  "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
 
@@ -21845,14 +21845,14 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
 
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
@@ -26483,6 +26483,7 @@ x86_emit_floatuns (rtx operands[2])
 \f
 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
    with all elements equal to VAR.  Return true if successful.  */
+/* ??? Call into the vec_perm support to implement the broadcast.  */
 
 static bool
 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
@@ -26552,7 +26553,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
          tmp1 = gen_reg_rtx (V8HImode);
          emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
          /* Duplicate the low short through the whole low SImode word.  */
-         emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
+         emit_insn (gen_vec_interleave_lowv8hi (tmp1, tmp1, tmp1));
          /* Cast the V8HImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
@@ -26584,8 +26585,8 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
          tmp1 = gen_reg_rtx (V16QImode);
          emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
          /* Duplicate the low byte through the whole low SImode word.  */
-         emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
-         emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+         emit_insn (gen_vec_interleave_lowv16qi (tmp1, tmp1, tmp1));
+         emit_insn (gen_vec_interleave_lowv16qi (tmp1, tmp1, tmp1));
          /* Cast the V16QImode vector back to a V4SImode vector.  */
          tmp2 = gen_reg_rtx (V4SImode);
          emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
@@ -27417,7 +27418,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
-         emit_insn (gen_sse_unpcklps (target, target, target));
+         emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D  */
@@ -27627,7 +27628,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
 
        case 2:
          tmp = gen_reg_rtx (mode);
-         emit_insn (gen_sse_unpckhps (tmp, vec, vec));
+         emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
          break;
 
        default:
@@ -27661,7 +27662,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
 
            case 2:
              tmp = gen_reg_rtx (mode);
-             emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
+             emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
              break;
 
            default:
@@ -29730,14 +29731,15 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
-         emit_insn (gen_sse2_punpckhwd (t1, d->op0, d->op1));
-         emit_insn (gen_sse2_punpcklwd (d->target, d->op0, d->op1));
-         emit_insn (gen_sse2_punpckhwd (t2, d->target, t1));
-         emit_insn (gen_sse2_punpcklwd (d->target, d->target, t1));
+         emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
+         emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
+         emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
+         emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
-           emit_insn (gen_sse2_punpckhwd (d->target, d->target, t2));
+           t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
-           emit_insn (gen_sse2_punpcklwd (d->target, d->target, t2));
+           t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
+         emit_insn (t3);
        }
       break;
 
@@ -29749,16 +29751,17 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
-         emit_insn (gen_sse2_punpckhbw (t1, d->op0, d->op1));
-         emit_insn (gen_sse2_punpcklbw (d->target, d->op0, d->op1));
-         emit_insn (gen_sse2_punpckhbw (t2, d->target, t1));
-         emit_insn (gen_sse2_punpcklbw (d->target, d->target, t1));
-         emit_insn (gen_sse2_punpckhbw (t3, d->target, t2));
-         emit_insn (gen_sse2_punpcklbw (d->target, d->target, t2));
+         emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
+         emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
+         emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
+         emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
+         emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
+         emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
-           emit_insn (gen_sse2_punpckhbw (d->target, d->target, t3));
+           t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
-           emit_insn (gen_sse2_punpcklbw (d->target, d->target, t3));
+           t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
+         emit_insn (t3);
        }
       break;
 
index 6059dd4..d401f92 100644 (file)
        }
       else
        operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-      emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
+      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+                                            operands[3]));
     }
   else
     emit_insn (gen_vec_setv4sf_0 (operands[3],
                              gen_rtx_SUBREG (SImode, operands[1], 0)));
   emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
                              gen_rtx_SUBREG (SImode, operands[1], 4)));
-  emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+  emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
+                                        operands[4]));
 
   operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
 })
index 12c5b17..27c7a8b 100644 (file)
                                  gen_rtx_SUBREG (SImode, operands[1], 0)));
       emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
                                  gen_rtx_SUBREG (SImode, operands[1], 4)));
-      emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
+      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
+                                            operands[2]));
     }
  else if (memory_operand (operands[1], DImode))
-      emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
+   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
+                                 operands[1], const0_rtx));
  else
-      gcc_unreachable ();
+   gcc_unreachable ();
 })
 
 (define_split
 
   emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
   emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
-  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
-                                 gen_lowpart (V2DImode, r1),
-                                 gen_lowpart (V2DImode, r2)));
+  emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
+                                        gen_lowpart (V2DImode, r1),
+                                        gen_lowpart (V2DImode, r2)));
   DONE;
 })
 
 
   emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
   emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
-  emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
-                                 gen_lowpart (V2DImode, r1),
-                                 gen_lowpart (V2DImode, r2)));
+  emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
+                                        gen_lowpart (V2DImode, r1),
+                                        gen_lowpart (V2DImode, r2)));
   DONE;
 })
 
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF,V2SF,V2SF")])
 
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpckhps256"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "*avx_unpckhps"
+(define_insn "*avx_interleave_highv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4SF")])
 
-(define_insn "sse_unpckhps"
+(define_insn "vec_interleave_highv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
   [(set_attr "type" "sselog")
    (set_attr "mode" "V4SF")])
 
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpcklps256"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
        (vec_select:V8SF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
 
-(define_insn "*avx_unpcklps"
+(define_insn "*avx_interleave_lowv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4SF")])
 
-(define_insn "sse_unpcklps"
+(define_insn "vec_interleave_lowv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpckhpd256"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
-(define_expand "sse2_unpckhpd_exp"
+(define_expand "vec_interleave_highv2df"
   [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
   "TARGET_SSE2"
   "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
 
-(define_insn "*avx_unpckhpd"
+(define_insn "*avx_interleave_highv2df"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V2DF,V1DF,V1DF")])
 
-(define_insn "sse2_unpckhpd"
+(define_insn "*sse2_interleave_highv2df"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
   DONE;
 })
 
+;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpcklpd256"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
        (vec_select:V4DF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
-(define_expand "sse2_unpcklpd_exp"
+(define_expand "vec_interleave_lowv2df"
   [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
   "TARGET_SSE2"
   "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
 
-(define_insn "*avx_unpcklpd"
+(define_insn "*avx_interleave_lowv2df"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
    (set_attr "prefix" "vex")
    (set_attr "mode" "V2DF,V1DF,V1DF")])
 
-(define_insn "sse2_unpcklpd"
+(define_insn "*sse2_interleave_lowv2df"
   [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
 })
 
 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
-(define_insn "*avx_punpckhqdq"
+(define_insn "*avx_interleave_highv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpckhqdq"
+(define_insn "vec_interleave_highv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpcklqdq"
+(define_insn "*avx_interleave_lowv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpcklqdq"
+(define_insn "vec_interleave_lowv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
      each word.  We don't care what goes into the high byte of each word.
      Rather than trying to get zero in there, most convenient is to let
      it be a copy of the low byte.  */
-  emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
-  emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
-  emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
-  emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
+  emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
+  emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
+  emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
+  emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
 
   /* Multiply words.  The end-of-line annotations here give a picture of what
      the output of that instruction looks like.  Dot means don't care; the
                                const0_rtx, const0_rtx));
 
   /* Merge the parts back together.  */
-  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+  emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
   DONE;
 })
 
   DONE;
 })
 
-(define_expand "vec_interleave_highv16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "")
-        (vec_select:V16QI
-          (vec_concat:V32QI
-            (match_operand:V16QI 1 "register_operand" "")
-            (match_operand:V16QI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 8)  (const_int 24)
-                     (const_int 9)  (const_int 25)
-                     (const_int 10) (const_int 26)
-                     (const_int 11) (const_int 27)
-                     (const_int 12) (const_int 28)
-                     (const_int 13) (const_int 29)
-                     (const_int 14) (const_int 30)
-                     (const_int 15) (const_int 31)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_lowv16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "")
-        (vec_select:V16QI
-          (vec_concat:V32QI
-            (match_operand:V16QI 1 "register_operand" "")
-            (match_operand:V16QI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 0) (const_int 16)
-                     (const_int 1) (const_int 17)
-                     (const_int 2) (const_int 18)
-                     (const_int 3) (const_int 19)
-                     (const_int 4) (const_int 20)
-                     (const_int 5) (const_int 21)
-                     (const_int 6) (const_int 22)
-                     (const_int 7) (const_int 23)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_highv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=")
-        (vec_select:V8HI
-          (vec_concat:V16HI
-            (match_operand:V8HI 1 "register_operand" "")
-            (match_operand:V8HI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 4) (const_int 12)
-                     (const_int 5) (const_int 13)
-                     (const_int 6) (const_int 14)
-                     (const_int 7) (const_int 15)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_lowv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "")
-        (vec_select:V8HI
-          (vec_concat:V16HI
-            (match_operand:V8HI 1 "register_operand" "")
-            (match_operand:V8HI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 0) (const_int 8)
-                     (const_int 1) (const_int 9)
-                     (const_int 2) (const_int 10)
-                     (const_int 3) (const_int 11)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_highv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "")
-        (vec_select:V4SI
-          (vec_concat:V8SI
-            (match_operand:V4SI 1 "register_operand" "")
-            (match_operand:V4SI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 2) (const_int 6)
-                     (const_int 3) (const_int 7)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_lowv4si"
-  [(set (match_operand:V4SI 0 "register_operand" "")
-        (vec_select:V4SI
-          (vec_concat:V8SI
-            (match_operand:V4SI 1 "register_operand" "")
-            (match_operand:V4SI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 0) (const_int 4)
-                     (const_int 1) (const_int 5)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_highv2di"
-  [(set (match_operand:V2DI 0 "register_operand" "")
-        (vec_select:V2DI
-          (vec_concat:V4DI
-            (match_operand:V2DI 1 "register_operand" "")
-            (match_operand:V2DI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 1)
-                     (const_int 3)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_lowv2di"
-  [(set (match_operand:V2DI 0 "register_operand" "")
-        (vec_select:V2DI
-          (vec_concat:V4DI
-            (match_operand:V2DI 1 "register_operand" "")
-            (match_operand:V2DI 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 0)
-                     (const_int 2)])))]
-  "TARGET_SSE2"
-{
-  emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
-  DONE;
-})
-
-(define_expand "vec_interleave_highv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (vec_select:V4SF
-          (vec_concat:V8SF
-            (match_operand:V4SF 1 "register_operand" "")
-            (match_operand:V4SF 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 2) (const_int 6)
-                     (const_int 3) (const_int 7)])))]
-  "TARGET_SSE")
-
-(define_expand "vec_interleave_lowv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (vec_select:V4SF
-          (vec_concat:V8SF
-            (match_operand:V4SF 1 "register_operand" "")
-            (match_operand:V4SF 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 0) (const_int 4)
-                     (const_int 1) (const_int 5)])))]
-  "TARGET_SSE")
-
-(define_expand "vec_interleave_highv2df"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (vec_select:V2DF
-          (vec_concat:V4DF
-            (match_operand:V2DF 1 "register_operand" "")
-            (match_operand:V2DF 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 1)
-                     (const_int 3)])))]
-  "TARGET_SSE2")
-
-(define_expand "vec_interleave_lowv2df"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (vec_select:V2DF
-          (vec_concat:V4DF
-            (match_operand:V2DF 1 "register_operand" "")
-            (match_operand:V2DF 2 "nonimmediate_operand" ""))
-          (parallel [(const_int 0)
-                     (const_int 2)])))]
-  "TARGET_SSE2")
-
 (define_insn "*avx_packsswb"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpckhbw"
+(define_insn "*avx_interleave_highv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpckhbw"
+(define_insn "vec_interleave_highv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpcklbw"
+(define_insn "*avx_interleave_lowv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpcklbw"
+(define_insn "vec_interleave_lowv16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpckhwd"
+(define_insn "*avx_interleave_highv8hi"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpckhwd"
+(define_insn "vec_interleave_highv8hi"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpcklwd"
+(define_insn "*avx_interleave_lowv8hi"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpcklwd"
+(define_insn "vec_interleave_lowv8hi"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpckhdq"
+(define_insn "*avx_interleave_highv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpckhdq"
+(define_insn "vec_interleave_highv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*avx_punpckldq"
+(define_insn "*avx_interleave_lowv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
    (set_attr "prefix" "vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_punpckldq"
+(define_insn "vec_interleave_lowv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI