OSDN Git Service

2007-05-26 H.J. Lu <hongjiu.lu@intel.com>
author hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 26 May 2007 14:34:21 +0000 (14:34 +0000)
committer hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Sat, 26 May 2007 14:34:21 +0000 (14:34 +0000)
* config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.

* config/i386/i386.c (ix86_expand_sse4_unpack): New.

* config/i386/sse.md (vec_unpacku_hi_v16qi): Call
ix86_expand_sse4_unpack if SSE4.1 is enabled.
(vec_unpacks_hi_v16qi): Likewise.
(vec_unpacku_lo_v16qi): Likewise.
(vec_unpacks_lo_v16qi): Likewise.
(vec_unpacku_hi_v8hi): Likewise.
(vec_unpacks_hi_v8hi): Likewise.
(vec_unpacku_lo_v8hi): Likewise.
(vec_unpacks_lo_v8hi): Likewise.
(vec_unpacku_hi_v4si): Likewise.
(vec_unpacks_hi_v4si): Likewise.
(vec_unpacku_lo_v4si): Likewise.
(vec_unpacks_lo_v4si): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@125093 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386-protos.h
gcc/config/i386/i386.c
gcc/config/i386/sse.md

index 85ddc22..cdd0b84 100644 (file)
@@ -1,3 +1,23 @@
+2007-05-26  H.J. Lu  <hongjiu.lu@intel.com>
+
+       * config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
+
+       * config/i386/i386.c (ix86_expand_sse4_unpack): New.
+
+       * config/i386/sse.md (vec_unpacku_hi_v16qi): Call
+       ix86_expand_sse4_unpack if SSE4.1 is enabled.
+       (vec_unpacks_hi_v16qi): Likewise.
+       (vec_unpacku_lo_v16qi): Likewise.
+       (vec_unpacks_lo_v16qi): Likewise.
+       (vec_unpacku_hi_v8hi): Likewise.
+       (vec_unpacks_hi_v8hi): Likewise.
+       (vec_unpacku_lo_v8hi): Likewise.
+       (vec_unpacks_lo_v8hi): Likewise.
+       (vec_unpacku_hi_v4si): Likewise.
+       (vec_unpacks_hi_v4si): Likewise.
+       (vec_unpacku_lo_v4si): Likewise.
+       (vec_unpacks_lo_v4si): Likewise.
+
 2007-05-26  Kazu Hirata  <kazu@codesourcery.com>
 
        * c-typeck.c, config/arm/arm.c, config/darwin.c,
index 2f32039..60b4955 100644 (file)
@@ -112,6 +112,7 @@ extern int ix86_expand_fp_movcc (rtx[]);
 extern bool ix86_expand_fp_vcond (rtx[]);
 extern bool ix86_expand_int_vcond (rtx[]);
 extern void ix86_expand_sse_unpack (rtx[], bool, bool);
+extern void ix86_expand_sse4_unpack (rtx[], bool, bool);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
index 7bc5fe0..b0db950 100644 (file)
@@ -12843,6 +12843,55 @@ ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
   emit_insn (unpack (dest, operands[1], se));
 }
 
+/* This function performs the same task as ix86_expand_sse_unpack,
+   but with SSE4.1 instructions.
+
+   OPERANDS[0] is the destination and OPERANDS[1] the source vector.
+   The mode of OPERANDS[1] (V16QImode, V8HImode or V4SImode) selects
+   the extension pattern; any other mode hits gcc_unreachable.
+   UNSIGNED_P picks the zero_extend generator instead of the extend
+   one.  If HIGH_P, the source is first shifted right by 64 bits into
+   a fresh pseudo so that its upper half is the part that gets
+   extended; otherwise OPERANDS[1] is handed to the extension pattern
+   directly.  */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*unpack)(rtx, rtx);
+  rtx src, dest;
+
+  switch (imode)
+    {
+    case V16QImode:
+      if (unsigned_p)
+       unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+      else
+       unpack = gen_sse4_1_extendv8qiv8hi2;
+      break;
+    case V8HImode:
+      if (unsigned_p)
+       unpack = gen_sse4_1_zero_extendv4hiv4si2;
+      else
+       unpack = gen_sse4_1_extendv4hiv4si2;
+      break;
+    case V4SImode:
+      if (unsigned_p)
+       unpack = gen_sse4_1_zero_extendv2siv2di2;
+      else
+       unpack = gen_sse4_1_extendv2siv2di2;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  dest = operands[0];
+  if (high_p)
+    {
+      /* Shift higher 8 bytes to lower 8 bytes.  The shift is emitted
+         on the TImode view of a new pseudo, so the result lands in
+         SRC and OPERANDS[1] itself is not the shift destination.  */
+      src = gen_reg_rtx (imode);
+      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
+                                  gen_lowpart (TImode, operands[1]),
+                                  GEN_INT (64)));
+    }
+  else
+    src = operands[1];
+
+  emit_insn (unpack (dest, src));
+}
+
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
index 800807c..81ff925 100644 (file)
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
    (match_operand:V16QI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
    (match_operand:V8HI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, true);
+  else
+    ix86_expand_sse_unpack (operands, true, true);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, true);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, true);
+  else
+    ix86_expand_sse_unpack (operands, false, true);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, true, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, true, false);
+  else
+    ix86_expand_sse_unpack (operands, true, false);
   DONE;
 })
 
    (match_operand:V4SI 1 "register_operand" "")]
   "TARGET_SSE2"
 {
-  ix86_expand_sse_unpack (operands, false, false);
+  if (TARGET_SSE4_1)
+    ix86_expand_sse4_unpack (operands, false, false);
+  else
+    ix86_expand_sse_unpack (operands, false, false);
   DONE;
 })