+2007-05-26 H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386-protos.h (ix86_expand_sse4_unpack): New.
+
+ * config/i386/i386.c (ix86_expand_sse4_unpack): New.
+
+ * config/i386/sse.md (vec_unpacku_hi_v16qi): Call
+ ix86_expand_sse4_unpack if SSE4.1 is enabled.
+ (vec_unpacks_hi_v16qi): Likewise.
+ (vec_unpacku_lo_v16qi): Likewise.
+ (vec_unpacks_lo_v16qi): Likewise.
+ (vec_unpacku_hi_v8hi): Likewise.
+ (vec_unpacks_hi_v8hi): Likewise.
+ (vec_unpacku_lo_v8hi): Likewise.
+ (vec_unpacks_lo_v8hi): Likewise.
+ (vec_unpacku_hi_v4si): Likewise.
+ (vec_unpacks_hi_v4si): Likewise.
+ (vec_unpacku_lo_v4si): Likewise.
+ (vec_unpacks_lo_v4si): Likewise.
+
2007-05-26 Kazu Hirata <kazu@codesourcery.com>
* c-typeck.c, config/arm/arm.c, config/darwin.c,
emit_insn (unpack (dest, operands[1], se));
}
+/* SSE4.1 variant of ix86_expand_sse_unpack: it performs the same task,
+   but emits the sse4_1 extend patterns instead.
+
+   OPERANDS[0] is the destination and OPERANDS[1] the source vector;
+   the mode of OPERANDS[1] (V16QI, V8HI or V4SI) picks the pattern.
+   UNSIGNED_P selects the zero_extend generator over the extend one.
+   When HIGH_P is set, the upper 8 bytes of the source are first moved
+   down into the lower 8 bytes of a fresh register, and that register
+   is extended instead of the source itself.  */
+
+void
+ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
+{
+  enum machine_mode imode = GET_MODE (operands[1]);
+  rtx (*gen_extend) (rtx, rtx);
+  rtx input = operands[1];
+
+  /* Choose the generator from the source vector mode and signedness.
+     Any other mode is a caller bug.  */
+  switch (imode)
+    {
+    case V16QImode:
+      gen_extend = (unsigned_p
+                    ? gen_sse4_1_zero_extendv8qiv8hi2
+                    : gen_sse4_1_extendv8qiv8hi2);
+      break;
+    case V8HImode:
+      gen_extend = (unsigned_p
+                    ? gen_sse4_1_zero_extendv4hiv4si2
+                    : gen_sse4_1_extendv4hiv4si2);
+      break;
+    case V4SImode:
+      gen_extend = (unsigned_p
+                    ? gen_sse4_1_zero_extendv2siv2di2
+                    : gen_sse4_1_extendv2siv2di2);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (high_p)
+    {
+      /* Bring the upper 8 bytes down to the lower 8 bytes: view both
+         registers as TImode and shift the source right by 64 bits
+         into a new pseudo, leaving OPERANDS[1] untouched.  */
+      rtx upper = gen_reg_rtx (imode);
+
+      emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, upper),
+                                   gen_lowpart (TImode, operands[1]),
+                                   GEN_INT (64)));
+      input = upper;
+    }
+
+  emit_insn (gen_extend (operands[0], input));
+}
+
/* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be
done by generic code. */
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
DONE;
})
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
DONE;
})
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
DONE;
})
(match_operand:V16QI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
DONE;
})
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
DONE;
})
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
DONE;
})
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
DONE;
})
(match_operand:V8HI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
DONE;
})
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, true);
+ else
+ ix86_expand_sse_unpack (operands, true, true);
DONE;
})
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, true);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, true);
+ else
+ ix86_expand_sse_unpack (operands, false, true);
DONE;
})
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, true, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, true, false);
+ else
+ ix86_expand_sse_unpack (operands, true, false);
DONE;
})
(match_operand:V4SI 1 "register_operand" "")]
"TARGET_SSE2"
{
- ix86_expand_sse_unpack (operands, false, false);
+ if (TARGET_SSE4_1)
+ ix86_expand_sse4_unpack (operands, false, false);
+ else
+ ix86_expand_sse_unpack (operands, false, false);
DONE;
})