+2007-10-12 Nathan Froyd <froydnj@codesourcery.com>
+
+ PR 11001
+ * config/i386/i386.md (strmov): Check for esi and edi usage.
+ * config/i386/i386.c (decide_alg): Check whether we can use a
+ rep prefix and adjust algorithm choice accordingly.
+ (ix86_expand_strlen): Check for eax, ecx, and edi usage.
+
2007-10-12 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* config/c4x/c4x.md (mulqi3, mulhi3): Use optab_libfunc.
int *dynamic_check)
{
const struct stringop_algs * algs;
+ /* Algorithms using the rep prefix want at least edi and ecx;
+ additionally, memset wants eax and memcpy wants esi. Don't
+ consider such algorithms if the user has appropriated those
+ registers for their own purposes. */
+ bool rep_prefix_usable = !(global_regs[2] || global_regs[5]
+ || (memset ? global_regs[0] : global_regs[4]));
+
+#define ALG_USABLE_P(alg) (rep_prefix_usable \
+ || (alg != rep_prefix_1_byte \
+ && alg != rep_prefix_4_byte \
+ && alg != rep_prefix_8_byte))
*dynamic_check = -1;
if (memset)
algs = &ix86_cost->memset[TARGET_64BIT != 0];
else
algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
- if (stringop_alg != no_stringop)
+ if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
return stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (optimize_size)
{
if (!count || (count & 3))
- return rep_prefix_1_byte;
+ return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
else
- return rep_prefix_4_byte;
+ return rep_prefix_usable ? rep_prefix_4_byte : loop;
}
/* Very tiny blocks are best handled via the loop, REP is expensive to setup.
*/
enum stringop_alg alg = libcall;
for (i = 0; i < NAX_STRINGOP_ALGS; i++)
{
- gcc_assert (algs->size[i].max);
+ /* We get here if the algorithms that were not libcall-based
+ were rep-prefix based and we are unable to use rep prefixes
+ based on global register usage. Break out of the loop and
+ use the heuristic below. */
+ if (algs->size[i].max == 0)
+ break;
if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
{
- if (algs->size[i].alg != libcall)
- alg = algs->size[i].alg;
+ enum stringop_alg candidate = algs->size[i].alg;
+
+ if (candidate != libcall && ALG_USABLE_P (candidate))
+ alg = candidate;
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking
- last non-libcall inline algorithm. */
+ last non-libcall inline algorithm. */
if (TARGET_INLINE_ALL_STRINGOPS)
{
/* When the current size is best to be copied by a libcall,
- but we are still forced to inline, run the heuristic bellow
+ but we are still forced to inline, run the heuristic below
that will pick code for medium sized blocks. */
if (alg != libcall)
return alg;
break;
}
- else
- return algs->size[i].alg;
+ else if (ALG_USABLE_P (candidate))
+ return candidate;
}
}
- gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
+ gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
}
/* When asked to inline the call anyway, try to pick meaningful choice.
We look for maximal size of block that is faster to copy by hand and
If this turns out to be bad, we might simply specify the preferred
choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
- && algs->unknown_size == libcall)
+ && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
{
int max = -1;
enum stringop_alg alg;
int i;
+ bool any_alg_usable_p = true;
for (i = 0; i < NAX_STRINGOP_ALGS; i++)
- if (algs->size[i].alg != libcall && algs->size[i].alg)
- max = algs->size[i].max;
+ {
+ enum stringop_alg candidate = algs->size[i].alg;
+ any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
+
+ if (candidate != libcall && candidate
+ && ALG_USABLE_P (candidate))
+ max = algs->size[i].max;
+ }
+ /* If there aren't any usable algorithms, then recursing on
+ smaller sizes isn't going to find anything. Just return the
+ simple byte-at-a-time copy loop. */
+ if (!any_alg_usable_p)
+ {
+ /* Pick something reasonable. */
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ *dynamic_check = 128;
+ return loop_1_byte;
+ }
if (max == -1)
max = 4096;
alg = decide_alg (count, max / 2, memset, dynamic_check);
*dynamic_check = max;
return alg;
}
- return algs->unknown_size;
+ return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
+#undef ALG_USABLE_P
}
/* Decide on alignment. We know that the operand is already aligned to ALIGN
else
{
rtx unspec;
+
+ /* Can't use this if the user has appropriated eax, ecx, or edi. */
+ if (global_regs[0] || global_regs[2] || global_regs[5])
+ return false;
+
scratch2 = gen_reg_rtx (Pmode);
scratch3 = gen_reg_rtx (Pmode);
scratch4 = force_reg (Pmode, constm1_rtx);
operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
- if (TARGET_SINGLE_STRINGOP || optimize_size)
+ /* Can't use this if the user has appropriated esi or edi. */
+ if ((TARGET_SINGLE_STRINGOP || optimize_size)
+ && !(global_regs[4] || global_regs[5]))
{
emit_insn (gen_strmov_singleop (operands[0], operands[1],
operands[2], operands[3],
+2007-10-12 Nathan Froyd <froydnj@codesourcery.com>
+
+ PR 11001
+ * gcc.target/i386/pr11001-strlen-1.c: New testcase.
+ * gcc.target/i386/pr11001-strlen-2.c: New testcase.
+ * gcc.target/i386/pr11001-strlen-3.c: New testcase.
+ * gcc.target/i386/pr11001-memset-1.c: New testcase.
+ * gcc.target/i386/pr11001-memset-2.c: New testcase.
+ * gcc.target/i386/pr11001-memset-3.c: New testcase.
+ * gcc.target/i386/pr11001-memcpy-1.c: New testcase.
+ * gcc.target/i386/pr11001-memcpy-2.c: New testcase.
+ * gcc.target/i386/pr11001-memcpy-3.c: New testcase.
+
2007-10-12 Richard Guenther <rguenther@suse.de>
PR middle-end/26198
--- /dev/null
+/* Ensure that we don't use 'rep movX' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+register int regvar asm("%esi");
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+ memcpy (foo, bar, sizeof foo);
+ memcpy (baz, quux, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep movsl" } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
--- /dev/null
+/* Ensure that we don't use 'rep movX' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+register int regvar asm("%edi");
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+ memcpy (foo, bar, sizeof foo);
+ memcpy (baz, quux, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep movsl" } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
--- /dev/null
+/* Ensure that we don't use 'rep movX' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memcpy (void *, const void *, __SIZE_TYPE__);
+
+register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+ memcpy (foo, bar, sizeof foo);
+ memcpy (baz, quux, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep movsl" } } */
+/* { dg-final { scan-assembler-not "rep movsb" } } */
--- /dev/null
+/* Ensure that we don't use 'rep stoX' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+register int regvar asm("%eax"); /* { dg-warning "call-clobbered register" } */
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+ memset (foo, 0, sizeof foo);
+ memset (baz, 0, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep stosl" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
--- /dev/null
+/* Ensure that we don't use 'rep stoX' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+ memset (foo, 0, sizeof foo);
+ memset (baz, 0, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep stosl" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
--- /dev/null
+/* Ensure that we don't use 'rep stoX' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-Os -m32" } */
+
+extern void *memset (void *, int, __SIZE_TYPE__);
+
+register int regvar asm("%edi");
+
+int foo[10];
+int bar[10];
+
+char baz[15];
+char quux[15];
+
+void
+do_copy ()
+{
+ memset (foo, 0, sizeof foo);
+ memset (baz, 0, sizeof baz);
+}
+
+/* { dg-final { scan-assembler-not "rep stosl" } } */
+/* { dg-final { scan-assembler-not "rep stosb" } } */
--- /dev/null
+/* Ensure that we don't use 'repnz scasb' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-O1 -m32" } */
+
+extern __SIZE_TYPE__ strlen (const char *);
+extern void *malloc (__SIZE_TYPE__);
+
+register int regvar asm("%edi");
+
+char *
+do_copy (char *str)
+{
+ return malloc (strlen (str) + 1);
+}
+
+/* { dg-final { scan-assembler-not "repnz scasb" } } */
--- /dev/null
+/* Ensure that we don't use 'repnz scasb' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-O1 -m32" } */
+
+extern __SIZE_TYPE__ strlen (const char *);
+extern void *malloc (__SIZE_TYPE__);
+
+register int regvar asm("%eax"); /* { dg-warning "call-clobbered register" } */
+
+char *
+do_copy (char *str)
+{
+ return malloc (strlen (str) + 1);
+}
+
+/* { dg-final { scan-assembler-not "repnz scasb" } } */
--- /dev/null
+/* Ensure that we don't use 'repnz scasb' in the presence of register globals. */
+/* { dg-do compile } */
+/* { dg-options "-O1 -m32" } */
+
+extern __SIZE_TYPE__ strlen (const char *);
+extern void *malloc (__SIZE_TYPE__);
+
+register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
+
+char *
+do_copy (char *str)
+{
+ return malloc (strlen (str) + 1);
+}
+
+/* { dg-final { scan-assembler-not "repnz scasb" } } */