{&i386_cost, 4, 3, 4, 3, 4},
{&i486_cost, 16, 15, 16, 15, 16},
{&pentium_cost, 16, 7, 16, 7, 16},
- {&pentiumpro_cost, 16, 15, 16, 7, 16},
+ {&pentiumpro_cost, 16, 15, 16, 10, 16},
{&geode_cost, 0, 0, 0, 0, 0},
{&k6_cost, 32, 7, 32, 7, 32},
{&athlon_cost, 16, 7, 16, 7, 16},
{&pentium4_cost, 0, 0, 0, 0, 0},
{&k8_cost, 16, 7, 16, 7, 16},
{&nocona_cost, 0, 0, 0, 0, 0},
- {&core2_cost, 16, 7, 16, 7, 16},
+ {&core2_cost, 16, 10, 16, 10, 16},
{&generic32_cost, 16, 7, 16, 7, 16},
- {&generic64_cost, 16, 7, 16, 7, 16},
+ {&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32}
};
}
src_addr = fold_convert (addr_type, src_addr);
src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
- build_int_cst (addr_type, src_offset));
+ size_int (src_offset));
src = build_va_arg_indirect_ref (src_addr);
dest_addr = fold_convert (addr_type, addr);
dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
- build_int_cst (addr_type, INTVAL (XEXP (slot, 1))));
+ size_int (INTVAL (XEXP (slot, 1))));
dest = build_va_arg_indirect_ref (dest_addr);
t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
gcc_assert (n < MAX_386_STACK_LOCALS);
+ /* Virtual slot is valid only before vregs are instantiated. */
+ gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
+
for (s = ix86_stack_locals; s; s = s->next)
if (s->mode == mode && s->n == n)
return copy_rtx (s->rtl);
case IX86_BUILTIN_LDMXCSR:
op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
- target = assign_386_stack_local (SImode, SLOT_TEMP);
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
emit_move_insn (target, op0);
emit_insn (gen_sse_ldmxcsr (target));
return 0;
case IX86_BUILTIN_STMXCSR:
- target = assign_386_stack_local (SImode, SLOT_TEMP);
+ target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
emit_insn (gen_sse_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
reciprocal of the function, or NULL_TREE if not available. */
static tree
-ix86_builtin_reciprocal (unsigned int code, bool sqrt ATTRIBUTE_UNUSED)
+ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
+ bool sqrt ATTRIBUTE_UNUSED)
{
if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations))
return NULL_TREE;
- switch (code)
- {
- /* Sqrt to rsqrt conversion. */
- case BUILT_IN_SQRTF:
- return ix86_builtins[IX86_BUILTIN_RSQRTF];
+ if (md_fn)
+ /* Machine dependent builtins: FN is compared against IX86_BUILTIN_* codes. */
+ switch (fn)
+ {
+ /* Vectorized sqrt to rsqrt conversion: SQRTPS maps to the RSQRTPS builtin. */
+ case IX86_BUILTIN_SQRTPS:
+ return ix86_builtins[IX86_BUILTIN_RSQRTPS];
- /* Vectorized version of sqrt to rsqrt conversion. */
- case IX86_BUILTIN_SQRTPS:
- return ix86_builtins[IX86_BUILTIN_RSQRTPS];
+ default:
+ return NULL_TREE;
+ }
+ else
+ /* Normal builtins: FN is compared against BUILT_IN_* codes. */
+ switch (fn)
+ {
+ /* Sqrt to rsqrt conversion: BUILT_IN_SQRTF maps to the RSQRTF builtin. */
+ case BUILT_IN_SQRTF:
+ return ix86_builtins[IX86_BUILTIN_RSQRTF];
- default:
- return NULL_TREE;
- }
+ default:
+ return NULL_TREE;
+ }
}
/* Store OPERAND to the memory after reload is completed. This means
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
bool recip)
{
- rtx x0, e0, e1, e2, e3, three, half;
+ rtx x0, e0, e1, e2, e3, three, half, zero, mask;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
+ mask = gen_reg_rtx (mode);
+
if (VECTOR_MODE_P (mode))
{
three = ix86_build_const_vector (SFmode, true, three);
three = force_reg (mode, three);
half = force_reg (mode, half);
+ zero = force_reg (mode, CONST0_RTX(mode));
+
/* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
+ /* Compare a to zero. */
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, a, zero)));
+
/* x0 = 1./sqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT)));
+ /* Filter out infinity. */
+ if (VECTOR_MODE_P (mode))
+ emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
+ gen_rtx_AND (mode,
+ gen_lowpart (V4SFmode, x0),
+ gen_lowpart (V4SFmode, mask))));
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+
/* e0 = x0 * a */
emit_insn (gen_rtx_SET (VOIDmode, e0,
gen_rtx_MULT (mode, x0, a)));