to limit number of prefetches at all, as their execution also takes some
time). */
100, /* number of parallel prefetches */
- 5, /* Branch cost */
+ 3, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2, /* vec_align_load_cost. */
3, /* vec_unalign_load_cost. */
3, /* vec_store_cost. */
- 6, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
to limit number of prefetches at all, as their execution also takes some
time). */
100, /* number of parallel prefetches */
- 5, /* Branch cost */
+ 2, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
2, /* vec_store_cost. */
- 6, /* cond_taken_branch_cost. */
+ 2, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
};
was set or cleared on the command line. */
static int ix86_isa_flags_explicit;
-/* Define a set of ISAs which aren't available for a given ISA. MMX
- and SSE ISAs are handled separately. */
+/* Define a set of ISAs which are available when a given ISA is
+ enabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
+#define OPTION_MASK_ISA_3DNOW_SET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
+
+#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
+#define OPTION_MASK_ISA_SSE2_SET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
+#define OPTION_MASK_ISA_SSE3_SET \
+ (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SSSE3_SET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE4_1_SET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
+#define OPTION_MASK_ISA_SSE4_2_SET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
+ as -msse4.2. */
+#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
+
+#define OPTION_MASK_ISA_SSE4A_SET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE5_SET \
+ (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
+
+/* Define a set of ISAs which aren't available when a given ISA is
+ disabled. MMX and SSE ISAs are handled separately. */
#define OPTION_MASK_ISA_MMX_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
-#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_3DNOW_UNSET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
+#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
#define OPTION_MASK_ISA_SSE_UNSET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
+ (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
- (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
+ (OPTION_MASK_ISA_SSE3 \
+ | OPTION_MASK_ISA_SSSE3_UNSET \
+ | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
+#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
-/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
- as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
-#define OPTION_MASK_ISA_SSE4 \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
+/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
+ as -mno-sse4.1. */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
-#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
+#define OPTION_MASK_ISA_SSE4A_UNSET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
-#define OPTION_MASK_ISA_SSE5_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
/* Vectorization library interface and handlers. */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
switch (code)
{
case OPT_mmmx:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
return true;
case OPT_m3dnow:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
return false;
case OPT_msse:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
return true;
case OPT_msse2:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
return true;
case OPT_msse3:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
return true;
case OPT_mssse3:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
return true;
case OPT_msse4_1:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
return true;
case OPT_msse4_2:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
return true;
case OPT_msse4:
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
return true;
case OPT_mno_sse4:
return true;
case OPT_msse4a:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
return true;
case OPT_msse5:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
- /* Turn on SSE4A bultins for -msse5. */
- if (TARGET_SSE5)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
-
- /* Turn on SSE4.1 builtins for -msse4.2. */
- if (TARGET_SSE4_2)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
-
- /* Turn on SSSE3 builtins for -msse4.1. */
- if (TARGET_SSE4_1)
- ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
-
- /* Turn on SSE3 builtins for -mssse3. */
- if (TARGET_SSSE3)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
-
- /* Turn on SSE3 builtins for -msse4a. */
- if (TARGET_SSE4A)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
-
- /* Turn on SSE2 builtins for -msse3. */
- if (TARGET_SSE3)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
-
- /* Turn on SSE builtins for -msse2. */
- if (TARGET_SSE2)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE;
-
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE)
{
x86_prefetch_sse = true;
}
- /* Turn on MMX builtins for 3Dnow. */
- if (TARGET_3DNOW)
- ix86_isa_flags |= OPTION_MASK_ISA_MMX;
-
/* Turn on popcnt instruction for -msse4.2 or -mabm. */
if (TARGET_SSE4_2 || TARGET_ABM)
x86_popcnt = true;
indirectly or considering a libcall. Otherwise return 0. */
static int
-ix86_function_sseregparm (const_tree type, const_tree decl)
+ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
gcc_assert (!TARGET_64BIT);
{
if (!TARGET_SSE)
{
- if (decl)
- error ("Calling %qD with attribute sseregparm without "
- "SSE/SSE2 enabled", decl);
- else
- error ("Calling %qT with attribute sseregparm without "
- "SSE/SSE2 enabled", type);
+ if (warn)
+ {
+ if (decl)
+ error ("Calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("Calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ }
return 0;
}
rtx libname, /* SYMBOL_REF of library name or 0 */
tree fndecl)
{
+ struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
memset (cum, 0, sizeof (*cum));
/* Set up the number of registers to use for passing arguments. */
cum->mmx_nregs = MMX_REGPARM_MAX;
cum->warn_sse = true;
cum->warn_mmx = true;
+
+ /* Because type might mismatch in between caller and callee, we need to
+ use actual type of function for local calls.
+ FIXME: cgraph_analyze can be told to actually record if function uses
+ va_start so for local functions maybe_vaarg can be made aggressive
+ helping K&R code.
+ FIXME: once typesytem is fixed, we won't need this code anymore. */
+ if (i && i->local)
+ fntype = TREE_TYPE (fndecl);
cum->maybe_vaarg = (fntype
? (!prototype_p (fntype) || stdarg_p (fntype))
: !libname);
/* Set up the number of SSE registers used for passing SFmode
and DFmode arguments. Warn for mismatching ABI. */
- cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
}
}
SSE math is enabled or for functions with sseregparm attribute. */
if ((fn || fntype) && (mode == SFmode || mode == DFmode))
{
- int sse_level = ix86_function_sseregparm (fntype, fn);
+ int sse_level = ix86_function_sseregparm (fntype, fn, false);
if ((sse_level >= 1 && mode == SFmode)
|| (sse_level == 2 && mode == DFmode))
regno = FIRST_SSE_REG;
rtx operands[])
{
rtx mask, set, use, clob, dst, src;
- bool matching_memory;
bool use_sse = false;
bool vector_mode = VECTOR_MODE_P (mode);
enum machine_mode elt_mode = mode;
dst = operands[0];
src = operands[1];
- /* If the destination is memory, and we don't have matching source
- operands or we're using the x87, do things in registers. */
- matching_memory = false;
- if (MEM_P (dst))
- {
- if (use_sse && rtx_equal_p (dst, src))
- matching_memory = true;
- else
- dst = gen_reg_rtx (mode);
- }
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
-
if (vector_mode)
{
set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
else
emit_insn (set);
}
-
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
}
/* Expand a copysign operation. Special case operand 0 being a constant. */
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
bool sqrt ATTRIBUTE_UNUSED)
{
- if (! (TARGET_SSE_MATH && !optimize_size
+ if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations))
return NULL_TREE;
where integer modes in MMX/SSE registers are not tieable
because of missing QImode and HImode moves to, from or between
MMX/SSE registers. */
- return MAX (ix86_cost->mmxsse_to_integer, 8);
+ return MAX (8, ix86_cost->mmxsse_to_integer);
if (MAYBE_FLOAT_CLASS_P (class1))
return ix86_cost->fp_move;
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
n_var++, one_var = i;
else if (x != CONST0_RTX (inner_mode))
all_const_zero = false;