/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
This file is part of GCC.
to limit number of prefetches at all, as their execution also takes some
time). */
100, /* number of parallel prefetches */
- 5, /* Branch cost */
+ 3, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2, /* vec_align_load_cost. */
3, /* vec_unalign_load_cost. */
3, /* vec_store_cost. */
- 6, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
to limit number of prefetches at all, as their execution also takes some
time). */
100, /* number of parallel prefetches */
- 5, /* Branch cost */
+ 2, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
2, /* vec_store_cost. */
- 6, /* cond_taken_branch_cost. */
+ 2, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
};
replacement is long decoded, so this split helps here as well. */
m_K6,
- /* X86_TUNE_USE_VECTOR_CONVERTS: Preffer vector packed SSE conversion
+ /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */
m_AMDFAM10,
};
was set or cleared on the command line. */
static int ix86_isa_flags_explicit;
-/* Define a set of ISAs which aren't available for a given ISA. MMX
- and SSE ISAs are handled separately. */
+/* Define a set of ISAs which are available when a given ISA is
+ enabled. MMX and SSE ISAs are handled separately. */
+
+#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
+#define OPTION_MASK_ISA_3DNOW_SET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
+
+#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
+#define OPTION_MASK_ISA_SSE2_SET \
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
+#define OPTION_MASK_ISA_SSE3_SET \
+ (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SSSE3_SET \
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE4_1_SET \
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
+#define OPTION_MASK_ISA_SSE4_2_SET \
+ (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
+ as -msse4.2. */
+#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
+
+#define OPTION_MASK_ISA_SSE4A_SET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE5_SET \
+ (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
+
+/* Define a set of ISAs which aren't available when a given ISA is
+ disabled. MMX and SSE ISAs are handled separately. */
#define OPTION_MASK_ISA_MMX_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
-#define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
+ (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_3DNOW_UNSET \
+ (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
+#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
#define OPTION_MASK_ISA_SSE_UNSET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
+ (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
#define OPTION_MASK_ISA_SSE2_UNSET \
- (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
+ (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
#define OPTION_MASK_ISA_SSE3_UNSET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
+ (OPTION_MASK_ISA_SSE3 \
+ | OPTION_MASK_ISA_SSSE3_UNSET \
+ | OPTION_MASK_ISA_SSE4A_UNSET )
#define OPTION_MASK_ISA_SSSE3_UNSET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
+ (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
#define OPTION_MASK_ISA_SSE4_1_UNSET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
+ (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
+#define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
-/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
- as -msse4.1 -msse4.2. -mno-sse4 should the same as -mno-sse4.1. */
-#define OPTION_MASK_ISA_SSE4 \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
+/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
+ as -mno-sse4.1. */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
-#define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
+#define OPTION_MASK_ISA_SSE4A_UNSET \
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
-#define OPTION_MASK_ISA_SSE5_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
/* Vectorization library interface and handlers. */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
+static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Implement TARGET_HANDLE_OPTION. */
switch (code)
{
case OPT_mmmx:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
return true;
case OPT_m3dnow:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
return false;
case OPT_msse:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
return true;
case OPT_msse2:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
return true;
case OPT_msse3:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
return true;
case OPT_mssse3:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
return true;
case OPT_msse4_1:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
return true;
case OPT_msse4_2:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
return true;
case OPT_msse4:
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
return true;
case OPT_mno_sse4:
return true;
case OPT_msse4a:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
return true;
case OPT_msse5:
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5;
- if (!value)
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
+ }
+ else
{
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
{&amdfam10_cost, 32, 24, 32, 7, 32}
};
- static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
+ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
+ {
+ "generic",
+ "i386",
+ "i486",
+ "pentium",
+ "pentium-mmx",
+ "pentiumpro",
+ "pentium2",
+ "pentium3",
+ "pentium4",
+ "pentium-m",
+ "prescott",
+ "nocona",
+ "core2",
+ "geode",
+ "k6",
+ "k6-2",
+ "k6-3",
+ "athlon",
+ "athlon-4",
+ "k8",
+ "amdfam10"
+ };
+
enum pta_flags
{
PTA_SSE = 1 << 0,
ix86_tune_string = ix86_arch_string;
if (!ix86_tune_string)
{
- ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
+ ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
ix86_tune_defaulted = 1;
}
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
- /* Turn on SSE4A bultins for -msse5. */
- if (TARGET_SSE5)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
-
- /* Turn on SSE4.1 builtins for -msse4.2. */
- if (TARGET_SSE4_2)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
-
- /* Turn on SSSE3 builtins for -msse4.1. */
- if (TARGET_SSE4_1)
- ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
-
- /* Turn on SSE3 builtins for -mssse3. */
- if (TARGET_SSSE3)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
-
- /* Turn on SSE3 builtins for -msse4a. */
- if (TARGET_SSE4A)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
-
- /* Turn on SSE2 builtins for -msse3. */
- if (TARGET_SSE3)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
-
- /* Turn on SSE builtins for -msse2. */
- if (TARGET_SSE2)
- ix86_isa_flags |= OPTION_MASK_ISA_SSE;
-
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE)
{
x86_prefetch_sse = true;
}
- /* Turn on MMX builtins for 3Dnow. */
- if (TARGET_3DNOW)
- ix86_isa_flags |= OPTION_MASK_ISA_MMX;
-
/* Turn on popcnt instruction for -msse4.2 or -mabm. */
if (TARGET_SSE4_2 || TARGET_ABM)
x86_popcnt = true;
/* Use external vectorized library in vectorizing intrinsics. */
if (ix86_veclibabi_string)
{
- if (strcmp (ix86_veclibabi_string, "acml") == 0)
+ if (strcmp (ix86_veclibabi_string, "svml") == 0)
+ ix86_veclib_handler = ix86_veclibabi_svml;
+ else if (strcmp (ix86_veclibabi_string, "acml") == 0)
ix86_veclib_handler = ix86_veclibabi_acml;
else
error ("unknown vectorization library ABI type (%s) for "
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
+ /* If stack probes are required, the space used for large function
+ arguments on the stack must also be probed, so enable
+ -maccumulate-outgoing-args so this happens in the prologue. */
+ if (TARGET_STACK_PROBE
+ && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
+ {
+ if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
+ warning (0, "stack probing requires -maccumulate-outgoing-args "
+ "for correctness");
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
+ }
+
/* For sane SSE instruction set generation we need fcomi instruction.
It is safe to enable all CMOVE instructions. */
if (TARGET_SSE)
set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
+
+ /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
+ can be optimized to ap = __builtin_next_arg (0). */
+ if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ targetm.expand_builtin_va_start = NULL;
}
\f
/* Return true if this goes in large data/bss. */
flag_omit_frame_pointer = 2;
flag_pcc_struct_return = 2;
flag_asynchronous_unwind_tables = 2;
+ flag_vect_cost_model = 1;
#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
SUBTARGET_OPTIMIZATION_OPTIONS;
#endif
/* Check for mismatch of non-default calling convention. */
const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
- if (TREE_CODE (type1) != FUNCTION_TYPE)
+ if (TREE_CODE (type1) != FUNCTION_TYPE
+ && TREE_CODE (type1) != METHOD_TYPE)
return 1;
/* Check for mismatched fastcall/regparm types. */
struct function *f;
/* Make sure no regparm register is taken by a
- global register variable. */
- for (local_regparm = 0; local_regparm < 3; local_regparm++)
- if (global_regs[local_regparm])
+ fixed register variable. */
+ for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
+ if (fixed_regs[local_regparm])
break;
/* We can't use regparm(3) for nested functions as these use
TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
local_regparm = 2;
- /* Each global register variable increases register preassure,
- so the more global reg vars there are, the smaller regparm
- optimization use, unless requested by the user explicitly. */
- for (regno = 0; regno < 6; regno++)
- if (global_regs[regno])
+ /* Each fixed register usage increases register pressure,
+ so less registers should be used for argument passing.
+ This functionality can be overriden by an explicit
+ regparm value. */
+ for (regno = 0; regno <= DI_REG; regno++)
+ if (fixed_regs[regno])
globals++;
+
local_regparm
= globals < local_regparm ? local_regparm - globals : 0;
indirectly or considering a libcall. Otherwise return 0. */
static int
-ix86_function_sseregparm (const_tree type, const_tree decl)
+ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
gcc_assert (!TARGET_64BIT);
{
if (!TARGET_SSE)
{
- if (decl)
- error ("Calling %qD with attribute sseregparm without "
- "SSE/SSE2 enabled", decl);
- else
- error ("Calling %qT with attribute sseregparm without "
- "SSE/SSE2 enabled", type);
+ if (warn)
+ {
+ if (decl)
+ error ("Calling %qD with attribute sseregparm without "
+ "SSE/SSE2 enabled", decl);
+ else
+ error ("Calling %qT with attribute sseregparm without "
+ "SSE/SSE2 enabled", type);
+ }
return 0;
}
}
/* RAX is used as hidden argument to va_arg functions. */
- if (!TARGET_64BIT_MS_ABI && regno == 0)
+ if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
return true;
if (TARGET_64BIT_MS_ABI)
rtx libname, /* SYMBOL_REF of library name or 0 */
tree fndecl)
{
+ struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
memset (cum, 0, sizeof (*cum));
/* Set up the number of registers to use for passing arguments. */
cum->mmx_nregs = MMX_REGPARM_MAX;
cum->warn_sse = true;
cum->warn_mmx = true;
+
+ /* Because type might mismatch in between caller and callee, we need to
+ use actual type of function for local calls.
+ FIXME: cgraph_analyze can be told to actually record if function uses
+ va_start so for local functions maybe_vaarg can be made aggressive
+ helping K&R code.
+ FIXME: once typesytem is fixed, we won't need this code anymore. */
+ if (i && i->local)
+ fntype = TREE_TYPE (fndecl);
cum->maybe_vaarg = (fntype
? (!prototype_p (fntype) || stdarg_p (fntype))
: !libname);
/* Set up the number of SSE registers used for passing SFmode
and DFmode arguments. Warn for mismatching ABI. */
- cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
}
}
}
/* for V1xx modes, just use the base mode */
- if (VECTOR_MODE_P (mode)
+ if (VECTOR_MODE_P (mode) && mode != V1DImode
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
mode = GET_MODE_INNER (mode);
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
+ case V1DImode:
case V2SFmode:
case V2SImode:
case V4HImode:
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
cum->mmx_words += words;
int regno = cum->regno;
/* Fastcall allocates the first two DWORD (SImode) or
- smaller arguments to ECX and EDX. */
+ smaller arguments to ECX and EDX if it isn't an
+ aggregate type . */
if (cum->fastcall)
{
- if (mode == BLKmode || mode == DImode)
+ if (mode == BLKmode
+ || mode == DImode
+ || (type && AGGREGATE_TYPE_P (type)))
break;
/* ECX not EAX is the first allocated register. */
- if (regno == 0)
- regno = 2;
+ if (regno == AX_REG)
+ regno = CX_REG;
}
return gen_rtx_REG (mode, regno);
}
case V4HImode:
case V2SImode:
case V2SFmode:
+ case V1DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
align = PARM_BOUNDARY;
- if (!TARGET_64BIT)
+ /* Decimal floating point is aligned to its natural boundary. */
+ if (!TARGET_64BIT && !VALID_DFP_MODE_P (mode))
{
/* i386 ABI defines all arguments to be 4 byte aligned. We have to
make an exception for SSE modes since these require 128bit
align = PARM_BOUNDARY;
}
}
- if (align > 128)
- align = 128;
+ if (align > BIGGEST_ALIGNMENT)
+ align = BIGGEST_ALIGNMENT;
return align;
}
regno = FIRST_FLOAT_REG;
else
/* Most things go in %eax. */
- regno = 0;
+ regno = AX_REG;
/* Override FP return register with %xmm0 for local functions when
SSE math is enabled or for functions with sseregparm attribute. */
if ((fn || fntype) && (mode == SFmode || mode == DFmode))
{
- int sse_level = ix86_function_sseregparm (fntype, fn);
+ int sse_level = ix86_function_sseregparm (fntype, fn, false);
if ((sse_level >= 1 && mode == SFmode)
|| (sse_level == 2 && mode == DFmode))
regno = FIRST_SSE_REG;
case TCmode:
return NULL;
default:
- return gen_rtx_REG (mode, 0);
+ return gen_rtx_REG (mode, AX_REG);
}
}
/* For zero sized structures, construct_container returns NULL, but we
need to keep rest of compiler happy by returning meaningful value. */
if (!ret)
- ret = gen_rtx_REG (orig_mode, 0);
+ ret = gen_rtx_REG (orig_mode, AX_REG);
return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
- unsigned int regno = 0;
+ unsigned int regno = AX_REG;
if (TARGET_SSE)
{
HOST_WIDE_INT size = int_size_in_bytes (type);
/* __m128 and friends are returned in xmm0. */
- if (size == 16 && VECTOR_MODE_P (mode))
+ if (!COMPLEX_MODE_P (mode) && size == 16 && VECTOR_MODE_P (mode))
return 0;
- /* Otherwise, the size must be exactly in [1248]. */
- return (size != 1 && size != 2 && size != 4 && size != 8);
+ /* Otherwise, the size must be exactly in [1248]. But not for complex. */
+ return (size != 1 && size != 2 && size != 4 && size != 8)
+ || COMPLEX_MODE_P (mode);
}
int
We also may end up assuming that only 64bit values are stored in SSE
register let some floating point program work. */
- if (ix86_preferred_stack_boundary >= 128)
- cfun->stack_alignment_needed = 128;
+ if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
+ cfun->stack_alignment_needed = BIGGEST_ALIGNMENT;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
label - 5*eax + nnamed_sse_arguments*5 */
tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
- emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
+ emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
gen_rtx_MULT (Pmode, nsse_reg,
GEN_INT (4))));
/* Implement va_start. */
-void
+static void
ix86_va_start (tree valist, rtx nextarg)
{
HOST_WIDE_INT words, n_gpr, n_fpr;
ix86_force_align_arg_pointer_string);
return virtual_incoming_args_rtx;
}
- cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
+ cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
return copy_to_reg (cfun->machine->force_align_arg_pointer);
}
else
allocate += frame.nregs * UNITS_PER_WORD;
/* When using red zone we may start register saving before allocating
- the stack frame saving one cycle of the prologue. */
- if (TARGET_RED_ZONE && frame.save_regs_using_mov)
+ the stack frame saving one cycle of the prologue. However I will
+ avoid doing this if I am going to have to probe the stack since
+ at least on x86_64 the stack probe can turn into a call that clobbers
+ a red zone location */
+ if (TARGET_RED_ZONE && frame.save_regs_using_mov
+ && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
: stack_pointer_rtx,
-frame.nregs * UNITS_PER_WORD);
else
{
/* Only valid for Win32. */
- rtx eax = gen_rtx_REG (Pmode, 0);
+ rtx eax = gen_rtx_REG (Pmode, AX_REG);
bool eax_live;
rtx t;
}
}
- if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
+ if (frame.save_regs_using_mov
+ && !(TARGET_RED_ZONE
+ && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
{
if (!frame_pointer_needed || !frame.to_allocate)
ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
{
if (ix86_cmodel == CM_LARGE_PIC)
{
- rtx tmp_reg = gen_rtx_REG (DImode,
- FIRST_REX_INT_REG + 3 /* R11 */);
+ rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
rtx label = gen_label_rtx ();
emit_label (label);
LABEL_PRESERVE_P (label) = 1;
if (current_function_pops_args >= 65536)
{
- rtx ecx = gen_rtx_REG (SImode, 2);
+ rtx ecx = gen_rtx_REG (SImode, CX_REG);
/* There is no "pascal" calling convention in any 64bit ABI. */
gcc_assert (!TARGET_64BIT);
/* Special case: on K6, [%esi] makes the instruction vector decoded.
Avoid this by transforming to [%esi+0]. */
- if (ix86_tune == PROCESSOR_K6 && !optimize_size
+ if (TARGET_K6 && !optimize_size
&& base_reg && !index_reg && !disp
&& REG_P (base_reg)
&& REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
if (TARGET_64BIT && ! TARGET_GNU2_TLS)
{
- rtx rax = gen_rtx_REG (Pmode, 0), insns;
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
start_sequence ();
emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
if (TARGET_64BIT && ! TARGET_GNU2_TLS)
{
- rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
start_sequence ();
emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
set_mem_alias_set (rtl, ix86_GOT_alias_set ());
SET_DECL_RTL (to, rtl);
+ SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
return to;
}
fputs ("@PLTOFF", file);
break;
case UNSPEC_GOTPCREL:
- fputs ("@GOTPCREL(%rip)", file);
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
break;
case UNSPEC_GOTTPOFF:
/* FIXME: This might be @TPOFF in Sun ld too. */
break;
case UNSPEC_GOTNTPOFF:
if (TARGET_64BIT)
- fputs ("@GOTTPOFF(%rip)", file);
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
else
fputs ("@GOTNTPOFF", file);
break;
void
print_reg (rtx x, int code, FILE *file)
{
- gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
- && REGNO (x) != FRAME_POINTER_REGNUM
- && REGNO (x) != FLAGS_REG
- && REGNO (x) != FPSR_REG
- && REGNO (x) != FPCR_REG);
+ gcc_assert (x == pc_rtx
+ || (REGNO (x) != ARG_POINTER_REGNUM
+ && REGNO (x) != FRAME_POINTER_REGNUM
+ && REGNO (x) != FLAGS_REG
+ && REGNO (x) != FPSR_REG
+ && REGNO (x) != FPCR_REG));
- if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
+ if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('%', file);
+ if (x == pc_rtx)
+ {
+ gcc_assert (TARGET_64BIT);
+ fputs ("rip", file);
+ return;
+ }
+
if (code == 'w' || MMX_REG_P (x))
code = 2;
else if (code == 'b')
else if (MEM_P (x))
{
- /* No `byte ptr' prefix for call instructions. */
- if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
+ /* No `byte ptr' prefix for call instructions or BLKmode operands. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
+ && GET_MODE (x) != BLKmode)
{
const char * size;
switch (GET_MODE_SIZE (GET_MODE (x)))
case 4: size = "DWORD"; break;
case 8: size = "QWORD"; break;
case 12: size = "XWORD"; break;
- case 16: size = "XMMWORD"; break;
+ case 16:
+ if (GET_MODE (x) == XFmode)
+ size = "XWORD";
+ else
+ size = "XMMWORD";
+ break;
default:
gcc_unreachable ();
}
break;
case SEG_FS:
case SEG_GS:
- if (USER_LABEL_PREFIX[0] == 0)
+ if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('%', file);
fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
break;
gcc_unreachable ();
}
+ /* Use one byte shorter RIP relative addressing for 64bit mode. */
+ if (TARGET_64BIT && !base && !index)
+ {
+ rtx symbol = disp;
+
+ if (GET_CODE (disp) == CONST
+ && GET_CODE (XEXP (disp, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
+ symbol = XEXP (XEXP (disp, 0), 0);
+
+ if (GET_CODE (symbol) == LABEL_REF
+ || (GET_CODE (symbol) == SYMBOL_REF
+ && SYMBOL_REF_TLS_MODEL (symbol) == 0))
+ base = pc_rtx;
+ }
if (!base && !index)
{
/* Displacement only requires special attention. */
if (CONST_INT_P (disp))
{
if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
- {
- if (USER_LABEL_PREFIX[0] == 0)
- putc ('%', file);
- fputs ("ds:", file);
- }
+ fputs ("ds:", file);
fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
}
else if (flag_pic)
output_pic_addr_const (file, disp, 0);
else
output_addr_const (file, disp);
-
- /* Use one byte shorter RIP relative addressing for 64bit mode. */
- if (TARGET_64BIT)
- {
- if (GET_CODE (disp) == CONST
- && GET_CODE (XEXP (disp, 0)) == PLUS
- && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
- disp = XEXP (XEXP (disp, 0), 0);
- if (GET_CODE (disp) == LABEL_REF
- || (GET_CODE (disp) == SYMBOL_REF
- && SYMBOL_REF_TLS_MODEL (disp) == 0))
- fputs ("(%rip)", file);
- }
}
else
{
case UNSPEC_GOTNTPOFF:
output_addr_const (file, op);
if (TARGET_64BIT)
- fputs ("@GOTTPOFF(%rip)", file);
+ fputs (ASSEMBLER_DIALECT == ASM_ATT ?
+ "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
else
fputs ("@GOTNTPOFF", file);
break;
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
- int strict = (reload_in_progress || reload_completed);
rtx op0, op1;
enum tls_model model;
/* Force large constants in 64bit compilation into register
to get them CSEed. */
- if (TARGET_64BIT && mode == DImode
+ if (can_create_pseudo_p ()
+ && (mode == DImode) && TARGET_64BIT
&& immediate_operand (op1, mode)
&& !x86_64_zext_immediate_operand (op1, VOIDmode)
&& !register_operand (op0, mode)
- && optimize && !reload_completed && !reload_in_progress)
+ && optimize)
op1 = copy_to_mode_reg (mode, op1);
- if (FLOAT_MODE_P (mode))
+ if (can_create_pseudo_p ()
+ && FLOAT_MODE_P (mode)
+ && GET_CODE (op1) == CONST_DOUBLE)
{
/* If we are loading a floating point constant to a register,
force the value to memory now, since we'll get better code
out the back end. */
- if (strict)
- ;
- else if (GET_CODE (op1) == CONST_DOUBLE)
+ op1 = validize_mem (force_const_mem (mode, op1));
+ if (!register_operand (op0, mode))
{
- op1 = validize_mem (force_const_mem (mode, op1));
- if (!register_operand (op0, mode))
- {
- rtx temp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
- emit_move_insn (op0, temp);
- return;
- }
+ rtx temp = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+ emit_move_insn (op0, temp);
+ return;
}
}
}
the instructions used to build constants modify the upper 64 bits
of the register, once we have that information we may be able
to handle some of them more efficiently. */
- if ((reload_in_progress | reload_completed) == 0
+ if (can_create_pseudo_p ()
&& register_operand (op0, mode)
&& (CONSTANT_P (op1)
|| (GET_CODE (op1) == SUBREG
&& standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
- /* TDmode values are passed as TImode on the stack. Timode values
+ /* TDmode values are passed as TImode on the stack. TImode values
are moved via xmm registers, and moving them to stack can result in
unaligned memory access. Use ix86_expand_vector_move_misalign()
if memory operand is not aligned correctly. */
ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
+/* Not used, but eases macroization of patterns. */
+void
+ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
+ rtx input ATTRIBUTE_UNUSED)
+{
+ gcc_unreachable ();
+}
+
/* Convert an unsigned SImode value into a DFmode. Only currently used
for SSE, but applicable anywhere. */
rtx operands[])
{
rtx mask, set, use, clob, dst, src;
- bool matching_memory;
bool use_sse = false;
bool vector_mode = VECTOR_MODE_P (mode);
enum machine_mode elt_mode = mode;
dst = operands[0];
src = operands[1];
- /* If the destination is memory, and we don't have matching source
- operands or we're using the x87, do things in registers. */
- matching_memory = false;
- if (MEM_P (dst))
- {
- if (use_sse && rtx_equal_p (dst, src))
- matching_memory = true;
- else
- dst = gen_reg_rtx (mode);
- }
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
-
if (vector_mode)
{
set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
else
emit_insn (set);
}
-
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
}
/* Expand a copysign operation. Special case operand 0 being a constant. */
/* Otherwise, if we are doing less-than or greater-or-equal-than,
op1 is a constant and the low word is zero, then we can just
- examine the high word. */
+ examine the high word. Similarly for low word -1 and
+ less-or-equal-than or greater-than. */
- if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
+ if (CONST_INT_P (hi[1]))
switch (code)
{
case LT: case LTU: case GE: case GEU:
- ix86_compare_op0 = hi[0];
- ix86_compare_op1 = hi[1];
- ix86_expand_branch (code, label);
- return;
+ if (lo[1] == const0_rtx)
+ {
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+ ix86_expand_branch (code, label);
+ return;
+ }
+ break;
+ case LE: case LEU: case GT: case GTU:
+ if (lo[1] == constm1_rtx)
+ {
+ ix86_compare_op0 = hi[0];
+ ix86_compare_op1 = hi[1];
+ ix86_expand_branch (code, label);
+ return;
+ }
+ break;
default:
break;
}
#define PPERM_REV_INV 0x60 /* bit reverse & invert src */
#define PPERM_ZERO 0x80 /* all 0's */
#define PPERM_ONES 0xa0 /* all 1's */
-#define PPERM_SIGN 0xc0 /* propigate sign bit */
-#define PPERM_INV_SIGN 0xe0 /* invert & propigate sign */
+#define PPERM_SIGN 0xc0 /* propagate sign bit */
+#define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
#define PPERM_SRC1 0x00 /* use first source byte */
#define PPERM_SRC2 0x10 /* use second source byte */
for (i = 0; i < 16; i++)
RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
- for (i = 0; i < 4; i++)
+ for (i = 0; i < 2; i++)
RTVEC_ELT (vs, i) = GEN_INT (i + h2);
p = gen_rtx_PARALLEL (VOIDmode, vs);
x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
if (unsigned_p)
- emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
+ emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
else
- emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
+ emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
break;
default:
int *dynamic_check)
{
const struct stringop_algs * algs;
+ /* Algorithms using the rep prefix want at least edi and ecx;
+ additionally, memset wants eax and memcpy wants esi. Don't
+ consider such algorithms if the user has appropriated those
+ registers for their own purposes. */
+ bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
+ || (memset
+ ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
+
+#define ALG_USABLE_P(alg) (rep_prefix_usable \
+ || (alg != rep_prefix_1_byte \
+ && alg != rep_prefix_4_byte \
+ && alg != rep_prefix_8_byte))
*dynamic_check = -1;
if (memset)
algs = &ix86_cost->memset[TARGET_64BIT != 0];
else
algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
- if (stringop_alg != no_stringop)
+ if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
return stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (optimize_size)
{
if (!count || (count & 3))
- return rep_prefix_1_byte;
+ return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
else
- return rep_prefix_4_byte;
+ return rep_prefix_usable ? rep_prefix_4_byte : loop;
}
/* Very tiny blocks are best handled via the loop, REP is expensive to setup.
*/
enum stringop_alg alg = libcall;
for (i = 0; i < NAX_STRINGOP_ALGS; i++)
{
- gcc_assert (algs->size[i].max);
+ /* We get here if the algorithms that were not libcall-based
+ were rep-prefix based and we are unable to use rep prefixes
+ based on global register usage. Break out of the loop and
+ use the heuristic below. */
+ if (algs->size[i].max == 0)
+ break;
if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
{
- if (algs->size[i].alg != libcall)
- alg = algs->size[i].alg;
+ enum stringop_alg candidate = algs->size[i].alg;
+
+ if (candidate != libcall && ALG_USABLE_P (candidate))
+ alg = candidate;
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking
- last non-libcall inline algorithm. */
+ last non-libcall inline algorithm. */
if (TARGET_INLINE_ALL_STRINGOPS)
{
/* When the current size is best to be copied by a libcall,
- but we are still forced to inline, run the heuristic bellow
+ but we are still forced to inline, run the heuristic below
that will pick code for medium sized blocks. */
if (alg != libcall)
return alg;
break;
}
- else
- return algs->size[i].alg;
+ else if (ALG_USABLE_P (candidate))
+ return candidate;
}
}
- gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
+ gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
}
/* When asked to inline the call anyway, try to pick meaningful choice.
We look for maximal size of block that is faster to copy by hand and
If this turns out to be bad, we might simply specify the preferred
choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
- && algs->unknown_size == libcall)
+ && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
{
int max = -1;
enum stringop_alg alg;
int i;
+ bool any_alg_usable_p = true;
for (i = 0; i < NAX_STRINGOP_ALGS; i++)
- if (algs->size[i].alg != libcall && algs->size[i].alg)
- max = algs->size[i].max;
+ {
+ enum stringop_alg candidate = algs->size[i].alg;
+ any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
+
+ if (candidate != libcall && candidate
+ && ALG_USABLE_P (candidate))
+ max = algs->size[i].max;
+ }
+ /* If there aren't any usable algorithms, then recursing on
+ smaller sizes isn't going to find anything. Just return the
+ simple byte-at-a-time copy loop. */
+ if (!any_alg_usable_p)
+ {
+ /* Pick something reasonable. */
+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
+ *dynamic_check = 128;
+ return loop_1_byte;
+ }
if (max == -1)
max = 4096;
alg = decide_alg (count, max / 2, memset, dynamic_check);
*dynamic_check = max;
return alg;
}
- return algs->unknown_size;
+ return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
+#undef ALG_USABLE_P
}
/* Decide on alignment. We know that the operand is already aligned to ALIGN
}
/* Expand string move (memcpy) operation. Use i386 string operations when
- profitable. expand_clrmem contains similar code. The code depends upon
+ profitable. expand_setmem contains similar code. The code depends upon
architecture, block size and alignment, but always has the same
overall structure:
if (CONST_INT_P (expected_size_exp) && count == 0)
expected_size = INTVAL (expected_size_exp);
+ /* Make sure we don't need to care about overflow later on. */
+ if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
+ return 0;
+
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
/* Alignment code needs count to be in register. */
if (CONST_INT_P (count_exp) && desired_align > align)
- {
- enum machine_mode mode = SImode;
- if (TARGET_64BIT && (count & ~0xffffffff))
- mode = DImode;
- count_exp = force_reg (mode, count_exp);
- }
+ count_exp = force_reg (counter_mode (count_exp), count_exp);
gcc_assert (desired_align >= 1 && align >= 1);
/* Ensure that alignment prologue won't copy past end of block. */
Make sure it is power of 2. */
epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp,
- GEN_INT (epilogue_size_needed),
- LTU, 0, counter_mode (count_exp), 1, label);
- if (GET_CODE (count_exp) == CONST_INT)
- ;
- else if (expected_size == -1 || expected_size < epilogue_size_needed)
- predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ if (CONST_INT_P (count_exp))
+ {
+ if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ goto epilogue;
+ }
else
- predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (epilogue_size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1 || expected_size < epilogue_size_needed)
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ }
}
+
/* Emit code to decide on runtime whether library call or inline should be
used. */
if (dynamic_check != -1)
{
- rtx hot_label = gen_label_rtx ();
- jump_around_label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
- LEU, 0, GET_MODE (count_exp), 1, hot_label);
- predict_jump (REG_BR_PROB_BASE * 90 / 100);
- emit_block_move_via_libcall (dst, src, count_exp, false);
- emit_jump (jump_around_label);
- emit_label (hot_label);
+ if (CONST_INT_P (count_exp))
+ {
+ if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
+ {
+ emit_block_move_via_libcall (dst, src, count_exp, false);
+ count_exp = const0_rtx;
+ goto epilogue;
+ }
+ }
+ else
+ {
+ rtx hot_label = gen_label_rtx ();
+ jump_around_label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
+ LEU, 0, GET_MODE (count_exp), 1, hot_label);
+ predict_jump (REG_BR_PROB_BASE * 90 / 100);
+ emit_block_move_via_libcall (dst, src, count_exp, false);
+ emit_jump (jump_around_label);
+ emit_label (hot_label);
+ }
}
/* Step 2: Alignment prologue. */
}
/* Step 4: Epilogue to copy the remaining bytes. */
-
+ epilogue:
if (label)
{
/* When the main loop is done, COUNT_EXP might hold original count,
if (CONST_INT_P (expected_size_exp) && count == 0)
expected_size = INTVAL (expected_size_exp);
+ /* Make sure we don't need to care about overflow later on. */
+ if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
+ return 0;
+
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
else
{
rtx unspec;
+
+ /* Can't use this if the user has appropriated eax, ecx, or edi. */
+ if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
+ return false;
+
scratch2 = gen_reg_rtx (Pmode);
scratch3 = gen_reg_rtx (Pmode);
scratch4 = force_reg (Pmode, constm1_rtx);
if (TARGET_64BIT && INTVAL (callarg2) >= 0)
{
- rtx al = gen_rtx_REG (QImode, 0);
+ rtx al = gen_rtx_REG (QImode, AX_REG);
emit_move_insn (al, callarg2);
use_reg (&use, al);
}
static int
ia32_multipass_dfa_lookahead (void)
{
- if (ix86_tune == PROCESSOR_PENTIUM)
- return 2;
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ return 2;
- if (ix86_tune == PROCESSOR_PENTIUMPRO
- || ix86_tune == PROCESSOR_K6)
- return 1;
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_K6:
+ return 1;
- else
- return 0;
+ default:
+ return 0;
+ }
}
\f
int
ix86_constant_alignment (tree exp, int align)
{
- if (TREE_CODE (exp) == REAL_CST)
+ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
+ || TREE_CODE (exp) == INTEGER_CST)
{
if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
return 64;
IX86_BUILTIN_RCPPS,
IX86_BUILTIN_RCPSS,
IX86_BUILTIN_RSQRTPS,
+ IX86_BUILTIN_RSQRTPS_NR,
IX86_BUILTIN_RSQRTSS,
IX86_BUILTIN_RSQRTF,
IX86_BUILTIN_SQRTPS,
+ IX86_BUILTIN_SQRTPS_NR,
IX86_BUILTIN_SQRTSS,
IX86_BUILTIN_UNPCKHPS,
{ OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
-
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
-
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
-
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
static const struct builtin_description bdesc_1arg[] =
{
+ /* SSE */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
+ /* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+ tree V1DI_type_node
+ = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
tree V2DI_type_node
= build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
tree v4hi_ftype_v4hi_int
= build_function_type_list (V4HI_type_node,
V4HI_type_node, integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, long_long_unsigned_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_di
+ tree v2si_ftype_v2si_int
= build_function_type_list (V2SI_type_node,
- V2SI_type_node, long_long_unsigned_type_node,
- NULL_TREE);
+ V2SI_type_node, integer_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_int
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, integer_type_node, NULL_TREE);
+
tree void_ftype_void
= build_function_type (void_type_node, void_list_node);
tree void_ftype_unsigned
tree v2si_ftype_v2si_v2si
= build_function_type_list (V2SI_type_node,
V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree di_ftype_di_di
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node, NULL_TREE);
+ tree v1di_ftype_v1di_v1di
+ = build_function_type_list (V1DI_type_node,
+ V1DI_type_node, V1DI_type_node, NULL_TREE);
tree di_ftype_di_di_int
= build_function_type_list (long_long_unsigned_type_node,
tree v4si_ftype_v8hi_v8hi
= build_function_type_list (V4SI_type_node,
V8HI_type_node, V8HI_type_node, NULL_TREE);
- tree di_ftype_v8qi_v8qi
- = build_function_type_list (long_long_unsigned_type_node,
+ tree v1di_ftype_v8qi_v8qi
+ = build_function_type_list (V1DI_type_node,
V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree di_ftype_v2si_v2si
- = build_function_type_list (long_long_unsigned_type_node,
+ tree v1di_ftype_v2si_v2si
+ = build_function_type_list (V1DI_type_node,
V2SI_type_node, V2SI_type_node, NULL_TREE);
tree v2di_ftype_v16qi_v16qi
= build_function_type_list (V2DI_type_node,
case V2SImode:
type = v2si_ftype_v2si_v2si;
break;
- case DImode:
- type = di_ftype_di_di;
+ case V1DImode:
+ type = v1di_ftype_v1di_v1di;
break;
default:
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
-
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
+
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);
def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
ftype = build_function_type_list (float_type_node,
float_type_node,
NULL_TREE);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
error ("the third argument must be a 4-bit immediate");
return const0_rtx;
+ case CODE_FOR_sse4_1_blendpd:
+ error ("the third argument must be a 2-bit immediate");
+ return const0_rtx;
+
default:
error ("the third argument must be an 8-bit immediate");
return const0_rtx;
emit_insn (pat);
return target;
- case IX86_BUILTIN_PSLLWI128:
- icode = CODE_FOR_ashlv8hi3;
- goto do_pshifti;
- case IX86_BUILTIN_PSLLDI128:
- icode = CODE_FOR_ashlv4si3;
- goto do_pshifti;
- case IX86_BUILTIN_PSLLQI128:
- icode = CODE_FOR_ashlv2di3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRAWI128:
- icode = CODE_FOR_ashrv8hi3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRADI128:
- icode = CODE_FOR_ashrv4si3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRLWI128:
- icode = CODE_FOR_lshrv8hi3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRLDI128:
- icode = CODE_FOR_lshrv4si3;
- goto do_pshifti;
- case IX86_BUILTIN_PSRLQI128:
- icode = CODE_FOR_lshrv2di3;
- goto do_pshifti;
- do_pshifti:
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
-
- if (!CONST_INT_P (op1))
- {
- error ("shift must be an immediate");
- return const0_rtx;
- }
- if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
- op1 = GEN_INT (255);
-
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_reg (op0);
-
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
+ case IX86_BUILTIN_PSLLW:
+ case IX86_BUILTIN_PSLLWI:
+ icode = CODE_FOR_mmx_ashlv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLD:
+ case IX86_BUILTIN_PSLLDI:
+ icode = CODE_FOR_mmx_ashlv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSLLQ:
+ case IX86_BUILTIN_PSLLQI:
+ icode = CODE_FOR_mmx_ashlv1di3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAW:
+ case IX86_BUILTIN_PSRAWI:
+ icode = CODE_FOR_mmx_ashrv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRAD:
+ case IX86_BUILTIN_PSRADI:
+ icode = CODE_FOR_mmx_ashrv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLW:
+ case IX86_BUILTIN_PSRLWI:
+ icode = CODE_FOR_mmx_lshrv4hi3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLD:
+ case IX86_BUILTIN_PSRLDI:
+ icode = CODE_FOR_mmx_lshrv2si3;
+ goto do_pshift;
+ case IX86_BUILTIN_PSRLQ:
+ case IX86_BUILTIN_PSRLQI:
+ icode = CODE_FOR_mmx_lshrv1di3;
+ goto do_pshift;
case IX86_BUILTIN_PSLLW128:
+ case IX86_BUILTIN_PSLLWI128:
icode = CODE_FOR_ashlv8hi3;
goto do_pshift;
case IX86_BUILTIN_PSLLD128:
+ case IX86_BUILTIN_PSLLDI128:
icode = CODE_FOR_ashlv4si3;
goto do_pshift;
case IX86_BUILTIN_PSLLQ128:
+ case IX86_BUILTIN_PSLLQI128:
icode = CODE_FOR_ashlv2di3;
goto do_pshift;
case IX86_BUILTIN_PSRAW128:
+ case IX86_BUILTIN_PSRAWI128:
icode = CODE_FOR_ashrv8hi3;
goto do_pshift;
case IX86_BUILTIN_PSRAD128:
+ case IX86_BUILTIN_PSRADI128:
icode = CODE_FOR_ashrv4si3;
goto do_pshift;
case IX86_BUILTIN_PSRLW128:
+ case IX86_BUILTIN_PSRLWI128:
icode = CODE_FOR_lshrv8hi3;
goto do_pshift;
case IX86_BUILTIN_PSRLD128:
+ case IX86_BUILTIN_PSRLDI128:
icode = CODE_FOR_lshrv4si3;
goto do_pshift;
case IX86_BUILTIN_PSRLQ128:
+ case IX86_BUILTIN_PSRLQI128:
icode = CODE_FOR_lshrv2di3;
- goto do_pshift;
+
do_pshift:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
op0 = copy_to_reg (op0);
- op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
+ if (!CONST_INT_P (op1))
+ op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
+
if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
op1 = copy_to_reg (op1);
case BUILT_IN_SQRTF:
if (out_mode == SFmode && out_n == 4
&& in_mode == SFmode && in_n == 4)
- return ix86_builtins[IX86_BUILTIN_SQRTPS];
+ return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
break;
case BUILT_IN_LRINT:
return NULL_TREE;
}
-/* Handler for an ACML-style interface to a library with vectorized
- intrinsics. */
+/* Handler for an SVML-style interface to
+ a library with vectorized intrinsics. */
+
+static tree
+ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
+{
+ char name[20];
+ tree fntype, new_fndecl, args;
+ unsigned arity;
+ const char *bname;
+ enum machine_mode el_mode, in_mode;
+ int n, in_n;
+
+ /* The SVML is suitable for unsafe math only. */
+ if (!flag_unsafe_math_optimizations)
+ return NULL_TREE;
+
+ el_mode = TYPE_MODE (TREE_TYPE (type_out));
+ n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
+ if (el_mode != in_mode
+ || n != in_n)
+ return NULL_TREE;
+
+ switch (fn)
+ {
+ case BUILT_IN_EXP:
+ case BUILT_IN_LOG:
+ case BUILT_IN_LOG10:
+ case BUILT_IN_POW:
+ case BUILT_IN_TANH:
+ case BUILT_IN_TAN:
+ case BUILT_IN_ATAN:
+ case BUILT_IN_ATAN2:
+ case BUILT_IN_ATANH:
+ case BUILT_IN_CBRT:
+ case BUILT_IN_SINH:
+ case BUILT_IN_SIN:
+ case BUILT_IN_ASINH:
+ case BUILT_IN_ASIN:
+ case BUILT_IN_COSH:
+ case BUILT_IN_COS:
+ case BUILT_IN_ACOSH:
+ case BUILT_IN_ACOS:
+ if (el_mode != DFmode || n != 2)
+ return NULL_TREE;
+ break;
+
+ case BUILT_IN_EXPF:
+ case BUILT_IN_LOGF:
+ case BUILT_IN_LOG10F:
+ case BUILT_IN_POWF:
+ case BUILT_IN_TANHF:
+ case BUILT_IN_TANF:
+ case BUILT_IN_ATANF:
+ case BUILT_IN_ATAN2F:
+ case BUILT_IN_ATANHF:
+ case BUILT_IN_CBRTF:
+ case BUILT_IN_SINHF:
+ case BUILT_IN_SINF:
+ case BUILT_IN_ASINHF:
+ case BUILT_IN_ASINF:
+ case BUILT_IN_COSHF:
+ case BUILT_IN_COSF:
+ case BUILT_IN_ACOSHF:
+ case BUILT_IN_ACOSF:
+ if (el_mode != SFmode || n != 4)
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
+ }
+
+ bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
+
+ if (fn == BUILT_IN_LOGF)
+ strcpy (name, "vmlsLn4");
+ else if (fn == BUILT_IN_LOG)
+ strcpy (name, "vmldLn2");
+ else if (n == 4)
+ {
+ sprintf (name, "vmls%s", bname+10);
+ name[strlen (name)-1] = '4';
+ }
+ else
+ sprintf (name, "vmld%s2", bname+10);
+
+ /* Convert to uppercase. */
+ name[4] &= ~0x20;
+
+ arity = 0;
+ for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
+ args = TREE_CHAIN (args))
+ arity++;
+
+ if (arity == 1)
+ fntype = build_function_type_list (type_out, type_in, NULL);
+ else
+ fntype = build_function_type_list (type_out, type_in, type_in, NULL);
+
+ /* Build a function declaration for the vectorized function. */
+ new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
+ TREE_PUBLIC (new_fndecl) = 1;
+ DECL_EXTERNAL (new_fndecl) = 1;
+ DECL_IS_NOVOPS (new_fndecl) = 1;
+ TREE_READONLY (new_fndecl) = 1;
+
+ return new_fndecl;
+}
+
+/* Handler for an ACML-style interface to
+ a library with vectorized intrinsics. */
static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
switch (fn)
{
/* Vectorized version of sqrt to rsqrt conversion. */
- case IX86_BUILTIN_SQRTPS:
- return ix86_builtins[IX86_BUILTIN_RSQRTPS];
+ case IX86_BUILTIN_SQRTPS_NR:
+ return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
default:
return NULL_TREE;
where integer modes in MMX/SSE registers are not tieable
because of missing QImode and HImode moves to, from or between
MMX/SSE registers. */
- return MAX (ix86_cost->mmxsse_to_integer, 8);
+ return MAX (8, ix86_cost->mmxsse_to_integer);
if (MAYBE_FLOAT_CLASS_P (class1))
return ix86_cost->fp_move;
nbits = 7;
/* Compute costs correctly for widening multiplication. */
- if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op1) == ZERO_EXTEND)
+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
&& GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
== GET_MODE_SIZE (mode))
{
{
tree type = TREE_TYPE (function);
bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
+ int nregs;
if (TARGET_64BIT)
{
return gen_rtx_REG (DImode, parm_regs[aggr]);
}
- if (ix86_function_regparm (type, function) > 0 && !stdarg_p (type))
+ nregs = ix86_function_regparm (type, function);
+
+ if (nregs > 0 && !stdarg_p (type))
{
- int regno = 0;
+ int regno;
+
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
- regno = 2;
+ regno = aggr ? DX_REG : CX_REG;
+ else
+ {
+ regno = AX_REG;
+ if (aggr)
+ {
+ regno = DX_REG;
+ if (nregs == 1)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
+ }
return gen_rtx_REG (SImode, regno);
}
{
/* Put the this parameter into %eax. */
xops[0] = this_param;
- xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
+ xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
}
else
tmp = gen_rtx_REG (DImode, R10_REG);
else
{
- int tmp_regno = 2 /* ECX */;
+ int tmp_regno = CX_REG;
if (lookup_attribute ("fastcall",
TYPE_ATTRIBUTES (TREE_TYPE (function))))
- tmp_regno = 0 /* EAX */;
+ tmp_regno = AX_REG;
tmp = gen_rtx_REG (SImode, tmp_regno);
}
else
#endif /* TARGET_MACHO */
{
- tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ tmp = gen_rtx_REG (SImode, CX_REG);
output_set_got (tmp, NULL_RTX);
xops[1] = tmp;
if (X86_FILE_START_FLTUSED)
fputs ("\t.global\t__fltused\n", asm_out_file);
if (ix86_asm_dialect == ASM_INTEL)
- fputs ("\t.intel_syntax\n", asm_out_file);
+ fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
for (i = 0; i < n_elts; ++i)
{
x = XVECEXP (vals, 0, i);
- if (!CONSTANT_P (x))
+ if (!(CONST_INT_P (x)
+ || GET_CODE (x) == CONST_DOUBLE
+ || GET_CODE (x) == CONST_FIXED))
n_var++, one_var = i;
else if (x != CONST0_RTX (inner_mode))
all_const_zero = false;
/* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
- /* x0 = 1./b estimate */
+ /* x0 = rcp(b) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
bool recip)
{
- rtx x0, e0, e1, e2, e3, three, half, zero, mask;
+ rtx x0, e0, e1, e2, e3, mthree, mhalf;
+ REAL_VALUE_TYPE r;
x0 = gen_reg_rtx (mode);
e0 = gen_reg_rtx (mode);
e2 = gen_reg_rtx (mode);
e3 = gen_reg_rtx (mode);
- three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
- half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
+ real_from_integer (&r, VOIDmode, -3, -1, 0);
+ mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
- mask = gen_reg_rtx (mode);
+ real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
+ mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
if (VECTOR_MODE_P (mode))
{
- three = ix86_build_const_vector (SFmode, true, three);
- half = ix86_build_const_vector (SFmode, true, half);
+ mthree = ix86_build_const_vector (SFmode, true, mthree);
+ mhalf = ix86_build_const_vector (SFmode, true, mhalf);
}
- three = force_reg (mode, three);
- half = force_reg (mode, half);
-
- zero = force_reg (mode, CONST0_RTX(mode));
+ /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
+ rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
- /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
- 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
-
- /* Compare a to zero. */
- emit_insn (gen_rtx_SET (VOIDmode, mask,
- gen_rtx_NE (mode, a, zero)));
-
- /* x0 = 1./sqrt(a) estimate */
+ /* x0 = rsqrt(a) estimate */
emit_insn (gen_rtx_SET (VOIDmode, x0,
gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT)));
- /* Filter out infinity. */
- if (VECTOR_MODE_P (mode))
- emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
- gen_rtx_AND (mode,
- gen_lowpart (V4SFmode, x0),
- gen_lowpart (V4SFmode, mask))));
- else
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_AND (mode, x0, mask)));
+
+ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
+ if (!recip)
+ {
+ rtx zero, mask;
+
+ zero = gen_reg_rtx (mode);
+ mask = gen_reg_rtx (mode);
+
+ zero = force_reg (mode, CONST0_RTX(mode));
+ emit_insn (gen_rtx_SET (VOIDmode, mask,
+ gen_rtx_NE (mode, zero, a)));
+
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_AND (mode, x0, mask)));
+ }
/* e0 = x0 * a */
emit_insn (gen_rtx_SET (VOIDmode, e0,
/* e1 = e0 * x0 */
emit_insn (gen_rtx_SET (VOIDmode, e1,
gen_rtx_MULT (mode, e0, x0)));
- /* e2 = 3. - e1 */
+
+ /* e2 = e1 - 3. */
+ mthree = force_reg (mode, mthree);
emit_insn (gen_rtx_SET (VOIDmode, e2,
- gen_rtx_MINUS (mode, three, e1)));
+ gen_rtx_PLUS (mode, e1, mthree)));
+
+ mhalf = force_reg (mode, mhalf);
if (recip)
- /* e3 = .5 * x0 */
+ /* e3 = -.5 * x0 */
emit_insn (gen_rtx_SET (VOIDmode, e3,
- gen_rtx_MULT (mode, half, x0)));
+ gen_rtx_MULT (mode, x0, mhalf)));
else
- /* e3 = .5 * e0 */
+ /* e3 = -.5 * e0 */
emit_insn (gen_rtx_SET (VOIDmode, e3,
- gen_rtx_MULT (mode, half, e0)));
+ gen_rtx_MULT (mode, e0, mhalf)));
/* ret = e2 * e3 */
emit_insn (gen_rtx_SET (VOIDmode, res,
gen_rtx_MULT (mode, e2, e3)));
/* Validate whether a SSE5 instruction is valid or not.
OPERANDS is the array of operands.
NUM is the number of operands.
- USES_OC0 is true if the instruction uses OC0 and provides 4 varients.
+ USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
NUM_MEMORY is the maximum number of memory operands to accept. */
-bool ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
+bool
+ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
{
int mem_mask;
int mem_count;
else if (num == 4 && num_memory == 2)
{
/* If there are two memory operations, we can load one of the memory ops
- into the destination register. This is for optimizating the
+ into the destination register. This is for optimizing the
multiply/add ops, which the combiner has optimized both the multiply
and the add insns to have a memory operation. We have to be careful
that the destination doesn't overlap with the inputs. */
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
+
#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers