/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of GCC.
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
+#include "cselib.h"
static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);
2, /* cost of reg,reg fld/fst */
{6, 6, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of loading integer registers */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{6, 6}, /* cost of loading MMX registers
in SImode and DImode */
/* X86_TUNE_ZERO_EXTEND_WITH_AND */
m_486 | m_PENT,
- /* X86_TUNE_USE_BIT_TEST */
- m_386,
-
/* X86_TUNE_UNROLL_STRLEN */
m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
replacement is long decoded, so this split helps here as well. */
m_K6,
+ /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
+ from FP to FP. */
+ m_AMDFAM10 | m_GENERIC,
+
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */
m_AMDFAM10,
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;
-/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
-
/* Define the structure for the machine field in struct function. */
struct stack_local_entry GTY(())
<- HARD_FRAME_POINTER
[saved regs]
+ [padding0]
+
+ [saved SSE regs]
+
[padding1] \
)
[va_arg registers] (
*/
struct ix86_frame
{
+ int padding0;
+ int nsseregs;
int nregs;
int padding1;
int va_arg_size;
enum fpmath_unit ix86_fpmath;
/* Which cpu are we scheduling for. */
+enum attr_cpu ix86_schedule;
+
+/* Which cpu are we optimizing for. */
enum processor_type ix86_tune;
/* Which instruction set architecture to use. */
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
rtx, rtx, int);
+static void ix86_add_new_builtins (int);
enum ix86_function_specific_strings
{
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
struct cl_target_option *);
-static bool ix86_valid_option_attribute_p (tree, tree, tree, int);
-static bool ix86_valid_option_attribute_inner_p (tree, char *[]);
+static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
+static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
{ "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
};
- const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
- + sizeof (flag_opts) / sizeof (flag_opts[0])
- + 6)][2];
+ const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
char isa_other[40];
char target_other[40];
}
/* Pick out the options in isa options. */
- for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
+ for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
{
if ((isa & isa_opts[i].mask) != 0)
{
}
/* Add flag options. */
- for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
+ for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
{
if ((flags & flag_opts[i].mask) != 0)
{
if (num == 0)
return NULL;
- gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
+ gcc_assert (num < ARRAY_SIZE (opts));
/* Size the string. */
len = 0;
{
const char *const name; /* processor name or nickname. */
const enum processor_type processor;
+ const enum attr_cpu schedule;
const unsigned /*enum pta_flags*/ flags;
}
const processor_alias_table[] =
{
- {"i386", PROCESSOR_I386, 0},
- {"i486", PROCESSOR_I486, 0},
- {"i586", PROCESSOR_PENTIUM, 0},
- {"pentium", PROCESSOR_PENTIUM, 0},
- {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
- {"winchip-c6", PROCESSOR_I486, PTA_MMX},
- {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
- {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
- {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
- {"i686", PROCESSOR_PENTIUMPRO, 0},
- {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
- {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
- {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
- {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
- {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
- {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
- {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
- {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
- {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
- | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_CX16 | PTA_NO_SAHF)},
- {"core2", PROCESSOR_CORE2, (PTA_64BIT
- | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSSE3
- | PTA_CX16)},
- {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- |PTA_PREFETCH_SSE)},
- {"k6", PROCESSOR_K6, PTA_MMX},
- {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
- {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
- {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_PREFETCH_SSE)},
- {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_PREFETCH_SSE)},
- {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE)},
- {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE)},
- {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE)},
- {"x86-64", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"k8", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_NO_SAHF)},
- {"opteron", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_NO_SAHF)},
- {"athlon64", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_NO_SAHF)},
- {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2
- | PTA_NO_SAHF)},
- {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A
- | PTA_CX16 | PTA_ABM)},
- {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
- | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
- | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A
- | PTA_CX16 | PTA_ABM)},
- {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
- {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
+ {"i386", PROCESSOR_I386, CPU_NONE, 0},
+ {"i486", PROCESSOR_I486, CPU_NONE, 0},
+ {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
+ {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
+ {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
+ {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
+ {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
+ {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
+ {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
+ {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
+ {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE},
+ {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
+       PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2},
+ {"prescott", PROCESSOR_NOCONA, CPU_NONE,
+ PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
+ {"nocona", PROCESSOR_NOCONA, CPU_NONE,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_CX16 | PTA_NO_SAHF},
+ {"core2", PROCESSOR_CORE2, CPU_CORE2,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16},
+ {"geode", PROCESSOR_GEODE, CPU_GEODE,
+      PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
+ {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
+ {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
+ {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
+ {"x86-64", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
+ {"k8", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"k8-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"opteron", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"opteron-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"athlon64", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
+ {"athlon-fx", PROCESSOR_K8, CPU_K8,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_NO_SAHF},
+ {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
+ {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
+ {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
+ 0 /* flags are only used for -march switch. */ },
+ {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
+ PTA_64BIT /* flags are only used for -march switch. */ },
};
int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Set up prefix/suffix so the error messages refer to either the command
- line argument, or the attribute(option). */
+ line argument, or the attribute(target). */
if (main_args_p)
{
prefix = "-m";
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
{
+ ix86_schedule = processor_alias_table[i].schedule;
ix86_arch = processor_alias_table[i].processor;
/* Default cpu tuning to the architecture. */
ix86_tune = ix86_arch;
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
{
+ ix86_schedule = processor_alias_table[i].schedule;
ix86_tune = processor_alias_table[i].processor;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
{
if (! strcmp (ix86_tune_string,
processor_alias_table[i].name))
break;
+ ix86_schedule = processor_alias_table[i].schedule;
ix86_tune = processor_alias_table[i].processor;
}
else
ix86_function_specific_save (struct cl_target_option *ptr)
{
gcc_assert (IN_RANGE (ix86_arch, 0, 255));
+ gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
gcc_assert (IN_RANGE (ix86_tune, 0, 255));
gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
ptr->arch = ix86_arch;
+ ptr->schedule = ix86_schedule;
ptr->tune = ix86_tune;
ptr->fpmath = ix86_fpmath;
ptr->branch_cost = ix86_branch_cost;
int i;
ix86_arch = ptr->arch;
+ ix86_schedule = ptr->schedule;
ix86_tune = ptr->tune;
ix86_fpmath = ptr->fpmath;
ix86_branch_cost = ptr->branch_cost;
}
\f
-/* Inner function to process the attribute((option(...))), take an argument and
+/* Inner function to process the attribute((target(...))), take an argument and
set the current options from the argument. If we have a list, recursively go
over the list. */
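+/* For illustration (not itself part of this code): a declaration such as
+     int foo (void) __attribute__((target("avx,popcnt")));
+   arrives here with the string "avx,popcnt", which is split into
+   comma-separated options and processed one at a time.  */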
static bool
-ix86_valid_option_attribute_inner_p (tree args, char *p_strings[])
+ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
{
char *next_optstr;
bool ret = true;
IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
IX86_ATTR_ISA ("abm", OPT_mabm),
IX86_ATTR_ISA ("aes", OPT_maes),
+ IX86_ATTR_ISA ("avx", OPT_mavx),
IX86_ATTR_ISA ("mmx", OPT_mmmx),
IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
for (; args; args = TREE_CHAIN (args))
if (TREE_VALUE (args)
- && !ix86_valid_option_attribute_inner_p (TREE_VALUE (args), p_strings))
+ && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
ret = false;
return ret;
/* Find the option. */
ch = *p;
opt = N_OPTS;
- for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
+ for (i = 0; i < ARRAY_SIZE (attrs); i++)
{
type = attrs[i].type;
opt_len = attrs[i].len;
/* Process the option. */
if (opt == N_OPTS)
{
- error ("attribute(option(\"%s\")) is unknown", orig_p);
+ error ("attribute(target(\"%s\")) is unknown", orig_p);
ret = false;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
tree
-ix86_valid_option_attribute_tree (tree args)
+ix86_valid_target_attribute_tree (tree args)
{
const char *orig_arch_string = ix86_arch_string;
const char *orig_tune_string = ix86_tune_string;
= TREE_TARGET_OPTION (target_option_default_node);
/* Process each of the options on the chain. */
- if (! ix86_valid_option_attribute_inner_p (args, option_strings))
+ if (! ix86_valid_target_attribute_inner_p (args, option_strings))
return NULL_TREE;
/* If the changed options are different from the default, rerun override_options,
/* Do any overrides, such as arch=xxx, or tune=xxx support. */
override_options (false);
+ /* Add any builtin functions with the new isa if any. */
+ ix86_add_new_builtins (ix86_isa_flags);
+
/* Save the current options unless we are validating options for
#pragma. */
t = build_target_option_node ();
return t;
}
-/* Hook to validate attribute((option("string"))). */
+/* Hook to validate attribute((target("string"))). */
static bool
-ix86_valid_option_attribute_p (tree fndecl,
+ix86_valid_target_attribute_p (tree fndecl,
tree ARG_UNUSED (name),
tree args,
int ARG_UNUSED (flags))
{
- struct cl_target_option cur_opts;
+ struct cl_target_option cur_target;
bool ret = true;
- tree new_opts;
-
- cl_target_option_save (&cur_opts);
- new_opts = ix86_valid_option_attribute_tree (args);
- if (!new_opts)
+ tree old_optimize = build_optimization_node ();
+ tree new_target, new_optimize;
+ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
+
+ /* If the function changed the optimization levels as well as setting target
+ options, start with the optimizations specified. */
+ if (func_optimize && func_optimize != old_optimize)
+ cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
+
+ /* The target attributes may also change some optimization flags, so update
+ the optimization options if necessary. */
+ cl_target_option_save (&cur_target);
+ new_target = ix86_valid_target_attribute_tree (args);
+ new_optimize = build_optimization_node ();
+
+ if (!new_target)
ret = false;
else if (fndecl)
- DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_opts;
+ {
+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
+
+ if (old_optimize != new_optimize)
+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
+ }
+
+ cl_target_option_restore (&cur_target);
+
+ if (old_optimize != new_optimize)
+ cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
- cl_target_option_restore (&cur_opts);
return ret;
}
/* Dllimport'd functions are also called indirectly. */
if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
+ && !TARGET_64BIT
&& decl && DECL_DLLIMPORT_P (decl)
&& ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
return false;
+ /* If we need to align the outgoing stack, then sibcalling would
+ unalign the stack, which may break the called function. */
+ if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
+ return false;
+
/* Otherwise okay. That also includes certain types of indirect calls. */
return true;
}
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
- int call_abi = 0;
- /* For libcalls it is possible that there is no fndecl at hand.
- Therefore assume for this case the default abi of the target. */
- if (!fndecl)
- call_abi = DEFAULT_ABI;
- else
+ int call_abi = SYSV_ABI;
+ if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
call_abi = ix86_function_abi (fndecl);
- if (call_abi == 1)
+ else
+ call_abi = ix86_function_type_abi (fndecl);
+ if (call_abi == MS_ABI)
return 32;
return 0;
}
/* Implementation of call abi switching target hook. Specific to FNDECL
the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
- for more details.
- To prevent redudant calls of costy function init_regs (), it checks not to
- reset register usage for default abi. */
+ for more details. */
void
ix86_call_abi_override (const_tree fndecl)
{
cfun->machine->call_abi = DEFAULT_ABI;
else
cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
- if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
- {
- if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
- {
- call_used_regs[4 /*RSI*/] = 0;
- call_used_regs[5 /*RDI*/] = 0;
- init_regs ();
- }
- }
- else if (TARGET_64BIT)
- {
- if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
- {
- call_used_regs[4 /*RSI*/] = 1;
- call_used_regs[5 /*RDI*/] = 1;
- init_regs ();
- }
- }
+}
+
+/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
+   expensive re-initialization via init_regs each time we switch function
+   context, since it is needed only during RTL expansion.  */
+static void
+ix86_maybe_switch_abi (void)
+{
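+  /* RSI and RDI are callee-saved under the MS ABI but call-used under
+     SYSV.  If the recorded call_used_regs state for RSI disagrees with
+     the current function's ABI, the register tables are stale and must
+     be recomputed.  */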
+  if (TARGET_64BIT
+      && call_used_regs[4 /*RSI*/] == (cfun->machine->call_abi == MS_ABI))
+ init_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
memset (cum, 0, sizeof (*cum));
- cum->call_abi = ix86_function_type_abi (fntype);
+ if (fndecl)
+ cum->call_abi = ix86_function_abi (fndecl);
+ else
+ cum->call_abi = ix86_function_type_abi (fntype);
/* Set up the number of registers to use for passing arguments. */
+
+ if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
+    sorry ("ms_abi attribute requires -maccumulate-outgoing-args or subtarget optimization implying it");
cum->nregs = ix86_regparm;
if (TARGET_64BIT)
{
/* The partial classes are now full classes. */
if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
subclasses[0] = X86_64_SSE_CLASS;
- if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
+ if (subclasses[0] == X86_64_INTEGERSI_CLASS
+ && !((bit_offset % 64) == 0 && bytes == 4))
subclasses[0] = X86_64_INTEGER_CLASS;
for (i = 0; i < words; i++)
case CSImode:
case CHImode:
case CQImode:
- if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
- classes[0] = X86_64_INTEGERSI_CLASS;
- else
- classes[0] = X86_64_INTEGER_CLASS;
- return 1;
+ {
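+	/* SIZE counts from the start of the containing eightbyte, so a
+	   value at a misaligned BIT_OFFSET may straddle two eightbytes
+	   and need two classes.  */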
+	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
+
+ if (size <= 32)
+ {
+ classes[0] = X86_64_INTEGERSI_CLASS;
+ return 1;
+ }
+ else if (size <= 64)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ return 1;
+ }
+	else if (size <= 64 + 32)
+ {
+ classes[0] = X86_64_INTEGER_CLASS;
+ classes[1] = X86_64_INTEGERSI_CLASS;
+ return 2;
+ }
+	else if (size <= 64 + 64)
+ {
+ classes[0] = classes[1] = X86_64_INTEGER_CLASS;
+ return 2;
+ }
+ else
+ gcc_unreachable ();
+ }
case CDImode:
case TImode:
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
{
unsigned int regno;
- /* Avoid the AL settings for the Unix64 ABI. */
+  /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
+     The value -2 marks that the current function call uses the MS ABI.  */
if (mode == VOIDmode)
- return constm1_rtx;
+ return GEN_INT (-2);
/* If we've run out of registers, it goes on the stack. */
if (cum->nregs == 0)
int i;
int regparm = ix86_regparm;
- if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
+ if (cum->call_abi != DEFAULT_ABI)
regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
- if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
- return;
+ /* GPR size of varargs save area. */
+ if (cfun->va_list_gpr_size)
+ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
+ else
+ ix86_varargs_gpr_size = 0;
- /* Indicate to allocate space on the stack for varargs save area. */
- ix86_save_varrargs_registers = 1;
+ /* FPR size of varargs save area. We don't need it if we don't pass
+ anything in SSE registers. */
+ if (cum->sse_nregs && cfun->va_list_fpr_size)
+ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
+ else
+ ix86_varargs_fpr_size = 0;
+
+ if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
+ return;
save_area = frame_pointer_rtx;
set = get_varargs_alias_set ();
x86_64_int_parameter_registers[i]));
}
- if (cum->sse_nregs && cfun->va_list_fpr_size)
+ if (ix86_varargs_fpr_size)
{
/* Now emit code to save SSE registers. The AX parameter contains number
of SSE parameter registers used to call this function. We use
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
- 8 * X86_64_REGPARM_MAX + 127)));
+ ix86_varargs_gpr_size + 127)));
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
if (stdarg_p (fntype))
function_arg_advance (&next_cum, mode, type, 1);
- if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (cum->call_abi == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
else
setup_incoming_varargs_64 (&next_cum);
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
- if (cfun->va_list_fpr_size)
+ if (TARGET_SSE && cfun->va_list_fpr_size)
{
type = TREE_TYPE (fpr);
t = build2 (MODIFY_EXPR, type, fpr,
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
- if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
+ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
{
/* Find the register save area.
Prologue of the function save it right above stack frame. */
type = TREE_TYPE (sav);
t = make_tree (type, frame_pointer_rtx);
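+      /* If no GPRs were saved, only the FPR part of the save area was
+	 allocated; bias SAV backwards so that the usual offsets into
+	 the full register save area still apply.  */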
+ if (!ix86_varargs_gpr_size)
+ t = build2 (POINTER_PLUS_EXPR, type, t,
+ size_int (-8 * X86_64_REGPARM_MAX));
t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
f_ovf = TREE_CHAIN (f_fpr);
f_sav = TREE_CHAIN (f_ovf);
+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
+ build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
valist = build_va_arg_indirect_ref (valist);
- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
enum machine_mode mode = GET_MODE (reg);
tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
tree addr_type = build_pointer_type (piece_type);
+ tree daddr_type = build_pointer_type_for_mode (piece_type,
+ ptr_mode, true);
tree src_addr, src;
int src_offset;
tree dest_addr, dest;
size_int (src_offset));
src = build_va_arg_indirect_ref (src_addr);
- dest_addr = fold_convert (addr_type, addr);
- dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
+ dest_addr = fold_convert (daddr_type, addr);
+ dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
size_int (INTVAL (XEXP (slot, 1))));
dest = build_va_arg_indirect_ref (dest_addr);
/* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
is what will be referenced by the Mach-O PIC subsystem. */
if (!label)
- ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif
(*targetm.asm_out.internal_label) (asm_out_file, "L",
is what will be referenced by the Mach-O PIC subsystem. */
#if TARGET_MACHO
if (!label)
- ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
else
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (label));
&& (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
-/* Return number of registers to be saved on the stack. */
+/* Return number of saved general purpose registers.  */
static int
ix86_nsaved_regs (void)
int nregs = 0;
int regno;
- for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
- if (ix86_save_reg (regno, true))
- nregs++;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+      nregs++;
+ return nregs;
+}
+
+/* Return number of saved SSE registers.  */
+
+static int
+ix86_nsaved_sseregs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ if (ix86_cfun_abi () != MS_ABI)
+ return 0;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+      nregs++;
return nregs;
}
HOST_WIDE_INT size = get_frame_size ();
frame->nregs = ix86_nsaved_regs ();
+ frame->nsseregs = ix86_nsaved_sseregs ();
total_size = size;
stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+  /* The MS ABI seems to require stack alignment to always be 16, except
+     within function prologues.  */
+ if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ {
+ preferred_alignment = 16;
+ stack_alignment_needed = 16;
+ crtl->preferred_stack_boundary = 128;
+ crtl->stack_alignment_needed = 128;
+ }
+
gcc_assert (!size || stack_alignment_needed);
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed);
/* Register save area */
offset += frame->nregs * UNITS_PER_WORD;
- /* Va-arg area */
- if (ix86_save_varrargs_registers)
- {
- offset += X86_64_VARARGS_SIZE;
- frame->va_arg_size = X86_64_VARARGS_SIZE;
- }
+ /* Align SSE reg save area. */
+ if (frame->nsseregs)
+ frame->padding0 = ((offset + 16 - 1) & -16) - offset;
else
- frame->va_arg_size = 0;
+ frame->padding0 = 0;
+
+ /* SSE register save area. */
+ offset += frame->padding0 + frame->nsseregs * 16;
+
+ /* Va-arg area */
+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
+ offset += frame->va_arg_size;
/* Align start of frame for local function. */
frame->padding1 = ((offset + stack_alignment_needed - 1)
frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
fprintf (stderr, "\n");
- fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
fprintf (stderr, "size: %ld\n", (long)size);
+ fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
+ fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
+ fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
unsigned int regno;
rtx insn;
- for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
- if (ix86_save_reg (regno, true))
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
RTX_FRAME_RELATED_P (insn) = 1;
rtx insn;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, true))
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
Pmode, offset),
}
}
+/* Emit code to save SSE registers using MOV insns.  First register
+   is stored at POINTER + OFFSET.  */
+static void
+ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+ rtx mem;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
+ set_mem_align (mem, 128);
+ insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += 16;
+ }
+}
+
/* Expand prologue or epilogue stack adjustment.
The pattern exist to put a dependency on all ebp-based memory accesses.
STYLE should be negative if instructions should be marked as frame related,
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
+  /* The incoming stack frame has to be aligned to at least
+     parm_stack_boundary.  */
+ if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
+ ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
+
/* Stack at entrance of main is aligned by runtime. We use the
smallest incoming stack boundary. */
if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
RTX_FRAME_RELATED_P (insn) = 1;
}
- allocate = frame.to_allocate;
+ allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
if (!frame.save_regs_using_mov)
ix86_emit_save_regs ();
emit_move_insn (eax, GEN_INT (allocate));
if (TARGET_64BIT)
- insn = gen_allocate_stack_worker_64 (eax);
+ insn = gen_allocate_stack_worker_64 (eax, eax);
else
- insn = gen_allocate_stack_worker_32 (eax);
+ insn = gen_allocate_stack_worker_32 (eax, eax);
insn = emit_insn (insn);
RTX_FRAME_RELATED_P (insn) = 1;
t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
|| !frame.to_allocate
|| crtl->stack_realign_needed)
ix86_emit_save_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate);
+ frame.to_allocate
+ + frame.nsseregs * 16 + frame.padding0);
else
ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
-frame.nregs * UNITS_PER_WORD);
}
+ if (!frame_pointer_needed
+ || !frame.to_allocate
+ || crtl->stack_realign_needed)
+ ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate);
+ else
+ ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
+ - frame.nregs * UNITS_PER_WORD
+ - frame.nsseregs * 16
+ - frame.padding0);
pic_reg_used = false;
if (pic_offset_table_rtx
rtx base_address = gen_rtx_MEM (Pmode, pointer);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, maybe_eh_return))
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
{
/* Ensure that adjust_address won't be forced to produce pointer
out of range allowed by x86-64 instruction set. */
offset = 0;
}
emit_move_insn (gen_rtx_REG (Pmode, regno),
- adjust_address (base_address, Pmode, offset));
+ adjust_address (base_address, Pmode, offset));
offset += UNITS_PER_WORD;
}
}
+/* Emit code to restore saved SSE registers using MOV insns.  First
+   register is restored from POINTER + OFFSET.  */
+static void
+ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+ int maybe_eh_return)
+{
+ int regno;
+ rtx base_address = gen_rtx_MEM (TImode, pointer);
+ rtx mem;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+ {
+ /* Ensure that adjust_address won't be forced to produce pointer
+ out of range allowed by x86-64 instruction set. */
+ if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
+ {
+ rtx r11;
+
+ r11 = gen_rtx_REG (DImode, R11_REG);
+ emit_move_insn (r11, GEN_INT (offset));
+ emit_insn (gen_adddi3 (r11, r11, pointer));
+ base_address = gen_rtx_MEM (TImode, r11);
+ offset = 0;
+ }
+ mem = adjust_address (base_address, TImode, offset);
+ set_mem_align (mem, 128);
+ emit_move_insn (gen_rtx_REG (TImode, regno), mem);
+ offset += 16;
+ }
+}
+
/* Restore function stack, frame, and registers. */
void
if (crtl->calls_eh_return && style != 2)
offset -= 2;
offset *= -UNITS_PER_WORD;
+ offset -= frame.nsseregs * 16 + frame.padding0;
/* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's
if (!frame_pointer_needed
|| (sp_valid && !frame.to_allocate)
|| stack_realign_fp)
- ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate, style == 2);
+ {
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
+ ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate
+ + frame.nsseregs * 16
+ + frame.padding0, style == 2);
+ }
else
- ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
- offset, style == 2);
+ {
+ ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
+ offset, style == 2);
+ ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
+ offset
+ + frame.nsseregs * 16
+ + frame.padding0, style == 2);
+ }
/* eh_return epilogues need %ecx added to the stack pointer. */
if (style == 2)
{
tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
tmp = plus_constant (tmp, (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD));
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0));
emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
}
}
else if (!frame_pointer_needed)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD),
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0),
style);
/* If not an i386, mov & pop is faster than "leave". */
else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
GEN_INT (offset), style);
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.nsseregs * 16), style);
+ }
+ else if (frame.to_allocate || frame.nsseregs)
+ {
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate,
+ style == 2);
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nsseregs * 16
+ + frame.padding0), style);
}
- else if (frame.to_allocate)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.to_allocate), style);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, false))
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
if (frame_pointer_needed)
{
requires to two regs - that would mean more pseudos with longer
lifetimes. */
static int
-ix86_address_cost (rtx x)
+ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
struct ix86_address parts;
int cost = 1;
static bool
darwin_local_data_pic (rtx disp)
{
- if (GET_CODE (disp) == MINUS)
- {
- if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
- || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
- if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
- {
- const char *sym_name = XSTR (XEXP (disp, 1), 0);
- if (! strcmp (sym_name, "<pic base>"))
- return true;
- }
- }
-
- return false;
+ return (GET_CODE (disp) == UNSPEC
+ && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant. We already know this
x = XVECEXP (inner, 0, 0);
return (GET_CODE (x) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_MACHOPIC_OFFSET:
+ return legitimate_pic_address_disp_p (x);
default:
return false;
}
reason_rtx = disp;
if (GET_CODE (disp) == CONST
- && GET_CODE (XEXP (disp, 0)) == UNSPEC)
+ && GET_CODE (XEXP (disp, 0)) == UNSPEC
+ && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
switch (XINT (XEXP (disp, 0), 1))
{
/* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
case UNSPEC_INDNTPOFF:
fputs ("@INDNTPOFF", file);
break;
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ break;
+#endif
default:
output_operand_lossage ("invalid UNSPEC as operand");
break;
}
}
+/* Return true if X is a representation of the PIC register. This copes
+ with calls from ix86_find_base_term, where the register might have
+ been replaced by a cselib value. */
+
+static bool
+ix86_pic_register_p (rtx x)
+{
+ if (GET_CODE (x) == VALUE)
+ return (pic_offset_table_rtx
+ && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
+ else
+ return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
+}
+
/* In the name of slightly smaller debug output, and to cater to
general assembler lossage, recognize PIC+GOTOFF and turn it back
into a direct symbol reference.
|| GET_CODE (XEXP (x, 1)) != CONST)
return orig_x;
- if (REG_P (XEXP (x, 0))
- && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ if (ix86_pic_register_p (XEXP (x, 0)))
/* %ebx + GOT/GOTOFF */
;
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
/* %ebx + %reg * scale + GOT/GOTOFF */
reg_addend = XEXP (x, 0);
- if (REG_P (XEXP (reg_addend, 0))
- && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
+ if (ix86_pic_register_p (XEXP (reg_addend, 0)))
reg_addend = XEXP (reg_addend, 1);
- else if (REG_P (XEXP (reg_addend, 1))
- && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
+ else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
reg_addend = XEXP (reg_addend, 0);
else
return orig_x;
if (TARGET_MACHO && darwin_local_data_pic (x)
&& !MEM_P (orig_x))
- result = XEXP (x, 0);
+ result = XVECEXP (x, 0, 0);
if (! result)
return orig_x;
if (const_addend)
- result = gen_rtx_PLUS (Pmode, result, const_addend);
+ result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
if (reg_addend)
result = gen_rtx_PLUS (Pmode, reg_addend, result);
return result;
|| XINT (term, 1) != UNSPEC_GOTPCREL)
return x;
- term = XVECEXP (term, 0, 0);
-
- if (GET_CODE (term) != SYMBOL_REF
- && GET_CODE (term) != LABEL_REF)
- return x;
-
- return term;
+ return XVECEXP (term, 0, 0);
}
- term = ix86_delegitimize_address (x);
-
- if (GET_CODE (term) != SYMBOL_REF
- && GET_CODE (term) != LABEL_REF)
- return x;
-
- return term;
+ return ix86_delegitimize_address (x);
}
\f
static void
output_addr_const (file, op);
fputs ("@INDNTPOFF", file);
break;
+#if TARGET_MACHO
+ case UNSPEC_MACHOPIC_OFFSET:
+ output_addr_const (file, op);
+ putc ('-', file);
+ machopic_output_function_base_name (file);
+ break;
+#endif
default:
return false;
*/
if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
- && BRANCH_COST >= 2)
+ && BRANCH_COST (optimize_insn_for_speed_p (),
+ false) >= 2)
{
if (cf == 0)
{
optab op;
rtx var, orig_out, out, tmp;
- if (BRANCH_COST <= 2)
+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
return 0; /* FAIL */
/* If one of the two operands is an interesting constant, load a
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
}
+ if (CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ srcmem = shallow_copy_rtx (srcmem);
+ set_mem_size (destmem, count);
+ set_mem_size (srcmem, count);
+ }
+ else
+ {
+ if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
+ if (MEM_SIZE (srcmem))
+ set_mem_size (srcmem, NULL_RTX);
+ }
emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
destexp, srcexp));
}
Arguments have same meaning as for previous function */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
- rtx count,
- enum machine_mode mode)
+ rtx count, enum machine_mode mode,
+ rtx orig_value)
{
rtx destexp;
rtx countreg;
}
else
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
+ if (orig_value == const0_rtx && CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ set_mem_size (destmem, count);
+ }
+ else if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
gcc_assert (desired_alignment <= 8);
}
+/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
+   ALIGN_BYTES is how many bytes need to be copied.  */
+static rtx
+expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
+ int desired_align, int align_bytes)
+{
+ rtx src = *srcp;
+ rtx src_size, dst_size;
+ int off = 0;
+ int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ src_align_bytes = desired_align - src_align_bytes;
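+  /* If known, SRC_ALIGN_BYTES is now the number of bytes (modulo
+     DESIRED_ALIGN) to copy before SRC becomes aligned to DESIRED_ALIGN;
+     it stays negative when SRC's misalignment is unknown.  */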
+ src_size = MEM_SIZE (src);
+ dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
+ off = 1;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ src = adjust_automodify_address_nv (src, HImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0
+ && (src_align_bytes & 1) == (align_bytes & 1)
+ && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
+ set_mem_align (src, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ src = adjust_automodify_address_nv (src, SImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ off = 4;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 7) == (align_bytes & 7))
+ src_align = 8;
+ else if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (src_align > (unsigned int) desired_align)
+ src_align = desired_align;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ if (src_size)
+    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
+ *srcp = src;
+ return dst;
+}
+
/* Set enough from DEST to align DEST known to by aligned by ALIGN to
DESIRED_ALIGNMENT. */
static void
gcc_assert (desired_alignment <= 8);
}
+/* Set enough bytes of DST to align DST, known to be aligned by ALIGN,
+   to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
+static rtx
+expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
+ int desired_align, int align_bytes)
+{
+ int off = 0;
+ rtx dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ off = 1;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (QImode, value)));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (HImode, value)));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ off = 4;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (SImode, value)));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ return dst;
+}
+
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
int *dynamic_check)
{
const struct stringop_algs * algs;
+ bool optimize_for_speed;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
&& alg != rep_prefix_8_byte))
const struct processor_costs *cost;
- cost = optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost;
+ /* Even if the string operation call is cold, we still might spend a lot
+ of time processing large blocks. */
+ if (optimize_function_for_size_p (cfun)
+ || (optimize_insn_for_size_p ()
+ && expected_size != -1 && expected_size < 256))
+ optimize_for_speed = false;
+ else
+ optimize_for_speed = true;
+
+ cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
*dynamic_check = -1;
if (memset)
if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
return stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
- else if (optimize_insn_for_size_p ())
+ else if (!optimize_for_speed)
{
if (!count || (count & 3))
return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
int size_needed = 0, epilogue_size_needed;
- int desired_align = 0;
+ int desired_align = 0, align_bytes = 0;
enum stringop_alg alg;
int dynamic_check;
+ bool need_zero_guard = false;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
if (CONST_INT_P (expected_align_exp)
&& INTVAL (expected_align_exp) > align)
align = INTVAL (expected_align_exp);
+ /* ALIGN is the minimum of destination and source alignment, but we care here
+ just about destination alignment. */
+ else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
+ align = MEM_ALIGN (dst) / BITS_PER_UNIT;
+
if (CONST_INT_P (count_exp))
count = expected_size = INTVAL (count_exp);
if (CONST_INT_P (expected_size_exp) && count == 0)
case no_stringop:
gcc_unreachable ();
case loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode);
break;
case unrolled_loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
break;
case rep_prefix_8_byte:
size_needed = 4;
break;
case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
case loop_1_byte:
+ need_zero_guard = true;
size_needed = 1;
break;
}
/* Alignment code needs count to be in register. */
if (CONST_INT_P (count_exp) && desired_align > align)
- count_exp = force_reg (counter_mode (count_exp), count_exp);
+ {
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ count_exp = force_reg (counter_mode (count_exp), count_exp);
+ }
gcc_assert (desired_align >= 1 && align >= 1);
/* Ensure that alignment prologue won't copy past end of block. */
if (desired_align > align)
{
- /* Except for the first move in epilogue, we no longer know
- constant offset in aliasing info. It don't seems to worth
- the pain to maintain it for the first move, so throw away
- the info early. */
- src = change_address (src, BLKmode, srcreg);
- dst = change_address (dst, BLKmode, destreg);
- expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
- desired_align);
+ if (align_bytes == 0)
+ {
+	  /* Except for the first move in the epilogue, we no longer know
+	     the constant offset in aliasing info.  It doesn't seem worth
+	     the pain to maintain it for the first move, so throw away
+	     the info early.  */
+ src = change_address (src, BLKmode, srcreg);
+ dst = change_address (dst, BLKmode, destreg);
+ expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+	  /* If we know how many bytes need to be copied before DST is
+	     sufficiently aligned, maintain aliasing info accurately.  */
+ dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard && !count)
+ {
+ /* It is possible that we copied enough so the main loop will not
+ execute. */
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
}
if (label && size_needed == 1)
{
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
int size_needed = 0, epilogue_size_needed;
- int desired_align = 0;
+ int desired_align = 0, align_bytes = 0;
enum stringop_alg alg;
rtx promoted_val = NULL;
bool force_loopy_epilogue = false;
int dynamic_check;
+ bool need_zero_guard = false;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
case no_stringop:
gcc_unreachable ();
case loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode);
break;
case unrolled_loop:
+ need_zero_guard = true;
size_needed = GET_MODE_SIZE (Pmode) * 4;
break;
case rep_prefix_8_byte:
size_needed = 4;
break;
case rep_prefix_1_byte:
+ size_needed = 1;
+ break;
case loop_1_byte:
+ need_zero_guard = true;
size_needed = 1;
break;
}
/* Alignment code needs count to be in register. */
if (CONST_INT_P (count_exp) && desired_align > align)
{
- enum machine_mode mode = SImode;
- if (TARGET_64BIT && (count & ~0xffffffff))
- mode = DImode;
- count_exp = force_reg (mode, count_exp);
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ {
+ enum machine_mode mode = SImode;
+ if (TARGET_64BIT && (count & ~0xffffffff))
+ mode = DImode;
+ count_exp = force_reg (mode, count_exp);
+ }
}
/* Do the cheap promotion to allow better CSE across the
main loop and epilogue (ie one load of the big constant in the
if (size_needed > 1 || (desired_align > 1 && desired_align > align))
{
epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
- /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
+ /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
Make sure it is power of 2. */
epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
if (desired_align > align)
{
- /* Except for the first move in epilogue, we no longer know
- constant offset in aliasing info. It don't seems to worth
- the pain to maintain it for the first move, so throw away
- the info early. */
- dst = change_address (dst, BLKmode, destreg);
- expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
- desired_align);
+ if (align_bytes == 0)
+ {
+	  /* Except for the first move in the epilogue, we no longer know
+	     the constant offset in aliasing info.  It doesn't seem worth
+	     the pain to maintain it for the first move, so throw away
+	     the info early.  */
+ dst = change_address (dst, BLKmode, destreg);
+ expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+ /* If we know how many bytes need to be stored before dst is
+ sufficiently aligned, maintain aliasing info accurately. */
+ dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard && !count)
+ {
+ /* It is possible that we copied enough so the main loop will not
+ execute. */
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1
+ || expected_size < (desired_align - align) / 2 + size_needed)
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ }
}
if (label && size_needed == 1)
{
break;
case rep_prefix_8_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
- DImode);
+ DImode, val_exp);
break;
case rep_prefix_4_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
- SImode);
+ SImode, val_exp);
break;
case rep_prefix_1_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
- QImode);
+ QImode, val_exp);
break;
}
/* Adjust properly the offset of src and dest memory for aliasing. */
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
- rtx callarg2 ATTRIBUTE_UNUSED,
+ rtx callarg2,
rtx pop, int sibcall)
{
rtx use = NULL, call;
+ enum calling_abi function_call_abi;
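+
+  /* A CALLARG2 value of -2 is the marker set up in function_arg to flag
+     a call that uses the MS ABI; anything else means a SYSV ABI call.  */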
+ if (callarg2 && INTVAL (callarg2) == -2)
+ function_call_abi = MS_ABI;
+ else
+ function_call_abi = SYSV_ABI;
if (pop == const0_rtx)
pop = NULL;
gcc_assert (!TARGET_64BIT || !pop);
pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ gcc_assert (ix86_cfun_abi () != MS_ABI || function_call_abi != SYSV_ABI);
+ }
+ /* We need to represent that SI and DI registers are clobbered
+ by SYSV calls. */
+ if (ix86_cfun_abi () == MS_ABI && function_call_abi == SYSV_ABI)
+ {
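+      /* Hard registers 27/28 are xmm6/xmm7 and 45..52 are xmm8..xmm15;
+	 together with RSI and RDI these are clobbered by SYSV calls but
+	 preserved under the MS ABI.  */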
+ static int clobbered_registers[] = {27, 28, 45, 46, 47, 48, 49, 50, 51,
+ 52, SI_REG, DI_REG};
+ unsigned int i;
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
+ rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
+
+ vec[0] = call;
+ vec[1] = unspec;
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG
+ (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
+
+ call = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
+ + 2, vec));
}
call = emit_call_insn (call);
IX86_BUILTIN_VPERMILPS,
IX86_BUILTIN_VPERMILPD256,
IX86_BUILTIN_VPERMILPS256,
- IX86_BUILTIN_VPERMIL2PD,
- IX86_BUILTIN_VPERMIL2PS,
- IX86_BUILTIN_VPERMIL2PD256,
- IX86_BUILTIN_VPERMIL2PS256,
IX86_BUILTIN_VPERM2F128PD256,
IX86_BUILTIN_VPERM2F128PS256,
IX86_BUILTIN_VPERM2F128SI256,
IX86_BUILTIN_STOREUPD256,
IX86_BUILTIN_STOREUPS256,
IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_MOVNTDQ256,
+ IX86_BUILTIN_MOVNTPD256,
+ IX86_BUILTIN_MOVNTPS256,
IX86_BUILTIN_LOADDQU256,
IX86_BUILTIN_STOREDQU256,
IX86_BUILTIN_MASKLOADPD,
IX86_BUILTIN_FNMSUBSD,
IX86_BUILTIN_FNMSUBPS,
IX86_BUILTIN_FNMSUBPD,
+ IX86_BUILTIN_PCMOV,
IX86_BUILTIN_PCMOV_V2DI,
IX86_BUILTIN_PCMOV_V4SI,
IX86_BUILTIN_PCMOV_V8HI,
/* Table for the ix86 builtin decls. */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
-/* Table to record which ISA options the builtin needs. */
-static int ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+/* Table of all of the builtin functions that are possible with different ISA's
+ but are waiting to be built until a function is declared to use that
+ ISA. */
+struct builtin_isa GTY(())
+{
+ tree type; /* builtin type to use in the declaration */
+ const char *name; /* function name */
+ int isa; /* isa_flags this builtin is defined for */
+ bool const_p; /* true if the declaration is constant */
+};
+
+static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
+
/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
* of which isa_flags to use in the ix86_builtins_isa array. Stores the
* function decl in the ix86_builtins array. Returns the function decl or
* NULL_TREE, if the builtin was not added.
*
- * Record all builtins, even if it isn't an instruction set in the current ISA
- * in case the user uses function specific options for a different ISA. When
- * the builtin is expanded, check at that time whether it is valid. */
+ * If the front end has a special hook for builtin functions, delay adding
+ * builtin functions that aren't in the current ISA until the ISA is changed
+ * with function specific optimization.  Doing so can save about 300K for the
+ * default compiler. When the builtin is expanded, check at that time whether
+ * it is valid.
+ *
+ * If the front end doesn't have a special hook, record all builtins, even
+ * those not in the current ISA, in case the user uses function specific
+ * options for a different ISA, so that we don't get scope errors if a
+ * builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
{
- decl = add_builtin_function (name, type, code, BUILT_IN_MD,
- NULL, NULL_TREE);
- ix86_builtins[(int) code] = decl;
- ix86_builtins_isa[(int) code] = mask;
+ ix86_builtins_isa[(int) code].isa = mask;
+
+      if ((mask & ix86_isa_flags) != 0
+	  || (lang_hooks.builtin_function
+	      == lang_hooks.builtin_function_ext_scope))
+	{
+ decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
+ NULL_TREE);
+ ix86_builtins[(int) code] = decl;
+ ix86_builtins_isa[(int) code].type = NULL_TREE;
+ }
+ else
+ {
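+	  /* Defer the declaration; ix86_add_new_builtins will create the
+	     decl when a function that enables this ISA is declared.  */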
+ ix86_builtins[(int) code] = NULL_TREE;
+ ix86_builtins_isa[(int) code].const_p = false;
+ ix86_builtins_isa[(int) code].type = type;
+ ix86_builtins_isa[(int) code].name = name;
+ }
}
return decl;
tree decl = def_builtin (mask, name, type, code);
if (decl)
TREE_READONLY (decl) = 1;
+ else
+ ix86_builtins_isa[(int) code].const_p = true;
+
return decl;
}
+/* Add any new builtin functions for a given ISA that may not have been
+   declared.  This saves a bit of space compared to adding every
+   declaration to the tree up front, whether or not it is ever used.  It
+   is intended to be called when function specific options change the
+   active ISA.  */
+
+static void
+ix86_add_new_builtins (int isa)
+{
+ int i;
+ tree decl;
+
+  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
+ {
+ if ((ix86_builtins_isa[i].isa & isa) != 0
+ && ix86_builtins_isa[i].type != NULL_TREE)
+ {
+ decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
+ ix86_builtins_isa[i].type,
+ i, BUILT_IN_MD, NULL,
+ NULL_TREE);
+
+ ix86_builtins[i] = decl;
+ ix86_builtins_isa[i].type = NULL_TREE;
+ if (ix86_builtins_isa[i].const_p)
+ TREE_READONLY (decl) = 1;
+ }
+ }
+}
+
/* Bits for builtin_description.flag. */
/* Set when we don't support the comparison natively, and should
V2DF_FTYPE_PCV2DF_V2DF,
V2DI_FTYPE_PV2DI,
VOID_FTYPE_PV2SF_V4SF,
+ VOID_FTYPE_PV4DI_V4DI,
VOID_FTYPE_PV2DI_V2DI,
VOID_FTYPE_PCHAR_V32QI,
VOID_FTYPE_PCHAR_V16QI,
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
- V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
- V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
- V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
- V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
- { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_nandv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
V4DF_type_node, V4DF_type_node,
integer_type_node,
NULL_TREE);
- tree v8sf_ftype_v8sf_v8sf_v8si_int
- = build_function_type_list (V8SF_type_node,
- V8SF_type_node, V8SF_type_node,
- V8SI_type_node, integer_type_node,
- NULL_TREE);
- tree v4df_ftype_v4df_v4df_v4di_int
- = build_function_type_list (V4DF_type_node,
- V4DF_type_node, V4DF_type_node,
- V4DI_type_node, integer_type_node,
- NULL_TREE);
- tree v4sf_ftype_v4sf_v4sf_v4si_int
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V4SF_type_node,
- V4SI_type_node, integer_type_node,
- NULL_TREE);
- tree v2df_ftype_v2df_v2df_v2di_int
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, V2DF_type_node,
- V2DI_type_node, integer_type_node,
- NULL_TREE);
tree v8sf_ftype_pcfloat
= build_function_type_list (V8SF_type_node,
pcfloat_type_node,
V8SI_type_node, V4SI_type_node,
integer_type_node,
NULL_TREE);
+ tree pv4di_type_node = build_pointer_type (V4DI_type_node);
+ tree void_ftype_pv4di_v4di
+ = build_function_type_list (void_type_node,
+ pv4di_type_node, V4DI_type_node,
+ NULL_TREE);
tree v8sf_ftype_v8sf_v4sf_int
= build_function_type_list (V8SF_type_node,
V8SF_type_node, V4SF_type_node,
case VOID_FTYPE_PV2SF_V4SF:
type = void_ftype_pv2sf_v4sf;
break;
+ case VOID_FTYPE_PV4DI_V4DI:
+ type = void_ftype_pv4di_v4di;
+ break;
case VOID_FTYPE_PV2DI_V2DI:
type = void_ftype_pv2di_v2di;
break;
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
type = v1di_ftype_v1di_v1di_int;
break;
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
- type = v8sf_ftype_v8sf_v8sf_v8si_int;
- break;
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
- type = v4df_ftype_v4df_v4df_v4di_int;
- break;
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
- type = v4sf_ftype_v4sf_v4sf_v4si_int;
- break;
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
- type = v2df_ftype_v2df_v2df_v2di_int;
- break;
default:
gcc_unreachable ();
}
nargs = 3;
nargs_constant = 2;
break;
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
- nargs = 4;
- nargs_constant = 1;
- break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
- case CODE_FOR_avx_vpermil2v2df3:
- case CODE_FOR_avx_vpermil2v4sf3:
- case CODE_FOR_avx_vpermil2v4df3:
- case CODE_FOR_avx_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
memory = 0;
break;
case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV4DI_V4DI:
case VOID_FTYPE_PV2DI_V2DI:
case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
current ISA based on the command line switches. With function specific
options, we need to check in the context of the function making the call
whether it is supported. */
- if (ix86_builtins_isa[fcode]
- && !(ix86_builtins_isa[fcode] & ix86_isa_flags))
+ if (ix86_builtins_isa[fcode].isa
+ && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
{
- char *opts = ix86_target_string (ix86_builtins_isa[fcode], 0, NULL,
+ char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
NULL, NULL, false);
if (!opts)
scanned. In either case, *TOTAL contains the cost result. */
static bool
-ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
+ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
{
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
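+  /* When not optimizing this code for speed, use the size-oriented cost
+     table instead of the processor-specific one.  */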
+ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
switch (code)
{
&& GET_MODE (XEXP (x, 0)) == SImode)
*total = 1;
else if (TARGET_ZERO_EXTEND_WITH_AND)
- *total = ix86_cost->add;
+ *total = cost->add;
else
- *total = ix86_cost->movzx;
+ *total = cost->movzx;
return false;
case SIGN_EXTEND:
- *total = ix86_cost->movsx;
+ *total = cost->movsx;
return false;
case ASHIFT:
HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
if (value == 1)
{
- *total = ix86_cost->add;
+ *total = cost->add;
return false;
}
if ((value == 2 || value == 3)
- && ix86_cost->lea <= ix86_cost->shift_const)
+ && cost->lea <= cost->shift_const)
{
- *total = ix86_cost->lea;
+ *total = cost->lea;
return false;
}
}
if (CONST_INT_P (XEXP (x, 1)))
{
if (INTVAL (XEXP (x, 1)) > 32)
- *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
+ *total = cost->shift_const + COSTS_N_INSNS (2);
else
- *total = ix86_cost->shift_const * 2;
+ *total = cost->shift_const * 2;
}
else
{
if (GET_CODE (XEXP (x, 1)) == AND)
- *total = ix86_cost->shift_var * 2;
+ *total = cost->shift_var * 2;
else
- *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
+ *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
}
}
else
{
if (CONST_INT_P (XEXP (x, 1)))
- *total = ix86_cost->shift_const;
+ *total = cost->shift_const;
else
- *total = ix86_cost->shift_var;
+ *total = cost->shift_var;
}
return false;
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE scalar cost should be used here. */
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fmul;
+ *total = cost->fmul;
return false;
}
else
op0 = XEXP (op0, 0), mode = GET_MODE (op0);
}
- *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
- + nbits * ix86_cost->mult_bit
- + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
+ *total = (cost->mult_init[MODE_INDEX (mode)]
+ + nbits * cost->mult_bit
+ + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
return true;
}
case UMOD:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fdiv;
+ *total = cost->fdiv;
else
- *total = ix86_cost->divide[MODE_INDEX (mode)];
+ *total = cost->divide[MODE_INDEX (mode)];
return false;
case PLUS:
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
- outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
if (val == 2 || val == 4 || val == 8)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
- *total = ix86_cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
- *total += rtx_cost (XEXP (x, 1), outer_code);
+ *total = cost->lea;
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, speed);
return true;
}
}
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fadd;
+ *total = cost->fadd;
return false;
}
/* FALLTHRU */
case XOR:
if (!TARGET_64BIT && mode == DImode)
{
- *total = (ix86_cost->add * 2
- + (rtx_cost (XEXP (x, 0), outer_code)
+ *total = (cost->add * 2
+ + (rtx_cost (XEXP (x, 0), outer_code, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
- + (rtx_cost (XEXP (x, 1), outer_code)
+ + (rtx_cost (XEXP (x, 1), outer_code, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
{
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
else if (X87_FLOAT_MODE_P (mode))
{
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
else if (FLOAT_MODE_P (mode))
{
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fchs;
+ *total = cost->fchs;
return false;
}
/* FALLTHRU */
case NOT:
if (!TARGET_64BIT && mode == DImode)
- *total = ix86_cost->add * 2;
+ *total = cost->add * 2;
else
- *total = ix86_cost->add;
+ *total = cost->add;
return false;
case COMPARE:
{
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
- *total = (ix86_cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
- + rtx_cost (const1_rtx, outer_code));
+ *total = (cost->add
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
+ + rtx_cost (const1_rtx, outer_code, speed));
return true;
}
return false;
case ABS:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fabs;
+ *total = cost->fabs;
return false;
case SQRT:
if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
/* ??? SSE cost should be used here. */
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
else if (X87_FLOAT_MODE_P (mode))
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
else if (FLOAT_MODE_P (mode))
/* ??? SSE vector cost should be used here. */
- *total = ix86_cost->fsqrt;
+ *total = cost->fsqrt;
return false;
case UNSPEC:
ix86_expand_vector_init_interleave (enum machine_mode mode,
rtx target, rtx *ops, int n)
{
- enum machine_mode first_imode, second_imode, third_imode;
+ enum machine_mode first_imode, second_imode, third_imode, inner_mode;
int i, j;
rtx op0, op1;
rtx (*gen_load_even) (rtx, rtx, rtx);
gen_load_even = gen_vec_setv8hi;
gen_interleave_first_low = gen_vec_interleave_lowv4si;
gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ inner_mode = HImode;
first_imode = V4SImode;
second_imode = V2DImode;
third_imode = VOIDmode;
gen_load_even = gen_vec_setv16qi;
gen_interleave_first_low = gen_vec_interleave_lowv8hi;
gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ inner_mode = QImode;
first_imode = V8HImode;
second_imode = V4SImode;
third_imode = V2DImode;
emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load even elements into the second position. */
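+      /* The scalar element must be in a register for the vec_set
+	 patterns, hence the force_reg.  */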
- emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+ emit_insn ((*gen_load_even) (op0,
+ force_reg (inner_mode,
+ ops [i + i + 1]),
const1_rtx));
/* Cast vector to FIRST_IMODE vector. */
if (!TARGET_SSE2)
break;
+ /* Don't use ix86_expand_vector_init_interleave if we can't
+ move from GPR to SSE register directly. */
+ if (!TARGET_INTER_UNIT_MOVES)
+ break;
+
n = GET_MODE_NUNITS (mode);
for (i = 0; i < n; i++)
ops[i] = XVECEXP (vals, 0, i);
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
-#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_option_attribute_p
+#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save
#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
-#undef TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION
-#define TARGET_OPTION_COLD_ATTRIBUTE_SETS_OPTIMIZATION true
-
-#undef TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION
-#define TARGET_OPTION_HOT_ATTRIBUTE_SETS_OPTIMIZATION true
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
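+/* ix86_maybe_switch_abi re-initializes the register usage information
+   when the function being expanded uses a calling ABI different from
+   the one currently in effect.  */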
struct gcc_target targetm = TARGET_INITIALIZER;
\f