#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
#define TARGET_SSE4A OPTION_ISA_SSE4A
+#define TARGET_SSE5 OPTION_ISA_SSE5
+#define TARGET_ROUND OPTION_ISA_ROUND
+
+/* SSE5 and SSE4.1 define the same round instructions.  */
+#define OPTION_MASK_ISA_ROUND (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE5)
+#define OPTION_ISA_ROUND ((ix86_isa_flags & OPTION_MASK_ISA_ROUND) != 0)
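+
+/* For example, -msse4.1 sets OPTION_MASK_ISA_SSE4_1 in ix86_isa_flags,
+   making OPTION_ISA_ROUND (and hence TARGET_ROUND) true; -msse5
+   enables the same round patterns via OPTION_MASK_ISA_SSE5.  */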
#include "config/vxworks-dummy.h"
When size is unknown, the UNKNOWN_SIZE alg is used. When size is
known at compile time or estimated via feedback, the SIZE array
is walked in order until MAX is greater than the estimate (or -1
- means infinity). Corresponding ALG is used then.
+ means infinity). Corresponding ALG is used then.
For example initializer:
- {{256, loop}, {-1, rep_prefix_4_byte}}
+ {{256, loop}, {-1, rep_prefix_4_byte}}
will use loop for blocks smaller than or equal to 256 bytes; the rep
prefix will be used otherwise. */
struct stringop_algs
scalar-to-vector operation. */
const int vec_to_scalar_cost; /* Cost of vector-to-scalar operation. */
const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
- const int vec_align_load_cost; /* Cost of aligned vector load. */
+ const int vec_align_load_cost; /* Cost of aligned vector load. */
const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
const int vec_store_cost; /* Cost of vector store. */
const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
X86_TUNE_NOT_UNPAIRABLE,
X86_TUNE_NOT_VECTORMODE,
X86_TUNE_USE_VECTOR_CONVERTS,
+ X86_TUNE_FUSE_CMP_AND_BRANCH,
X86_TUNE_LAST
};
#define TARGET_MOVE_M1_VIA_OR ix86_tune_features[X86_TUNE_MOVE_M1_VIA_OR]
#define TARGET_NOT_UNPAIRABLE ix86_tune_features[X86_TUNE_NOT_UNPAIRABLE]
#define TARGET_NOT_VECTORMODE ix86_tune_features[X86_TUNE_NOT_VECTORMODE]
-#define TARGET_USE_VECTOR_CONVERTS ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
+#define TARGET_USE_VECTOR_CONVERTS \
+ ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
+#define TARGET_FUSE_CMP_AND_BRANCH \
+ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
X86_ARCH_LAST
};
-
+
extern unsigned int ix86_arch_features[X86_ARCH_LAST];
#define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE]
#define TARGET_PREFETCH_SSE x86_prefetch_sse
#define TARGET_SAHF x86_sahf
#define TARGET_RECIP x86_recip
+#define TARGET_FUSED_MADD x86_fused_muladd
+#define TARGET_AES (TARGET_SSE2 && x86_aes)
+#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul)
#define ASSEMBLER_DIALECT (ix86_asm_dialect)
#define TARGET_MACHO 0
/* Likewise, for the Windows 64-bit ABI. */
-#define TARGET_64BIT_MS_ABI 0
+#define TARGET_64BIT_MS_ABI (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
+
+/* Available call ABIs.  */
+enum
+{
+ SYSV_ABI = 0,
+ MS_ABI = 1
+};
+
+/* The default ABI used by the target.  */
+#define DEFAULT_ABI SYSV_ABI
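+
+/* Note that the ABI of a particular function may differ from
+   DEFAULT_ABI; ix86_cfun_abi () reports the ABI of the current
+   function, which is what TARGET_64BIT_MS_ABI tests above.  */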
/* Subtargets may reset this to 1 in order to enable 96-bit long double
with the rounding mode forced to 53 bits. */
#define HAVE_LOCAL_CPU_DETECT
#endif
+#if TARGET_64BIT_DEFAULT
+#define OPT_ARCH64 "!m32"
+#define OPT_ARCH32 "m32"
+#else
+#define OPT_ARCH64 "m64"
+#define OPT_ARCH32 "!m64"
+#endif
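+
+/* OPT_ARCH32 and OPT_ARCH64 give the spec condition that holds when
+   compiling 32-bit resp. 64-bit code: with a 64-bit default, the mere
+   absence of -m32 selects the 64-bit variant.  */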
+
/* Support for configure-time defaults of some command line options.
The order here is important so that -march doesn't squash the
tune or cpu values. */
-#define OPTION_DEFAULT_SPECS \
+#define OPTION_DEFAULT_SPECS \
{"tune", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
- {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
- {"arch", "%{!march=*:-march=%(VALUE)}"}
+ {"tune_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"tune_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+ {"cpu_32", "%{" OPT_ARCH32 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
+ {"arch", "%{!march=*:-march=%(VALUE)}"}, \
+ {"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \
+ {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"},
/* Specs for the compiler proper */
builtin_define_std ("i386"); \
} \
\
- /* Built-ins based on -mtune= (or -march= if no \
- -mtune= given). */ \
- if (TARGET_386) \
- builtin_define ("__tune_i386__"); \
- else if (TARGET_486) \
- builtin_define ("__tune_i486__"); \
- else if (TARGET_PENTIUM) \
+ /* Built-ins based on -march=. */ \
+ switch (ix86_arch) \
+ { \
+ case PROCESSOR_I386: \
+ break; \
+ case PROCESSOR_I486: \
+ builtin_define ("__i486"); \
+ builtin_define ("__i486__"); \
+ break; \
+ case PROCESSOR_PENTIUM: \
+ builtin_define ("__i586"); \
+ builtin_define ("__i586__"); \
+ builtin_define ("__pentium"); \
+ builtin_define ("__pentium__"); \
+ if (last_arch_char == 'x') \
+ builtin_define ("__pentium_mmx__"); \
+ break; \
+ case PROCESSOR_PENTIUMPRO: \
+ builtin_define ("__i686"); \
+ builtin_define ("__i686__"); \
+ builtin_define ("__pentiumpro"); \
+ builtin_define ("__pentiumpro__"); \
+ break; \
+ case PROCESSOR_GEODE: \
+ builtin_define ("__geode"); \
+ builtin_define ("__geode__"); \
+ break; \
+ case PROCESSOR_K6: \
+ builtin_define ("__k6"); \
+ builtin_define ("__k6__"); \
+ if (last_arch_char == '2') \
+ builtin_define ("__k6_2__"); \
+ else if (last_arch_char == '3') \
+ builtin_define ("__k6_3__"); \
+ break; \
+ case PROCESSOR_ATHLON: \
+ builtin_define ("__athlon"); \
+ builtin_define ("__athlon__"); \
+ /* Only plain "athlon" lacks SSE. */ \
+ if (last_arch_char != 'n') \
+ builtin_define ("__athlon_sse__"); \
+ break; \
+ case PROCESSOR_K8: \
+ builtin_define ("__k8"); \
+ builtin_define ("__k8__"); \
+ break; \
+ case PROCESSOR_AMDFAM10: \
+ builtin_define ("__amdfam10"); \
+ builtin_define ("__amdfam10__"); \
+ break; \
+ case PROCESSOR_PENTIUM4: \
+ builtin_define ("__pentium4"); \
+ builtin_define ("__pentium4__"); \
+ break; \
+ case PROCESSOR_NOCONA: \
+ builtin_define ("__nocona"); \
+ builtin_define ("__nocona__"); \
+ break; \
+ case PROCESSOR_CORE2: \
+ builtin_define ("__core2"); \
+ builtin_define ("__core2__"); \
+ break; \
+ case PROCESSOR_GENERIC32: \
+ case PROCESSOR_GENERIC64: \
+ case PROCESSOR_max: \
+ gcc_unreachable (); \
+ } \
+ \
+ /* Built-ins based on -mtune=. */ \
+ switch (ix86_tune) \
{ \
+ case PROCESSOR_I386: \
+ builtin_define ("__tune_i386__"); \
+ break; \
+ case PROCESSOR_I486: \
+ builtin_define ("__tune_i486__"); \
+ break; \
+ case PROCESSOR_PENTIUM: \
builtin_define ("__tune_i586__"); \
builtin_define ("__tune_pentium__"); \
if (last_tune_char == 'x') \
builtin_define ("__tune_pentium_mmx__"); \
- } \
- else if (TARGET_PENTIUMPRO) \
- { \
+ break; \
+ case PROCESSOR_PENTIUMPRO: \
builtin_define ("__tune_i686__"); \
builtin_define ("__tune_pentiumpro__"); \
switch (last_tune_char) \
builtin_define ("__tune_pentium2__"); \
break; \
} \
- } \
- else if (TARGET_GEODE) \
- { \
+ break; \
+ case PROCESSOR_GEODE: \
builtin_define ("__tune_geode__"); \
- } \
- else if (TARGET_K6) \
- { \
+ break; \
+ case PROCESSOR_K6: \
builtin_define ("__tune_k6__"); \
if (last_tune_char == '2') \
builtin_define ("__tune_k6_2__"); \
else if (last_tune_char == '3') \
builtin_define ("__tune_k6_3__"); \
- } \
- else if (TARGET_ATHLON) \
- { \
+ break; \
+ case PROCESSOR_ATHLON: \
builtin_define ("__tune_athlon__"); \
/* Only plain "athlon" lacks SSE. */ \
if (last_tune_char != 'n') \
builtin_define ("__tune_athlon_sse__"); \
+ break; \
+ case PROCESSOR_K8: \
+ builtin_define ("__tune_k8__"); \
+ break; \
+ case PROCESSOR_AMDFAM10: \
+ builtin_define ("__tune_amdfam10__"); \
+ break; \
+ case PROCESSOR_PENTIUM4: \
+ builtin_define ("__tune_pentium4__"); \
+ break; \
+ case PROCESSOR_NOCONA: \
+ builtin_define ("__tune_nocona__"); \
+ break; \
+ case PROCESSOR_CORE2: \
+ builtin_define ("__tune_core2__"); \
+ break; \
+ case PROCESSOR_GENERIC32: \
+ case PROCESSOR_GENERIC64: \
+ break; \
+ case PROCESSOR_max: \
+ gcc_unreachable (); \
} \
- else if (TARGET_K8) \
- builtin_define ("__tune_k8__"); \
- else if (TARGET_AMDFAM10) \
- builtin_define ("__tune_amdfam10__"); \
- else if (TARGET_PENTIUM4) \
- builtin_define ("__tune_pentium4__"); \
- else if (TARGET_NOCONA) \
- builtin_define ("__tune_nocona__"); \
- else if (TARGET_CORE2) \
- builtin_define ("__tune_core2__"); \
\
if (TARGET_MMX) \
builtin_define ("__MMX__"); \
builtin_define ("__SSE4_1__"); \
if (TARGET_SSE4_2) \
builtin_define ("__SSE4_2__"); \
+ if (TARGET_AES) \
+ builtin_define ("__AES__"); \
+ if (TARGET_PCLMUL) \
+ builtin_define ("__PCLMUL__"); \
if (TARGET_SSE4A) \
builtin_define ("__SSE4A__"); \
+ if (TARGET_SSE5) \
+ builtin_define ("__SSE5__"); \
if (TARGET_SSE_MATH && TARGET_SSE) \
builtin_define ("__SSE_MATH__"); \
if (TARGET_SSE_MATH && TARGET_SSE2) \
builtin_define ("__SSE2_MATH__"); \
- \
- /* Built-ins based on -march=. */ \
- if (ix86_arch == PROCESSOR_I486) \
- { \
- builtin_define ("__i486"); \
- builtin_define ("__i486__"); \
- } \
- else if (ix86_arch == PROCESSOR_PENTIUM) \
- { \
- builtin_define ("__i586"); \
- builtin_define ("__i586__"); \
- builtin_define ("__pentium"); \
- builtin_define ("__pentium__"); \
- if (last_arch_char == 'x') \
- builtin_define ("__pentium_mmx__"); \
- } \
- else if (ix86_arch == PROCESSOR_PENTIUMPRO) \
- { \
- builtin_define ("__i686"); \
- builtin_define ("__i686__"); \
- builtin_define ("__pentiumpro"); \
- builtin_define ("__pentiumpro__"); \
- } \
- else if (ix86_arch == PROCESSOR_GEODE) \
- { \
- builtin_define ("__geode"); \
- builtin_define ("__geode__"); \
- } \
- else if (ix86_arch == PROCESSOR_K6) \
- { \
- \
- builtin_define ("__k6"); \
- builtin_define ("__k6__"); \
- if (last_arch_char == '2') \
- builtin_define ("__k6_2__"); \
- else if (last_arch_char == '3') \
- builtin_define ("__k6_3__"); \
- } \
- else if (ix86_arch == PROCESSOR_ATHLON) \
- { \
- builtin_define ("__athlon"); \
- builtin_define ("__athlon__"); \
- /* Only plain "athlon" lacks SSE. */ \
- if (last_arch_char != 'n') \
- builtin_define ("__athlon_sse__"); \
- } \
- else if (ix86_arch == PROCESSOR_K8) \
- { \
- builtin_define ("__k8"); \
- builtin_define ("__k8__"); \
- } \
- else if (ix86_arch == PROCESSOR_AMDFAM10) \
- { \
- builtin_define ("__amdfam10"); \
- builtin_define ("__amdfam10__"); \
- } \
- else if (ix86_arch == PROCESSOR_PENTIUM4) \
- { \
- builtin_define ("__pentium4"); \
- builtin_define ("__pentium4__"); \
- } \
- else if (ix86_arch == PROCESSOR_NOCONA) \
- { \
- builtin_define ("__nocona"); \
- builtin_define ("__nocona__"); \
- } \
- else if (ix86_arch == PROCESSOR_CORE2) \
- { \
- builtin_define ("__core2"); \
- builtin_define ("__core2__"); \
- } \
} \
while (0)
-#define TARGET_CPU_DEFAULT_i386 0
-#define TARGET_CPU_DEFAULT_i486 1
-#define TARGET_CPU_DEFAULT_pentium 2
-#define TARGET_CPU_DEFAULT_pentium_mmx 3
-#define TARGET_CPU_DEFAULT_pentiumpro 4
-#define TARGET_CPU_DEFAULT_pentium2 5
-#define TARGET_CPU_DEFAULT_pentium3 6
-#define TARGET_CPU_DEFAULT_pentium4 7
-#define TARGET_CPU_DEFAULT_geode 8
-#define TARGET_CPU_DEFAULT_k6 9
-#define TARGET_CPU_DEFAULT_k6_2 10
-#define TARGET_CPU_DEFAULT_k6_3 11
-#define TARGET_CPU_DEFAULT_athlon 12
-#define TARGET_CPU_DEFAULT_athlon_sse 13
-#define TARGET_CPU_DEFAULT_k8 14
-#define TARGET_CPU_DEFAULT_pentium_m 15
-#define TARGET_CPU_DEFAULT_prescott 16
-#define TARGET_CPU_DEFAULT_nocona 17
-#define TARGET_CPU_DEFAULT_core2 18
-#define TARGET_CPU_DEFAULT_generic 19
-#define TARGET_CPU_DEFAULT_amdfam10 20
-
-#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
- "pentiumpro", "pentium2", "pentium3", \
- "pentium4", "geode", "k6", "k6-2", "k6-3", \
- "athlon", "athlon-4", "k8", \
- "pentium-m", "prescott", "nocona", \
- "core2", "generic", "amdfam10"}
+enum target_cpu_default
+{
+ TARGET_CPU_DEFAULT_generic = 0,
+
+ TARGET_CPU_DEFAULT_i386,
+ TARGET_CPU_DEFAULT_i486,
+ TARGET_CPU_DEFAULT_pentium,
+ TARGET_CPU_DEFAULT_pentium_mmx,
+ TARGET_CPU_DEFAULT_pentiumpro,
+ TARGET_CPU_DEFAULT_pentium2,
+ TARGET_CPU_DEFAULT_pentium3,
+ TARGET_CPU_DEFAULT_pentium4,
+ TARGET_CPU_DEFAULT_pentium_m,
+ TARGET_CPU_DEFAULT_prescott,
+ TARGET_CPU_DEFAULT_nocona,
+ TARGET_CPU_DEFAULT_core2,
+
+ TARGET_CPU_DEFAULT_geode,
+ TARGET_CPU_DEFAULT_k6,
+ TARGET_CPU_DEFAULT_k6_2,
+ TARGET_CPU_DEFAULT_k6_3,
+ TARGET_CPU_DEFAULT_athlon,
+ TARGET_CPU_DEFAULT_athlon_sse,
+ TARGET_CPU_DEFAULT_k8,
+ TARGET_CPU_DEFAULT_amdfam10,
+
+ TARGET_CPU_DEFAULT_max
+};
#ifndef CC1_SPEC
#define CC1_SPEC "%(cc1_cpu) "
{ "cc1_cpu", CC1_CPU_SPEC }, \
SUBTARGET_EXTRA_SPECS
\f
-/* target machine storage layout */
-
-#define LONG_DOUBLE_TYPE_SIZE 80
/* Set the value of FLT_EVAL_METHOD in float.h. When using only the
FPU, assume that the fpcw is set to extended precision; when using
#define TARGET_FLT_EVAL_METHOD \
(TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 0 : 2)
+/* target machine storage layout */
+
#define SHORT_TYPE_SIZE 16
#define INT_TYPE_SIZE 32
#define FLOAT_TYPE_SIZE 32
#define LONG_TYPE_SIZE BITS_PER_WORD
#define DOUBLE_TYPE_SIZE 64
#define LONG_LONG_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 80
+
+#define WIDEST_HARDWARE_FP_SIZE LONG_DOUBLE_TYPE_SIZE
#if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT
#define MAX_BITS_PER_WORD 64
#define PARM_BOUNDARY BITS_PER_WORD
/* Boundary (in *bits*) on which stack pointer should be aligned. */
-#define STACK_BOUNDARY BITS_PER_WORD
+#define STACK_BOUNDARY (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 \
+ : BITS_PER_WORD)
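+
+/* The 64-bit MS ABI keeps the stack 16-byte aligned, so 128 bits can
+   be assumed there; otherwise only the word size is guaranteed.  */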
/* Boundary (in *bits*) on which the stack pointer prefers to be
aligned; the compiler cannot rely on having this alignment. */
One use of this macro is to increase alignment of medium-size
data to make it all fit in fewer cache lines. */
-#define LOCAL_ALIGNMENT(TYPE, ALIGN) ix86_local_alignment ((TYPE), (ALIGN))
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ ix86_local_alignment ((TYPE), VOIDmode, (ALIGN))
+
+/* If defined, a C expression to compute the alignment for a stack slot.
+   TYPE is the data type, MODE is the widest mode available, and ALIGN
+   is the alignment that the slot would ordinarily have.  The value of
+   this macro is used instead of that alignment to align the slot.
+
+   If this macro is not defined, then ALIGN is used when TYPE is NULL;
+   otherwise, LOCAL_ALIGNMENT is used.
+
+   One use of this macro is to set the alignment of a stack slot to the
+   maximum alignment of all possible modes that the slot may have.  */
+
+#define STACK_SLOT_ALIGNMENT(TYPE, MODE, ALIGN) \
+ ix86_local_alignment ((TYPE), (MODE), (ALIGN))
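+
+/* LOCAL_ALIGNMENT above passes VOIDmode, so ix86_local_alignment
+   derives the alignment from TYPE alone; STACK_SLOT_ALIGNMENT also
+   forwards the widest MODE the slot may hold.  */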
/* If defined, a C expression that gives the alignment boundary, in
bits, of an argument with the specified mode and type. If it is
for details. */
#define STACK_REGS
+
#define IS_STACK_MODE(MODE) \
(((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH)) \
|| ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \
/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
2, 2, 2, 2, 2, 2, 2, 2, \
/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
- 2, 2, 2, 2, 2, 2, 2, 2}
+ 2, 2, 2, 2, 2, 2, 2, 2 }
/* 1 for registers not available across function calls.
/* r8, r9, r10, r11, r12, r13, r14, r15*/ \
1, 1, 1, 1, 2, 2, 2, 2, \
/*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/ \
- 1, 1, 1, 1, 1, 1, 1, 1} \
+ 1, 1, 1, 1, 1, 1, 1, 1 }
/* Order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS. List frame pointer
#define ORDER_REGS_FOR_LOCAL_ALLOC x86_order_regs_for_local_alloc ()
+#define OVERRIDE_ABI_FORMAT(FNDECL) ix86_call_abi_override (FNDECL)
+
/* Macro to conditionally modify fixed_regs/call_used_regs. */
#define CONDITIONAL_REGISTER_USAGE \
do { \
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) \
reg_names[i] = ""; \
} \
- if (TARGET_64BIT_MS_ABI) \
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) \
{ \
call_used_regs[4 /*RSI*/] = 0; \
call_used_regs[5 /*RDI*/] = 0; \
applied to them.
*/
-#define HARD_REGNO_NREGS(REGNO, MODE) \
+#define HARD_REGNO_NREGS(REGNO, MODE) \
(FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
: ((MODE) == XFmode \
#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
-#define VALID_SSE2_REG_MODE(MODE) \
- ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
- || (MODE) == V2DImode || (MODE) == DFmode)
+#define VALID_SSE2_REG_MODE(MODE) \
+ ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
+ || (MODE) == V2DImode || (MODE) == DFmode)
#define VALID_SSE_REG_MODE(MODE) \
- ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \
- || (MODE) == SFmode || (MODE) == TFmode)
+ ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode \
+ || (MODE) == SFmode || (MODE) == TFmode)
#define VALID_MMX_REG_MODE_3DNOW(MODE) \
- ((MODE) == V2SFmode || (MODE) == SFmode)
+ ((MODE) == V2SFmode || (MODE) == SFmode)
#define VALID_MMX_REG_MODE(MODE) \
- ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
- || (MODE) == V2SImode || (MODE) == SImode)
+ ((MODE) == V1DImode || (MODE) == DImode \
+ || (MODE) == V2SImode || (MODE) == SImode \
+ || (MODE) == V4HImode || (MODE) == V8QImode)
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions. */
-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
+#define UNITS_PER_SIMD_WORD(MODE) (TARGET_SSE ? 16 : UNITS_PER_WORD)
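+
+/* The new MODE argument is ignored here: x86 uses 16-byte SIMD words
+   for every vectorized element mode once SSE is enabled.  */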
-#define VALID_DFP_MODE_P(MODE) \
- ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
+#define VALID_DFP_MODE_P(MODE) \
+ ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
#define VALID_FP_MODE_P(MODE) \
- ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
- || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \
+ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
+ || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) \
#define VALID_INT_MODE_P(MODE) \
- ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode \
- || (MODE) == DImode \
- || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode \
- || (MODE) == CDImode \
- || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode \
- || (MODE) == TFmode || (MODE) == TCmode)))
+ ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode \
+ || (MODE) == DImode \
+ || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode \
+ || (MODE) == CDImode \
+ || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode \
+ || (MODE) == TFmode || (MODE) == TCmode)))
/* Return true for modes passed in SSE registers. */
-#define SSE_REG_MODE_P(MODE) \
- ((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \
+#define SSE_REG_MODE_P(MODE) \
+ ((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \
|| (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \
|| (MODE) == V4SFmode || (MODE) == V4SImode)
#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \
(CC_REGNO_P (REGNO) ? VOIDmode \
: (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode \
- : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false)\
+ : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false) \
: (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode \
: (MODE) == QImode && (REGNO) >= 4 && !TARGET_64BIT ? SImode \
: (MODE))
+
/* Specify the registers used for certain standard purposes.
The values of these macros are register numbers. */
/* Register in which static-chain is passed to a function.
We use ECX as the static chain register for the 32-bit ABI.  In the
64-bit ABI, ECX is an argument register, so we use R10 instead. */
-#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? FIRST_REX_INT_REG + 10 - 8 : 2)
+#define STATIC_CHAIN_REGNUM (TARGET_64BIT ? R10_REG : CX_REG)
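+
+/* R10_REG and CX_REG name the same hard registers the old expressions
+   (FIRST_REX_INT_REG + 10 - 8, i.e. r10, and 2, i.e. ecx) computed.  */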
/* Register to hold the addressing base for position independent
code access to data items. We don't use PIC pointer for 64bit
#define GOT_SYMBOL_NAME "_GLOBAL_OFFSET_TABLE_"
-/* A C expression which can inhibit the returning of certain function
- values in registers, based on the type of value. A nonzero value
- says to return the function value in memory, just as large
- structures are always returned. Here TYPE will be a C expression
- of type `tree', representing the data type of the value.
-
- Note that values of mode `BLKmode' must be explicitly handled by
- this macro. Also, the option `-fpcc-struct-return' takes effect
- regardless of this macro. On most systems, it is possible to
- leave the macro undefined; this causes a default definition to be
- used, whose value is the constant 1 for `BLKmode' values, and 0
- otherwise.
-
- Do not use this macro to indicate that structures and unions
- should always be returned in memory. You should instead use
- `DEFAULT_PCC_STRUCT_RETURN' to indicate this. */
-
-#define RETURN_IN_MEMORY(TYPE) \
- ix86_return_in_memory (TYPE)
-
/* This is overridden by <cygwin.h>. */
#define MS_AGGREGATE_RETURN 0
#define SSE_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
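+/* Same test for the corresponding vector float modes: V4SFmode needs
+   SSE, V2DFmode needs SSE2.  */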
+#define SSE_VEC_FLOAT_MODE_P(MODE) \
+ ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+
#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
ix86_secondary_memory_needed ((CLASS1), (CLASS2), (MODE), 1)
-/* QImode spills from non-QI registers need a scratch. This does not
- happen often -- the only example so far requires an uninitialized
- pseudo. */
-
-#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, OUT) \
- (((CLASS) == GENERAL_REGS || (CLASS) == LEGACY_REGS \
- || (CLASS) == INDEX_REGS) && !TARGET_64BIT && (MODE) == QImode \
- ? Q_REGS : NO_REGS)
+/* Get_secondary_mem widens integral modes to BITS_PER_WORD.
+   There is no need to emit a full 64-bit move on 64-bit targets
+   for integral modes that can be moved using a 32-bit move.  */
+#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
+ (GET_MODE_BITSIZE (MODE) < 32 && INTEGRAL_MODE_P (MODE) \
+ ? mode_for_size (32, GET_MODE_CLASS (MODE), 0) \
+ : MODE)
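+
+/* Consequently QImode and HImode values are reloaded through memory in
+   SImode, while 32-bit and wider modes keep their own mode.  */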
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS. */
/* If defined, the maximum amount of space required for outgoing arguments will
be computed and placed into the variable
- `current_function_outgoing_args_size'. No space will be pushed onto the
+ `crtl->outgoing_args_size'. No space will be pushed onto the
stack for each call; instead, the function prologue should increase the stack
frame size by this amount. */
This space can be allocated by the caller, or be a part of the
machine-dependent stack frame: `OUTGOING_REG_PARM_STACK_SPACE' says
which. */
-#define REG_PARM_STACK_SPACE(FNDECL) 0
+#define REG_PARM_STACK_SPACE(FNDECL) ix86_reg_parm_stack_space (FNDECL)
+
+#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) \
+  (ix86_function_type_abi (FNTYPE) == MS_ABI ? 1 : 0)
/* Value is the number of bytes of arguments automatically
popped when returning from a subroutine call.
int maybe_vaarg; /* true for calls to possibly variadic functions. */
int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should
be passed in SSE registers. Otherwise 0. */
+ int call_abi; /* Call ABI in use: SYSV_ABI or MS_ABI. */
} CUMULATIVE_ARGS;
/* Initialize a variable CUM of type CUMULATIVE_ARGS
#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
function_arg (&(CUM), (MODE), (TYPE), (NAMED))
-/* Implement `va_start' for varargs and stdarg. */
-#define EXPAND_BUILTIN_VA_START(VALIST, NEXTARG) \
- ix86_va_start (VALIST, NEXTARG)
-
#define TARGET_ASM_FILE_END ix86_file_end
#define NEED_INDICATE_EXEC_STACK 0
All other eliminations are valid. */
#define CAN_ELIMINATE(FROM, TO) \
- ((TO) == STACK_POINTER_REGNUM ? ! frame_pointer_needed : 1)
+ ((TO) == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1)
/* Define the offset between two registers, one to be eliminated, and the other
its replacement, at the start of a routine. */
is also used as the pic register in ELF. So for now, don't allow more than
3 registers to be passed in registers. */
-#define REGPARM_MAX (TARGET_64BIT ? 6 : 3)
+/* ABI-specific values for REGPARM_MAX and SSE_REGPARM_MAX.  */
+#define X86_64_REGPARM_MAX 6
+#define X64_REGPARM_MAX 4
+#define X86_32_REGPARM_MAX 3
+
+#define X86_64_SSE_REGPARM_MAX 8
+#define X64_SSE_REGPARM_MAX 4
+#define X86_32_SSE_REGPARM_MAX (TARGET_SSE ? 3 : 0)
+
+#define REGPARM_MAX (TARGET_64BIT ? (TARGET_64BIT_MS_ABI ? X64_REGPARM_MAX \
+ : X86_64_REGPARM_MAX) \
+ : X86_32_REGPARM_MAX)
-#define SSE_REGPARM_MAX (TARGET_64BIT ? 8 : (TARGET_SSE ? 3 : 0))
+#define SSE_REGPARM_MAX (TARGET_64BIT \
+                         ? (TARGET_64BIT_MS_ABI ? X64_SSE_REGPARM_MAX \
+                                                : X86_64_SSE_REGPARM_MAX) \
+                         : X86_32_SSE_REGPARM_MAX)
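+
+/* E.g. a 64-bit MS ABI function receives at most 4 integer and 4 SSE
+   register arguments, versus 6 and 8 under the SYSV ABI.  */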
#define MMX_REGPARM_MAX (TARGET_64BIT ? 0 : (TARGET_MMX ? 3 : 0))
/* If a clear memory operation would take CLEAR_RATIO or more simple
move-instruction sequences, we will do a clrmem or libcall instead. */
-#define CLEAR_RATIO (optimize_size ? 2 \
- : ix86_cost->move_ratio > 6 ? 6 : ix86_cost->move_ratio)
+#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio))
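+
+/* MIN (6, ix86_cost->move_ratio) is equivalent to the previous
+   conditional: the move ratio is simply capped at 6.  */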
/* Define if shifts truncate the shift count
which implies one can omit a sign-extension or zero-extension
enum processor_type
{
- PROCESSOR_I386, /* 80386 */
+ PROCESSOR_I386 = 0, /* 80386 */
PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
PROCESSOR_PENTIUM,
PROCESSOR_PENTIUMPRO,
int save_varrargs_registers;
int accesses_prev_frame;
int optimize_mode_switching[MAX_386_ENTITIES];
- /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to
- determine the style used. */
+ int needs_cld;
+ /* Set by ix86_compute_frame_layout and used by prologue/epilogue
+ expander to determine the style used. */
int use_fast_prologue_epilogue;
/* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
for. */
ix86_current_function_calls_tls_descriptor macro for a better
approximation. */
int tls_descriptor_call_expanded_p;
+  /* For AMD64 targets, the call ABI in use for the current function:
+     MS_ABI or SYSV_ABI.  */
+  int call_abi;
};
#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
#define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
+#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
#define ix86_tls_descriptor_calls_expanded_in_cfun \
(cfun->machine->tls_descriptor_call_expanded_p)
/* Since tls_descriptor_call_expanded is not cleared, even if all TLS
#define TARG_SCALAR_STORE_COST ix86_cost->scalar_store_cost
/* Cost of any vector operation, excluding load, store, or vector-to-scalar
- operation. */
+ operation. */
#undef TARG_VEC_STMT_COST
#define TARG_VEC_STMT_COST ix86_cost->vec_stmt_cost