#include "ggc.h"
#include "target.h"
#include "target-def.h"
+#include "common/common-target.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
-#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
+#include "diagnostic.h"
enum upper_128bits_state
{
/* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
in basic block BB. Delete it if upper 128bit AVX registers are
unused. If it isn't deleted, move it to just before a jump insn.
-
+
STATE is state of the upper 128bits of AVX registers at entry. */
static void
1, /* cond_not_taken_branch_cost. */
};
+/* Cost tables for the AMD bdver2 processors.  Consumed through the
+   processor_target_table entry added further down in this patch.  */
+struct processor_costs bdver2_cost = {
+  COSTS_N_INSNS (1),			/* cost of an add instruction */
+  COSTS_N_INSNS (1),			/* cost of a lea instruction */
+  COSTS_N_INSNS (1),			/* variable shift costs */
+  COSTS_N_INSNS (1),			/* constant shift costs */
+  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),			/*				 HI */
+   COSTS_N_INSNS (4),			/*				 SI */
+   COSTS_N_INSNS (6),			/*				 DI */
+   COSTS_N_INSNS (6)},			/*			      other */
+  0,					/* cost of multiply per each bit set */
+  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
+   COSTS_N_INSNS (35),			/*			    HI */
+   COSTS_N_INSNS (51),			/*			    SI */
+   COSTS_N_INSNS (83),			/*			    DI */
+   COSTS_N_INSNS (83)},			/*			    other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
+  8,					/* "large" insn */
+  9,					/* MOVE_RATIO */
+  4,				     /* cost for loading QImode using movzbl */
+  {5, 5, 4},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {4, 4, 4},				/* cost of storing integer registers */
+  2,					/* cost of reg,reg fld/fst */
+  {5, 5, 12},				/* cost of loading fp registers
+		   			   in SFmode, DFmode and XFmode */
+  {4, 4, 8},				/* cost of storing fp registers
+ 		   			   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {4, 4},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {4, 4},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2,					/* cost of moving SSE register */
+  {4, 4, 4},				/* cost of loading SSE registers
+					   in SImode, DImode and TImode */
+  {4, 4, 4},				/* cost of storing SSE registers
+					   in SImode, DImode and TImode */
+  2,					/* MMX or SSE register to integer */
+  /* NOTE(review): the K8/AMDFAM10 latency figures below look inherited
+     verbatim from the older AMD cost tables — confirm they still hold
+     for bdver2.  */
+  /* On K8:
+      MOVD reg64, xmmreg Double FSTORE 4
+      MOVD reg32, xmmreg Double FSTORE 4
+     On AMDFAM10:
+      MOVD reg64, xmmreg Double FADD 3
+							1/1  1/1
+      MOVD reg32, xmmreg Double FADD 3
+							1/1  1/1 */
+  16,					/* size of l1 cache.  */
+  2048,					/* size of l2 cache.  */
+  64,					/* size of prefetch block */
+  /* New AMD processors never drop prefetches; if they cannot be performed
+     immediately, they are queued.  We set number of simultaneous prefetches
+     to a large constant to reflect this (it probably is not a good idea not
+     to limit number of prefetches at all, as their execution also takes some
+     time).  */
+  100,					/* number of parallel prefetches */
+  2,					/* Branch cost */
+  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */
+
+  /*  BDVER2 has optimized REP instruction for medium sized blocks, but for
+      very small blocks it is better to use loop. For large blocks, libcall
+      can do nontemporary accesses and beat inline considerably.  */
+  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+  {{libcall, {{8, loop}, {24, unrolled_loop},
+	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
+   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+  6,					/* scalar_stmt_cost.  */
+  4,					/* scalar load_cost.  */
+  4,					/* scalar_store_cost.  */
+  6,					/* vec_stmt_cost.  */
+  0,					/* vec_to_scalar_cost.  */
+  2,					/* scalar_to_vec_cost.  */
+  4,					/* vec_align_load_cost.  */
+  4,					/* vec_unalign_load_cost.  */
+  4,					/* vec_store_cost.  */
+  2,					/* cond_taken_branch_cost.  */
+  1,					/* cond_not_taken_branch_cost.  */
+};
+
+
struct processor_costs btver1_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
-#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
-#define m_NOCONA (1<<PROCESSOR_NOCONA)
-#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
-#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
-#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
-#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
-#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
-#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
-#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
-#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
-#define m_ATOM (1<<PROCESSOR_ATOM)
-
-#define m_GEODE (1<<PROCESSOR_GEODE)
-#define m_K6 (1<<PROCESSOR_K6)
-#define m_K6_GEODE (m_K6 | m_GEODE)
-#define m_K8 (1<<PROCESSOR_K8)
-#define m_ATHLON (1<<PROCESSOR_ATHLON)
-#define m_ATHLON_K8 (m_K8 | m_ATHLON)
-#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
-#define m_BDVER1 (1<<PROCESSOR_BDVER1)
-#define m_BTVER1 (1<<PROCESSOR_BTVER1)
-#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
+#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
+#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
+#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
+#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
+#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
+#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
+#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
+#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
+#define m_ATOM (1<<PROCESSOR_ATOM)
+
+#define m_GEODE (1<<PROCESSOR_GEODE)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_K6_GEODE (m_K6 | m_GEODE)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
+#define m_BDVER1 (1<<PROCESSOR_BDVER1)
+#define m_BDVER2 (1<<PROCESSOR_BDVER2)
+#define m_BDVER (m_BDVER1 | m_BDVER2)
+#define m_BTVER1 (1<<PROCESSOR_BTVER1)
+#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
negatively, so enabling for Generic64 seems like good code size
tradeoff. We can't enable it for 32bit generic because it does not
work well with PPro base chips. */
- m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
+ m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
/* X86_TUNE_PUSH_MEMORY */
- m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
- | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_ZERO_EXTEND_WITH_AND */
m_486 | m_PENT,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
- | m_CORE2I7 | m_GENERIC,
-
- /* X86_TUNE_DEEP_BRANCH_PREDICTION */
- m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
- | m_CORE2I7 | m_GENERIC,
+ m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
~m_386,
/* X86_TUNE_USE_SAHF */
- m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
- | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
+  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
- | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
register stalls on Generic32 compilation setting as well. However
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
+ ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
- ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
+ ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
- | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
+ m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_FAST_PREFIX */
- ~(m_PENT | m_486 | m_386),
+ ~(m_386 | m_486 | m_PENT),
/* X86_TUNE_SINGLE_STRINGOP */
- m_386 | m_PENT4 | m_NOCONA,
+ m_386 | m_P4_NOCONA,
/* X86_TUNE_QIMODE_MATH */
~0,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
- | m_GENERIC | m_GEODE),
+  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
+    | m_GENERIC),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
- m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
- | m_AMDFAM10 | m_BDVER1,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
+ m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_BDVER1 | m_COREI7,
+ m_COREI7 | m_BDVER,
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
- m_BDVER1,
+  m_BDVER,
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
m_AMD_MULTIPLE,
/* X86_TUNE_SSE_LOAD0_BY_PXOR */
- m_PPRO | m_PENT4 | m_NOCONA,
+ m_PPRO | m_P4_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
- m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_EPILOGUE_USING_MOVE */
- m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_SHIFT1 */
~m_486,
~(m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
- ~(m_AMDFAM10 | m_BDVER1),
+  ~(m_AMDFAM10 | m_BDVER),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
- | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
- | m_GENERIC,
+ m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
+ m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
- ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
+ ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
- m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
+ m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
m_ATOM,
/* X86_TUNE_EXT_80387_CONSTANTS */
- m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
- | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_SHORTEN_X87_SSE */
~m_K8,
/* X86_TUNE_AVOID_VECTOR_DECODE */
- m_K8 | m_CORE2I7_64 | m_GENERIC64,
+ m_CORE2I7_64 | m_K8 | m_GENERIC64,
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
and SImode multiply, but 386 and 486 do HImode multiply faster. */
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
vector path on AMD machines. */
- m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
+ m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
machines. */
- m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
+ m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
than a MOV. */
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
from FP to FP. */
- m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
+ m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
- m_BDVER1,
+ m_BDVER,
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
instructions. */
~m_ATOM,
+
+  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
+     at -O3.  For the moment, the prefetching seems badly tuned for Intel
+     chips.  */
+ m_K6_GEODE | m_AMD_MULTIPLE,
+
+ /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
+ the auto-vectorizer. */
+ m_BDVER
};
/* Feature tests against the various architecture variations. */
};
static const unsigned int x86_accumulate_outgoing_args
- = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
- | m_GENERIC;
+ = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
- | m_NOCONA | m_CORE2I7 | m_GENERIC;
+ = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
-static enum stringop_alg stringop_alg = no_stringop;
+static const unsigned int x86_avx256_split_unaligned_load
+ = m_COREI7 | m_GENERIC;
+
+static const unsigned int x86_avx256_split_unaligned_store
+ = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
bool save_regs_using_mov;
};
-/* Code model option. */
-enum cmodel ix86_cmodel;
-/* Asm dialect. */
-enum asm_dialect ix86_asm_dialect = ASM_ATT;
-/* TLS dialects. */
-enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
-
-/* Which unit we are generating floating point math for. */
-enum fpmath_unit ix86_fpmath;
-
/* Which cpu are we scheduling for. */
enum attr_cpu ix86_schedule;
/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;
-/* ix86_regparm_string as a number */
-static int ix86_regparm;
-
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
= "force_align_arg_pointer";
/* Alignment for incoming stack boundary in bits. */
unsigned int ix86_incoming_stack_boundary;
-/* The abi used by target. */
-enum calling_abi ix86_abi;
-
-/* Values 1-5: see jump.c */
-int ix86_branch_cost;
-
/* Calling abi specific va_list type nodes. */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;
-/* Variables which are this size or smaller are put in the data/bss
- or ldata/lbss sections. */
-
-int ix86_section_threshold = 65536;
-
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
char internal_label_prefix[16];
int internal_label_prefix_len;
{
IX86_FUNCTION_SPECIFIC_ARCH,
IX86_FUNCTION_SPECIFIC_TUNE,
- IX86_FUNCTION_SPECIFIC_FPMATH,
IX86_FUNCTION_SPECIFIC_MAX
};
static char *ix86_target_string (int, int, const char *, const char *,
- const char *, bool);
+ enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
-static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
+static bool ix86_valid_target_attribute_inner_p (tree, char *[],
+ struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);
static int ix86_tune_defaulted;
static int ix86_arch_specified;
-/* Define a set of ISAs which are available when a given ISA is
- enabled. MMX and SSE ISAs are handled separately. */
-
-#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
-#define OPTION_MASK_ISA_3DNOW_SET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
-
-#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
-#define OPTION_MASK_ISA_SSE2_SET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
-#define OPTION_MASK_ISA_SSE3_SET \
- (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_SSSE3_SET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
-#define OPTION_MASK_ISA_SSE4_1_SET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
-#define OPTION_MASK_ISA_SSE4_2_SET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
-#define OPTION_MASK_ISA_AVX_SET \
- (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
-#define OPTION_MASK_ISA_FMA_SET \
- (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
-
-/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
- as -msse4.2. */
-#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
-
-#define OPTION_MASK_ISA_SSE4A_SET \
- (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
-#define OPTION_MASK_ISA_FMA4_SET \
- (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
- | OPTION_MASK_ISA_AVX_SET)
-#define OPTION_MASK_ISA_XOP_SET \
- (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
-#define OPTION_MASK_ISA_LWP_SET \
- OPTION_MASK_ISA_LWP
-
-/* AES and PCLMUL need SSE2 because they use xmm registers */
-#define OPTION_MASK_ISA_AES_SET \
- (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_PCLMUL_SET \
- (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
-
-#define OPTION_MASK_ISA_ABM_SET \
- (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
-
-#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
-#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
-#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
-#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
-#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
-#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
-#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
-
-#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
-#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
-#define OPTION_MASK_ISA_F16C_SET \
- (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
-
-/* Define a set of ISAs which aren't available when a given ISA is
- disabled. MMX and SSE ISAs are handled separately. */
-
-#define OPTION_MASK_ISA_MMX_UNSET \
- (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
-#define OPTION_MASK_ISA_3DNOW_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
-#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
-
-#define OPTION_MASK_ISA_SSE_UNSET \
- (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
-#define OPTION_MASK_ISA_SSE2_UNSET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
-#define OPTION_MASK_ISA_SSE3_UNSET \
- (OPTION_MASK_ISA_SSE3 \
- | OPTION_MASK_ISA_SSSE3_UNSET \
- | OPTION_MASK_ISA_SSE4A_UNSET )
-#define OPTION_MASK_ISA_SSSE3_UNSET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
-#define OPTION_MASK_ISA_SSE4_1_UNSET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
-#define OPTION_MASK_ISA_AVX_UNSET \
- (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
- | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
-#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
-
-/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
- as -mno-sse4.1. */
-#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
-
-#define OPTION_MASK_ISA_SSE4A_UNSET \
- (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
-
-#define OPTION_MASK_ISA_FMA4_UNSET \
- (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
-#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
-#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
-
-#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
-#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
-#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
-#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
-#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
-#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
-#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
-#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
-#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
-#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
-
-#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
-#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
-#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
-
/* Vectorization library interface and handlers. */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
{&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32},
{&bdver1_cost, 32, 24, 32, 7, 32},
+ {&bdver2_cost, 32, 24, 32, 7, 32},
{&btver1_cost, 32, 24, 32, 7, 32},
{&atom_cost, 16, 7, 16, 7, 16}
};
"k8",
"amdfam10",
"bdver1",
+ "bdver2",
"btver1"
};
\f
{
return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
-
-/* Implement TARGET_HANDLE_OPTION. */
-
-static bool
-ix86_handle_option (struct gcc_options *opts,
- struct gcc_options *opts_set ATTRIBUTE_UNUSED,
- const struct cl_decoded_option *decoded,
- location_t loc ATTRIBUTE_UNUSED)
-{
- size_t code = decoded->opt_index;
- int value = decoded->value;
-
- switch (code)
- {
- case OPT_mmmx:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
- }
- return true;
-
- case OPT_m3dnow:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
- }
- return true;
-
- case OPT_m3dnowa:
- return false;
-
- case OPT_msse:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
- }
- return true;
-
- case OPT_msse2:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
- }
- return true;
-
- case OPT_msse3:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
- }
- return true;
-
- case OPT_mssse3:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
- }
- return true;
-
- case OPT_msse4_1:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
- }
- return true;
-
- case OPT_msse4_2:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
- }
- return true;
-
- case OPT_mavx:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
- }
- return true;
-
- case OPT_mfma:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
- }
- return true;
-
- case OPT_msse4:
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
- return true;
-
- case OPT_mno_sse4:
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
- return true;
-
- case OPT_msse4a:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
- }
- return true;
-
- case OPT_mfma4:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
- }
- return true;
-
- case OPT_mxop:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
- }
- return true;
-
- case OPT_mlwp:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
- }
- return true;
-
- case OPT_mabm:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
- }
- return true;
-
- case OPT_mbmi:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
- }
- return true;
-
- case OPT_mtbm:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
- }
- return true;
-
- case OPT_mpopcnt:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
- }
- return true;
-
- case OPT_msahf:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
- }
- return true;
-
- case OPT_mcx16:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
- }
- return true;
-
- case OPT_mmovbe:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
- }
- return true;
-
- case OPT_mcrc32:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
- }
- return true;
-
- case OPT_maes:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
- }
- return true;
-
- case OPT_mpclmul:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
- }
- return true;
-
- case OPT_mfsgsbase:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
- }
- return true;
-
- case OPT_mrdrnd:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
- }
- return true;
-
- case OPT_mf16c:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
- }
- return true;
-
- default:
- return true;
- }
-}
\f
/* Return a string that documents the current -m options. The caller is
responsible for freeing the string. */
static char *
ix86_target_string (int isa, int flags, const char *arch, const char *tune,
- const char *fpmath, bool add_nl_p)
+ enum fpmath_unit fpmath, bool add_nl_p)
{
struct ix86_target_opts
{
{ "-mmmx", OPTION_MASK_ISA_MMX },
{ "-mabm", OPTION_MASK_ISA_ABM },
{ "-mbmi", OPTION_MASK_ISA_BMI },
+ { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
{ "-mtbm", OPTION_MASK_ISA_TBM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mvzeroupper", MASK_VZEROUPPER },
{ "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
{ "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
+ { "-mprefer-avx128", MASK_PREFER_AVX128},
};
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
if (fpmath)
{
opts[num][0] = "-mfpmath=";
- opts[num++][1] = fpmath;
+ switch ((int) fpmath)
+ {
+ case FPMATH_387:
+ opts[num++][1] = "387";
+ break;
+
+ case FPMATH_SSE:
+ opts[num++][1] = "sse";
+ break;
+
+ case FPMATH_387 | FPMATH_SSE:
+ opts[num++][1] = "sse+387";
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
}
/* Any options? */
return ret;
}
-/* Return TRUE if software prefetching is beneficial for the
- given CPU. */
-
-static bool
-software_prefetching_beneficial_p (void)
-{
- switch (ix86_tune)
- {
- case PROCESSOR_GEODE:
- case PROCESSOR_K6:
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- case PROCESSOR_AMDFAM10:
- case PROCESSOR_BTVER1:
- return true;
-
- default:
- return false;
- }
-}
-
/* Return true, if profiling code should be emitted before
prologue. Otherwise it returns false.
Note: For x86 with "hotfix" it is sorried. */
{
char *opts = ix86_target_string (ix86_isa_flags, target_flags,
ix86_arch_string, ix86_tune_string,
- ix86_fpmath_string, true);
+ ix86_fpmath, true);
if (opts)
{
const char *suffix;
const char *sw;
- /* Comes from final.c -- no real reason to change it. */
-#define MAX_CODE_ALIGN 16
-
enum pta_flags
{
PTA_SSE = 1 << 0,
PTA_RDRND = 1 << 25,
PTA_F16C = 1 << 26,
PTA_BMI = 1 << 27,
- PTA_TBM = 1 << 28
+ PTA_TBM = 1 << 28,
+ PTA_LZCNT = 1 << 29
/* if this reaches 32, need to widen struct pta flags below */
};
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
+ {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
+ | PTA_RDRND | PTA_F16C},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
| PTA_XOP | PTA_LWP},
+ {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+ | PTA_FMA},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
+ if (TARGET_X32)
+ ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
+
/* -fPIC is the default for x86_64. */
if (TARGET_MACHO && TARGET_64BIT)
flag_pic = 2;
}
}
- if (ix86_stringop_string)
- {
- if (!strcmp (ix86_stringop_string, "rep_byte"))
- stringop_alg = rep_prefix_1_byte;
- else if (!strcmp (ix86_stringop_string, "libcall"))
- stringop_alg = libcall;
- else if (!strcmp (ix86_stringop_string, "rep_4byte"))
- stringop_alg = rep_prefix_4_byte;
- else if (!strcmp (ix86_stringop_string, "rep_8byte")
- && TARGET_64BIT)
- /* rep; movq isn't available in 32-bit code. */
- stringop_alg = rep_prefix_8_byte;
- else if (!strcmp (ix86_stringop_string, "byte_loop"))
- stringop_alg = loop_1_byte;
- else if (!strcmp (ix86_stringop_string, "loop"))
- stringop_alg = loop;
- else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
- stringop_alg = unrolled_loop;
- else
- error ("bad value (%s) for %sstringop-strategy=%s %s",
- ix86_stringop_string, prefix, suffix, sw);
+ if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
+ {
+ /* rep; movq isn't available in 32-bit code. */
+ error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
+ ix86_stringop_alg = no_stringop;
}
if (!ix86_arch_string)
else
ix86_arch_specified = 1;
- /* Validate -mabi= value. */
- if (ix86_abi_string)
- {
- if (strcmp (ix86_abi_string, "sysv") == 0)
- ix86_abi = SYSV_ABI;
- else if (strcmp (ix86_abi_string, "ms") == 0)
- ix86_abi = MS_ABI;
- else
- error ("unknown ABI (%s) for %sabi=%s %s",
- ix86_abi_string, prefix, suffix, sw);
- }
- else
+ if (!global_options_set.x_ix86_abi)
ix86_abi = DEFAULT_ABI;
- if (ix86_cmodel_string != 0)
+ if (global_options_set.x_ix86_cmodel)
{
- if (!strcmp (ix86_cmodel_string, "small"))
- ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
- else if (!strcmp (ix86_cmodel_string, "medium"))
- ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
- else if (!strcmp (ix86_cmodel_string, "large"))
- ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
- else if (flag_pic)
- error ("code model %s does not support PIC mode", ix86_cmodel_string);
- else if (!strcmp (ix86_cmodel_string, "32"))
- ix86_cmodel = CM_32;
- else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
- ix86_cmodel = CM_KERNEL;
- else
- error ("bad value (%s) for %scmodel=%s %s",
- ix86_cmodel_string, prefix, suffix, sw);
+ switch (ix86_cmodel)
+ {
+ case CM_SMALL:
+ case CM_SMALL_PIC:
+ if (flag_pic)
+ ix86_cmodel = CM_SMALL_PIC;
+ if (!TARGET_64BIT)
+ error ("code model %qs not supported in the %s bit mode",
+ "small", "32");
+ break;
+
+ case CM_MEDIUM:
+ case CM_MEDIUM_PIC:
+ if (flag_pic)
+ ix86_cmodel = CM_MEDIUM_PIC;
+ if (!TARGET_64BIT)
+ error ("code model %qs not supported in the %s bit mode",
+ "medium", "32");
+ else if (TARGET_X32)
+ error ("code model %qs not supported in x32 mode",
+ "medium");
+ break;
+
+      case CM_LARGE:
+      case CM_LARGE_PIC:
+	  if (flag_pic)
+	    ix86_cmodel = CM_LARGE_PIC;
+	  if (!TARGET_64BIT)
+	    error ("code model %qs not supported in the %s bit mode",
+		   "large", "32");
+	  else if (TARGET_X32)
+	    /* Fixed copy-paste typo: this is the CM_LARGE arm, so the
+	       diagnostic must name "large", not "medium".  */
+	    error ("code model %qs not supported in x32 mode",
+		   "large");
+	  break;
+
+ case CM_32:
+ if (flag_pic)
+ error ("code model %s does not support PIC mode", "32");
+ if (TARGET_64BIT)
+ error ("code model %qs not supported in the %s bit mode",
+ "32", "64");
+ break;
+
+ case CM_KERNEL:
+ if (flag_pic)
+ {
+ error ("code model %s does not support PIC mode", "kernel");
+ ix86_cmodel = CM_32;
+ }
+ if (!TARGET_64BIT)
+ error ("code model %qs not supported in the %s bit mode",
+ "kernel", "32");
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
}
else
{
else
ix86_cmodel = CM_32;
}
- if (ix86_asm_string != 0)
+ if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
{
- if (! TARGET_MACHO
- && !strcmp (ix86_asm_string, "intel"))
- ix86_asm_dialect = ASM_INTEL;
- else if (!strcmp (ix86_asm_string, "att"))
- ix86_asm_dialect = ASM_ATT;
- else
- error ("bad value (%s) for %sasm=%s %s",
- ix86_asm_string, prefix, suffix, sw);
+ error ("-masm=intel not supported in this configuration");
+ ix86_asm_dialect = ASM_ATT;
}
- if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
- error ("code model %qs not supported in the %s bit mode",
- ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
sorry ("%i-bit mode not compiled in",
(ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
if (processor_alias_table[i].flags & PTA_BMI
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
ix86_isa_flags |= OPTION_MASK_ISA_BMI;
+ if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
+ ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
if (processor_alias_table[i].flags & PTA_TBM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
ix86_isa_flags |= OPTION_MASK_ISA_TBM;
init_machine_status = ix86_init_machine_status;
/* Validate -mregparm= value. */
- if (ix86_regparm_string)
+ if (global_options_set.x_ix86_regparm)
{
if (TARGET_64BIT)
- warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
- i = atoi (ix86_regparm_string);
- if (i < 0 || i > REGPARM_MAX)
- error ("%sregparm=%d%s is not between 0 and %d",
- prefix, i, suffix, REGPARM_MAX);
- else
- ix86_regparm = i;
- }
- if (TARGET_64BIT)
- ix86_regparm = REGPARM_MAX;
-
- /* If the user has provided any of the -malign-* options,
- warn and use that value only if -falign-* is not set.
- Remove this code in GCC 3.2 or later. */
- if (ix86_align_loops_string)
- {
- warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
- prefix, suffix, suffix);
- if (align_loops == 0)
+ warning (0, "-mregparm is ignored in 64-bit mode");
+ if (ix86_regparm > REGPARM_MAX)
{
- i = atoi (ix86_align_loops_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("%salign-loops=%d%s is not between 0 and %d",
- prefix, i, suffix, MAX_CODE_ALIGN);
- else
- align_loops = 1 << i;
- }
- }
-
- if (ix86_align_jumps_string)
- {
- warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
- prefix, suffix, suffix);
- if (align_jumps == 0)
- {
- i = atoi (ix86_align_jumps_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("%salign-loops=%d%s is not between 0 and %d",
- prefix, i, suffix, MAX_CODE_ALIGN);
- else
- align_jumps = 1 << i;
- }
- }
-
- if (ix86_align_funcs_string)
- {
- warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
- prefix, suffix, suffix);
- if (align_functions == 0)
- {
- i = atoi (ix86_align_funcs_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("%salign-loops=%d%s is not between 0 and %d",
- prefix, i, suffix, MAX_CODE_ALIGN);
- else
- align_functions = 1 << i;
+ error ("-mregparm=%d is not between 0 and %d",
+ ix86_regparm, REGPARM_MAX);
+ ix86_regparm = 0;
}
}
+ if (TARGET_64BIT)
+ ix86_regparm = REGPARM_MAX;
/* Default align_* from the processor table. */
if (align_loops == 0)
align_functions = processor_target_table[ix86_tune].align_func;
}
- /* Validate -mbranch-cost= value, or provide default. */
- ix86_branch_cost = ix86_cost->branch_cost;
- if (ix86_branch_cost_string)
- {
- i = atoi (ix86_branch_cost_string);
- if (i < 0 || i > 5)
- error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
- else
- ix86_branch_cost = i;
- }
- if (ix86_section_threshold_string)
- {
- i = atoi (ix86_section_threshold_string);
- if (i < 0)
- error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
- else
- ix86_section_threshold = i;
- }
-
- if (ix86_tls_dialect_string)
- {
- if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
- ix86_tls_dialect = TLS_DIALECT_GNU;
- else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
- ix86_tls_dialect = TLS_DIALECT_GNU2;
- else
- error ("bad value (%s) for %stls-dialect=%s %s",
- ix86_tls_dialect_string, prefix, suffix, sw);
- }
-
- if (ix87_precision_string)
- {
- i = atoi (ix87_precision_string);
- if (i != 32 && i != 64 && i != 80)
- error ("pc%d is not valid precision setting (32, 64 or 80)", i);
- }
+ /* Provide default for -mbranch-cost= value. */
+ if (!global_options_set.x_ix86_branch_cost)
+ ix86_branch_cost = ix86_cost->branch_cost;
if (TARGET_64BIT)
{
if (TARGET_SSE4_2 || TARGET_ABM)
ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
+ /* Turn on lzcnt instruction for -mabm. */
+ if (TARGET_ABM)
+ ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
+
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
- if (ix86_preferred_stack_boundary_string)
+ if (global_options_set.x_ix86_preferred_stack_boundary_arg)
{
int min = (TARGET_64BIT ? 4 : 2);
int max = (TARGET_SEH ? 4 : 12);
- i = atoi (ix86_preferred_stack_boundary_string);
- if (i < min || i > max)
+ if (ix86_preferred_stack_boundary_arg < min
+ || ix86_preferred_stack_boundary_arg > max)
{
if (min == max)
- error ("%spreferred-stack-boundary%s is not supported "
- "for this target", prefix, suffix);
+ error ("-mpreferred-stack-boundary is not supported "
+ "for this target");
else
- error ("%spreferred-stack-boundary=%d%s is not between %d and %d",
- prefix, i, suffix, min, max);
+ error ("-mpreferred-stack-boundary=%d is not between %d and %d",
+ ix86_preferred_stack_boundary_arg, min, max);
}
else
- ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ ix86_preferred_stack_boundary
+ = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
}
/* Set the default value for -mstackrealign. */
/* Validate -mincoming-stack-boundary= value or default it to
MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
- if (ix86_incoming_stack_boundary_string)
+ if (global_options_set.x_ix86_incoming_stack_boundary_arg)
{
- i = atoi (ix86_incoming_stack_boundary_string);
- if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
+ if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
+ || ix86_incoming_stack_boundary_arg > 12)
error ("-mincoming-stack-boundary=%d is not between %d and 12",
- i, TARGET_64BIT ? 4 : 2);
+ ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
else
{
- ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ ix86_user_incoming_stack_boundary
+ = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
ix86_incoming_stack_boundary
= ix86_user_incoming_stack_boundary;
}
&& ! TARGET_SSE)
error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
- ix86_fpmath = TARGET_FPMATH_DEFAULT;
- if (ix86_fpmath_string != 0)
+ if (global_options_set.x_ix86_fpmath)
{
- if (! strcmp (ix86_fpmath_string, "387"))
- ix86_fpmath = FPMATH_387;
- else if (! strcmp (ix86_fpmath_string, "sse"))
- {
- if (!TARGET_SSE)
- {
- warning (0, "SSE instruction set disabled, using 387 arithmetics");
- ix86_fpmath = FPMATH_387;
- }
- else
- ix86_fpmath = FPMATH_SSE;
- }
- else if (! strcmp (ix86_fpmath_string, "387,sse")
- || ! strcmp (ix86_fpmath_string, "387+sse")
- || ! strcmp (ix86_fpmath_string, "sse,387")
- || ! strcmp (ix86_fpmath_string, "sse+387")
- || ! strcmp (ix86_fpmath_string, "both"))
+ if (ix86_fpmath & FPMATH_SSE)
{
if (!TARGET_SSE)
{
warning (0, "SSE instruction set disabled, using 387 arithmetics");
ix86_fpmath = FPMATH_387;
}
- else if (!TARGET_80387)
+ else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
{
warning (0, "387 instruction set disabled, using SSE arithmetics");
ix86_fpmath = FPMATH_SSE;
}
- else
- ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
}
- else
- error ("bad value (%s) for %sfpmath=%s %s",
- ix86_fpmath_string, prefix, suffix, sw);
}
+ else
+ ix86_fpmath = TARGET_FPMATH_DEFAULT;
/* If the i387 is disabled, then do not return values in it. */
if (!TARGET_80387)
target_flags &= ~MASK_FLOAT_RETURNS;
/* Use external vectorized library in vectorizing intrinsics. */
- if (ix86_veclibabi_string)
- {
- if (strcmp (ix86_veclibabi_string, "svml") == 0)
+ if (global_options_set.x_ix86_veclibabi_type)
+ switch (ix86_veclibabi_type)
+ {
+ case ix86_veclibabi_type_svml:
ix86_veclib_handler = ix86_veclibabi_svml;
- else if (strcmp (ix86_veclibabi_string, "acml") == 0)
+ break;
+
+ case ix86_veclibabi_type_acml:
ix86_veclib_handler = ix86_veclibabi_acml;
- else
- error ("unknown vectorization library ABI type (%s) for "
- "%sveclibabi=%s %s", ix86_veclibabi_string,
- prefix, suffix, sw);
- }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
if ((!USE_IX86_FRAME_POINTER
|| (x86_accumulate_outgoing_args & ix86_tune_mask))
}
/* For sane SSE instruction set generation we need fcomi instruction.
- It is safe to enable all CMOVE instructions. */
- if (TARGET_SSE)
+ It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
+ expands to a sequence that includes conditional move. */
+ if (TARGET_SSE || TARGET_RDRND)
TARGET_CMOVE = 1;
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
if (flag_prefetch_loop_arrays < 0
&& HAVE_prefetch
&& optimize >= 3
- && software_prefetching_beneficial_p ())
+ && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
flag_prefetch_loop_arrays = 1;
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
#endif
}
- /* Save the initial options in case the user does function specific options */
- if (main_args_p)
- target_option_default_node = target_option_current_node
- = build_target_option_node ();
-
if (TARGET_AVX)
{
/* When not optimize for size, enable vzeroupper optimization for
if (flag_expensive_optimizations
&& !(target_flags_explicit & MASK_VZEROUPPER))
target_flags |= MASK_VZEROUPPER;
- if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
- if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
+ if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
+ target_flags |= MASK_PREFER_AVX128;
}
}
- else
+ else
{
/* Disable vzeroupper pass if TARGET_AVX is disabled. */
target_flags &= ~MASK_VZEROUPPER;
}
+
+ /* Save the initial options in case the user does function specific
+ options. */
+ if (main_args_p)
+ target_option_default_node = target_option_current_node
+ = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes. */
ptr->arch = ix86_arch;
ptr->schedule = ix86_schedule;
ptr->tune = ix86_tune;
- ptr->fpmath = ix86_fpmath;
ptr->branch_cost = ix86_branch_cost;
ptr->tune_defaulted = ix86_tune_defaulted;
ptr->arch_specified = ix86_arch_specified;
gcc_assert (ptr->arch == ix86_arch);
gcc_assert (ptr->schedule == ix86_schedule);
gcc_assert (ptr->tune == ix86_tune);
- gcc_assert (ptr->fpmath == ix86_fpmath);
gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
ix86_arch = (enum processor_type) ptr->arch;
ix86_schedule = (enum attr_cpu) ptr->schedule;
ix86_tune = (enum processor_type) ptr->tune;
- ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
ix86_branch_cost = ptr->branch_cost;
ix86_tune_defaulted = ptr->tune_defaulted;
ix86_arch_specified = ptr->arch_specified;
{
char *target_string
= ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
- NULL, NULL, NULL, false);
+ NULL, NULL, ptr->x_ix86_fpmath, false);
fprintf (file, "%*sarch = %d (%s)\n",
indent, "",
? cpu_names[ptr->tune]
: "<unknown>"));
- fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
- (ptr->fpmath & FPMATH_387) ? ", 387" : "",
- (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
if (target_string)
over the list. */
static bool
-ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
+ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
+ struct gcc_options *enum_opts_set)
{
char *next_optstr;
bool ret = true;
#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
+#define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
ix86_opt_yes,
ix86_opt_no,
ix86_opt_str,
+ ix86_opt_enum,
ix86_opt_isa
};
IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
IX86_ATTR_ISA ("abm", OPT_mabm),
IX86_ATTR_ISA ("bmi", OPT_mbmi),
+ IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
IX86_ATTR_ISA ("tbm", OPT_mtbm),
IX86_ATTR_ISA ("aes", OPT_maes),
IX86_ATTR_ISA ("avx", OPT_mavx),
IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
IX86_ATTR_ISA ("f16c", OPT_mf16c),
+ /* enum options */
+ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
+
/* string options */
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
- IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* flag options */
for (; args; args = TREE_CHAIN (args))
if (TREE_VALUE (args)
- && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
+ && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
+ p_strings, enum_opts_set))
ret = false;
return ret;
type = attrs[i].type;
opt_len = attrs[i].len;
if (ch == attrs[i].string[0]
- && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
+ && ((type != ix86_opt_str && type != ix86_opt_enum)
+ ? len == opt_len
+ : len > opt_len)
&& memcmp (p, attrs[i].string, opt_len) == 0)
{
opt = attrs[i].opt;
p_strings[opt] = xstrdup (p + opt_len);
}
+ else if (type == ix86_opt_enum)
+ {
+ bool arg_ok;
+ int value;
+
+ arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
+ if (arg_ok)
+ set_option (&global_options, enum_opts_set, opt, value,
+ p + opt_len, DK_UNSPECIFIED, input_location,
+ global_dc);
+ else
+ {
+ error ("attribute(target(\"%s\")) is unknown", orig_p);
+ ret = false;
+ }
+ }
+
else
gcc_unreachable ();
}
{
const char *orig_arch_string = ix86_arch_string;
const char *orig_tune_string = ix86_tune_string;
- const char *orig_fpmath_string = ix86_fpmath_string;
+ enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
int orig_tune_defaulted = ix86_tune_defaulted;
int orig_arch_specified = ix86_arch_specified;
- char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
+ char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
tree t = NULL_TREE;
int i;
struct cl_target_option *def
= TREE_TARGET_OPTION (target_option_default_node);
+ struct gcc_options enum_opts_set;
+
+ memset (&enum_opts_set, 0, sizeof (enum_opts_set));
/* Process each of the options on the chain. */
- if (! ix86_valid_target_attribute_inner_p (args, option_strings))
+ if (! ix86_valid_target_attribute_inner_p (args, option_strings,
+ &enum_opts_set))
return NULL_TREE;
/* If the changed options are different from the default, rerun
|| target_flags != def->x_target_flags
|| option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|| option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
- || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
+ || enum_opts_set.x_ix86_fpmath)
{
/* If we are using the default tune= or arch=, undo the string assigned,
and use the default. */
ix86_tune_string = NULL;
/* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
- if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
- ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
+ if (enum_opts_set.x_ix86_fpmath)
+ global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
else if (!TARGET_64BIT && TARGET_SSE)
- ix86_fpmath_string = "sse,387";
+ {
+ ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
+ global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
+ }
/* Do any overrides, such as arch=xxx, or tune=xxx support. */
ix86_option_override_internal (false);
ix86_arch_string = orig_arch_string;
ix86_tune_string = orig_tune_string;
- ix86_fpmath_string = orig_fpmath_string;
+ global_options_set.x_ix86_fpmath = orig_fpmath_set;
/* Free up memory allocated to hold the strings */
for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
else if (caller_opts->tune != callee_opts->tune)
ret = false;
- else if (caller_opts->fpmath != callee_opts->fpmath)
+ else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
ret = false;
else if (caller_opts->branch_cost != callee_opts->branch_cost)
ASM_OUTPUT_SKIP (file, size ? size : 1);
}
\f
-static const struct default_options ix86_option_optimization_table[] =
- {
- /* Turn off -fschedule-insns by default. It tends to make the
- problem with not enough registers even worse. */
-#ifdef INSN_SCHEDULING
- { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
-#endif
-
-#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
- SUBTARGET_OPTIMIZATION_OPTIONS,
-#endif
- { OPT_LEVELS_NONE, 0, NULL, 0 }
- };
-
-/* Implement TARGET_OPTION_INIT_STRUCT. */
-
-static void
-ix86_option_init_struct (struct gcc_options *opts)
-{
- if (TARGET_MACHO)
- /* The Darwin libraries never set errno, so we might as well
- avoid calling them when that's the only reason we would. */
- opts->x_flag_errno_math = 0;
-
- opts->x_flag_pcc_struct_return = 2;
- opts->x_flag_asynchronous_unwind_tables = 2;
- opts->x_flag_vect_cost_model = 1;
-}
-
/* Decide whether we must probe the stack before any space allocation
on this target. It's essentially TARGET_STACK_PROBE except when
-fstack-check causes the stack to be already probed differently. */
optimize any indirect call, or a direct call to a global function,
as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
if (!TARGET_MACHO
- && !TARGET_64BIT
- && flag_pic
+ && !TARGET_64BIT
+ && flag_pic
&& (!decl || !targetm.binds_local_p (decl)))
return false;
may not be available.) */
static void
-ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
const_tree type, bool named)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
HOST_WIDE_INT bytes, words;
if (mode == BLKmode)
ellipsis). */
static rtx
-ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
+ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
const_tree type, bool named)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
enum machine_mode mode = omode;
HOST_WIDE_INT bytes, words;
rtx arg;
appropriate for passing a pointer to that type. */
static bool
-ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
const_tree type, bool named ATTRIBUTE_UNUSED)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+
/* See Windows x64 Software Convention. */
if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
{
return gen_rtx_REG (mode, AX_REG);
}
}
+ else if (POINTER_TYPE_P (valtype))
+ {
+ /* Pointers are always returned in Pmode. */
+ mode = Pmode;
+ }
ret = construct_container (mode, orig_mode, valtype, 1,
X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
+/* Implement the promote-function-mode target hook: pointer function
+   arguments and return values are promoted to Pmode.  All other types
+   defer to the target-independent default.  */
+
+static enum machine_mode
+ix86_promote_function_mode (const_tree type, enum machine_mode mode,
+			    int *punsignedp, const_tree fntype,
+			    int for_return)
+{
+  /* Widen pointers to full Pmode; the extension is unsigned, per
+     POINTERS_EXTEND_UNSIGNED (matters when ptr_mode != Pmode, e.g. x32).  */
+  if (type != NULL_TREE && POINTER_TYPE_P (type))
+    {
+      *punsignedp = POINTERS_EXTEND_UNSIGNED;
+      return Pmode;
+    }
+  /* Non-pointer types: fall back to the generic promotion rules.  */
+  return default_promote_function_mode (type, mode, punsignedp, fntype,
+					for_return);
+}
+
rtx
ix86_libcall_value (enum machine_mode mode)
{
alias_set_type set = get_varargs_alias_set ();
int i;
+ /* Reset to zero, as there might be a sysv vaarg used
+ before. */
+ ix86_varargs_gpr_size = 0;
+ ix86_varargs_fpr_size = 0;
+
for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
{
rtx reg, mem;
}
static void
-ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
tree type, int *pretend_size ATTRIBUTE_UNUSED,
int no_rtl)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
CUMULATIVE_ARGS next_cum;
tree fntype;
For stdargs, we do want to skip the last named argument. */
next_cum = *cum;
if (stdarg_p (fntype))
- ix86_function_arg_advance (&next_cum, mode, type, true);
+ ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
+ true);
if (cum->call_abi == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
ovf_rtx = cfun->machine->split_stack_varargs_pointer;
t = make_tree (type, ovf_rtx);
if (words != 0)
- t = build2 (POINTER_PLUS_EXPR, type, t,
- size_int (words * UNITS_PER_WORD));
+ t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
t = build2 (MODIFY_EXPR, type, ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
type = TREE_TYPE (sav);
t = make_tree (type, frame_pointer_rtx);
if (!ix86_varargs_gpr_size)
- t = build2 (POINTER_PLUS_EXPR, type, t,
- size_int (-8 * X86_64_REGPARM_MAX));
+ t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (needed_intregs)
{
/* int_addr = gpr + sav; */
- t = fold_convert (sizetype, gpr);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ t = fold_build_pointer_plus (sav, gpr);
gimplify_assign (int_addr, t, pre_p);
}
if (needed_sseregs)
{
/* sse_addr = fpr + sav; */
- t = fold_convert (sizetype, fpr);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ t = fold_build_pointer_plus (sav, fpr);
gimplify_assign (sse_addr, t, pre_p);
}
if (need_temp)
src_offset = REGNO (reg) * 8;
}
src_addr = fold_convert (addr_type, src_addr);
- src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
- size_int (src_offset));
+ src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
dest_addr = fold_convert (daddr_type, addr);
- dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
- size_int (prev_size));
+ dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
if (cur_size == GET_MODE_SIZE (mode))
{
src = build_va_arg_indirect_ref (src_addr);
else
{
HOST_WIDE_INT align = arg_boundary / 8;
- t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
- size_int (align - 1));
- t = fold_convert (sizetype, t);
+ t = fold_build_pointer_plus_hwi (ovf, align - 1);
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
- size_int (-align));
- t = fold_convert (TREE_TYPE (ovf), t);
+ build_int_cst (TREE_TYPE (t), -align));
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
- t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
- size_int (rsize * UNITS_PER_WORD));
+ t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
gimplify_assign (unshare_expr (ovf), t, pre_p);
if (container)
case 1:
switch (get_attr_mode (insn))
{
- case MODE_V4SF:
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
- case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
- else
- return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
- else
- return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
- case MODE_V8SF:
- return "vxorps\t%x0, %x0, %x0";
- case MODE_V4DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "vxorps\t%x0, %x0, %x0";
- else
- return "vxorpd\t%x0, %x0, %x0";
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vpxor\t%0, %d0";
+ case MODE_V2DF:
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorpd\t%0, %d0";
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+
case MODE_OI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "vxorps\t%x0, %x0, %x0";
- else
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "vpxor\t%x0, %x0, %x0";
+ case MODE_V4DF:
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "vxorpd\t%x0, %x0, %x0";
+ case MODE_V8SF:
+ return "vxorps\t%x0, %x0, %x0";
+
default:
break;
}
+
case 2:
- return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
+ return "%vpcmpeqd\t%0, %d0";
default:
break;
}
}
\f
#ifndef USE_HIDDEN_LINKONCE
-# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 1
# else
# define USE_HIDDEN_LINKONCE 0
xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
- if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+ if (!flag_pic)
{
xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
- if (!flag_pic)
- output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
- else
- {
- output_asm_insn ("call\t%a2", xops);
-#ifdef DWARF2_UNWIND_INFO
- /* The call to next label acts as a push. */
- if (dwarf2out_do_frame ())
- {
- rtx insn;
- start_sequence ();
- insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode,
- stack_pointer_rtx,
- GEN_INT (-4))));
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- end_sequence ();
- }
-#endif
- }
+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
#if TARGET_MACHO
/* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
-
- if (flag_pic)
- {
- output_asm_insn ("pop%z0\t%0", xops);
-#ifdef DWARF2_UNWIND_INFO
- /* The pop is a pop and clobbers dest, but doesn't restore it
- for unwind info purposes. */
- if (dwarf2out_do_frame ())
- {
- rtx insn;
- start_sequence ();
- insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
- dwarf2out_frame_debug (insn, true);
- insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode,
- stack_pointer_rtx,
- GEN_INT (4))));
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- end_sequence ();
- }
-#endif
- }
}
else
{
get_pc_thunk_name (name, REGNO (dest));
pic_labels_used |= 1 << REGNO (dest);
-#ifdef DWARF2_UNWIND_INFO
- /* Ensure all queued register saves are flushed before the
- call. */
- if (dwarf2out_do_frame ())
- dwarf2out_flush_queued_reg_saves ();
-#endif
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
output_asm_insn ("call\t%X2", xops);
#endif
}
- if (TARGET_MACHO)
- return "";
-
- if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ if (!TARGET_MACHO)
output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
return "";
}
return INVALID_REGNUM;
}
-/* Return 1 if we need to save REGNO. */
-static int
-ix86_save_reg (unsigned int regno, int maybe_eh_return)
+/* Return TRUE if we need to save REGNO. */
+
+static bool
+ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
if (pic_offset_table_rtx
&& regno == REAL_PIC_OFFSET_TABLE_REGNUM
|| crtl->profile
|| crtl->calls_eh_return
|| crtl->uses_const_pool))
- {
- if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
- return 0;
- return 1;
- }
+ return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
if (crtl->calls_eh_return && maybe_eh_return)
{
if (test == INVALID_REGNUM)
break;
if (test == regno)
- return 1;
+ return true;
}
}
if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
- return 1;
+ return true;
return (df_regs_ever_live_p (regno)
&& !call_used_regs[regno]
return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
-/* On the x86 -fsplit-stack and -fstack-protector both use the same
- field in the TCB, so they can not be used together. */
-
-static bool
-ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
- struct gcc_options *opts ATTRIBUTE_UNUSED)
-{
- bool ret = true;
-
-#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
- if (report)
- error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
- ret = false;
-#else
- if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
- {
- if (report)
- error ("%<-fsplit-stack%> requires "
- "assembler support for CFI directives");
- ret = false;
- }
-#endif
-
- return ret;
-}
-
/* When using -fsplit-stack, the allocation routines set a field in
the TCB to the bottom of the stack plus this much space, measured
in bytes. */
cfun->machine->use_fast_prologue_epilogue
= !expensive_function_p (count);
}
- if (TARGET_PROLOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue)
- frame->save_regs_using_mov = true;
- else
- frame->save_regs_using_mov = false;
- /* If static stack checking is enabled and done with probes, the registers
- need to be saved before allocating the frame. */
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- frame->save_regs_using_mov = false;
+ frame->save_regs_using_mov
+ = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
/* Skip return address. */
offset = UNITS_PER_WORD;
return len;
}
-
+
/* Return an RTX that points to CFA_OFFSET within the stack frame.
The valid base registers are taken from CFUN->MACHINE->FS. */
allocate = frame.stack_pointer_offset - m->fs.sp_offset;
- if (flag_stack_usage)
+ if (flag_stack_usage_info)
{
/* We start to count from ARG_POINTER. */
HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
}
else
- insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ {
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
+ }
}
/* In the pic_reg_used case, make sure that the got load isn't deleted
emit_insn (gen_cld ());
/* SEH requires that the prologue end within 256 bytes of the start of
- the function. Prevent instruction schedules that would extend that. */
+ the function. Prevent instruction schedules that would extend that.
+ Further, prevent alloca modifications to the stack pointer from being
+ combined with prologue modifications. */
if (TARGET_SEH)
- emit_insn (gen_blockage ());
+ emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn. */
First register is restored from CFA - CFA_OFFSET. */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
- int maybe_eh_return)
+ bool maybe_eh_return)
{
struct machine_function *m = cfun->machine;
unsigned int regno;
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx insn, mem;
-
+
mem = choose_baseaddr (cfa_offset);
mem = gen_frame_mem (Pmode, mem);
insn = emit_move_insn (reg, mem);
First register is restored from CFA - CFA_OFFSET. */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
- int maybe_eh_return)
+ bool maybe_eh_return)
{
unsigned int regno;
if (TARGET_VZEROUPPER
&& !TREE_THIS_VOLATILE (cfun->decl)
&& !cfun->machine->caller_return_avx256_p)
- emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+ emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
if (crtl->args.pops_args && crtl->args.size)
{
}
call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
GEN_INT (UNITS_PER_WORD), constm1_rtx,
- NULL_RTX, 0);
+ NULL_RTX, false);
add_function_usage_to (call_insn, call_fusage);
/* In order to make call/return prediction work right, we now need
}
}
\f
+/* Determine if OP is a suitable SUBREG RTX for an address.  */
+
+static bool
+ix86_address_subreg_operand (rtx op)
+{
+ enum machine_mode mode;
+
+ if (!REG_P (op))
+ return false;
+
+ mode = GET_MODE (op);
+
+ if (GET_MODE_CLASS (mode) != MODE_INT)
+ return false;
+
+ /* Don't allow SUBREGs that span more than a word. It can lead to spill
+ failures when the register is one word out of a two word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return false;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ return register_no_elim_operand (op, mode);
+}
+
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
- if (REG_P (addr) || GET_CODE (addr) == SUBREG)
+ /* Allow zero-extended SImode addresses,
+ they will be emitted with addr32 prefix. */
+ if (TARGET_64BIT
+ && GET_CODE (addr) == ZERO_EXTEND
+ && GET_MODE (addr) == DImode
+ && GET_MODE (XEXP (addr, 0)) == SImode)
+ addr = XEXP (addr, 0);
+
+ if (REG_P (addr))
base = addr;
+ else if (GET_CODE (addr) == SUBREG)
+ {
+ if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ base = addr;
+ else
+ return 0;
+ }
else if (GET_CODE (addr) == PLUS)
{
rtx addends[4], op;
return 0;
break;
- case REG:
case SUBREG:
+ if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ return 0;
+ /* FALLTHRU */
+
+ case REG:
if (!base)
base = op;
else if (!index)
else
disp = addr; /* displacement */
+ if (index)
+ {
+ if (REG_P (index))
+ ;
+ else if (GET_CODE (index) == SUBREG
+ && ix86_address_subreg_operand (SUBREG_REG (index)))
+ ;
+ else
+ return 0;
+ }
+
/* Extract the integral value of scale. */
if (scale_rtx)
{
disp = parts.disp;
scale = parts.scale;
- /* Validate base register.
-
- Don't allow SUBREG's that span more than a word here. It can lead to spill
- failures when the base is one word out of a two word structure, which is
- represented internally as a DImode int. */
-
+ /* Validate base register. */
if (base)
{
rtx reg;
if (REG_P (base))
reg = base;
- else if (GET_CODE (base) == SUBREG
- && REG_P (SUBREG_REG (base))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (base);
+ else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
+ reg = SUBREG_REG (base);
else
/* Base is not a register. */
return false;
- if (GET_MODE (base) != Pmode)
- /* Base is not in Pmode. */
+ if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
return false;
}
- /* Validate index register.
-
- Don't allow SUBREG's that span more than a word here -- same as above. */
-
+ /* Validate index register. */
if (index)
{
rtx reg;
if (REG_P (index))
reg = index;
- else if (GET_CODE (index) == SUBREG
- && REG_P (SUBREG_REG (index))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (index);
+ else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
+ reg = SUBREG_REG (index);
else
/* Index is not a register. */
return false;
- if (GET_MODE (index) != Pmode)
- /* Index is not in Pmode. */
+ if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
return false;
}
+ /* Index and base should have the same mode. */
+ if (base && index
+ && GET_MODE (base) != GET_MODE (index))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
/* Load the thread pointer. If TO_REG is true, force it into a register. */
static rtx
-get_thread_pointer (int to_reg)
+get_thread_pointer (bool to_reg)
{
- rtx tp, reg, insn;
+ rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
- tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
- if (!to_reg)
- return tp;
+ if (GET_MODE (tp) != Pmode)
+ tp = convert_to_mode (Pmode, tp, 1);
- reg = gen_reg_rtx (Pmode);
- insn = gen_rtx_SET (VOIDmode, reg, tp);
- insn = emit_insn (insn);
+ if (to_reg)
+ tp = copy_addr_to_reg (tp);
- return reg;
+ return tp;
+}
+
+/* Construct the SYMBOL_REF for the tls_get_addr function. */
+
+static GTY(()) rtx ix86_tls_symbol;
+
+static rtx
+ix86_tls_get_addr (void)
+{
+ if (!ix86_tls_symbol)
+ {
+ const char *sym
+ = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
+ ? "___tls_get_addr" : "__tls_get_addr");
+
+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
+ }
+
+ return ix86_tls_symbol;
+}
+
+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
+
+static GTY(()) rtx ix86_tls_module_base_symbol;
+
+rtx
+ix86_tls_module_base (void)
+{
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol
+ = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
+
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
we expect to load the address into a register. */
static rtx
-legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
+legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
- rtx dest, base, off, pic, tp;
+ rtx dest, base, off;
+ rtx pic = NULL_RTX, tp = NULL_RTX;
int type;
switch (model)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
dest = gen_reg_rtx (Pmode);
- tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
- if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ if (!TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
-
- start_sequence ();
- emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
- insns = get_insns ();
- end_sequence ();
-
- RTL_CONST_CALL_P (insns) = 1;
- emit_libcall_block (insns, dest, rax, x);
+ if (flag_pic)
+ pic = pic_offset_table_rtx;
+ else
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ }
}
- else if (TARGET_64BIT && TARGET_GNU2_TLS)
- emit_insn (gen_tls_global_dynamic_64 (dest, x));
- else
- emit_insn (gen_tls_global_dynamic_32 (dest, x));
if (TARGET_GNU2_TLS)
{
+ if (TARGET_64BIT)
+ emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
+ else
+ emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
+
+ tp = get_thread_pointer (true);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
}
+ else
+ {
+ rtx caddr = ix86_tls_get_addr ();
+
+ if (TARGET_64BIT)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
+ insns = get_insns ();
+ end_sequence ();
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, dest, rax, x);
+ }
+ else
+ emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
+ }
break;
case TLS_MODEL_LOCAL_DYNAMIC:
base = gen_reg_rtx (Pmode);
- tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
- if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ if (!TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
-
- start_sequence ();
- emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
- insns = get_insns ();
- end_sequence ();
-
- note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
- note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
- RTL_CONST_CALL_P (insns) = 1;
- emit_libcall_block (insns, base, rax, note);
+ if (flag_pic)
+ pic = pic_offset_table_rtx;
+ else
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ }
}
- else if (TARGET_64BIT && TARGET_GNU2_TLS)
- emit_insn (gen_tls_local_dynamic_base_64 (base));
- else
- emit_insn (gen_tls_local_dynamic_base_32 (base));
if (TARGET_GNU2_TLS)
{
- rtx x = ix86_tls_module_base ();
+ rtx tmp = ix86_tls_module_base ();
+
+ if (TARGET_64BIT)
+ emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
+ else
+ emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
+ tp = get_thread_pointer (true);
set_unique_reg_note (get_last_insn (), REG_EQUIV,
- gen_rtx_MINUS (Pmode, x, tp));
+ gen_rtx_MINUS (Pmode, tmp, tp));
+ }
+ else
+ {
+ rtx caddr = ix86_tls_get_addr ();
+
+ if (TARGET_64BIT)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
+ insns = get_insns ();
+ end_sequence ();
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, base, rax, eqv);
+ }
+ else
+ emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
}
off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
}
-
break;
case TLS_MODEL_INITIAL_EXEC:
rtx temp = gen_reg_rtx (Pmode);
rtx val = force_operand (XEXP (x, 1), temp);
if (val != temp)
- emit_move_insn (temp, val);
+ {
+ if (GET_MODE (val) != Pmode)
+ val = convert_to_mode (Pmode, val, 1);
+ emit_move_insn (temp, val);
+ }
XEXP (x, 1) = temp;
return x;
rtx temp = gen_reg_rtx (Pmode);
rtx val = force_operand (XEXP (x, 0), temp);
if (val != temp)
- emit_move_insn (temp, val);
+ {
+ if (GET_MODE (val) != Pmode)
+ val = convert_to_mode (Pmode, val, 1);
+ emit_move_insn (temp, val);
+ }
XEXP (x, 0) = temp;
return x;
|| !MEM_P (orig_x))
return ix86_delegitimize_tls_address (orig_x);
x = XVECEXP (XEXP (x, 0), 0, 0);
- if (GET_MODE (orig_x) != Pmode)
+ if (GET_MODE (orig_x) != GET_MODE (x))
{
- x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
+ x = simplify_gen_subreg (GET_MODE (orig_x), x,
+ GET_MODE (x), 0);
if (x == NULL_RTX)
return orig_x;
}
d -- print duplicated register operand for AVX instruction.
D -- print condition for SSE cmp instruction.
P -- if PIC, print an @PLT suffix.
+ p -- print raw symbol name.
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
H -- print a memory address offset by 8; used for sse high-parts
case 'x':
case 'X':
case 'P':
+ case 'p':
break;
case 's':
x = const0_rtx;
}
- if (code != 'P')
+ if (code != 'P' && code != 'p')
{
if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
{
gcc_assert (ok);
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.base);
+ parts.base = simplify_subreg (GET_MODE (parts.base),
+ tmp, GET_MODE (tmp), 0);
+ }
+
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.index);
+ parts.index = simplify_subreg (GET_MODE (parts.index),
+ tmp, GET_MODE (tmp), 0);
+ }
+
base = parts.base;
index = parts.index;
disp = parts.disp;
}
else
{
+ int code = 0;
+
+ /* Print SImode registers for zero-extended addresses to force
+ addr32 prefix. Otherwise print DImode registers to avoid it. */
+ if (TARGET_64BIT)
+ code = (GET_CODE (addr) == ZERO_EXTEND) ? 'l' : 'q';
+
if (ASSEMBLER_DIALECT == ASM_ATT)
{
if (disp)
putc ('(', file);
if (base)
- print_reg (base, 0, file);
+ print_reg (base, code, file);
if (index)
{
putc (',', file);
- print_reg (index, 0, file);
+ print_reg (index, code, file);
if (scale != 1)
fprintf (file, ",%d", scale);
}
putc ('[', file);
if (base)
{
- print_reg (base, 0, file);
+ print_reg (base, code, file);
if (offset)
{
if (INTVAL (offset) >= 0)
if (index)
{
putc ('+', file);
- print_reg (index, 0, file);
+ print_reg (index, code, file);
if (scale != 1)
fprintf (file, "*%d", scale);
}
operand may be [SDX]Fmode. */
const char *
-output_fix_trunc (rtx insn, rtx *operands, int fisttp)
+output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
int dimode_p = GET_MODE (operands[0]) == DImode;
should be used. UNORDERED_P is true when fucom should be used. */
const char *
-output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
+output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
int stack_top_dies;
rtx cmp_op0, cmp_op1;
const char *directive = ASM_LONG;
#ifdef ASM_QUAD
- if (TARGET_64BIT)
+ if (TARGET_LP64)
directive = ASM_QUAD;
#else
gcc_assert (!TARGET_64BIT);
op1 = force_operand (op1, op0);
if (op1 == op0)
return;
+ if (GET_MODE (op1) != mode)
+ op1 = convert_to_mode (mode, op1, 1);
}
else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op1))
op0, 1, OPTAB_DIRECT);
if (tmp == op0)
return;
+ if (GET_MODE (tmp) != mode)
+ op1 = convert_to_mode (mode, tmp, 1);
}
}
- if ((flag_pic || MACHOPIC_INDIRECT)
- && mode == Pmode && symbolic_operand (op1, Pmode))
+ if ((flag_pic || MACHOPIC_INDIRECT)
+ && symbolic_operand (op1, mode))
{
if (TARGET_MACHO && !TARGET_64BIT)
{
else
{
if (MEM_P (op0))
- op1 = force_reg (Pmode, op1);
- else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
+ op1 = force_reg (mode, op1);
+ else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
{
rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
op1 = legitimize_pic_address (op1, reg);
if (op0 == op1)
return;
+ if (GET_MODE (op1) != mode)
+ op1 = convert_to_mode (mode, op1, 1);
}
}
}
insn = emit_move_insn (operands[1], tmp1);
else
{
- /* Need a new scratch register since the old one has result
+ /* Need a new scratch register since the old one has result
of 8bit divide. */
scratch = gen_reg_rtx (mode);
emit_move_insn (scratch, tmp1);
rtx destexp;
rtx srcexp;
rtx countreg;
+ HOST_WIDE_INT rounded_count;
/* If the size is known, it is shorter to use rep movs. */
if (mode == QImode && CONST_INT_P (count)
}
if (CONST_INT_P (count))
{
- count = GEN_INT (INTVAL (count)
+ rounded_count = (INTVAL (count)
& ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
destmem = shallow_copy_rtx (destmem);
srcmem = shallow_copy_rtx (srcmem);
- set_mem_size (destmem, count);
- set_mem_size (srcmem, count);
+ set_mem_size (destmem, rounded_count);
+ set_mem_size (srcmem, rounded_count);
}
else
{
- if (MEM_SIZE (destmem))
- set_mem_size (destmem, NULL_RTX);
- if (MEM_SIZE (srcmem))
- set_mem_size (srcmem, NULL_RTX);
+ if (MEM_SIZE_KNOWN_P (destmem))
+ clear_mem_size (destmem);
+ if (MEM_SIZE_KNOWN_P (srcmem))
+ clear_mem_size (srcmem);
}
emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
destexp, srcexp));
{
rtx destexp;
rtx countreg;
+ HOST_WIDE_INT rounded_count;
if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
if (orig_value == const0_rtx && CONST_INT_P (count))
{
- count = GEN_INT (INTVAL (count)
+ rounded_count = (INTVAL (count)
& ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
destmem = shallow_copy_rtx (destmem);
- set_mem_size (destmem, count);
+ set_mem_size (destmem, rounded_count);
}
- else if (MEM_SIZE (destmem))
- set_mem_size (destmem, NULL_RTX);
+ else if (MEM_SIZE_KNOWN_P (destmem))
+ clear_mem_size (destmem);
emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
int desired_align, int align_bytes)
{
rtx src = *srcp;
- rtx src_size, dst_size;
+ rtx orig_dst = dst;
+ rtx orig_src = src;
int off = 0;
int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
if (src_align_bytes >= 0)
src_align_bytes = desired_align - src_align_bytes;
- src_size = MEM_SIZE (src);
- dst_size = MEM_SIZE (dst);
if (align_bytes & 1)
{
dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
set_mem_align (src, src_align * BITS_PER_UNIT);
}
- if (dst_size)
- set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
- if (src_size)
- set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
+ if (MEM_SIZE_KNOWN_P (orig_dst))
+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
+ if (MEM_SIZE_KNOWN_P (orig_src))
+ set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
*srcp = src;
return dst;
}
int desired_align, int align_bytes)
{
int off = 0;
- rtx dst_size = MEM_SIZE (dst);
+ rtx orig_dst = dst;
if (align_bytes & 1)
{
dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
set_mem_align (dst, desired_align * BITS_PER_UNIT);
- if (dst_size)
- set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ if (MEM_SIZE_KNOWN_P (orig_dst))
+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
return dst;
}
algs = &cost->memset[TARGET_64BIT != 0];
else
algs = &cost->memcpy[TARGET_64BIT != 0];
- if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
- return stringop_alg;
+ if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
+ return ix86_stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (!optimize_for_speed)
{
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
rtx callarg2,
- rtx pop, int sibcall)
+ rtx pop, bool sibcall)
{
+  /* We need to represent that SI, DI and XMM6-XMM15 registers are
+     clobbered by SYSV calls.  */
+ static int clobbered_registers[] = {
+ XMM6_REG, XMM7_REG, XMM8_REG,
+ XMM9_REG, XMM10_REG, XMM11_REG,
+ XMM12_REG, XMM13_REG, XMM14_REG,
+ XMM15_REG, SI_REG, DI_REG
+ };
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
rtx use = NULL, call;
+ unsigned int vec_len;
if (pop == const0_rtx)
pop = NULL;
? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
: !call_insn_operand (XEXP (fnaddr, 0), Pmode))
{
- fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
- fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ fnaddr = XEXP (fnaddr, 0);
+ if (GET_MODE (fnaddr) != Pmode)
+ fnaddr = convert_to_mode (Pmode, fnaddr, 1);
+ fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
}
+ vec_len = 0;
call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
if (retval)
call = gen_rtx_SET (VOIDmode, retval, call);
+ vec[vec_len++] = call;
+
if (pop)
{
pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ vec[vec_len++] = pop;
}
+
if (TARGET_64BIT_MS_ABI
&& (!callarg2 || INTVAL (callarg2) != -2))
{
- /* We need to represent that SI and DI registers are clobbered
- by SYSV calls. */
- static int clobbered_registers[] = {
- XMM6_REG, XMM7_REG, XMM8_REG,
- XMM9_REG, XMM10_REG, XMM11_REG,
- XMM12_REG, XMM13_REG, XMM14_REG,
- XMM15_REG, SI_REG, DI_REG
- };
- unsigned int i;
- rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
- rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
- UNSPEC_MS_TO_SYSV_CALL);
+ unsigned i;
- vec[0] = call;
- vec[1] = unspec;
- for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
- vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
- ? TImode : DImode,
- gen_rtx_REG
- (SSE_REGNO_P (clobbered_registers[i])
- ? TImode : DImode,
- clobbered_registers[i]));
+ vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
- call = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
- + 2, vec));
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[vec_len++]
+ = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
}
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
if (TARGET_VZEROUPPER)
{
- rtx unspec;
int avx256;
-
if (cfun->machine->callee_pass_avx256_p)
{
if (cfun->machine->callee_return_avx256_p)
if (reload_completed)
emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
else
- {
- unspec = gen_rtx_UNSPEC (VOIDmode,
- gen_rtvec (1, GEN_INT (avx256)),
- UNSPEC_CALL_NEEDS_VZEROUPPER);
- call = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, call, unspec));
- }
+ vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, GEN_INT (avx256)),
+ UNSPEC_CALL_NEEDS_VZEROUPPER);
}
+ if (vec_len > 1)
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
call = emit_call_insn (call);
if (use)
CALL_INSN_FUNCTION_USAGE (call) = use;
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
- rtx call = XVECEXP (PATTERN (insn), 0, 0);
+ rtx pat = PATTERN (insn);
+ rtvec vec = XVEC (pat, 0);
+ int len = GET_NUM_ELEM (vec) - 1;
+
+ /* Strip off the last entry of the parallel. */
+ gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
+ gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
+ if (len == 1)
+ pat = RTVEC_ELT (vec, 0);
+ else
+ pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
+
emit_insn (gen_avx_vzeroupper (vzeroupper));
- emit_call_insn (call);
+ emit_call_insn (pat);
}
/* Output the assembly for a call instruction. */
const char *
-ix86_output_call_insn (rtx insn, rtx call_op, int addr_op)
+ix86_output_call_insn (rtx insn, rtx call_op)
{
bool direct_p = constant_call_address_operand (call_op, Pmode);
bool seh_nop_p = false;
-
- gcc_assert (addr_op == 0 || addr_op == 1);
+ const char *xasm;
if (SIBLING_CALL_P (insn))
{
if (direct_p)
- return addr_op ? "jmp\t%P1" : "jmp\t%P0";
+ xasm = "jmp\t%P0";
/* SEH epilogue detection requires the indirect branch case
to include REX.W. */
else if (TARGET_SEH)
- return addr_op ? "rex.W jmp %A1" : "rex.W jmp %A0";
+ xasm = "rex.W jmp %A0";
else
- return addr_op ? "jmp\t%A1" : "jmp\t%A0";
+ xasm = "jmp\t%A0";
+
+ output_asm_insn (xasm, &call_op);
+ return "";
}
/* SEH unwinding can require an extra nop to be emitted in several
}
if (direct_p)
- {
- if (seh_nop_p)
- return addr_op ? "call\t%P1\n\tnop" : "call\t%P0\n\tnop";
- else
- return addr_op ? "call\t%P1" : "call\t%P0";
- }
+ xasm = "call\t%P0";
else
- {
- if (seh_nop_p)
- return addr_op ? "call\t%A1\n\tnop" : "call\t%A0\n\tnop";
- else
- return addr_op ? "call\t%A1" : "call\t%A0";
- }
+ xasm = "call\t%A0";
+
+ output_asm_insn (xasm, &call_op);
+
+ if (seh_nop_p)
+ return "nop";
+
+ return "";
}
\f
/* Clear stack slot assignments remembered from previous functions.
ix86_stack_locals = s;
return s->rtl;
}
-
-/* Construct the SYMBOL_REF for the tls_get_addr function. */
-
-static GTY(()) rtx ix86_tls_symbol;
-rtx
-ix86_tls_get_addr (void)
-{
-
- if (!ix86_tls_symbol)
- {
- ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
- (TARGET_ANY_GNU_TLS
- && !TARGET_64BIT)
- ? "___tls_get_addr"
- : "__tls_get_addr");
- }
-
- return ix86_tls_symbol;
-}
-
-/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
-
-static GTY(()) rtx ix86_tls_module_base_symbol;
-rtx
-ix86_tls_module_base (void)
-{
-
- if (!ix86_tls_module_base_symbol)
- {
- ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
- "_TLS_MODULE_BASE_");
- SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
- }
-
- return ix86_tls_module_base_symbol;
-}
\f
/* Calculate the length of the memory address in the instruction
- encoding. Does not include the one-byte modrm, opcode, or prefix. */
+ encoding. Includes addr32 prefix, does not include the one-byte modrm,
+ opcode, or other prefixes. */
int
memory_address_length (rtx addr)
base = parts.base;
index = parts.index;
disp = parts.disp;
- len = 0;
+
+ /* Add length of addr32 prefix. */
+ len = (GET_CODE (addr) == ZERO_EXTEND);
/* Rule of thumb:
- esp as the base always wants an index,
/* Compute default value for "length_immediate" attribute. When SHORTFORM
is set, expect that insn have 8bit immediate alternative. */
int
-ix86_attr_length_immediate_default (rtx insn, int shortform)
+ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
int len = 0;
int i;
2 or 3 byte VEX prefix and 1 opcode byte. */
int
-ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
- int has_vex_w)
+ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
int i;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
case PROCESSOR_BDVER1:
+ case PROCESSOR_BDVER2:
case PROCESSOR_BTVER1:
return 3;
}
}
-/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
+/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
by DEP_INSN and nothing set by DEP_INSN. */
-static int
+static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
rtx set, set2;
&& insn_type != TYPE_ICMOV
&& insn_type != TYPE_FCMOV
&& insn_type != TYPE_IBR)
- return 0;
+ return false;
if ((set = single_set (dep_insn)) != 0)
{
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
}
else
- return 0;
+ return false;
if (!REG_P (set) || REGNO (set) != FLAGS_REG)
- return 0;
+ return false;
/* This test is true if the dependent insn reads the flags but
not any other potentially set register. */
if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
- return 0;
+ return false;
if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
case PROCESSOR_BDVER1:
+ case PROCESSOR_BDVER2:
case PROCESSOR_BTVER1:
case PROCESSOR_ATOM:
case PROCESSOR_GENERIC32:
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
rtx mem, fnaddr;
+ int opcode;
+ int offset = 0;
fnaddr = XEXP (DECL_RTL (fndecl), 0);
- if (!TARGET_64BIT)
- {
- rtx disp, chain;
- int opcode;
-
- /* Depending on the static chain location, either load a register
- with a constant, or push the constant to the stack. All of the
- instructions are the same size. */
- chain = ix86_static_chain (fndecl, true);
- if (REG_P (chain))
- {
- if (REGNO (chain) == CX_REG)
- opcode = 0xb9;
- else if (REGNO (chain) == AX_REG)
- opcode = 0xb8;
- else
- gcc_unreachable ();
- }
- else
- opcode = 0x68;
-
- mem = adjust_address (m_tramp, QImode, 0);
- emit_move_insn (mem, gen_int_mode (opcode, QImode));
-
- mem = adjust_address (m_tramp, SImode, 1);
- emit_move_insn (mem, chain_value);
-
- /* Compute offset from the end of the jmp to the target function.
- In the case in which the trampoline stores the static chain on
- the stack, we need to skip the first insn which pushes the
- (call-saved) register static chain; this push is 1 byte. */
- disp = expand_binop (SImode, sub_optab, fnaddr,
- plus_constant (XEXP (m_tramp, 0),
- MEM_P (chain) ? 9 : 10),
- NULL_RTX, 1, OPTAB_DIRECT);
-
- mem = adjust_address (m_tramp, QImode, 5);
- emit_move_insn (mem, gen_int_mode (0xe9, QImode));
-
- mem = adjust_address (m_tramp, SImode, 6);
- emit_move_insn (mem, disp);
- }
- else
+ if (TARGET_64BIT)
{
- int offset = 0;
+ int size;
/* Load the function address to r11. Try to load address using
the shorter movl instead of movabs. We may want to support
offset += 10;
}
- /* Load static chain using movabs to r10. */
+ /* Load static chain using movabs to r10. Use the
+ shorter movl instead of movabs for x32. */
+ if (TARGET_X32)
+ {
+ opcode = 0xba41;
+ size = 6;
+ }
+ else
+ {
+ opcode = 0xba49;
+ size = 10;
+ }
+
mem = adjust_address (m_tramp, HImode, offset);
- emit_move_insn (mem, gen_int_mode (0xba49, HImode));
+ emit_move_insn (mem, gen_int_mode (opcode, HImode));
- mem = adjust_address (m_tramp, DImode, offset + 2);
+ mem = adjust_address (m_tramp, ptr_mode, offset + 2);
emit_move_insn (mem, chain_value);
- offset += 10;
+ offset += size;
/* Jump to r11; the last (unused) byte is a nop, only there to
pad the write out to a single 32-bit store. */
mem = adjust_address (m_tramp, SImode, offset);
emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
offset += 4;
+ }
+ else
+ {
+ rtx disp, chain;
+
+ /* Depending on the static chain location, either load a register
+ with a constant, or push the constant to the stack. All of the
+ instructions are the same size. */
+ chain = ix86_static_chain (fndecl, true);
+ if (REG_P (chain))
+ {
+ switch (REGNO (chain))
+ {
+ case AX_REG:
+ opcode = 0xb8; break;
+ case CX_REG:
+ opcode = 0xb9; break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ opcode = 0x68;
+
+ mem = adjust_address (m_tramp, QImode, offset);
+ emit_move_insn (mem, gen_int_mode (opcode, QImode));
- gcc_assert (offset <= TRAMPOLINE_SIZE);
+ mem = adjust_address (m_tramp, SImode, offset + 1);
+ emit_move_insn (mem, chain_value);
+ offset += 5;
+
+ mem = adjust_address (m_tramp, QImode, offset);
+ emit_move_insn (mem, gen_int_mode (0xe9, QImode));
+
+ mem = adjust_address (m_tramp, SImode, offset + 1);
+
+ /* Compute offset from the end of the jmp to the target function.
+ In the case in which the trampoline stores the static chain on
+ the stack, we need to skip the first insn which pushes the
+ (call-saved) register static chain; this push is 1 byte. */
+ offset += 5;
+ disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (XEXP (m_tramp, 0),
+ offset - (MEM_P (chain) ? 1 : 0)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ emit_move_insn (mem, disp);
}
-#ifdef ENABLE_EXECUTE_STACK
+ gcc_assert (offset <= TRAMPOLINE_SIZE);
+
+#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
if (CHECK_EXECUTE_STACK_ENABLED)
#endif
IX86_BUILTIN_CLFLUSH,
IX86_BUILTIN_MFENCE,
IX86_BUILTIN_LFENCE,
+ IX86_BUILTIN_PAUSE,
IX86_BUILTIN_BSRSI,
IX86_BUILTIN_BSRDI,
{
{ ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
{ ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
/* MMX */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
- { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
ix86_init_mmx_sse_builtins ();
- if (TARGET_64BIT)
+ if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
#ifdef SUBTARGET_INIT_BUILTINS
int adjust = (comparison_p) ? 1 : 0;
enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
- if (last_arg_constant && i == nargs-1)
+ if (last_arg_constant && i == nargs - 1)
{
- if (!CONST_INT_P (op))
+ if (!insn_data[icode].operand[i + 1].predicate (op, mode))
{
- error ("last argument must be an immediate");
- return gen_reg_rtx (tmode);
+ enum insn_code new_icode = icode;
+ switch (icode)
+ {
+ case CODE_FOR_xop_vpermil2v2df3:
+ case CODE_FOR_xop_vpermil2v4sf3:
+ case CODE_FOR_xop_vpermil2v4df3:
+ case CODE_FOR_xop_vpermil2v8sf3:
+ error ("the last argument must be a 2-bit immediate");
+ return gen_reg_rtx (tmode);
+ case CODE_FOR_xop_rotlv2di3:
+ new_icode = CODE_FOR_rotlv2di3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv4si3:
+ new_icode = CODE_FOR_rotlv4si3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv8hi3:
+ new_icode = CODE_FOR_rotlv8hi3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv16qi3:
+ new_icode = CODE_FOR_rotlv16qi3;
+ xop_rotl:
+ if (CONST_INT_P (op))
+ {
+ int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+ op = GEN_INT (INTVAL (op) & mask);
+ gcc_checking_assert
+ (insn_data[icode].operand[i + 1].predicate (op, mode));
+ }
+ else
+ {
+ gcc_checking_assert
+ (nargs == 2
+ && insn_data[new_icode].operand[0].mode == tmode
+ && insn_data[new_icode].operand[1].mode == tmode
+ && insn_data[new_icode].operand[2].mode == mode
+ && insn_data[new_icode].operand[0].predicate
+ == insn_data[icode].operand[0].predicate
+ && insn_data[new_icode].operand[1].predicate
+ == insn_data[icode].operand[1].predicate);
+ icode = new_icode;
+ goto non_constant;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
}
}
else
{
+ non_constant:
if (VECTOR_MODE_P (mode))
op = safe_vector_operand (op, mode);
if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
{
- error ("the fifth argument must be a 8-bit immediate");
+ error ("the fifth argument must be an 8-bit immediate");
return const0_rtx;
}
if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
{
- error ("the third argument must be a 8-bit immediate");
+ error ("the third argument must be an 8-bit immediate");
return const0_rtx;
}
op = expand_normal (arg);
gcc_assert (target == 0);
if (memory)
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ {
+ if (GET_MODE (op) != Pmode)
+ op = convert_to_mode (Pmode, op, 1);
+ target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
+ }
else
target = force_reg (tmode, op);
arg_adjust = 1;
if (i == memory)
{
/* This must be the memory operand. */
- op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ if (GET_MODE (op) != Pmode)
+ op = convert_to_mode (Pmode, op, 1);
+ op = gen_rtx_MEM (mode, force_reg (Pmode, op));
gcc_assert (GET_MODE (op) == mode
|| GET_MODE (op) == VOIDmode);
}
&& !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
{
char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
- NULL, NULL, false);
+ NULL, (enum fpmath_unit) 0, false);
if (!opts)
error ("%qE needs unknown isa option", fndecl);
mode1 = insn_data[icode].operand[1].mode;
mode2 = insn_data[icode].operand[2].mode;
- op0 = force_reg (Pmode, op0);
- op0 = gen_rtx_MEM (mode1, op0);
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
if (!insn_data[icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
op0 = expand_normal (arg0);
icode = CODE_FOR_sse2_clflush;
if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
emit_insn (gen_sse2_clflush (op0));
return 0;
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!REG_P (op0))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
if (!REG_P (op1))
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op0 = expand_normal (arg0);
icode = CODE_FOR_lwp_llwpcb;
if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
emit_insn (gen_lwp_llwpcb (op0));
return 0;
op0 = gen_reg_rtx (mode0);
emit_insn (GEN_FCN (icode) (op0));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op1 = expand_normal (arg0);
+ if (!address_operand (op1, VOIDmode))
+ {
+ op1 = convert_memory_address (Pmode, op1);
+ op1 = copy_addr_to_reg (op1);
+ }
+ emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+
op1 = gen_reg_rtx (SImode);
emit_move_insn (op1, CONST1_RTX (SImode));
else
op2 = gen_rtx_SUBREG (SImode, op0, 0);
+ if (target == 0)
+ target = gen_reg_rtx (SImode);
+
pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
const0_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, op1,
+ emit_insn (gen_rtx_SET (VOIDmode, target,
gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
- emit_move_insn (target, op1);
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- op1 = expand_normal (arg0);
- if (!address_operand (op1, VOIDmode))
- op1 = copy_addr_to_reg (op1);
- emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
return target;
default:
zero above. We only want to wind up preferring 80387 registers if
we plan on doing computation with them. */
if (TARGET_80387
- && standard_80387_constant_p (x))
+ && standard_80387_constant_p (x) > 0)
{
/* Limit class to non-sse. */
if (regclass == FLOAT_SSE_REGS)
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
- enum machine_mode mode,
- secondary_reload_info *sri ATTRIBUTE_UNUSED)
+ enum machine_mode mode, secondary_reload_info *sri)
{
+ /* Double-word spills from general registers to non-offsettable memory
+ references (zero-extended addresses) require special handling. */
+ if (TARGET_64BIT
+ && MEM_P (x)
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && rclass == GENERAL_REGS
+ && !offsettable_memref_p (x))
+ {
+ sri->icode = (in_p
+ ? CODE_FOR_reload_noff_load
+ : CODE_FOR_reload_noff_store);
+ /* Add the cost of move to a temporary. */
+ sri->extra_cost = 1;
+
+ return NO_REGS;
+ }
+
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
if (!TARGET_64BIT
/* This condition handles corner case where an expression involving
pointers gets vectorized. We're trying to use the address of a
- stack slot as a vector initializer.
+ stack slot as a vector initializer.
(set (reg:V2DI 74 [ vect_cst_.2 ])
(vec_duplicate:V2DI (reg/f:DI 20 frame)))
return inline_secondary_memory_needed (class1, class2, mode, strict);
}
+/* Implement the TARGET_CLASS_MAX_NREGS hook.
+
+ On the 80386, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+
+static unsigned char
+ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
+{
+ if (MAYBE_INTEGER_CLASS_P (rclass))
+ {
+ /* XFmode spans 3 words on 32-bit but only 2 words on 64-bit
+ (the word is twice as wide there). */
+ if (mode == XFmode)
+ return (TARGET_64BIT ? 2 : 3);
+ /* XCmode is a complex pair of XFmode values: double the count. */
+ else if (mode == XCmode)
+ return (TARGET_64BIT ? 4 : 6);
+ else
+ /* Generic case: size of MODE rounded up to whole words. */
+ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+ }
+ else
+ {
+ /* Non-integer classes (FP/SSE/MMX): one register per value,
+ two for a complex value's real/imaginary parts. */
+ if (COMPLEX_MODE_P (mode))
+ return 2;
+ else
+ return 1;
+ }
+}
+
/* Return true if the registers in CLASS cannot represent the change from
modes FROM to TO. */
/* In case of copying from general_purpose_register we may emit multiple
stores followed by single load causing memory size mismatch stall.
Count this as arbitrarily high cost of 20. */
- if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ if (targetm.class_max_nregs (class1, mode)
+ > targetm.class_max_nregs (class2, mode))
cost += 20;
/* In the case of FP/MMX moves, the registers actually overlap, and we
return 2;
}
-/* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
+/* Return TRUE if hard register REGNO can hold a value of machine-mode
+ MODE. */
bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
if (GET_MODE_CLASS (mode) == MODE_CC
|| GET_MODE_CLASS (mode) == MODE_RANDOM
|| GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
- return 0;
+ return false;
if (FP_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
if (SSE_REGNO_P (regno))
/* Take care for QImode values - they can be in non-QI regs,
but then they do cause partial register stalls. */
if (regno <= BX_REG || TARGET_64BIT)
- return 1;
+ return true;
if (!TARGET_PARTIAL_REG_STALL)
- return 1;
- return reload_in_progress || reload_completed;
+ return true;
+ return !can_create_pseudo_p ();
}
/* We handle both integer and floats in the general purpose registers. */
else if (VALID_INT_MODE_P (mode))
- return 1;
+ return true;
else if (VALID_FP_MODE_P (mode))
- return 1;
+ return true;
else if (VALID_DFP_MODE_P (mode))
- return 1;
+ return true;
/* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
on to use that value in smaller contexts, this can easily force a
pseudo to be allocated to GENERAL_REGS. Since this is no worse than
supporting DImode, allow it. */
else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
- return 1;
+ return true;
- return 0;
+ return false;
}
/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
/* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
sub = XEXP (x, 0);
if (GET_CODE (sub) == NEG)
- sub = XEXP (x, 0);
+ sub = XEXP (sub, 0);
*total += rtx_cost (sub, FMA, speed);
sub = XEXP (x, 2);
if (GET_CODE (sub) == NEG)
- sub = XEXP (x, 0);
+ sub = XEXP (sub, 0);
*total += rtx_cost (sub, FMA, speed);
return true;
}
if (MACHOPIC_ATT_STUB)
switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
else if (MACHOPIC_PURE)
- {
- if (TARGET_DEEP_BRANCH_PREDICTION)
- switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
- else
- switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
- }
+ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
else
switch_to_section (darwin_sections[machopic_symbol_stub_section]);
else if (MACHOPIC_PURE)
{
/* PIC stub. */
- if (TARGET_DEEP_BRANCH_PREDICTION)
- {
- /* 25-byte PIC stub using "CALL get_pc_thunk". */
- rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
- output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
- fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
- }
- else
- {
- /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
- fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
- fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
- }
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
+ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
+ label, lazy_ptr_name, label);
fprintf (file, "\tjmp\t*%%ecx\n");
}
else
compatibility with existing dylibs. */
if (MACHOPIC_PURE)
{
- /* PIC stubs. */
- if (TARGET_DEEP_BRANCH_PREDICTION)
- /* 25-byte PIC stub using "CALL get_pc_thunk". */
- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
- else
- /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
}
else
/* 16-byte -mdynamic-no-pic stub. */
*no_add_attrs = true;
return NULL_TREE;
}
- if (!TARGET_64BIT)
- {
- warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
- name);
- *no_add_attrs = true;
- return NULL_TREE;
- }
/* Can combine regparm with all attributes but fastcall. */
if (is_attribute_p ("ms_abi", name))
tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset, tree function)
{
- rtx xops[3];
rtx this_param = x86_this_parameter (function);
- rtx this_reg, tmp;
+ rtx this_reg, tmp, fnaddr;
- /* Make sure unwind info is emitted for the thunk if needed. */
- final_start_function (emit_barrier (), file, 1);
+ emit_note (NOTE_INSN_PROLOGUE_END);
/* If VCALL_OFFSET, we'll need THIS in a register. Might as well
pull it in now and let DELTA benefit. */
else if (vcall_offset)
{
/* Put the this parameter into %eax. */
- xops[0] = this_param;
- xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
- output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ this_reg = gen_rtx_REG (Pmode, AX_REG);
+ emit_move_insn (this_reg, this_param);
}
else
this_reg = NULL_RTX;
/* Adjust the this parameter by a fixed constant. */
if (delta)
{
- xops[0] = GEN_INT (delta);
- xops[1] = this_reg ? this_reg : this_param;
+ rtx delta_rtx = GEN_INT (delta);
+ rtx delta_dst = this_reg ? this_reg : this_param;
+
if (TARGET_64BIT)
{
- if (!x86_64_general_operand (xops[0], DImode))
+ if (!x86_64_general_operand (delta_rtx, Pmode))
{
- tmp = gen_rtx_REG (DImode, R10_REG);
- xops[1] = tmp;
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
- xops[0] = tmp;
- xops[1] = this_param;
+ tmp = gen_rtx_REG (Pmode, R10_REG);
+ emit_move_insn (tmp, delta_rtx);
+ delta_rtx = tmp;
}
- if (x86_maybe_negate_const_int (&xops[0], DImode))
- output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
}
- else if (x86_maybe_negate_const_int (&xops[0], SImode))
- output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+
+ emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
}
/* Adjust the this parameter by a value stored in the vtable. */
if (vcall_offset)
{
+ rtx vcall_addr, vcall_mem, this_mem;
+ unsigned int tmp_regno;
+
if (TARGET_64BIT)
- tmp = gen_rtx_REG (DImode, R10_REG);
+ tmp_regno = R10_REG;
else
{
- int tmp_regno = CX_REG;
unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
tmp_regno = AX_REG;
- tmp = gen_rtx_REG (SImode, tmp_regno);
+ else
+ tmp_regno = CX_REG;
}
+ tmp = gen_rtx_REG (Pmode, tmp_regno);
- xops[0] = gen_rtx_MEM (Pmode, this_reg);
- xops[1] = tmp;
- output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ this_mem = gen_rtx_MEM (ptr_mode, this_reg);
+ if (Pmode != ptr_mode)
+ this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
+ emit_move_insn (tmp, this_mem);
/* Adjust the this parameter. */
- xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
- if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
+ vcall_addr = plus_constant (tmp, vcall_offset);
+ if (TARGET_64BIT
+ && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
{
- rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
- xops[0] = GEN_INT (vcall_offset);
- xops[1] = tmp2;
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
+ rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
+ emit_move_insn (tmp2, GEN_INT (vcall_offset));
+ vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
}
- xops[1] = this_reg;
- output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
+
+ vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
+ if (Pmode != ptr_mode)
+ emit_insn (gen_addsi_1_zext (this_reg,
+ gen_rtx_REG (ptr_mode,
+ REGNO (this_reg)),
+ vcall_mem));
+ else
+ emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
}
/* If necessary, drop THIS back to its stack slot. */
if (this_reg && this_reg != this_param)
- {
- xops[0] = this_reg;
- xops[1] = this_param;
- output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
- }
+ emit_move_insn (this_param, this_reg);
- xops[0] = XEXP (DECL_RTL (function), 0);
+ fnaddr = XEXP (DECL_RTL (function), 0);
if (TARGET_64BIT)
{
if (!flag_pic || targetm.binds_local_p (function)
- || DEFAULT_ABI == MS_ABI)
- output_asm_insn ("jmp\t%P0", xops);
- /* All thunks should be in the same object as their target,
- and thus binds_local_p should be true. */
- else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
- gcc_unreachable ();
+ || cfun->machine->call_abi == MS_ABI)
+ ;
else
{
- tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
tmp = gen_rtx_CONST (Pmode, tmp);
- tmp = gen_rtx_MEM (QImode, tmp);
- xops[0] = tmp;
- output_asm_insn ("jmp\t%A0", xops);
+ fnaddr = gen_rtx_MEM (Pmode, tmp);
}
}
else
{
if (!flag_pic || targetm.binds_local_p (function))
- output_asm_insn ("jmp\t%P0", xops);
- else
+ ;
#if TARGET_MACHO
- if (TARGET_MACHO)
- {
- rtx sym_ref = XEXP (DECL_RTL (function), 0);
- if (TARGET_MACHO_BRANCH_ISLANDS)
- sym_ref = (gen_rtx_SYMBOL_REF
- (Pmode,
- machopic_indirection_name (sym_ref, /*stub_p=*/true)));
- tmp = gen_rtx_MEM (QImode, sym_ref);
- xops[0] = tmp;
- output_asm_insn ("jmp\t%0", xops);
- }
- else
+ else if (TARGET_MACHO)
+ {
+ fnaddr = machopic_indirect_call_target (DECL_RTL (function));
+ fnaddr = XEXP (fnaddr, 0);
+ }
#endif /* TARGET_MACHO */
+ else
{
- tmp = gen_rtx_REG (SImode, CX_REG);
+ tmp = gen_rtx_REG (Pmode, CX_REG);
output_set_got (tmp, NULL_RTX);
- xops[1] = tmp;
- output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
- output_asm_insn ("jmp\t{*}%1", xops);
+ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
+ fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
+ fnaddr = gen_rtx_MEM (Pmode, fnaddr);
}
}
+
+ /* Our sibling call patterns do not allow memories, because we have no
+ predicate that can distinguish between frame and non-frame memory.
+ For our purposes here, we can get away with (ab)using a jump pattern,
+ because we're going to do no optimization. */
+ if (MEM_P (fnaddr))
+ emit_jump_insn (gen_indirect_jump (fnaddr));
+ else
+ {
+ tmp = gen_rtx_MEM (QImode, fnaddr);
+ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
+ tmp = emit_call_insn (tmp);
+ SIBLING_CALL_P (tmp) = 1;
+ }
+ emit_barrier ();
+
+ /* Emit just enough of rest_of_compilation to get the insns emitted.
+ Note that use_thunk calls assemble_start_function et al. */
+ tmp = get_insns ();
+ insn_locators_alloc ();
+ shorten_branches (tmp);
+ final_start_function (tmp, file, 1);
+ final (tmp, file, 1);
final_end_function ();
}
/* Count the minimum number of instructions in BB. Return 4 if the
number of instructions >= 4. */
-static int
+static int
ix86_count_insn_bb (basic_block bb)
{
rtx insn;
}
-/* Count the minimum number of instructions in code path in BB.
+/* Count the minimum number of instructions in code path in BB.
Return 4 if the number of instructions >= 4. */
-static int
+static int
ix86_count_insn (basic_block bb)
{
edge e;
with old MDEP_REORGS that are not CFG based. Recompute it now. */
compute_bb_for_insn ();
+ /* Run the vzeroupper optimization if needed. */
+ if (TARGET_VZEROUPPER)
+ move_or_delete_vzeroupper ();
+
if (optimize && optimize_function_for_speed_p (cfun))
{
if (TARGET_PAD_SHORT_FUNCTION)
ix86_avoid_jump_mispredicts ();
#endif
}
-
- /* Run the vzeroupper optimization if needed. */
- if (TARGET_VZEROUPPER)
- move_or_delete_vzeroupper ();
}
/* Return nonzero when QImode register that must be represented via REX prefix
return clobbers;
}
-/* Implements target vector targetm.asm.encode_section_info. This
- is not used by netware. */
+/* Implements target vector targetm.asm.encode_section_info. */
static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
gen_rtx_MULT (mode, e2, e3)));
}
+#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
-static void ATTRIBUTE_UNUSED
+static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
tree decl)
{
flags & SECTION_WRITE ? "aw" : "a");
return;
}
+
+#ifndef USE_GAS
+ if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
+ {
+ solaris_elf_asm_comdat_section (name, flags, decl);
+ return;
+ }
+#endif
+
default_elf_asm_named_section (name, flags, decl);
}
+#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type. */
__stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
register, so it is better to call __stack_chk_fail directly. */
-static tree
+static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
return TARGET_64BIT
static bool
has_dispatch (rtx insn, int action)
{
- if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
+ if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
+ && flag_dispatch_scheduler)
switch (action)
{
default:
static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
- /* Disable double precision vectorizer if needed. */
- if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
- return word_mode;
-
- if (!TARGET_AVX && !TARGET_SSE)
+ if (!TARGET_SSE)
return word_mode;
switch (mode)
{
- case SFmode:
- return (TARGET_AVX && !flag_prefer_avx128) ? V8SFmode : V4SFmode;
- case DFmode:
- return (TARGET_AVX && !flag_prefer_avx128) ? V4DFmode : V2DFmode;
- case DImode:
- return V2DImode;
- case SImode:
- return V4SImode;
- case HImode:
- return V8HImode;
case QImode:
return V16QImode;
+ case HImode:
+ return V8HImode;
+ case SImode:
+ return V4SImode;
+ case DImode:
+ return V2DImode;
- default:;
- }
+ case SFmode:
+ if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ return V8SFmode;
+ else
+ return V4SFmode;
+
+ case DFmode:
+ if (!TARGET_VECTORIZE_DOUBLE)
+ return word_mode;
+ else if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ return V4DFmode;
+ else if (TARGET_SSE2)
+ return V2DFmode;
+ /* FALLTHRU */
- return word_mode;
+ default:
+ return word_mode;
+ }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
static unsigned int
ix86_autovectorize_vector_sizes (void)
{
- return TARGET_AVX ? 32 | 16 : 0;
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Initialize the GCC target structure. */
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
-#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start
-#undef TARGET_DEFAULT_TARGET_FLAGS
-#define TARGET_DEFAULT_TARGET_FLAGS \
- (TARGET_DEFAULT \
- | TARGET_SUBTARGET_DEFAULT \
- | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
-
-#undef TARGET_HANDLE_OPTION
-#define TARGET_HANDLE_OPTION ix86_handle_option
-
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
-#undef TARGET_OPTION_OPTIMIZATION_TABLE
-#define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
-#undef TARGET_OPTION_INIT_STRUCT
-#define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
+#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
-
-#undef TARGET_SUPPORTS_SPLIT_STACK
-#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
+#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+#undef TARGET_CLASS_MAX_NREGS
+#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
+
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS