#include "ggc.h"
#include "target.h"
#include "target-def.h"
+#include "common/common-target.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
-#include "dwarf2out.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
/* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
   in basic block BB. Delete it if the upper 128 bits of the AVX
   registers are unused. If it isn't deleted, move it to just before
   a jump insn.
-
+
   STATE is the state of the upper 128 bits of the AVX registers at entry. */
static void
1, /* cond_not_taken_branch_cost. */
};
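+
+/* BDVER2 cost table. The entries below appear to be carried over from
+   bdver1_cost, pending Piledriver-specific tuning. */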
+struct processor_costs bdver2_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
+ 16, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (leaving the number of prefetches
+ entirely unlimited is probably not a good idea either, as their
+ execution also takes some time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+
+ /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline code considerably. */
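+ /* Each {max, alg} pair below gives the largest block size that ALG
+ handles; a max of -1 means no upper bound. The first pair of tables
+ is for memcpy (32-bit, then 64-bit), the second pair for memset. */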
+ {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {24, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
struct processor_costs btver1_cost = {
COSTS_N_INSNS (1), /* cost of an add instruction */
COSTS_N_INSNS (2), /* cost of a lea instruction */
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
-#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
-#define m_NOCONA (1<<PROCESSOR_NOCONA)
-#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
-#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
-#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
-#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
-#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
-#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
-#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
-#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
-#define m_ATOM (1<<PROCESSOR_ATOM)
-
-#define m_GEODE (1<<PROCESSOR_GEODE)
-#define m_K6 (1<<PROCESSOR_K6)
-#define m_K6_GEODE (m_K6 | m_GEODE)
-#define m_K8 (1<<PROCESSOR_K8)
-#define m_ATHLON (1<<PROCESSOR_ATHLON)
-#define m_ATHLON_K8 (m_K8 | m_ATHLON)
-#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
-#define m_BDVER1 (1<<PROCESSOR_BDVER1)
-#define m_BTVER1 (1<<PROCESSOR_BTVER1)
-#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1 | m_BTVER1)
+#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
+#define m_NOCONA (1<<PROCESSOR_NOCONA)
+#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
+#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
+#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
+#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
+#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
+#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
+#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
+#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
+#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
+#define m_ATOM (1<<PROCESSOR_ATOM)
+
+#define m_GEODE (1<<PROCESSOR_GEODE)
+#define m_K6 (1<<PROCESSOR_K6)
+#define m_K6_GEODE (m_K6 | m_GEODE)
+#define m_K8 (1<<PROCESSOR_K8)
+#define m_ATHLON (1<<PROCESSOR_ATHLON)
+#define m_ATHLON_K8 (m_K8 | m_ATHLON)
+#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
+#define m_BDVER1 (1<<PROCESSOR_BDVER1)
+#define m_BDVER2 (1<<PROCESSOR_BDVER2)
+#define m_BDVER (m_BDVER1 | m_BDVER2)
+#define m_BTVER1 (1<<PROCESSOR_BTVER1)
+#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)
#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
   negatively, so enabling for Generic64 seems like a good code size
   tradeoff. We can't enable it for 32bit generic because it does not
   work well with PPro-based chips. */
- m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2I7_64 | m_GENERIC64,
+ m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
/* X86_TUNE_PUSH_MEMORY */
- m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
- | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_ZERO_EXTEND_WITH_AND */
m_486 | m_PENT,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
- | m_CORE2I7 | m_GENERIC,
-
- /* X86_TUNE_DEEP_BRANCH_PREDICTION */
- m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
- | m_CORE2I7 | m_GENERIC,
+ m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
~m_386,
/* X86_TUNE_USE_SAHF */
- m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_BTVER1
- | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
+ | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
- | m_CORE2I7 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
register stalls on Generic32 compilation setting as well. However
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2I7 | m_GENERIC),
+ ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
- ~(m_PENT | m_ATOM | m_K6 | m_CORE2I7 | m_GENERIC),
+ ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
- | m_CORE2I7 | m_GENERIC /* | m_PENT4 ? */,
+ m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_FAST_PREFIX */
- ~(m_PENT | m_486 | m_386),
+ ~(m_386 | m_486 | m_PENT),
/* X86_TUNE_SINGLE_STRINGOP */
- m_386 | m_PENT4 | m_NOCONA,
+ m_386 | m_P4_NOCONA,
/* X86_TUNE_QIMODE_MATH */
~0,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
- | m_GENERIC | m_GEODE),
+ ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE
+ | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
- m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7 | m_GENERIC
- | m_AMDFAM10 | m_BDVER1,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_AMDFAM10 | m_BDVER1 | m_BTVER1 | m_COREI7,
+ m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_BDVER1 | m_COREI7,
+ m_COREI7 | m_BDVER,
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
- m_BDVER1,
+ m_BDVER,
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
m_AMD_MULTIPLE,
/* X86_TUNE_SSE_LOAD0_BY_PXOR */
- m_PPRO | m_PENT4 | m_NOCONA,
+ m_PPRO | m_P4_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC,
+ m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
- m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_EPILOGUE_USING_MOVE */
- m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_SHIFT1 */
~m_486,
~(m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
- ~(m_AMDFAM10 | m_BDVER1),
+ ~(m_AMDFAM10 | m_BDVER),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2I7
- | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2I7
- | m_GENERIC,
+ m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_AMD_MULTIPLE | m_ATOM | m_CORE2I7 | m_GENERIC,
+ m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
- ~(m_PENT4 | m_NOCONA | m_CORE2I7 | m_GENERIC | m_ATOM),
+ ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
- m_AMD_MULTIPLE | m_CORE2I7 | m_GENERIC,
+ m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
m_ATOM,
/* X86_TUNE_EXT_80387_CONSTANTS */
- m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
- | m_CORE2I7 | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_SHORTEN_X87_SSE */
~m_K8,
/* X86_TUNE_AVOID_VECTOR_DECODE */
- m_K8 | m_CORE2I7_64 | m_GENERIC64,
+ m_CORE2I7_64 | m_K8 | m_GENERIC64,
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
and SImode multiply, but 386 and 486 do HImode multiply faster. */
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
vector path on AMD machines. */
- m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
+ m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
machines. */
- m_K8 | m_CORE2I7_64 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1 | m_BTVER1,
+ m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
than a MOV. */
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
from FP to FP. */
- m_AMDFAM10 | m_CORE2I7 | m_GENERIC,
+ m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
- m_BDVER1,
+ m_BDVER,
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
at -O3. For the moment, the prefetching seems badly tuned for Intel
chips. */
- m_K6_GEODE | m_AMD_MULTIPLE
+ m_K6_GEODE | m_AMD_MULTIPLE,
+
+ /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
+ the auto-vectorizer. */
+ m_BDVER,
+
+ /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
+ during reassociation of integer computation. */
+ m_ATOM,
+
+ /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
+ during reassociation of fp computation. */
+ m_ATOM
};
/* Feature tests against the various architecture variations. */
};
static const unsigned int x86_accumulate_outgoing_args
- = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2I7
- | m_GENERIC;
+ = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
- | m_NOCONA | m_CORE2I7 | m_GENERIC;
+ = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
+
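+/* Tuning targets for which unaligned 256-bit AVX loads and stores are
+   split into two 128-bit halves; see the MASK_AVX256_SPLIT_UNALIGNED_*
+   handling further below. */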
+static const unsigned int x86_avx256_split_unaligned_load
+ = m_COREI7 | m_GENERIC;
+
+static const unsigned int x86_avx256_split_unaligned_store
+ = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
rtx, rtx, int);
-static void ix86_add_new_builtins (int);
+static void ix86_add_new_builtins (HOST_WIDE_INT);
static rtx ix86_expand_vec_perm_builtin (tree);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
IX86_FUNCTION_SPECIFIC_MAX
};
-static char *ix86_target_string (int, int, const char *, const char *,
- enum fpmath_unit, bool);
+static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
+ const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static int ix86_tune_defaulted;
static int ix86_arch_specified;
-/* Define a set of ISAs which are available when a given ISA is
- enabled. MMX and SSE ISAs are handled separately. */
-
-#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
-#define OPTION_MASK_ISA_3DNOW_SET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
-
-#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
-#define OPTION_MASK_ISA_SSE2_SET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
-#define OPTION_MASK_ISA_SSE3_SET \
- (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_SSSE3_SET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
-#define OPTION_MASK_ISA_SSE4_1_SET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
-#define OPTION_MASK_ISA_SSE4_2_SET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
-#define OPTION_MASK_ISA_AVX_SET \
- (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
-#define OPTION_MASK_ISA_FMA_SET \
- (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
-
-/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
- as -msse4.2. */
-#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
-
-#define OPTION_MASK_ISA_SSE4A_SET \
- (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
-#define OPTION_MASK_ISA_FMA4_SET \
- (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
- | OPTION_MASK_ISA_AVX_SET)
-#define OPTION_MASK_ISA_XOP_SET \
- (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
-#define OPTION_MASK_ISA_LWP_SET \
- OPTION_MASK_ISA_LWP
-
-/* AES and PCLMUL need SSE2 because they use xmm registers */
-#define OPTION_MASK_ISA_AES_SET \
- (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_PCLMUL_SET \
- (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
-
-#define OPTION_MASK_ISA_ABM_SET \
- (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
-
-#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
-#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
-#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
-#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
-#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
-#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
-#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
-
-#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
-#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
-#define OPTION_MASK_ISA_F16C_SET \
- (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
-
-/* Define a set of ISAs which aren't available when a given ISA is
- disabled. MMX and SSE ISAs are handled separately. */
-
-#define OPTION_MASK_ISA_MMX_UNSET \
- (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
-#define OPTION_MASK_ISA_3DNOW_UNSET \
- (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
-#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
-
-#define OPTION_MASK_ISA_SSE_UNSET \
- (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
-#define OPTION_MASK_ISA_SSE2_UNSET \
- (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
-#define OPTION_MASK_ISA_SSE3_UNSET \
- (OPTION_MASK_ISA_SSE3 \
- | OPTION_MASK_ISA_SSSE3_UNSET \
- | OPTION_MASK_ISA_SSE4A_UNSET )
-#define OPTION_MASK_ISA_SSSE3_UNSET \
- (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
-#define OPTION_MASK_ISA_SSE4_1_UNSET \
- (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET \
- (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
-#define OPTION_MASK_ISA_AVX_UNSET \
- (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
- | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
-#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
-
-/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
- as -mno-sse4.1. */
-#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
-
-#define OPTION_MASK_ISA_SSE4A_UNSET \
- (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
-
-#define OPTION_MASK_ISA_FMA4_UNSET \
- (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
-#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
-#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
-
-#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
-#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
-#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
-#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
-#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
-#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
-#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
-#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
-#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
-#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
-
-#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
-#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
-#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
-
/* Vectorization library interface and handlers. */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
{&generic64_cost, 16, 10, 16, 10, 16},
{&amdfam10_cost, 32, 24, 32, 7, 32},
{&bdver1_cost, 32, 24, 32, 7, 32},
+ {&bdver2_cost, 32, 24, 32, 7, 32},
{&btver1_cost, 32, 24, 32, 7, 32},
{&atom_cost, 16, 7, 16, 7, 16}
};
"k8",
"amdfam10",
"bdver1",
+ "bdver2",
"btver1"
};
\f
{
return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
-
-/* Implement TARGET_HANDLE_OPTION. */
-
-static bool
-ix86_handle_option (struct gcc_options *opts,
- struct gcc_options *opts_set ATTRIBUTE_UNUSED,
- const struct cl_decoded_option *decoded,
- location_t loc)
-{
- size_t code = decoded->opt_index;
- int value = decoded->value;
-
- switch (code)
- {
- case OPT_mmmx:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
- }
- return true;
-
- case OPT_m3dnow:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
- }
- return true;
-
- case OPT_m3dnowa:
- return false;
-
- case OPT_msse:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
- }
- return true;
-
- case OPT_msse2:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
- }
- return true;
-
- case OPT_msse3:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
- }
- return true;
-
- case OPT_mssse3:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
- }
- return true;
-
- case OPT_msse4_1:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
- }
- return true;
-
- case OPT_msse4_2:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
- }
- return true;
-
- case OPT_mavx:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
- }
- return true;
-
- case OPT_mfma:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
- }
- return true;
-
- case OPT_msse4:
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
- return true;
-
- case OPT_mno_sse4:
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
- return true;
-
- case OPT_msse4a:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
- }
- return true;
-
- case OPT_mfma4:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
- }
- return true;
-
- case OPT_mxop:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
- }
- return true;
-
- case OPT_mlwp:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
- }
- return true;
-
- case OPT_mabm:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
- }
- return true;
-
- case OPT_mbmi:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI_UNSET;
- }
- return true;
-
- case OPT_mtbm:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_TBM_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_TBM_UNSET;
- }
- return true;
-
- case OPT_mpopcnt:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
- }
- return true;
-
- case OPT_msahf:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
- }
- return true;
-
- case OPT_mcx16:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
- }
- return true;
-
- case OPT_mmovbe:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
- }
- return true;
-
- case OPT_mcrc32:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
- }
- return true;
-
- case OPT_maes:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
- }
- return true;
-
- case OPT_mpclmul:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
- }
- return true;
-
- case OPT_mfsgsbase:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
- }
- return true;
-
- case OPT_mrdrnd:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
- }
- return true;
-
- case OPT_mf16c:
- if (value)
- {
- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
- }
- else
- {
- opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
- opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
- }
- return true;
-
- /* Comes from final.c -- no real reason to change it. */
-#define MAX_CODE_ALIGN 16
-
- case OPT_malign_loops_:
- warning_at (loc, 0, "-malign-loops is obsolete, use -falign-loops");
- if (value > MAX_CODE_ALIGN)
- error_at (loc, "-malign-loops=%d is not between 0 and %d",
- value, MAX_CODE_ALIGN);
- else
- opts->x_align_loops = 1 << value;
- return true;
-
- case OPT_malign_jumps_:
- warning_at (loc, 0, "-malign-jumps is obsolete, use -falign-jumps");
- if (value > MAX_CODE_ALIGN)
- error_at (loc, "-malign-jumps=%d is not between 0 and %d",
- value, MAX_CODE_ALIGN);
- else
- opts->x_align_jumps = 1 << value;
- return true;
-
- case OPT_malign_functions_:
- warning_at (loc, 0,
- "-malign-functions is obsolete, use -falign-functions");
- if (value > MAX_CODE_ALIGN)
- error_at (loc, "-malign-functions=%d is not between 0 and %d",
- value, MAX_CODE_ALIGN);
- else
- opts->x_align_functions = 1 << value;
- return true;
-
- case OPT_mbranch_cost_:
- if (value > 5)
- {
- error_at (loc, "-mbranch-cost=%d is not between 0 and 5", value);
- opts->x_ix86_branch_cost = 5;
- }
- return true;
-
- default:
- return true;
- }
-}
\f
/* Return a string that documents the current -m options. The caller is
responsible for freeing the string. */
static char *
-ix86_target_string (int isa, int flags, const char *arch, const char *tune,
- enum fpmath_unit fpmath, bool add_nl_p)
+ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
+ const char *tune, enum fpmath_unit fpmath,
+ bool add_nl_p)
{
struct ix86_target_opts
{
const char *option; /* option string */
- int mask; /* isa mask options */
+ HOST_WIDE_INT mask; /* isa mask options */
};
/* This table is ordered so that options like -msse4.2 that imply
{ "-mmmx", OPTION_MASK_ISA_MMX },
{ "-mabm", OPTION_MASK_ISA_ABM },
{ "-mbmi", OPTION_MASK_ISA_BMI },
+ { "-mbmi2", OPTION_MASK_ISA_BMI2 },
+ { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
{ "-mtbm", OPTION_MASK_ISA_TBM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mvzeroupper", MASK_VZEROUPPER },
{ "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
{ "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
+ { "-mprefer-avx128", MASK_PREFER_AVX128},
};
const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
if (isa && add_nl_p)
{
opts[num++][0] = isa_other;
- sprintf (isa_other, "(other isa: %#x)", isa);
+ sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
+ isa);
}
/* Add flag options. */
const char *suffix;
const char *sw;
- enum pta_flags
- {
- PTA_SSE = 1 << 0,
- PTA_SSE2 = 1 << 1,
- PTA_SSE3 = 1 << 2,
- PTA_MMX = 1 << 3,
- PTA_PREFETCH_SSE = 1 << 4,
- PTA_3DNOW = 1 << 5,
- PTA_3DNOW_A = 1 << 6,
- PTA_64BIT = 1 << 7,
- PTA_SSSE3 = 1 << 8,
- PTA_CX16 = 1 << 9,
- PTA_POPCNT = 1 << 10,
- PTA_ABM = 1 << 11,
- PTA_SSE4A = 1 << 12,
- PTA_NO_SAHF = 1 << 13,
- PTA_SSE4_1 = 1 << 14,
- PTA_SSE4_2 = 1 << 15,
- PTA_AES = 1 << 16,
- PTA_PCLMUL = 1 << 17,
- PTA_AVX = 1 << 18,
- PTA_FMA = 1 << 19,
- PTA_MOVBE = 1 << 20,
- PTA_FMA4 = 1 << 21,
- PTA_XOP = 1 << 22,
- PTA_LWP = 1 << 23,
- PTA_FSGSBASE = 1 << 24,
- PTA_RDRND = 1 << 25,
- PTA_F16C = 1 << 26,
- PTA_BMI = 1 << 27,
- PTA_TBM = 1 << 28
- /* if this reaches 32, need to widen struct pta flags below */
- };
+#define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
+#define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
+#define PTA_64BIT (HOST_WIDE_INT_1 << 2)
+#define PTA_ABM (HOST_WIDE_INT_1 << 3)
+#define PTA_AES (HOST_WIDE_INT_1 << 4)
+#define PTA_AVX (HOST_WIDE_INT_1 << 5)
+#define PTA_BMI (HOST_WIDE_INT_1 << 6)
+#define PTA_CX16 (HOST_WIDE_INT_1 << 7)
+#define PTA_F16C (HOST_WIDE_INT_1 << 8)
+#define PTA_FMA (HOST_WIDE_INT_1 << 9)
+#define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
+#define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
+#define PTA_LWP (HOST_WIDE_INT_1 << 12)
+#define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
+#define PTA_MMX (HOST_WIDE_INT_1 << 14)
+#define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
+#define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
+#define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
+#define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
+#define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
+#define PTA_RDRND (HOST_WIDE_INT_1 << 20)
+#define PTA_SSE (HOST_WIDE_INT_1 << 21)
+#define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
+#define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
+#define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
+#define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
+#define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
+#define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
+#define PTA_TBM (HOST_WIDE_INT_1 << 28)
+#define PTA_XOP (HOST_WIDE_INT_1 << 29)
+#define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
+#define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
+/* If this reaches 64, we need to widen the struct pta flags field below. */
static struct pta
{
const char *const name; /* processor name or nickname. */
const enum processor_type processor;
const enum attr_cpu schedule;
- const unsigned /*enum pta_flags*/ flags;
+ const unsigned HOST_WIDE_INT flags;
}
const processor_alias_table[] =
{
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
+ {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
+ | PTA_RDRND | PTA_F16C},
+ {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
+ | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
+ | PTA_FMA | PTA_MOVBE},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
| PTA_XOP | PTA_LWP},
+ {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
+ | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+ | PTA_FMA},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
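+  /* X32 uses the full 64-bit ISA with 32-bit pointers, so make sure
+     the 64-bit ISA flag is enabled as well. */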
+ if (TARGET_X32)
+ ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
+
/* -fPIC is the default for x86_64. */
if (TARGET_MACHO && TARGET_64BIT)
flag_pic = 2;
if (!TARGET_64BIT)
error ("code model %qs not supported in the %s bit mode",
"medium", "32");
+ else if (TARGET_X32)
+ error ("code model %qs not supported in x32 mode",
+ "medium");
break;
case CM_LARGE:
if (!TARGET_64BIT)
error ("code model %qs not supported in the %s bit mode",
"large", "32");
+ else if (TARGET_X32)
+ error ("code model %qs not supported in x32 mode",
+ "medium");
break;
case CM_32:
if (processor_alias_table[i].flags & PTA_AVX
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
ix86_isa_flags |= OPTION_MASK_ISA_AVX;
+ if (processor_alias_table[i].flags & PTA_AVX2
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
+ ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
if (processor_alias_table[i].flags & PTA_FMA
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
ix86_isa_flags |= OPTION_MASK_ISA_FMA;
if (processor_alias_table[i].flags & PTA_BMI
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
ix86_isa_flags |= OPTION_MASK_ISA_BMI;
+ if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
+ ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
if (processor_alias_table[i].flags & PTA_TBM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
ix86_isa_flags |= OPTION_MASK_ISA_TBM;
+ if (processor_alias_table[i].flags & PTA_BMI2
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
+ ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
if (processor_alias_table[i].flags & PTA_CX16
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
ix86_isa_flags |= OPTION_MASK_ISA_CX16;
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
- /* On 32bit targets, avoid moving DFmode values in
- integer registers when optimizing for size. */
- if (TARGET_64BIT)
- target_flags |= TARGET_INTEGER_DFMODE_MOVES;
- else if (optimize_size)
- target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
-
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE)
{
if (TARGET_SSE4_2 || TARGET_ABM)
ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
+ /* Turn on lzcnt instruction for -mabm. */
+ if (TARGET_ABM)
+ ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
+
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
}
/* For sane SSE instruction set generation we need fcomi instruction.
- It is safe to enable all CMOVE instructions. */
- if (TARGET_SSE)
+ It is safe to enable all CMOVE instructions. Also, the RDRAND
+ intrinsic expands to a sequence that includes a conditional move. */
+ if (TARGET_SSE || TARGET_RDRND)
TARGET_CMOVE = 1;
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
#endif
}
- /* Save the initial options in case the user does function specific options */
- if (main_args_p)
- target_option_default_node = target_option_current_node
- = build_target_option_node ();
-
if (TARGET_AVX)
{
/* When not optimize for size, enable vzeroupper optimization for
if (flag_expensive_optimizations
&& !(target_flags_explicit & MASK_VZEROUPPER))
target_flags |= MASK_VZEROUPPER;
- if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+ if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
- if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
+ if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
+ && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
+ /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
+ if (TARGET_AVX128_OPTIMAL && !(target_flags_explicit & MASK_PREFER_AVX128))
+ target_flags |= MASK_PREFER_AVX128;
}
}
- else
+ else
{
/* Disable vzeroupper pass if TARGET_AVX is disabled. */
target_flags &= ~MASK_VZEROUPPER;
}
+
+ /* Save the initial options in case the user uses function-specific
+ options. */
+ if (main_args_p)
+ target_option_default_node = target_option_current_node
+ = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes. */
IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
IX86_ATTR_ISA ("abm", OPT_mabm),
IX86_ATTR_ISA ("bmi", OPT_mbmi),
+ IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
+ IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
IX86_ATTR_ISA ("tbm", OPT_mtbm),
IX86_ATTR_ISA ("aes", OPT_maes),
IX86_ATTR_ISA ("avx", OPT_mavx),
+ IX86_ATTR_ISA ("avx2", OPT_mavx2),
IX86_ATTR_ISA ("mmx", OPT_mmmx),
IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
IX86_ATTR_ISA ("sse4a", OPT_msse4a),
IX86_ATTR_ISA ("ssse3", OPT_mssse3),
IX86_ATTR_ISA ("fma4", OPT_mfma4),
+ IX86_ATTR_ISA ("fma", OPT_mfma),
IX86_ATTR_ISA ("xop", OPT_mxop),
IX86_ATTR_ISA ("lwp", OPT_mlwp),
IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
|| target_flags != def->x_target_flags
|| option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|| option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
- || ix86_fpmath != def->x_ix86_fpmath)
+ || enum_opts_set.x_ix86_fpmath)
{
/* If we are using the default tune= or arch=, undo the string assigned,
and use the default. */
ASM_OUTPUT_SKIP (file, size ? size : 1);
}
\f
-static const struct default_options ix86_option_optimization_table[] =
- {
- /* Turn off -fschedule-insns by default. It tends to make the
- problem with not enough registers even worse. */
-#ifdef INSN_SCHEDULING
- { OPT_LEVELS_ALL, OPT_fschedule_insns, NULL, 0 },
-#endif
-
-#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
- SUBTARGET_OPTIMIZATION_OPTIONS,
-#endif
- { OPT_LEVELS_NONE, 0, NULL, 0 }
- };
-
-/* Implement TARGET_OPTION_INIT_STRUCT. */
-
-static void
-ix86_option_init_struct (struct gcc_options *opts)
-{
- if (TARGET_MACHO)
- /* The Darwin libraries never set errno, so we might as well
- avoid calling them when that's the only reason we would. */
- opts->x_flag_errno_math = 0;
-
- opts->x_flag_pcc_struct_return = 2;
- opts->x_flag_asynchronous_unwind_tables = 2;
- opts->x_flag_vect_cost_model = 1;
-}
-
/* Decide whether we must probe the stack before any space allocation
on this target. It's essentially TARGET_STACK_PROBE except when
-fstack-check causes the stack to be already probed differently. */
optimize any indirect call, or a direct call to a global function,
as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
if (!TARGET_MACHO
- && !TARGET_64BIT
- && flag_pic
+ && !TARGET_64BIT
+ && flag_pic
&& (!decl || !targetm.binds_local_p (decl)))
return false;
may not be available.) */
static void
-ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
const_tree type, bool named)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
HOST_WIDE_INT bytes, words;
if (mode == BLKmode)
ellipsis). */
static rtx
-ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
+ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
const_tree type, bool named)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
enum machine_mode mode = omode;
HOST_WIDE_INT bytes, words;
rtx arg;
appropriate for passing a pointer to that type. */
static bool
-ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
+ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
const_tree type, bool named ATTRIBUTE_UNUSED)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+
/* See Windows x64 Software Convention. */
if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
{
{
switch (regno)
{
- case 0:
+ case AX_REG:
return true;
case FIRST_FLOAT_REG:
we normally prevent this case when mmx is not available. However
some ABIs may require the result to be returned like DImode. */
if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
- regno = TARGET_MMX ? FIRST_MMX_REG : 0;
+ regno = FIRST_MMX_REG;
/* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
we prevent this case when sse is not available. However some ABIs
may require the result to be returned like integer TImode. */
else if (mode == TImode
|| (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- regno = TARGET_SSE ? FIRST_SSE_REG : 0;
+ regno = FIRST_SSE_REG;
/* 32-byte vector modes in %ymm0. */
else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
- regno = TARGET_AVX ? FIRST_SSE_REG : 0;
+ regno = FIRST_SSE_REG;
/* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
/* Handle libcalls, which don't provide a type node. */
if (valtype == NULL)
{
+ unsigned int regno;
+
switch (mode)
{
case SFmode:
case SDmode:
case DDmode:
case TDmode:
- return gen_rtx_REG (mode, FIRST_SSE_REG);
+ regno = FIRST_SSE_REG;
+ break;
case XFmode:
case XCmode:
- return gen_rtx_REG (mode, FIRST_FLOAT_REG);
+ regno = FIRST_FLOAT_REG;
+ break;
case TCmode:
return NULL;
default:
- return gen_rtx_REG (mode, AX_REG);
+ regno = AX_REG;
}
+
+ return gen_rtx_REG (mode, regno);
+ }
+ else if (POINTER_TYPE_P (valtype))
+ {
+ /* Pointers are always returned in Pmode. */
+ mode = Pmode;
}
ret = construct_container (mode, orig_mode, valtype, 1,
return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
+/* Pointer function arguments and return values are promoted to Pmode. */
+
+static enum machine_mode
+ix86_promote_function_mode (const_tree type, enum machine_mode mode,
+ int *punsignedp, const_tree fntype,
+ int for_return)
+{
+ if (type != NULL_TREE && POINTER_TYPE_P (type))
+ {
+ *punsignedp = POINTERS_EXTEND_UNSIGNED;
+ return Pmode;
+ }
+ return default_promote_function_mode (type, mode, punsignedp, fntype,
+ for_return);
+}
+
rtx
ix86_libcall_value (enum machine_mode mode)
{
alias_set_type set = get_varargs_alias_set ();
int i;
+ /* Reset to zero, as there might be a sysv va_arg used
+ before. */
+ ix86_varargs_gpr_size = 0;
+ ix86_varargs_fpr_size = 0;
+
for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
{
rtx reg, mem;
}
static void
-ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
tree type, int *pretend_size ATTRIBUTE_UNUSED,
int no_rtl)
{
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
CUMULATIVE_ARGS next_cum;
tree fntype;
For stdargs, we do want to skip the last named argument. */
next_cum = *cum;
if (stdarg_p (fntype))
- ix86_function_arg_advance (&next_cum, mode, type, true);
+ ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
+ true);
if (cum->call_abi == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
ovf_rtx = cfun->machine->split_stack_varargs_pointer;
t = make_tree (type, ovf_rtx);
if (words != 0)
- t = build2 (POINTER_PLUS_EXPR, type, t,
- size_int (words * UNITS_PER_WORD));
+ t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
t = build2 (MODIFY_EXPR, type, ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
type = TREE_TYPE (sav);
t = make_tree (type, frame_pointer_rtx);
if (!ix86_varargs_gpr_size)
- t = build2 (POINTER_PLUS_EXPR, type, t,
- size_int (-8 * X86_64_REGPARM_MAX));
+ t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (needed_intregs)
{
/* int_addr = gpr + sav; */
- t = fold_convert (sizetype, gpr);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ t = fold_build_pointer_plus (sav, gpr);
gimplify_assign (int_addr, t, pre_p);
}
if (needed_sseregs)
{
/* sse_addr = fpr + sav; */
- t = fold_convert (sizetype, fpr);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ t = fold_build_pointer_plus (sav, fpr);
gimplify_assign (sse_addr, t, pre_p);
}
if (need_temp)
src_offset = REGNO (reg) * 8;
}
src_addr = fold_convert (addr_type, src_addr);
- src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
- size_int (src_offset));
+ src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
dest_addr = fold_convert (daddr_type, addr);
- dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
- size_int (prev_size));
+ dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
if (cur_size == GET_MODE_SIZE (mode))
{
src = build_va_arg_indirect_ref (src_addr);
else
{
HOST_WIDE_INT align = arg_boundary / 8;
- t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
- size_int (align - 1));
- t = fold_convert (sizetype, t);
+ t = fold_build_pointer_plus_hwi (ovf, align - 1);
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
- size_int (-align));
- t = fold_convert (TREE_TYPE (ovf), t);
+ build_int_cst (TREE_TYPE (t), -align));
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
- t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
- size_int (rsize * UNITS_PER_WORD));
+ t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
gimplify_assign (unshare_expr (ovf), t, pre_p);
if (container)
case 1:
switch (get_attr_mode (insn))
{
- case MODE_V4SF:
- return "%vxorps\t%0, %d0";
- case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vxorpd\t%0, %d0";
case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "%vpxor\t%0, %d0";
- case MODE_V8SF:
- return "vxorps\t%x0, %x0, %x0";
- case MODE_V4DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "vxorps\t%x0, %x0, %x0";
- else
- return "vxorpd\t%x0, %x0, %x0";
+ case MODE_V2DF:
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "%vxorpd\t%0, %d0";
+ case MODE_V4SF:
+ return "%vxorps\t%0, %d0";
+
case MODE_OI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "vxorps\t%x0, %x0, %x0";
- else
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
return "vpxor\t%x0, %x0, %x0";
+ case MODE_V4DF:
+ if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+ return "vxorpd\t%x0, %x0, %x0";
+ case MODE_V8SF:
+ return "vxorps\t%x0, %x0, %x0";
+
default:
break;
}
+
case 2:
return "%vpcmpeqd\t%0, %d0";
default:
}
\f
#ifndef USE_HIDDEN_LINKONCE
-# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 1
# else
# define USE_HIDDEN_LINKONCE 0
gcc_assert (!TARGET_64BIT);
if (USE_HIDDEN_LINKONCE)
- sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
+ sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
else
ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
- if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
+ if (!flag_pic)
{
xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
- if (!flag_pic)
- output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
- else
- {
- output_asm_insn ("call\t%a2", xops);
-#ifdef DWARF2_UNWIND_INFO
- /* The call to next label acts as a push. */
- if (dwarf2out_do_frame ())
- {
- rtx insn;
- start_sequence ();
- insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode,
- stack_pointer_rtx,
- GEN_INT (-4))));
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- end_sequence ();
- }
-#endif
- }
+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
#if TARGET_MACHO
/* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
-
- if (flag_pic)
- {
- output_asm_insn ("pop%z0\t%0", xops);
-#ifdef DWARF2_UNWIND_INFO
- /* The pop is a pop and clobbers dest, but doesn't restore it
- for unwind info purposes. */
- if (dwarf2out_do_frame ())
- {
- rtx insn;
- start_sequence ();
- insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
- dwarf2out_frame_debug (insn, true);
- insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode,
- stack_pointer_rtx,
- GEN_INT (4))));
- RTX_FRAME_RELATED_P (insn) = 1;
- dwarf2out_frame_debug (insn, true);
- end_sequence ();
- }
-#endif
- }
}
else
{
get_pc_thunk_name (name, REGNO (dest));
pic_labels_used |= 1 << REGNO (dest);
-#ifdef DWARF2_UNWIND_INFO
- /* Ensure all queued register saves are flushed before the
- call. */
- if (dwarf2out_do_frame ())
- dwarf2out_flush_queued_reg_saves ();
-#endif
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
output_asm_insn ("call\t%X2", xops);
#endif
}
- if (TARGET_MACHO)
- return "";
-
- if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
+ if (!TARGET_MACHO)
output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
return "";
}
return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
-/* On the x86 -fsplit-stack and -fstack-protector both use the same
- field in the TCB, so they can not be used together. */
-
-static bool
-ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
- struct gcc_options *opts ATTRIBUTE_UNUSED)
-{
- bool ret = true;
-
-#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
- if (report)
- error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
- ret = false;
-#else
- if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
- {
- if (report)
- error ("%<-fsplit-stack%> requires "
- "assembler support for CFI directives");
- ret = false;
- }
-#endif
-
- return ret;
-}
-
/* When using -fsplit-stack, the allocation routines set a field in
the TCB to the bottom of the stack plus this much space, measured
in bytes. */
cfun->machine->use_fast_prologue_epilogue
= !expensive_function_p (count);
}
- if (TARGET_PROLOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue)
- frame->save_regs_using_mov = true;
- else
- frame->save_regs_using_mov = false;
- /* If static stack checking is enabled and done with probes, the registers
- need to be saved before allocating the frame. */
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- frame->save_regs_using_mov = false;
+ frame->save_regs_using_mov
+ = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
/* Skip return address. */
offset = UNITS_PER_WORD;
return len;
}
-
+
/* Return an RTX that points to CFA_OFFSET within the stack frame.
The valid base registers are taken from CFUN->MACHINE->FS. */
allocate = frame.stack_pointer_offset - m->fs.sp_offset;
- if (flag_stack_usage)
+ if (flag_stack_usage_info)
{
/* We start to count from ARG_POINTER. */
HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
}
else
- insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ {
+ insn = emit_insn (gen_set_got (pic_offset_table_rtx));
+ RTX_FRAME_RELATED_P (insn) = 1;
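+	  /* The set_got sequence includes a call; this note asks dwarf2cfi
+	     to flush any queued register saves before it. */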
+ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
+ }
}
/* In the pic_reg_used case, make sure that the got load isn't deleted
emit_insn (gen_cld ());
/* SEH requires that the prologue end within 256 bytes of the start of
- the function. Prevent instruction schedules that would extend that. */
+ the function. Prevent instruction schedules that would extend that.
+ Further, prevent alloca modifications to the stack pointer from being
+ combined with prologue modifications. */
if (TARGET_SEH)
- emit_insn (gen_blockage ());
+ emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn. */
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx insn, mem;
-
+
mem = choose_baseaddr (cfa_offset);
mem = gen_frame_mem (Pmode, mem);
insn = emit_move_insn (reg, mem);
if (TARGET_VZEROUPPER
&& !TREE_THIS_VOLATILE (cfun->decl)
&& !cfun->machine->caller_return_avx256_p)
- emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+ emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
if (crtl->args.pops_args && crtl->args.size)
{
}
call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
GEN_INT (UNITS_PER_WORD), constm1_rtx,
- NULL_RTX, 0);
+ NULL_RTX, false);
add_function_usage_to (call_insn, call_fusage);
/* In order to make call/return prediction work right, we now need
}
}
\f
+/* Determine whether register OP is suitable for use inside an address SUBREG. */
+
+static bool
+ix86_address_subreg_operand (rtx op)
+{
+ enum machine_mode mode;
+
+ if (!REG_P (op))
+ return false;
+
+ mode = GET_MODE (op);
+
+ if (GET_MODE_CLASS (mode) != MODE_INT)
+ return false;
+
+ /* Don't allow SUBREGs that span more than a word. They can lead to spill
+ failures when the register is one word out of a two-word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return false;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ return register_no_elim_operand (op, mode);
+}
+
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
- if (REG_P (addr) || GET_CODE (addr) == SUBREG)
+ /* Allow zero-extended SImode addresses; they will be
+ emitted with the addr32 prefix. */
+ if (TARGET_64BIT && GET_MODE (addr) == DImode)
+ {
+ if (GET_CODE (addr) == ZERO_EXTEND
+ && GET_MODE (XEXP (addr, 0)) == SImode)
+ addr = XEXP (addr, 0);
+ else if (GET_CODE (addr) == AND
+ && const_32bit_mask (XEXP (addr, 1), DImode))
+ {
+ addr = XEXP (addr, 0);
+
+ /* Strip subreg. */
+ if (GET_CODE (addr) == SUBREG
+ && GET_MODE (SUBREG_REG (addr)) == SImode)
+ addr = SUBREG_REG (addr);
+ }
+ }
+
+ if (REG_P (addr))
base = addr;
+ else if (GET_CODE (addr) == SUBREG)
+ {
+ if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ base = addr;
+ else
+ return 0;
+ }
else if (GET_CODE (addr) == PLUS)
{
rtx addends[4], op;
return 0;
break;
- case REG:
case SUBREG:
+ if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ return 0;
+ /* FALLTHRU */
+
+ case REG:
if (!base)
base = op;
else if (!index)
else
disp = addr; /* displacement */
+ if (index)
+ {
+ if (REG_P (index))
+ ;
+ else if (GET_CODE (index) == SUBREG
+ && ix86_address_subreg_operand (SUBREG_REG (index)))
+ ;
+ else
+ return 0;
+ }
+
/* Extract the integral value of scale. */
if (scale_rtx)
{
disp = parts.disp;
scale = parts.scale;
- /* Validate base register.
-
- Don't allow SUBREG's that span more than a word here. It can lead to spill
- failures when the base is one word out of a two word structure, which is
- represented internally as a DImode int. */
-
+ /* Validate base register. */
if (base)
{
rtx reg;
if (REG_P (base))
reg = base;
- else if (GET_CODE (base) == SUBREG
- && REG_P (SUBREG_REG (base))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (base);
+ else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
+ reg = SUBREG_REG (base);
else
/* Base is not a register. */
return false;
- if (GET_MODE (base) != Pmode)
- /* Base is not in Pmode. */
+ if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
return false;
}
- /* Validate index register.
-
- Don't allow SUBREG's that span more than a word here -- same as above. */
-
+ /* Validate index register. */
if (index)
{
rtx reg;
if (REG_P (index))
reg = index;
- else if (GET_CODE (index) == SUBREG
- && REG_P (SUBREG_REG (index))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (index);
+ else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
+ reg = SUBREG_REG (index);
else
/* Index is not a register. */
return false;
- if (GET_MODE (index) != Pmode)
- /* Index is not in Pmode. */
+ if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
return false;
}
+ /* Index and base should have the same mode. */
+ if (base && index
+ && GET_MODE (base) != GET_MODE (index))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
static rtx
get_thread_pointer (bool to_reg)
{
- rtx tp, reg, insn;
+ rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
- tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
- if (!to_reg)
- return tp;
+ if (GET_MODE (tp) != Pmode)
+ tp = convert_to_mode (Pmode, tp, 1);
- reg = gen_reg_rtx (Pmode);
- insn = gen_rtx_SET (VOIDmode, reg, tp);
- insn = emit_insn (insn);
+ if (to_reg)
+ tp = copy_addr_to_reg (tp);
- return reg;
+ return tp;
}
/* Construct the SYMBOL_REF for the tls_get_addr function. */
tp = get_thread_pointer (true);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
- set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
}
else
{
emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
tp = get_thread_pointer (true);
- set_unique_reg_note (get_last_insn (), REG_EQUIV,
+ set_unique_reg_note (get_last_insn (), REG_EQUAL,
gen_rtx_MINUS (Pmode, tmp, tp));
}
else
insns = get_insns ();
end_sequence ();
- /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_TLS_LD_BASE);
{
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
- set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
}
break;
rtx temp = gen_reg_rtx (Pmode);
rtx val = force_operand (XEXP (x, 1), temp);
if (val != temp)
- emit_move_insn (temp, val);
+ {
+ if (GET_MODE (val) != Pmode)
+ val = convert_to_mode (Pmode, val, 1);
+ emit_move_insn (temp, val);
+ }
XEXP (x, 1) = temp;
return x;
rtx temp = gen_reg_rtx (Pmode);
rtx val = force_operand (XEXP (x, 0), temp);
if (val != temp)
- emit_move_insn (temp, val);
+ {
+ if (GET_MODE (val) != Pmode)
+ val = convert_to_mode (Pmode, val, 1);
+ emit_move_insn (temp, val);
+ }
XEXP (x, 0) = temp;
return x;
|| !MEM_P (orig_x))
return ix86_delegitimize_tls_address (orig_x);
x = XVECEXP (XEXP (x, 0), 0, 0);
- if (GET_MODE (orig_x) != Pmode)
+ if (GET_MODE (orig_x) != GET_MODE (x))
{
- x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
+ x = simplify_gen_subreg (GET_MODE (orig_x), x,
+ GET_MODE (x), 0);
if (x == NULL_RTX)
return orig_x;
}
d -- print duplicated register operand for AVX instruction.
D -- print condition for SSE cmp instruction.
P -- if PIC, print an @PLT suffix.
+ p -- print raw symbol name.
X -- don't print any sort of PIC '@' suffix for a symbol.
& -- print some in-use local-dynamic symbol name.
H -- print a memory address offset by 8; used for sse high-parts
case 'x':
case 'X':
case 'P':
+ case 'p':
break;
case 's':
x = const0_rtx;
}
- if (code != 'P')
+ if (code != 'P' && code != 'p')
{
if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
{
gcc_assert (ok);
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.base);
+ parts.base = simplify_subreg (GET_MODE (parts.base),
+ tmp, GET_MODE (tmp), 0);
+ }
+
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.index);
+ parts.index = simplify_subreg (GET_MODE (parts.index),
+ tmp, GET_MODE (tmp), 0);
+ }
+
base = parts.base;
index = parts.index;
disp = parts.disp;
}
else
{
+ int code = 0;
+
+ /* Print SImode registers for zero-extended addresses to force
+ addr32 prefix. Otherwise print DImode registers to avoid it. */
+ if (TARGET_64BIT)
+ code = ((GET_CODE (addr) == ZERO_EXTEND
+ || GET_CODE (addr) == AND)
+ ? 'l'
+ : 'q');
+
if (ASSEMBLER_DIALECT == ASM_ATT)
{
if (disp)
putc ('(', file);
if (base)
- print_reg (base, 0, file);
+ print_reg (base, code, file);
if (index)
{
putc (',', file);
- print_reg (index, 0, file);
+ print_reg (index, code, file);
if (scale != 1)
fprintf (file, ",%d", scale);
}
putc ('[', file);
if (base)
{
- print_reg (base, 0, file);
+ print_reg (base, code, file);
if (offset)
{
if (INTVAL (offset) >= 0)
if (index)
{
putc ('+', file);
- print_reg (index, 0, file);
+ print_reg (index, code, file);
if (scale != 1)
fprintf (file, "*%d", scale);
}
operand may be [SDX]Fmode. */
const char *
-output_fix_trunc (rtx insn, rtx *operands, int fisttp)
+output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
int dimode_p = GET_MODE (operands[0]) == DImode;
should be used. UNORDERED_P is true when fucom should be used. */
const char *
-output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
+output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
int stack_top_dies;
rtx cmp_op0, cmp_op1;
const char *directive = ASM_LONG;
#ifdef ASM_QUAD
- if (TARGET_64BIT)
+ if (TARGET_LP64)
directive = ASM_QUAD;
#else
gcc_assert (!TARGET_64BIT);
op1 = force_operand (op1, op0);
if (op1 == op0)
return;
+ if (GET_MODE (op1) != mode)
+ op1 = convert_to_mode (mode, op1, 1);
}
else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op1))
op0, 1, OPTAB_DIRECT);
if (tmp == op0)
return;
+ if (GET_MODE (tmp) != mode)
+ op1 = convert_to_mode (mode, tmp, 1);
}
}
- if ((flag_pic || MACHOPIC_INDIRECT)
- && mode == Pmode && symbolic_operand (op1, Pmode))
+ if ((flag_pic || MACHOPIC_INDIRECT)
+ && symbolic_operand (op1, mode))
{
if (TARGET_MACHO && !TARGET_64BIT)
{
else
{
if (MEM_P (op0))
- op1 = force_reg (Pmode, op1);
- else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
+ op1 = force_reg (mode, op1);
+ else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
{
rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
op1 = legitimize_pic_address (op1, reg);
if (op0 == op1)
return;
+ if (GET_MODE (op1) != mode)
+ op1 = convert_to_mode (mode, op1, 1);
}
}
}
/* Source 1 cannot be a non-matching memory. */
if (MEM_P (src1) && !rtx_equal_p (dst, src1))
- {
- /* Support "andhi/andsi/anddi" as a zero-extending move. */
- return (code == AND
- && (mode == HImode
- || mode == SImode
- || (TARGET_64BIT && mode == DImode))
- && CONST_INT_P (src2)
- && (INTVAL (src2) == 0xff
- || INTVAL (src2) == 0xffff));
- }
+ /* Support "andhi/andsi/anddi" as a zero-extending move. */
+ return (code == AND
+ && (mode == HImode
+ || mode == SImode
+ || (TARGET_64BIT && mode == DImode))
+ && satisfies_constraint_L (src2));
return true;
}
insn = emit_move_insn (operands[1], tmp1);
else
{
- /* Need a new scratch register since the old one has result
+ /* Need a new scratch register since the old one has result
of 8bit divide. */
scratch = gen_reg_rtx (mode);
emit_move_insn (scratch, tmp1);
emit_label (end_label);
}
-#define LEA_SEARCH_THRESHOLD 12
+#define LEA_MAX_STALL (3)
+#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
+
+/* Increase given DISTANCE in half-cycles according to
+ dependencies between PREV and NEXT instructions.
+ Add 1 half-cycle if there is no dependency and
+ go to the next cycle if there is some dependency. */
+
+static unsigned int
+increase_distance (rtx prev, rtx next, unsigned int distance)
+{
+ df_ref *use_rec;
+ df_ref *def_rec;
+
+ if (!prev || !next)
+ return distance + (distance & 1) + 2;
+
+ if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
+ return distance + 1;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (!DF_REF_IS_ARTIFICIAL (*def_rec)
+ && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
+ return distance + (distance & 1) + 2;
+
+ return distance + 1;
+}
+
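
The half-cycle bookkeeping above is easier to see outside of GCC. Below is a minimal standalone sketch; the toy_insn structure and the driver are hypothetical stand-ins for the df-based insn records, not GCC API. A dependent prev/next pair rounds DISTANCE up to a cycle boundary and adds a full cycle; an independent pair costs one half-cycle.

#include <stdio.h>

/* Hypothetical model of an insn: the register it defines and the
   register it uses (-1 for none).  */
struct toy_insn { int def; int use; };

/* Mirrors the accounting in increase_distance above: distance is kept
   in half-cycles, so "+ (distance & 1) + 2" rounds up to an even
   (cycle) boundary and then adds one full cycle.  */
static unsigned int
toy_increase_distance (const struct toy_insn *prev,
                       const struct toy_insn *next,
                       unsigned int distance)
{
  if (!prev || !next)
    return distance + (distance & 1) + 2;
  if (prev->def >= 0 && prev->def == next->use)
    return distance + (distance & 1) + 2;
  return distance + 1;
}

int
main (void)
{
  struct toy_insn a = { 1, -1 }, b = { 2, 1 }, c = { 3, -1 };
  unsigned int d = 0;
  d = toy_increase_distance (&a, &b, d);   /* b depends on a: d = 2 */
  d = toy_increase_distance (&b, &c, d);   /* independent: d = 3 */
  printf ("%u half-cycles\n", d);
  return 0;
}
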
+/* Return true if instruction INSN defines register number
+ REGNO1 or REGNO2. */
+
+static bool
+insn_defines_reg (unsigned int regno1, unsigned int regno2,
+ rtx insn)
+{
+ df_ref *def_rec;
+
+ for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
+ if (DF_REF_REG_DEF_P (*def_rec)
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if instruction INSN uses register number
+ REGNO as part of an address expression. */
+
+static bool
+insn_uses_reg_mem (unsigned int regno, rtx insn)
+{
+ df_ref *use_rec;
+
+ for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
+ if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
+ return true;
+
+ return false;
+}
+
+/* Search backward for a non-agu definition of register number REGNO1
+ or register number REGNO2 in the basic block, starting from instruction
+ START, up to the head of the basic block or instruction INSN.
+
+ Set *FOUND to true if a definition was found and to false
+ otherwise.
+
+ The distance in half-cycles between START and the found instruction
+ or the head of the BB is added to DISTANCE and returned. */
+
+static int
+distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
+ rtx insn, int distance,
+ rtx start, bool *found)
+{
+ basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
+ rtx prev = start;
+ rtx next = NULL;
+ enum attr_type insn_type;
+
+ *found = false;
+
+ while (prev
+ && prev != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
+ {
+ distance = increase_distance (prev, next, distance);
+ if (insn_defines_reg (regno1, regno2, prev))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA)
+ {
+ *found = true;
+ return distance;
+ }
+ }
+
+ next = prev;
+ }
+ if (prev == BB_HEAD (bb))
+ break;
+
+ prev = PREV_INSN (prev);
+ }
+
+ return distance;
+}
/* Search backward for non-agu definition of register number REGNO1
or register number REGNO2 in INSN's basic block until
1. Pass LEA_SEARCH_THRESHOLD instructions, or
- 2. Reach BB boundary, or
+ 2. Reach the boundary of a neighbouring BB, or
3. Reach agu definition.
Returns the distance between the non-agu definition point and INSN.
If no definition point, returns -1. */
{
basic_block bb = BLOCK_FOR_INSN (insn);
int distance = 0;
- df_ref *def_rec;
- enum attr_type insn_type;
+ bool found = false;
if (insn != BB_HEAD (bb))
- {
- rtx prev = PREV_INSN (insn);
- while (prev && distance < LEA_SEARCH_THRESHOLD)
- {
- if (NONDEBUG_INSN_P (prev))
- {
- distance++;
- for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
- if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
- && !DF_REF_IS_ARTIFICIAL (*def_rec)
- && (regno1 == DF_REF_REGNO (*def_rec)
- || regno2 == DF_REF_REGNO (*def_rec)))
- {
- insn_type = get_attr_type (prev);
- if (insn_type != TYPE_LEA)
- goto done;
- }
- }
- if (prev == BB_HEAD (bb))
- break;
- prev = PREV_INSN (prev);
- }
- }
+ distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
+ distance, PREV_INSN (insn),
+ &found);
- if (distance < LEA_SEARCH_THRESHOLD)
+ if (!found && distance < LEA_SEARCH_THRESHOLD)
{
edge e;
edge_iterator ei;
}
if (simple_loop)
+ distance = distance_non_agu_define_in_bb (regno1, regno2,
+ insn, distance,
+ BB_END (bb), &found);
+ else
{
- rtx prev = BB_END (bb);
- while (prev
- && prev != insn
- && distance < LEA_SEARCH_THRESHOLD)
+ int shortest_dist = -1;
+ bool found_in_bb = false;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
{
- if (NONDEBUG_INSN_P (prev))
+ int bb_dist = distance_non_agu_define_in_bb (regno1, regno2,
+ insn, distance,
+ BB_END (e->src),
+ &found_in_bb);
+ if (found_in_bb)
{
- distance++;
- for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
- if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
- && !DF_REF_IS_ARTIFICIAL (*def_rec)
- && (regno1 == DF_REF_REGNO (*def_rec)
- || regno2 == DF_REF_REGNO (*def_rec)))
- {
- insn_type = get_attr_type (prev);
- if (insn_type != TYPE_LEA)
- goto done;
- }
+ if (shortest_dist < 0)
+ shortest_dist = bb_dist;
+ else if (bb_dist > 0)
+ shortest_dist = MIN (bb_dist, shortest_dist);
}
- prev = PREV_INSN (prev);
+
+ found = found || found_in_bb;
}
+
+ distance = shortest_dist;
}
}
- distance = -1;
-
-done:
/* get_attr_type may modify recog data. We want to make sure
that recog data is valid for instruction INSN, on which
distance_non_agu_define is called. INSN is unchanged here. */
extract_insn_cached (insn);
+
+ if (!found)
+ distance = -1;
+ else
+ distance = distance >> 1;
+
return distance;
}
-/* Return the distance between INSN and the next insn that uses
- register number REGNO0 in memory address. Return -1 if no such
- a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
+/* Return the distance in half-cycles between INSN and the next
+ insn that uses register number REGNO in a memory address, added
+ to DISTANCE. Return -1 if REGNO is set.
+
+ Set *FOUND to true if a register usage was found and to false
+ otherwise.
+ Set *REDEFINED to true if a register redefinition was found and
+ to false otherwise. */
static int
-distance_agu_use (unsigned int regno0, rtx insn)
+distance_agu_use_in_bb (unsigned int regno,
+ rtx insn, int distance, rtx start,
+ bool *found, bool *redefined)
{
- basic_block bb = BLOCK_FOR_INSN (insn);
- int distance = 0;
- df_ref *def_rec;
- df_ref *use_rec;
+ basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
+ rtx next = start;
+ rtx prev = NULL;
- if (insn != BB_END (bb))
+ *found = false;
+ *redefined = false;
+
+ while (next
+ && next != insn
+ && distance < LEA_SEARCH_THRESHOLD)
{
- rtx next = NEXT_INSN (insn);
- while (next && distance < LEA_SEARCH_THRESHOLD)
+ if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
{
- if (NONDEBUG_INSN_P (next))
+ distance = increase_distance (prev, next, distance);
+ if (insn_uses_reg_mem (regno, next))
{
- distance++;
-
- for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
- if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
- || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
- && regno0 == DF_REF_REGNO (*use_rec))
- {
- /* Return DISTANCE if OP0 is used in memory
- address in NEXT. */
- return distance;
- }
+ /* Return DISTANCE if REGNO is used in a memory
+ address in NEXT. */
+ *found = true;
+ return distance;
+ }
- for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
- if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
- && !DF_REF_IS_ARTIFICIAL (*def_rec)
- && regno0 == DF_REF_REGNO (*def_rec))
- {
- /* Return -1 if OP0 is set in NEXT. */
- return -1;
- }
+ if (insn_defines_reg (regno, INVALID_REGNUM, next))
+ {
+ /* Return -1 if REGNO is set in NEXT. */
+ *redefined = true;
+ return -1;
}
- if (next == BB_END (bb))
- break;
- next = NEXT_INSN (next);
+
+ prev = next;
}
+
+ if (next == BB_END (bb))
+ break;
+
+ next = NEXT_INSN (next);
}
- if (distance < LEA_SEARCH_THRESHOLD)
+ return distance;
+}
+
+/* Return the distance between INSN and the next insn that uses
+ register number REGNO0 in a memory address. Return -1 if no such
+ use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
+
+static int
+distance_agu_use (unsigned int regno0, rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ bool found = false;
+ bool redefined = false;
+
+ if (insn != BB_END (bb))
+ distance = distance_agu_use_in_bb (regno0, insn, distance,
+ NEXT_INSN (insn),
+ &found, &redefined);
+
+ if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
{
edge e;
edge_iterator ei;
}
if (simple_loop)
+ distance = distance_agu_use_in_bb (regno0, insn,
+ distance, BB_HEAD (bb),
+ &found, &redefined);
+ else
{
- rtx next = BB_HEAD (bb);
- while (next
- && next != insn
- && distance < LEA_SEARCH_THRESHOLD)
+ int shortest_dist = -1;
+ bool found_in_bb = false;
+ bool redefined_in_bb = false;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
{
- if (NONDEBUG_INSN_P (next))
+ int bb_dist = distance_agu_use_in_bb (regno0, insn,
+ distance, BB_HEAD (e->dest),
+ &found_in_bb, &redefined_in_bb);
+ if (found_in_bb)
{
- distance++;
-
- for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
- if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
- || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
- && regno0 == DF_REF_REGNO (*use_rec))
- {
- /* Return DISTANCE if OP0 is used in memory
- address in NEXT. */
- return distance;
- }
-
- for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
- if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
- && !DF_REF_IS_ARTIFICIAL (*def_rec)
- && regno0 == DF_REF_REGNO (*def_rec))
- {
- /* Return -1 if OP0 is set in NEXT. */
- return -1;
- }
-
+ if (shortest_dist < 0)
+ shortest_dist = bb_dist;
+ else if (bb_dist > 0)
+ shortest_dist = MIN (bb_dist, shortest_dist);
}
- next = NEXT_INSN (next);
+
+ found = found || found_in_bb;
}
+
+ distance = shortest_dist;
}
}
- return -1;
+ if (!found || redefined)
+ distance = -1;
+ else
+ distance = distance >> 1;
+
+ return distance;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when:
Negative value: ADD is preferred over LEA
Zero: Neutral
Positive value: LEA is preferred over ADD */
-#define IX86_LEA_PRIORITY 2
+#define IX86_LEA_PRIORITY 0
+
+/* Return true if using the lea INSN has a performance advantage
+ over a sequence of simpler instructions. The sequence has
+ SPLIT_COST cycles higher latency than the lea itself. */
+
+bool
+ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
+ unsigned int regno2, unsigned int split_cost)
+{
+ int dist_define, dist_use;
+
+ dist_define = distance_non_agu_define (regno1, regno2, insn);
+ dist_use = distance_agu_use (regno0, insn);
+
+ if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
+ {
+ /* If there is no non-AGU operand definition, no AGU
+ operand usage and the split cost is 0, then both the lea
+ and non-lea variants have the same priority. Currently
+ we prefer lea for 64-bit code and non-lea for 32-bit
+ code. */
+ if (dist_use < 0 && split_cost == 0)
+ return TARGET_64BIT || IX86_LEA_PRIORITY;
+ else
+ return true;
+ }
+
+ /* With a longer definition distance, lea is preferable.
+ Here we adjust it to take the splitting cost and
+ lea priority into account. */
+ dist_define += split_cost + IX86_LEA_PRIORITY;
+
+ /* If there is no use in a memory address then we just check
+ that the split cost does not exceed the AGU stall. */
+ if (dist_use < 0)
+ return dist_define >= LEA_MAX_STALL;
+
+ /* If this insn has both a backward non-agu dependence and a forward
+ agu dependence, the one with the shorter distance takes effect. */
+ return dist_define >= dist_use;
+}
+
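
As a cross-check of the rule above, here is a self-contained model of ix86_lea_outperforms with the two scan results passed in directly, where in GCC they come from distance_non_agu_define and distance_agu_use. The toy_ names and the target_64bit flag are stand-ins, not GCC identifiers; the constants mirror LEA_MAX_STALL and IX86_LEA_PRIORITY.

#include <stdbool.h>
#include <stdio.h>

#define TOY_LEA_MAX_STALL 3
#define TOY_LEA_PRIORITY  0

/* Same shape as ix86_lea_outperforms; dist_define and dist_use are
   in cycles, -1 meaning "not found".  */
static bool
toy_lea_outperforms (int dist_define, int dist_use,
                     unsigned int split_cost, bool target_64bit)
{
  if (dist_define < 0 || dist_define >= TOY_LEA_MAX_STALL)
    {
      if (dist_use < 0 && split_cost == 0)
        return target_64bit || TOY_LEA_PRIORITY;
      else
        return true;
    }

  dist_define += split_cost + TOY_LEA_PRIORITY;
  if (dist_use < 0)
    return dist_define >= TOY_LEA_MAX_STALL;
  return dist_define >= dist_use;
}

int
main (void)
{
  /* AGU input defined 1 cycle back, address use 3 cycles ahead:
     the stall is close, so splitting wins.  */
  printf ("%d\n", toy_lea_outperforms (1, 3, 1, true));  /* 0: split */
  /* Definition far enough away: keeping the lea is safe.  */
  printf ("%d\n", toy_lea_outperforms (4, 3, 1, true));  /* 1: keep lea */
  return 0;
}
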
+/* Return true if it is legal to clobber flags by INSN and
+ false otherwise. */
+
+static bool
+ix86_ok_to_clobber_flags (rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ df_ref *use;
+ bitmap live;
+
+ while (insn)
+ {
+ if (NONDEBUG_INSN_P (insn))
+ {
+ for (use = DF_INSN_USES (insn); *use; use++)
+ if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
+ return false;
+
+ if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
+ return true;
+ }
+
+ if (insn == BB_END (bb))
+ break;
+
+ insn = NEXT_INSN (insn);
+ }
+
+ live = df_get_live_out (bb);
+ return !REGNO_REG_SET_P (live, FLAGS_REG);
+}
+
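
The legality check above reduces to a forward scan plus a liveness fallback. The following standalone sketch models it with a toy basic block; the structures and the flags_live_out parameter are hypothetical simplifications of the df machinery, not GCC API.

#include <stdbool.h>
#include <stdio.h>

/* Toy insn: whether it uses or defines the flags register.  */
struct toy_insn { bool uses_flags; bool defines_flags; };

/* Mirrors ix86_ok_to_clobber_flags: scanning forward from POS, a use
   of flags before any redefinition makes clobbering illegal; a
   redefinition makes it legal; falling off the block end defers to
   the live-out information.  */
static bool
toy_ok_to_clobber_flags (const struct toy_insn *bb, int n, int pos,
                         bool flags_live_out)
{
  for (int i = pos; i < n; i++)
    {
      if (bb[i].uses_flags)
        return false;
      if (bb[i].defines_flags)
        return true;
    }
  return !flags_live_out;
}

int
main (void)
{
  /* Second insn redefines flags, so clobbering at position 0 is OK
     even though flags are live out of the block.  */
  struct toy_insn bb[] = { { false, false }, { false, true } };
  printf ("%d\n", toy_ok_to_clobber_flags (bb, 2, 0, true)); /* 1 */
  return 0;
}
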
+/* Return true if we need to split op0 = op1 + op2 into a sequence of
+ a move and an add to avoid AGU stalls. */
+
+bool
+ix86_avoid_lea_for_add (rtx insn, rtx operands[])
+{
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = true_regnum (operands[1]);
+ unsigned int regno2 = true_regnum (operands[2]);
+
+ /* Check if we need to optimize. */
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return false;
+
+ /* Check that it is correct to split here. */
+ if (!ix86_ok_to_clobber_flags (insn))
+ return false;
+
+ /* We need to split only adds with a non-destructive
+ destination operand. */
+ if (regno0 == regno1 || regno0 == regno2)
+ return false;
+ else
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
+}
+
+/* Return true if we need to split the lea into a sequence of
+ instructions to avoid AGU stalls. */
+
+bool
+ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
+{
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = INVALID_REGNUM;
+ unsigned int regno2 = INVALID_REGNUM;
+ unsigned int split_cost = 0;
+ struct ix86_address parts;
+ int ok;
+
+ /* Check if we need to optimize. */
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return false;
+
+ /* Check that it is correct to split here. */
+ if (!ix86_ok_to_clobber_flags (insn))
+ return false;
+
+ ok = ix86_decompose_address (operands[1], &parts);
+ gcc_assert (ok);
+
+ /* We should not split to an add if a non-legitimate PIC
+ operand is used as the displacement. */
+ if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
+ return false;
+
+ if (parts.base)
+ regno1 = true_regnum (parts.base);
+ if (parts.index)
+ regno2 = true_regnum (parts.index);
+
+ /* Compute how many cycles we will add to the execution time
+ if we split the lea into a sequence of instructions. */
+ if (parts.base || parts.index)
+ {
+ /* Have to use a mov instruction if the non-destructive
+ destination form is used. */
+ if (regno1 != regno0 && regno2 != regno0)
+ split_cost += 1;
+
+ /* Have to add index to base if both exist. */
+ if (parts.base && parts.index)
+ split_cost += 1;
+
+ /* Have to use shift and adds if scale is 2 or greater. */
+ if (parts.scale > 1)
+ {
+ if (regno0 != regno1)
+ split_cost += 1;
+ else if (regno2 == regno0)
+ split_cost += 4;
+ else
+ split_cost += parts.scale;
+ }
+
+ /* Have to use an add instruction with an immediate if
+ disp is non-zero. */
+ if (parts.disp && parts.disp != const0_rtx)
+ split_cost += 1;
+
+ /* Subtract the price of lea. */
+ split_cost -= 1;
+ }
+
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
+}
+
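
To make the cost computation above concrete, this standalone sketch re-derives split_cost for one address shape. Plain ints stand in for register numbers and the ix86_address fields, with -1 meaning the part is absent; the toy_ names are illustrative, not GCC identifiers.

#include <stdio.h>

/* Simplified ix86_address.  */
struct toy_addr { int base; int index; int scale; int disp; };

/* Mirrors the split_cost branches in ix86_avoid_lea_for_addr.  */
static int
toy_split_cost (int regno0, struct toy_addr a)
{
  int cost = 0;
  if (a.base < 0 && a.index < 0)
    return 0;
  if (a.base != regno0 && a.index != regno0)
    cost += 1;                  /* extra mov for non-destructive form */
  if (a.base >= 0 && a.index >= 0)
    cost += 1;                  /* add index to base */
  if (a.scale > 1)
    {
      if (regno0 != a.base)
        cost += 1;              /* one shift */
      else if (a.index == regno0)
        cost += 4;              /* cost model should not get here */
      else
        cost += a.scale;        /* repeated adds */
    }
  if (a.disp != 0)
    cost += 1;                  /* add immediate */
  return cost - 1;              /* subtract the price of the lea */
}

int
main (void)
{
  /* reg0 = reg1 + 2*reg2 + 4: mov + shift + add + add, minus the
     lea itself -> split_cost 3.  */
  struct toy_addr a = { /*base*/ 1, /*index*/ 2, /*scale*/ 2, /*disp*/ 4 };
  printf ("%d\n", toy_split_cost (/*regno0*/ 0, a));
  return 0;
}
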
+/* Split lea instructions into a sequence of instructions
+ which are executed on the ALU to avoid AGU stalls.
+ It is assumed that it is allowed to clobber the flags register
+ at the lea position. */
+
+void
+ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
+{
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = INVALID_REGNUM;
+ unsigned int regno2 = INVALID_REGNUM;
+ struct ix86_address parts;
+ rtx tmp, clob;
+ rtvec par;
+ int ok, adds;
+
+ ok = ix86_decompose_address (operands[1], &parts);
+ gcc_assert (ok);
+
+ if (parts.base)
+ {
+ if (GET_MODE (parts.base) != mode)
+ parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
+ regno1 = true_regnum (parts.base);
+ }
+
+ if (parts.index)
+ {
+ if (GET_MODE (parts.index) != mode)
+ parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
+ regno2 = true_regnum (parts.index);
+ }
+
+ if (parts.scale > 1)
+ {
+ /* Case r1 = r1 + ... */
+ if (regno1 == regno0)
+ {
+ /* If we have the case r1 = r1 + C * r1 then we
+ should use multiplication, which is very
+ expensive. Assume the cost model is wrong if we
+ have such a case here. */
+ gcc_assert (regno2 != regno0);
+
+ for (adds = parts.scale; adds > 0; adds--)
+ {
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
+ tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
+ clob = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (2, tmp, clob);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+ }
+ }
+ else
+ {
+ /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
+ if (regno0 != regno2)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
+
+ /* Use shift for scaling. */
+ tmp = gen_rtx_ASHIFT (mode, operands[0],
+ GEN_INT (exact_log2 (parts.scale)));
+ tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (2, tmp, clob);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+
+ if (parts.base)
+ {
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
+ tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (2, tmp, clob);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+ }
+
+ if (parts.disp && parts.disp != const0_rtx)
+ {
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
+ tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (2, tmp, clob);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+ }
+ }
+ }
+ else if (!parts.base && !parts.index)
+ {
+ gcc_assert (parts.disp);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
+ }
+ else
+ {
+ if (!parts.base)
+ {
+ if (regno0 != regno2)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
+ }
+ else if (!parts.index)
+ {
+ if (regno0 != regno1)
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
+ }
+ else
+ {
+ if (regno0 == regno1)
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
+ else if (regno0 == regno2)
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.base);
+ else
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.index);
+ }
+
+ tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (2, tmp, clob);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+ }
+
+ if (parts.disp && parts.disp != const0_rtx)
+ {
+ tmp = gen_rtx_PLUS (mode, operands[0], parts.disp);
+ tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ par = gen_rtvec (2, tmp, clob);
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
+ }
+ }
+}
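
For example, under the dispatch above, an lea with base, index, scale 2 and a displacement is replaced by a mov plus three flag-clobbering ALU operations (AT&T syntax; the register choice is illustrative):

/* lea 0x4(%rbx,%rcx,2), %rax becomes, when regno0 matches neither
   base nor index:
     mov %rcx, %rax     copy index (regno0 != regno2)
     shl $1, %rax       scale by exact_log2 (2) = 1
     add %rbx, %rax     add base
     add $0x4, %rax     add displacement
   with each shift/add wrapped in a PARALLEL carrying the FLAGS_REG
   clobber built above.  */
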
/* Return true if it is ok to optimize an ADD operation to LEA
operation to avoid flag register consumption. For most processors,
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- else
- {
- int dist_define, dist_use;
-
- /* Return false if REGNO0 isn't used in memory address. */
- dist_use = distance_agu_use (regno0, insn);
- if (dist_use <= 0)
- return false;
-
- dist_define = distance_non_agu_define (regno1, regno2, insn);
- if (dist_define <= 0)
- return true;
-
- /* If this insn has both backward non-agu dependence and forward
- agu dependence, the one with short distance take effect. */
- if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
- return false;
- return true;
- }
+ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
+ int i, n_elt;
rtvec v;
+ enum machine_mode scalar_mode;
+
switch (mode)
{
case V4SImode:
- gcc_assert (vect);
- v = gen_rtvec (4, value, value, value, value);
- return gen_rtx_CONST_VECTOR (V4SImode, v);
-
case V2DImode:
gcc_assert (vect);
- v = gen_rtvec (2, value, value);
- return gen_rtx_CONST_VECTOR (V2DImode, v);
-
case V8SFmode:
- if (vect)
- v = gen_rtvec (8, value, value, value, value,
- value, value, value, value);
- else
- v = gen_rtvec (8, value, CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode));
- return gen_rtx_CONST_VECTOR (V8SFmode, v);
-
case V4SFmode:
- if (vect)
- v = gen_rtvec (4, value, value, value, value);
- else
- v = gen_rtvec (4, value, CONST0_RTX (SFmode),
- CONST0_RTX (SFmode), CONST0_RTX (SFmode));
- return gen_rtx_CONST_VECTOR (V4SFmode, v);
-
case V4DFmode:
- if (vect)
- v = gen_rtvec (4, value, value, value, value);
- else
- v = gen_rtvec (4, value, CONST0_RTX (DFmode),
- CONST0_RTX (DFmode), CONST0_RTX (DFmode));
- return gen_rtx_CONST_VECTOR (V4DFmode, v);
-
case V2DFmode:
- if (vect)
- v = gen_rtvec (2, value, value);
- else
- v = gen_rtvec (2, value, CONST0_RTX (DFmode));
- return gen_rtx_CONST_VECTOR (V2DFmode, v);
+ n_elt = GET_MODE_NUNITS (mode);
+ v = rtvec_alloc (n_elt);
+ scalar_mode = GET_MODE_INNER (mode);
+
+ RTVEC_ELT (v, 0) = value;
+
+ for (i = 1; i < n_elt; ++i)
+ RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
+
+ return gen_rtx_CONST_VECTOR (mode, v);
default:
gcc_unreachable ();
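
The collapsed cases now all go through one loop. A quick standalone illustration of its output for a 4-element vector, with floats standing in for the rtx element values (the toy_ names are hypothetical):

#include <stdio.h>

/* Model of the generic loop: element 0 is always VALUE; the rest are
   VALUE when VECT is set and zero otherwise.  */
static void
toy_build_const_vector (float *v, int n_elt, float value, int vect)
{
  v[0] = value;
  for (int i = 1; i < n_elt; ++i)
    v[i] = vect ? value : 0.0f;
}

int
main (void)
{
  float v[4];
  toy_build_const_vector (v, 4, 1.5f, 0);    /* -> {1.5, 0, 0, 0} */
  printf ("%g %g %g %g\n", v[0], v[1], v[2], v[3]);
  return 0;
}
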
{
rtx tmp;
+ /* AVX supports all the needed comparisons; no need to swap arguments
+ or to help reload. */
+ if (TARGET_AVX)
+ return code;
+
switch (code)
{
case LTGT:
rtx op_true, rtx op_false)
{
enum machine_mode mode = GET_MODE (dest);
+ enum machine_mode cmp_mode = GET_MODE (cmp_op0);
rtx x;
- cmp_op0 = force_reg (mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, mode))
- cmp_op1 = force_reg (mode, cmp_op1);
+ cmp_op0 = force_reg (cmp_mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, cmp_mode))
+ cmp_op1 = force_reg (cmp_mode, cmp_op1);
if (optimize
|| reg_overlap_mentioned_p (dest, op_true)
|| reg_overlap_mentioned_p (dest, op_false))
dest = gen_reg_rtx (mode);
- x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
- emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+ x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
+ if (cmp_mode != mode)
+ {
+ x = force_reg (cmp_mode, x);
+ convert_move (dest, x, false);
+ }
+ else
+ emit_insn (gen_rtx_SET (VOIDmode, dest, x));
return dest;
}
code = ix86_prepare_sse_fp_compare_args (operands[0], code,
&operands[4], &operands[5]);
if (code == UNKNOWN)
- return false;
+ {
+ rtx temp;
+ switch (GET_CODE (operands[3]))
+ {
+ case LTGT:
+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = AND;
+ break;
+ case UNEQ:
+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
+ operands[5], operands[0], operands[0]);
+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
+ operands[5], operands[1], operands[2]);
+ code = IOR;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
+ OPTAB_DIRECT);
+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
+ return true;
+ }
if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
operands[5], operands[1], operands[2]))
rtx destexp;
rtx srcexp;
rtx countreg;
+ HOST_WIDE_INT rounded_count;
/* If the size is known, it is shorter to use rep movs. */
if (mode == QImode && CONST_INT_P (count)
}
if (CONST_INT_P (count))
{
- count = GEN_INT (INTVAL (count)
+ rounded_count = (INTVAL (count)
& ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
destmem = shallow_copy_rtx (destmem);
srcmem = shallow_copy_rtx (srcmem);
- set_mem_size (destmem, count);
- set_mem_size (srcmem, count);
+ set_mem_size (destmem, rounded_count);
+ set_mem_size (srcmem, rounded_count);
}
else
{
- if (MEM_SIZE (destmem))
- set_mem_size (destmem, NULL_RTX);
- if (MEM_SIZE (srcmem))
- set_mem_size (srcmem, NULL_RTX);
+ if (MEM_SIZE_KNOWN_P (destmem))
+ clear_mem_size (destmem);
+ if (MEM_SIZE_KNOWN_P (srcmem))
+ clear_mem_size (srcmem);
}
emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
destexp, srcexp));
{
rtx destexp;
rtx countreg;
+ HOST_WIDE_INT rounded_count;
if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
if (orig_value == const0_rtx && CONST_INT_P (count))
{
- count = GEN_INT (INTVAL (count)
+ rounded_count = (INTVAL (count)
& ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
destmem = shallow_copy_rtx (destmem);
- set_mem_size (destmem, count);
+ set_mem_size (destmem, rounded_count);
}
- else if (MEM_SIZE (destmem))
- set_mem_size (destmem, NULL_RTX);
+ else if (MEM_SIZE_KNOWN_P (destmem))
+ clear_mem_size (destmem);
emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
int desired_align, int align_bytes)
{
rtx src = *srcp;
- rtx src_size, dst_size;
+ rtx orig_dst = dst;
+ rtx orig_src = src;
int off = 0;
int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
if (src_align_bytes >= 0)
src_align_bytes = desired_align - src_align_bytes;
- src_size = MEM_SIZE (src);
- dst_size = MEM_SIZE (dst);
if (align_bytes & 1)
{
dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
set_mem_align (src, src_align * BITS_PER_UNIT);
}
- if (dst_size)
- set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
- if (src_size)
- set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
+ if (MEM_SIZE_KNOWN_P (orig_dst))
+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
+ if (MEM_SIZE_KNOWN_P (orig_src))
+ set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
*srcp = src;
return dst;
}
int desired_align, int align_bytes)
{
int off = 0;
- rtx dst_size = MEM_SIZE (dst);
+ rtx orig_dst = dst;
if (align_bytes & 1)
{
dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
set_mem_align (dst, desired_align * BITS_PER_UNIT);
- if (dst_size)
- set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ if (MEM_SIZE_KNOWN_P (orig_dst))
+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
return dst;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
rtx callarg2,
- rtx pop, int sibcall)
+ rtx pop, bool sibcall)
{
+ /* We need to represent that SI and DI registers are clobbered
+ by SYSV calls. */
+ static int clobbered_registers[] = {
+ XMM6_REG, XMM7_REG, XMM8_REG,
+ XMM9_REG, XMM10_REG, XMM11_REG,
+ XMM12_REG, XMM13_REG, XMM14_REG,
+ XMM15_REG, SI_REG, DI_REG
+ };
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
rtx use = NULL, call;
+ unsigned int vec_len;
if (pop == const0_rtx)
pop = NULL;
? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
: !call_insn_operand (XEXP (fnaddr, 0), Pmode))
{
- fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
- fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ fnaddr = XEXP (fnaddr, 0);
+ if (GET_MODE (fnaddr) != Pmode)
+ fnaddr = convert_to_mode (Pmode, fnaddr, 1);
+ fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
}
+ vec_len = 0;
call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
if (retval)
call = gen_rtx_SET (VOIDmode, retval, call);
+ vec[vec_len++] = call;
+
if (pop)
{
pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ vec[vec_len++] = pop;
}
+
if (TARGET_64BIT_MS_ABI
&& (!callarg2 || INTVAL (callarg2) != -2))
{
- /* We need to represent that SI and DI registers are clobbered
- by SYSV calls. */
- static int clobbered_registers[] = {
- XMM6_REG, XMM7_REG, XMM8_REG,
- XMM9_REG, XMM10_REG, XMM11_REG,
- XMM12_REG, XMM13_REG, XMM14_REG,
- XMM15_REG, SI_REG, DI_REG
- };
- unsigned int i;
- rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
- rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
- UNSPEC_MS_TO_SYSV_CALL);
+ unsigned i;
- vec[0] = call;
- vec[1] = unspec;
- for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
- vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
- ? TImode : DImode,
- gen_rtx_REG
- (SSE_REGNO_P (clobbered_registers[i])
- ? TImode : DImode,
- clobbered_registers[i]));
+ vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
- call = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
- + 2, vec));
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[vec_len++]
+ = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
}
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
if (TARGET_VZEROUPPER)
{
- rtx unspec;
int avx256;
-
if (cfun->machine->callee_pass_avx256_p)
{
if (cfun->machine->callee_return_avx256_p)
if (reload_completed)
emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
else
- {
- unspec = gen_rtx_UNSPEC (VOIDmode,
- gen_rtvec (1, GEN_INT (avx256)),
- UNSPEC_CALL_NEEDS_VZEROUPPER);
- call = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, call, unspec));
- }
+ vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, GEN_INT (avx256)),
+ UNSPEC_CALL_NEEDS_VZEROUPPER);
}
+ if (vec_len > 1)
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
call = emit_call_insn (call);
if (use)
CALL_INSN_FUNCTION_USAGE (call) = use;
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
- rtx call = XVECEXP (PATTERN (insn), 0, 0);
+ rtx pat = PATTERN (insn);
+ rtvec vec = XVEC (pat, 0);
+ int len = GET_NUM_ELEM (vec) - 1;
+
+ /* Strip off the last entry of the parallel. */
+ gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
+ gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
+ if (len == 1)
+ pat = RTVEC_ELT (vec, 0);
+ else
+ pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
+
emit_insn (gen_avx_vzeroupper (vzeroupper));
- emit_call_insn (call);
+ emit_call_insn (pat);
}
/* Output the assembly for a call instruction. */
for (s = ix86_stack_locals; s; s = s->next)
if (s->mode == mode && s->n == n)
- return copy_rtx (s->rtl);
+ return validize_mem (copy_rtx (s->rtl));
s = ggc_alloc_stack_local_entry ();
s->n = n;
s->next = ix86_stack_locals;
ix86_stack_locals = s;
- return s->rtl;
+ return validize_mem (s->rtl);
}
\f
-/* Calculate the length of the memory address in the instruction
- encoding. Does not include the one-byte modrm, opcode, or prefix. */
+/* Calculate the length of the memory address in the instruction encoding.
+ This includes the addr32 prefix but not the one-byte modrm, opcode,
+ or other prefixes. */
int
memory_address_length (rtx addr)
base = parts.base;
index = parts.index;
disp = parts.disp;
- len = 0;
+
+ /* Add length of addr32 prefix. */
+ len = (GET_CODE (addr) == ZERO_EXTEND
+ || GET_CODE (addr) == AND);
/* Rule of thumb:
- esp as the base always wants an index,
/* Compute default value for "length_immediate" attribute. When SHORTFORM
is set, expect that the insn has an 8-bit immediate alternative. */
int
-ix86_attr_length_immediate_default (rtx insn, int shortform)
+ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
int len = 0;
int i;
2 or 3 byte VEX prefix and 1 opcode byte. */
int
-ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
- int has_vex_w)
+ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
int i;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
case PROCESSOR_BDVER1:
+ case PROCESSOR_BDVER2:
case PROCESSOR_BTVER1:
return 3;
}
}
-/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
+/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
by DEP_INSN and nothing set by DEP_INSN. */
-static int
+static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
rtx set, set2;
&& insn_type != TYPE_ICMOV
&& insn_type != TYPE_FCMOV
&& insn_type != TYPE_IBR)
- return 0;
+ return false;
if ((set = single_set (dep_insn)) != 0)
{
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
}
else
- return 0;
+ return false;
if (!REG_P (set) || REGNO (set) != FLAGS_REG)
- return 0;
+ return false;
/* This test is true if the dependent insn reads the flags but
not any other potentially set register. */
if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
- return 0;
+ return false;
if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
- return 0;
+ return false;
- return 1;
+ return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
case PROCESSOR_BDVER1:
+ case PROCESSOR_BDVER2:
case PROCESSOR_BTVER1:
case PROCESSOR_ATOM:
case PROCESSOR_GENERIC32:
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
rtx mem, fnaddr;
+ int opcode;
+ int offset = 0;
fnaddr = XEXP (DECL_RTL (fndecl), 0);
- if (!TARGET_64BIT)
- {
- rtx disp, chain;
- int opcode;
-
- /* Depending on the static chain location, either load a register
- with a constant, or push the constant to the stack. All of the
- instructions are the same size. */
- chain = ix86_static_chain (fndecl, true);
- if (REG_P (chain))
- {
- if (REGNO (chain) == CX_REG)
- opcode = 0xb9;
- else if (REGNO (chain) == AX_REG)
- opcode = 0xb8;
- else
- gcc_unreachable ();
- }
- else
- opcode = 0x68;
-
- mem = adjust_address (m_tramp, QImode, 0);
- emit_move_insn (mem, gen_int_mode (opcode, QImode));
-
- mem = adjust_address (m_tramp, SImode, 1);
- emit_move_insn (mem, chain_value);
-
- /* Compute offset from the end of the jmp to the target function.
- In the case in which the trampoline stores the static chain on
- the stack, we need to skip the first insn which pushes the
- (call-saved) register static chain; this push is 1 byte. */
- disp = expand_binop (SImode, sub_optab, fnaddr,
- plus_constant (XEXP (m_tramp, 0),
- MEM_P (chain) ? 9 : 10),
- NULL_RTX, 1, OPTAB_DIRECT);
-
- mem = adjust_address (m_tramp, QImode, 5);
- emit_move_insn (mem, gen_int_mode (0xe9, QImode));
-
- mem = adjust_address (m_tramp, SImode, 6);
- emit_move_insn (mem, disp);
- }
- else
+ if (TARGET_64BIT)
{
- int offset = 0;
+ int size;
/* Load the function address to r11. Try to load address using
the shorter movl instead of movabs. We may want to support
offset += 10;
}
- /* Load static chain using movabs to r10. */
+ /* Load static chain using movabs to r10. Use the
+ shorter movl instead of movabs for x32. */
+ if (TARGET_X32)
+ {
+ opcode = 0xba41;
+ size = 6;
+ }
+ else
+ {
+ opcode = 0xba49;
+ size = 10;
+ }
+
mem = adjust_address (m_tramp, HImode, offset);
- emit_move_insn (mem, gen_int_mode (0xba49, HImode));
+ emit_move_insn (mem, gen_int_mode (opcode, HImode));
- mem = adjust_address (m_tramp, DImode, offset + 2);
+ mem = adjust_address (m_tramp, ptr_mode, offset + 2);
emit_move_insn (mem, chain_value);
- offset += 10;
+ offset += size;
/* Jump to r11; the last (unused) byte is a nop, only there to
pad the write out to a single 32-bit store. */
mem = adjust_address (m_tramp, SImode, offset);
emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
offset += 4;
+ }
+ else
+ {
+ rtx disp, chain;
+
+ /* Depending on the static chain location, either load a register
+ with a constant, or push the constant to the stack. All of the
+ instructions are the same size. */
+ chain = ix86_static_chain (fndecl, true);
+ if (REG_P (chain))
+ {
+ switch (REGNO (chain))
+ {
+ case AX_REG:
+ opcode = 0xb8; break;
+ case CX_REG:
+ opcode = 0xb9; break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ opcode = 0x68;
- gcc_assert (offset <= TRAMPOLINE_SIZE);
+ mem = adjust_address (m_tramp, QImode, offset);
+ emit_move_insn (mem, gen_int_mode (opcode, QImode));
+
+ mem = adjust_address (m_tramp, SImode, offset + 1);
+ emit_move_insn (mem, chain_value);
+ offset += 5;
+
+ mem = adjust_address (m_tramp, QImode, offset);
+ emit_move_insn (mem, gen_int_mode (0xe9, QImode));
+
+ mem = adjust_address (m_tramp, SImode, offset + 1);
+
+ /* Compute offset from the end of the jmp to the target function.
+ In the case in which the trampoline stores the static chain on
+ the stack, we need to skip the first insn which pushes the
+ (call-saved) register static chain; this push is 1 byte. */
+ offset += 5;
+ disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (XEXP (m_tramp, 0),
+ offset - (MEM_P (chain) ? 1 : 0)),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ emit_move_insn (mem, disp);
}
-#ifdef ENABLE_EXECUTE_STACK
+ gcc_assert (offset <= TRAMPOLINE_SIZE);
+
+#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
if (CHECK_EXECUTE_STACK_ENABLED)
#endif
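
The x32 branch in the trampoline code above saves four bytes per static-chain load. As background, on the standard x86-64 encoding (stated here as general knowledge, not taken from the patch):

/* Bytes written into the trampoline for the static-chain load:
     x32:  41 ba <imm32>    movl $chain, %r10d    (6 bytes)
     LP64: 49 ba <imm64>    movabs $chain, %r10   (10 bytes)
   The HImode store of 0xba41 / 0xba49 emits the two opcode bytes
   little-endian, so the REX prefix 0x41 / 0x49 lands first.  */
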
IX86_BUILTIN_CLFLUSH,
IX86_BUILTIN_MFENCE,
IX86_BUILTIN_LFENCE,
+ IX86_BUILTIN_PAUSE,
IX86_BUILTIN_BSRSI,
IX86_BUILTIN_BSRDI,
IX86_BUILTIN_CEILPD,
IX86_BUILTIN_TRUNCPD,
IX86_BUILTIN_RINTPD,
+ IX86_BUILTIN_ROUNDPD_AZ,
IX86_BUILTIN_FLOORPS,
IX86_BUILTIN_CEILPS,
IX86_BUILTIN_TRUNCPS,
IX86_BUILTIN_RINTPS,
+ IX86_BUILTIN_ROUNDPS_AZ,
IX86_BUILTIN_PTESTZ,
IX86_BUILTIN_PTESTC,
IX86_BUILTIN_CEILPD256,
IX86_BUILTIN_TRUNCPD256,
IX86_BUILTIN_RINTPD256,
+ IX86_BUILTIN_ROUNDPD_AZ256,
IX86_BUILTIN_FLOORPS256,
IX86_BUILTIN_CEILPS256,
IX86_BUILTIN_TRUNCPS256,
IX86_BUILTIN_RINTPS256,
+ IX86_BUILTIN_ROUNDPS_AZ256,
IX86_BUILTIN_UNPCKHPD256,
IX86_BUILTIN_UNPCKLPD256,
IX86_BUILTIN_MOVMSKPD256,
IX86_BUILTIN_MOVMSKPS256,
+ /* AVX2 */
+ IX86_BUILTIN_MPSADBW256,
+ IX86_BUILTIN_PABSB256,
+ IX86_BUILTIN_PABSW256,
+ IX86_BUILTIN_PABSD256,
+ IX86_BUILTIN_PACKSSDW256,
+ IX86_BUILTIN_PACKSSWB256,
+ IX86_BUILTIN_PACKUSDW256,
+ IX86_BUILTIN_PACKUSWB256,
+ IX86_BUILTIN_PADDB256,
+ IX86_BUILTIN_PADDW256,
+ IX86_BUILTIN_PADDD256,
+ IX86_BUILTIN_PADDQ256,
+ IX86_BUILTIN_PADDSB256,
+ IX86_BUILTIN_PADDSW256,
+ IX86_BUILTIN_PADDUSB256,
+ IX86_BUILTIN_PADDUSW256,
+ IX86_BUILTIN_PALIGNR256,
+ IX86_BUILTIN_AND256I,
+ IX86_BUILTIN_ANDNOT256I,
+ IX86_BUILTIN_PAVGB256,
+ IX86_BUILTIN_PAVGW256,
+ IX86_BUILTIN_PBLENDVB256,
+ IX86_BUILTIN_PBLENDVW256,
+ IX86_BUILTIN_PCMPEQB256,
+ IX86_BUILTIN_PCMPEQW256,
+ IX86_BUILTIN_PCMPEQD256,
+ IX86_BUILTIN_PCMPEQQ256,
+ IX86_BUILTIN_PCMPGTB256,
+ IX86_BUILTIN_PCMPGTW256,
+ IX86_BUILTIN_PCMPGTD256,
+ IX86_BUILTIN_PCMPGTQ256,
+ IX86_BUILTIN_PHADDW256,
+ IX86_BUILTIN_PHADDD256,
+ IX86_BUILTIN_PHADDSW256,
+ IX86_BUILTIN_PHSUBW256,
+ IX86_BUILTIN_PHSUBD256,
+ IX86_BUILTIN_PHSUBSW256,
+ IX86_BUILTIN_PMADDUBSW256,
+ IX86_BUILTIN_PMADDWD256,
+ IX86_BUILTIN_PMAXSB256,
+ IX86_BUILTIN_PMAXSW256,
+ IX86_BUILTIN_PMAXSD256,
+ IX86_BUILTIN_PMAXUB256,
+ IX86_BUILTIN_PMAXUW256,
+ IX86_BUILTIN_PMAXUD256,
+ IX86_BUILTIN_PMINSB256,
+ IX86_BUILTIN_PMINSW256,
+ IX86_BUILTIN_PMINSD256,
+ IX86_BUILTIN_PMINUB256,
+ IX86_BUILTIN_PMINUW256,
+ IX86_BUILTIN_PMINUD256,
+ IX86_BUILTIN_PMOVMSKB256,
+ IX86_BUILTIN_PMOVSXBW256,
+ IX86_BUILTIN_PMOVSXBD256,
+ IX86_BUILTIN_PMOVSXBQ256,
+ IX86_BUILTIN_PMOVSXWD256,
+ IX86_BUILTIN_PMOVSXWQ256,
+ IX86_BUILTIN_PMOVSXDQ256,
+ IX86_BUILTIN_PMOVZXBW256,
+ IX86_BUILTIN_PMOVZXBD256,
+ IX86_BUILTIN_PMOVZXBQ256,
+ IX86_BUILTIN_PMOVZXWD256,
+ IX86_BUILTIN_PMOVZXWQ256,
+ IX86_BUILTIN_PMOVZXDQ256,
+ IX86_BUILTIN_PMULDQ256,
+ IX86_BUILTIN_PMULHRSW256,
+ IX86_BUILTIN_PMULHUW256,
+ IX86_BUILTIN_PMULHW256,
+ IX86_BUILTIN_PMULLW256,
+ IX86_BUILTIN_PMULLD256,
+ IX86_BUILTIN_PMULUDQ256,
+ IX86_BUILTIN_POR256,
+ IX86_BUILTIN_PSADBW256,
+ IX86_BUILTIN_PSHUFB256,
+ IX86_BUILTIN_PSHUFD256,
+ IX86_BUILTIN_PSHUFHW256,
+ IX86_BUILTIN_PSHUFLW256,
+ IX86_BUILTIN_PSIGNB256,
+ IX86_BUILTIN_PSIGNW256,
+ IX86_BUILTIN_PSIGND256,
+ IX86_BUILTIN_PSLLDQI256,
+ IX86_BUILTIN_PSLLWI256,
+ IX86_BUILTIN_PSLLW256,
+ IX86_BUILTIN_PSLLDI256,
+ IX86_BUILTIN_PSLLD256,
+ IX86_BUILTIN_PSLLQI256,
+ IX86_BUILTIN_PSLLQ256,
+ IX86_BUILTIN_PSRAWI256,
+ IX86_BUILTIN_PSRAW256,
+ IX86_BUILTIN_PSRADI256,
+ IX86_BUILTIN_PSRAD256,
+ IX86_BUILTIN_PSRLDQI256,
+ IX86_BUILTIN_PSRLWI256,
+ IX86_BUILTIN_PSRLW256,
+ IX86_BUILTIN_PSRLDI256,
+ IX86_BUILTIN_PSRLD256,
+ IX86_BUILTIN_PSRLQI256,
+ IX86_BUILTIN_PSRLQ256,
+ IX86_BUILTIN_PSUBB256,
+ IX86_BUILTIN_PSUBW256,
+ IX86_BUILTIN_PSUBD256,
+ IX86_BUILTIN_PSUBQ256,
+ IX86_BUILTIN_PSUBSB256,
+ IX86_BUILTIN_PSUBSW256,
+ IX86_BUILTIN_PSUBUSB256,
+ IX86_BUILTIN_PSUBUSW256,
+ IX86_BUILTIN_PUNPCKHBW256,
+ IX86_BUILTIN_PUNPCKHWD256,
+ IX86_BUILTIN_PUNPCKHDQ256,
+ IX86_BUILTIN_PUNPCKHQDQ256,
+ IX86_BUILTIN_PUNPCKLBW256,
+ IX86_BUILTIN_PUNPCKLWD256,
+ IX86_BUILTIN_PUNPCKLDQ256,
+ IX86_BUILTIN_PUNPCKLQDQ256,
+ IX86_BUILTIN_PXOR256,
+ IX86_BUILTIN_MOVNTDQA256,
+ IX86_BUILTIN_VBROADCASTSS_PS,
+ IX86_BUILTIN_VBROADCASTSS_PS256,
+ IX86_BUILTIN_VBROADCASTSD_PD256,
+ IX86_BUILTIN_VBROADCASTSI256,
+ IX86_BUILTIN_PBLENDD256,
+ IX86_BUILTIN_PBLENDD128,
+ IX86_BUILTIN_PBROADCASTB256,
+ IX86_BUILTIN_PBROADCASTW256,
+ IX86_BUILTIN_PBROADCASTD256,
+ IX86_BUILTIN_PBROADCASTQ256,
+ IX86_BUILTIN_PBROADCASTB128,
+ IX86_BUILTIN_PBROADCASTW128,
+ IX86_BUILTIN_PBROADCASTD128,
+ IX86_BUILTIN_PBROADCASTQ128,
+ IX86_BUILTIN_VPERMVARSI256,
+ IX86_BUILTIN_VPERMDF256,
+ IX86_BUILTIN_VPERMVARSF256,
+ IX86_BUILTIN_VPERMDI256,
+ IX86_BUILTIN_VPERMTI256,
+ IX86_BUILTIN_VEXTRACT128I256,
+ IX86_BUILTIN_VINSERT128I256,
+ IX86_BUILTIN_MASKLOADD,
+ IX86_BUILTIN_MASKLOADQ,
+ IX86_BUILTIN_MASKLOADD256,
+ IX86_BUILTIN_MASKLOADQ256,
+ IX86_BUILTIN_MASKSTORED,
+ IX86_BUILTIN_MASKSTOREQ,
+ IX86_BUILTIN_MASKSTORED256,
+ IX86_BUILTIN_MASKSTOREQ256,
+ IX86_BUILTIN_PSLLVV4DI,
+ IX86_BUILTIN_PSLLVV2DI,
+ IX86_BUILTIN_PSLLVV8SI,
+ IX86_BUILTIN_PSLLVV4SI,
+ IX86_BUILTIN_PSRAVV8SI,
+ IX86_BUILTIN_PSRAVV4SI,
+ IX86_BUILTIN_PSRLVV4DI,
+ IX86_BUILTIN_PSRLVV2DI,
+ IX86_BUILTIN_PSRLVV8SI,
+ IX86_BUILTIN_PSRLVV4SI,
+
+ IX86_BUILTIN_GATHERSIV2DF,
+ IX86_BUILTIN_GATHERSIV4DF,
+ IX86_BUILTIN_GATHERDIV2DF,
+ IX86_BUILTIN_GATHERDIV4DF,
+ IX86_BUILTIN_GATHERSIV4SF,
+ IX86_BUILTIN_GATHERSIV8SF,
+ IX86_BUILTIN_GATHERDIV4SF,
+ IX86_BUILTIN_GATHERDIV8SF,
+ IX86_BUILTIN_GATHERSIV2DI,
+ IX86_BUILTIN_GATHERSIV4DI,
+ IX86_BUILTIN_GATHERDIV2DI,
+ IX86_BUILTIN_GATHERDIV4DI,
+ IX86_BUILTIN_GATHERSIV4SI,
+ IX86_BUILTIN_GATHERSIV8SI,
+ IX86_BUILTIN_GATHERDIV4SI,
+ IX86_BUILTIN_GATHERDIV8SI,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_HUGE_VALQ,
IX86_BUILTIN_VEC_PERM_V4DF,
IX86_BUILTIN_VEC_PERM_V8SF,
- /* FMA4 and XOP instructions. */
+ /* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
IX86_BUILTIN_VFMADDPS,
IX86_BUILTIN_VFMADDSUBPS256,
IX86_BUILTIN_VFMADDSUBPD256,
+ /* FMA3 instructions. */
+ IX86_BUILTIN_VFMADDSS3,
+ IX86_BUILTIN_VFMADDSD3,
+
+ /* XOP instructions. */
IX86_BUILTIN_VPCMOV,
IX86_BUILTIN_VPCMOV_V2DI,
IX86_BUILTIN_VPCMOV_V4SI,
IX86_BUILTIN_BEXTRI32,
IX86_BUILTIN_BEXTRI64,
+ /* BMI2 instructions. */
+ IX86_BUILTIN_BZHI32,
+ IX86_BUILTIN_BZHI64,
+ IX86_BUILTIN_PDEP32,
+ IX86_BUILTIN_PDEP64,
+ IX86_BUILTIN_PEXT32,
+ IX86_BUILTIN_PEXT64,
/* FSGSBASE instructions. */
IX86_BUILTIN_RDFSBASE32,
struct builtin_isa {
const char *name; /* function name */
enum ix86_builtin_func_type tcode; /* type to use in the declaration */
- int isa; /* isa_flags this builtin is defined for */
+ HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
bool const_p; /* true if the declaration is constant */
bool set_and_not_built_p;
};
errors if a builtin is added in the middle of a function scope. */
static inline tree
-def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
+def_builtin (HOST_WIDE_INT mask, const char *name,
+ enum ix86_builtin_func_type tcode,
enum ix86_builtins code)
{
tree decl = NULL_TREE;
/* Like def_builtin, but also marks the function decl "const". */
static inline tree
-def_builtin_const (int mask, const char *name,
+def_builtin_const (HOST_WIDE_INT mask, const char *name,
enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
tree decl = def_builtin (mask, name, tcode, code);
declarations to the tree, even if we didn't use them. */
static void
-ix86_add_new_builtins (int isa)
+ix86_add_new_builtins (HOST_WIDE_INT isa)
{
int i;
struct builtin_description
{
- const unsigned int mask;
+ const HOST_WIDE_INT mask;
const enum insn_code icode;
const char *const name;
const enum ix86_builtins code;
{
{ ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
{ ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
/* MMX */
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
+ /* AVX2 */
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
+
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
{ OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
- { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ /* AVX2 */
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv4di, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlqv4di3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrqv4di3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+
+ { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
+
+ /* BMI2 */
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+ { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
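As a usage sketch for the new BMI2 entries, assuming the bmi2intrin.h wrapper _pdep_u32 (compile with -mbmi2):

    #include <immintrin.h>

    /* Maps onto __builtin_ia32_pdep_si / PDEP: deposit the low bits of X
       into the bit positions selected by MASK.  */
    unsigned int
    spread_bits (unsigned int x, unsigned int mask)
    {
      return _pdep_u32 (x, mask);
    }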
/* FMA4 and XOP. */
"__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
+ "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
+ UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
+ "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
+ UNKNOWN, (int)MULTI_ARG_3_DF },
+
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
"__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
UNKNOWN, (int)MULTI_ARG_3_SF },
"__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
IX86_BUILTIN_RDRAND64_STEP);
+ /* AVX2 */
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
+ IX86_BUILTIN_GATHERSIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
+ IX86_BUILTIN_GATHERSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
+ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
+ IX86_BUILTIN_GATHERDIV2DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
+ IX86_BUILTIN_GATHERDIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
+ IX86_BUILTIN_GATHERSIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
+ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
+ IX86_BUILTIN_GATHERSIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
+ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
+ IX86_BUILTIN_GATHERDIV4SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
+ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
+ IX86_BUILTIN_GATHERDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
+ V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
+ IX86_BUILTIN_GATHERSIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
+ IX86_BUILTIN_GATHERSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
+ V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
+ IX86_BUILTIN_GATHERDIV2DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
+ V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
+ IX86_BUILTIN_GATHERDIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
+ V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
+ IX86_BUILTIN_GATHERSIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
+ V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
+ IX86_BUILTIN_GATHERSIV8SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
+ V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
+ IX86_BUILTIN_GATHERDIV4SI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
+ V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
+ IX86_BUILTIN_GATHERDIV8SI);
+
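A user-level sketch of the gather builtins defined above, assuming the avx2intrin.h wrapper _mm256_i32gather_ps (compile with -mavx2); the scale must be an immediate 1, 2, 4 or 8, which the expander later enforces:

    #include <immintrin.h>

    /* Maps onto __builtin_ia32_gathersiv8sf / vgatherdps: fetch eight
       floats at base[idx[i]], with a 4-byte scale.  */
    __m256
    gather8 (const float *base, __m256i idx)
    {
      return _mm256_i32gather_ps (base, idx, 4);
    }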
/* MMX access to the vec_init patterns. */
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
ix86_init_mmx_sse_builtins ();
- if (TARGET_64BIT)
+ if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
#ifdef SUBTARGET_INIT_BUILTINS
int adjust = (comparison_p) ? 1 : 0;
enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
- if (last_arg_constant && i == nargs-1)
+ if (last_arg_constant && i == nargs - 1)
{
- if (!CONST_INT_P (op))
+ if (!insn_data[icode].operand[i + 1].predicate (op, mode))
{
- error ("last argument must be an immediate");
- return gen_reg_rtx (tmode);
+ enum insn_code new_icode = icode;
+ switch (icode)
+ {
+ case CODE_FOR_xop_vpermil2v2df3:
+ case CODE_FOR_xop_vpermil2v4sf3:
+ case CODE_FOR_xop_vpermil2v4df3:
+ case CODE_FOR_xop_vpermil2v8sf3:
+ error ("the last argument must be a 2-bit immediate");
+ return gen_reg_rtx (tmode);
+ case CODE_FOR_xop_rotlv2di3:
+ new_icode = CODE_FOR_rotlv2di3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv4si3:
+ new_icode = CODE_FOR_rotlv4si3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv8hi3:
+ new_icode = CODE_FOR_rotlv8hi3;
+ goto xop_rotl;
+ case CODE_FOR_xop_rotlv16qi3:
+ new_icode = CODE_FOR_rotlv16qi3;
+ xop_rotl:
+ if (CONST_INT_P (op))
+ {
+ int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
+ op = GEN_INT (INTVAL (op) & mask);
+ gcc_checking_assert
+ (insn_data[icode].operand[i + 1].predicate (op, mode));
+ }
+ else
+ {
+ gcc_checking_assert
+ (nargs == 2
+ && insn_data[new_icode].operand[0].mode == tmode
+ && insn_data[new_icode].operand[1].mode == tmode
+ && insn_data[new_icode].operand[2].mode == mode
+ && insn_data[new_icode].operand[0].predicate
+ == insn_data[icode].operand[0].predicate
+ && insn_data[new_icode].operand[1].predicate
+ == insn_data[icode].operand[1].predicate);
+ icode = new_icode;
+ goto non_constant;
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
}
}
else
{
+ non_constant:
if (VECTOR_MODE_P (mode))
op = safe_vector_operand (op, mode);
if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
{
- error ("the fifth argument must be a 8-bit immediate");
+ error ("the fifth argument must be an 8-bit immediate");
return const0_rtx;
}
if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
{
- error ("the third argument must be a 8-bit immediate");
+ error ("the third argument must be an 8-bit immediate");
return const0_rtx;
}
case INT_FTYPE_V4DF:
case INT_FTYPE_V4SF:
case INT_FTYPE_V2DF:
+ case INT_FTYPE_V32QI:
case V16QI_FTYPE_V16QI:
case V8SI_FTYPE_V8SF:
case V8SI_FTYPE_V4SI:
case V2SI_FTYPE_V2DF:
case V2SF_FTYPE_V2SF:
case V2SF_FTYPE_V2SI:
+ case V32QI_FTYPE_V32QI:
+ case V32QI_FTYPE_V16QI:
+ case V16HI_FTYPE_V16HI:
+ case V16HI_FTYPE_V8HI:
+ case V8SI_FTYPE_V8SI:
+ case V16HI_FTYPE_V16QI:
+ case V8SI_FTYPE_V16QI:
+ case V4DI_FTYPE_V16QI:
+ case V8SI_FTYPE_V8HI:
+ case V4DI_FTYPE_V8HI:
+ case V4DI_FTYPE_V4SI:
+ case V4DI_FTYPE_V2DI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
case V1DI_FTYPE_V1DI_V1DI:
case V1DI_FTYPE_V8QI_V8QI:
case V1DI_FTYPE_V2SI_V2SI:
+ case V32QI_FTYPE_V16HI_V16HI:
+ case V16HI_FTYPE_V8SI_V8SI:
+ case V32QI_FTYPE_V32QI_V32QI:
+ case V16HI_FTYPE_V32QI_V32QI:
+ case V16HI_FTYPE_V16HI_V16HI:
+ case V8SI_FTYPE_V8SI_V8SI:
+ case V8SI_FTYPE_V16HI_V16HI:
+ case V4DI_FTYPE_V4DI_V4DI:
+ case V4DI_FTYPE_V8SI_V8SI:
if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target);
nargs = 2;
nargs = 2;
swap = true;
break;
+ case V16HI_FTYPE_V16HI_V8HI_COUNT:
+ case V16HI_FTYPE_V16HI_SI_COUNT:
+ case V8SI_FTYPE_V8SI_V4SI_COUNT:
+ case V8SI_FTYPE_V8SI_SI_COUNT:
+ case V4DI_FTYPE_V4DI_V2DI_COUNT:
+ case V4DI_FTYPE_V4DI_INT_COUNT:
case V8HI_FTYPE_V8HI_V8HI_COUNT:
case V8HI_FTYPE_V8HI_SI_COUNT:
case V4SI_FTYPE_V4SI_V4SI_COUNT:
case V2DI_FTYPE_V2DI_INT:
case V2DF_FTYPE_V2DF_INT:
case V2DF_FTYPE_V4DF_INT:
+ case V16HI_FTYPE_V16HI_INT:
+ case V8SI_FTYPE_V8SI_INT:
+ case V4DI_FTYPE_V4DI_INT:
+ case V2DI_FTYPE_V4DI_INT:
nargs = 2;
nargs_constant = 1;
break;
case V4DF_FTYPE_V4DF_V4DF_V4DF:
case V4SF_FTYPE_V4SF_V4SF_V4SF:
case V2DF_FTYPE_V2DF_V2DF_V2DF:
+ case V32QI_FTYPE_V32QI_V32QI_V32QI:
nargs = 3;
break;
+ case V32QI_FTYPE_V32QI_V32QI_INT:
+ case V16HI_FTYPE_V16HI_V16HI_INT:
case V16QI_FTYPE_V16QI_V16QI_INT:
+ case V4DI_FTYPE_V4DI_V4DI_INT:
case V8HI_FTYPE_V8HI_V8HI_INT:
case V8SI_FTYPE_V8SI_V8SI_INT:
case V8SI_FTYPE_V8SI_V4SI_INT:
case V4DF_FTYPE_V4DF_V2DF_INT:
case V4SF_FTYPE_V4SF_V4SF_INT:
case V2DI_FTYPE_V2DI_V2DI_INT:
+ case V4DI_FTYPE_V4DI_V2DI_INT:
case V2DF_FTYPE_V2DF_V2DF_INT:
nargs = 3;
nargs_constant = 1;
break;
+ case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
+ nargs = 3;
+ rmode = V4DImode;
+ nargs_constant = 1;
+ break;
case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
nargs = 3;
rmode = V2DImode;
if (!match)
switch (icode)
{
+ case CODE_FOR_avx2_inserti128:
+ case CODE_FOR_avx2_extracti128:
+ error ("the last argument must be an 1-bit immediate");
+ return const0_rtx;
+
case CODE_FOR_sse4_1_roundpd:
case CODE_FOR_sse4_1_roundps:
case CODE_FOR_sse4_1_roundsd:
break;
case UINT64_FTYPE_PUNSIGNED:
case V2DI_FTYPE_PV2DI:
+ case V4DI_FTYPE_PV4DI:
case V32QI_FTYPE_PCCHAR:
case V16QI_FTYPE_PCCHAR:
case V8SF_FTYPE_PCV4SF:
case V4DF_FTYPE_PCV4DF_V4DI:
case V4SF_FTYPE_PCV4SF_V4SI:
case V2DF_FTYPE_PCV2DF_V2DI:
+ case V8SI_FTYPE_PCV8SI_V8SI:
+ case V4DI_FTYPE_PCV4DI_V4DI:
+ case V4SI_FTYPE_PCV4SI_V4SI:
+ case V2DI_FTYPE_PCV2DI_V2DI:
nargs = 2;
klass = load;
memory = 0;
case VOID_FTYPE_PV4DF_V4DI_V4DF:
case VOID_FTYPE_PV4SF_V4SI_V4SF:
case VOID_FTYPE_PV2DF_V2DI_V2DF:
+ case VOID_FTYPE_PV8SI_V8SI_V8SI:
+ case VOID_FTYPE_PV4DI_V4DI_V4DI:
+ case VOID_FTYPE_PV4SI_V4SI_V4SI:
+ case VOID_FTYPE_PV2DI_V2DI_V2DI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
op = expand_normal (arg);
gcc_assert (target == 0);
if (memory)
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ {
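+ /* If the address arrives narrower than Pmode (ptr_mode on x32),
+ zero-extend it before forming the MEM.  */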
+ if (GET_MODE (op) != Pmode)
+ op = convert_to_mode (Pmode, op, 1);
+ target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
+ }
else
target = force_reg (tmode, op);
arg_adjust = 1;
if (i == memory)
{
/* This must be the memory operand. */
- op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ if (GET_MODE (op) != Pmode)
+ op = convert_to_mode (Pmode, op, 1);
+ op = gen_rtx_MEM (mode, force_reg (Pmode, op));
gcc_assert (GET_MODE (op) == mode
|| GET_MODE (op) == VOIDmode);
}
size_t i;
enum insn_code icode;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0, arg1, arg2;
- rtx op0, op1, op2, pat;
- enum machine_mode mode0, mode1, mode2;
+ tree arg0, arg1, arg2, arg3, arg4;
+ rtx op0, op1, op2, op3, op4, pat;
+ enum machine_mode mode0, mode1, mode2, mode3, mode4;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* Determine whether the builtin function is available under the current ISA.
mode1 = insn_data[icode].operand[1].mode;
mode2 = insn_data[icode].operand[2].mode;
- op0 = force_reg (Pmode, op0);
- op0 = gen_rtx_MEM (mode1, op0);
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
if (!insn_data[icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
op0 = expand_normal (arg0);
icode = CODE_FOR_sse2_clflush;
if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
emit_insn (gen_sse2_clflush (op0));
return 0;
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!REG_P (op0))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
if (!REG_P (op1))
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op0 = expand_normal (arg0);
icode = CODE_FOR_lwp_llwpcb;
if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
emit_insn (gen_lwp_llwpcb (op0));
return 0;
op0 = gen_reg_rtx (mode0);
emit_insn (GEN_FCN (icode) (op0));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op1 = expand_normal (arg0);
+ if (!address_operand (op1, VOIDmode))
+ {
+ op1 = convert_memory_address (Pmode, op1);
+ op1 = copy_addr_to_reg (op1);
+ }
+ emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+
op1 = gen_reg_rtx (SImode);
emit_move_insn (op1, CONST1_RTX (SImode));
else
op2 = gen_rtx_SUBREG (SImode, op0, 0);
+ if (target == 0)
+ target = gen_reg_rtx (SImode);
+
pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
const0_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, op1,
+ emit_insn (gen_rtx_SET (VOIDmode, target,
gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
- emit_move_insn (target, op1);
+ return target;
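For context, the user-visible shape of the rdrand step builtin whose expansion the hunk above tidies (assuming the immintrin.h wrapper _rdrand32_step; compile with -mrdrnd):

    #include <immintrin.h>

    /* Returns nonzero on success and stores a hardware random value in
       *VAL; the status flag now lands directly in the target register.  */
    int
    get_random (unsigned int *val)
    {
      return _rdrand32_step (val);
    }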
+ case IX86_BUILTIN_GATHERSIV2DF:
+ icode = CODE_FOR_avx2_gathersiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4DF:
+ icode = CODE_FOR_avx2_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV2DF:
+ icode = CODE_FOR_avx2_gatherdiv2df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4DF:
+ icode = CODE_FOR_avx2_gatherdiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4SF:
+ icode = CODE_FOR_avx2_gathersiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV8SF:
+ icode = CODE_FOR_avx2_gathersiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4SF:
+ icode = CODE_FOR_avx2_gatherdiv4sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV8SF:
+ icode = CODE_FOR_avx2_gatherdiv4sf256;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV2DI:
+ icode = CODE_FOR_avx2_gathersiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4DI:
+ icode = CODE_FOR_avx2_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV2DI:
+ icode = CODE_FOR_avx2_gatherdiv2di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4DI:
+ icode = CODE_FOR_avx2_gatherdiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV4SI:
+ icode = CODE_FOR_avx2_gathersiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERSIV8SI:
+ icode = CODE_FOR_avx2_gathersiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV4SI:
+ icode = CODE_FOR_avx2_gatherdiv4si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERDIV8SI:
+ icode = CODE_FOR_avx2_gatherdiv4si256;
+
+ gather_gen:
arg0 = CALL_EXPR_ARG (exp, 0);
- op1 = expand_normal (arg0);
- if (!address_operand (op1, VOIDmode))
- op1 = copy_addr_to_reg (op1);
- emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
+ arg3 = CALL_EXPR_ARG (exp, 3);
+ arg4 = CALL_EXPR_ARG (exp, 4);
+ op0 = expand_normal (arg0);
+ op1 = expand_normal (arg1);
+ op2 = expand_normal (arg2);
+ op3 = expand_normal (arg3);
+ op4 = expand_normal (arg4);
+ /* Note the arg order is different from the operand order. */
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+ mode3 = insn_data[icode].operand[4].mode;
+ mode4 = insn_data[icode].operand[5].mode;
+
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (insn_data[icode].operand[0].mode);
+
+ /* Force the memory operand into a form with just a base register
+ here; we don't want to do this for the memory operands of other
+ builtin functions. */
+ if (GET_MODE (op1) != Pmode)
+ op1 = convert_to_mode (Pmode, op1, 1);
+ op1 = force_reg (Pmode, op1);
+ op1 = gen_rtx_MEM (mode1, op1);
+
+ if (!insn_data[icode].operand[1].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (!insn_data[icode].operand[2].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+ if (!insn_data[icode].operand[3].predicate (op2, mode2))
+ op2 = copy_to_mode_reg (mode2, op2);
+ if (!insn_data[icode].operand[4].predicate (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+ if (!insn_data[icode].operand[5].predicate (op4, mode4))
+ {
+ error ("last argument must be scale 1, 2, 4, 8");
+ return const0_rtx;
+ }
+ pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
+ if (! pat)
+ return const0_rtx;
+ emit_insn (pat);
return target;
default:
}
break;
+ case BUILT_IN_ROUND:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
+ }
+ break;
+
+ case BUILT_IN_ROUNDF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
+ }
+ break;
+
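A sketch of the user code these two cases let the vectorizer handle, assuming -msse4.1 (or -mavx for the 256-bit variants) and -fno-trapping-math; round() uses exactly the away-from-zero tie-breaking the _AZ builtins implement:

    #include <math.h>

    /* Each iteration can now become a vector round through
       IX86_BUILTIN_ROUNDPD_AZ instead of a libm call per element.  */
    void
    round_all (double *out, const double *in, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        out[i] = round (in[i]);
    }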
case BUILT_IN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
- enum machine_mode mode,
- secondary_reload_info *sri ATTRIBUTE_UNUSED)
+ enum machine_mode mode, secondary_reload_info *sri)
{
+ /* Double-word spills from general registers to non-offsettable memory
+ references (zero-extended addresses) require special handling. */
+ if (TARGET_64BIT
+ && MEM_P (x)
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && rclass == GENERAL_REGS
+ && !offsettable_memref_p (x))
+ {
+ sri->icode = (in_p
+ ? CODE_FOR_reload_noff_load
+ : CODE_FOR_reload_noff_store);
+ /* Add the cost of moving address to a temporary. */
+ sri->extra_cost = 1;
+
+ return NO_REGS;
+ }
+
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
if (!TARGET_64BIT
/* This condition handles corner case where an expression involving
pointers gets vectorized. We're trying to use the address of a
- stack slot as a vector initializer.
+ stack slot as a vector initializer.
(set (reg:V2DI 74 [ vect_cst_.2 ])
(vec_duplicate:V2DI (reg/f:DI 20 frame)))
return inline_secondary_memory_needed (class1, class2, mode, strict);
}
+/* Implement the TARGET_CLASS_MAX_NREGS hook.
+
+ On the 80386, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+
+static unsigned char
+ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
+{
+ if (MAYBE_INTEGER_CLASS_P (rclass))
+ {
+ if (mode == XFmode)
+ return (TARGET_64BIT ? 2 : 3);
+ else if (mode == XCmode)
+ return (TARGET_64BIT ? 4 : 6);
+ else
+ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+ }
+ else
+ {
+ if (COMPLEX_MODE_P (mode))
+ return 2;
+ else
+ return 1;
+ }
+}
+
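A standalone sketch of the hook's generic arithmetic; the XFmode/XCmode cases are special-cased presumably because alignment padding would otherwise inflate the word count:

    #include <stdio.h>

    /* Generic case from the hook's final return: word-sized registers
       needed for a SIZE-byte value.  */
    static unsigned int
    words_for_size (unsigned int size, unsigned int units_per_word)
    {
      return (size + units_per_word - 1) / units_per_word;
    }

    int
    main (void)
    {
      /* e.g. an 8-byte DImode value with 4-byte words -> 2 registers.  */
      printf ("%u\n", words_for_size (8, 4));
      return 0;
    }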
/* Return true if the registers in CLASS cannot represent the change from
modes FROM to TO. */
/* In case of copying from general_purpose_register we may emit multiple
stores followed by single load causing memory size mismatch stall.
Count this as arbitrarily high cost of 20. */
- if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ if (targetm.class_max_nregs (class1, mode)
+ > targetm.class_max_nregs (class2, mode))
cost += 20;
/* In the case of FP/MMX moves, the registers actually overlap, and we
scanned. In either case, *TOTAL contains the cost result. */
static bool
-ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
+ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
+ bool speed)
{
enum rtx_code outer_code = (enum rtx_code) outer_code_i;
enum machine_mode mode = GET_MODE (x);
/* ??? SSE scalar/vector cost should be used here. */
/* ??? Bald assumption that fma has the same cost as fmul. */
*total = cost->fmul;
- *total += rtx_cost (XEXP (x, 1), FMA, speed);
+ *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
/* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
sub = XEXP (x, 0);
if (GET_CODE (sub) == NEG)
- sub = XEXP (x, 0);
- *total += rtx_cost (sub, FMA, speed);
+ sub = XEXP (sub, 0);
+ *total += rtx_cost (sub, FMA, 0, speed);
sub = XEXP (x, 2);
if (GET_CODE (sub) == NEG)
- sub = XEXP (x, 0);
- *total += rtx_cost (sub, FMA, speed);
+ sub = XEXP (sub, 0);
+ *total += rtx_cost (sub, FMA, 2, speed);
return true;
}
*total = (cost->mult_init[MODE_INDEX (mode)]
+ nbits * cost->mult_bit
- + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
+ + rtx_cost (op0, outer_code, opno, speed)
+ + rtx_cost (op1, outer_code, opno, speed));
return true;
}
if (val == 2 || val == 4 || val == 8)
{
*total = cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1),
+ outer_code, opno, speed);
*total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
- outer_code, speed);
- *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ outer_code, opno, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
return true;
}
}
if (val == 2 || val == 4 || val == 8)
{
*total = cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
- *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0),
+ outer_code, opno, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
return true;
}
}
else if (GET_CODE (XEXP (x, 0)) == PLUS)
{
*total = cost->lea;
- *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
- *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
- *total += rtx_cost (XEXP (x, 1), outer_code, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0),
+ outer_code, opno, speed);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1),
+ outer_code, opno, speed);
+ *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
return true;
}
}
if (!TARGET_64BIT && mode == DImode)
{
*total = (cost->add * 2
- + (rtx_cost (XEXP (x, 0), outer_code, speed)
+ + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
<< (GET_MODE (XEXP (x, 0)) != DImode))
- + (rtx_cost (XEXP (x, 1), outer_code, speed)
+ + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
<< (GET_MODE (XEXP (x, 1)) != DImode)));
return true;
}
/* This kind of construct is implemented using test[bwl].
Treat it as if we had an AND. */
*total = (cost->add
- + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
- + rtx_cost (const1_rtx, outer_code, speed));
+ + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
+ + rtx_cost (const1_rtx, outer_code, opno, speed));
return true;
}
return false;
if (MACHOPIC_ATT_STUB)
switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
else if (MACHOPIC_PURE)
- {
- if (TARGET_DEEP_BRANCH_PREDICTION)
- switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
- else
- switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
- }
+ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
else
switch_to_section (darwin_sections[machopic_symbol_stub_section]);
else if (MACHOPIC_PURE)
{
/* PIC stub. */
- if (TARGET_DEEP_BRANCH_PREDICTION)
- {
- /* 25-byte PIC stub using "CALL get_pc_thunk". */
- rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
- output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
- fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label);
- }
- else
- {
- /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */
- fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label);
- fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label);
- }
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
+ output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
+ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
+ label, lazy_ptr_name, label);
fprintf (file, "\tjmp\t*%%ecx\n");
}
else
compatibility with existing dylibs. */
if (MACHOPIC_PURE)
{
- /* PIC stubs. */
- if (TARGET_DEEP_BRANCH_PREDICTION)
- /* 25-byte PIC stub using "CALL get_pc_thunk". */
- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
- else
- /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */
- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
+ /* 25-byte PIC stub using "CALL get_pc_thunk". */
+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
}
else
/* 16-byte -mdynamic-no-pic stub. */
*no_add_attrs = true;
return NULL_TREE;
}
- if (!TARGET_64BIT)
- {
- warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
- name);
- *no_add_attrs = true;
- return NULL_TREE;
- }
/* Can combine regparm with all attributes but fastcall. */
if (is_attribute_p ("ms_abi", name))
tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset, tree function)
{
- rtx xops[3];
rtx this_param = x86_this_parameter (function);
- rtx this_reg, tmp;
+ rtx this_reg, tmp, fnaddr;
- /* Make sure unwind info is emitted for the thunk if needed. */
- final_start_function (emit_barrier (), file, 1);
+ emit_note (NOTE_INSN_PROLOGUE_END);
/* If VCALL_OFFSET, we'll need THIS in a register. Might as well
pull it in now and let DELTA benefit. */
else if (vcall_offset)
{
/* Put the this parameter into %eax. */
- xops[0] = this_param;
- xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
- output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ this_reg = gen_rtx_REG (Pmode, AX_REG);
+ emit_move_insn (this_reg, this_param);
}
else
this_reg = NULL_RTX;
/* Adjust the this parameter by a fixed constant. */
if (delta)
{
- xops[0] = GEN_INT (delta);
- xops[1] = this_reg ? this_reg : this_param;
+ rtx delta_rtx = GEN_INT (delta);
+ rtx delta_dst = this_reg ? this_reg : this_param;
+
if (TARGET_64BIT)
{
- if (!x86_64_general_operand (xops[0], DImode))
+ if (!x86_64_general_operand (delta_rtx, Pmode))
{
- tmp = gen_rtx_REG (DImode, R10_REG);
- xops[1] = tmp;
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
- xops[0] = tmp;
- xops[1] = this_param;
+ tmp = gen_rtx_REG (Pmode, R10_REG);
+ emit_move_insn (tmp, delta_rtx);
+ delta_rtx = tmp;
}
- if (x86_maybe_negate_const_int (&xops[0], DImode))
- output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
}
- else if (x86_maybe_negate_const_int (&xops[0], SImode))
- output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+
+ emit_insn (ix86_gen_add3 (delta_dst, delta_dst, delta_rtx));
}
/* Adjust the this parameter by a value stored in the vtable. */
if (vcall_offset)
{
+ rtx vcall_addr, vcall_mem, this_mem;
+ unsigned int tmp_regno;
+
if (TARGET_64BIT)
- tmp = gen_rtx_REG (DImode, R10_REG);
+ tmp_regno = R10_REG;
else
{
- int tmp_regno = CX_REG;
unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
tmp_regno = AX_REG;
- tmp = gen_rtx_REG (SImode, tmp_regno);
+ else
+ tmp_regno = CX_REG;
}
+ tmp = gen_rtx_REG (Pmode, tmp_regno);
- xops[0] = gen_rtx_MEM (Pmode, this_reg);
- xops[1] = tmp;
- output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
+ this_mem = gen_rtx_MEM (ptr_mode, this_reg);
+ if (Pmode != ptr_mode)
+ this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
+ emit_move_insn (tmp, this_mem);
/* Adjust the this parameter. */
- xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
- if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
+ vcall_addr = plus_constant (tmp, vcall_offset);
+ if (TARGET_64BIT
+ && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
{
- rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
- xops[0] = GEN_INT (vcall_offset);
- xops[1] = tmp2;
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
+ rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
+ emit_move_insn (tmp2, GEN_INT (vcall_offset));
+ vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
}
- xops[1] = this_reg;
- output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
+
+ vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
+ if (Pmode != ptr_mode)
+ emit_insn (gen_addsi_1_zext (this_reg,
+ gen_rtx_REG (ptr_mode,
+ REGNO (this_reg)),
+ vcall_mem));
+ else
+ emit_insn (ix86_gen_add3 (this_reg, this_reg, vcall_mem));
}
/* If necessary, drop THIS back to its stack slot. */
if (this_reg && this_reg != this_param)
- {
- xops[0] = this_reg;
- xops[1] = this_param;
- output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
- }
+ emit_move_insn (this_param, this_reg);
- xops[0] = XEXP (DECL_RTL (function), 0);
+ fnaddr = XEXP (DECL_RTL (function), 0);
if (TARGET_64BIT)
{
if (!flag_pic || targetm.binds_local_p (function)
- || DEFAULT_ABI == MS_ABI)
- output_asm_insn ("jmp\t%P0", xops);
- /* All thunks should be in the same object as their target,
- and thus binds_local_p should be true. */
- else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
- gcc_unreachable ();
+ || cfun->machine->call_abi == MS_ABI)
+ ;
else
{
- tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
tmp = gen_rtx_CONST (Pmode, tmp);
- tmp = gen_rtx_MEM (QImode, tmp);
- xops[0] = tmp;
- output_asm_insn ("jmp\t%A0", xops);
+ fnaddr = gen_rtx_MEM (Pmode, tmp);
}
}
else
{
if (!flag_pic || targetm.binds_local_p (function))
- output_asm_insn ("jmp\t%P0", xops);
- else
+ ;
#if TARGET_MACHO
- if (TARGET_MACHO)
- {
- rtx sym_ref = XEXP (DECL_RTL (function), 0);
- if (TARGET_MACHO_BRANCH_ISLANDS)
- sym_ref = (gen_rtx_SYMBOL_REF
- (Pmode,
- machopic_indirection_name (sym_ref, /*stub_p=*/true)));
- tmp = gen_rtx_MEM (QImode, sym_ref);
- xops[0] = tmp;
- output_asm_insn ("jmp\t%0", xops);
- }
- else
+ else if (TARGET_MACHO)
+ {
+ fnaddr = machopic_indirect_call_target (DECL_RTL (function));
+ fnaddr = XEXP (fnaddr, 0);
+ }
#endif /* TARGET_MACHO */
+ else
{
- tmp = gen_rtx_REG (SImode, CX_REG);
+ tmp = gen_rtx_REG (Pmode, CX_REG);
output_set_got (tmp, NULL_RTX);
- xops[1] = tmp;
- output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
- output_asm_insn ("jmp\t{*}%1", xops);
+ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
+ fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
+ fnaddr = gen_rtx_MEM (Pmode, fnaddr);
}
}
+
+ /* Our sibling call patterns do not allow memories, because we have no
+ predicate that can distinguish between frame and non-frame memory.
+ For our purposes here, we can get away with (ab)using a jump pattern,
+ because we're going to do no optimization. */
+ if (MEM_P (fnaddr))
+ emit_jump_insn (gen_indirect_jump (fnaddr));
+ else
+ {
+ tmp = gen_rtx_MEM (QImode, fnaddr);
+ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
+ tmp = emit_call_insn (tmp);
+ SIBLING_CALL_P (tmp) = 1;
+ }
+ emit_barrier ();
+
+ /* Emit just enough of rest_of_compilation to get the insns emitted.
+ Note that use_thunk calls assemble_start_function et al. */
+ tmp = get_insns ();
+ insn_locators_alloc ();
+ shorten_branches (tmp);
+ final_start_function (tmp, file, 1);
+ final (tmp, file, 1);
final_end_function ();
}
rtx prev;
bool replace = false;
- if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
+ if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
|| optimize_bb_for_size_p (bb))
continue;
for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
/* Count the minimum number of instructions in BB. Return 4 if the
number of instructions >= 4. */
-static int
+static int
ix86_count_insn_bb (basic_block bb)
{
rtx insn;
{
/* Only happen in exit blocks. */
if (JUMP_P (insn)
- && GET_CODE (PATTERN (insn)) == RETURN)
+ && ANY_RETURN_P (PATTERN (insn)))
break;
if (NONDEBUG_INSN_P (insn)
}
-/* Count the minimum number of instructions in code path in BB.
+/* Count the minimum number of instructions in code path in BB.
Return 4 if the number of instructions >= 4. */
-static int
+static int
ix86_count_insn (basic_block bb)
{
edge e;
FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
{
rtx ret = BB_END (e->src);
- if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
+ if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
{
int insn_count = ix86_count_insn (e->src);
return clobbers;
}
-/* Implements target vector targetm.asm.encode_section_info. This
- is not used by netware. */
+/* Implements target vector targetm.asm.encode_section_info. */
static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
emit_label (label2);
}
+/* Emit code for round calculation.  */
+void
+ix86_emit_i387_round (rtx op0, rtx op1)
+{
+ enum machine_mode inmode = GET_MODE (op1);
+ enum machine_mode outmode = GET_MODE (op0);
+ rtx e1, e2, res, tmp, tmp1, half;
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+ rtx jump_label = gen_label_rtx ();
+ rtx insn;
+ rtx (*gen_abs) (rtx, rtx);
+ rtx (*gen_neg) (rtx, rtx);
+
+ switch (inmode)
+ {
+ case SFmode:
+ gen_abs = gen_abssf2;
+ break;
+ case DFmode:
+ gen_abs = gen_absdf2;
+ break;
+ case XFmode:
+ gen_abs = gen_absxf2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ switch (outmode)
+ {
+ case SFmode:
+ gen_neg = gen_negsf2;
+ break;
+ case DFmode:
+ gen_neg = gen_negdf2;
+ break;
+ case XFmode:
+ gen_neg = gen_negxf2;
+ break;
+ case HImode:
+ gen_neg = gen_neghi2;
+ break;
+ case SImode:
+ gen_neg = gen_negsi2;
+ break;
+ case DImode:
+ gen_neg = gen_negdi2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ e1 = gen_reg_rtx (inmode);
+ e2 = gen_reg_rtx (inmode);
+ res = gen_reg_rtx (outmode);
+
+ half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
+
+ /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
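+  /* Worked example (illustrative): for op1 = -2.3, fabs gives 2.3,
+     adding 0.5 gives 2.8, floor gives 2.0, and the saved sign of op1
+     then negates the result to -2.0.  */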
+
+ /* scratch = fxam(op1) */
+ emit_insn (gen_rtx_SET (VOIDmode, scratch,
+ gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
+ UNSPEC_FXAM)));
+ /* e1 = fabs(op1) */
+ emit_insn (gen_abs (e1, op1));
+
+ /* e2 = e1 + 0.5 */
+ half = force_reg (inmode, half);
+ emit_insn (gen_rtx_SET (VOIDmode, e2,
+ gen_rtx_PLUS (inmode, e1, half)));
+
+ /* res = floor(e2) */
+ if (inmode != XFmode)
+ {
+ tmp1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_rtx_SET (VOIDmode, tmp1,
+ gen_rtx_FLOAT_EXTEND (XFmode, e2)));
+ }
+ else
+ tmp1 = e2;
+
+ switch (outmode)
+ {
+ case SFmode:
+ case DFmode:
+ {
+ rtx tmp0 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
+
+ emit_insn (gen_rtx_SET (VOIDmode, res,
+ gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
+ UNSPEC_TRUNC_NOOP)));
+ }
+ break;
+ case XFmode:
+ emit_insn (gen_frndintxf2_floor (res, tmp1));
+ break;
+ case HImode:
+ emit_insn (gen_lfloorxfhi2 (res, tmp1));
+ break;
+ case SImode:
+ emit_insn (gen_lfloorxfsi2 (res, tmp1));
+ break;
+ case DImode:
+ emit_insn (gen_lfloorxfdi2 (res, tmp1));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* flags = signbit(a) */
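+  /* Sketch of the encoding: FXAM sets C1 to the operand's sign, and
+     after FNSTSW the C1 bit (bit 9 of the FPU status word) lands in
+     bit 1 of %ah, hence the 0x02 mask below.  */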
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
+
+ /* if (flags) then res = -res */
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
+ gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+ gen_rtx_LABEL_REF (VOIDmode, jump_label),
+ pc_rtx);
+ insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+ predict_jump (REG_BR_PROB_BASE * 50 / 100);
+ JUMP_LABEL (insn) = jump_label;
+
+ emit_insn (gen_neg (res, res));
+
+ emit_label (jump_label);
+ LABEL_NUSES (jump_label) = 1;
+
+ emit_move_insn (op0, res);
+}
+
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
gen_rtx_MULT (mode, e2, e3)));
}
+#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
-static void ATTRIBUTE_UNUSED
+static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
tree decl)
{
flags & SECTION_WRITE ? "aw" : "a");
return;
}
+
+#ifndef USE_GAS
+ if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
+ {
+ solaris_elf_asm_comdat_section (name, flags, decl);
+ return;
+ }
+#endif
+
default_elf_asm_named_section (name, flags, decl);
}
+#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type. */
__stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
register, so it is better to call __stack_chk_fail directly. */
-static tree
+static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
return TARGET_64BIT
emit_move_insn (operand0, res);
}
+
+/* Expand SSE sequence for computing round
+ from OP1 storing into OP0 using sse4 round insn. */
+void
+ix86_expand_round_sse4 (rtx op0, rtx op1)
+{
+ enum machine_mode mode = GET_MODE (op0);
+ rtx e1, e2, res, half;
+ const struct real_format *fmt;
+ REAL_VALUE_TYPE pred_half, half_minus_pred_half;
+ rtx (*gen_copysign) (rtx, rtx, rtx);
+ rtx (*gen_round) (rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case SFmode:
+ gen_copysign = gen_copysignsf3;
+ gen_round = gen_sse4_1_roundsf2;
+ break;
+ case DFmode:
+ gen_copysign = gen_copysigndf3;
+ gen_round = gen_sse4_1_rounddf2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* round (a) = trunc (a + copysign (0.5, a)) */
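+  /* Illustrative walkthrough: for op1 = -2.3, copysign (0.5, -2.3)
+     gives -0.5, the sum is -2.8, and trunc yields -2.0.  */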
+
+ /* load nextafter (0.5, 0.0) */
+ fmt = REAL_MODE_FORMAT (mode);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
+ REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
+ half = const_double_from_real_value (pred_half, mode);
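+  /* Sketch of the arithmetic above: with p bits of precision, the
+     largest representable value below 0.5 is 0.5 - 2**(-p - 1), e.g.
+     0.5 - 2**-25 for SFmode (p = 24).  Adding exactly 0.5 to an input
+     just below 0.5 could round up to 1.0 and make trunc return 1, so
+     we add this slightly smaller constant instead.  */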
+
+ /* e1 = copysign (0.5, op1) */
+ e1 = gen_reg_rtx (mode);
+ emit_insn (gen_copysign (e1, half, op1));
+
+ /* e2 = op1 + e1 */
+ e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
+
+ /* res = trunc (e2) */
+ res = gen_reg_rtx (mode);
+ emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
+
+ emit_move_insn (op0, res);
+}
\f
/* Table of valid machine attributes. */
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
+#undef TARGET_SCHED_REASSOCIATION_WIDTH
+#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
/* The size of the dispatch window is the total number of bytes of
object code allowed in a window. */
static bool
has_dispatch (rtx insn, int action)
{
- if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
+ if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
+ && flag_dispatch_scheduler)
switch (action)
{
default:
return false;
}
+/* Implementation of the reassociation_width target hook, used by the
+   reassoc pass to determine the level of parallelism available in a
+   reassociated tree.  The statement's tree_code is passed in OPC and
+   the mode of its arguments in MODE.
+
+   Currently parallel reassociation is enabled only for Atom
+   processors, and we set the reassociation width to 2 because Atom
+   may issue up to 2 instructions per cycle.
+
+   The return value should be adjusted if parallel reassociation is
+   enabled for other processors.  */
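+
+/* For example, with a width of 2 the reassoc pass may rewrite the
+   serial chain ((a + b) + c) + d as (a + b) + (c + d), letting the
+   two inner additions issue in the same cycle (an illustrative
+   sketch; the actual partitioning is up to the reassoc pass).  */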
+
+static int
+ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
+{
+ int res = 1;
+
+ if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
+ res = 2;
+ else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
+ res = 2;
+
+ return res;
+}
+
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions. */
switch (mode)
{
case QImode:
- return V16QImode;
+ return TARGET_AVX2 ? V32QImode : V16QImode;
case HImode:
- return V8HImode;
+ return TARGET_AVX2 ? V16HImode : V8HImode;
case SImode:
- return V4SImode;
+ return TARGET_AVX2 ? V8SImode : V4SImode;
case DImode:
- return V2DImode;
+ return TARGET_AVX2 ? V4DImode : V2DImode;
case SFmode:
- if (TARGET_AVX && !flag_prefer_avx128)
+ if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
else
return V4SFmode;
case DFmode:
if (!TARGET_VECTORIZE_DOUBLE)
return word_mode;
- else if (TARGET_AVX && !flag_prefer_avx128)
+ else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
else if (TARGET_SSE2)
return V2DFmode;
static unsigned int
ix86_autovectorize_vector_sizes (void)
{
- return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
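+  /* The bitmask encodes the supported vector sizes in bytes:
+     32 (AVX ymm) and 16 (SSE xmm).  */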
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Initialize the GCC target structure. */
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
-#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start
-#undef TARGET_DEFAULT_TARGET_FLAGS
-#define TARGET_DEFAULT_TARGET_FLAGS \
- (TARGET_DEFAULT \
- | TARGET_SUBTARGET_DEFAULT \
- | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
-
-#undef TARGET_HANDLE_OPTION
-#define TARGET_HANDLE_OPTION ix86_handle_option
-
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
-#undef TARGET_OPTION_OPTIMIZATION_TABLE
-#define TARGET_OPTION_OPTIMIZATION_TABLE ix86_option_optimization_table
-#undef TARGET_OPTION_INIT_STRUCT
-#define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
+#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
-
-#undef TARGET_SUPPORTS_SPLIT_STACK
-#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
+#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+#undef TARGET_CLASS_MAX_NREGS
+#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
+
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS