/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of GCC.
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
-#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
1, /* cond_not_taken_branch_cost. */
};
+static const
+struct processor_costs atom_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ {{libcall, {{8, loop}, {15, unrolled_loop},
+ {2048, rep_prefix_4_byte}, {-1, libcall}}},
+ {libcall, {{24, loop}, {32, unrolled_loop},
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
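+
+/* A reading key for the memcpy/memset descriptors above (see struct
+ stringop_algs in i386.h): the first element is the strategy used when
+ the block size is unknown at compile time; each following {max, alg}
+ pair selects ALG for known sizes up to MAX bytes, with -1 meaning no
+ upper bound. So 32-bit Atom memcpy uses an inline loop for blocks of
+ at most 11 bytes and rep movsl for anything larger. COSTS_N_INSNS (N)
+ simply scales N by the cost of one fast instruction (N * 4, from
+ rtl.h). */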
+
/* Generic64 should produce code tuned for Nocona and K8. */
static const
struct processor_costs generic64_cost = {
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
+#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
m_486 | m_PENT,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
+ m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
+ | m_CORE2 | m_GENERIC,
/* X86_TUNE_DEEP_BRANCH_PREDICTION */
- m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
+ m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
~m_386,
/* X86_TUNE_USE_SAHF */
- m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
- ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
+ ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
- | m_GENERIC /* | m_PENT4 ? */,
+ m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
+ | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
/* X86_TUNE_FAST_PREFIX */
~(m_PENT | m_486 | m_386),
m_PPRO,
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
- m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
+ | m_CORE2 | m_GENERIC,
/* X86_TUNE_ADD_ESP_8 */
- m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SUB_ESP_4 */
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
+ | m_GENERIC,
/* X86_TUNE_SUB_ESP_8 */
- m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+ ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
| m_GENERIC | m_GEODE),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
- m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
+ m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
+ | m_AMDFAM10,
/* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
m_AMDFAM10,
m_PPRO | m_PENT4 | m_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
/* X86_TUNE_EPILOGUE_USING_MOVE */
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
/* X86_TUNE_SHIFT1 */
~m_486,
m_AMD_MULTIPLE,
/* X86_TUNE_INTER_UNIT_MOVES */
- ~(m_AMD_MULTIPLE | m_GENERIC),
+ ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
/* X86_TUNE_INTER_UNIT_CONVERSIONS */
~(m_AMDFAM10),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
+ m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
+ | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
+ | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
+ m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
- ~(m_PENT4 | m_NOCONA | m_GENERIC),
+ ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
/* X86_TUNE_PAD_RETURNS */
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
/* X86_TUNE_EXT_80387_CONSTANTS */
- m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
+ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
+ | m_CORE2 | m_GENERIC,
/* X86_TUNE_SHORTEN_X87_SSE */
~m_K8,
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
m_CORE2,
+
+ /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
+ will impact LEA instruction selection. */
+ m_ATOM,
};
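+
+/* Roughly, each mask above is folded into ix86_tune_features[] once the
+ -mtune target is known:
+
+ ix86_tune_features[i]
+ = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
+
+ so a tuning macro such as TARGET_OPT_AGU in i386.h reduces to an
+ array lookup for the selected processor. */
+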
/* Feature tests against the various architecture variations. */
};
static const unsigned int x86_accumulate_outgoing_args
- = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+ = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+ | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
+ = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
| m_NOCONA | m_CORE2 | m_GENERIC;
static enum stringop_alg stringop_alg = no_stringop;
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
-static int const x86_64_int_parameter_registers[6] =
-{
- 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
- FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
-};
-
-static int const x86_64_ms_abi_int_parameter_registers[4] =
-{
- 2 /*RCX*/, 1 /*RDX*/,
- FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
-};
-
-static int const x86_64_int_return_registers[4] =
-{
- 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
-};
-
/* The "default" register map used in 64bit mode. */
+
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
-rtx ix86_compare_emitted = NULL_RTX;
-/* Define the structure for the machine field in struct function. */
+/* Define parameter passing and return registers. */
+
+static int const x86_64_int_parameter_registers[6] =
+{
+ DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
+};
+
+static int const x86_64_ms_abi_int_parameter_registers[4] =
+{
+ CX_REG, DX_REG, R8_REG, R9_REG
+};
-struct stack_local_entry GTY(())
+static int const x86_64_int_return_registers[4] =
{
+ AX_REG, DX_REG, DI_REG, SI_REG
+};
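+
+/* For reference: the SysV AMD64 ABI passes the first six integer
+ arguments in %rdi, %rsi, %rdx, %rcx, %r8 and %r9, while the Microsoft
+ x64 ABI passes only four, in %rcx, %rdx, %r8 and %r9, which is why
+ its table above is shorter. */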
+
+/* Define the structure for the machine field in struct function. */
+
+struct GTY(()) stack_local_entry {
unsigned short mode;
unsigned short n;
rtx rtl;
<- HARD_FRAME_POINTER
[saved regs]
+ [padding0]
+
+ [saved SSE regs]
+
[padding1] \
)
[va_arg registers] (
*/
struct ix86_frame
{
+ int padding0;
+ int nsseregs;
int nregs;
int padding1;
int va_arg_size;
/* Alignment for incoming stack boundary in bits. */
unsigned int ix86_incoming_stack_boundary;
+/* The ABI used by the target. */
+enum calling_abi ix86_abi;
+
/* Values 1-5: see jump.c */
int ix86_branch_cost;
X86_64_NO_CLASS,
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
- X86_64_AVX_CLASS,
X86_64_SSE_CLASS,
X86_64_SSESF_CLASS,
X86_64_SSEDF_CLASS,
X86_64_COMPLEX_X87_CLASS,
X86_64_MEMORY_CLASS
};
-static const char * const x86_64_reg_class_name[] =
-{
- "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
- "sseup", "x87", "x87up", "cplx87", "no"
-};
#define MAX_CLASSES 4
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
+static enum calling_abi ix86_function_abi (const_tree);
+
\f
/* The svr4 ABI for the i386 says that records and unions are returned
in memory. */
#define OPTION_MASK_ISA_ABM_SET \
(OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
+
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
/* Define a set of ISAs which aren't available when a given ISA is
disabled. MMX and SSE ISAs are handled separately. */
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
/* Vectorization library interface and handlers. */
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
{&core2_cost, 16, 10, 16, 10, 16},
{&generic32_cost, 16, 7, 16, 7, 16},
{&generic64_cost, 16, 10, 16, 10, 16},
- {&amdfam10_cost, 32, 24, 32, 7, 32}
+ {&amdfam10_cost, 32, 24, 32, 7, 32},
+ {&atom_cost, 16, 7, 16, 7, 16}
};
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
"prescott",
"nocona",
"core2",
+ "atom",
"geode",
"k6",
"k6-2",
}
return true;
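+
+ /* MOVBE is a single instruction that loads or stores an integer
+ with its bytes reversed. It first appeared on Atom, so -march=atom
+ enables it below via PTA_MOVBE. */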
+ case OPT_mmovbe:
+ if (value)
+ {
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
+ }
+ else
+ {
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
+ }
+ return true;
+
case OPT_maes:
if (value)
{
{ "-mmmx", OPTION_MASK_ISA_MMX },
{ "-mabm", OPTION_MASK_ISA_ABM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
+ { "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-maes", OPTION_MASK_ISA_AES },
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL },
};
{ "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
};
- const char *opts[ (sizeof (isa_opts) / sizeof (isa_opts[0])
- + sizeof (flag_opts) / sizeof (flag_opts[0])
- + 6)][2];
+ const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
char isa_other[40];
char target_other[40];
}
/* Pick out the options in isa options. */
- for (i = 0; i < sizeof (isa_opts) / sizeof (isa_opts[0]); i++)
+ for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
{
if ((isa & isa_opts[i].mask) != 0)
{
}
/* Add flag options. */
- for (i = 0; i < sizeof (flag_opts) / sizeof (flag_opts[0]); i++)
+ for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
{
if ((flags & flag_opts[i].mask) != 0)
{
if (num == 0)
return NULL;
- gcc_assert (num < sizeof (opts) / sizeof (opts[0]));
+ gcc_assert (num < ARRAY_SIZE (opts));
/* Size the string. */
len = 0;
PTA_AES = 1 << 17,
PTA_PCLMUL = 1 << 18,
PTA_AVX = 1 << 19,
- PTA_FMA = 1 << 20
+ PTA_FMA = 1 << 20,
+ PTA_MOVBE = 1 << 21
};
static struct pta
{"core2", PROCESSOR_CORE2, CPU_CORE2,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16},
+ {"atom", PROCESSOR_ATOM, CPU_ATOM,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
stringop_alg = libcall;
else if (!strcmp (ix86_stringop_string, "rep_4byte"))
stringop_alg = rep_prefix_4_byte;
- else if (!strcmp (ix86_stringop_string, "rep_8byte"))
+ else if (!strcmp (ix86_stringop_string, "rep_8byte")
+ && TARGET_64BIT)
+ /* rep; movq isn't available in 32-bit code. */
stringop_alg = rep_prefix_8_byte;
else if (!strcmp (ix86_stringop_string, "byte_loop"))
stringop_alg = loop_1_byte;
error ("bad value (%s) for %sarch=%s %s",
ix86_arch_string, prefix, suffix, sw);
+ /* Validate -mabi= value. */
+ if (ix86_abi_string)
+ {
+ if (strcmp (ix86_abi_string, "sysv") == 0)
+ ix86_abi = SYSV_ABI;
+ else if (strcmp (ix86_abi_string, "ms") == 0)
+ ix86_abi = MS_ABI;
+ else
+ error ("unknown ABI (%s) for %sabi=%s %s",
+ ix86_abi_string, prefix, suffix, sw);
+ }
+ else
+ ix86_abi = DEFAULT_ABI;
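+
+ /* E.g. -mabi=ms on a SysV target makes MS_ABI the default calling
+ convention; individual functions can still override it with the
+ ms_abi/sysv_abi attributes handled in ix86_function_type_abi. */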
+
if (ix86_cmodel_string != 0)
{
if (!strcmp (ix86_cmodel_string, "small"))
if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
+ if (processor_alias_table[i].flags & PTA_MOVBE
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
if (processor_alias_table[i].flags & PTA_AES
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
ix86_isa_flags |= OPTION_MASK_ISA_AES;
Remove this code in GCC 3.2 or later. */
if (ix86_align_loops_string)
{
- warning (0, "%salign-loops%s is obsolete, use %salign-loops%s",
- prefix, suffix, prefix, suffix);
+ warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
+ prefix, suffix, suffix);
if (align_loops == 0)
{
i = atoi (ix86_align_loops_string);
if (ix86_align_jumps_string)
{
- warning (0, "%salign-jumps%s is obsolete, use %salign-jumps%s",
- prefix, suffix, prefix, suffix);
+ warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
+ prefix, suffix, suffix);
if (align_jumps == 0)
{
i = atoi (ix86_align_jumps_string);
if (ix86_align_funcs_string)
{
- warning (0, "%salign-functions%s is obsolete, use %salign-functions%s",
- prefix, suffix, prefix, suffix);
+ warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
+ prefix, suffix, suffix);
if (align_functions == 0)
{
i = atoi (ix86_align_funcs_string);
unsigned int ix86_arch_mask, ix86_tune_mask;
int i;
- ix86_arch = ptr->arch;
- ix86_schedule = ptr->schedule;
- ix86_tune = ptr->tune;
- ix86_fpmath = ptr->fpmath;
+ ix86_arch = (enum processor_type) ptr->arch;
+ ix86_schedule = (enum attr_cpu) ptr->schedule;
+ ix86_tune = (enum processor_type) ptr->tune;
+ ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
ix86_branch_cost = ptr->branch_cost;
ix86_tune_defaulted = ptr->tune_defaulted;
ix86_arch_specified = ptr->arch_specified;
/* Find the option. */
ch = *p;
opt = N_OPTS;
- for (i = 0; i < sizeof (attrs) / sizeof (attrs[0]); i++)
+ for (i = 0; i < ARRAY_SIZE (attrs); i++)
{
type = attrs[i].type;
opt_len = attrs[i].len;
&& TREE_CODE (*node) != FIELD_DECL
&& TREE_CODE (*node) != TYPE_DECL)
{
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
return NULL_TREE;
}
if (TREE_CODE (cst) != INTEGER_CST)
{
warning (OPT_Wattributes,
- "%qs attribute requires an integer constant argument",
- IDENTIFIER_POINTER (name));
+ "%qE attribute requires an integer constant argument",
+ name);
*no_add_attrs = true;
}
else if (compare_tree_int (cst, REGPARM_MAX) > 0)
{
- warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
- IDENTIFIER_POINTER (name), REGPARM_MAX);
+ warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
+ name, REGPARM_MAX);
*no_add_attrs = true;
}
{
/* Do not warn when emulating the MS ABI. */
if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
*no_add_attrs = true;
return NULL_TREE;
}
ix86_function_regparm (const_tree type, const_tree decl)
{
tree attr;
- int regparm = ix86_regparm;
+ int regparm;
static bool error_issued;
if (TARGET_64BIT)
- {
- if (ix86_function_type_abi (type) == DEFAULT_ABI)
- return regparm;
- return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
- }
+ return (ix86_function_type_abi (type) == SYSV_ABI
+ ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
+ regparm = ix86_regparm;
attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
if (attr)
{
return 2;
/* Use register calling convention for local functions when possible. */
- if (decl && TREE_CODE (decl) == FUNCTION_DECL
+ if (decl
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && optimize
&& !profile_flag)
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
/* For local functions, pass up to SSE_REGPARM_MAX SFmode
(and DFmode for SSE2) arguments in SSE registers. */
- if (decl && TARGET_SSE_MATH && !profile_flag)
+ if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
{
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
default ABI. */
/* RAX is used as hidden argument to va_arg functions. */
- if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
+ if (ix86_abi == SYSV_ABI && regno == AX_REG)
return true;
- if (DEFAULT_ABI == MS_ABI)
+ if (ix86_abi == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
- for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
+ for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
: X86_64_REGPARM_MAX); i++)
if (regno == parm_regs[i])
return true;
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
- int call_abi = SYSV_ABI;
+ enum calling_abi call_abi = SYSV_ABI;
if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
call_abi = ix86_function_abi (fndecl);
else
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
call abi used. */
-int
+enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
if (TARGET_64BIT && fntype != NULL)
{
- int abi;
- if (DEFAULT_ABI == SYSV_ABI)
- abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
- else
- abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
-
+ enum calling_abi abi = ix86_abi;
+ if (abi == SYSV_ABI)
+ {
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
+ abi = MS_ABI;
+ }
+ else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
+ abi = SYSV_ABI;
return abi;
}
- return DEFAULT_ABI;
+ return ix86_abi;
}
-int
+static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
if (! fndecl)
- return DEFAULT_ABI;
+ return ix86_abi;
return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
call abi used. */
-int
+enum calling_abi
ix86_cfun_abi (void)
{
if (! cfun || ! TARGET_64BIT)
- return DEFAULT_ABI;
+ return ix86_abi;
return cfun->machine->call_abi;
}
/* Implementation of call abi switching target hook. Specific to FNDECL
the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
- for more details.
- To prevent redudant calls of costy function init_regs (), it checks not to
- reset register usage for default abi. */
+ for more details. */
void
ix86_call_abi_override (const_tree fndecl)
{
if (fndecl == NULL_TREE)
- cfun->machine->call_abi = DEFAULT_ABI;
+ cfun->machine->call_abi = ix86_abi;
else
cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
- if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
- {
- if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
- {
- call_used_regs[4 /*RSI*/] = 0;
- call_used_regs[5 /*RDI*/] = 0;
- init_regs ();
- }
- }
- else if (TARGET_64BIT)
- {
- if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
- {
- call_used_regs[4 /*RSI*/] = 1;
- call_used_regs[5 /*RDI*/] = 1;
- init_regs ();
- }
- }
+}
+
+/* The MS and SYSV ABIs have different sets of call-used registers.
+ Avoid the expensive re-initialization done by init_regs each time we
+ switch function context, since it is needed only during RTL
+ expansion. */
+static void
+ix86_maybe_switch_abi (void)
+{
+ if (TARGET_64BIT
+ && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
+ reinit_regs ();
}
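+
+/* The equality test above works because SysV marks %rsi/%rdi as
+ call-used (call_used_regs[SI_REG] == 1) while the MS ABI treats them
+ as call-saved, so equality means the cached register set belongs to
+ the other ABI and must be rebuilt by reinit_regs. */
+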
/* Initialize a variable CUM of type CUMULATIVE_ARGS
else
cum->call_abi = ix86_function_type_abi (fntype);
/* Set up the number of registers to use for passing arguments. */
+
+ if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
+ sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
cum->nregs = ix86_regparm;
if (TARGET_64BIT)
{
- if (cum->call_abi != DEFAULT_ABI)
- cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
+ if (cum->call_abi != ix86_abi)
+ cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
: X64_REGPARM_MAX;
}
if (TARGET_SSE)
cum->sse_nregs = SSE_REGPARM_MAX;
if (TARGET_64BIT)
{
- if (cum->call_abi != DEFAULT_ABI)
- cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ if (cum->call_abi != ix86_abi)
+ cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
: X64_SSE_REGPARM_MAX;
}
}
modes, the generic vector support in gcc will choose some non-vector mode
in order to implement the type. By computing the natural mode, we'll
select the proper ABI location for the operand and not depend on whatever
- the middle-end decides to do with these vector types. */
+ the middle-end decides to do with these vector types.
+
+ The middle-end can't deal with vector types larger than 16 bytes. In
+ that case, we return the original mode and warn about the ABI change
+ if CUM isn't NULL. */
static enum machine_mode
-type_natural_mode (const_tree type)
+type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
{
enum machine_mode mode = TYPE_MODE (type);
if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
{
HOST_WIDE_INT size = int_size_in_bytes (type);
- if ((size == 8 || size == 16)
+ if ((size == 8 || size == 16 || size == 32)
/* ??? Generic code allows us to create width 1 vectors. Ignore. */
&& TYPE_VECTOR_SUBPARTS (type) > 1)
{
for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
&& GET_MODE_INNER (mode) == innermode)
- return mode;
+ {
+ if (size == 32 && !TARGET_AVX)
+ {
+ static bool warnedavx;
+
+ if (cum
+ && !warnedavx
+ && cum->warn_avx)
+ {
+ warnedavx = true;
+ warning (0, "AVX vector argument without AVX "
+ "enabled changes the ABI");
+ }
+ return TYPE_MODE (type);
+ }
+ else
+ return mode;
+ }
gcc_unreachable ();
}
tree field;
enum x86_64_reg_class subclasses[MAX_CLASSES];
- /* On x86-64 we pass structures larger than 16 bytes on the stack. */
- if (bytes > 16)
+ /* On x86-64 we pass structures larger than 32 bytes on the stack. */
+ if (bytes > 32)
return 0;
for (i = 0; i < words; i++)
}
else
{
- num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
- TREE_TYPE (field), subclasses,
+ int pos;
+
+ type = TREE_TYPE (field);
+
+ /* Flexible array member is ignored. */
+ if (TYPE_MODE (type) == BLKmode
+ && TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_SIZE (type) == NULL_TREE
+ && TYPE_DOMAIN (type) != NULL_TREE
+ && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
+ == NULL_TREE))
+ {
+ static bool warned;
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI of passing struct with"
+ " a flexible array member has"
+ " changed in GCC 4.4");
+ }
+ continue;
+ }
+ num = classify_argument (TYPE_MODE (type), type,
+ subclasses,
(int_bit_position (field)
+ bit_offset) % 256);
if (!num)
return 0;
- for (i = 0; i < num; i++)
- {
- int pos =
- (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
- classes[i + pos] =
- merge_classes (subclasses[i], classes[i + pos]);
- }
+ pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
+ for (i = 0; i < num && (i + pos) < words; i++)
+ classes[i + pos] =
+ merge_classes (subclasses[i], classes[i + pos]);
}
}
}
gcc_unreachable ();
}
+ if (words > 2)
+ {
+ /* When size > 16 bytes, if the first one isn't
+ X86_64_SSE_CLASS or any other ones aren't
+ X86_64_SSEUP_CLASS, everything should be passed in
+ memory. */
+ if (classes[0] != X86_64_SSE_CLASS)
+ return 0;
+
+ for (i = 1; i < words; i++)
+ if (classes[i] != X86_64_SSEUP_CLASS)
+ return 0;
+ }
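+
+ /* I.e. a 32-byte argument stays in registers only when it is a
+ single contiguous SSE value classified SSE, SSEUP, SSEUP, SSEUP,
+ which construct_container later maps onto one %ymm register; any
+ other layout goes to memory. */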
+
/* Final merger cleanup. */
for (i = 0; i < words; i++)
{
return 0;
/* The X86_64_SSEUP_CLASS should be always preceded by
- X86_64_SSE_CLASS. */
+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
if (classes[i] == X86_64_SSEUP_CLASS
- && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
- classes[i] = X86_64_SSE_CLASS;
+ && classes[i - 1] != X86_64_SSE_CLASS
+ && classes[i - 1] != X86_64_SSEUP_CLASS)
+ {
+ /* The first one should never be X86_64_SSEUP_CLASS. */
+ gcc_assert (i != 0);
+ classes[i] = X86_64_SSE_CLASS;
+ }
- /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
+ everything should be passed in memory. */
if (classes[i] == X86_64_X87UP_CLASS
- && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
- classes[i] = X86_64_SSE_CLASS;
+ && (classes[i - 1] != X86_64_X87_CLASS))
+ {
+ static bool warned;
+
+ /* The first one should never be X86_64_X87UP_CLASS. */
+ gcc_assert (i != 0);
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI of passing union with long double"
+ " has changed in GCC 4.4");
+ }
+ return 0;
+ }
}
return words;
}
case TImode:
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
- case CTImode:
case COImode:
case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+ case CTImode:
return 0;
case SFmode:
if (!(bit_offset % 64))
return 2;
case SCmode:
classes[0] = X86_64_SSE_CLASS;
- return 1;
+ if (!(bit_offset % 64))
+ return 1;
+ else
+ {
+ static bool warned;
+
+ if (!warned && warn_psabi)
+ {
+ warned = true;
+ inform (input_location,
+ "The ABI of passing structure with complex float"
+ " member has changed in GCC 4.4");
+ }
+ classes[1] = X86_64_SSESF_CLASS;
+ return 2;
+ }
case DCmode:
classes[0] = X86_64_SSEDF_CLASS;
classes[1] = X86_64_SSEDF_CLASS;
case V16HImode:
case V4DFmode:
case V4DImode:
- classes[0] = X86_64_AVX_CLASS;
- return 1;
+ classes[0] = X86_64_SSE_CLASS;
+ classes[1] = X86_64_SSEUP_CLASS;
+ classes[2] = X86_64_SSEUP_CLASS;
+ classes[3] = X86_64_SSEUP_CLASS;
+ return 4;
case V4SFmode:
case V4SImode:
case V16QImode:
case X86_64_INTEGERSI_CLASS:
(*int_nregs)++;
break;
- case X86_64_AVX_CLASS:
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
return gen_rtx_REG (mode, intreg[0]);
- case X86_64_AVX_CLASS:
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
- return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
+ if (mode != BLKmode)
+ return gen_reg_or_parallel (mode, orig_mode,
+ SSE_REGNO (sse_regno));
+ break;
case X86_64_X87_CLASS:
case X86_64_COMPLEX_X87_CLASS:
return gen_rtx_REG (mode, FIRST_STACK_REG);
if (n == 2 && regclass[0] == X86_64_SSE_CLASS
&& regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+ if (n == 4
+ && regclass[0] == X86_64_SSE_CLASS
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS
+ && mode != BLKmode)
+ return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
if (n == 2
&& regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
/* Otherwise figure out the entries of the PARALLEL. */
for (i = 0; i < n; i++)
{
+ int pos;
+
switch (regclass[i])
{
case X86_64_NO_CLASS:
sse_regno++;
break;
case X86_64_SSE_CLASS:
- if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
- tmpmode = TImode;
- else
- tmpmode = DImode;
+ pos = i;
+ switch (n)
+ {
+ case 1:
+ tmpmode = DImode;
+ break;
+ case 2:
+ if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
+ {
+ tmpmode = TImode;
+ i++;
+ }
+ else
+ tmpmode = DImode;
+ break;
+ case 4:
+ gcc_assert (i == 0
+ && regclass[1] == X86_64_SSEUP_CLASS
+ && regclass[2] == X86_64_SSEUP_CLASS
+ && regclass[3] == X86_64_SSEUP_CLASS);
+ tmpmode = OImode;
+ i += 3;
+ break;
+ default:
+ gcc_unreachable ();
+ }
exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (tmpmode,
SSE_REGNO (sse_regno)),
- GEN_INT (i*8));
- if (tmpmode == TImode)
- i++;
+ GEN_INT (pos*8));
sse_regno++;
break;
default:
}
break;
+ case OImode:
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+
case DFmode:
if (cum->float_in_sse < 2)
break;
break;
/* FALLTHRU */
- case OImode:
case V8SFmode:
case V8SImode:
case V32QImode:
words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
if (type)
- mode = type_natural_mode (type);
+ mode = type_natural_mode (type, NULL);
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
else if (TARGET_64BIT)
function_arg_advance_64 (cum, mode, type, words, named);
enum machine_mode orig_mode, tree type,
HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
- static bool warnedavx, warnedsse, warnedmmx;
+ static bool warnedsse, warnedmmx;
/* Avoid the AL settings for the Unix64 ABI. */
if (mode == VOIDmode)
break;
case OImode:
- /* In 32bit, we pass OImode in ymm registers. */
+ /* OImode shouldn't be used directly. */
+ gcc_unreachable ();
+
case V8SFmode:
case V8SImode:
case V32QImode:
case V4DImode:
if (!type || !AGGREGATE_TYPE_P (type))
{
- if (!TARGET_AVX && !warnedavx && cum->warn_avx)
- {
- warnedavx = true;
- warning (0, "AVX vector argument without AVX enabled "
- "changes the ABI");
- }
if (cum->sse_nregs)
return gen_reg_or_parallel (mode, orig_mode,
cum->sse_regno + FIRST_SSE_REG);
function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
enum machine_mode orig_mode, tree type, int named)
{
- static bool warnedavx;
-
/* Handle a hidden AL argument containing number of registers
for varargs x86-64 functions. */
if (mode == VOIDmode)
return GEN_INT (cum->maybe_vaarg
? (cum->sse_nregs < 0
- ? (cum->call_abi == DEFAULT_ABI
+ ? (cum->call_abi == ix86_abi
? SSE_REGPARM_MAX
- : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
: X64_SSE_REGPARM_MAX))
: cum->sse_regno)
: -1);
case V16HImode:
case V4DFmode:
case V4DImode:
- /* In 64bit, we pass TImode in interger registers and OImode on
- stack. */
- if (!type || !AGGREGATE_TYPE_P (type))
- {
- if (!TARGET_AVX && !warnedavx && cum->warn_avx)
- {
- warnedavx = true;
- warning (0, "AVX vector argument without AVX enabled "
- "changes the ABI");
- }
- }
-
/* Unnamed 256bit vector mode parameters are passed on stack. */
if (!named)
return NULL;
{
unsigned int regno;
- /* Avoid the AL settings for the Unix64 ABI. */
+ /* We need to add a clobber for MS_ABI -> SYSV ABI calls in
+ expand_call. The value -2 specifies that the current function
+ call uses the MS ABI. */
if (mode == VOIDmode)
- return constm1_rtx;
+ return GEN_INT (-2);
/* If we've run out of registers, it goes on the stack. */
if (cum->nregs == 0)
/* To simplify the code below, represent vector types with a vector mode
even if MMX/SSE are not active. */
if (type && TREE_CODE (type) == VECTOR_TYPE)
- mode = type_natural_mode (type);
+ mode = type_natural_mode (type, cum);
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
return function_arg_ms_64 (cum, mode, omode, named, bytes);
else if (TARGET_64BIT)
return function_arg_64 (cum, mode, omode, type, named);
const_tree type, bool named ATTRIBUTE_UNUSED)
{
/* See Windows x64 Software Convention. */
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
{
int msize = (int) GET_MODE_SIZE (mode);
if (type)
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
- if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
+ if (TARGET_64BIT && ix86_abi == MS_ABI)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
|| (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
regno = TARGET_SSE ? FIRST_SSE_REG : 0;
+ /* 32-byte vector modes in %ymm0. */
+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
+ regno = TARGET_AVX ? FIRST_SSE_REG : 0;
+
/* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
regno = FIRST_FLOAT_REG;
regno = FIRST_SSE_REG;
}
+ /* OImode shouldn't be used directly. */
+ gcc_assert (mode != OImode);
+
return gen_rtx_REG (orig_mode, regno);
}
enum machine_mode mode, orig_mode;
orig_mode = TYPE_MODE (valtype);
- mode = type_natural_mode (valtype);
+ mode = type_natural_mode (valtype, NULL);
return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* SSE values are returned in XMM0, except when it doesn't exist. */
if (size == 16)
return (TARGET_SSE ? 0 : 1);
+
+ /* AVX values are returned in YMM0, except when it doesn't exist. */
+ if (size == 32)
+ return TARGET_AVX ? 0 : 1;
}
if (mode == XFmode)
if (size > 12)
return 1;
+
+ /* OImode shouldn't be used directly. */
+ gcc_assert (mode != OImode);
+
return 0;
}
#ifdef SUBTARGET_RETURN_IN_MEMORY
return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
- const enum machine_mode mode = type_natural_mode (type);
+ const enum machine_mode mode = type_natural_mode (type, NULL);
- if (TARGET_64BIT_MS_ABI)
- return return_in_memory_ms_64 (type, mode);
- else if (TARGET_64BIT)
- return return_in_memory_64 (type, mode);
- else
- return return_in_memory_32 (type, mode);
+ if (TARGET_64BIT)
+ {
+ if (ix86_function_type_abi (fntype) == MS_ABI)
+ return return_in_memory_ms_64 (type, mode);
+ else
+ return return_in_memory_64 (type, mode);
+ }
+ else
+ return return_in_memory_32 (type, mode);
#endif
}
ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
int size;
- enum machine_mode mode = type_natural_mode (type);
+ enum machine_mode mode = type_natural_mode (type, NULL);
if (TARGET_64BIT)
return return_in_memory_64 (type, mode);
static tree
ix86_build_builtin_va_list (void)
{
- tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
+ tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
/* Initialize abi specific va_list builtin types. */
if (TARGET_64BIT)
{
tree t;
- if (DEFAULT_ABI == MS_ABI)
+ if (ix86_abi == MS_ABI)
{
t = ix86_build_builtin_va_list_abi (SYSV_ABI);
if (TREE_CODE (t) != RECORD_TYPE)
t = build_variant_type_copy (t);
sysv_va_list_type_node = t;
}
- if (DEFAULT_ABI != MS_ABI)
+ if (ix86_abi != MS_ABI)
{
t = ix86_build_builtin_va_list_abi (MS_ABI);
if (TREE_CODE (t) != RECORD_TYPE)
int i;
int regparm = ix86_regparm;
- if (cum->call_abi != DEFAULT_ABI)
- regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
+ if (cum->call_abi != ix86_abi)
+ regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* GPR size of varargs save area. */
if (cfun->va_list_gpr_size)
return true;
canonic = ix86_canonical_va_list_type (type);
return (canonic == ms_va_list_type_node
- || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
+ || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start. */
size = int_size_in_bytes (type);
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- nat_mode = type_natural_mode (type);
+ nat_mode = type_natural_mode (type, NULL);
switch (nat_mode)
{
case V8SFmode:
return 0;
ix86_compute_frame_layout (&frame);
- return frame.to_allocate == 0 && frame.nregs == 0;
+ return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
}
\f
/* Value should be nonzero if functions must have frame pointers.
cfun->machine->accesses_prev_frame = 1;
}
\f
-#if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
-# define USE_HIDDEN_LINKONCE 1
-#else
-# define USE_HIDDEN_LINKONCE 0
+#ifndef USE_HIDDEN_LINKONCE
+# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
+# define USE_HIDDEN_LINKONCE 1
+# else
+# define USE_HIDDEN_LINKONCE 0
+# endif
#endif
static int pic_labels_used;
&& (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
-/* Return number of registers to be saved on the stack. */
+/* Return number of saved general purpose registers. */
static int
ix86_nsaved_regs (void)
int nregs = 0;
int regno;
- for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
- if (ix86_save_reg (regno, true))
- nregs++;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ nregs++;
+ return nregs;
+}
+
+/* Return number of saved SSE registers. */
+
+static int
+ix86_nsaved_sseregs (void)
+{
+ int nregs = 0;
+ int regno;
+
+ if (ix86_cfun_abi () != MS_ABI)
+ return 0;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ nregs++;
return nregs;
}
}
}
+/* In a dynamically-aligned function, we can't know the offset from
+ stack pointer to frame pointer, so we must ensure that setjmp
+ eliminates fp against the hard fp (%ebp) rather than trying to
+ index from %esp up to the top of the frame across a gap that is
+ of unknown (at compile-time) size. */
+static rtx
+ix86_builtin_setjmp_frame_value (void)
+{
+ return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
+}
+
/* Fill structure ix86_frame about frame of currently computed function. */
static void
HOST_WIDE_INT size = get_frame_size ();
frame->nregs = ix86_nsaved_regs ();
+ frame->nsseregs = ix86_nsaved_sseregs ();
total_size = size;
stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
+ /* The MS ABI seems to require stack alignment to be always 16 except
+ in function prologues. */
+ if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
+ {
+ preferred_alignment = 16;
+ stack_alignment_needed = 16;
+ crtl->preferred_stack_boundary = 128;
+ crtl->stack_alignment_needed = 128;
+ }
+
gcc_assert (!size || stack_alignment_needed);
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed);
/* Register save area */
offset += frame->nregs * UNITS_PER_WORD;
+ /* Align SSE reg save area. */
+ if (frame->nsseregs)
+ frame->padding0 = ((offset + 16 - 1) & -16) - offset;
+ else
+ frame->padding0 = 0;
+
+ /* SSE register save area. */
+ offset += frame->padding0 + frame->nsseregs * 16;
+
/* Va-arg area */
frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
offset += frame->va_arg_size;
frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
fprintf (stderr, "\n");
- fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
fprintf (stderr, "size: %ld\n", (long)size);
+ fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
+ fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
+ fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
unsigned int regno;
rtx insn;
- for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
- if (ix86_save_reg (regno, true))
+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
RTX_FRAME_RELATED_P (insn) = 1;
rtx insn;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, true))
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
{
insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
Pmode, offset),
}
}
-/* Expand prologue or epilogue stack adjustment.
+/* Emit code to save SSE registers using MOV insns. First register
+ is stored at POINTER + OFFSET. */
+static void
+ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
+{
+ unsigned int regno;
+ rtx insn;
+ rtx mem;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
+ {
+ mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
+ set_mem_align (mem, 128);
+ insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ offset += 16;
+ }
+}
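+
+/* The saves above go through TImode; set_mem_align marks each slot as
+ 128-bit aligned, letting the move patterns emit aligned 16-byte SSE
+ stores. The alignment itself is guaranteed by the padding0 field
+ computed in ix86_compute_frame_layout. */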
+
+/* Expand prologue or epilogue stack adjustment.
The pattern exist to put a dependency on all ebp-based memory accesses.
STYLE should be negative if instructions should be marked as frame related,
zero if %r11 register is live and cannot be freely used and positive
RTX_FRAME_RELATED_P (insn) = 1;
}
- allocate = frame.to_allocate;
+ allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
if (!frame.save_regs_using_mov)
ix86_emit_save_regs ();
RTX_FRAME_RELATED_P (insn) = 1;
t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
- t, REG_NOTES (insn));
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
if (eax_live)
{
|| !frame.to_allocate
|| crtl->stack_realign_needed)
ix86_emit_save_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate);
+ frame.to_allocate
+ + frame.nsseregs * 16 + frame.padding0);
else
ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
-frame.nregs * UNITS_PER_WORD);
}
+ if (!frame_pointer_needed
+ || !frame.to_allocate
+ || crtl->stack_realign_needed)
+ ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate);
+ else
+ ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
+ - frame.nregs * UNITS_PER_WORD
+ - frame.nsseregs * 16
+ - frame.padding0);
pic_reg_used = false;
if (pic_offset_table_rtx
insn = emit_insn (gen_set_got (pic_offset_table_rtx));
}
- /* Prevent function calls from being scheduled before the call to mcount.
- In the pic_reg_used case, make sure that the got load isn't deleted. */
- if (crtl->profile)
- {
- if (pic_reg_used)
- emit_insn (gen_prologue_use (pic_offset_table_rtx));
- emit_insn (gen_blockage ());
- }
+ /* In the pic_reg_used case, make sure that the got load isn't deleted
+ when mcount needs it. Blockage to avoid call movement across mcount
+ call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
+ note. */
+ if (crtl->profile && pic_reg_used)
+ emit_insn (gen_prologue_use (pic_offset_table_rtx));
if (crtl->drap_reg && !crtl->stack_realign_needed)
{
insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
}
+ /* Prevent instructions from being scheduled into register save push
+ sequence when access to the redzone area is done through frame pointer.
+ The offset between the frame pointer and the stack pointer is calculated
+ relative to the value of the stack pointer at the end of the function
+ prologue, and moving instructions that access redzone area via frame
+ pointer inside push sequence violates this assumption. */
+ if (frame_pointer_needed && frame.red_zone_size)
+ emit_insn (gen_memory_blockage ());
+
/* Emit cld instruction if stringops are used in the function. */
if (TARGET_CLD && ix86_current_function_needs_cld)
emit_insn (gen_cld ());
rtx base_address = gen_rtx_MEM (Pmode, pointer);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, maybe_eh_return))
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
{
/* Ensure that adjust_address won't be forced to produce pointer
out of range allowed by x86-64 instruction set. */
offset = 0;
}
emit_move_insn (gen_rtx_REG (Pmode, regno),
- adjust_address (base_address, Pmode, offset));
+ adjust_address (base_address, Pmode, offset));
offset += UNITS_PER_WORD;
}
}
+/* Emit code to restore saved SSE registers using MOV insns. First
+ register is restored from POINTER + OFFSET. */
+static void
+ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
+ int maybe_eh_return)
+{
+ int regno;
+ rtx base_address = gen_rtx_MEM (TImode, pointer);
+ rtx mem;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
+ {
+ /* Ensure that adjust_address won't be forced to produce pointer
+ out of range allowed by x86-64 instruction set. */
+ if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
+ {
+ rtx r11;
+
+ r11 = gen_rtx_REG (DImode, R11_REG);
+ emit_move_insn (r11, GEN_INT (offset));
+ emit_insn (gen_adddi3 (r11, r11, pointer));
+ base_address = gen_rtx_MEM (TImode, r11);
+ offset = 0;
+ }
+ mem = adjust_address (base_address, TImode, offset);
+ set_mem_align (mem, 128);
+ emit_move_insn (gen_rtx_REG (TImode, regno), mem);
+ offset += 16;
+ }
+}
+
/* Restore function stack, frame, and registers. */
void
ix86_compute_frame_layout (&frame);
+ /* See the comment about red zone and frame
+ pointer usage in ix86_expand_prologue. */
+ if (frame_pointer_needed && frame.red_zone_size)
+ emit_insn (gen_memory_blockage ());
+
/* Calculate start of saved registers relative to ebp. Special care
must be taken for the normal return case of a function using
eh_return: the eax and edx registers are marked as saved, but not
if (crtl->calls_eh_return && style != 2)
offset -= 2;
offset *= -UNITS_PER_WORD;
+ offset -= frame.nsseregs * 16 + frame.padding0;
/* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's
are no registers to restore. We also use this code when TARGET_USE_LEAVE
and there is exactly one register to pop. This heuristic may need some
tuning in future. */
- if ((!sp_valid && frame.nregs <= 1)
+ if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
|| (TARGET_EPILOGUE_USING_MOVE
&& cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs > 1 || frame.to_allocate))
- || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
+ && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
+ || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE
&& cfun->machine->use_fast_prologue_epilogue
- && frame.nregs == 1)
+ && (frame.nregs + frame.nsseregs) == 1)
|| crtl->calls_eh_return)
{
/* Restore registers. We can use ebp or esp to address the memory
if (!frame_pointer_needed
|| (sp_valid && !frame.to_allocate)
|| stack_realign_fp)
- ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
- frame.to_allocate, style == 2);
+ {
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
+ ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate
+ + frame.nsseregs * 16
+ + frame.padding0, style == 2);
+ }
else
- ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
- offset, style == 2);
+ {
+ ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
+ offset, style == 2);
+ ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
+ offset
+ + frame.nsseregs * 16
+ + frame.padding0, style == 2);
+ }
/* eh_return epilogues need %ecx added to the stack pointer. */
if (style == 2)
{
tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
tmp = plus_constant (tmp, (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD));
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0));
emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
}
}
else if (!frame_pointer_needed)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD),
+ + frame.nregs * UNITS_PER_WORD
+ + frame.nsseregs * 16
+ + frame.padding0),
style);
/* If not an i386, mov & pop is faster than "leave". */
else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
GEN_INT (offset), style);
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.nsseregs * 16), style);
+ }
+ else if (frame.to_allocate || frame.nsseregs)
+ {
+ ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate,
+ style == 2);
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nsseregs * 16
+ + frame.padding0), style);
}
- else if (frame.to_allocate)
- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (frame.to_allocate), style);
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno, false))
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
if (frame_pointer_needed)
{
break;
case CONST_VECTOR:
- if (x == CONST0_RTX (GET_MODE (x)))
- return true;
- return false;
+ if (!standard_sse_constant_p (x))
+ return false;
default:
break;
return !legitimate_constant_p (x);
}
-/* Determine if a given RTX is a valid constant address. */
-
-bool
-constant_address_p (rtx x)
-{
- return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
-}
/* Nonzero if the constant value X is a legitimate general operand
when generating PIC code. It is given that flag_pic is on and
return 0;
}
-/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
- memory address for an instruction. The MODE argument is the machine mode
- for the MEM expression that wants to use this address.
+/* Recognizes RTL expressions that are valid memory addresses for an
+ instruction. The MODE argument is the machine mode for the MEM
+ expression that wants to use this address.
It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
convert common non-canonical forms to canonical form so that they will
be recognized. */
-int
-legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
- rtx addr, int strict)
+static bool
+ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
+ rtx addr, bool strict)
{
struct ix86_address parts;
rtx base, index, disp;
report_error:
return FALSE;
}
+
+/* Determine if a given RTX is a valid constant address. */
+
+bool
+constant_address_p (rtx x)
+{
+ return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
+}
\f
/* Return a unique alias set for the GOT. */
differentiate them from global data objects. The returned
address is the PIC reg + an unspec constant.
- GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
+ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
reg also appears in the address. */
static rtx
return reg;
}
-/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
+/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
false if we expect this to be used for a memory address and true if
we expect to load the address into a register. */
OLDX is the address as it was before break_out_memory_refs was called.
In some cases it is useful to look at this to decide what needs to be done.
- MODE and WIN are passed so that this macro can use
- GO_IF_LEGITIMATE_ADDRESS.
-
It is always safe for this macro to do nothing. It exists to recognize
opportunities to optimize the output.
When -fpic is used, special handling is needed for symbolic references.
See comments by legitimize_pic_address in i386.c for details. */
-rtx
-legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
+static rtx
+ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ enum machine_mode mode)
{
int changed = 0;
unsigned log;
}
}
- if (changed && legitimate_address_p (mode, x, FALSE))
+ if (changed && ix86_legitimate_address_p (mode, x, FALSE))
return x;
if (GET_CODE (XEXP (x, 0)) == MULT)
x = legitimize_pic_address (x, 0);
}
- if (changed && legitimate_address_p (mode, x, FALSE))
+ if (changed && ix86_legitimate_address_p (mode, x, FALSE))
return x;
if (REG_P (XEXP (x, 0)))
otherwise nothing
R -- print the prefix for register names.
z -- print the opcode suffix for the size of the current operand.
+ Z -- likewise, with special suffixes for x87 instructions.
* -- print a star (in certain assembler syntax)
A -- print an absolute memory reference.
w -- print the operand as if it's a "word" (HImode) even if it isn't.
return;
case 'z':
- /* 387 opcodes don't get size suffixes if the operands are
- registers. */
- if (STACK_REG_P (x))
- return;
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ /* Opcodes don't get size suffixes if using Intel opcodes. */
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ return;
+
+ switch (GET_MODE_SIZE (GET_MODE (x)))
+ {
+ case 1:
+ putc ('b', file);
+ return;
+
+ case 2:
+ putc ('w', file);
+ return;
+
+ case 4:
+ putc ('l', file);
+ return;
+
+ case 8:
+ putc ('q', file);
+ return;
+
+ default:
+ output_operand_lossage
+ ("invalid operand size for operand code '%c'", code);
+ return;
+ }
+ }
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ warning
+ (0, "non-integer operand used with operand code '%c'", code);
+ /* FALLTHRU */
- /* Likewise if using Intel opcodes. */
+ case 'Z':
+ /* 387 opcodes don't get size suffixes if using Intel opcodes. */
if (ASSEMBLER_DIALECT == ASM_INTEL)
return;
- /* This is the size of op from size of operand. */
- switch (GET_MODE_SIZE (GET_MODE (x)))
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
{
- case 1:
- putc ('b', file);
- return;
-
- case 2:
- if (MEM_P (x))
+ switch (GET_MODE_SIZE (GET_MODE (x)))
{
-#ifdef HAVE_GAS_FILDS_FISTS
+ case 2:
+#ifdef HAVE_AS_IX86_FILDS
putc ('s', file);
#endif
return;
+
+ case 4:
+ putc ('l', file);
+ return;
+
+ case 8:
+#ifdef HAVE_AS_IX86_FILDQ
+ putc ('q', file);
+#else
+ fputs ("ll", file);
+#endif
+ return;
+
+ default:
+ break;
}
- else
- putc ('w', file);
- return;
+ }
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ /* 387 opcodes don't get size suffixes
+ if the operands are registers. */
+ if (STACK_REG_P (x))
+ return;
- case 4:
- if (GET_MODE (x) == SFmode)
+ switch (GET_MODE_SIZE (GET_MODE (x)))
{
+ case 4:
putc ('s', file);
return;
- }
- else
- putc ('l', file);
- return;
- case 12:
- case 16:
- putc ('t', file);
- return;
+ case 8:
+ putc ('l', file);
+ return;
- case 8:
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
- {
- if (MEM_P (x))
- {
-#ifdef GAS_MNEMONICS
- putc ('q', file);
-#else
- putc ('l', file);
- putc ('l', file);
-#endif
- }
- else
- putc ('q', file);
+ case 12:
+ case 16:
+ putc ('t', file);
+ return;
+
+ default:
+ break;
}
- else
- putc ('l', file);
+ }
+ else
+ {
+ output_operand_lossage
+ ("invalid operand type used with operand code '%c'", code);
return;
-
- default:
- gcc_unreachable ();
}
+ output_operand_lossage
+ ("invalid operand size for operand code '%c'", code);
+ return;
+
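+      /* Illustrative note (editorial, not part of the patch): with the
+	 codes above, an AT&T-syntax integer template using %z gets the
+	 b/w/l/q suffix matching the operand size, while a template such
+	 as "fild%Z0\t%0" assembles to filds/fildl/fildq for HImode,
+	 SImode and DImode memory operands (assuming the assembler
+	 supports the fildq mnemonic, i.e. HAVE_AS_IX86_FILDQ).  */
+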
case 'd':
case 'b':
case 'w':
fputs ("ord", file);
break;
default:
- gcc_unreachable ();
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
+ return;
}
}
else
fputs ("ord", file);
break;
default:
- gcc_unreachable ();
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
+ return;
}
}
return;
#endif
return;
case 'C':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand code "
+ "'C'");
+ return;
+ }
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
return;
case 'F':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand code "
+ "'F'");
+ return;
+ }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('.', file);
/* Check to see if argument to %c is really a constant
and not a condition code which needs to be reversed. */
if (!COMPARISON_P (x))
- {
- output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
- return;
- }
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand "
+ "code 'c'");
+ return;
+ }
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
return;
case 'f':
+ if (!COMPARISON_P (x))
+ {
+ output_operand_lossage ("operand is neither a constant nor a "
+ "condition code, invalid operand "
+ "code 'f'");
+ return;
+ }
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('.', file);
fputs ("une", file);
break;
default:
- gcc_unreachable ();
+ output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
+ return;
}
return;
if (MEM_P (operands[2]))
{
- p = "%z2\t%2";
+ p = "%Z2\t%2";
break;
}
case DIV:
if (MEM_P (operands[1]))
{
- p = "r%z1\t%1";
+ p = "r%Z1\t%1";
break;
}
if (MEM_P (operands[2]))
{
- p = "%z2\t%2";
+ p = "%Z2\t%2";
break;
}
gcc_assert (GET_MODE (operands[1]) != TFmode);
if (fisttp)
- output_asm_insn ("fisttp%z0\t%0", operands);
+ output_asm_insn ("fisttp%Z0\t%0", operands);
else
{
if (round_mode != I387_CW_ANY)
output_asm_insn ("fldcw\t%3", operands);
if (stack_top_dies || dimode_p)
- output_asm_insn ("fistp%z0\t%0", operands);
+ output_asm_insn ("fistp%Z0\t%0", operands);
else
- output_asm_insn ("fist%z0\t%0", operands);
+ output_asm_insn ("fist%Z0\t%0", operands);
if (round_mode != I387_CW_ANY)
output_asm_insn ("fldcw\t%2", operands);
}
static const char * const alt[16] =
{
- "fcom%z2\t%y2\n\tfnstsw\t%0",
- "fcomp%z2\t%y2\n\tfnstsw\t%0",
- "fucom%z2\t%y2\n\tfnstsw\t%0",
- "fucomp%z2\t%y2\n\tfnstsw\t%0",
+ "fcom%Z2\t%y2\n\tfnstsw\t%0",
+ "fcomp%Z2\t%y2\n\tfnstsw\t%0",
+ "fucom%Z2\t%y2\n\tfnstsw\t%0",
+ "fucomp%Z2\t%y2\n\tfnstsw\t%0",
- "ficom%z2\t%y2\n\tfnstsw\t%0",
- "ficomp%z2\t%y2\n\tfnstsw\t%0",
+ "ficom%Z2\t%y2\n\tfnstsw\t%0",
+ "ficomp%Z2\t%y2\n\tfnstsw\t%0",
NULL,
NULL,
emit_move_insn (stack_pointer_rtx, tmp);
tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
+
+  /* When we push an operand onto the stack, it has to be aligned at
+     least at the function argument boundary.  However, since we don't
+     have the argument type, we can't determine the actual argument
+     boundary.  */
emit_move_insn (tmp, x);
}
if (CONSTANT_P (src1))
return 0;
- /* Source 1 cannot be a non-matching memory. */
- if (MEM_P (src1) && !rtx_equal_p (dst, src1))
- return 0;
+ /* Source 1 cannot be a non-matching memory. */
+ if (MEM_P (src1) && !rtx_equal_p (dst, src1))
+ return 0;
+
+ return 1;
+}
+
+/* Attempt to expand a unary operator.  Make the expansion closer to the
+   actual machine than just general_operand, which would allow two separate
+   memory references (one output, one input) in a single insn.  */
+
+void
+ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
+ rtx operands[])
+{
+ int matching_memory;
+ rtx src, dst, op, clob;
+
+ dst = operands[0];
+ src = operands[1];
+
+ /* If the destination is memory, and we do not have matching source
+ operands, do things in registers. */
+ matching_memory = 0;
+ if (MEM_P (dst))
+ {
+ if (rtx_equal_p (dst, src))
+ matching_memory = 1;
+ else
+ dst = gen_reg_rtx (mode);
+ }
+
+ /* When source operand is memory, destination must match. */
+ if (MEM_P (src) && !matching_memory)
+ src = force_reg (mode, src);
+
+ /* Emit the instruction. */
+
+ op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
+ if (reload_in_progress || code == NOT)
+ {
+ /* Reload doesn't know about the flags register, and doesn't know that
+ it doesn't want to clobber it. */
+ gcc_assert (code == NOT);
+ emit_insn (op);
+ }
+ else
+ {
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ }
+
+ /* Fix up the destination if needed. */
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+}
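+
+/* Editorial sketch (illustrative, not part of the patch): a typical
+   caller is a machine-description expander, e.g. a simplified,
+   hypothetical pattern
+
+     (define_expand "negsi2"
+       [(set (match_operand:SI 0 "nonimmediate_operand" "")
+	     (neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+       ""
+       "ix86_expand_unary_operator (NEG, SImode, operands); DONE;")
+
+   which may receive two distinct memory operands and relies on the
+   code above to force one of them into a register.  */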
+
+#define LEA_SEARCH_THRESHOLD 12
+
+/* Search backward for a non-AGU definition of register number REGNO1
+   or register number REGNO2 in INSN's basic block, stopping when
+   1. LEA_SEARCH_THRESHOLD instructions have been scanned, or
+   2. the BB boundary is reached, or
+   3. an AGU definition is reached.
+   Return the distance between the non-AGU definition point and INSN;
+   if no such definition point is found, return -1.  */
+
+static int
+distance_non_agu_define (unsigned int regno1, unsigned int regno2,
+ rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ enum attr_type insn_type;
+
+ if (insn != BB_HEAD (bb))
+ {
+ rtx prev = PREV_INSN (insn);
+ while (prev && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA)
+ goto done;
+ }
+ }
+ if (prev == BB_HEAD (bb))
+ break;
+ prev = PREV_INSN (prev);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD)
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (e->src == bb)
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx prev = BB_END (bb);
+ while (prev
+ && prev != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (prev))
+ {
+ distance++;
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && (regno1 == DF_REF_REGNO (*def_rec)
+ || regno2 == DF_REF_REGNO (*def_rec)))
+ {
+ insn_type = get_attr_type (prev);
+ if (insn_type != TYPE_LEA)
+ goto done;
+ }
+ }
+ prev = PREV_INSN (prev);
+ }
+ }
+ }
+
+ distance = -1;
+
+done:
+  /* get_attr_type may modify recog data.  Re-extract INSN to make sure
+     recog data is valid for INSN, the instruction on which
+     distance_non_agu_define was called.  */
+ extract_insn_cached (insn);
+ return distance;
+}
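+
+/* Editorial example (illustrative, not part of the patch): in the
+   sequence
+
+     imull  %edx, %ecx	<- non-AGU definition of %ecx
+     lea    (%eax,%ecx), %esi
+
+   distance_non_agu_define (REGNO (%ecx), REGNO (%eax), lea_insn)
+   returns 1, because the multiply feeding the LEA executes outside
+   the address-generation unit.  */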
+
+/* Return the distance between INSN and the next insn that uses
+   register number REGNO0 in a memory address.  Return -1 if no such
+   use is found within LEA_SEARCH_THRESHOLD instructions, or if
+   REGNO0 is set first.  */
+
+static int
+distance_agu_use (unsigned int regno0, rtx insn)
+{
+ basic_block bb = BLOCK_FOR_INSN (insn);
+ int distance = 0;
+ df_ref *def_rec;
+ df_ref *use_rec;
+
+ if (insn != BB_END (bb))
+ {
+ rtx next = NEXT_INSN (insn);
+ while (next && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if OP0 is used in memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if OP0 is set in NEXT. */
+ return -1;
+ }
+ }
+ if (next == BB_END (bb))
+ break;
+ next = NEXT_INSN (next);
+ }
+ }
+
+ if (distance < LEA_SEARCH_THRESHOLD)
+ {
+ edge e;
+ edge_iterator ei;
+ bool simple_loop = false;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (e->dest == bb)
+ {
+ simple_loop = true;
+ break;
+ }
+
+ if (simple_loop)
+ {
+ rtx next = BB_HEAD (bb);
+ while (next
+ && next != insn
+ && distance < LEA_SEARCH_THRESHOLD)
+ {
+ if (INSN_P (next))
+ {
+ distance++;
+
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
+ && regno0 == DF_REF_REGNO (*use_rec))
+ {
+ /* Return DISTANCE if OP0 is used in memory
+ address in NEXT. */
+ return distance;
+ }
+
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
+ && !DF_REF_IS_ARTIFICIAL (*def_rec)
+ && regno0 == DF_REF_REGNO (*def_rec))
+ {
+ /* Return -1 if OP0 is set in NEXT. */
+ return -1;
+ }
+
+ }
+ next = NEXT_INSN (next);
+ }
+ }
+ }
- return 1;
+ return -1;
}
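
/* Editorial example (illustrative, not part of the patch): in

     lea    (%eax,%ebx), %ecx
     movl   (%ecx), %edx	<- %ecx used in a memory address

   distance_agu_use (REGNO (%ecx), lea_insn) returns 1; had %ecx been
   overwritten before any address use, the result would be -1.  */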
-/* Attempt to expand a unary operator. Make the expansion closer to the
- actual machine, then just general_operand, which will allow 2 separate
- memory references (one output, one input) in a single insn. */
+/* This value tunes LEA priority vs ADD; it takes effect when there is
+   a choice between emitting LEA or ADD.
+   Negative value: ADD is preferred over LEA
+   Zero: neutral
+   Positive value: LEA is preferred over ADD.  */
+#define IX86_LEA_PRIORITY 2
-void
-ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
- rtx operands[])
+/* Return true if it is ok to optimize an ADD operation to a LEA
+   operation to avoid flag register consumption.  On processors like
+   Atom, if the destination register of the LEA holds an actual
+   address which will be used soon, LEA is better; otherwise ADD
+   is better.  */
+
+bool
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
+ rtx insn, rtx operands[])
{
- int matching_memory;
- rtx src, dst, op, clob;
+ unsigned int regno0 = true_regnum (operands[0]);
+ unsigned int regno1 = true_regnum (operands[1]);
+ unsigned int regno2;
- dst = operands[0];
- src = operands[1];
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return regno0 != regno1;
- /* If the destination is memory, and we do not have matching source
- operands, do things in registers. */
- matching_memory = 0;
- if (MEM_P (dst))
+ regno2 = true_regnum (operands[2]);
+
+  /* For a = b + c with a != b and a != c, we must use the LEA form.  */
+ if (regno0 != regno1 && regno0 != regno2)
+ return true;
+ else
{
- if (rtx_equal_p (dst, src))
- matching_memory = 1;
- else
- dst = gen_reg_rtx (mode);
+ int dist_define, dist_use;
+ dist_define = distance_non_agu_define (regno1, regno2, insn);
+ if (dist_define <= 0)
+ return true;
+
+      /* If this insn has both a backward non-AGU dependence and a forward
+	 AGU dependence, the one with the shorter distance takes effect.  */
+ dist_use = distance_agu_use (regno0, insn);
+ if (dist_use <= 0
+ || (dist_define + IX86_LEA_PRIORITY) < dist_use)
+ return false;
+
+ return true;
}
+}
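+
+/* Editorial note (illustrative, not part of the patch): the decision
+   above for "a = b + c", in pseudo form:
+
+     if (a != b && a != c)		     use LEA (3-operand form)
+     else if (no non-AGU def feeds b or c)   use LEA
+     else if (no AGU use of a follows, or
+	      def_dist + IX86_LEA_PRIORITY
+	      < use_dist)		     use ADD
+     else				     use LEA  */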
- /* When source operand is memory, destination must match. */
- if (MEM_P (src) && !matching_memory)
- src = force_reg (mode, src);
+/* Return true if destination reg of SET_BODY is shift count of
+ USE_BODY. */
- /* Emit the instruction. */
+static bool
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
+{
+ rtx set_dest;
+ rtx shift_rtx;
+ int i;
- op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
- if (reload_in_progress || code == NOT)
+ /* Retrieve destination of SET_BODY. */
+ switch (GET_CODE (set_body))
{
- /* Reload doesn't know about the flags register, and doesn't know that
- it doesn't want to clobber it. */
- gcc_assert (code == NOT);
- emit_insn (op);
+ case SET:
+ set_dest = SET_DEST (set_body);
+ if (!set_dest || !REG_P (set_dest))
+ return false;
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
+ use_body))
+ return true;
+ default:
+ return false;
+ break;
}
- else
+
+ /* Retrieve shift count of USE_BODY. */
+ switch (GET_CODE (use_body))
{
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+ case SET:
+ shift_rtx = XEXP (use_body, 1);
+ break;
+ case PARALLEL:
+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
+ if (ix86_dep_by_shift_count_body (set_body,
+ XVECEXP (use_body, 0, i)))
+ return true;
+ default:
+ return false;
+ break;
}
- /* Fix up the destination if needed. */
- if (dst != operands[0])
- emit_move_insn (operands[0], dst);
+ if (shift_rtx
+ && (GET_CODE (shift_rtx) == ASHIFT
+ || GET_CODE (shift_rtx) == LSHIFTRT
+ || GET_CODE (shift_rtx) == ASHIFTRT
+ || GET_CODE (shift_rtx) == ROTATE
+ || GET_CODE (shift_rtx) == ROTATERT))
+ {
+ rtx shift_count = XEXP (shift_rtx, 1);
+
+ /* Return true if shift count is dest of SET_BODY. */
+ if (REG_P (shift_count)
+ && true_regnum (set_dest) == true_regnum (shift_count))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if destination reg of SET_INSN is shift count of
+ USE_INSN. */
+
+bool
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
+{
+ return ix86_dep_by_shift_count_body (PATTERN (set_insn),
+ PATTERN (use_insn));
}
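
/* Editorial example (illustrative, not part of the patch): given

     set_insn:  (set (reg:QI cl) ...)
     use_insn:  (set (reg:SI a) (ashift:SI (reg:SI b) (reg:QI cl)))

   ix86_dep_by_shift_count returns true, since set_insn defines the
   shift count consumed by use_insn.  */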
/* Return TRUE or FALSE depending on whether the unary operator meets the
emit_move_insn (target, fp_hi);
}
-/* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
+/* A subroutine of ix86_build_signbit_mask. If VECT is true,
then replicate the value for all elements of the vector
register. */
all elements of the vector register. If INVERT is true, then create
a mask excluding the sign bit. */
-rtx
+static rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
enum machine_mode vec_mode, imode;
if (bypass_test)
*bypass_test = NULL_RTX;
- if (ix86_compare_emitted)
- {
- ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
- ix86_compare_emitted = NULL_RTX;
- }
+ if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
+ ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
+
else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
{
gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
{
rtx tmp;
- /* If we have emitted a compare insn, go straight to simple.
- ix86_expand_compare won't emit anything if ix86_compare_emitted
- is non NULL. */
- if (ix86_compare_emitted)
- goto simple;
-
switch (GET_MODE (ix86_compare_op0))
{
case QImode:
}
default:
- gcc_unreachable ();
+	  /* If we have already emitted a compare insn (ix86_compare_op0
+	     is a flags register), go straight to simple;
+	     ix86_expand_compare won't emit a new compare in that case.  */
+ gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
+ goto simple;
}
}
label),
pc_rtx)));
if (bypass_probability >= 0)
- REG_NOTES (i)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (bypass_probability),
- REG_NOTES (i));
+ add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
}
i = emit_jump_insn (gen_rtx_SET
(VOIDmode, pc_rtx,
gen_rtx_IF_THEN_ELSE (VOIDmode,
condition, target1, target2)));
if (probability >= 0)
- REG_NOTES (i)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (probability),
- REG_NOTES (i));
+ add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
if (second != NULL_RTX)
{
i = emit_jump_insn (gen_rtx_SET
gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
target2)));
if (second_probability >= 0)
- REG_NOTES (i)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (second_probability),
- REG_NOTES (i));
+ add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
}
if (label != NULL_RTX)
emit_label (label);
}
-int
+void
ix86_expand_setcc (enum rtx_code code, rtx dest)
{
rtx ret, tmp, tmpreg, equiv;
rtx second_test, bypass_test;
- if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
- return 0; /* FAIL */
-
gcc_assert (GET_MODE (dest) == QImode);
ret = ix86_expand_compare (code, &second_test, &bypass_test);
ix86_compare_op0, ix86_compare_op1);
set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
}
-
- return 1; /* DONE */
}
/* Expand comparison setting or clearing carry flag. Return true when
bool sign_bit_compare_p = false;
start_sequence ();
+ ix86_compare_op0 = XEXP (operands[1], 0);
+ ix86_compare_op1 = XEXP (operands[1], 1);
compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
compare_seq = get_insns ();
end_sequence ();
enum rtx_code code = GET_CODE (operands[1]);
rtx tmp, compare_op, second_test, bypass_test;
+ ix86_compare_op0 = XEXP (operands[1], 0);
+ ix86_compare_op1 = XEXP (operands[1], 1);
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
{
enum machine_mode cmode;
bool fpcmp = false;
enum machine_mode mode = GET_MODE (operands[0]);
+ ix86_compare_op0 = XEXP (operands[1], 0);
+ ix86_compare_op1 = XEXP (operands[1], 1);
if (operands[3] != const1_rtx
&& operands[3] != constm1_rtx)
return 0;
{
rtx insn = get_last_insn ();
gcc_assert (JUMP_P (insn));
- REG_NOTES (insn)
- = gen_rtx_EXPR_LIST (REG_BR_PROB,
- GEN_INT (prob),
- REG_NOTES (insn));
+ add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below. Dest VARIABLE whether
{
if (GET_MODE (count_exp) != VOIDmode)
return GET_MODE (count_exp);
- if (GET_CODE (count_exp) != CONST_INT)
+ if (!CONST_INT_P (count_exp))
return Pmode;
if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
return DImode;
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
}
+ if (CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ srcmem = shallow_copy_rtx (srcmem);
+ set_mem_size (destmem, count);
+ set_mem_size (srcmem, count);
+ }
+ else
+ {
+ if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
+ if (MEM_SIZE (srcmem))
+ set_mem_size (srcmem, NULL_RTX);
+ }
emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
destexp, srcexp));
}
Arguments have same meaning as for previous function */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
- rtx count,
- enum machine_mode mode)
+ rtx count, enum machine_mode mode,
+ rtx orig_value)
{
rtx destexp;
rtx countreg;
}
else
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
+ if (orig_value == const0_rtx && CONST_INT_P (count))
+ {
+ count = GEN_INT (INTVAL (count)
+ & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
+ destmem = shallow_copy_rtx (destmem);
+ set_mem_size (destmem, count);
+ }
+ else if (MEM_SIZE (destmem))
+ set_mem_size (destmem, NULL_RTX);
emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
gcc_assert (desired_alignment <= 8);
}
+/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
+   ALIGN_BYTES is how many bytes need to be copied.  */
+static rtx
+expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
+ int desired_align, int align_bytes)
+{
+ rtx src = *srcp;
+ rtx src_size, dst_size;
+ int off = 0;
+ int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ src_align_bytes = desired_align - src_align_bytes;
+ src_size = MEM_SIZE (src);
+ dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
+ off = 1;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ src = adjust_automodify_address_nv (src, HImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0
+ && (src_align_bytes & 1) == (align_bytes & 1)
+ && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
+ set_mem_align (src, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ src = adjust_automodify_address_nv (src, SImode, srcreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ off = 4;
+ emit_insn (gen_strmov (destreg, dst, srcreg, src));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (src_align_bytes >= 0)
+ {
+ unsigned int src_align = 0;
+ if ((src_align_bytes & 7) == (align_bytes & 7))
+ src_align = 8;
+ else if ((src_align_bytes & 3) == (align_bytes & 3))
+ src_align = 4;
+ else if ((src_align_bytes & 1) == (align_bytes & 1))
+ src_align = 2;
+ if (src_align > (unsigned int) desired_align)
+ src_align = desired_align;
+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
+ set_mem_align (src, src_align * BITS_PER_UNIT);
+ }
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+  if (src_size)
+    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
+ *srcp = src;
+ return dst;
+}
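+
+/* Editorial example (illustrative, not part of the patch): to bring a
+   destination that is 7 bytes short of DESIRED_ALIGN == 8 up to that
+   alignment, the prologue above emits a 1-byte, a 2-byte and a 4-byte
+   move (align_bytes == 7 == 1 + 2 + 4), raising the recorded MEM
+   alignment and shrinking the recorded MEM size as it goes.  */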
+
/* Store enough into DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
gcc_assert (desired_alignment <= 8);
}
+/* Store enough into DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES
+   is how many bytes need to be stored.  */
+static rtx
+expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
+ int desired_align, int align_bytes)
+{
+ int off = 0;
+ rtx dst_size = MEM_SIZE (dst);
+ if (align_bytes & 1)
+ {
+ dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
+ off = 1;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (QImode, value)));
+ }
+ if (align_bytes & 2)
+ {
+ dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
+ if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
+ set_mem_align (dst, 2 * BITS_PER_UNIT);
+ off = 2;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (HImode, value)));
+ }
+ if (align_bytes & 4)
+ {
+ dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
+ if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
+ set_mem_align (dst, 4 * BITS_PER_UNIT);
+ off = 4;
+ emit_insn (gen_strset (destreg, dst,
+ gen_lowpart (SImode, value)));
+ }
+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (dst_size)
+ set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ return dst;
+}
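+
+/* Editorial note (illustrative, not part of the patch): as in the
+   movmem variant above, an align_bytes of e.g. 3 is handled by one
+   QImode and one HImode store of the low bytes of VALUE (obtained
+   via gen_lowpart) before the aligned main loop takes over.  */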
+
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
int size_needed = 0, epilogue_size_needed;
- int desired_align = 0;
+ int desired_align = 0, align_bytes = 0;
enum stringop_alg alg;
int dynamic_check;
bool need_zero_guard = false;
if (CONST_INT_P (expected_align_exp)
&& INTVAL (expected_align_exp) > align)
align = INTVAL (expected_align_exp);
+  /* ALIGN is the minimum of destination and source alignment, but here we
+     care only about destination alignment.  */
+ else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
+ align = MEM_ALIGN (dst) / BITS_PER_UNIT;
+
if (CONST_INT_P (count_exp))
count = expected_size = INTVAL (count_exp);
if (CONST_INT_P (expected_size_exp) && count == 0)
/* Alignment code needs count to be in register. */
if (CONST_INT_P (count_exp) && desired_align > align)
- count_exp = force_reg (counter_mode (count_exp), count_exp);
+ {
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ count_exp = force_reg (counter_mode (count_exp), count_exp);
+ }
gcc_assert (desired_align >= 1 && align >= 1);
/* Ensure that alignment prologue won't copy past end of block. */
Make sure it is power of 2. */
epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
- if (CONST_INT_P (count_exp))
+ if (count)
{
- if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
- goto epilogue;
+ if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ {
+	  /* If the main algorithm works on QImode, no epilogue is
+	     needed.  For small sizes just don't align anything.  */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
+ }
}
else
{
if (desired_align > align)
{
- /* Except for the first move in epilogue, we no longer know
- constant offset in aliasing info. It don't seems to worth
- the pain to maintain it for the first move, so throw away
- the info early. */
- src = change_address (src, BLKmode, srcreg);
- dst = change_address (dst, BLKmode, destreg);
- expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
- desired_align);
- if (need_zero_guard && !count)
+ if (align_bytes == 0)
+ {
+	      /* Except for the first move in epilogue, we no longer know
+		 constant offset in aliasing info.  It doesn't seem worth
+		 the pain to maintain it for the first move, so throw away
+		 the info early.  */
+ src = change_address (src, BLKmode, srcreg);
+ dst = change_address (dst, BLKmode, destreg);
+ expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+	      /* If we know how many bytes need to be copied before dst is
+		 sufficiently aligned, maintain aliasing info accurately.  */
+ dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard
+ && (count < (unsigned HOST_WIDE_INT) size_needed
+ || (align_bytes == 0
+ && count < ((unsigned HOST_WIDE_INT) size_needed
+ + desired_align - align))))
{
/* It is possible that we copied enough so the main loop will not
execute. */
+ gcc_assert (size_needed > 1);
+ if (label == NULL_RTX)
+ label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp,
GEN_INT (size_needed),
LTU, 0, counter_mode (count_exp), 1, label);
emit_label (label);
LABEL_NUSES (label) = 1;
label = NULL;
+ epilogue_size_needed = 1;
}
+ else if (label == NULL_RTX)
+ epilogue_size_needed = size_needed;
/* Step 3: Main loop. */
unsigned HOST_WIDE_INT count = 0;
HOST_WIDE_INT expected_size = -1;
int size_needed = 0, epilogue_size_needed;
- int desired_align = 0;
+ int desired_align = 0, align_bytes = 0;
enum stringop_alg alg;
rtx promoted_val = NULL;
bool force_loopy_epilogue = false;
/* Alignment code needs count to be in register. */
if (CONST_INT_P (count_exp) && desired_align > align)
{
- enum machine_mode mode = SImode;
- if (TARGET_64BIT && (count & ~0xffffffff))
- mode = DImode;
- count_exp = force_reg (mode, count_exp);
+ if (INTVAL (count_exp) > desired_align
+ && INTVAL (count_exp) > size_needed)
+ {
+ align_bytes
+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
+ if (align_bytes <= 0)
+ align_bytes = 0;
+ else
+ align_bytes = desired_align - align_bytes;
+ }
+ if (align_bytes == 0)
+ {
+ enum machine_mode mode = SImode;
+ if (TARGET_64BIT && (count & ~0xffffffff))
+ mode = DImode;
+ count_exp = force_reg (mode, count_exp);
+ }
}
/* Do the cheap promotion to allow better CSE across the
main loop and epilogue (ie one load of the big constant in the
if (size_needed > 1 || (desired_align > 1 && desired_align > align))
{
epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
- /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
+ /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
Make sure it is power of 2. */
epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
loop variant. */
if (epilogue_size_needed > 2 && !promoted_val)
force_loopy_epilogue = true;
- label = gen_label_rtx ();
- emit_cmp_and_jump_insns (count_exp,
- GEN_INT (epilogue_size_needed),
- LTU, 0, counter_mode (count_exp), 1, label);
- if (GET_CODE (count_exp) == CONST_INT)
- ;
- else if (expected_size == -1 || expected_size <= epilogue_size_needed)
- predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ if (count)
+ {
+ if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
+ {
+	  /* If the main algorithm works on QImode, no epilogue is
+	     needed.  For small sizes just don't align anything.  */
+ if (size_needed == 1)
+ desired_align = align;
+ else
+ goto epilogue;
+ }
+ }
else
- predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (count_exp,
+ GEN_INT (epilogue_size_needed),
+ LTU, 0, counter_mode (count_exp), 1, label);
+ if (expected_size == -1 || expected_size <= epilogue_size_needed)
+ predict_jump (REG_BR_PROB_BASE * 60 / 100);
+ else
+ predict_jump (REG_BR_PROB_BASE * 20 / 100);
+ }
}
if (dynamic_check != -1)
{
if (desired_align > align)
{
- /* Except for the first move in epilogue, we no longer know
- constant offset in aliasing info. It don't seems to worth
- the pain to maintain it for the first move, so throw away
- the info early. */
- dst = change_address (dst, BLKmode, destreg);
- expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
- desired_align);
- if (need_zero_guard && !count)
+ if (align_bytes == 0)
+ {
+	      /* Except for the first move in epilogue, we no longer know
+		 constant offset in aliasing info.  It doesn't seem worth
+		 the pain to maintain it for the first move, so throw away
+		 the info early.  */
+ dst = change_address (dst, BLKmode, destreg);
+ expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
+ desired_align);
+ }
+ else
+ {
+ /* If we know how many bytes need to be stored before dst is
+ sufficiently aligned, maintain aliasing info accurately. */
+ dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
+ desired_align, align_bytes);
+ count_exp = plus_constant (count_exp, -align_bytes);
+ count -= align_bytes;
+ }
+ if (need_zero_guard
+ && (count < (unsigned HOST_WIDE_INT) size_needed
+ || (align_bytes == 0
+ && count < ((unsigned HOST_WIDE_INT) size_needed
+ + desired_align - align))))
{
/* It is possible that we copied enough so the main loop will not
execute. */
+ gcc_assert (size_needed > 1);
+ if (label == NULL_RTX)
+ label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp,
GEN_INT (size_needed),
LTU, 0, counter_mode (count_exp), 1, label);
emit_label (label);
LABEL_NUSES (label) = 1;
label = NULL;
+ promoted_val = val_exp;
+ epilogue_size_needed = 1;
}
+ else if (label == NULL_RTX)
+ epilogue_size_needed = size_needed;
/* Step 3: Main loop. */
break;
case rep_prefix_8_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
- DImode);
+ DImode, val_exp);
break;
case rep_prefix_4_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
- SImode);
+ SImode, val_exp);
break;
case rep_prefix_1_byte:
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
- QImode);
+ QImode, val_exp);
break;
}
/* Adjust properly the offset of src and dest memory for aliasing. */
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
- if (size_needed < desired_align - align)
+ if (size_needed < epilogue_size_needed)
{
tmp =
expand_simple_binop (counter_mode (count_exp), AND, count_exp,
GEN_INT (size_needed - 1), count_exp, 1,
OPTAB_DIRECT);
- size_needed = desired_align - align + 1;
if (tmp != count_exp)
emit_move_insn (count_exp, tmp);
}
emit_label (label);
LABEL_NUSES (label) = 1;
}
+ epilogue:
if (count_exp != const0_rtx && epilogue_size_needed > 1)
{
if (force_loopy_epilogue)
expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
- size_needed);
+ epilogue_size_needed);
else
expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
- size_needed);
+ epilogue_size_needed);
}
if (jump_around_label)
emit_label (jump_around_label);
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
- rtx callarg2 ATTRIBUTE_UNUSED,
+ rtx callarg2,
rtx pop, int sibcall)
{
rtx use = NULL, call;
}
if (ix86_cmodel == CM_LARGE_PIC
- && GET_CODE (fnaddr) == MEM
+ && MEM_P (fnaddr)
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
&& !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
}
+ if (TARGET_64BIT
+ && ix86_cfun_abi () == MS_ABI
+ && (!callarg2 || INTVAL (callarg2) != -2))
+ {
+      /* A SYSV callee may clobber SI, DI and XMM6-XMM15, which the
+	 MS ABI treats as call-saved; represent those clobbers here.  */
+ static int clobbered_registers[] = {
+ XMM6_REG, XMM7_REG, XMM8_REG,
+ XMM9_REG, XMM10_REG, XMM11_REG,
+ XMM12_REG, XMM13_REG, XMM14_REG,
+ XMM15_REG, SI_REG, DI_REG
+ };
+ unsigned int i;
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
+ rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
+
+ vec[0] = call;
+ vec[1] = unspec;
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG
+ (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
+
+ call = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
+ + 2, vec));
+ }
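+
+  /* Editorial note (illustrative, not part of the patch): the PARALLEL
+     built above has the shape
+
+       (parallel [(call ...)
+		  (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+		  (clobber (reg:TI xmm6)) ... (clobber (reg:DI di))])
+
+     so the register allocator sees every register a SYSV callee may
+     clobber beyond the MS ABI call-clobbered set.  */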
call = emit_call_insn (call);
if (use)
f = GGC_CNEW (struct machine_function);
f->use_fast_prologue_epilogue_nregs = -1;
f->tls_descriptor_call_expanded_p = 0;
- f->call_abi = DEFAULT_ABI;
+ f->call_abi = ix86_abi;
return f;
}
/* Rule of thumb:
- esp as the base always wants an index,
- - ebp as the base always wants a displacement. */
+ - ebp as the base always wants a displacement,
+ - r12 as the base always wants an index,
+ - r13 as the base always wants a displacement. */
/* Register Indirect. */
if (base && !index && !disp)
{
/* esp (for its index) and ebp (for its displacement) need
- the two-byte modrm form. */
- if (addr == stack_pointer_rtx
- || addr == arg_pointer_rtx
- || addr == frame_pointer_rtx
- || addr == hard_frame_pointer_rtx)
+ the two-byte modrm form. Similarly for r12 and r13 in 64-bit
+ code. */
+ if (REG_P (addr)
+ && (addr == arg_pointer_rtx
+ || addr == frame_pointer_rtx
+ || REGNO (addr) == SP_REG
+ || REGNO (addr) == BP_REG
+ || REGNO (addr) == R12_REG
+ || REGNO (addr) == R13_REG))
len = 1;
}
else
len = 4;
}
- /* ebp always wants a displacement. */
- else if (base == hard_frame_pointer_rtx)
+ /* ebp always wants a displacement. Similarly r13. */
+ else if (REG_P (base)
+ && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
len = 1;
/* An index requires the two-byte modrm form.... */
if (index
- /* ...like esp, which always wants an index. */
- || base == stack_pointer_rtx
+ /* ...like esp (or r12), which always wants an index. */
|| base == arg_pointer_rtx
- || base == frame_pointer_rtx)
+ || base == frame_pointer_rtx
+ || (REG_P (base)
+ && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
len += 1;
}
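
/* Editorial example (illustrative, not part of the patch): "(%esp)"
   and "(%r12)" need the two-byte modrm+SIB form, while "(%ebp)" and
   "(%r13)" must be encoded as "0(%ebp)"/"0(%r13)" with a one-byte
   displacement; either way costs one byte over the plain one-byte
   modrm form, which is what the len computations above account
   for.  */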
if (get_attr_type (insn) == TYPE_LEA)
{
- rtx set = PATTERN (insn);
+ rtx set = PATTERN (insn), addr;
if (GET_CODE (set) == PARALLEL)
set = XVECEXP (set, 0, 0);
gcc_assert (GET_CODE (set) == SET);
- return memory_address_length (SET_SRC (set));
+ addr = SET_SRC (set);
+ if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
+ {
+ if (GET_CODE (addr) == ZERO_EXTEND)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == SUBREG)
+ addr = SUBREG_REG (addr);
+ }
+
+ return memory_address_length (addr);
}
extract_insn_cached (insn);
switch (ix86_tune)
{
case PROCESSOR_PENTIUM:
+ case PROCESSOR_ATOM:
case PROCESSOR_K6:
return 2;
return 1;
}
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
- address with operands set by DEP_INSN. */
+/* Return true iff USE_INSN has a memory address with operands set by
+ SET_INSN. */
-static int
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
+bool
+ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
- rtx addr;
-
- if (insn_type == TYPE_LEA
- && TARGET_PENTIUM)
- {
- addr = PATTERN (insn);
-
- if (GET_CODE (addr) == PARALLEL)
- addr = XVECEXP (addr, 0, 0);
-
- gcc_assert (GET_CODE (addr) == SET);
-
- addr = SET_SRC (addr);
- }
- else
- {
- int i;
- extract_insn_cached (insn);
- for (i = recog_data.n_operands - 1; i >= 0; --i)
- if (MEM_P (recog_data.operand[i]))
- {
- addr = XEXP (recog_data.operand[i], 0);
- goto found;
- }
- return 0;
- found:;
- }
-
- return modified_in_p (addr, dep_insn);
+ int i;
+ extract_insn_cached (use_insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ {
+ rtx addr = XEXP (recog_data.operand[i], 0);
+ return modified_in_p (addr, set_insn) != 0;
+ }
+ return false;
}
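
/* Editorial example (illustrative, not part of the patch): on Pentium,

     set_insn:  addl  $4, %eax
     use_insn:  movl  (%eax), %edx

   ix86_agi_dependent (set_insn, use_insn) returns true, and
   ix86_adjust_cost charges the extra Address Generation Interlock
   cycle.  */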
static int
{
case PROCESSOR_PENTIUM:
/* Address Generation Interlock adds a cycle of latency. */
- if (ix86_agi_dependent (insn, dep_insn, insn_type))
+ if (insn_type == TYPE_LEA)
+ {
+ rtx addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ if (modified_in_p (addr, dep_insn))
+ cost += 1;
+ }
+ else if (ix86_agi_dependent (dep_insn, insn))
cost += 1;
/* ??? Compares pair with jump/setcc. */
/* Floating point stores require value to be ready one cycle earlier. */
if (insn_type == TYPE_FMOV
&& get_attr_memory (insn) == MEMORY_STORE
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
cost += 1;
break;
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
{
/* Claim moves to take one cycle, as core can issue one load
at time and the next load can start cycle later. */
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
{
/* Claim moves to take one cycle, as core can issue one load
at time and the next load can start cycle later. */
case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_AMDFAM10:
+ case PROCESSOR_ATOM:
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
memory = get_attr_memory (insn);
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (dep_insn, insn))
{
enum attr_unit unit = get_attr_unit (insn);
int loadcost = 3;
return align;
}
-/* Compute the alignment for a local variable or a stack slot. TYPE is
- the data type, MODE is the widest mode available and ALIGN is the
- alignment that the object would ordinarily have. The value of this
- macro is used instead of that alignment to align the object. */
+/* Compute the alignment for a local variable or a stack slot. EXP is
+ the data type or decl itself, MODE is the widest mode available and
+ ALIGN is the alignment that the object would ordinarily have. The
+ value of this macro is used instead of that alignment to align the
+ object. */
unsigned int
-ix86_local_alignment (tree type, enum machine_mode mode,
+ix86_local_alignment (tree exp, enum machine_mode mode,
unsigned int align)
{
+ tree type, decl;
+
+ if (exp && DECL_P (exp))
+ {
+ type = TREE_TYPE (exp);
+ decl = exp;
+ }
+ else
+ {
+ type = exp;
+ decl = NULL;
+ }
+
+ /* Don't do dynamic stack realignment for long long objects with
+ -mpreferred-stack-boundary=2. */
+ if (!TARGET_64BIT
+ && align == 64
+ && ix86_preferred_stack_boundary < 64
+ && (mode == DImode || (type && TYPE_MODE (type) == DImode))
+ && (!type || !TYPE_USER_ALIGN (type))
+ && (!decl || !DECL_USER_ALIGN (decl)))
+ align = 32;
+
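+  /* Editorial example (illustrative, not part of the patch): with -m32
+     -mpreferred-stack-boundary=2, a local "long long" without an
+     explicit user alignment gets 32-bit alignment from the test above,
+     avoiding dynamic realignment of the whole frame for that object.  */
+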
/* If TYPE is NULL, we are allocating a stack slot for caller-save
register in MODE. We will return the largest alignment of XF
and DF. */
IX86_BUILTIN_VPERMILPS,
IX86_BUILTIN_VPERMILPD256,
IX86_BUILTIN_VPERMILPS256,
- IX86_BUILTIN_VPERMIL2PD,
- IX86_BUILTIN_VPERMIL2PS,
- IX86_BUILTIN_VPERMIL2PD256,
- IX86_BUILTIN_VPERMIL2PS256,
IX86_BUILTIN_VPERM2F128PD256,
IX86_BUILTIN_VPERM2F128PS256,
IX86_BUILTIN_VPERM2F128SI256,
IX86_BUILTIN_STOREUPD256,
IX86_BUILTIN_STOREUPS256,
IX86_BUILTIN_LDDQU256,
+ IX86_BUILTIN_MOVNTDQ256,
+ IX86_BUILTIN_MOVNTPD256,
+ IX86_BUILTIN_MOVNTPS256,
IX86_BUILTIN_LOADDQU256,
IX86_BUILTIN_STOREDQU256,
IX86_BUILTIN_MASKLOADPD,
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
+ IX86_BUILTIN_HUGE_VALQ,
IX86_BUILTIN_FABSQ,
IX86_BUILTIN_COPYSIGNQ,
/* Table of all of the builtin functions that are possible with different ISA's
but are waiting to be built until a function is declared to use that
ISA. */
-struct builtin_isa GTY(())
-{
+struct GTY(()) builtin_isa {
tree type; /* builtin type to use in the declaration */
const char *name; /* function name */
int isa; /* isa_flags this builtin is defined for */
V2DF_FTYPE_PCV2DF_V2DF,
V2DI_FTYPE_PV2DI,
VOID_FTYPE_PV2SF_V4SF,
+ VOID_FTYPE_PV4DI_V4DI,
VOID_FTYPE_PV2DI_V2DI,
VOID_FTYPE_PCHAR_V32QI,
VOID_FTYPE_PCHAR_V16QI,
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
- V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
- V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
- V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
- V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
static const struct builtin_description bdesc_multi_arg[] =
{
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
-
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
-
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
+
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
};
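
Aside, not part of the patch: with this table registered, a -msse5 compilation can call any row directly as a builtin. A minimal sketch for the MULTI_ARG_3_SF row of FMADDPS, using the usual vector-extension typedef:

  typedef float __v4sf __attribute__ ((__vector_size__ (16)));

  /* Element-wise fused a * b + c via the SSE5 FMADDPS pattern.  */
  __v4sf
  fmadd_ps (__v4sf a, __v4sf b, __v4sf c)
  {
    return __builtin_ia32_fmaddps (a, b, c);
  }

The substantive change in the rows themselves: the comparison field is now spelled UNKNOWN rather than 0, and the port-private COM_*/PCOM_* codes are cast to enum rtx_code, presumably so the initializers also compile cleanly under -Wc++-compat.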
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
V4DF_type_node, V4DF_type_node,
integer_type_node,
NULL_TREE);
- tree v8sf_ftype_v8sf_v8sf_v8si_int
- = build_function_type_list (V8SF_type_node,
- V8SF_type_node, V8SF_type_node,
- V8SI_type_node, integer_type_node,
- NULL_TREE);
- tree v4df_ftype_v4df_v4df_v4di_int
- = build_function_type_list (V4DF_type_node,
- V4DF_type_node, V4DF_type_node,
- V4DI_type_node, integer_type_node,
- NULL_TREE);
- tree v4sf_ftype_v4sf_v4sf_v4si_int
- = build_function_type_list (V4SF_type_node,
- V4SF_type_node, V4SF_type_node,
- V4SI_type_node, integer_type_node,
- NULL_TREE);
- tree v2df_ftype_v2df_v2df_v2di_int
- = build_function_type_list (V2DF_type_node,
- V2DF_type_node, V2DF_type_node,
- V2DI_type_node, integer_type_node,
- NULL_TREE);
tree v8sf_ftype_pcfloat
= build_function_type_list (V8SF_type_node,
pcfloat_type_node,
V8SI_type_node, V4SI_type_node,
integer_type_node,
NULL_TREE);
+ tree pv4di_type_node = build_pointer_type (V4DI_type_node);
+ tree void_ftype_pv4di_v4di
+ = build_function_type_list (void_type_node,
+ pv4di_type_node, V4DI_type_node,
+ NULL_TREE);
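
In C terms the new type node describes a prototype like the following (illustrative only; the function name is hypothetical, __v4di is the usual vector typedef, and build_function_type_list takes the return type first, then the NULL_TREE-terminated argument types):

  typedef long long __v4di __attribute__ ((__vector_size__ (32)));
  void store_v4di (__v4di *dst, __v4di src);  /* void (V4DI *, V4DI) */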
tree v8sf_ftype_v8sf_v4sf_int
= build_function_type_list (V8SF_type_node,
V8SF_type_node, V4SF_type_node,
case VOID_FTYPE_PV2SF_V4SF:
type = void_ftype_pv2sf_v4sf;
break;
+ case VOID_FTYPE_PV4DI_V4DI:
+ type = void_ftype_pv4di_v4di;
+ break;
case VOID_FTYPE_PV2DI_V2DI:
type = void_ftype_pv2di_v2di;
break;
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
type = v1di_ftype_v1di_v1di_int;
break;
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
- type = v8sf_ftype_v8sf_v8sf_v8si_int;
- break;
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
- type = v4df_ftype_v4df_v4df_v4di_int;
- break;
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
- type = v4sf_ftype_v4sf_v4sf_v4si_int;
- break;
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
- type = v2df_ftype_v2df_v2df_v2di_int;
- break;
default:
gcc_unreachable ();
}
NULL, NULL_TREE);
ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
+ decl = add_builtin_function ("__builtin_huge_valq", ftype,
+ IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
+ NULL, NULL_TREE);
+ ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
+
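Usage is a one-liner (sketch, not from the patch); as the shared expansion case further down shows, it produces the same TFmode infinity as __builtin_infq:

  __float128 h = __builtin_huge_valq ();  /* +Inf in __float128 */
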
/* We will expand them to normal call if SSE2 isn't available since
they are used by libgcc. */
ftype = build_function_type_list (float128_type_node,
static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
enum multi_arg_type m_type,
- enum insn_code sub_code)
+ enum rtx_code sub_code)
{
rtx pat;
int i;
if (last_arg_constant && i == nargs-1)
{
- if (GET_CODE (op) != CONST_INT)
+ if (!CONST_INT_P (op))
{
error ("last argument must be an immediate");
return gen_reg_rtx (tmode);
nargs = 3;
nargs_constant = 2;
break;
- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
- nargs = 4;
- nargs_constant = 1;
- break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
- case CODE_FOR_avx_vpermil2v2df3:
- case CODE_FOR_avx_vpermil2v4sf3:
- case CODE_FOR_avx_vpermil2v4df3:
- case CODE_FOR_avx_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;
memory = 0;
break;
case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV4DI_V4DI:
case VOID_FTYPE_PV2DI_V2DI:
case VOID_FTYPE_PCHAR_V32QI:
case VOID_FTYPE_PCHAR_V16QI:
return ix86_expand_vec_set_builtin (exp);
case IX86_BUILTIN_INFQ:
+ case IX86_BUILTIN_HUGE_VALQ:
{
REAL_VALUE_TYPE inf;
rtx tmp;
/* Dispatch to a handler for a vectorization library. */
if (ix86_veclib_handler)
- return (*ix86_veclib_handler)(fn, type_out, type_in);
+ return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
+ type_in);
return NULL_TREE;
}
static tree
ix86_vectorize_builtin_conversion (unsigned int code, tree type)
{
- if (TREE_CODE (type) != VECTOR_TYPE)
+ if (TREE_CODE (type) != VECTOR_TYPE
+ /* There are only conversions from/to signed integers. */
+ || TYPE_UNSIGNED (TREE_TYPE (type)))
return NULL_TREE;
switch (code)
{
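
The effect of the new TYPE_UNSIGNED bail-out, sketched as user code (illustrative only): the signed loop can still be offered a builtin conversion (cvtdq2ps and friends), while the unsigned one now gets NULL_TREE from this hook:

  void
  cvt_signed (float *f, const int *s, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      f[i] = s[i];    /* signed int -> float: builtin conversion exists */
  }

  void
  cvt_unsigned (float *f, const unsigned int *u, int n)
  {
    int i;
    for (i = 0; i < n; i++)
      f[i] = u[i];    /* unsigned -> float: hook now declines */
  }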
/* Take care for QImode values - they can be in non-QI regs,
but then they do cause partial register stalls. */
- if (regno < 4 || TARGET_64BIT)
+ if (regno <= BX_REG || TARGET_64BIT)
return 1;
if (!TARGET_PARTIAL_REG_STALL)
return 1;
&& TREE_CODE (*node) != FIELD_DECL
&& TREE_CODE (*node) != TYPE_DECL)
{
- warning (OPT_Wattributes, "%qs attribute only applies to functions",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only applies to functions",
+ name);
*no_add_attrs = true;
return NULL_TREE;
}
if (!TARGET_64BIT)
{
- warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
+ name);
*no_add_attrs = true;
return NULL_TREE;
}
if (!(type && (TREE_CODE (*type) == RECORD_TYPE
|| TREE_CODE (*type) == UNION_TYPE)))
{
- warning (OPT_Wattributes, "%qs attribute ignored",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
*no_add_attrs = true;
}
|| ((is_attribute_p ("gcc_struct", name)
&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
{
- warning (OPT_Wattributes, "%qs incompatible attribute ignored",
- IDENTIFIER_POINTER (name));
+ warning (OPT_Wattributes, "%qE incompatible attribute ignored",
+ name);
*no_add_attrs = true;
}
}
}
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* We don't have exact information about the insn sizes, but we may assume
quite safely that we are informed about all 1 byte insns and memory
address sizes. This is enough to eliminate unnecessary padding in
if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
&& XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
return 0;
- if (JUMP_P (insn)
- && (GET_CODE (PATTERN (insn)) == ADDR_VEC
- || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
+ if (JUMP_TABLE_DATA_P (insn))
return 0;
/* Important case - calls are always 5 bytes.
window. */
static void
-ix86_avoid_jump_misspredicts (void)
+ix86_avoid_jump_mispredicts (void)
{
rtx insn, start = get_insns ();
int nbytes = 0, njumps = 0;
The smallest offset in the page INSN can start is the case where START
ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
- We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
+ We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
*/
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ for (insn = start; insn; insn = NEXT_INSN (insn))
{
+ int min_size;
+
+ if (LABEL_P (insn))
+ {
+ int align = label_to_alignment (insn);
+ int max_skip = label_to_max_skip (insn);
+
+ if (max_skip > 15)
+ max_skip = 15;
+ /* If align > 3, only up to 16 - max_skip - 1 bytes can be
+ already in the current 16 byte page, because otherwise
+ ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
+ bytes to reach 16 byte boundary. */
+ if (align <= 0
+ || (align <= 3 && max_skip != (1 << align) - 1))
+ max_skip = 0;
+ if (dump_file)
+ fprintf (dump_file, "Label %i with max_skip %i\n",
+ INSN_UID (insn), max_skip);
+ if (max_skip)
+ {
+ while (nbytes + max_skip >= 16)
+ {
+ start = NEXT_INSN (start);
+ if ((JUMP_P (start)
+ && GET_CODE (PATTERN (start)) != ADDR_VEC
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
+ || CALL_P (start))
+ njumps--, isjump = 1;
+ else
+ isjump = 0;
+ nbytes -= min_insn_size (start);
+ }
+ }
+ continue;
+ }
- nbytes += min_insn_size (insn);
+ min_size = min_insn_size (insn);
+ nbytes += min_size;
if (dump_file)
- fprintf(dump_file, "Insn %i estimated to %i bytes\n",
- INSN_UID (insn), min_insn_size (insn));
+ fprintf (dump_file, "Insn %i estimated to %i bytes\n",
+ INSN_UID (insn), min_size);
if ((JUMP_P (insn)
&& GET_CODE (PATTERN (insn)) != ADDR_VEC
&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
gcc_assert (njumps >= 0);
if (dump_file)
fprintf (dump_file, "Interval %i to %i has %i bytes\n",
- INSN_UID (start), INSN_UID (insn), nbytes);
+ INSN_UID (start), INSN_UID (insn), nbytes);
if (njumps == 3 && isjump && nbytes < 16)
{
if (dump_file)
fprintf (dump_file, "Padding insn %i by %i bytes!\n",
INSN_UID (insn), padsize);
- emit_insn_before (gen_align (GEN_INT (padsize)), insn);
+ emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
}
}
}
+#endif
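
A worked instance of the padding arithmetic, with invented numbers: say the minimal interval holding four branch/call insns measures nbytes = 11 and the final jump is 2 bytes long. All four jumps could then share one 16-byte page, so the pass emits a pad of 15 - nbytes + sizeof (insn) = 15 - 11 + 2 = 6 bytes before the last jump, growing the interval past 16 bytes and forcing the first and fourth jump into different pages. What the new label handling adds is that an ASM_OUTPUT_MAX_SKIP_ALIGN directive may itself consume up to max_skip bytes, so those bytes are charged to the window instead of being treated as free.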
/* AMD Athlon works faster
when RET is not destination of conditional jump or directly preceded
static void
ix86_reorg (void)
{
- if (TARGET_PAD_RETURNS && optimize
- && optimize_function_for_speed_p (cfun))
- ix86_pad_returns ();
- if (TARGET_FOUR_JUMP_LIMIT && optimize
- && optimize_function_for_speed_p (cfun))
- ix86_avoid_jump_misspredicts ();
+ if (optimize && optimize_function_for_speed_p (cfun))
+ {
+ if (TARGET_PAD_RETURNS)
+ ix86_pad_returns ();
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD
+ if (TARGET_FOUR_JUMP_LIMIT)
+ ix86_avoid_jump_mispredicts ();
+#endif
+ }
}
/* Return nonzero when QImode register that must be represented via REX prefix
extract_insn_cached (insn);
for (i = 0; i < recog_data.n_operands; i++)
if (REG_P (recog_data.operand[i])
- && REGNO (recog_data.operand[i]) >= 4)
+ && REGNO (recog_data.operand[i]) > BX_REG)
return true;
return false;
}
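
For intuition (not in the patch): hard registers 0..3 are AX, DX, CX and BX, whose low bytes (%al .. %bl) are encodable without REX; anything above BX_REG means a byte access like

  setg %sil

which is only encodable with a REX prefix in 64-bit mode. The symbolic BX_REG bound says exactly what the old literal 4 meant.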
case V16HImode:
case V8SImode:
case V8SFmode:
- case V4DImode:
case V4DFmode:
use_vector_set = TARGET_AVX;
break;
+ case V4DImode:
+ /* Use ix86_expand_vector_set in 64bit mode only. */
+ use_vector_set = TARGET_AVX && TARGET_64BIT;
+ break;
default:
break;
}
the general case. */
return false;
- case V4DFmode:
case V4DImode:
+ /* Use ix86_expand_vector_set in 64bit mode only. */
+ if (!TARGET_64BIT)
+ return false;
+ case V4DFmode:
case V8SFmode:
case V8SImode:
case V16HImode:
return "fstp\t%y0";
}
if (STACK_TOP_P (operands[0]))
- return "fld%z1\t%y1";
+ return "fld%Z1\t%y1";
return "fst\t%y0";
}
else if (MEM_P (operands[0]))
{
gcc_assert (REG_P (operands[1]));
if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
- return "fstp%z0\t%y0";
+ return "fstp%Z0\t%y0";
else
{
/* There is no non-popping store to memory for XFmode.
So if we need one, follow the store with a load. */
if (GET_MODE (operands[0]) == XFmode)
- return "fstp%z0\t%y0\n\tfld%z0\t%y0";
+ return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
else
- return "fst%z0\t%y0";
+ return "fst%Z0\t%y0";
}
}
else
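To make the %z -> %Z renaming concrete (sketch; assuming the new modifier keeps the old x87 suffix behavior): with a DFmode memory operand, fstp%Z0 would print as

  fstpl -8(%ebp)   # 'l' suffix selected from DFmode

i.e. uppercase %Z is now the x87-suffix modifier, leaving lowercase %z free for the integer/SSE suffix logic it also serves.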
rtx tmp = gen_reg_rtx (XFmode);
rtx tmp2 = gen_reg_rtx (XFmode);
+ rtx test;
emit_insn (gen_absxf2 (tmp, op1));
- emit_insn (gen_cmpxf (tmp,
+ test = gen_rtx_GE (VOIDmode, tmp,
CONST_DOUBLE_FROM_REAL_VALUE (
REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
- XFmode)));
- emit_jump_insn (gen_bge (label1));
+ XFmode));
+ emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
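
The conversion recipe, generalized (a sketch of the new idiom, not additional patch content): instead of a cmpXF pattern followed by a separate bcc pattern, build the comparison rtx once and hand it, together with its two operands and the label, to the combined cbranch expander:

  test = gen_rtx_GE (VOIDmode, a, b);
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0),
                                  XEXP (test, 1), label));

Presumably the same mechanical rewrite applies wherever the port still emits gen_cmp*/gen_b* pairs.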
tree
ix86_fn_abi_va_list (tree fndecl)
{
- int abi;
-
if (!TARGET_64BIT)
return va_list_type_node;
gcc_assert (fndecl != NULL_TREE);
- abi = ix86_function_abi ((const_tree) fndecl);
- if (abi == MS_ABI)
+ if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
return ms_va_list_type_node;
else
return sysv_va_list_type_node;
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
+
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
+#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
+#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
+
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
+#undef TARGET_EXPAND_TO_RTL_HOOK
+#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
#include "gt-i386.h"