/* Subroutines used for code generation on IA-32.
- Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
- 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Free Software Foundation, Inc.
This file is part of GCC.
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
+#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
/* Feature tests against the various architecture variations, used to create
ix86_arch_features based on the processor mask. */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
- /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
+ /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
~(m_386 | m_486 | m_PENT | m_K6),
/* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
-/* true if sse prefetch instruction is not NOOP. */
+/* True if processor has SSE prefetch instruction. */
int x86_prefetch_sse;
+/* True if processor has prefetchw instruction. */
+int x86_prefetchw;
+
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
= "force_align_arg_pointer";
#define PTA_XOP (HOST_WIDE_INT_1 << 29)
#define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
#define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
+#define PTA_PREFETCHW (HOST_WIDE_INT_1 << 32)
+
/* if this reaches 64, need to widen struct pta flags below */
static struct pta
| PTA_SSSE3 | PTA_CX16},
{"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT},
{"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
- | PTA_FMA | PTA_MOVBE},
+ | PTA_FMA | PTA_MOVBE},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
- PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
{"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
{"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_NO_SAHF},
{"opteron-sse3", PROCESSOR_K8, CPU_K8,
- PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
{"athlon64", PROCESSOR_K8, CPU_K8,
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
{"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
- | PTA_XOP | PTA_LWP},
+ PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+ | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_FMA4 | PTA_XOP | PTA_LWP},
{"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
- | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+ PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+ | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
| PTA_FMA},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
+ PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
{"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
"large", "32");
else if (TARGET_X32)
error ("code model %qs not supported in x32 mode",
- "medium");
+ "large");
break;
case CM_32:
ix86_isa_flags |= OPTION_MASK_ISA_F16C;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
+ if (processor_alias_table[i].flags & PTA_PREFETCHW)
+ x86_prefetchw = true;
break;
}
-mtune (rather than -march) points us to a processor that has them.
However, the VIA C3 gives a SIGILL, so we only do that for i686 and
higher processors. */
- if (TARGET_CMOVE
+ if (TARGET_CMOV
&& (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
x86_prefetch_sse = true;
break;
in case they weren't overwritten by command line options. */
if (TARGET_64BIT)
{
- if (optimize > 1 && !global_options_set.x_flag_zee)
- flag_zee = 1;
if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
if (flag_asynchronous_unwind_tables == 2)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
- /* For sane SSE instruction set generation we need fcomi instruction.
- It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
- expands to a sequence that includes conditional move. */
- if (TARGET_SSE || TARGET_RDRND)
- TARGET_CMOVE = 1;
-
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
{
char *p;
{
/* The return value of this function uses 256bit AVX modes. */
if (caller)
- cfun->machine->callee_return_avx256_p = true;
+ {
+ cfun->machine->callee_return_avx256_p = true;
+ cum->callee_return_avx256_p = true;
+ }
else
cfun->machine->caller_return_avx256_p = true;
}
/* Likewise, error if the ABI requires us to return values in the
x87 registers and the user specified -mno-80387. */
- if (!TARGET_80387 && in_return)
+ if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
for (i = 0; i < n; i++)
if (regclass[i] == X86_64_X87_CLASS
|| regclass[i] == X86_64_X87UP_CLASS
{
/* This argument uses 256bit AVX modes. */
if (cum->caller)
- cfun->machine->callee_pass_avx256_p = true;
+ cum->callee_pass_avx256_p = true;
else
cfun->machine->caller_pass_avx256_p = true;
}
+ if (cum->caller && mode == VOIDmode)
+ {
+ /* This function is called with MODE == VOIDmode immediately
+ before the call instruction is emitted. We copy callee 256bit
+ AVX info from the current CUM here. */
+ cfun->machine->callee_return_avx256_p = cum->callee_return_avx256_p;
+ cfun->machine->callee_pass_avx256_p = cum->callee_pass_avx256_p;
+ }
+
return arg;
}
switch (regno)
{
case AX_REG:
+ case DX_REG:
return true;
+ case DI_REG:
+ case SI_REG:
+ return TARGET_64BIT && ix86_abi != MS_ABI;
- case FIRST_FLOAT_REG:
+ /* Complex values are returned in %st(0)/%st(1) pair. */
+ case ST0_REG:
+ case ST1_REG:
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
- case FIRST_SSE_REG:
+ /* Complex values are returned in %xmm0/%xmm1 pair. */
+ case XMM0_REG:
+ case XMM1_REG:
return TARGET_SSE;
- case FIRST_MMX_REG:
+ case MM0_REG:
if (TARGET_MACHO || TARGET_64BIT)
return false;
return TARGET_MMX;
if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
return true;
+ /* Win64 SEH, very large frames need a frame-pointer as maximum stack
+ allocation is 4GB. */
+ if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
+ return true;
+
/* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
turns off the frame pointer by default. Turn it back on now if
we've not got a leaf function. */
if (!flag_pic)
{
- xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
+ if (TARGET_MACHO)
+ /* We don't need a pic base, we're not producing pic. */
+ gcc_unreachable ();
+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
-
-#if TARGET_MACHO
- /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
- is what will be referenced by the Mach-O PIC subsystem. */
- if (!label)
- ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
-#endif
-
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
}
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
output_asm_insn ("call\t%X2", xops);
- /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
- is what will be referenced by the Mach-O PIC subsystem. */
+
#if TARGET_MACHO
- if (!label)
+ /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
+ This is what will be referenced by the Mach-O PIC subsystem. */
+ if (machopic_should_output_picbase_label () || !label)
ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
- else
+
+ /* When we are restoring the pic base at the site of a nonlocal label,
+ and we decided to emit the pic base above, we will still output a
+ local label used for calculating the correction offset (even though
+ the offset will be 0 in that case). */
+ if (label)
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (label));
#endif
&& (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
|| crtl->profile
|| crtl->calls_eh_return
- || crtl->uses_const_pool))
+ || crtl->uses_const_pool
+ || cfun->has_nonlocal_label))
return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
if (crtl->calls_eh_return && maybe_eh_return)
offset += frame->nregs * UNITS_PER_WORD;
frame->reg_save_offset = offset;
+ /* On SEH target, registers are pushed just before the frame pointer
+ location. */
+ if (TARGET_SEH)
+ frame->hard_frame_pointer_offset = offset;
+
/* Align and set SSE register save area. */
if (frame->nsseregs)
{
{
HOST_WIDE_INT diff;
- /* If we can leave the frame pointer where it is, do so. */
+ /* If we can leave the frame pointer where it is, do so. Also, returns
+ the establisher frame for __builtin_frame_address (0). */
diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
- if (diff > 240 || (diff & 15) != 0)
+ if (diff <= SEH_MAX_FRAME_SIZE
+ && (diff > 240 || (diff & 15) != 0)
+ && !crtl->accesses_prior_frames)
{
/* Ideally we'd determine what portion of the local stack frame
(within the constraint of the lowest 240) is most heavily used.
tree decl = current_function_decl, fntype = TREE_TYPE (decl);
bool fastcall_p
= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+ bool thiscall_p
+ = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
bool static_chain_p = DECL_STATIC_CHAIN (decl);
int regparm = ix86_function_regparm (fntype, decl);
int drap_regno
if ((regparm < 1 || (fastcall_p && !static_chain_p))
&& drap_regno != AX_REG)
regno = AX_REG;
- else if (regparm < 2 && drap_regno != DX_REG)
+ /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
+ for the static chain register. */
+ else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
+ regno = AX_REG;
+ else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
regno = DX_REG;
/* ecx is the static chain register. */
- else if (regparm < 3 && !fastcall_p && !static_chain_p
+ else if (regparm < 3 && !fastcall_p && !thiscall_p
+ && !static_chain_p
&& drap_regno != CX_REG)
regno = CX_REG;
else if (ix86_save_reg (BX_REG, true))
{
if (sr->saved)
{
+ struct machine_function *m = cfun->machine;
rtx x, insn = emit_insn (gen_pop (sr->reg));
/* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
+ m->fs.sp_offset -= UNITS_PER_WORD;
}
}
struct ix86_frame frame;
HOST_WIDE_INT allocate;
bool int_registers_saved;
+ bool sse_registers_saved;
ix86_finalize_stack_realign_flags ();
m->fs.realigned = true;
}
+ int_registers_saved = (frame.nregs == 0);
+ sse_registers_saved = (frame.nsseregs == 0);
+
if (frame_pointer_needed && !m->fs.fp_valid)
{
/* Note: AT&T enter does NOT have reversed args. Enter is probably
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
+ /* Push registers now, before setting the frame pointer
+ on SEH target. */
+ if (!int_registers_saved
+ && TARGET_SEH
+ && !frame.save_regs_using_mov)
+ {
+ ix86_emit_save_regs ();
+ int_registers_saved = true;
+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+ }
+
if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
{
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
}
}
- int_registers_saved = (frame.nregs == 0);
-
if (!int_registers_saved)
{
/* If saving registers via PUSH, do so now. */
current_function_static_stack_size = stack_size;
}
+ /* On SEH target with very large frame size, allocate an area to save
+ SSE registers (as the very large allocation won't be described). */
+ if (TARGET_SEH
+ && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
+ && !sse_registers_saved)
+ {
+ HOST_WIDE_INT sse_size =
+ frame.sse_reg_save_offset - frame.reg_save_offset;
+
+ gcc_assert (int_registers_saved);
+
+ /* No need to do stack checking as the area will be immediately
+ written. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-sse_size), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ allocate -= sse_size;
+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
+ sse_registers_saved = true;
+ }
+
/* The stack has already been decremented by the instruction calling us
so probe if the size is non-negative to preserve the protection area. */
if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
rtx eax = gen_rtx_REG (Pmode, AX_REG);
rtx r10 = NULL;
rtx (*adjust_stack_insn)(rtx, rtx, rtx);
-
+ const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
bool eax_live = false;
bool r10_live = false;
if (!TARGET_64BIT_MS_ABI)
eax_live = ix86_eax_live_at_start_p ();
+ /* Note that SEH directives need to continue tracking the stack
+ pointer even after the frame pointer has been set up. */
if (eax_live)
{
- emit_insn (gen_push (eax));
+ insn = emit_insn (gen_push (eax));
allocate -= UNITS_PER_WORD;
+ if (sp_is_cfa_reg || TARGET_SEH)
+ {
+ if (sp_is_cfa_reg)
+ m->fs.cfa_offset += UNITS_PER_WORD;
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
+
if (r10_live)
{
r10 = gen_rtx_REG (Pmode, R10_REG);
- emit_insn (gen_push (r10));
+ insn = emit_insn (gen_push (r10));
allocate -= UNITS_PER_WORD;
+ if (sp_is_cfa_reg || TARGET_SEH)
+ {
+ if (sp_is_cfa_reg)
+ m->fs.cfa_offset += UNITS_PER_WORD;
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
emit_move_insn (eax, GEN_INT (allocate));
insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
stack_pointer_rtx, eax));
- /* Note that SEH directives need to continue tracking the stack
- pointer even after the frame pointer has been set up. */
- if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
+ if (sp_is_cfa_reg || TARGET_SEH)
{
- if (m->fs.cfa_reg == stack_pointer_rtx)
+ if (sp_is_cfa_reg)
m->fs.cfa_offset += allocate;
-
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
gen_rtx_SET (VOIDmode, stack_pointer_rtx,
if (r10_live && eax_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = plus_constant (stack_pointer_rtx, allocate);
emit_move_insn (r10, gen_frame_mem (Pmode, t));
- t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
+ t = plus_constant (stack_pointer_rtx,
+ allocate - UNITS_PER_WORD);
emit_move_insn (eax, gen_frame_mem (Pmode, t));
}
else if (eax_live || r10_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = plus_constant (stack_pointer_rtx, allocate);
emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
}
}
if (!int_registers_saved)
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- if (frame.nsseregs)
+ if (!sse_registers_saved)
ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
pic_reg_used = false;
add_reg_note (insn, REG_CFA_DEF_CFA,
plus_constant (stack_pointer_rtx, m->fs.sp_offset));
RTX_FRAME_RELATED_P (insn) = 1;
- ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
- m->fs.fp_offset);
}
+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
+ m->fs.fp_offset);
}
/* Emit code to restore saved registers using MOV insns.
}
/* First step is to deallocate the stack frame so that we can
- pop the registers. */
- if (!m->fs.sp_valid)
+ pop the registers. Also do it on SEH target for very large
+ frame as the emitted instructions aren't allowed by the ABI in
+ epilogues. */
+ if (!m->fs.sp_valid
+ || (TARGET_SEH
+ && (m->fs.sp_offset - frame.reg_save_offset
+ >= SEH_MAX_FRAME_SIZE)))
{
pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
GEN_INT (m->fs.fp_offset
return R11_REG;
else
{
- bool is_fastcall;
+ bool is_fastcall, is_thiscall;
int regparm;
is_fastcall = (lookup_attribute ("fastcall",
TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
!= NULL);
+ is_thiscall = (lookup_attribute ("thiscall",
+ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
+ != NULL);
regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
if (is_fastcall)
}
return AX_REG;
}
+ else if (is_thiscall)
+ {
+ if (!DECL_STATIC_CHAIN (cfun->decl))
+ return DX_REG;
+ return AX_REG;
+ }
else if (regparm < 3)
{
if (!DECL_STATIC_CHAIN (cfun->decl))
}
}
\f
-/* Determine if op is suitable SUBREG RTX for address. */
-
-static bool
-ix86_address_subreg_operand (rtx op)
-{
- enum machine_mode mode;
-
- if (!REG_P (op))
- return false;
-
- mode = GET_MODE (op);
-
- if (GET_MODE_CLASS (mode) != MODE_INT)
- return false;
-
- /* Don't allow SUBREGs that span more than a word. It can lead to spill
- failures when the register is one word out of a two word structure. */
- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- return false;
-
- /* Allow only SUBREGs of non-eliminable hard registers. */
- return register_no_elim_operand (op, mode);
-}
-
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
{
if (GET_CODE (addr) == ZERO_EXTEND
&& GET_MODE (XEXP (addr, 0)) == SImode)
- addr = XEXP (addr, 0);
+ {
+ addr = XEXP (addr, 0);
+ if (CONST_INT_P (addr))
+ return 0;
+ }
else if (GET_CODE (addr) == AND
&& const_32bit_mask (XEXP (addr, 1), DImode))
{
addr = XEXP (addr, 0);
- /* Strip subreg. */
+ /* Adjust SUBREGs. */
if (GET_CODE (addr) == SUBREG
&& GET_MODE (SUBREG_REG (addr)) == SImode)
- addr = SUBREG_REG (addr);
+ {
+ addr = SUBREG_REG (addr);
+ if (CONST_INT_P (addr))
+ return 0;
+ }
+ else if (GET_MODE (addr) == DImode)
+ addr = gen_rtx_SUBREG (SImode, addr, 0);
+ else if (GET_MODE (addr) != VOIDmode)
+ return 0;
+ }
+ }
+
+ /* Allow SImode subregs of DImode addresses,
+ they will be emitted with addr32 prefix. */
+ if (TARGET_64BIT && GET_MODE (addr) == SImode)
+ {
+ if (GET_CODE (addr) == SUBREG
+ && GET_MODE (SUBREG_REG (addr)) == DImode)
+ {
+ addr = SUBREG_REG (addr);
+ if (CONST_INT_P (addr))
+ return 0;
}
}
base = addr;
else if (GET_CODE (addr) == SUBREG)
{
- if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ if (REG_P (SUBREG_REG (addr)))
base = addr;
else
return 0;
break;
case SUBREG:
- if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ if (!REG_P (SUBREG_REG (op)))
return 0;
/* FALLTHRU */
if (REG_P (index))
;
else if (GET_CODE (index) == SUBREG
- && ix86_address_subreg_operand (SUBREG_REG (index)))
+ && REG_P (SUBREG_REG (index)))
;
else
return 0;
break;
if (GET_CODE (op0) == LABEL_REF)
return true;
+ if (GET_CODE (op0) == CONST
+ && GET_CODE (XEXP (op0, 0)) == UNSPEC
+ && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
+ return true;
+ if (GET_CODE (op0) == UNSPEC
+ && XINT (op0, 1) == UNSPEC_PCREL)
+ return true;
if (GET_CODE (op0) != SYMBOL_REF)
break;
/* FALLTHRU */
return false;
}
+/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
+ replace the input X, or the original X if no replacement is called for.
+ The output parameter *WIN is 1 if the calling macro should goto WIN,
+ 0 if it should not. */
+
+bool
+ix86_legitimize_reload_address (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ /* Reload can generate:
+
+ (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
+ (reg:DI 97))
+ (reg:DI 2 cx))
+
+ This RTX is rejected from ix86_legitimate_address_p due to
+ non-strictness of base register 97. Following this rejection,
+ reload pushes all three components into separate registers,
+ creating invalid memory address RTX.
+
+ Following code reloads only the invalid part of the
+ memory address RTX. */
+
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 1))
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && REG_P (XEXP (XEXP (x, 0), 1)))
+ {
+ rtx base, index;
+ bool something_reloaded = false;
+
+ base = XEXP (XEXP (x, 0), 1);
+ if (!REG_OK_FOR_BASE_STRICT_P (base))
+ {
+ push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ something_reloaded = true;
+ }
+
+ index = XEXP (x, 1);
+ if (!REG_OK_FOR_INDEX_STRICT_P (index))
+ {
+ push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
+ INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ something_reloaded = true;
+ }
+
+ gcc_assert (something_reloaded);
+ return true;
+ }
+
+ return false;
+}
+
+/* Determine if op is suitable RTX for an address register.
+ Return naked register if a register or a register subreg is
+ found, otherwise return NULL_RTX. */
+
+static rtx
+ix86_validate_address_register (rtx op)
+{
+ enum machine_mode mode = GET_MODE (op);
+
+ /* Only SImode or DImode registers can form the address. */
+ if (mode != SImode && mode != DImode)
+ return NULL_RTX;
+
+ if (REG_P (op))
+ return op;
+ else if (GET_CODE (op) == SUBREG)
+ {
+ rtx reg = SUBREG_REG (op);
+
+ if (!REG_P (reg))
+ return NULL_RTX;
+
+ mode = GET_MODE (reg);
+
+ /* Don't allow SUBREGs that span more than a word. It can
+ lead to spill failures when the register is one word out
+ of a two word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return NULL_RTX;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ if (register_no_elim_operand (reg, mode))
+ return reg;
+ }
+
+ /* Op is not a register. */
+ return NULL_RTX;
+}
+
/* Recognizes RTL expressions that are valid memory addresses for an
instruction. The MODE argument is the machine mode for the MEM
expression that wants to use this address.
struct ix86_address parts;
rtx base, index, disp;
HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
if (ix86_decompose_address (addr, &parts) <= 0)
/* Decomposition failed. */
index = parts.index;
disp = parts.disp;
scale = parts.scale;
+ seg = parts.seg;
/* Validate base register. */
if (base)
{
- rtx reg;
-
- if (REG_P (base))
- reg = base;
- else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
- reg = SUBREG_REG (base);
- else
- /* Base is not a register. */
- return false;
+ rtx reg = ix86_validate_address_register (base);
- if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
/* Validate index register. */
if (index)
{
- rtx reg;
-
- if (REG_P (index))
- reg = index;
- else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
- reg = SUBREG_REG (index);
- else
- /* Index is not a register. */
- return false;
+ rtx reg = ix86_validate_address_register (index);
- if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
&& GET_MODE (base) != GET_MODE (index))
return false;
+ /* Address override works only on the (%reg) part of %fs:(%reg). */
+ if (seg != SEG_DEFAULT
+ && ((base && GET_MODE (base) != word_mode)
+ || (index && GET_MODE (index) != word_mode)))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
&& !x86_64_immediate_operand (disp, VOIDmode))
/* Displacement is out of range. */
return false;
+ /* In x32 mode, constant addresses are sign extended to 64bit, so
+ we have to prevent addresses from 0x80000000 to 0xffffffff. */
+ else if (TARGET_X32 && !(index || base)
+ && CONST_INT_P (disp)
+ && val_signbit_known_set_p (SImode, INTVAL (disp)))
+ return false;
}
/* Everything looks valid. */
{
rtx addr = orig;
rtx new_rtx = orig;
- rtx base;
#if TARGET_MACHO
if (TARGET_MACHO && !TARGET_64BIT)
}
else
{
- base = legitimize_pic_address (XEXP (addr, 0), reg);
- new_rtx = legitimize_pic_address (XEXP (addr, 1),
- base == reg ? NULL_RTX : reg);
+ rtx base = legitimize_pic_address (op0, reg);
+ enum machine_mode mode = GET_MODE (base);
+ new_rtx
+ = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
if (CONST_INT_P (new_rtx))
- new_rtx = plus_constant (base, INTVAL (new_rtx));
+ {
+ if (INTVAL (new_rtx) < -16*1024*1024
+ || INTVAL (new_rtx) >= 16*1024*1024)
+ {
+ if (!x86_64_immediate_operand (new_rtx, mode))
+ new_rtx = force_reg (mode, new_rtx);
+ new_rtx
+ = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
+ }
+ else
+ new_rtx = plus_constant (base, INTVAL (new_rtx));
+ }
else
{
- if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
+ if (GET_CODE (new_rtx) == PLUS
+ && CONSTANT_P (XEXP (new_rtx, 1)))
{
- base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
new_rtx = XEXP (new_rtx, 1);
}
- new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
+ new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
}
}
}
tp = get_thread_pointer (true);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
+
set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
}
else
if (TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
+ rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx insns;
start_sequence ();
emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
insns = get_insns ();
end_sequence ();
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
+
RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, dest, rax, x);
}
if (TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
+ rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx insns, eqv;
start_sequence ();
emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
{
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
+
set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
}
break;
if (TARGET_64BIT)
{
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_MODE (XEXP (x, 0)) == Pmode
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
+ {
+ rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
+ x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
+ }
if (GET_CODE (x) != CONST
|| GET_CODE (XEXP (x, 0)) != UNSPEC
|| (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
&& XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
- || !MEM_P (orig_x))
+ || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
return ix86_delegitimize_tls_address (orig_x);
x = XVECEXP (XEXP (x, 0), 0, 0);
- if (GET_MODE (orig_x) != GET_MODE (x))
+ if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
{
x = simplify_gen_subreg (GET_MODE (orig_x), x,
GET_MODE (x), 0);
Those same assemblers have the same but opposite lossage on cmov. */
if (mode == CCmode)
suffix = fp ? "nbe" : "a";
- else if (mode == CCCmode)
- suffix = "b";
else
gcc_unreachable ();
break;
}
break;
case LTU:
- gcc_assert (mode == CCmode || mode == CCCmode);
- suffix = "b";
+ if (mode == CCmode)
+ suffix = "b";
+ else if (mode == CCCmode)
+ suffix = "c";
+ else
+ gcc_unreachable ();
break;
case GE:
switch (mode)
}
break;
case GEU:
- /* ??? As above. */
- gcc_assert (mode == CCmode || mode == CCCmode);
- suffix = fp ? "nb" : "ae";
+ if (mode == CCmode)
+ suffix = fp ? "nb" : "ae";
+ else if (mode == CCCmode)
+ suffix = "nc";
+ else
+ gcc_unreachable ();
break;
case LE:
gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
suffix = "le";
break;
case LEU:
- /* ??? As above. */
if (mode == CCmode)
suffix = "be";
- else if (mode == CCCmode)
- suffix = fp ? "nb" : "ae";
else
gcc_unreachable ();
break;
print_reg (rtx x, int code, FILE *file)
{
const char *reg;
+ unsigned int regno;
bool duplicated = code == 'd' && TARGET_AVX;
- gcc_assert (x == pc_rtx
- || (REGNO (x) != ARG_POINTER_REGNUM
- && REGNO (x) != FRAME_POINTER_REGNUM
- && REGNO (x) != FLAGS_REG
- && REGNO (x) != FPSR_REG
- && REGNO (x) != FPCR_REG));
-
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('%', file);
return;
}
+ regno = true_regnum (x);
+ gcc_assert (regno != ARG_POINTER_REGNUM
+ && regno != FRAME_POINTER_REGNUM
+ && regno != FLAGS_REG
+ && regno != FPSR_REG
+ && regno != FPCR_REG);
+
if (code == 'w' || MMX_REG_P (x))
code = 2;
else if (code == 'b')
/* Irritatingly, AMD extended registers use different naming convention
from the normal registers: "r%d[bwd]" */
- if (REX_INT_REG_P (x))
+ if (REX_INT_REGNO_P (regno))
{
gcc_assert (TARGET_64BIT);
putc ('r', file);
- fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
+ fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
switch (code)
{
case 0:
case 16:
case 2:
normal:
- reg = hi_reg_name[REGNO (x)];
+ reg = hi_reg_name[regno];
break;
case 1:
- if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
+ if (regno >= ARRAY_SIZE (qi_reg_name))
goto normal;
- reg = qi_reg_name[REGNO (x)];
+ reg = qi_reg_name[regno];
break;
case 0:
- if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
+ if (regno >= ARRAY_SIZE (qi_high_reg_name))
goto normal;
- reg = qi_high_reg_name[REGNO (x)];
+ reg = qi_high_reg_name[regno];
break;
case 32:
if (SSE_REG_P (x))
{
gcc_assert (!duplicated);
putc ('y', file);
- fputs (hi_reg_name[REGNO (x)] + 1, file);
+ fputs (hi_reg_name[regno] + 1, file);
return;
}
break;
Z -- likewise, with special suffixes for x87 instructions.
* -- print a star (in certain assembler syntax)
A -- print an absolute memory reference.
+ E -- print address with DImode register names if TARGET_64BIT.
w -- print the operand as if it's a "word" (HImode) even if it isn't.
s -- print a shift double count, followed by the assemblers argument
delimiter.
ix86_print_operand (file, x, 0);
return;
+ case 'E':
+ /* Wrap address in an UNSPEC to declare special handling. */
+ if (TARGET_64BIT)
+ x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
+ output_address (x);
+ return;
+
case 'L':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('l', file);
return;
case 'H':
+ if (!offsettable_memref_p (x))
+ {
+ output_operand_lossage ("operand is not an offsettable memory "
+ "reference, invalid operand "
+ "code 'H'");
+ return;
+ }
/* It doesn't actually matter what mode we use here, as we're
only going to use this for printing. */
x = adjust_address_nv (x, DImode, 8);
putc ('$', file);
/* Sign extend 32bit SFmode immediate to 8 bytes. */
if (code == 'q')
- fprintf (file, "0x%08llx", (unsigned long long) (int) l);
+ fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
+ (unsigned long long) (int) l);
else
fprintf (file, "0x%08x", (unsigned int) l);
}
int scale;
int ok;
bool vsib = false;
+ int code = 0;
if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
{
addr = XVECEXP (addr, 0, 0);
vsib = true;
}
+ else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
+ {
+ gcc_assert (TARGET_64BIT);
+ ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
+ code = 'q';
+ }
else
ok = ix86_decompose_address (addr, &parts);
gcc_assert (ok);
- if (parts.base && GET_CODE (parts.base) == SUBREG)
- {
- rtx tmp = SUBREG_REG (parts.base);
- parts.base = simplify_subreg (GET_MODE (parts.base),
- tmp, GET_MODE (tmp), 0);
- }
-
- if (parts.index && GET_CODE (parts.index) == SUBREG)
- {
- rtx tmp = SUBREG_REG (parts.index);
- parts.index = simplify_subreg (GET_MODE (parts.index),
- tmp, GET_MODE (tmp), 0);
- }
-
base = parts.base;
index = parts.index;
disp = parts.disp;
}
else
{
- int code = 0;
-
- /* Print SImode registers for zero-extended addresses to force
- addr32 prefix. Otherwise print DImode registers to avoid it. */
- if (TARGET_64BIT)
- code = ((GET_CODE (addr) == ZERO_EXTEND
- || GET_CODE (addr) == AND)
- ? 'l'
- : 'q');
+ /* Print SImode register names to force addr32 prefix. */
+ if (SImode_address_operand (addr, VOIDmode))
+ {
+#ifdef ENABLE_CHECKING
+ gcc_assert (TARGET_64BIT);
+ switch (GET_CODE (addr))
+ {
+ case SUBREG:
+ gcc_assert (GET_MODE (addr) == SImode);
+ gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
+ break;
+ case ZERO_EXTEND:
+ case AND:
+ gcc_assert (GET_MODE (addr) == DImode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+#endif
+ gcc_assert (!code);
+ code = 'k';
+ }
+ else if (code == 0
+ && TARGET_X32
+ && disp
+ && CONST_INT_P (disp)
+ && INTVAL (disp) < -16*1024*1024)
+ {
+ /* X32 runs in 64-bit mode, where displacement, DISP, in
+ address DISP(%r64), is encoded as 32-bit immediate sign-
+ extended from 32-bit to 64-bit. For -0x40000300(%r64),
+ address is %r64 + 0xffffffffbffffd00. When %r64 <
+ 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
+ which is invalid for x32. The correct address is %r64
+ - 0x40000300 == 0xf7ffdd64. To properly encode
+ -0x40000300(%r64) for x32, we zero-extend negative
+ displacement by forcing addr32 prefix which truncates
+ 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
+ zero-extend all negative displacements, including -1(%rsp).
+ However, for small negative displacements, sign-extension
+ won't cause overflow. We only zero-extend negative
+ displacements if they < -16*1024*1024, which is also used
+ to check legitimate address displacements for PIC. */
+ code = 'k';
+ }
if (ASSEMBLER_DIALECT == ASM_ATT)
{
op0, 1, OPTAB_DIRECT);
if (tmp == op0)
return;
- if (GET_MODE (tmp) != mode)
- op1 = convert_to_mode (mode, tmp, 1);
+ op1 = convert_to_mode (mode, tmp, 1);
}
}
{
rtx m;
rtx (*extract) (rtx, rtx, rtx);
- rtx (*move_unaligned) (rtx, rtx);
+ rtx (*load_unaligned) (rtx, rtx);
+ rtx (*store_unaligned) (rtx, rtx);
enum machine_mode mode;
switch (GET_MODE (op0))
gcc_unreachable ();
case V32QImode:
extract = gen_avx_vextractf128v32qi;
- move_unaligned = gen_avx_movdqu256;
+ load_unaligned = gen_avx_loaddqu256;
+ store_unaligned = gen_avx_storedqu256;
mode = V16QImode;
break;
case V8SFmode:
extract = gen_avx_vextractf128v8sf;
- move_unaligned = gen_avx_movups256;
+ load_unaligned = gen_avx_loadups256;
+ store_unaligned = gen_avx_storeups256;
mode = V4SFmode;
break;
case V4DFmode:
extract = gen_avx_vextractf128v4df;
- move_unaligned = gen_avx_movupd256;
+ load_unaligned = gen_avx_loadupd256;
+ store_unaligned = gen_avx_storeupd256;
mode = V2DFmode;
break;
}
- if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ if (MEM_P (op1))
{
- rtx r = gen_reg_rtx (mode);
- m = adjust_address (op1, mode, 0);
- emit_move_insn (r, m);
- m = adjust_address (op1, mode, 16);
- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
- emit_move_insn (op0, r);
+ if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else
+ emit_insn (load_unaligned (op0, op1));
}
- else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ else if (MEM_P (op0))
{
- m = adjust_address (op0, mode, 0);
- emit_insn (extract (m, op1, const0_rtx));
- m = adjust_address (op0, mode, 16);
- emit_insn (extract (m, op1, const1_rtx));
+ if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
+ emit_insn (store_unaligned (op0, op1));
}
else
- emit_insn (move_unaligned (op0, op1));
+ gcc_unreachable ();
}
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
rtx op0, op1, m;
+ rtx (*move_unaligned) (rtx, rtx);
op0 = operands[0];
op1 = operands[1];
/* If we're optimizing for size, movups is the smallest. */
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
return;
}
+ if (MEM_P (op1))
+ move_unaligned = gen_sse2_loaddqu;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse2_storedqu;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
break;
case 32:
op0 = gen_lowpart (V32QImode, op0);
switch (mode)
{
case V4SFmode:
- emit_insn (gen_sse_movups (op0, op1));
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
+ emit_insn (move_unaligned (op0, op1));
break;
case V8SFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
case V2DFmode:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
return;
}
- emit_insn (gen_sse2_movupd (op0, op1));
+ if (MEM_P (op1))
+ move_unaligned = gen_sse2_loadupd;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse2_storeupd;
+ else
+ gcc_unreachable ();
+
+ emit_insn (move_unaligned (op0, op1));
break;
case V4DFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_loaddqu (op0, op1));
return;
}
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_loadupd (op0, op1));
return;
}
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
return;
}
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_storedqu (op0, op1));
return;
}
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_storeupd (op0, op1));
}
else
{
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
}
else
{
over a sequence of instructions. Instructions sequence has
SPLIT_COST cycles higher latency than lea latency. */
-bool
+static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
- unsigned int regno2, unsigned int split_cost)
+ unsigned int regno2, int split_cost)
{
int dist_define, dist_use;
regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
- return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
}
/* Return true if we need to split lea into a sequence of
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
unsigned int regno0 = true_regnum (operands[0]) ;
- unsigned int regno1 = -1;
- unsigned int regno2 = -1;
- unsigned int split_cost = 0;
+ unsigned int regno1 = INVALID_REGNUM;
+ unsigned int regno2 = INVALID_REGNUM;
+ int split_cost = 0;
struct ix86_address parts;
int ok;
+ /* FIXME: Handle zero-extended addresses. */
+ if (GET_CODE (operands[1]) == ZERO_EXTEND
+ || GET_CODE (operands[1]) == AND)
+ return false;
+
/* Check we need to optimize. */
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
+ /* There should be at least two components in the address. */
+ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
+ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
+ return false;
+
/* We should not split into add if non legitimate pic
operand is used as displacement. */
if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
return CCmode;
case GTU: /* CF=0 & ZF=0 */
case LEU: /* CF=1 | ZF=1 */
- /* Detect overflow checks. They need just the carry flag. */
- if (GET_CODE (op0) == MINUS
- && rtx_equal_p (op1, XEXP (op0, 0)))
- return CCCmode;
- else
- return CCmode;
+ return CCmode;
/* Codes possibly doable only with sign flag when
comparing against zero. */
case GE: /* SF=OF or SF=0 */
cop0 = operands[4];
cop1 = operands[5];
+ /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
+ and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
+ if ((code == LT || code == GE)
+ && data_mode == mode
+ && cop1 == CONST0_RTX (mode)
+ && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
+ && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
+ && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
+ && (GET_MODE_SIZE (data_mode) == 16
+ || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
+ {
+ rtx negop = operands[2 - (code == LT)];
+ int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
+ if (negop == CONST1_RTX (data_mode))
+ {
+ rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
+ operands[0], 1, OPTAB_DIRECT);
+ if (res != operands[0])
+ emit_move_insn (operands[0], res);
+ return true;
+ }
+ else if (GET_MODE_INNER (data_mode) != DImode
+ && vector_all_ones_operand (negop, data_mode))
+ {
+ rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
+ operands[0], 0, OPTAB_DIRECT);
+ if (res != operands[0])
+ emit_move_insn (operands[0], res);
+ return true;
+ }
+ }
+
+ if (!nonimmediate_operand (cop1, mode))
+ cop1 = force_reg (mode, cop1);
+ if (!general_operand (operands[1], data_mode))
+ operands[1] = force_reg (data_mode, operands[1]);
+ if (!general_operand (operands[2], data_mode))
+ operands[2] = force_reg (data_mode, operands[2]);
+
/* XOP supports all of the comparisons on all 128-bit vector int types. */
if (TARGET_XOP
&& (mode == V16QImode || mode == V8HImode
vt = force_reg (maskmode, vt);
mask = gen_lowpart (maskmode, mask);
if (maskmode == V8SImode)
- emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
+ emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
else
emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
vec[i * 2 + 1] = const1_rtx;
}
vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = force_const_mem (maskmode, vt);
+ vt = validize_mem (force_const_mem (maskmode, vt));
t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
OPTAB_DIRECT);
the high bits of the shuffle elements. No need for us to
perform an AND ourselves. */
if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8si (target, mask, op0));
+ emit_insn (gen_avx2_permvarv8si (target, op0, mask));
else
{
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
- emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
- emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
+ emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
+ emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
goto merge_two;
}
return;
case V8SFmode:
mask = gen_lowpart (V8SFmode, mask);
if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
+ emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
else
{
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
- emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
- emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
+ emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
+ emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
goto merge_two;
}
return;
t2 = gen_reg_rtx (V8SImode);
emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
+ emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
return;
case V4SFmode:
t1 = gen_reg_rtx (V8SFmode);
- t2 = gen_reg_rtx (V8SFmode);
- mask = gen_lowpart (V4SFmode, mask);
+ t2 = gen_reg_rtx (V8SImode);
+ mask = gen_lowpart (V4SImode, mask);
emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
- emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
+ emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
return;
for (i = 0; i < 16; ++i)
vec[i] = GEN_INT (i/e * e);
vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = force_const_mem (V16QImode, vt);
+ vt = validize_mem (force_const_mem (V16QImode, vt));
if (TARGET_XOP)
emit_insn (gen_xop_pperm (mask, mask, mask, vt));
else
for (i = 0; i < 16; ++i)
vec[i] = GEN_INT (i % e);
vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = force_const_mem (V16QImode, vt);
+ vt = validize_mem (force_const_mem (V16QImode, vt));
emit_insn (gen_addv16qi3 (mask, mask, vt));
}
f = ggc_alloc_cleared_machine_function ();
f->use_fast_prologue_epilogue_nregs = -1;
- f->tls_descriptor_call_expanded_p = 0;
f->call_abi = ix86_abi;
return f;
gcc_assert (n < MAX_386_STACK_LOCALS);
- /* Virtual slot is valid only before vregs are instantiated. */
- gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
-
for (s = ix86_stack_locals; s; s = s->next)
if (s->mode == mode && s->n == n)
return validize_mem (copy_rtx (s->rtl));
ix86_stack_locals = s;
return validize_mem (s->rtl);
}
+
+static void
+ix86_instantiate_decls (void)
+{
+ struct stack_local_entry *s;
+
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->rtl != NULL_RTX)
+ instantiate_decl_rtl (s->rtl);
+}
\f
/* Calculate the length of the memory address in the instruction encoding.
Includes addr32 prefix, does not include the one-byte modrm, opcode,
- or other prefixes. */
+ or other prefixes. We never generate addr32 prefix for LEA insn. */
int
-memory_address_length (rtx addr)
+memory_address_length (rtx addr, bool lea)
{
struct ix86_address parts;
rtx base, index, disp;
ok = ix86_decompose_address (addr, &parts);
gcc_assert (ok);
- if (parts.base && GET_CODE (parts.base) == SUBREG)
- parts.base = SUBREG_REG (parts.base);
- if (parts.index && GET_CODE (parts.index) == SUBREG)
- parts.index = SUBREG_REG (parts.index);
+ len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
+
+ /* If this is not LEA instruction, add the length of addr32 prefix. */
+ if (TARGET_64BIT && !lea
+ && (SImode_address_operand (addr, VOIDmode)
+ || (parts.base && GET_MODE (parts.base) == SImode)
+ || (parts.index && GET_MODE (parts.index) == SImode)))
+ len++;
base = parts.base;
index = parts.index;
disp = parts.disp;
- /* Add length of addr32 prefix. */
- len = (GET_CODE (addr) == ZERO_EXTEND
- || GET_CODE (addr) == AND);
+ if (base && GET_CODE (base) == SUBREG)
+ base = SUBREG_REG (base);
+ if (index && GET_CODE (index) == SUBREG)
+ index = SUBREG_REG (index);
+
+ gcc_assert (base == NULL_RTX || REG_P (base));
+ gcc_assert (index == NULL_RTX || REG_P (index));
/* Rule of thumb:
- esp as the base always wants an index,
/* esp (for its index) and ebp (for its displacement) need
the two-byte modrm form. Similarly for r12 and r13 in 64-bit
code. */
- if (REG_P (addr)
- && (addr == arg_pointer_rtx
- || addr == frame_pointer_rtx
- || REGNO (addr) == SP_REG
- || REGNO (addr) == BP_REG
- || REGNO (addr) == R12_REG
- || REGNO (addr) == R13_REG))
- len = 1;
+ if (base == arg_pointer_rtx
+ || base == frame_pointer_rtx
+ || REGNO (base) == SP_REG
+ || REGNO (base) == BP_REG
+ || REGNO (base) == R12_REG
+ || REGNO (base) == R13_REG)
+ len++;
}
/* Direct Addressing. In 64-bit mode mod 00 r/m 5
by UNSPEC. */
else if (disp && !base && !index)
{
- len = 4;
+ len += 4;
if (TARGET_64BIT)
{
rtx symbol = disp;
|| (XINT (symbol, 1) != UNSPEC_GOTPCREL
&& XINT (symbol, 1) != UNSPEC_PCREL
&& XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
- len += 1;
+ len++;
}
}
-
else
{
/* Find the length of the displacement constant. */
if (disp)
{
if (base && satisfies_constraint_K (disp))
- len = 1;
+ len += 1;
else
- len = 4;
+ len += 4;
}
/* ebp always wants a displacement. Similarly r13. */
- else if (base && REG_P (base)
- && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
- len = 1;
+ else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
+ len++;
/* An index requires the two-byte modrm form.... */
if (index
/* ...like esp (or r12), which always wants an index. */
|| base == arg_pointer_rtx
|| base == frame_pointer_rtx
- || (base && REG_P (base)
- && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
- len += 1;
- }
-
- switch (parts.seg)
- {
- case SEG_FS:
- case SEG_GS:
- len += 1;
- break;
- default:
- break;
+ || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
+ len++;
}
return len;
case MODE_SI:
len = 4;
break;
- /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
+ /* Immediates for DImode instructions are encoded
+ as 32bit sign extended values. */
case MODE_DI:
len = 4;
break;
}
return len;
}
+
/* Compute default value for "length_address" attribute. */
int
ix86_attr_length_address_default (rtx insn)
gcc_assert (GET_CODE (set) == SET);
addr = SET_SRC (set);
- if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
- {
- if (GET_CODE (addr) == ZERO_EXTEND)
- addr = XEXP (addr, 0);
- if (GET_CODE (addr) == SUBREG)
- addr = SUBREG_REG (addr);
- }
- return memory_address_length (addr);
+ return memory_address_length (addr, true);
}
extract_insn_cached (insn);
if (*constraints == 'X')
continue;
}
- return memory_address_length (XEXP (recog_data.operand[i], 0));
+ return memory_address_length (XEXP (recog_data.operand[i], 0), false);
}
return 0;
}
case PROCESSOR_CORE2_64:
case PROCESSOR_COREI7_32:
case PROCESSOR_COREI7_64:
+ case PROCESSOR_ATOM:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
as many instructions can be executed on a cycle, i.e.,
issue_rate. I wonder why tuning for many CPUs does not do this. */
fntype = TREE_TYPE (fndecl);
ccvt = ix86_get_callcvt (fntype);
- if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
{
/* Fastcall functions use ecx/edx for arguments, which leaves
us with EAX for the static chain.
leaves us with EAX for the static chain. */
regno = AX_REG;
}
+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
+ {
+ /* Thiscall functions use ecx for arguments, which leaves
+ us with EAX and EDX for the static chain.
+ We are using for abi-compatibility EAX. */
+ regno = AX_REG;
+ }
else if (ix86_function_regparm (fntype, fndecl) == 3)
{
/* For regparm 3, we have no free call-clobbered registers in
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
if (!flag_tm)
return;
+ /* If there are no builtins defined, we must be compiling in a
+ language without trans-mem support. */
+ if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
+ return;
+
/* Use whatever attributes a normal TM load has. */
decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
attrs_load = DECL_ATTRIBUTES (decl);
arg_adjust = 0;
if (optimize
|| target == 0
- || GET_MODE (target) != tmode
- || !insn_p->operand[0].predicate (target, tmode))
+ || !register_operand (target, tmode)
+ || GET_MODE (target) != tmode)
target = gen_reg_rtx (tmode);
}
case IX86_BUILTIN_LDMXCSR:
op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
- target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ target = assign_386_stack_local (SImode, SLOT_TEMP);
emit_move_insn (target, op0);
emit_insn (gen_sse_ldmxcsr (target));
return 0;
case IX86_BUILTIN_STMXCSR:
- target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ target = assign_386_stack_local (SImode, SLOT_TEMP);
emit_insn (gen_sse_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
{
if (CONST_INT_P (XEXP (x, 1)))
*total = cost->shift_const;
+ else if (GET_CODE (XEXP (x, 1)) == SUBREG
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
+ {
+ /* Return the cost after shift-and truncation. */
+ *total = cost->shift_var;
+ return true;
+ }
else
*total = cost->shift_var;
}
else
type = node;
- if (!(type && (TREE_CODE (*type) == RECORD_TYPE
- || TREE_CODE (*type) == UNION_TYPE)))
+ if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
{
warning (OPT_Wattributes, "%qE attribute ignored",
name);
{
rtx this_param = x86_this_parameter (function);
rtx this_reg, tmp, fnaddr;
+ unsigned int tmp_regno;
+
+ if (TARGET_64BIT)
+ tmp_regno = R10_REG;
+ else
+ {
+ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
+ tmp_regno = AX_REG;
+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
+ tmp_regno = DX_REG;
+ else
+ tmp_regno = CX_REG;
+ }
emit_note (NOTE_INSN_PROLOGUE_END);
{
if (!x86_64_general_operand (delta_rtx, Pmode))
{
- tmp = gen_rtx_REG (Pmode, R10_REG);
+ tmp = gen_rtx_REG (Pmode, tmp_regno);
emit_move_insn (tmp, delta_rtx);
delta_rtx = tmp;
}
if (vcall_offset)
{
rtx vcall_addr, vcall_mem, this_mem;
- unsigned int tmp_regno;
- if (TARGET_64BIT)
- tmp_regno = R10_REG;
- else
- {
- unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
- if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
- tmp_regno = AX_REG;
- else
- tmp_regno = CX_REG;
- }
tmp = gen_rtx_REG (Pmode, tmp_regno);
this_mem = gen_rtx_MEM (ptr_mode, this_reg);
emit_jump_insn (gen_indirect_jump (fnaddr));
else
{
+ if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
+ fnaddr = legitimize_pic_address (fnaddr,
+ gen_rtx_REG (Pmode, tmp_regno));
+
+ if (!sibcall_insn_operand (fnaddr, Pmode))
+ {
+ tmp = gen_rtx_REG (Pmode, tmp_regno);
+ if (GET_MODE (fnaddr) != Pmode)
+ fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
+ emit_move_insn (tmp, fnaddr);
+ fnaddr = tmp;
+ }
+
tmp = gen_rtx_MEM (QImode, fnaddr);
tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
tmp = emit_call_insn (tmp);
tmp = gen_reg_rtx (GET_MODE_INNER (mode));
ix86_expand_vector_extract (true, tmp, target, 1 - elt);
if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- else
tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
return;
}
tmp = gen_reg_rtx (GET_MODE_INNER (mode));
ix86_expand_vector_extract (false, tmp, target, 1 - elt);
if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- else
tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
return;
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
- return 1;
+ case vec_promote_demote:
+ return ix86_cost->vec_stmt_cost;
default:
gcc_unreachable ();
}
}
-
-/* Return a vector mode with twice as many elements as VMODE. */
-/* ??? Consider moving this to a table generated by genmodes.c. */
-
-static enum machine_mode
-doublesize_vector_mode (enum machine_mode vmode)
-{
- switch (vmode)
- {
- case V2SFmode: return V4SFmode;
- case V1DImode: return V2DImode;
- case V2SImode: return V4SImode;
- case V4HImode: return V8HImode;
- case V8QImode: return V16QImode;
-
- case V2DFmode: return V4DFmode;
- case V4SFmode: return V8SFmode;
- case V2DImode: return V4DImode;
- case V4SImode: return V8SImode;
- case V8HImode: return V16HImode;
- case V16QImode: return V32QImode;
-
- case V4DFmode: return V8DFmode;
- case V8SFmode: return V16SFmode;
- case V4DImode: return V8DImode;
- case V8SImode: return V16SImode;
- case V16HImode: return V32HImode;
- case V32QImode: return V64QImode;
-
- default:
- gcc_unreachable ();
- }
-}
-
/* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA. */
enum machine_mode v2mode;
rtx x;
- v2mode = doublesize_vector_mode (GET_MODE (op0));
+ v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
return expand_vselect (target, x, perm, nelt);
}
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
else
- emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
+ emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
}
else
{
return ok;
}
+static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
+
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
a two vector permutation into a single vector permutation by using
an interleave operation to merge the vectors. */
/* For 32-byte modes allow even d->op0 == d->op1.
The lack of cross-lane shuffling in some instructions
might prevent a single insn shuffle. */
+ dfinal = *d;
+ dfinal.testing_p = true;
+ /* If expand_vec_perm_interleave3 can expand this into
+ a 3 insn sequence, give up and let it be expanded as
+ 3 insn sequence. While that is one insn longer,
+ it doesn't need a memory operand and in the common
+ case that both interleave low and high permutations
+ with the same operands are adjacent needs 4 insns
+ for both after CSE. */
+ if (expand_vec_perm_interleave3 (&dfinal))
+ return false;
}
else
return false;
stopping once we have promoted to V4SImode and then use pshufd. */
do
{
- optab otab = vec_interleave_low_optab;
+ rtx dest;
+ rtx (*gen) (rtx, rtx, rtx)
+ = vmode == V16QImode ? gen_vec_interleave_lowv16qi
+ : gen_vec_interleave_lowv8hi;
if (elt >= nelt2)
{
- otab = vec_interleave_high_optab;
+ gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
+ : gen_vec_interleave_highv8hi;
elt -= nelt2;
}
nelt2 /= 2;
- op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
+ dest = gen_reg_rtx (vmode);
+ emit_insn (gen (dest, op0, op0));
vmode = get_mode_wider_vector (vmode);
- op0 = gen_lowpart (vmode, op0);
+ op0 = gen_lowpart (vmode, dest);
}
while (vmode != V4SImode);
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
-#ifndef TARGET_MACHO
+#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
+#undef TARGET_INSTANTIATE_DECLS
+#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload