/* Subroutines used for code generation on IA-32.
- Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
- 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
Free Software Foundation, Inc.
This file is part of GCC.
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
+#include "reload.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
8, /* MMX or SSE register to integer */
8, /* size of l1 cache. */
1024, /* size of l2 cache. */
- 128, /* size of prefetch block */
+ 64, /* size of prefetch block */
8, /* number of parallel prefetches */
1, /* Branch cost */
COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
/* Feature tests against the various architecture variations, used to create
ix86_arch_features based on the processor mask. */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
- /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
+ /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
~(m_386 | m_486 | m_PENT | m_K6),
/* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
-/* true if sse prefetch instruction is not NOOP. */
+/* True if processor has SSE prefetch instruction. */
int x86_prefetch_sse;
+/* True if processor has prefetchw instruction. */
+int x86_prefetchw;
+
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
= "force_align_arg_pointer";
#define PTA_XOP (HOST_WIDE_INT_1 << 29)
#define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
#define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
+#define PTA_PREFETCHW (HOST_WIDE_INT_1 << 32)
+
/* if this reaches 64, need to widen struct pta flags below */
static struct pta
| PTA_SSSE3 | PTA_CX16},
{"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT},
{"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
- | PTA_FMA | PTA_MOVBE},
+ | PTA_FMA | PTA_MOVBE},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
- PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
+ PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
{"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
{"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_NO_SAHF},
{"opteron-sse3", PROCESSOR_K8, CPU_K8,
- PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
+ PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
{"athlon64", PROCESSOR_K8, CPU_K8,
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
{"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
- | PTA_XOP | PTA_LWP},
+ PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+ | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_FMA4 | PTA_XOP | PTA_LWP},
{"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
- | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+ PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+ | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+ | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
| PTA_FMA},
{"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
- PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
- | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
+ PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+ | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
{"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch. */ },
{"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
"large", "32");
else if (TARGET_X32)
error ("code model %qs not supported in x32 mode",
- "medium");
+ "large");
break;
case CM_32:
ix86_isa_flags |= OPTION_MASK_ISA_F16C;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
x86_prefetch_sse = true;
+ if (processor_alias_table[i].flags & PTA_PREFETCHW)
+ x86_prefetchw = true;
break;
}
-mtune (rather than -march) points us to a processor that has them.
However, the VIA C3 gives a SIGILL, so we only do that for i686 and
higher processors. */
- if (TARGET_CMOVE
+ if (TARGET_CMOV
&& (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
x86_prefetch_sse = true;
break;
in case they weren't overwritten by command line options. */
if (TARGET_64BIT)
{
- if (optimize > 1 && !global_options_set.x_flag_zee)
- flag_zee = 1;
if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
if (flag_asynchronous_unwind_tables == 2)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
- /* For sane SSE instruction set generation we need fcomi instruction.
- It is safe to enable all CMOVE instructions. Also, RDRAND intrinsic
- expands to a sequence that includes conditional move. */
- if (TARGET_SSE || TARGET_RDRND)
- TARGET_CMOVE = 1;
-
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
{
char *p;
return NULL_TREE;
}
+/* The transactional memory builtins are implicitly regparm or fastcall
+ depending on the ABI. Override the generic do-nothing attribute that
+ these builtins were declared with, and replace it with one of the two
+ attributes that we expect elsewhere. */
+
+static tree
+ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
+ bool *no_add_attrs)
+{
+ tree alt;
+
+ /* In no case do we want to add the placeholder attribute. */
+ *no_add_attrs = true;
+
+ /* The 64-bit ABI is unchanged for transactional memory. */
+ if (TARGET_64BIT)
+ return NULL_TREE;
+
+ /* ??? Is there a better way to validate 32-bit windows? We have
+ cfun->machine->call_abi, but that seems to be set only for 64-bit. */
+ if (CHECK_STACK_LIMIT > 0)
+ alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
+ else
+ {
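+ /* Build a regparm(2) attribute: the argument list holds the
+ constant 2, wrapped under the "regparm" identifier. */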
+ alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
+ alt = tree_cons (get_identifier ("regparm"), alt, NULL);
+ }
+ decl_attributes (node, alt, flags);
+
+ return NULL_TREE;
+}
+
/* This function determines from TYPE the calling-convention. */
unsigned int
{
/* The return value of this function uses 256bit AVX modes. */
if (caller)
- cfun->machine->callee_return_avx256_p = true;
+ {
+ cfun->machine->callee_return_avx256_p = true;
+ cum->callee_return_avx256_p = true;
+ }
else
cfun->machine->caller_return_avx256_p = true;
}
/* Likewise, error if the ABI requires us to return values in the
x87 registers and the user specified -mno-80387. */
- if (!TARGET_80387 && in_return)
+ if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
for (i = 0; i < n; i++)
if (regclass[i] == X86_64_X87_CLASS
|| regclass[i] == X86_64_X87UP_CLASS
{
/* This argument uses 256bit AVX modes. */
if (cum->caller)
- cfun->machine->callee_pass_avx256_p = true;
+ cum->callee_pass_avx256_p = true;
else
cfun->machine->caller_pass_avx256_p = true;
}
+ if (cum->caller && mode == VOIDmode)
+ {
+ /* This function is called with MODE == VOIDmode immediately
+ before the call instruction is emitted. We copy callee 256bit
+ AVX info from the current CUM here. */
+ cfun->machine->callee_return_avx256_p = cum->callee_return_avx256_p;
+ cfun->machine->callee_pass_avx256_p = cum->callee_pass_avx256_p;
+ }
+
return arg;
}
switch (regno)
{
case AX_REG:
+ case DX_REG:
return true;
+ case DI_REG:
+ case SI_REG:
+ return TARGET_64BIT && ix86_abi != MS_ABI;
- case FIRST_FLOAT_REG:
+ /* Complex values are returned in %st(0)/%st(1) pair. */
+ case ST0_REG:
+ case ST1_REG:
/* TODO: The function should depend on current function ABI but
builtins.c would need updating then. Therefore we use the
default ABI. */
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
- case FIRST_SSE_REG:
+ /* Complex values are returned in %xmm0/%xmm1 pair. */
+ case XMM0_REG:
+ case XMM1_REG:
return TARGET_SSE;
- case FIRST_MMX_REG:
+ case MM0_REG:
if (TARGET_MACHO || TARGET_64BIT)
return false;
return TARGET_MMX;
if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
return true;
+ /* On Win64 SEH, very large frames need a frame pointer, as the
+ maximum stack allocation is 4GB. */
+ if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
+ return true;
+
/* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
turns off the frame pointer by default. Turn it back on now if
we've not got a leaf function. */
if (!flag_pic)
{
- xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
+ if (TARGET_MACHO)
+ /* We don't need a pic base, we're not producing pic. */
+ gcc_unreachable ();
+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
-
-#if TARGET_MACHO
- /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
- is what will be referenced by the Mach-O PIC subsystem. */
- if (!label)
- ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
-#endif
-
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
}
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
xops[2] = gen_rtx_MEM (QImode, xops[2]);
output_asm_insn ("call\t%X2", xops);
- /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
- is what will be referenced by the Mach-O PIC subsystem. */
+
#if TARGET_MACHO
- if (!label)
+ /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
+ This is what will be referenced by the Mach-O PIC subsystem. */
+ if (machopic_should_output_picbase_label () || !label)
ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
- else
+
+ /* When we are restoring the pic base at the site of a nonlocal label,
+ and we decided to emit the pic base above, we will still output a
+ local label used for calculating the correction offset (even though
+ the offset will be 0 in that case). */
+ if (label)
targetm.asm_out.internal_label (asm_out_file, "L",
CODE_LABEL_NUMBER (label));
#endif
&& (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
|| crtl->profile
|| crtl->calls_eh_return
- || crtl->uses_const_pool))
+ || crtl->uses_const_pool
+ || cfun->has_nonlocal_label))
return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
if (crtl->calls_eh_return && maybe_eh_return)
offset += frame->nregs * UNITS_PER_WORD;
frame->reg_save_offset = offset;
+ /* On SEH target, registers are pushed just before the frame pointer
+ location. */
+ if (TARGET_SEH)
+ frame->hard_frame_pointer_offset = offset;
+
/* Align and set SSE register save area. */
if (frame->nsseregs)
{
{
HOST_WIDE_INT diff;
- /* If we can leave the frame pointer where it is, do so. */
+ /* If we can leave the frame pointer where it is, do so. Also, return
+ the establisher frame for __builtin_frame_address (0). */
diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
- if (diff > 240 || (diff & 15) != 0)
+ if (diff <= SEH_MAX_FRAME_SIZE
+ && (diff > 240 || (diff & 15) != 0)
+ && !crtl->accesses_prior_frames)
{
/* Ideally we'd determine what portion of the local stack frame
(within the constraint of the lowest 240) is most heavily used.
tree decl = current_function_decl, fntype = TREE_TYPE (decl);
bool fastcall_p
= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+ bool thiscall_p
+ = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
bool static_chain_p = DECL_STATIC_CHAIN (decl);
int regparm = ix86_function_regparm (fntype, decl);
int drap_regno
if ((regparm < 1 || (fastcall_p && !static_chain_p))
&& drap_regno != AX_REG)
regno = AX_REG;
- else if (regparm < 2 && drap_regno != DX_REG)
+ /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
+ for the static chain register. */
+ else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
+ regno = AX_REG;
+ else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
regno = DX_REG;
/* ecx is the static chain register. */
- else if (regparm < 3 && !fastcall_p && !static_chain_p
+ else if (regparm < 3 && !fastcall_p && !thiscall_p
+ && !static_chain_p
&& drap_regno != CX_REG)
regno = CX_REG;
else if (ix86_save_reg (BX_REG, true))
{
if (sr->saved)
{
+ struct machine_function *m = cfun->machine;
rtx x, insn = emit_insn (gen_pop (sr->reg));
/* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
+ m->fs.sp_offset -= UNITS_PER_WORD;
}
}
/* After stack_realign_needed is finalized, we can no longer
change it. */
gcc_assert (crtl->stack_realign_needed == stack_realign);
+ return;
}
- else
- {
- crtl->stack_realign_needed = stack_realign;
- crtl->stack_realign_finalized = true;
+
+ /* If the only reason for frame_pointer_needed is that we conservatively
+ assumed stack realignment might be needed, but in the end nothing that
+ needed the stack alignment had been spilled, clear frame_pointer_needed
+ and say we don't need stack realignment. */
+ if (stack_realign
+ && !crtl->need_drap
+ && frame_pointer_needed
+ && current_function_is_leaf
+ && flag_omit_frame_pointer
+ && current_function_sp_is_unchanging
+ && !ix86_current_function_calls_tls_descriptor
+ && !crtl->accesses_prior_frames
+ && !cfun->calls_alloca
+ && !crtl->calls_eh_return
+ && !(flag_stack_check && STACK_CHECK_MOVING_SP)
+ && !ix86_frame_pointer_required ()
+ && get_frame_size () == 0
+ && ix86_nsaved_sseregs () == 0
+ && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
+ {
+ HARD_REG_SET set_up_by_prologue, prologue_used;
+ basic_block bb;
+
+ CLEAR_HARD_REG_SET (prologue_used);
+ CLEAR_HARD_REG_SET (set_up_by_prologue);
+ add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
+ add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
+ add_to_hard_reg_set (&set_up_by_prologue, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
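+ /* If any remaining insn still requires a stack frame (uses one of
+ the registers the prologue would set up), keep the realignment
+ decision as computed. */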
+ FOR_EACH_BB (bb)
+ {
+ rtx insn;
+ FOR_BB_INSNS (bb, insn)
+ if (NONDEBUG_INSN_P (insn)
+ && requires_stack_frame_p (insn, prologue_used,
+ set_up_by_prologue))
+ {
+ crtl->stack_realign_needed = stack_realign;
+ crtl->stack_realign_finalized = true;
+ return;
+ }
+ }
+
+ frame_pointer_needed = false;
+ stack_realign = false;
+ crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
+ crtl->stack_alignment_needed = incoming_stack_boundary;
+ crtl->stack_alignment_estimated = incoming_stack_boundary;
+ if (crtl->preferred_stack_boundary > incoming_stack_boundary)
+ crtl->preferred_stack_boundary = incoming_stack_boundary;
+ df_finish_pass (true);
+ df_scan_alloc (NULL);
+ df_scan_blocks ();
+ df_compute_regs_ever_live (true);
+ df_analyze ();
}
+
+ crtl->stack_realign_needed = stack_realign;
+ crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns. */
struct ix86_frame frame;
HOST_WIDE_INT allocate;
bool int_registers_saved;
+ bool sse_registers_saved;
ix86_finalize_stack_realign_flags ();
m->fs.realigned = true;
}
+ int_registers_saved = (frame.nregs == 0);
+ sse_registers_saved = (frame.nsseregs == 0);
+
if (frame_pointer_needed && !m->fs.fp_valid)
{
/* Note: AT&T enter does NOT have reversed args. Enter is probably
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
+ /* Push registers now, before setting the frame pointer
+ on SEH target. */
+ if (!int_registers_saved
+ && TARGET_SEH
+ && !frame.save_regs_using_mov)
+ {
+ ix86_emit_save_regs ();
+ int_registers_saved = true;
+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+ }
+
if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
{
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
}
}
- int_registers_saved = (frame.nregs == 0);
-
if (!int_registers_saved)
{
/* If saving registers via PUSH, do so now. */
current_function_static_stack_size = stack_size;
}
+ /* On SEH target with very large frame size, allocate an area to save
+ SSE registers (as the very large allocation won't be described). */
+ if (TARGET_SEH
+ && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
+ && !sse_registers_saved)
+ {
+ HOST_WIDE_INT sse_size =
+ frame.sse_reg_save_offset - frame.reg_save_offset;
+
+ gcc_assert (int_registers_saved);
+
+ /* No need to do stack checking as the area will be immediately
+ written. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-sse_size), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ allocate -= sse_size;
+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
+ sse_registers_saved = true;
+ }
+
/* The stack has already been decremented by the instruction calling us
so probe if the size is non-negative to preserve the protection area. */
if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
rtx eax = gen_rtx_REG (Pmode, AX_REG);
rtx r10 = NULL;
rtx (*adjust_stack_insn)(rtx, rtx, rtx);
-
+ const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
bool eax_live = false;
bool r10_live = false;
if (!TARGET_64BIT_MS_ABI)
eax_live = ix86_eax_live_at_start_p ();
+ /* Note that SEH directives need to continue tracking the stack
+ pointer even after the frame pointer has been set up. */
if (eax_live)
{
- emit_insn (gen_push (eax));
+ insn = emit_insn (gen_push (eax));
allocate -= UNITS_PER_WORD;
+ if (sp_is_cfa_reg || TARGET_SEH)
+ {
+ if (sp_is_cfa_reg)
+ m->fs.cfa_offset += UNITS_PER_WORD;
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
+
if (r10_live)
{
r10 = gen_rtx_REG (Pmode, R10_REG);
- emit_insn (gen_push (r10));
+ insn = emit_insn (gen_push (r10));
allocate -= UNITS_PER_WORD;
+ if (sp_is_cfa_reg || TARGET_SEH)
+ {
+ if (sp_is_cfa_reg)
+ m->fs.cfa_offset += UNITS_PER_WORD;
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
emit_move_insn (eax, GEN_INT (allocate));
insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
stack_pointer_rtx, eax));
- /* Note that SEH directives need to continue tracking the stack
- pointer even after the frame pointer has been set up. */
- if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
+ if (sp_is_cfa_reg || TARGET_SEH)
{
- if (m->fs.cfa_reg == stack_pointer_rtx)
+ if (sp_is_cfa_reg)
m->fs.cfa_offset += allocate;
-
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
gen_rtx_SET (VOIDmode, stack_pointer_rtx,
if (r10_live && eax_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = plus_constant (stack_pointer_rtx, allocate);
emit_move_insn (r10, gen_frame_mem (Pmode, t));
- t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
+ t = plus_constant (stack_pointer_rtx,
+ allocate - UNITS_PER_WORD);
emit_move_insn (eax, gen_frame_mem (Pmode, t));
}
else if (eax_live || r10_live)
{
- t = choose_baseaddr (m->fs.sp_offset - allocate);
+ t = plus_constant (stack_pointer_rtx, allocate);
emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
}
}
if (!int_registers_saved)
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- if (frame.nsseregs)
+ if (!sse_registers_saved)
ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
pic_reg_used = false;
add_reg_note (insn, REG_CFA_DEF_CFA,
plus_constant (stack_pointer_rtx, m->fs.sp_offset));
RTX_FRAME_RELATED_P (insn) = 1;
- ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
- m->fs.fp_offset);
}
+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
+ m->fs.fp_offset);
}
/* Emit code to restore saved registers using MOV insns.
}
}
+/* Emit vzeroupper if needed. */
+
+void
+ix86_maybe_emit_epilogue_vzeroupper (void)
+{
+ if (TARGET_VZEROUPPER
+ && !TREE_THIS_VOLATILE (cfun->decl)
+ && !cfun->machine->caller_return_avx256_p)
+ emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+}
+
/* Restore function stack, frame, and registers. */
void
}
/* First step is to deallocate the stack frame so that we can
- pop the registers. */
- if (!m->fs.sp_valid)
+ pop the registers. Also do it on SEH targets for very large
+ frames, as the instructions otherwise emitted aren't allowed by
+ the ABI in epilogues. */
+ if (!m->fs.sp_valid
+ || (TARGET_SEH
+ && (m->fs.sp_offset - frame.reg_save_offset
+ >= SEH_MAX_FRAME_SIZE)))
{
pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
GEN_INT (m->fs.fp_offset
}
/* Emit vzeroupper if needed. */
- if (TARGET_VZEROUPPER
- && !TREE_THIS_VOLATILE (cfun->decl)
- && !cfun->machine->caller_return_avx256_p)
- emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+ ix86_maybe_emit_epilogue_vzeroupper ();
if (crtl->args.pops_args && crtl->args.size)
{
it looks like we might want one, insert a NOP. */
{
rtx insn = get_last_insn ();
+ rtx deleted_debug_label = NULL_RTX;
while (insn
&& NOTE_P (insn)
&& NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
- insn = PREV_INSN (insn);
+ {
+ /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
+ notes; instead set their CODE_LABEL_NUMBER to -1,
+ otherwise there would be code generation differences
+ between -g and -g0. */
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
+ deleted_debug_label = insn;
+ insn = PREV_INSN (insn);
+ }
if (insn
&& (LABEL_P (insn)
|| (NOTE_P (insn)
&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
fputs ("\tnop\n", file);
+ else if (deleted_debug_label)
+ for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
+ if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
+ CODE_LABEL_NUMBER (insn) = -1;
}
#endif
return R11_REG;
else
{
- bool is_fastcall;
+ bool is_fastcall, is_thiscall;
int regparm;
is_fastcall = (lookup_attribute ("fastcall",
TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
!= NULL);
+ is_thiscall = (lookup_attribute ("thiscall",
+ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
+ != NULL);
regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
if (is_fastcall)
}
return AX_REG;
}
+ else if (is_thiscall)
+ {
+ if (!DECL_STATIC_CHAIN (cfun->decl))
+ return DX_REG;
+ return AX_REG;
+ }
else if (regparm < 3)
{
if (!DECL_STATIC_CHAIN (cfun->decl))
}
}
\f
-/* Determine if op is suitable SUBREG RTX for address. */
-
-static bool
-ix86_address_subreg_operand (rtx op)
-{
- enum machine_mode mode;
-
- if (!REG_P (op))
- return false;
-
- mode = GET_MODE (op);
-
- if (GET_MODE_CLASS (mode) != MODE_INT)
- return false;
-
- /* Don't allow SUBREGs that span more than a word. It can lead to spill
- failures when the register is one word out of a two word structure. */
- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
- return false;
-
- /* Allow only SUBREGs of non-eliminable hard registers. */
- return register_no_elim_operand (op, mode);
-}
-
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
{
if (GET_CODE (addr) == ZERO_EXTEND
&& GET_MODE (XEXP (addr, 0)) == SImode)
- addr = XEXP (addr, 0);
+ {
+ addr = XEXP (addr, 0);
+ if (CONST_INT_P (addr))
+ return 0;
+ }
else if (GET_CODE (addr) == AND
&& const_32bit_mask (XEXP (addr, 1), DImode))
{
addr = XEXP (addr, 0);
- /* Strip subreg. */
+ /* Adjust SUBREGs. */
if (GET_CODE (addr) == SUBREG
&& GET_MODE (SUBREG_REG (addr)) == SImode)
- addr = SUBREG_REG (addr);
+ {
+ addr = SUBREG_REG (addr);
+ if (CONST_INT_P (addr))
+ return 0;
+ }
+ else if (GET_MODE (addr) == DImode)
+ addr = gen_rtx_SUBREG (SImode, addr, 0);
+ else if (GET_MODE (addr) != VOIDmode)
+ return 0;
+ }
+ }
+
+ /* Allow SImode subregs of DImode addresses;
+ they will be emitted with the addr32 prefix. */
+ if (TARGET_64BIT && GET_MODE (addr) == SImode)
+ {
+ if (GET_CODE (addr) == SUBREG
+ && GET_MODE (SUBREG_REG (addr)) == DImode)
+ {
+ addr = SUBREG_REG (addr);
+ if (CONST_INT_P (addr))
+ return 0;
}
}
base = addr;
else if (GET_CODE (addr) == SUBREG)
{
- if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ if (REG_P (SUBREG_REG (addr)))
base = addr;
else
return 0;
break;
case SUBREG:
- if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ if (!REG_P (SUBREG_REG (op)))
return 0;
/* FALLTHRU */
if (REG_P (index))
;
else if (GET_CODE (index) == SUBREG
- && ix86_address_subreg_operand (SUBREG_REG (index)))
+ && REG_P (SUBREG_REG (index)))
;
else
return 0;
break;
if (GET_CODE (op0) == LABEL_REF)
return true;
+ if (GET_CODE (op0) == CONST
+ && GET_CODE (XEXP (op0, 0)) == UNSPEC
+ && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
+ return true;
+ if (GET_CODE (op0) == UNSPEC
+ && XINT (op0, 1) == UNSPEC_PCREL)
+ return true;
if (GET_CODE (op0) != SYMBOL_REF)
break;
/* FALLTHRU */
return false;
}
+/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reload the invalid
+ parts of address X in place. Return true if any reload was pushed,
+ in which case the calling macro should goto WIN, false otherwise. */
+
+bool
+ix86_legitimize_reload_address (rtx x,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int opnum, int type,
+ int ind_levels ATTRIBUTE_UNUSED)
+{
+ /* Reload can generate:
+
+ (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
+ (reg:DI 97))
+ (reg:DI 2 cx))
+
+ This RTX is rejected by ix86_legitimate_address_p due to
+ non-strictness of base register 97. Following this rejection,
+ reload pushes all three components into separate registers,
+ creating an invalid memory address RTX.
+
+ The code below reloads only the invalid parts of the
+ memory address RTX. */
+
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 1))
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && REG_P (XEXP (XEXP (x, 0), 1)))
+ {
+ rtx base, index;
+ bool something_reloaded = false;
+
+ base = XEXP (XEXP (x, 0), 1);
+ if (!REG_OK_FOR_BASE_STRICT_P (base))
+ {
+ push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
+ BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ something_reloaded = true;
+ }
+
+ index = XEXP (x, 1);
+ if (!REG_OK_FOR_INDEX_STRICT_P (index))
+ {
+ push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
+ INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
+ opnum, (enum reload_type)type);
+ something_reloaded = true;
+ }
+
+ gcc_assert (something_reloaded);
+ return true;
+ }
+
+ return false;
+}
+
+/* Determine if op is suitable RTX for an address register.
+ Return naked register if a register or a register subreg is
+ found, otherwise return NULL_RTX. */
+
+static rtx
+ix86_validate_address_register (rtx op)
+{
+ enum machine_mode mode = GET_MODE (op);
+
+ /* Only SImode or DImode registers can form the address. */
+ if (mode != SImode && mode != DImode)
+ return NULL_RTX;
+
+ if (REG_P (op))
+ return op;
+ else if (GET_CODE (op) == SUBREG)
+ {
+ rtx reg = SUBREG_REG (op);
+
+ if (!REG_P (reg))
+ return NULL_RTX;
+
+ mode = GET_MODE (reg);
+
+ /* Don't allow SUBREGs that span more than a word. It can
+ lead to spill failures when the register is one word out
+ of a two word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return NULL_RTX;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ if (register_no_elim_operand (reg, mode))
+ return reg;
+ }
+
+ /* Op is not a register. */
+ return NULL_RTX;
+}
+
/* Recognizes RTL expressions that are valid memory addresses for an
instruction. The MODE argument is the machine mode for the MEM
expression that wants to use this address.
struct ix86_address parts;
rtx base, index, disp;
HOST_WIDE_INT scale;
+ enum ix86_address_seg seg;
if (ix86_decompose_address (addr, &parts) <= 0)
/* Decomposition failed. */
index = parts.index;
disp = parts.disp;
scale = parts.scale;
+ seg = parts.seg;
/* Validate base register. */
if (base)
{
- rtx reg;
-
- if (REG_P (base))
- reg = base;
- else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
- reg = SUBREG_REG (base);
- else
- /* Base is not a register. */
- return false;
+ rtx reg = ix86_validate_address_register (base);
- if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
/* Validate index register. */
if (index)
{
- rtx reg;
-
- if (REG_P (index))
- reg = index;
- else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
- reg = SUBREG_REG (index);
- else
- /* Index is not a register. */
- return false;
+ rtx reg = ix86_validate_address_register (index);
- if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
+ if (reg == NULL_RTX)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
&& GET_MODE (base) != GET_MODE (index))
return false;
+ /* Address override works only on the (%reg) part of %fs:(%reg). */
+ if (seg != SEG_DEFAULT
+ && ((base && GET_MODE (base) != word_mode)
+ || (index && GET_MODE (index) != word_mode)))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
&& !x86_64_immediate_operand (disp, VOIDmode))
/* Displacement is out of range. */
return false;
+ /* In x32 mode, constant addresses are sign extended to 64bit, so
+ we must reject addresses in the range 0x80000000 to 0xffffffff. */
+ else if (TARGET_X32 && !(index || base)
+ && CONST_INT_P (disp)
+ && val_signbit_known_set_p (SImode, INTVAL (disp)))
+ return false;
}
/* Everything looks valid. */
{
rtx addr = orig;
rtx new_rtx = orig;
- rtx base;
#if TARGET_MACHO
if (TARGET_MACHO && !TARGET_64BIT)
}
else
{
- base = legitimize_pic_address (XEXP (addr, 0), reg);
- new_rtx = legitimize_pic_address (XEXP (addr, 1),
- base == reg ? NULL_RTX : reg);
+ rtx base = legitimize_pic_address (op0, reg);
+ enum machine_mode mode = GET_MODE (base);
+ new_rtx
+ = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
if (CONST_INT_P (new_rtx))
- new_rtx = plus_constant (base, INTVAL (new_rtx));
+ {
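+ /* Offsets outside the -16*1024*1024 .. 16*1024*1024-1 range
+ aren't valid PIC displacements; keep the base in a register
+ and add the offset separately. */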
+ if (INTVAL (new_rtx) < -16*1024*1024
+ || INTVAL (new_rtx) >= 16*1024*1024)
+ {
+ if (!x86_64_immediate_operand (new_rtx, mode))
+ new_rtx = force_reg (mode, new_rtx);
+ new_rtx
+ = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
+ }
+ else
+ new_rtx = plus_constant (base, INTVAL (new_rtx));
+ }
else
{
- if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
+ if (GET_CODE (new_rtx) == PLUS
+ && CONSTANT_P (XEXP (new_rtx, 1)))
{
- base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
+ base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
new_rtx = XEXP (new_rtx, 1);
}
- new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
+ new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
}
}
}
tp = get_thread_pointer (true);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
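+ /* In x32 mode X may be ptr_mode (SImode) while Pmode is DImode;
+ zero-extend it so the REG_EQUAL note below matches the mode
+ of DEST. */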
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
+
set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
}
else
if (TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
+ rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx insns;
start_sequence ();
emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
insns = get_insns ();
end_sequence ();
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
+
RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, dest, rax, x);
}
if (TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
+ rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx insns, eqv;
start_sequence ();
emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
{
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+ if (GET_MODE (x) != Pmode)
+ x = gen_rtx_ZERO_EXTEND (Pmode, x);
+
set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
}
break;
if (TARGET_64BIT)
{
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_MODE (XEXP (x, 0)) == Pmode
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
+ {
+ rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
+ x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
+ if (MEM_P (orig_x))
+ x = replace_equiv_address_nv (orig_x, x);
+ return x;
+ }
if (GET_CODE (x) != CONST
|| GET_CODE (XEXP (x, 0)) != UNSPEC
|| (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
&& XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
- || !MEM_P (orig_x))
+ || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
return ix86_delegitimize_tls_address (orig_x);
x = XVECEXP (XEXP (x, 0), 0, 0);
- if (GET_MODE (orig_x) != GET_MODE (x))
+ if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
{
x = simplify_gen_subreg (GET_MODE (orig_x), x,
GET_MODE (x), 0);
Those same assemblers have the same but opposite lossage on cmov. */
if (mode == CCmode)
suffix = fp ? "nbe" : "a";
- else if (mode == CCCmode)
- suffix = "b";
else
gcc_unreachable ();
break;
}
break;
case LTU:
- gcc_assert (mode == CCmode || mode == CCCmode);
- suffix = "b";
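+ /* In CCCmode only the carry flag is valid, so LTU reduces
+ to a plain carry test. */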
+ if (mode == CCmode)
+ suffix = "b";
+ else if (mode == CCCmode)
+ suffix = "c";
+ else
+ gcc_unreachable ();
break;
case GE:
switch (mode)
}
break;
case GEU:
- /* ??? As above. */
- gcc_assert (mode == CCmode || mode == CCCmode);
- suffix = fp ? "nb" : "ae";
+ if (mode == CCmode)
+ suffix = fp ? "nb" : "ae";
+ else if (mode == CCCmode)
+ suffix = "nc";
+ else
+ gcc_unreachable ();
break;
case LE:
gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
suffix = "le";
break;
case LEU:
- /* ??? As above. */
if (mode == CCmode)
suffix = "be";
- else if (mode == CCCmode)
- suffix = fp ? "nb" : "ae";
else
gcc_unreachable ();
break;
print_reg (rtx x, int code, FILE *file)
{
const char *reg;
+ unsigned int regno;
bool duplicated = code == 'd' && TARGET_AVX;
- gcc_assert (x == pc_rtx
- || (REGNO (x) != ARG_POINTER_REGNUM
- && REGNO (x) != FRAME_POINTER_REGNUM
- && REGNO (x) != FLAGS_REG
- && REGNO (x) != FPSR_REG
- && REGNO (x) != FPCR_REG));
-
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('%', file);
return;
}
+ regno = true_regnum (x);
+ gcc_assert (regno != ARG_POINTER_REGNUM
+ && regno != FRAME_POINTER_REGNUM
+ && regno != FLAGS_REG
+ && regno != FPSR_REG
+ && regno != FPCR_REG);
+
if (code == 'w' || MMX_REG_P (x))
code = 2;
else if (code == 'b')
code = GET_MODE_SIZE (GET_MODE (x));
/* Irritatingly, AMD extended registers use different naming convention
- from the normal registers. */
- if (REX_INT_REG_P (x))
+ from the normal registers: "r%d[bwd]" */
+ if (REX_INT_REGNO_P (regno))
{
gcc_assert (TARGET_64BIT);
+ putc ('r', file);
+ fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
switch (code)
{
case 0:
error ("extended registers have no high halves");
break;
case 1:
- fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
+ putc ('b', file);
break;
case 2:
- fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
+ putc ('w', file);
break;
case 4:
- fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
+ putc ('d', file);
break;
case 8:
- fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
+ /* no suffix */
break;
default:
error ("unsupported operand size for extended register");
case 16:
case 2:
normal:
- reg = hi_reg_name[REGNO (x)];
+ reg = hi_reg_name[regno];
break;
case 1:
- if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
+ if (regno >= ARRAY_SIZE (qi_reg_name))
goto normal;
- reg = qi_reg_name[REGNO (x)];
+ reg = qi_reg_name[regno];
break;
case 0:
- if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
+ if (regno >= ARRAY_SIZE (qi_high_reg_name))
goto normal;
- reg = qi_high_reg_name[REGNO (x)];
+ reg = qi_high_reg_name[regno];
break;
case 32:
if (SSE_REG_P (x))
{
gcc_assert (!duplicated);
putc ('y', file);
- fputs (hi_reg_name[REGNO (x)] + 1, file);
+ fputs (hi_reg_name[regno] + 1, file);
return;
}
break;
Z -- likewise, with special suffixes for x87 instructions.
* -- print a star (in certain assembler syntax)
A -- print an absolute memory reference.
+ E -- print address with DImode register names if TARGET_64BIT.
w -- print the operand as if it's a "word" (HImode) even if it isn't.
s -- print a shift double count, followed by the assemblers argument
delimiter.
ix86_print_operand (file, x, 0);
return;
+ case 'E':
+ /* Wrap address in an UNSPEC to declare special handling. */
+ if (TARGET_64BIT)
+ x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
+ output_address (x);
+ return;
+
case 'L':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('l', file);
return;
case 'H':
+ if (!offsettable_memref_p (x))
+ {
+ output_operand_lossage ("operand is not an offsettable memory "
+ "reference, invalid operand "
+ "code 'H'");
+ return;
+ }
/* It doesn't actually matter what mode we use here, as we're
only going to use this for printing. */
x = adjust_address_nv (x, DImode, 8);
gcc_unreachable ();
}
- /* Check for explicit size override (codes 'b', 'w' and 'k') */
+ /* Check for explicit size override (codes 'b', 'w', 'k',
+ 'q' and 'x') */
if (code == 'b')
size = "BYTE";
else if (code == 'w')
size = "WORD";
else if (code == 'k')
size = "DWORD";
+ else if (code == 'q')
+ size = "QWORD";
+ else if (code == 'x')
+ size = "XMMWORD";
fputs (size, file);
fputs (" PTR ", file);
putc ('$', file);
/* Sign extend 32bit SFmode immediate to 8 bytes. */
if (code == 'q')
- fprintf (file, "0x%08llx", (unsigned long long) (int) l);
+ fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
+ (unsigned long long) (int) l);
else
fprintf (file, "0x%08x", (unsigned int) l);
}
int scale;
int ok;
bool vsib = false;
+ int code = 0;
if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
{
addr = XVECEXP (addr, 0, 0);
vsib = true;
}
+ else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
+ {
+ gcc_assert (TARGET_64BIT);
+ ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
+ code = 'q';
+ }
else
ok = ix86_decompose_address (addr, &parts);
gcc_assert (ok);
- if (parts.base && GET_CODE (parts.base) == SUBREG)
- {
- rtx tmp = SUBREG_REG (parts.base);
- parts.base = simplify_subreg (GET_MODE (parts.base),
- tmp, GET_MODE (tmp), 0);
- }
-
- if (parts.index && GET_CODE (parts.index) == SUBREG)
- {
- rtx tmp = SUBREG_REG (parts.index);
- parts.index = simplify_subreg (GET_MODE (parts.index),
- tmp, GET_MODE (tmp), 0);
- }
-
base = parts.base;
index = parts.index;
disp = parts.disp;
}
else
{
- int code = 0;
-
- /* Print SImode registers for zero-extended addresses to force
- addr32 prefix. Otherwise print DImode registers to avoid it. */
- if (TARGET_64BIT)
- code = ((GET_CODE (addr) == ZERO_EXTEND
- || GET_CODE (addr) == AND)
- ? 'l'
- : 'q');
+ /* Print SImode register names to force addr32 prefix. */
+ if (SImode_address_operand (addr, VOIDmode))
+ {
+#ifdef ENABLE_CHECKING
+ gcc_assert (TARGET_64BIT);
+ switch (GET_CODE (addr))
+ {
+ case SUBREG:
+ gcc_assert (GET_MODE (addr) == SImode);
+ gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
+ break;
+ case ZERO_EXTEND:
+ case AND:
+ gcc_assert (GET_MODE (addr) == DImode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+#endif
+ gcc_assert (!code);
+ code = 'k';
+ }
+ else if (code == 0
+ && TARGET_X32
+ && disp
+ && CONST_INT_P (disp)
+ && INTVAL (disp) < -16*1024*1024)
+ {
+ /* X32 runs in 64-bit mode, where displacement, DISP, in
+ address DISP(%r64), is encoded as 32-bit immediate sign-
+ extended from 32-bit to 64-bit. For -0x40000300(%r64),
+ address is %r64 + 0xffffffffbffffd00. When %r64 <
+ 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
+ which is invalid for x32. The correct address is %r64
+ - 0x40000300 == 0xf7ffdd64. To properly encode
+ -0x40000300(%r64) for x32, we zero-extend negative
+ displacement by forcing addr32 prefix which truncates
+ 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
+ zero-extend all negative displacements, including -1(%rsp).
+ However, for small negative displacements, sign-extension
+ won't cause overflow. We only zero-extend negative
+ displacements if they are less than -16*1024*1024, the same
+ bound used to check legitimate address displacements for PIC. */
+ code = 'k';
+ }
if (ASSEMBLER_DIALECT == ASM_ATT)
{
op0, 1, OPTAB_DIRECT);
if (tmp == op0)
return;
- if (GET_MODE (tmp) != mode)
- op1 = convert_to_mode (mode, tmp, 1);
+ op1 = convert_to_mode (mode, tmp, 1);
}
}
{
rtx m;
rtx (*extract) (rtx, rtx, rtx);
- rtx (*move_unaligned) (rtx, rtx);
+ rtx (*load_unaligned) (rtx, rtx);
+ rtx (*store_unaligned) (rtx, rtx);
enum machine_mode mode;
switch (GET_MODE (op0))
gcc_unreachable ();
case V32QImode:
extract = gen_avx_vextractf128v32qi;
- move_unaligned = gen_avx_movdqu256;
+ load_unaligned = gen_avx_loaddqu256;
+ store_unaligned = gen_avx_storedqu256;
mode = V16QImode;
break;
case V8SFmode:
extract = gen_avx_vextractf128v8sf;
- move_unaligned = gen_avx_movups256;
+ load_unaligned = gen_avx_loadups256;
+ store_unaligned = gen_avx_storeups256;
mode = V4SFmode;
break;
case V4DFmode:
extract = gen_avx_vextractf128v4df;
- move_unaligned = gen_avx_movupd256;
+ load_unaligned = gen_avx_loadupd256;
+ store_unaligned = gen_avx_storeupd256;
mode = V2DFmode;
break;
}
- if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ if (MEM_P (op1))
{
- rtx r = gen_reg_rtx (mode);
- m = adjust_address (op1, mode, 0);
- emit_move_insn (r, m);
- m = adjust_address (op1, mode, 16);
- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
- emit_move_insn (op0, r);
+ if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
+ {
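+ /* Split the 32-byte unaligned load in two: load the low
+ 16 bytes into a register, then VEC_CONCAT with the high
+ 16 bytes straight from memory. */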
+ rtx r = gen_reg_rtx (mode);
+ m = adjust_address (op1, mode, 0);
+ emit_move_insn (r, m);
+ m = adjust_address (op1, mode, 16);
+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
+ emit_move_insn (op0, r);
+ }
+ else
+ emit_insn (load_unaligned (op0, op1));
}
- else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ else if (MEM_P (op0))
{
- m = adjust_address (op0, mode, 0);
- emit_insn (extract (m, op1, const0_rtx));
- m = adjust_address (op0, mode, 16);
- emit_insn (extract (m, op1, const1_rtx));
+ if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
+ {
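+ /* Likewise split the 32-byte unaligned store into two
+ 16-byte vextractf128 stores. */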
+ m = adjust_address (op0, mode, 0);
+ emit_insn (extract (m, op1, const0_rtx));
+ m = adjust_address (op0, mode, 16);
+ emit_insn (extract (m, op1, const1_rtx));
+ }
+ else
+ emit_insn (store_unaligned (op0, op1));
}
else
- emit_insn (move_unaligned (op0, op1));
+ gcc_unreachable ();
}
/* Implement the movmisalign patterns for SSE. Non-SSE modes go
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
rtx op0, op1, m;
+ rtx (*move_unaligned) (rtx, rtx);
op0 = operands[0];
op1 = operands[1];
/* If we're optimizing for size, movups is the smallest. */
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
return;
}
+ if (MEM_P (op1))
+ move_unaligned = gen_sse2_loaddqu;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse2_storedqu;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
break;
case 32:
op0 = gen_lowpart (V32QImode, op0);
switch (mode)
{
case V4SFmode:
- emit_insn (gen_sse_movups (op0, op1));
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
+ emit_insn (move_unaligned (op0, op1));
break;
case V8SFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
case V2DFmode:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
{
+ if (MEM_P (op1))
+ move_unaligned = gen_sse_loadups;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse_storeups;
+ else
+ gcc_unreachable ();
+
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (move_unaligned (op0, op1));
return;
}
- emit_insn (gen_sse2_movupd (op0, op1));
+ if (MEM_P (op1))
+ move_unaligned = gen_sse2_loadupd;
+ else if (MEM_P (op0))
+ move_unaligned = gen_sse2_storeupd;
+ else
+ gcc_unreachable ();
+
+ emit_insn (move_unaligned (op0, op1));
break;
case V4DFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_loaddqu (op0, op1));
return;
}
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_loadupd (op0, op1));
return;
}
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_loadups (op0, op1));
return;
}
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
return;
}
{
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_sse2_movdqu (op0, op1));
+ emit_insn (gen_sse2_storedqu (op0, op1));
return;
}
{
op0 = gen_lowpart (V2DFmode, op0);
op1 = gen_lowpart (V2DFmode, op1);
- emit_insn (gen_sse2_movupd (op0, op1));
+ emit_insn (gen_sse2_storeupd (op0, op1));
}
else
{
if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
{
op0 = gen_lowpart (V4SFmode, op0);
- emit_insn (gen_sse_movups (op0, op1));
+ emit_insn (gen_sse_storeups (op0, op1));
}
else
{
basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
rtx prev = start;
rtx next = NULL;
- enum attr_type insn_type;
*found = false;
distance = increase_distance (prev, next, distance);
if (insn_defines_reg (regno1, regno2, prev))
{
- insn_type = get_attr_type (prev);
- if (insn_type != TYPE_LEA)
+ if (recog_memoized (prev) < 0
+ || get_attr_type (prev) != TYPE_LEA)
{
*found = true;
return distance;
over a sequence of instructions. The instruction sequence has
SPLIT_COST cycles higher latency than the lea instruction. */
-bool
+static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
- unsigned int regno2, unsigned int split_cost)
+ unsigned int regno2, int split_cost)
{
int dist_define, dist_use;
return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
+/* Return true if we should emit an lea instruction instead of a mov
+ instruction. */
+
+bool
+ix86_use_lea_for_mov (rtx insn, rtx operands[])
+{
+ unsigned int regno0;
+ unsigned int regno1;
+
+ /* Check if we need to optimize. */
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
+ return false;
+
+ /* Use lea for reg to reg moves only. */
+ if (!REG_P (operands[0]) || !REG_P (operands[1]))
+ return false;
+
+ regno0 = true_regnum (operands[0]);
+ regno1 = true_regnum (operands[1]);
+
+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
+}
+
/* Return true if we need to split lea into a sequence of
instructions to avoid AGU stalls. */
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
unsigned int regno0 = true_regnum (operands[0]) ;
- unsigned int regno1 = -1;
- unsigned int regno2 = -1;
- unsigned int split_cost = 0;
+ unsigned int regno1 = INVALID_REGNUM;
+ unsigned int regno2 = INVALID_REGNUM;
+ int split_cost = 0;
struct ix86_address parts;
int ok;
+ /* FIXME: Handle zero-extended addresses. */
+ if (GET_CODE (operands[1]) == ZERO_EXTEND
+ || GET_CODE (operands[1]) == AND)
+ return false;
+
/* Check we need to optimize. */
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
ok = ix86_decompose_address (operands[1], &parts);
gcc_assert (ok);
+ /* There should be at least two components in the address. */
+ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
+ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
+ return false;
+
/* We should not split into add if non legitimate pic
operand is used as displacement. */
if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
x = gen_rtx_REG (V4SImode, REGNO (value));
if (vecmode == V4SFmode)
- emit_insn (gen_sse2_cvttps2dq (x, value));
+ emit_insn (gen_fix_truncv4sfv4si2 (x, value));
else
emit_insn (gen_sse2_cvttpd2dq (x, value));
value = x;
emit_move_insn (target, fp_hi);
}
+/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
+ a vector of unsigned ints VAL to a vector of floats TARGET. */
+
+void
+ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
+{
+ rtx tmp[8];
+ REAL_VALUE_TYPE TWO16r;
+ enum machine_mode intmode = GET_MODE (val);
+ enum machine_mode fltmode = GET_MODE (target);
+ rtx (*cvt) (rtx, rtx);
+
+ if (intmode == V4SImode)
+ cvt = gen_floatv4siv4sf2;
+ else
+ cvt = gen_floatv8siv8sf2;
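+ /* Split each element into 16-bit halves, convert both halves
+ exactly with the signed conversion, then recombine the result
+ as lo + hi * 0x1p16. */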
+ tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
+ tmp[0] = force_reg (intmode, tmp[0]);
+ tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
+ OPTAB_DIRECT);
+ tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ tmp[3] = gen_reg_rtx (fltmode);
+ emit_insn (cvt (tmp[3], tmp[1]));
+ tmp[4] = gen_reg_rtx (fltmode);
+ emit_insn (cvt (tmp[4], tmp[2]));
+ real_ldexp (&TWO16r, &dconst1, 16);
+ tmp[5] = const_double_from_real_value (TWO16r, SFmode);
+ tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
+ tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
+ OPTAB_DIRECT);
+ tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
+ OPTAB_DIRECT);
+ if (tmp[7] != target)
+ emit_move_insn (target, tmp[7]);
+}
+
+/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
+ pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
+ This is done by doing just signed conversion if < 0x1p31, and otherwise by
+ subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
+
+rtx
+ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
+{
+ REAL_VALUE_TYPE TWO31r;
+ rtx two31r, tmp[4];
+ enum machine_mode mode = GET_MODE (val);
+ enum machine_mode scalarmode = GET_MODE_INNER (mode);
+ enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
+ rtx (*cmp) (rtx, rtx, rtx, rtx);
+ int i;
+
+ for (i = 0; i < 3; i++)
+ tmp[i] = gen_reg_rtx (mode);
+ real_ldexp (&TWO31r, &dconst1, 31);
+ two31r = const_double_from_real_value (TWO31r, scalarmode);
+ two31r = ix86_build_const_vector (mode, 1, two31r);
+ two31r = force_reg (mode, two31r);
+ switch (mode)
+ {
+ case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
+ case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
+ case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
+ case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
+ default: gcc_unreachable ();
+ }
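+ /* tmp[0] becomes a per-element mask of VAL >= 0x1p31, computed
+ as a LE comparison of the 0x1p31 constant against VAL. */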
+ tmp[3] = gen_rtx_LE (mode, two31r, val);
+ emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
+ tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
+ 0, OPTAB_DIRECT);
+ if (intmode == V4SImode || TARGET_AVX2)
+ *xorp = expand_simple_binop (intmode, ASHIFT,
+ gen_lowpart (intmode, tmp[0]),
+ GEN_INT (31), NULL_RTX, 0,
+ OPTAB_DIRECT);
+ else
+ {
+ rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
+ two31 = ix86_build_const_vector (intmode, 1, two31);
+ *xorp = expand_simple_binop (intmode, AND,
+ gen_lowpart (intmode, tmp[0]),
+ two31, NULL_RTX, 0,
+ OPTAB_DIRECT);
+ }
+ return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
+ 0, OPTAB_DIRECT);
+}
+
/* A subroutine of ix86_build_signbit_mask. If VECT is true,
then replicate the value for all elements of the vector
register. */
return CCmode;
case GTU: /* CF=0 & ZF=0 */
case LEU: /* CF=1 | ZF=1 */
- /* Detect overflow checks. They need just the carry flag. */
- if (GET_CODE (op0) == MINUS
- && rtx_equal_p (op1, XEXP (op0, 0)))
- return CCCmode;
- else
- return CCmode;
+ return CCmode;
/* Codes possibly doable only with sign flag when
comparing against zero. */
case GE: /* SF=OF or SF=0 */
cop0 = operands[4];
cop1 = operands[5];
- /* XOP supports all of the comparisons on all vector int types. */
- if (!TARGET_XOP)
+ /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
+ and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
+ if ((code == LT || code == GE)
+ && data_mode == mode
+ && cop1 == CONST0_RTX (mode)
+ && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
+ && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
+ && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
+ && (GET_MODE_SIZE (data_mode) == 16
+ || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
+ {
+ rtx negop = operands[2 - (code == LT)];
+ int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
+ if (negop == CONST1_RTX (data_mode))
+ {
+ rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
+ operands[0], 1, OPTAB_DIRECT);
+ if (res != operands[0])
+ emit_move_insn (operands[0], res);
+ return true;
+ }
+ else if (GET_MODE_INNER (data_mode) != DImode
+ && vector_all_ones_operand (negop, data_mode))
+ {
+ rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
+ operands[0], 0, OPTAB_DIRECT);
+ if (res != operands[0])
+ emit_move_insn (operands[0], res);
+ return true;
+ }
+ }
+
+ if (!nonimmediate_operand (cop1, mode))
+ cop1 = force_reg (mode, cop1);
+ if (!general_operand (operands[1], data_mode))
+ operands[1] = force_reg (data_mode, operands[1]);
+ if (!general_operand (operands[2], data_mode))
+ operands[2] = force_reg (data_mode, operands[2]);
+
+ /* XOP supports all of the comparisons on all 128-bit vector int types. */
+ if (TARGET_XOP
+ && (mode == V16QImode || mode == V8HImode
+ || mode == V4SImode || mode == V2DImode))
+ ;
+ else
{
/* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
vt = force_reg (maskmode, vt);
mask = gen_lowpart (maskmode, mask);
if (maskmode == V8SImode)
- emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
+ emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
else
emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
vec[i * 2 + 1] = const1_rtx;
}
vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
- vt = force_const_mem (maskmode, vt);
+ vt = validize_mem (force_const_mem (maskmode, vt));
t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
OPTAB_DIRECT);
the high bits of the shuffle elements. No need for us to
perform an AND ourselves. */
if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8si (target, mask, op0));
+ emit_insn (gen_avx2_permvarv8si (target, op0, mask));
else
{
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
- emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
- emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
+ emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
+ emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
goto merge_two;
}
return;
case V8SFmode:
mask = gen_lowpart (V8SFmode, mask);
if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
+ emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
else
{
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
- emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
- emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
+ emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
+ emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
goto merge_two;
}
return;
t2 = gen_reg_rtx (V8SImode);
emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
+ emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
return;
case V4SFmode:
t1 = gen_reg_rtx (V8SFmode);
- t2 = gen_reg_rtx (V8SFmode);
- mask = gen_lowpart (V4SFmode, mask);
+ t2 = gen_reg_rtx (V8SImode);
+ mask = gen_lowpart (V4SImode, mask);
emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
- emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
+ emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
return;
for (i = 0; i < 16; ++i)
vec[i] = GEN_INT (i/e * e);
vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = force_const_mem (V16QImode, vt);
+ vt = validize_mem (force_const_mem (V16QImode, vt));
if (TARGET_XOP)
emit_insn (gen_xop_pperm (mask, mask, mask, vt));
else
for (i = 0; i < 16; ++i)
vec[i] = GEN_INT (i % e);
vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
- vt = force_const_mem (V16QImode, vt);
+ vt = validize_mem (force_const_mem (V16QImode, vt));
emit_insn (gen_addv16qi3 (mask, mask, vt));
}
f = ggc_alloc_cleared_machine_function ();
f->use_fast_prologue_epilogue_nregs = -1;
- f->tls_descriptor_call_expanded_p = 0;
f->call_abi = ix86_abi;
return f;
gcc_assert (n < MAX_386_STACK_LOCALS);
- /* Virtual slot is valid only before vregs are instantiated. */
- gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
-
for (s = ix86_stack_locals; s; s = s->next)
if (s->mode == mode && s->n == n)
return validize_mem (copy_rtx (s->rtl));
ix86_stack_locals = s;
return validize_mem (s->rtl);
}
+
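+/* Presumably wired up as the TARGET_INSTANTIATE_DECLS hook: walk the
+ cached stack-local slots and instantiate any virtual registers their
+ RTL still references.  */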
+static void
+ix86_instantiate_decls (void)
+{
+ struct stack_local_entry *s;
+
+ for (s = ix86_stack_locals; s; s = s->next)
+ if (s->rtl != NULL_RTX)
+ instantiate_decl_rtl (s->rtl);
+}
\f
/* Calculate the length of the memory address in the instruction encoding.
Includes addr32 prefix, does not include the one-byte modrm, opcode,
- or other prefixes. */
+ or other prefixes. We never generate an addr32 prefix for LEA insns. */
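+/* Worked example: in 64-bit code the non-LEA address (%r13d) counts one
+ byte for the addr32 prefix plus the one-byte zero displacement that
+ r13 always requires, so this returns 2.  */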
int
-memory_address_length (rtx addr)
+memory_address_length (rtx addr, bool lea)
{
struct ix86_address parts;
rtx base, index, disp;
ok = ix86_decompose_address (addr, &parts);
gcc_assert (ok);
- if (parts.base && GET_CODE (parts.base) == SUBREG)
- parts.base = SUBREG_REG (parts.base);
- if (parts.index && GET_CODE (parts.index) == SUBREG)
- parts.index = SUBREG_REG (parts.index);
+ len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
+
+ /* If this is not a LEA instruction, add the length of the addr32 prefix. */
+ if (TARGET_64BIT && !lea
+ && (SImode_address_operand (addr, VOIDmode)
+ || (parts.base && GET_MODE (parts.base) == SImode)
+ || (parts.index && GET_MODE (parts.index) == SImode)))
+ len++;
base = parts.base;
index = parts.index;
disp = parts.disp;
- /* Add length of addr32 prefix. */
- len = (GET_CODE (addr) == ZERO_EXTEND
- || GET_CODE (addr) == AND);
+ if (base && GET_CODE (base) == SUBREG)
+ base = SUBREG_REG (base);
+ if (index && GET_CODE (index) == SUBREG)
+ index = SUBREG_REG (index);
+
+ gcc_assert (base == NULL_RTX || REG_P (base));
+ gcc_assert (index == NULL_RTX || REG_P (index));
/* Rule of thumb:
- esp as the base always wants an index,
/* esp (for its index) and ebp (for its displacement) need
the two-byte modrm form. Similarly for r12 and r13 in 64-bit
code. */
- if (REG_P (addr)
- && (addr == arg_pointer_rtx
- || addr == frame_pointer_rtx
- || REGNO (addr) == SP_REG
- || REGNO (addr) == BP_REG
- || REGNO (addr) == R12_REG
- || REGNO (addr) == R13_REG))
- len = 1;
+ if (base == arg_pointer_rtx
+ || base == frame_pointer_rtx
+ || REGNO (base) == SP_REG
+ || REGNO (base) == BP_REG
+ || REGNO (base) == R12_REG
+ || REGNO (base) == R13_REG)
+ len++;
}
/* Direct Addressing. In 64-bit mode mod 00 r/m 5
by UNSPEC. */
else if (disp && !base && !index)
{
- len = 4;
+ len += 4;
if (TARGET_64BIT)
{
rtx symbol = disp;
|| (XINT (symbol, 1) != UNSPEC_GOTPCREL
&& XINT (symbol, 1) != UNSPEC_PCREL
&& XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
- len += 1;
+ len++;
}
}
-
else
{
/* Find the length of the displacement constant. */
if (disp)
{
if (base && satisfies_constraint_K (disp))
- len = 1;
+ len += 1;
else
- len = 4;
+ len += 4;
}
/* ebp always wants a displacement. Similarly r13. */
- else if (base && REG_P (base)
- && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
- len = 1;
+ else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
+ len++;
/* An index requires the two-byte modrm form.... */
if (index
/* ...like esp (or r12), which always wants an index. */
|| base == arg_pointer_rtx
|| base == frame_pointer_rtx
- || (base && REG_P (base)
- && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
- len += 1;
- }
-
- switch (parts.seg)
- {
- case SEG_FS:
- case SEG_GS:
- len += 1;
- break;
- default:
- break;
+ || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
+ len++;
}
return len;
case MODE_SI:
len = 4;
break;
- /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
+ /* Immediates for DImode instructions are encoded
+ as 32-bit sign-extended values. */
case MODE_DI:
len = 4;
break;
}
return len;
}
+
/* Compute default value for "length_address" attribute. */
int
ix86_attr_length_address_default (rtx insn)
gcc_assert (GET_CODE (set) == SET);
addr = SET_SRC (set);
- if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
- {
- if (GET_CODE (addr) == ZERO_EXTEND)
- addr = XEXP (addr, 0);
- if (GET_CODE (addr) == SUBREG)
- addr = SUBREG_REG (addr);
- }
- return memory_address_length (addr);
+ return memory_address_length (addr, true);
}
extract_insn_cached (insn);
if (*constraints == 'X')
continue;
}
- return memory_address_length (XEXP (recog_data.operand[i], 0));
+ return memory_address_length (XEXP (recog_data.operand[i], 0), false);
}
return 0;
}
case PROCESSOR_CORE2_64:
case PROCESSOR_COREI7_32:
case PROCESSOR_COREI7_64:
+ case PROCESSOR_ATOM:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
as many instructions can be executed on a cycle, i.e.,
issue_rate. I wonder why tuning for many CPUs does not do this. */
int
ix86_data_alignment (tree type, int align)
{
- int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
+ int max_align
+ = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
if (AGGREGATE_TYPE_P (type)
&& TYPE_SIZE (type)
fntype = TREE_TYPE (fndecl);
ccvt = ix86_get_callcvt (fntype);
- if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
{
/* Fastcall functions use ecx/edx for arguments, which leaves
us with EAX for the static chain.
leaves us with EAX for the static chain. */
regno = AX_REG;
}
+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
+ {
+ /* Thiscall functions use ecx for arguments, which leaves
+ us with EAX and EDX for the static chain.
+ For ABI compatibility we use EAX. */
+ regno = AX_REG;
+ }
else if (ix86_function_regparm (fntype, fndecl) == 3)
{
/* For regparm 3, we have no free call-clobbered registers in
IX86_BUILTIN_CVTTPS2DQ,
IX86_BUILTIN_MOVNTI,
+ IX86_BUILTIN_MOVNTI64,
IX86_BUILTIN_MOVNTPD,
IX86_BUILTIN_MOVNTDQ,
IX86_BUILTIN_PMULDQ128,
IX86_BUILTIN_PMULLD128,
- IX86_BUILTIN_ROUNDPD,
- IX86_BUILTIN_ROUNDPS,
IX86_BUILTIN_ROUNDSD,
IX86_BUILTIN_ROUNDSS,
+ IX86_BUILTIN_ROUNDPD,
+ IX86_BUILTIN_ROUNDPS,
+
IX86_BUILTIN_FLOORPD,
IX86_BUILTIN_CEILPD,
IX86_BUILTIN_TRUNCPD,
IX86_BUILTIN_RINTPD,
IX86_BUILTIN_ROUNDPD_AZ,
+
+ IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
+ IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
+ IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
+
IX86_BUILTIN_FLOORPS,
IX86_BUILTIN_CEILPS,
IX86_BUILTIN_TRUNCPS,
IX86_BUILTIN_RINTPS,
IX86_BUILTIN_ROUNDPS_AZ,
+ IX86_BUILTIN_FLOORPS_SFIX,
+ IX86_BUILTIN_CEILPS_SFIX,
+ IX86_BUILTIN_ROUNDPS_AZ_SFIX,
+
IX86_BUILTIN_PTESTZ,
IX86_BUILTIN_PTESTC,
IX86_BUILTIN_PTESTNZC,
IX86_BUILTIN_VEC_SET_V16QI,
IX86_BUILTIN_VEC_PACK_SFIX,
+ IX86_BUILTIN_VEC_PACK_SFIX256,
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
IX86_BUILTIN_TRUNCPD256,
IX86_BUILTIN_RINTPD256,
IX86_BUILTIN_ROUNDPD_AZ256,
+
+ IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
+ IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
+ IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
+
IX86_BUILTIN_FLOORPS256,
IX86_BUILTIN_CEILPS256,
IX86_BUILTIN_TRUNCPS256,
IX86_BUILTIN_RINTPS256,
IX86_BUILTIN_ROUNDPS_AZ256,
+ IX86_BUILTIN_FLOORPS_SFIX256,
+ IX86_BUILTIN_CEILPS_SFIX256,
+ IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
+
IX86_BUILTIN_UNPCKHPD256,
IX86_BUILTIN_UNPCKLPD256,
IX86_BUILTIN_UNPCKHPS256,
IX86_BUILTIN_GATHERDIV4SI,
IX86_BUILTIN_GATHERDIV8SI,
+ /* Alternate 4-element gather for the vectorizer, where
+ all operands are 32-byte wide. */
+ IX86_BUILTIN_GATHERALTSIV4DF,
+ IX86_BUILTIN_GATHERALTDIV8SF,
+ IX86_BUILTIN_GATHERALTSIV4DI,
+ IX86_BUILTIN_GATHERALTDIV8SI,
+
/* TFmode support builtins. */
IX86_BUILTIN_INFQ,
IX86_BUILTIN_HUGE_VALQ,
IX86_BUILTIN_CPYSGNPS256,
IX86_BUILTIN_CPYSGNPD256,
- IX86_BUILTIN_CVTUDQ2PS,
-
/* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
/* SSE or 3DNow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
/* SSE2 */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
+ { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
+
/* AVX2 */
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
-
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
- { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
+
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
+\f
+/* TM vector builtins. */
+
+/* Reuse the existing x86-specific `struct builtin_description' because
+ we're lazy. Add casts to make them fit. */
+static const struct builtin_description bdesc_tm[] =
+{
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
+
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
+
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
+
+ { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
+};
+
+/* TM callbacks. */
+
+/* Return the builtin decl needed to load a vector of TYPE. */
+
+static tree
+ix86_builtin_tm_load (tree type)
+{
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ {
+ switch (tree_low_cst (TYPE_SIZE (type), 1))
+ {
+ case 64:
+ return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
+ case 128:
+ return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
+ case 256:
+ return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
+ }
+ }
+ return NULL_TREE;
+}
+
+/* Return the builtin decl needed to store a vector of TYPE. */
+
+static tree
+ix86_builtin_tm_store (tree type)
+{
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ {
+ switch (tree_low_cst (TYPE_SIZE (type), 1))
+ {
+ case 64:
+ return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
+ case 128:
+ return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
+ case 256:
+ return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
+ }
+ }
+ return NULL_TREE;
+}
+\f
+/* Initialize the transactional memory vector load/store builtins. */
+
+static void
+ix86_init_tm_builtins (void)
+{
+ enum ix86_builtin_func_type ftype;
+ const struct builtin_description *d;
+ size_t i;
+ tree decl;
+ tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
+ tree attrs_log, attrs_type_log;
+
+ if (!flag_tm)
+ return;
+
+ /* If there are no builtins defined, we must be compiling in a
+ language without trans-mem support. */
+ if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
+ return;
+
+ /* Use whatever attributes a normal TM load has. */
+ decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
+ attrs_load = DECL_ATTRIBUTES (decl);
+ attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ /* Use whatever attributes a normal TM store has. */
+ decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
+ attrs_store = DECL_ATTRIBUTES (decl);
+ attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ /* Use whatever attributes a normal TM log has. */
+ decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
+ attrs_log = DECL_ATTRIBUTES (decl);
+ attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+
+ for (i = 0, d = bdesc_tm;
+ i < ARRAY_SIZE (bdesc_tm);
+ i++, d++)
+ {
+ if ((d->mask & ix86_isa_flags) != 0
+ || (lang_hooks.builtin_function
+ == lang_hooks.builtin_function_ext_scope))
+ {
+ tree type, attrs, attrs_type;
+ enum built_in_function code = (enum built_in_function) d->code;
+
+ ftype = (enum ix86_builtin_func_type) d->flag;
+ type = ix86_get_builtin_func_type (ftype);
+
+ if (BUILTIN_TM_LOAD_P (code))
+ {
+ attrs = attrs_load;
+ attrs_type = attrs_type_load;
+ }
+ else if (BUILTIN_TM_STORE_P (code))
+ {
+ attrs = attrs_store;
+ attrs_type = attrs_type_store;
+ }
+ else
+ {
+ attrs = attrs_log;
+ attrs_type = attrs_type_log;
+ }
+ decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
+ /* The builtin without the prefix for
+ calling it directly. */
+ d->name + strlen ("__builtin_"),
+ attrs);
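+ /* E.g. "__builtin__ITM_WM64" thus also becomes directly
+ callable as plain "_ITM_WM64".  */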
+ /* add_builtin_function () will set the DECL_ATTRIBUTES; now
+ set the TYPE_ATTRIBUTES as well. */
+ decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
+
+ set_builtin_decl (code, decl, false);
+ }
+ }
+}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
in the current target ISA to allow the user to compile particular modules
V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
IX86_BUILTIN_GATHERDIV8SI);
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
+ V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
+ IX86_BUILTIN_GATHERALTSIV4DF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
+ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
+ IX86_BUILTIN_GATHERALTDIV8SF);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
+ V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
+ IX86_BUILTIN_GATHERALTSIV4DI);
+
+ def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
+ V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
+ IX86_BUILTIN_GATHERALTDIV8SI);
+
/* MMX access to the vec_init patterns. */
def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
TREE_READONLY (t) = 1;
ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
+ ix86_init_tm_builtins ();
ix86_init_mmx_sse_builtins ();
if (TARGET_LP64)
return SUBREG_REG (target);
}
-/* Subroutine of ix86_expand_args_builtin to take care of round insns. */
+/* Subroutines of ix86_expand_args_builtin to take care of round insns. */
static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
return target;
}
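+
+/* Expand the two-operand *_vec_pack_sfix round builtins: the insn
+ rounds a pair of double vectors and packs the results into a
+ single vector of ints.  */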
+static rtx
+ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2;
+ enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
+
+ if (optimize || target == 0
+ || GET_MODE (target) != tmode
+ || !insn_data[d->icode].operand[0].predicate (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ op0 = safe_vector_operand (op0, mode0);
+ op1 = safe_vector_operand (op1, mode1);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !insn_data[d->icode].operand[0].predicate (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !insn_data[d->icode].operand[1].predicate (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ op2 = GEN_INT (d->comparison);
+
+ pat = GEN_FCN (d->icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ return target;
+}
+
/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
static rtx
case V4DF_FTYPE_V4DF_ROUND:
case V4SF_FTYPE_V4SF_ROUND:
case V8SF_FTYPE_V8SF_ROUND:
+ case V4SI_FTYPE_V4SF_ROUND:
+ case V8SI_FTYPE_V8SF_ROUND:
return ix86_expand_sse_round (d, exp, target);
+ case V4SI_FTYPE_V2DF_V2DF_ROUND:
+ case V8SI_FTYPE_V4DF_V4DF_ROUND:
+ return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
case INT_FTYPE_V8SF_V8SF_PTEST:
case INT_FTYPE_V4DI_V4DI_PTEST:
case INT_FTYPE_V4DF_V4DF_PTEST:
case V32QI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V16HI_V16HI:
+ case V8SI_FTYPE_V4DF_V4DF:
case V8SI_FTYPE_V8SI_V8SI:
case V8SI_FTYPE_V16HI_V16HI:
case V4DI_FTYPE_V4DI_V4DI:
error ("the last argument must be an 1-bit immediate");
return const0_rtx;
- case CODE_FOR_sse4_1_roundpd:
- case CODE_FOR_sse4_1_roundps:
case CODE_FOR_sse4_1_roundsd:
case CODE_FOR_sse4_1_roundss:
+
+ case CODE_FOR_sse4_1_roundpd:
+ case CODE_FOR_sse4_1_roundps:
+ case CODE_FOR_avx_roundpd256:
+ case CODE_FOR_avx_roundps256:
+
+ case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
+ case CODE_FOR_sse4_1_roundps_sfix:
+ case CODE_FOR_avx_roundpd_vec_pack_sfix256:
+ case CODE_FOR_avx_roundps_sfix256:
+
case CODE_FOR_sse4_1_blendps:
case CODE_FOR_avx_blendpd256:
case CODE_FOR_avx_vpermilv4df:
- case CODE_FOR_avx_roundpd256:
- case CODE_FOR_avx_roundps256:
error ("the last argument must be a 4-bit immediate");
return const0_rtx;
case VOID_FTYPE_PFLOAT_V4SF:
case VOID_FTYPE_PDOUBLE_V4DF:
case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PLONGLONG_LONGLONG:
case VOID_FTYPE_PULONGLONG_ULONGLONG:
case VOID_FTYPE_PINT_INT:
nargs = 1;
arg_adjust = 0;
if (optimize
|| target == 0
- || GET_MODE (target) != tmode
- || !insn_p->operand[0].predicate (target, tmode))
+ || !register_operand (target, tmode)
+ || GET_MODE (target) != tmode)
target = gen_reg_rtx (tmode);
}
case IX86_BUILTIN_LDMXCSR:
op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
- target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ target = assign_386_stack_local (SImode, SLOT_TEMP);
emit_move_insn (target, op0);
emit_insn (gen_sse_ldmxcsr (target));
return 0;
case IX86_BUILTIN_STMXCSR:
- target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+ target = assign_386_stack_local (SImode, SLOT_TEMP);
emit_insn (gen_sse_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
icode = CODE_FOR_avx2_gatherdiv4sf;
goto gather_gen;
case IX86_BUILTIN_GATHERDIV8SF:
- icode = CODE_FOR_avx2_gatherdiv4sf256;
+ icode = CODE_FOR_avx2_gatherdiv8sf;
goto gather_gen;
case IX86_BUILTIN_GATHERSIV2DI:
icode = CODE_FOR_avx2_gathersiv2di;
icode = CODE_FOR_avx2_gatherdiv4si;
goto gather_gen;
case IX86_BUILTIN_GATHERDIV8SI:
- icode = CODE_FOR_avx2_gatherdiv4si256;
+ icode = CODE_FOR_avx2_gatherdiv8si;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTSIV4DF:
+ icode = CODE_FOR_avx2_gathersiv4df;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTDIV8SF:
+ icode = CODE_FOR_avx2_gatherdiv8sf;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTSIV4DI:
+ icode = CODE_FOR_avx2_gathersiv4di;
+ goto gather_gen;
+ case IX86_BUILTIN_GATHERALTDIV8SI:
+ icode = CODE_FOR_avx2_gatherdiv8si;
+ goto gather_gen;
gather_gen:
arg0 = CALL_EXPR_ARG (exp, 0);
mode3 = insn_data[icode].operand[4].mode;
mode4 = insn_data[icode].operand[5].mode;
- if (target == NULL_RTX)
- target = gen_reg_rtx (insn_data[icode].operand[0].mode);
+ if (target == NULL_RTX
+ || GET_MODE (target) != insn_data[icode].operand[0].mode
+ || !insn_data[icode].operand[0].predicate (target,
+ GET_MODE (target)))
+ subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
+ else
+ subtarget = target;
+
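+ /* The alternate gathers mix operand widths: GATHERALTSIV* take an
+ index vector with twice as many lanes as elements gathered, while
+ GATHERALTDIV* fill only half of the destination's lanes.  Use just
+ the low halves of the wider operands.  */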
+ if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
+ || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
+ {
+ rtx half = gen_reg_rtx (V4SImode);
+ if (!nonimmediate_operand (op2, V8SImode))
+ op2 = copy_to_mode_reg (V8SImode, op2);
+ emit_insn (gen_vec_extract_lo_v8si (half, op2));
+ op2 = half;
+ }
+ else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
+ || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
+ {
+ rtx (*gen) (rtx, rtx);
+ rtx half = gen_reg_rtx (mode0);
+ if (mode0 == V4SFmode)
+ gen = gen_vec_extract_lo_v8sf;
+ else
+ gen = gen_vec_extract_lo_v8si;
+ if (!nonimmediate_operand (op0, GET_MODE (op0)))
+ op0 = copy_to_mode_reg (GET_MODE (op0), op0);
+ emit_insn (gen (half, op0));
+ op0 = half;
+ if (!nonimmediate_operand (op3, GET_MODE (op3)))
+ op3 = copy_to_mode_reg (GET_MODE (op3), op3);
+ emit_insn (gen (half, op3));
+ op3 = half;
+ }
/* Force memory operand only with base register here. But we
don't want to do it on memory operand for other builtin
error ("last argument must be scale 1, 2, 4, 8");
return const0_rtx;
}
- pat = GEN_FCN (icode) (target, op0, op1, op2, op3, op4);
+
+ /* Optimize. If mask is known to have all high bits set,
+ replace op0 with pc_rtx to signal that the instruction
+ overwrites the whole destination and doesn't use its
+ previous contents. */
+ if (optimize)
+ {
+ if (TREE_CODE (arg3) == VECTOR_CST)
+ {
+ tree elt;
+ unsigned int negative = 0;
+ for (elt = TREE_VECTOR_CST_ELTS (arg3);
+ elt; elt = TREE_CHAIN (elt))
+ {
+ tree cst = TREE_VALUE (elt);
+ if (TREE_CODE (cst) == INTEGER_CST
+ && tree_int_cst_sign_bit (cst))
+ negative++;
+ else if (TREE_CODE (cst) == REAL_CST
+ && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
+ negative++;
+ }
+ if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
+ op0 = pc_rtx;
+ }
+ else if (TREE_CODE (arg3) == SSA_NAME)
+ {
+ /* Also recognize when the mask is like:
+ __v2df src = _mm_setzero_pd ();
+ __v2df mask = _mm_cmpeq_pd (src, src);
+ or
+ __v8sf src = _mm256_setzero_ps ();
+ __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
+ as that is a cheaper way to load all ones into
+ a register than having to load a constant from
+ memory. */
+ gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
+ if (is_gimple_call (def_stmt))
+ {
+ tree fndecl = gimple_call_fndecl (def_stmt);
+ if (fndecl
+ && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
+ switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
+ {
+ case IX86_BUILTIN_CMPPD:
+ case IX86_BUILTIN_CMPPS:
+ case IX86_BUILTIN_CMPPD256:
+ case IX86_BUILTIN_CMPPS256:
+ if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
+ break;
+ /* FALLTHRU */
+ case IX86_BUILTIN_CMPEQPD:
+ case IX86_BUILTIN_CMPEQPS:
+ if (initializer_zerop (gimple_call_arg (def_stmt, 0))
+ && initializer_zerop (gimple_call_arg (def_stmt,
+ 1)))
+ op0 = pc_rtx;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
if (! pat)
return const0_rtx;
emit_insn (pat);
+
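+ /* A DImode index gathers only four elements, so only the low half
+ of the V8SF/V8SI pattern result is meaningful; extract it into
+ the real target.  */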
+ if (fcode == IX86_BUILTIN_GATHERDIV8SF
+ || fcode == IX86_BUILTIN_GATHERDIV8SI)
+ {
+ enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
+ ? V4SFmode : V4SImode;
+ if (target == NULL_RTX)
+ target = gen_reg_rtx (tmode);
+ if (tmode == V4SFmode)
+ emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
+ else
+ emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
+ }
+ else
+ target = subtarget;
+
return target;
default:
}
break;
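+ /* The ifloor/lfloor/llfloor cases (and the ceil, rint and round
+ cases below) map to insns that round and convert to integer in
+ one go.  */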
+ case BUILT_IN_IFLOOR:
+ case BUILT_IN_LFLOOR:
+ case BUILT_IN_LLFLOOR:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SImode && in_mode == DFmode)
+ {
+ if (out_n == 4 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
+ else if (out_n == 8 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
+ }
+ break;
+
+ case BUILT_IN_IFLOORF:
+ case BUILT_IN_LFLOORF:
+ case BUILT_IN_LLFLOORF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SImode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
+ }
+ break;
+
+ case BUILT_IN_ICEIL:
+ case BUILT_IN_LCEIL:
+ case BUILT_IN_LLCEIL:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SImode && in_mode == DFmode)
+ {
+ if (out_n == 4 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
+ else if (out_n == 8 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
+ }
+ break;
+
+ case BUILT_IN_ICEILF:
+ case BUILT_IN_LCEILF:
+ case BUILT_IN_LLCEILF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SImode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
+ }
+ break;
+
+ case BUILT_IN_IRINT:
case BUILT_IN_LRINT:
- if (out_mode == SImode && out_n == 4
- && in_mode == DFmode && in_n == 2)
- return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ case BUILT_IN_LLRINT:
+ if (out_mode == SImode && in_mode == DFmode)
+ {
+ if (out_n == 4 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ else if (out_n == 8 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
+ }
break;
+ case BUILT_IN_IRINTF:
case BUILT_IN_LRINTF:
+ case BUILT_IN_LLRINTF:
if (out_mode == SImode && in_mode == SFmode)
{
if (out_n == 4 && in_n == 4)
}
break;
+ case BUILT_IN_IROUND:
+ case BUILT_IN_LROUND:
+ case BUILT_IN_LLROUND:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SImode && in_mode == DFmode)
+ {
+ if (out_n == 4 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
+ else if (out_n == 8 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
+ }
+ break;
+
+ case BUILT_IN_IROUNDF:
+ case BUILT_IN_LROUNDF:
+ case BUILT_IN_LLROUNDF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SImode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
+ }
+ break;
+
case BUILT_IN_COPYSIGN:
if (out_mode == DFmode && in_mode == DFmode)
{
return new_fndecl;
}
-
-/* Returns a decl of a function that implements conversion of an integer vector
- into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
- are the types involved when converting according to CODE.
+/* Returns a decl of a function that implements gather load with
+ memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
Return NULL_TREE if it is not available. */
static tree
-ix86_vectorize_builtin_conversion (unsigned int code,
- tree dest_type, tree src_type)
+ix86_vectorize_builtin_gather (const_tree mem_vectype,
+ const_tree index_type, int scale)
{
- if (! TARGET_SSE2)
+ bool si;
+ enum ix86_builtins code;
+
+ if (! TARGET_AVX2)
return NULL_TREE;
- switch (code)
- {
- case FLOAT_EXPR:
- switch (TYPE_MODE (src_type))
- {
- case V4SImode:
- switch (TYPE_MODE (dest_type))
- {
- case V4SFmode:
- return (TYPE_UNSIGNED (src_type)
- ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
- case V4DFmode:
- return (TYPE_UNSIGNED (src_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
- default:
- return NULL_TREE;
- }
- break;
- case V8SImode:
- switch (TYPE_MODE (dest_type))
- {
- case V8SFmode:
- return (TYPE_UNSIGNED (src_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
- default:
- return NULL_TREE;
- }
- break;
- default:
- return NULL_TREE;
- }
+ if ((TREE_CODE (index_type) != INTEGER_TYPE
+ && !POINTER_TYPE_P (index_type))
+ || (TYPE_MODE (index_type) != SImode
+ && TYPE_MODE (index_type) != DImode))
+ return NULL_TREE;
- case FIX_TRUNC_EXPR:
- switch (TYPE_MODE (dest_type))
- {
- case V4SImode:
- switch (TYPE_MODE (src_type))
- {
- case V4SFmode:
- return (TYPE_UNSIGNED (dest_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
- case V4DFmode:
- return (TYPE_UNSIGNED (dest_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
- default:
- return NULL_TREE;
- }
- break;
+ if (TYPE_PRECISION (index_type) > POINTER_SIZE)
+ return NULL_TREE;
- case V8SImode:
- switch (TYPE_MODE (src_type))
- {
- case V8SFmode:
- return (TYPE_UNSIGNED (dest_type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
- default:
- return NULL_TREE;
- }
- break;
+ /* The v*gather* insns sign-extend the index to pointer mode. */
+ if (TYPE_PRECISION (index_type) < POINTER_SIZE
+ && TYPE_UNSIGNED (index_type))
+ return NULL_TREE;
- default:
- return NULL_TREE;
- }
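+ /* The hardware gather scales are 1, 2, 4 and 8, i.e. a power of
+ two no larger than 8.  */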
+ if (scale <= 0
+ || scale > 8
+ || (scale & (scale - 1)) != 0)
+ return NULL_TREE;
+ si = TYPE_MODE (index_type) == SImode;
+ switch (TYPE_MODE (mem_vectype))
+ {
+ case V2DFmode:
+ code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
+ break;
+ case V4DFmode:
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
+ break;
+ case V2DImode:
+ code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
+ break;
+ case V4DImode:
+ code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
+ break;
+ case V4SFmode:
+ code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
+ break;
+ case V8SFmode:
+ code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
+ break;
+ case V4SImode:
+ code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
+ break;
+ case V8SImode:
+ code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
+ break;
default:
return NULL_TREE;
}
- return NULL_TREE;
+ return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
return mask + 1;
}
\f
-
/* Store OPERAND to the memory after reload is completed. This means
that we can't easily use assign_stack_local. */
rtx
{
if (CONST_INT_P (XEXP (x, 1)))
*total = cost->shift_const;
+ else if (GET_CODE (XEXP (x, 1)) == SUBREG
+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
+ {
+ /* The shift count is truncated by an AND; return just the
+ shift cost and do not cost the AND separately.  */
+ *total = cost->shift_var;
+ return true;
+ }
else
*total = cost->shift_var;
}
else
type = node;
- if (!(type && (TREE_CODE (*type) == RECORD_TYPE
- || TREE_CODE (*type) == UNION_TYPE)))
+ if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
{
warning (OPT_Wattributes, "%qE attribute ignored",
name);
{
rtx this_param = x86_this_parameter (function);
rtx this_reg, tmp, fnaddr;
+ unsigned int tmp_regno;
+
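+ /* Choose a call-clobbered scratch register that the function's
+ calling convention leaves free: e.g. fastcall passes arguments
+ in ecx/edx, so eax is used there.  */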
+ if (TARGET_64BIT)
+ tmp_regno = R10_REG;
+ else
+ {
+ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
+ tmp_regno = AX_REG;
+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
+ tmp_regno = DX_REG;
+ else
+ tmp_regno = CX_REG;
+ }
emit_note (NOTE_INSN_PROLOGUE_END);
{
if (!x86_64_general_operand (delta_rtx, Pmode))
{
- tmp = gen_rtx_REG (Pmode, R10_REG);
+ tmp = gen_rtx_REG (Pmode, tmp_regno);
emit_move_insn (tmp, delta_rtx);
delta_rtx = tmp;
}
if (vcall_offset)
{
rtx vcall_addr, vcall_mem, this_mem;
- unsigned int tmp_regno;
- if (TARGET_64BIT)
- tmp_regno = R10_REG;
- else
- {
- unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
- if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
- tmp_regno = AX_REG;
- else
- tmp_regno = CX_REG;
- }
tmp = gen_rtx_REG (Pmode, tmp_regno);
this_mem = gen_rtx_MEM (ptr_mode, this_reg);
emit_jump_insn (gen_indirect_jump (fnaddr));
else
{
+ if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
+ fnaddr = legitimize_pic_address (fnaddr,
+ gen_rtx_REG (Pmode, tmp_regno));
+
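+ /* If the call target is not directly usable as a sibcall
+ operand, load it into the scratch register first.  */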
+ if (!sibcall_insn_operand (fnaddr, Pmode))
+ {
+ tmp = gen_rtx_REG (Pmode, tmp_regno);
+ if (GET_MODE (fnaddr) != Pmode)
+ fnaddr = gen_rtx_ZERO_EXTEND (Pmode, fnaddr);
+ emit_move_insn (tmp, fnaddr);
+ fnaddr = tmp;
+ }
+
tmp = gen_rtx_MEM (QImode, fnaddr);
tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
tmp = emit_call_insn (tmp);
tmp = gen_reg_rtx (GET_MODE_INNER (mode));
ix86_expand_vector_extract (true, tmp, target, 1 - elt);
if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- else
tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
return;
}
tmp = gen_reg_rtx (GET_MODE_INNER (mode));
ix86_expand_vector_extract (false, tmp, target, 1 - elt);
if (elt == 0)
- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
- else
tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
return;
for FP arguments. */
{ "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
true },
+ /* The transactional memory builtins are implicitly regparm or fastcall
+ depending on the ABI. Override the generic do-nothing attribute that
+ these builtins were declared with. */
+ { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
+ true },
/* force_align_arg_pointer says this function realigns the stack at entry. */
{ (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
false, true, true, ix86_handle_cconv_attribute, false },
return ix86_cost->cond_not_taken_branch_cost;
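+      /* Permutes and width conversions (promote/demote) are costed as
+	 generic vector statements.  */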
case vec_perm:
- return 1;
+ case vec_promote_demote:
+ return ix86_cost->vec_stmt_cost;
default:
gcc_unreachable ();
}
}
-
-/* Return a vector mode with twice as many elements as VMODE. */
-/* ??? Consider moving this to a table generated by genmodes.c. */
-
-static enum machine_mode
-doublesize_vector_mode (enum machine_mode vmode)
-{
- switch (vmode)
- {
- case V2SFmode: return V4SFmode;
- case V1DImode: return V2DImode;
- case V2SImode: return V4SImode;
- case V4HImode: return V8HImode;
- case V8QImode: return V16QImode;
-
- case V2DFmode: return V4DFmode;
- case V4SFmode: return V8SFmode;
- case V2DImode: return V4DImode;
- case V4SImode: return V8SImode;
- case V8HImode: return V16HImode;
- case V16QImode: return V32QImode;
-
- case V4DFmode: return V8DFmode;
- case V8SFmode: return V16SFmode;
- case V4DImode: return V8DImode;
- case V8SImode: return V16SImode;
- case V16HImode: return V32HImode;
- case V32QImode: return V64QImode;
-
- default:
- gcc_unreachable ();
- }
-}
-
/* Construct (set target (vec_select op0 (parallel perm))) and
return true if that's a valid instruction in the active ISA. */
enum machine_mode v2mode;
rtx x;
- v2mode = doublesize_vector_mode (GET_MODE (op0));
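+  /* Concatenate both inputs into one double-width vector, turning the
+     two-operand permutation into a one-operand vselect.  */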
+ v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
return expand_vselect (target, x, perm, nelt);
}
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
else
- emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
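+	/* The permvar patterns take the data vector first and the
+	   selector second.  */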
+ emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
}
else
{
return ok;
}
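+/* Defined later in this file; declared here so the function below can
+   test whether a three-insn interleave expansion is available.  */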
+static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
+
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
a two vector permutation into a single vector permutation by using
an interleave operation to merge the vectors. */
/* For 32-byte modes allow this even when d->op0 == d->op1:
   the lack of cross-lane shuffling in some instructions
   might prevent a single-insn shuffle.  */
+ dfinal = *d;
+ dfinal.testing_p = true;
+  /* If expand_vec_perm_interleave3 can expand this into
+     a 3-insn sequence, give up and let it be expanded that
+     way instead.  While that is one insn longer, it needs no
+     memory operand, and in the common case where the
+     interleave-low and interleave-high permutations of the
+     same operands are adjacent, the pair needs only 4 insns
+     in total after CSE.  */
+ if (expand_vec_perm_interleave3 (&dfinal))
+ return false;
}
else
return false;
dremap.perm[i * 2] = i;
dremap.perm[i * 2 + 1] = i + nelt;
}
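+      /* Without SSE2 there are no V4SImode interleaves; the V4SFmode
+	 forms have the same bit-level effect.  */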
+ if (!TARGET_SSE2 && d->vmode == V4SImode)
+ dremap.vmode = V4SFmode;
}
else if ((contents & (h2 | h4)) == contents)
{
dremap.perm[i * 2] = i + nelt2;
dremap.perm[i * 2 + 1] = i + nelt + nelt2;
}
+ if (!TARGET_SSE2 && d->vmode == V4SImode)
+ dremap.vmode = V4SFmode;
}
else if ((contents & (h1 | h4)) == contents)
{
stopping once we have promoted to V4SImode and then use pshufd. */
do
{
- optab otab = vec_interleave_low_optab;
+ rtx dest;
+ rtx (*gen) (rtx, rtx, rtx)
+ = vmode == V16QImode ? gen_vec_interleave_lowv16qi
+ : gen_vec_interleave_lowv8hi;
if (elt >= nelt2)
{
- otab = vec_interleave_high_optab;
+ gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
+ : gen_vec_interleave_highv8hi;
elt -= nelt2;
}
nelt2 /= 2;
- op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
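+	  /* Interleaving the register with itself doubles the element
+	     width; repeat until we reach V4SImode.  */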
+ dest = gen_reg_rtx (vmode);
+ emit_insn (gen (dest, op0, op0));
vmode = get_mode_wider_vector (vmode);
- op0 = gen_lowpart (vmode, op0);
+ op0 = gen_lowpart (vmode, dest);
}
while (vmode != V4SImode);
switch (mode)
{
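+    /* With AVX enabled, prefer 256-bit vectors unless the tuning asks
+       for 128-bit ones (TARGET_PREFER_AVX128).  */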
case QImode:
- return TARGET_AVX2 ? V32QImode : V16QImode;
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
case HImode:
- return TARGET_AVX2 ? V16HImode : V8HImode;
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
case SImode:
- return TARGET_AVX2 ? V8SImode : V4SImode;
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
case DImode:
- return TARGET_AVX2 ? V4DImode : V2DImode;
+ return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
case SFmode:
if (TARGET_AVX && !TARGET_PREFER_AVX128)
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
ix86_builtin_vectorized_function
-#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
-#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
+#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
+#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
+
+#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
+#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
+
+#undef TARGET_VECTORIZE_BUILTIN_GATHER
+#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
-#ifndef TARGET_MACHO
+#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
+#undef TARGET_INSTANTIATE_DECLS
+#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload