m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_AMD_MULTIPLE,
+ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
~(m_PENT4 | m_NOCONA | m_GENERIC),
/* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
from integer to FP. */
m_AMDFAM10,
+
+ /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
+ with a subsequent conditional jump instruction into a single
+ compare-and-branch uop. */
+ m_CORE2,
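+  /* (E.g. on Core 2, "cmp %eax, %edx; je .L1" can issue and retire as
+     one fused uop.)  */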
};
/* Feature tests against the various architecture variations. */
rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
+#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
/* Define the structure for the machine field in struct function. */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
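+
+/* Code-generation pattern hooks.  These are selected once, when the
+   options are processed, so that the expanders below need not test
+   TARGET_64BIT at every use.  */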
+static rtx (*ix86_gen_leave) (void);
+static rtx (*ix86_gen_pop1) (rtx);
+static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
+static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
+static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
+
/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;
}
else
{
- /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
+ /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
use of rip-relative addressing. This eliminates fixups that
would otherwise be needed if this object is to be placed in a
DLL, and is essentially just as efficient as direct addressing. */
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
else if (TARGET_64BIT)
ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
- can be optimized to ap = __builtin_next_arg (0). */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ can be optimized to ap = __builtin_next_arg (0).
+ For per-function ABI switching this will need to be revisited. */
+ if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
targetm.expand_builtin_va_start = NULL;
+
+ if (TARGET_64BIT)
+ {
+ ix86_gen_leave = gen_leave_rex64;
+ ix86_gen_pop1 = gen_popdi1;
+ ix86_gen_add3 = gen_adddi3;
+ ix86_gen_sub3 = gen_subdi3;
+ ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
+ ix86_gen_one_cmpl2 = gen_one_cmpldi2;
+ ix86_gen_monitor = gen_sse3_monitor64;
+ }
+ else
+ {
+ ix86_gen_leave = gen_leave;
+ ix86_gen_pop1 = gen_popsi1;
+ ix86_gen_add3 = gen_addsi3;
+ ix86_gen_sub3 = gen_subsi3;
+ ix86_gen_sub3_carry = gen_subsi3_carry;
+ ix86_gen_one_cmpl2 = gen_one_cmplsi2;
+ ix86_gen_monitor = gen_sse3_monitor;
+ }
+
+#ifdef USE_IX86_CLD
+ /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
+ if (!TARGET_64BIT)
+ target_flags |= MASK_CLD & ~target_flags_explicit;
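+  /* The masking with ~target_flags_explicit above keeps an explicit
+     -mno-cld on the command line from being overridden.  */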
+#endif
}
\f
/* Return true if this goes in large data/bss. */
if (TARGET_64BIT)
{
/* Do not warn when emulating the MS ABI. */
- if (!TARGET_64BIT_MS_ABI)
+ if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node) != MS_ABI)
warning (OPT_Wattributes, "%qs attribute ignored",
IDENTIFIER_POINTER (name));
*no_add_attrs = true;
static bool error_issued;
if (TARGET_64BIT)
- return regparm;
+ {
+ if (ix86_function_type_abi (type) == DEFAULT_ABI)
+ return regparm;
+ return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
+ }
attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
if (attr)
return true;
}
+ /* TODO: This function should depend on the current function's ABI,
+    but that would require updating builtins.c as well.  For now we
+    use the default ABI.  */
+
/* RAX is used as hidden argument to va_arg functions. */
- if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
+ if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
return true;
- if (TARGET_64BIT_MS_ABI)
+ if (DEFAULT_ABI == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
- for (i = 0; i < REGPARM_MAX; i++)
+ for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
+ : X86_64_REGPARM_MAX); i++)
if (regno == parm_regs[i])
return true;
return false;
&& type && TREE_CODE (type) != VECTOR_TYPE);
}
+/* Return the size, in bytes, of the area reserved for arguments passed
+   in registers for the function represented by FNDECL, which depends
+   on the ABI in use.  */
+int
+ix86_reg_parm_stack_space (const_tree fndecl)
+{
+ int call_abi = 0;
+ /* For libcalls there may be no fndecl at hand; in that case assume
+    the target's default ABI.  */
+ if (!fndecl)
+ call_abi = DEFAULT_ABI;
+ else
+ call_abi = ix86_function_abi (fndecl);
+ if (call_abi == MS_ABI)
+ return 32;
+ return 0;
+}
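+/* The 32 bytes returned above for MS_ABI are the Windows x64 "shadow"
+   area: four 8-byte home slots for the RCX, RDX, R8 and R9 argument
+   registers.  */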
+
+/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call
+   ABI used.  */
+int
+ix86_function_type_abi (const_tree fntype)
+{
+ if (TARGET_64BIT && fntype != NULL)
+ {
+ int abi;
+ if (DEFAULT_ABI == SYSV_ABI)
+ abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
+ else
+ abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
+
+ if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
+ sorry ("using sysv calling convention on target w64 is not supported");
+
+ return abi;
+ }
+ return DEFAULT_ABI;
+}
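+/* For illustration (hypothetical user code): on a SYSV_ABI-default
+   target, a declaration such as
+
+       void fn (void) __attribute__ ((ms_abi));
+
+   makes the lookup above return MS_ABI for fn's type, while an
+   unattributed declaration keeps the default SYSV_ABI.  */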
+
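+/* Return the call ABI (SYSV_ABI or MS_ABI) for FNDECL, or the default
+   ABI when FNDECL is NULL.  */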
+int
+ix86_function_abi (const_tree fndecl)
+{
+ if (! fndecl)
+ return DEFAULT_ABI;
+ return ix86_function_type_abi (TREE_TYPE (fndecl));
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the call
+   ABI used.  */
+int
+ix86_cfun_abi (void)
+{
+ if (! cfun || ! TARGET_64BIT)
+ return DEFAULT_ABI;
+ return cfun->machine->call_abi;
+}
+
+/* regclass.c */
+extern void init_regs (void);
+
+/* Implementation of the call ABI switching target hook.  The register
+   sets specific to FNDECL's call ABI are selected.  See also
+   CONDITIONAL_REGISTER_USAGE for more details.
+   To prevent redundant calls to the costly init_regs (), register usage
+   is reset only when it does not already match the selected ABI.  */
+void
+ix86_call_abi_override (const_tree fndecl)
+{
+ if (fndecl == NULL_TREE)
+ cfun->machine->call_abi = DEFAULT_ABI;
+ else
+ cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
+ if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+ {
+ if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
+ {
+ call_used_regs[4 /*RSI*/] = 0;
+ call_used_regs[5 /*RDI*/] = 0;
+ init_regs ();
+ }
+ }
+ else if (TARGET_64BIT)
+ {
+ if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
+ {
+ call_used_regs[4 /*RSI*/] = 1;
+ call_used_regs[5 /*RDI*/] = 1;
+ init_regs ();
+ }
+ }
+}
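+/* Note: under the Microsoft ABI, RSI and RDI are callee-saved, which is
+   why they are cleared from call_used_regs above; init_regs () then
+   recomputes the derived register sets.  */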
+
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
memset (cum, 0, sizeof (*cum));
+ cum->call_abi = ix86_function_type_abi (fntype);
/* Set up the number of registers to use for passing arguments. */
cum->nregs = ix86_regparm;
+ if (TARGET_64BIT)
+ {
+ if (cum->call_abi != DEFAULT_ABI)
+ cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
+ : X64_REGPARM_MAX;
+ }
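+  /* That is, a function using the non-default ABI is given the other
+     ABI's register count, e.g. an ms_abi function on a SYSV-default
+     target gets the four Microsoft integer argument registers.  */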
if (TARGET_SSE)
- cum->sse_nregs = SSE_REGPARM_MAX;
+ {
+ cum->sse_nregs = SSE_REGPARM_MAX;
+ if (TARGET_64BIT)
+ {
+ if (cum->call_abi != DEFAULT_ABI)
+ cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX;
+ }
+ }
if (TARGET_MMX)
cum->mmx_nregs = MMX_REGPARM_MAX;
cum->warn_sse = true;
if (type)
mode = type_natural_mode (type);
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
else if (TARGET_64BIT)
function_arg_advance_64 (cum, mode, type, words);
if (mode == VOIDmode)
return GEN_INT (cum->maybe_vaarg
? (cum->sse_nregs < 0
- ? SSE_REGPARM_MAX
- : cum->sse_regno)
+ ? (cum->call_abi == DEFAULT_ABI
+ ? SSE_REGPARM_MAX
+ : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX))
+ : cum->sse_regno)
: -1);
return construct_container (mode, orig_mode, type, 0, cum->nregs,
if (type && TREE_CODE (type) == VECTOR_TYPE)
mode = type_natural_mode (type);
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
return function_arg_ms_64 (cum, mode, omode, named, bytes);
else if (TARGET_64BIT)
return function_arg_64 (cum, mode, omode, type);
const_tree type, bool named ATTRIBUTE_UNUSED)
{
/* See Windows x64 Software Convention. */
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
{
int msize = (int) GET_MODE_SIZE (mode);
if (type)
{
int align;
if (type)
- align = TYPE_ALIGN (type);
+ {
+ /* The canonical type is used for the call, so convert TYPE to its
+    canonical type if needed.  */
+ if (!TYPE_STRUCTURAL_EQUALITY_P (type))
+ type = TYPE_CANONICAL (type);
+ align = TYPE_ALIGN (type);
+ }
else
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
return true;
case FIRST_FLOAT_REG:
- if (TARGET_64BIT_MS_ABI)
+ /* TODO: This function should depend on the current function's ABI,
+    but that would require updating builtins.c as well.  For now we
+    use the default ABI.  */
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
}
ret = construct_container (mode, orig_mode, valtype, 1,
- REGPARM_MAX, SSE_REGPARM_MAX,
+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
x86_64_int_return_registers, 0);
/* For zero sized structures, construct_container returns NULL, but we
fn = fntype_or_decl;
fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
return function_value_ms_64 (orig_mode, mode);
else if (TARGET_64BIT)
return function_value_64 (orig_mode, mode, valtype);
/* Return true iff type is returned in memory. */
-static int
+static int ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size;
return 0;
}
-static int
+static int ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
int needed_intregs, needed_sseregs;
return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
-static int
+static int ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size = int_size_in_bytes (type);
return (size != 1 && size != 2 && size != 4 && size != 8);
}
-int
+static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
- const enum machine_mode mode = type_natural_mode (type);
-
+#ifdef SUBTARGET_RETURN_IN_MEMORY
+ return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
+#else
+ const enum machine_mode mode = type_natural_mode (type);
+
if (TARGET_64BIT_MS_ABI)
- return return_in_memory_ms_64 (type, mode);
- else if (TARGET_64BIT)
- return return_in_memory_64 (type, mode);
- else
- return return_in_memory_32 (type, mode);
+ return return_in_memory_ms_64 (type, mode);
+ else if (TARGET_64BIT)
+ return return_in_memory_64 (type, mode);
+ else
+ return return_in_memory_32 (type, mode);
+#endif
}
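+
+/* A subtarget supplies its own decision by defining
+   SUBTARGET_RETURN_IN_MEMORY in its header.  A minimal sketch, assuming
+   the Solaris variant below is the one wanted:
+
+     #define SUBTARGET_RETURN_IN_MEMORY(TYPE, FNTYPE) \
+       ix86_sol10_return_in_memory (TYPE, FNTYPE)  */
+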
/* Return true iff TYPE is returned in memory. This version is used
but differs notably in that when MMX is available, 8-byte vectors
are returned in memory, rather than in MMX registers. */
-int
+bool
ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
int size;
return size > 12;
}
-int
-ix86_i386elf_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
-{
- return (TYPE_MODE (type) == BLKmode
- || (VECTOR_MODE_P (TYPE_MODE (type)) && int_size_in_bytes (type) == 8));
-}
-
-int
-ix86_i386interix_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
-{
- return (TYPE_MODE (type) == BLKmode
- || (AGGREGATE_TYPE_P (type) && int_size_in_bytes(type) > 8 ));
-}
-
/* When returning SSE vector types, we have a choice of either
(1) being abi incompatible with a -march switch, or
(2) generating an error.
tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use plain pointer to argument area. */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
return build_pointer_type (char_type_node);
record = (*lang_hooks.types.make_type) (RECORD_TYPE);
rtx nsse_reg;
alias_set_type set;
int i;
+ int regparm = ix86_regparm;
+
+ if ((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
+ regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
return;
set = get_varargs_alias_set ();
for (i = cum->regno;
- i < ix86_regparm
+ i < regparm
&& i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
i++)
{
label_ref = gen_rtx_LABEL_REF (Pmode, label);
/* Compute address to jump to :
- label - 5*eax + nnamed_sse_arguments*5 */
+ label - eax*4 + nnamed_sse_arguments*4 */
tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
- 8 * REGPARM_MAX + 127)));
+ 8 * X86_64_REGPARM_MAX + 127)));
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
alias_set_type set = get_varargs_alias_set ();
int i;
- for (i = cum->regno; i < REGPARM_MAX; i++)
+ for (i = cum->regno; i < X64_REGPARM_MAX; i++)
{
rtx reg, mem;
if (stdarg_p (fntype))
function_arg_advance (&next_cum, mode, type, 1);
- if (TARGET_64BIT_MS_ABI)
+ if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
else
setup_incoming_varargs_64 (&next_cum);
tree type;
/* Only 64bit target needs something special. */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
{
std_expand_builtin_va_start (valist, nextarg);
return;
{
type = TREE_TYPE (fpr);
t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
- build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
+ build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
enum machine_mode nat_mode;
/* Only 64bit target needs something special. */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
nat_mode = type_natural_mode (type);
container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
- REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
+ intreg, 0);
/* Pull the value out of the saved registers. */
if (needed_intregs)
{
t = build_int_cst (TREE_TYPE (gpr),
- (REGPARM_MAX - needed_intregs + 1) * 8);
+ (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
t = build2 (GE_EXPR, boolean_type_node, gpr, t);
t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
if (needed_sseregs)
{
t = build_int_cst (TREE_TYPE (fpr),
- (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
- + REGPARM_MAX * 8);
+ (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
+ + X86_64_REGPARM_MAX * 8);
t = build2 (GE_EXPR, boolean_type_node, fpr, t);
t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
xops[0] = gen_rtx_REG (Pmode, regno);
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
+ output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
output_asm_insn ("ret", xops);
}
xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
if (!flag_pic)
- {
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
- else
- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
- }
+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
else
output_asm_insn ("call\t%a2", xops);
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
- {
- if (TARGET_64BIT)
- output_asm_insn ("pop{q}\t%0", xops);
- else
- output_asm_insn ("pop{l}\t%0", xops);
- }
+ output_asm_insn ("pop%z0\t%0", xops);
}
else
{
return "";
if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
- {
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
- }
+ output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
else
- {
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
- else
- output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
- }
+ output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
return "";
}
|| (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
frame->save_regs_using_mov = false;
- if (TARGET_RED_ZONE && current_function_sp_is_unchanging
+ if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
&& current_function_is_leaf
&& !ix86_current_function_calls_tls_descriptor)
{
avoid doing this if I am going to have to probe the stack since
at least on x86_64 the stack probe can turn into a call that clobbers
a red zone location */
- if (TARGET_RED_ZONE && frame.save_regs_using_mov
+ if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
&& (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
: stack_pointer_rtx,
bool eax_live;
rtx t;
- gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
+ gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
- if (TARGET_64BIT_MS_ABI)
+ if (cfun->machine->call_abi == MS_ABI)
eax_live = false;
else
eax_live = ix86_eax_live_at_start_p ();
}
if (frame.save_regs_using_mov
- && !(TARGET_RED_ZONE
+ && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
&& (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
{
if (!frame_pointer_needed || !frame.to_allocate)
emit_insn (gen_prologue_use (pic_offset_table_rtx));
emit_insn (gen_blockage ());
}
+
+ /* Emit cld instruction if stringops are used in the function. */
+ if (TARGET_CLD && ix86_current_function_needs_cld)
+ emit_insn (gen_cld ());
}
/* Emit code to restore saved registers using MOV insns. First register
/* If not an i386, mov & pop is faster than "leave". */
else if (TARGET_USE_LEAVE || optimize_size
|| !cfun->machine->use_fast_prologue_epilogue)
- emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
+ emit_insn ((*ix86_gen_leave) ());
else
{
pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
const0_rtx, style);
- if (TARGET_64BIT)
- emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
- else
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+
+ emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
}
}
else
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (ix86_save_reg (regno, false))
- {
- if (TARGET_64BIT)
- emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
- else
- emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
- }
+ emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
if (frame_pointer_needed)
{
/* Leave results in shorter dependency chains on CPUs that are
able to grok it fast. */
if (TARGET_USE_LEAVE)
- emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
- else if (TARGET_64BIT)
- emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ emit_insn ((*ix86_gen_leave) ());
else
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
}
}
#endif
assemble_name (file, name);
}
- if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
+ if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
&& code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
fputs ("@PLT", file);
break;
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
C -- print opcode suffix for set/cmov insn.
c -- like C, but print reversed condition
+ E,e -- likewise, but for compare-and-branch fused insn.
F,f -- likewise, but for floating-point.
O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
otherwise nothing
case 8:
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
{
+ if (MEM_P (x))
+ {
#ifdef GAS_MNEMONICS
- putc ('q', file);
+ putc ('q', file);
#else
- putc ('l', file);
- putc ('l', file);
+ putc ('l', file);
+ putc ('l', file);
#endif
+ }
+ else
+ putc ('q', file);
}
else
putc ('l', file);
if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
{
PRINT_OPERAND (file, x, 0);
- putc (',', file);
+ fputs (", ", file);
}
return;
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
return;
+ case 'E':
+ put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
+ return;
+
+ case 'e':
+ put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
+ return;
+
case 'H':
/* It doesn't actually matter what mode we use here, as we're
only going to use this for printing. */
&& standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
- /* TDmode values are passed as TImode on the stack. TImode values
- are moved via xmm registers, and moving them to stack can result in
- unaligned memory access. Use ix86_expand_vector_move_misalign()
- if memory operand is not aligned correctly. */
+ /* We need to check memory alignment for SSE mode since attribute
+ can make operands unaligned. */
if (can_create_pseudo_p ()
- && (mode == TImode) && !TARGET_64BIT
+ && SSE_REG_MODE_P (mode)
&& ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
|| (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
{
writing to the top half twice. */
if (TARGET_SSE_SPLIT_REGS)
{
- emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ emit_clobber (op0);
zero = op0;
}
else
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
emit_move_insn (op0, CONST0_RTX (mode));
else
- emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ emit_clobber (op0);
if (mode != V4SFmode)
op0 = gen_lowpart (V4SFmode, op0);
emit_insn (gen_movdi_to_sse (int_xmm, input));
else if (TARGET_SSE_SPLIT_REGS)
{
- emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
+ emit_clobber (int_xmm);
emit_move_insn (gen_lowpart (DImode, int_xmm), input);
}
else
enum machine_mode mode = GET_MODE (dest);
rtx t2, t3, x;
- if (TARGET_SSE5)
- {
- rtx pcmov = gen_rtx_SET (mode, dest,
- gen_rtx_IF_THEN_ELSE (mode, cmp,
- op_true,
- op_false));
- emit_insn (pcmov);
- }
- else if (op_false == CONST0_RTX (mode))
+ if (op_false == CONST0_RTX (mode))
{
op_true = force_reg (mode, op_true);
x = gen_rtx_AND (mode, cmp, op_true);
x = gen_rtx_AND (mode, x, op_false);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
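+  /* The constant-0 arms above are now tried even when SSE5 is
+     available, since a single AND or ANDNOT suffices in those cases.  */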
+ else if (TARGET_SSE5)
+ {
+ rtx pcmov = gen_rtx_SET (mode, dest,
+ gen_rtx_IF_THEN_ELSE (mode, cmp,
+ op_true,
+ op_false));
+ emit_insn (pcmov);
+ }
else
{
op_true = force_reg (mode, op_true);
cop0 = operands[4];
cop1 = operands[5];
- /* Canonicalize the comparison to EQ, GT, GTU. */
- switch (code)
- {
- case EQ:
- case GT:
- case GTU:
- break;
-
- case NE:
- case LE:
- case LEU:
- code = reverse_condition (code);
- negate = true;
- break;
-
- case GE:
- case GEU:
- code = reverse_condition (code);
- negate = true;
- /* FALLTHRU */
-
- case LT:
- case LTU:
- code = swap_condition (code);
- x = cop0, cop0 = cop1, cop1 = x;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- /* Only SSE4.1/SSE4.2 supports V2DImode. */
- if (mode == V2DImode)
+ /* SSE5 supports all of the comparisons on all vector int types. */
+ if (!TARGET_SSE5)
{
+ /* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{
case EQ:
- /* SSE4.1 supports EQ. */
- if (!TARGET_SSE4_1)
- return false;
- break;
-
case GT:
case GTU:
- /* SSE4.2 supports GT/GTU. */
- if (!TARGET_SSE4_2)
- return false;
+ break;
+
+ case NE:
+ case LE:
+ case LEU:
+ code = reverse_condition (code);
+ negate = true;
+ break;
+
+ case GE:
+ case GEU:
+ code = reverse_condition (code);
+ negate = true;
+ /* FALLTHRU */
+
+ case LT:
+ case LTU:
+ code = swap_condition (code);
+ x = cop0, cop0 = cop1, cop1 = x;
break;
default:
gcc_unreachable ();
}
- }
- /* Unsigned parallel compare is not supported by the hardware. Play some
- tricks to turn this into a signed comparison against 0. */
- if (code == GTU)
- {
- cop0 = force_reg (mode, cop0);
+ /* Only SSE4.1/SSE4.2 supports V2DImode. */
+ if (mode == V2DImode)
+ {
+ switch (code)
+ {
+ case EQ:
+ /* SSE4.1 supports EQ. */
+ if (!TARGET_SSE4_1)
+ return false;
+ break;
- switch (mode)
+ case GT:
+ case GTU:
+ /* SSE4.2 supports GT/GTU. */
+ if (!TARGET_SSE4_2)
+ return false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ /* Unsigned parallel compare is not supported by the hardware. Play some
+ tricks to turn this into a signed comparison against 0. */
+ if (code == GTU)
{
- case V4SImode:
- case V2DImode:
- {
- rtx t1, t2, mask;
-
- /* Perform a parallel modulo subtraction. */
- t1 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_subv4si3
- : gen_subv2di3) (t1, cop0, cop1));
-
- /* Extract the original sign bit of op0. */
- mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
- true, false);
- t2 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_andv4si3
- : gen_andv2di3) (t2, cop0, mask));
-
- /* XOR it back into the result of the subtraction. This results
- in the sign bit set iff we saw unsigned underflow. */
- x = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_xorv4si3
- : gen_xorv2di3) (x, t1, t2));
-
- code = GT;
- }
- break;
+ cop0 = force_reg (mode, cop0);
- case V16QImode:
- case V8HImode:
- /* Perform a parallel unsigned saturating subtraction. */
- x = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, x,
- gen_rtx_US_MINUS (mode, cop0, cop1)));
+ switch (mode)
+ {
+ case V4SImode:
+ case V2DImode:
+ {
+ rtx t1, t2, mask;
+
+ /* Perform a parallel modulo subtraction. */
+ t1 = gen_reg_rtx (mode);
+ emit_insn ((mode == V4SImode
+ ? gen_subv4si3
+ : gen_subv2di3) (t1, cop0, cop1));
+
+ /* Extract the original sign bit of op0. */
+ mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
+ true, false);
+ t2 = gen_reg_rtx (mode);
+ emit_insn ((mode == V4SImode
+ ? gen_andv4si3
+ : gen_andv2di3) (t2, cop0, mask));
+
+ /* XOR it back into the result of the subtraction. This results
+ in the sign bit set iff we saw unsigned underflow. */
+ x = gen_reg_rtx (mode);
+ emit_insn ((mode == V4SImode
+ ? gen_xorv4si3
+ : gen_xorv2di3) (x, t1, t2));
+
+ code = GT;
+ }
+ break;
- code = EQ;
- negate = !negate;
- break;
+ case V16QImode:
+ case V8HImode:
+ /* Perform a parallel unsigned saturating subtraction. */
+ x = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (VOIDmode, x,
+ gen_rtx_US_MINUS (mode, cop0, cop1)));
- default:
- gcc_unreachable ();
- }
+ code = EQ;
+ negate = !negate;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
- cop0 = x;
- cop1 = CONST0_RTX (mode);
+ cop0 = x;
+ cop1 = CONST0_RTX (mode);
+ }
}
x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
}
/* This function performs the same task as ix86_expand_sse_unpack,
- but with amdfam15 instructions. */
-
-#define PPERM_SRC 0x00 /* copy source */
-#define PPERM_INVERT 0x20 /* invert source */
-#define PPERM_REVERSE 0x40 /* bit reverse source */
-#define PPERM_REV_INV 0x60 /* bit reverse & invert src */
-#define PPERM_ZERO 0x80 /* all 0's */
-#define PPERM_ONES 0xa0 /* all 1's */
-#define PPERM_SIGN 0xc0 /* propagate sign bit */
-#define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
-
-#define PPERM_SRC1 0x00 /* use first source byte */
-#define PPERM_SRC2 0x10 /* use second source byte */
+ but with sse5 instructions. */
void
ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
if (!rtx_equal_p (operands[0], operands[1]))
emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
- ? gen_x86_shld_1
+ ? gen_x86_shld
: gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
ix86_expand_ashl_const (low[0], count, mode);
}
(mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
- ? gen_x86_shld_1
+ ? gen_x86_shld
: gen_x86_64_shld) (high[0], low[0], operands[2]));
}
if (!rtx_equal_p (operands[0], operands[1]))
emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
- ? gen_x86_shrd_1
+ ? gen_x86_shrd
: gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
? gen_ashrsi3
(mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
- ? gen_x86_shrd_1
+ ? gen_x86_shrd
: gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
? gen_ashrsi3
if (!rtx_equal_p (operands[0], operands[1]))
emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
- ? gen_x86_shrd_1
+ ? gen_x86_shrd
: gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
? gen_lshrsi3
(mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
- ? gen_x86_shrd_1
+ ? gen_x86_shrd
: gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
? gen_lshrsi3
QImode, 1, end_0_label);
/* Increment the address. */
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
emit_label (align_3_label);
}
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
mem = change_address (src, SImode, out);
emit_move_insn (scratch, mem);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
- else
- emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
+ emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside loop and many cycles. */
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const2_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const2_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
emit_label (end_2_label);
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
- if (TARGET_64BIT)
- emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
- else
- emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
+ emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
emit_label (end_0_label);
}
/* strlensi_unroll_1 returns the address of the zero at the end of
the string, like memchr(), so compute the length by subtracting
the start address. */
- if (TARGET_64BIT)
- emit_insn (gen_subdi3 (out, out, addr));
- else
- emit_insn (gen_subsi3 (out, out, addr));
+ emit_insn ((*ix86_gen_sub3) (out, out, addr));
}
else
{
unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
scratch4), UNSPEC_SCAS);
emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
- if (TARGET_64BIT)
- {
- emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
- emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
- }
- else
- {
- emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
- emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
- }
+ emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
+ emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
}
return 1;
}
f = GGC_CNEW (struct machine_function);
f->use_fast_prologue_epilogue_nregs = -1;
f->tls_descriptor_call_expanded_p = 0;
+ f->call_abi = DEFAULT_ABI;
return f;
}
return align;
}
-/* Compute the alignment for a local variable.
- TYPE is the data type, and ALIGN is the alignment that
- the object would ordinarily have. The value of this macro is used
- instead of that alignment to align the object. */
+/* Compute the alignment for a local variable or a stack slot. TYPE is
+ the data type, MODE is the widest mode available and ALIGN is the
+ alignment that the object would ordinarily have. The value of this
+ macro is used instead of that alignment to align the object. */
-int
-ix86_local_alignment (tree type, int align)
+unsigned int
+ix86_local_alignment (tree type, enum machine_mode mode,
+ unsigned int align)
{
+ /* If TYPE is NULL, we are allocating a stack slot for a caller-save
+    register in MODE.  We will return the largest alignment of XF
+    and DF.  */
+ if (!type)
+ {
+ if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
+ align = GET_MODE_ALIGNMENT (DFmode);
+ return align;
+ }
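+  /* E.g. a caller-save slot allocated in XFmode is still given at least
+     DFmode (64-bit) alignment, so DFmode accesses to it stay aligned.  */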
+
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
to 16byte boundary. */
if (TARGET_64BIT)
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
-static const struct builtin_description bdesc_ptest[] =
-{
- /* SSE4.1 */
- { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
- { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
- { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
-};
-
static const struct builtin_description bdesc_pcmpestr[] =
{
/* SSE4.2 */
FLOAT128_FTYPE_FLOAT128,
FLOAT_FTYPE_FLOAT,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
+ INT_FTYPE_V2DI_V2DI_PTEST,
INT64_FTYPE_V4SF,
INT64_FTYPE_V2DF,
INT_FTYPE_V16QI,
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+
/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
- { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
+ { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
case FLOAT_FTYPE_FLOAT:
type = float_ftype_float;
break;
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ type = int_ftype_v2di_v2di;
+ break;
case INT64_FTYPE_V4SF:
type = int64_ftype_v4sf;
break;
else
def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
- /* ptest insns. */
- for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
-
/* SSE */
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
return target;
}
-/* Subroutine of ix86_expand_builtin to take care of insns with
- variable number of operands. */
-
-static rtx
-ix86_expand_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat, real_target;
- unsigned int i, nargs;
- unsigned int nargs_constant = 0;
- int num_memory = 0;
- struct
- {
- rtx op;
- enum machine_mode mode;
- } args[4];
- bool last_arg_count = false;
- enum insn_code icode = d->icode;
- const struct insn_data *insn_p = &insn_data[icode];
- enum machine_mode tmode = insn_p->operand[0].mode;
- enum machine_mode rmode = VOIDmode;
- bool swap = false;
- enum rtx_code comparison = d->comparison;
-
- switch ((enum ix86_builtin_type) d->flag)
- {
- case FLOAT128_FTYPE_FLOAT128:
- case FLOAT_FTYPE_FLOAT:
- case INT64_FTYPE_V4SF:
- case INT64_FTYPE_V2DF:
- case INT_FTYPE_V16QI:
- case INT_FTYPE_V8QI:
- case INT_FTYPE_V4SF:
- case INT_FTYPE_V2DF:
- case V16QI_FTYPE_V16QI:
- case V8HI_FTYPE_V8HI:
- case V8HI_FTYPE_V16QI:
- case V8QI_FTYPE_V8QI:
- case V4SI_FTYPE_V4SI:
- case V4SI_FTYPE_V16QI:
- case V4SI_FTYPE_V4SF:
- case V4SI_FTYPE_V8HI:
- case V4SI_FTYPE_V2DF:
- case V4HI_FTYPE_V4HI:
- case V4SF_FTYPE_V4SF:
- case V4SF_FTYPE_V4SI:
- case V4SF_FTYPE_V2DF:
- case V2DI_FTYPE_V2DI:
- case V2DI_FTYPE_V16QI:
- case V2DI_FTYPE_V8HI:
- case V2DI_FTYPE_V4SI:
- case V2DF_FTYPE_V2DF:
- case V2DF_FTYPE_V4SI:
- case V2DF_FTYPE_V4SF:
- case V2DF_FTYPE_V2SI:
- case V2SI_FTYPE_V2SI:
- case V2SI_FTYPE_V4SF:
- case V2SI_FTYPE_V2SF:
- case V2SI_FTYPE_V2DF:
- case V2SF_FTYPE_V2SF:
- case V2SF_FTYPE_V2SI:
- nargs = 1;
- break;
- case V4SF_FTYPE_V4SF_VEC_MERGE:
- case V2DF_FTYPE_V2DF_VEC_MERGE:
- return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
- case FLOAT128_FTYPE_FLOAT128_FLOAT128:
- case V16QI_FTYPE_V16QI_V16QI:
- case V16QI_FTYPE_V8HI_V8HI:
- case V8QI_FTYPE_V8QI_V8QI:
- case V8QI_FTYPE_V4HI_V4HI:
- case V8HI_FTYPE_V8HI_V8HI:
- case V8HI_FTYPE_V16QI_V16QI:
- case V8HI_FTYPE_V4SI_V4SI:
- case V4SI_FTYPE_V4SI_V4SI:
- case V4SI_FTYPE_V8HI_V8HI:
- case V4SI_FTYPE_V4SF_V4SF:
- case V4SI_FTYPE_V2DF_V2DF:
- case V4HI_FTYPE_V4HI_V4HI:
- case V4HI_FTYPE_V8QI_V8QI:
- case V4HI_FTYPE_V2SI_V2SI:
- case V4SF_FTYPE_V4SF_V4SF:
- case V4SF_FTYPE_V4SF_V2SI:
- case V4SF_FTYPE_V4SF_V2DF:
- case V4SF_FTYPE_V4SF_DI:
- case V4SF_FTYPE_V4SF_SI:
- case V2DI_FTYPE_V2DI_V2DI:
- case V2DI_FTYPE_V16QI_V16QI:
- case V2DI_FTYPE_V4SI_V4SI:
- case V2DI_FTYPE_V2DI_V16QI:
- case V2DI_FTYPE_V2DF_V2DF:
- case V2SI_FTYPE_V2SI_V2SI:
- case V2SI_FTYPE_V4HI_V4HI:
- case V2SI_FTYPE_V2SF_V2SF:
- case V2DF_FTYPE_V2DF_V2DF:
- case V2DF_FTYPE_V2DF_V4SF:
- case V2DF_FTYPE_V2DF_DI:
- case V2DF_FTYPE_V2DF_SI:
- case V2SF_FTYPE_V2SF_V2SF:
- case V1DI_FTYPE_V1DI_V1DI:
- case V1DI_FTYPE_V8QI_V8QI:
- case V1DI_FTYPE_V2SI_V2SI:
- if (comparison == UNKNOWN)
- return ix86_expand_binop_builtin (icode, exp, target);
- nargs = 2;
- break;
- case V4SF_FTYPE_V4SF_V4SF_SWAP:
- case V2DF_FTYPE_V2DF_V2DF_SWAP:
- gcc_assert (comparison != UNKNOWN);
- nargs = 2;
- swap = true;
- break;
- case V8HI_FTYPE_V8HI_V8HI_COUNT:
- case V8HI_FTYPE_V8HI_SI_COUNT:
- case V4SI_FTYPE_V4SI_V4SI_COUNT:
- case V4SI_FTYPE_V4SI_SI_COUNT:
- case V4HI_FTYPE_V4HI_V4HI_COUNT:
- case V4HI_FTYPE_V4HI_SI_COUNT:
- case V2DI_FTYPE_V2DI_V2DI_COUNT:
- case V2DI_FTYPE_V2DI_SI_COUNT:
- case V2SI_FTYPE_V2SI_V2SI_COUNT:
- case V2SI_FTYPE_V2SI_SI_COUNT:
- case V1DI_FTYPE_V1DI_V1DI_COUNT:
- case V1DI_FTYPE_V1DI_SI_COUNT:
- nargs = 2;
- last_arg_count = true;
- break;
- case UINT64_FTYPE_UINT64_UINT64:
- case UINT_FTYPE_UINT_UINT:
- case UINT_FTYPE_UINT_USHORT:
- case UINT_FTYPE_UINT_UCHAR:
- nargs = 2;
- break;
- case V2DI2TI_FTYPE_V2DI_INT:
- nargs = 2;
- rmode = V2DImode;
- nargs_constant = 1;
- break;
- case V8HI_FTYPE_V8HI_INT:
- case V4SI_FTYPE_V4SI_INT:
- case V4HI_FTYPE_V4HI_INT:
- case V4SF_FTYPE_V4SF_INT:
- case V2DI_FTYPE_V2DI_INT:
- case V2DF_FTYPE_V2DF_INT:
- nargs = 2;
- nargs_constant = 1;
- break;
- case V16QI_FTYPE_V16QI_V16QI_V16QI:
- case V4SF_FTYPE_V4SF_V4SF_V4SF:
- case V2DF_FTYPE_V2DF_V2DF_V2DF:
- nargs = 3;
- break;
- case V16QI_FTYPE_V16QI_V16QI_INT:
- case V8HI_FTYPE_V8HI_V8HI_INT:
- case V4SI_FTYPE_V4SI_V4SI_INT:
- case V4SF_FTYPE_V4SF_V4SF_INT:
- case V2DI_FTYPE_V2DI_V2DI_INT:
- case V2DF_FTYPE_V2DF_V2DF_INT:
- nargs = 3;
- nargs_constant = 1;
- break;
- case V2DI2TI_FTYPE_V2DI_V2DI_INT:
- nargs = 3;
- rmode = V2DImode;
- nargs_constant = 1;
- break;
- case V1DI2DI_FTYPE_V1DI_V1DI_INT:
- nargs = 3;
- rmode = DImode;
- nargs_constant = 1;
- break;
- case V2DI_FTYPE_V2DI_UINT_UINT:
- nargs = 3;
- nargs_constant = 2;
- break;
- case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
- nargs = 4;
- nargs_constant = 2;
- break;
- default:
- gcc_unreachable ();
- }
-
- gcc_assert (nargs <= ARRAY_SIZE (args));
-
- if (comparison != UNKNOWN)
- {
- gcc_assert (nargs == 2);
- return ix86_expand_sse_compare (d, exp, target, swap);
- }
-
- if (rmode == VOIDmode || rmode == tmode)
- {
- if (optimize
- || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- real_target = target;
- }
- else
- {
- target = gen_reg_rtx (rmode);
- real_target = simplify_gen_subreg (tmode, target, rmode, 0);
- }
-
- for (i = 0; i < nargs; i++)
- {
- tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
- enum machine_mode mode = insn_p->operand[i + 1].mode;
- bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
-
- if (last_arg_count && (i + 1) == nargs)
- {
- /* SIMD shift insns take either an 8-bit immediate or
- register as count. But builtin functions take int as
- count. If count doesn't match, we put it in register. */
- if (!match)
- {
- op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
- if (!(*insn_p->operand[i + 1].predicate) (op, mode))
- op = copy_to_reg (op);
- }
- }
- else if ((nargs - i) <= nargs_constant)
- {
- if (!match)
- switch (icode)
- {
- case CODE_FOR_sse4_1_roundpd:
- case CODE_FOR_sse4_1_roundps:
- case CODE_FOR_sse4_1_roundsd:
- case CODE_FOR_sse4_1_roundss:
- case CODE_FOR_sse4_1_blendps:
- error ("the last argument must be a 4-bit immediate");
- return const0_rtx;
-
- case CODE_FOR_sse4_1_blendpd:
- error ("the last argument must be a 2-bit immediate");
- return const0_rtx;
-
- default:
- switch (nargs_constant)
- {
- case 2:
- if ((nargs - i) == nargs_constant)
- {
- error ("the next to last argument must be an 8-bit immediate");
- break;
- }
- case 1:
- error ("the last argument must be an 8-bit immediate");
- break;
- default:
- gcc_unreachable ();
- }
- return const0_rtx;
- }
- }
- else
- {
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- /* If we aren't optimizing, only allow one memory operand to
- be generated. */
- if (memory_operand (op, mode))
- num_memory++;
-
- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
- {
- if (optimize || !match || num_memory > 1)
- op = copy_to_mode_reg (mode, op);
- }
- else
- {
- op = copy_to_reg (op);
- op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
- }
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 1:
- pat = GEN_FCN (icode) (real_target, args[0].op);
- break;
- case 2:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
- break;
- case 3:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op);
- break;
- case 4:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op, args[3].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of special insns
- with variable number of operands. */
-
-static rtx
-ix86_expand_special_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
-{
- tree arg;
- rtx pat, op;
- unsigned int i, nargs, arg_adjust, memory;
- struct
- {
- rtx op;
- enum machine_mode mode;
- } args[2];
- enum insn_code icode = d->icode;
- bool last_arg_constant = false;
- const struct insn_data *insn_p = &insn_data[icode];
- enum machine_mode tmode = insn_p->operand[0].mode;
- enum { load, store } class;
-
- switch ((enum ix86_special_builtin_type) d->flag)
- {
- case VOID_FTYPE_VOID:
- emit_insn (GEN_FCN (icode) (target));
- return 0;
- case V2DI_FTYPE_PV2DI:
- case V16QI_FTYPE_PCCHAR:
- case V4SF_FTYPE_PCFLOAT:
- case V2DF_FTYPE_PCDOUBLE:
- nargs = 1;
- class = load;
- memory = 0;
- break;
- case VOID_FTYPE_PV2SF_V4SF:
- case VOID_FTYPE_PV2DI_V2DI:
- case VOID_FTYPE_PCHAR_V16QI:
- case VOID_FTYPE_PFLOAT_V4SF:
- case VOID_FTYPE_PDOUBLE_V2DF:
- case VOID_FTYPE_PDI_DI:
- case VOID_FTYPE_PINT_INT:
- nargs = 1;
- class = store;
- /* Reserve memory operand for target. */
- memory = ARRAY_SIZE (args);
- break;
- case V4SF_FTYPE_V4SF_PCV2SF:
- case V2DF_FTYPE_V2DF_PCDOUBLE:
- nargs = 2;
- class = load;
- memory = 1;
- break;
- default:
- gcc_unreachable ();
- }
-
- gcc_assert (nargs <= ARRAY_SIZE (args));
-
- if (class == store)
- {
- arg = CALL_EXPR_ARG (exp, 0);
- op = expand_normal (arg);
- gcc_assert (target == 0);
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
- arg_adjust = 1;
- }
- else
- {
- arg_adjust = 0;
- if (optimize
- || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- }
-
- for (i = 0; i < nargs; i++)
- {
- enum machine_mode mode = insn_p->operand[i + 1].mode;
- bool match;
-
- arg = CALL_EXPR_ARG (exp, i + arg_adjust);
- op = expand_normal (arg);
- match = (*insn_p->operand[i + 1].predicate) (op, mode);
-
- if (last_arg_constant && (i + 1) == nargs)
- {
- if (!match)
- switch (icode)
- {
- default:
- error ("the last argument must be an 8-bit immediate");
- return const0_rtx;
- }
- }
- else
- {
- if (i == memory)
- {
- /* This must be the memory operand. */
- op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
- }
- else
- {
- /* This must be register. */
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
- op = copy_to_mode_reg (mode, op);
- }
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 1:
- pat = GEN_FCN (icode) (target, args[0].op);
- break;
- case 2:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
- emit_insn (pat);
- return class == store ? 0 : target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of comi insns. */
+/* Subroutine of ix86_expand_builtin to take care of comi insns. */
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
if (VECTOR_MODE_P (mode1))
op1 = safe_vector_operand (op1, mode1);
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ tree arg4 = CALL_EXPR_ARG (exp, 4);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
+ {
+ error ("the fifth argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
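+/* For the flag-testing pcmpestr variants expanded above, d->flag holds
+   the machine mode of the flags-register condition being tested (cast
+   back from int).  */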
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ {
+ error ("the third argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
- if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
- pat = GEN_FCN (d->icode) (op0, op1);
if (! pat)
return 0;
+
emit_insn (pat);
- emit_insn (gen_rtx_SET (VOIDmode,
- gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
- return SUBREG_REG (target);
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
}
-/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+/* Subroutine of ix86_expand_builtin to take care of insns with
+ variable number of operands. */
static rtx
-ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ix86_expand_args_builtin (const struct builtin_description *d,
tree exp, rtx target)
{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- tree arg3 = CALL_EXPR_ARG (exp, 3);
- tree arg4 = CALL_EXPR_ARG (exp, 4);
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- rtx op3 = expand_normal (arg3);
- rtx op4 = expand_normal (arg4);
- enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+ rtx pat, real_target;
+ unsigned int i, nargs;
+ unsigned int nargs_constant = 0;
+ int num_memory = 0;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+ bool last_arg_count = false;
+ enum insn_code icode = d->icode;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum machine_mode rmode = VOIDmode;
+ bool swap = false;
+ enum rtx_code comparison = d->comparison;
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modei3 = insn_data[d->icode].operand[3].mode;
- modev4 = insn_data[d->icode].operand[4].mode;
- modei5 = insn_data[d->icode].operand[5].mode;
- modeimm = insn_data[d->icode].operand[6].mode;
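+  /* Classify the builtin by its function type: ptest and vec_merge
+     forms go straight to dedicated expanders; the remaining cases set
+     the operand count and how many trailing operands must be
+     constants.  */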
+ switch ((enum ix86_builtin_type) d->flag)
+ {
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ return ix86_expand_sse_ptest (d, exp, target);
+ case FLOAT128_FTYPE_FLOAT128:
+ case FLOAT_FTYPE_FLOAT:
+ case INT64_FTYPE_V4SF:
+ case INT64_FTYPE_V2DF:
+ case INT_FTYPE_V16QI:
+ case INT_FTYPE_V8QI:
+ case INT_FTYPE_V4SF:
+ case INT_FTYPE_V2DF:
+ case V16QI_FTYPE_V16QI:
+ case V8HI_FTYPE_V8HI:
+ case V8HI_FTYPE_V16QI:
+ case V8QI_FTYPE_V8QI:
+ case V4SI_FTYPE_V4SI:
+ case V4SI_FTYPE_V16QI:
+ case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V2DF:
+ case V4HI_FTYPE_V4HI:
+ case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V2DF:
+ case V2DI_FTYPE_V2DI:
+ case V2DI_FTYPE_V16QI:
+ case V2DI_FTYPE_V8HI:
+ case V2DI_FTYPE_V4SI:
+ case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4SF:
+ case V2DF_FTYPE_V2SI:
+ case V2SI_FTYPE_V2SI:
+ case V2SI_FTYPE_V4SF:
+ case V2SI_FTYPE_V2SF:
+ case V2SI_FTYPE_V2DF:
+ case V2SF_FTYPE_V2SF:
+ case V2SF_FTYPE_V2SI:
+ nargs = 1;
+ break;
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
+ return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
+ case FLOAT128_FTYPE_FLOAT128_FLOAT128:
+ case V16QI_FTYPE_V16QI_V16QI:
+ case V16QI_FTYPE_V8HI_V8HI:
+ case V8QI_FTYPE_V8QI_V8QI:
+ case V8QI_FTYPE_V4HI_V4HI:
+ case V8HI_FTYPE_V8HI_V8HI:
+ case V8HI_FTYPE_V16QI_V16QI:
+ case V8HI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V8HI_V8HI:
+ case V4SI_FTYPE_V4SF_V4SF:
+ case V4SI_FTYPE_V2DF_V2DF:
+ case V4HI_FTYPE_V4HI_V4HI:
+ case V4HI_FTYPE_V8QI_V8QI:
+ case V4HI_FTYPE_V2SI_V2SI:
+ case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V2SI:
+ case V4SF_FTYPE_V4SF_V2DF:
+ case V4SF_FTYPE_V4SF_DI:
+ case V4SF_FTYPE_V4SF_SI:
+ case V2DI_FTYPE_V2DI_V2DI:
+ case V2DI_FTYPE_V16QI_V16QI:
+ case V2DI_FTYPE_V4SI_V4SI:
+ case V2DI_FTYPE_V2DI_V16QI:
+ case V2DI_FTYPE_V2DF_V2DF:
+ case V2SI_FTYPE_V2SI_V2SI:
+ case V2SI_FTYPE_V4HI_V4HI:
+ case V2SI_FTYPE_V2SF_V2SF:
+ case V2DF_FTYPE_V2DF_V2DF:
+ case V2DF_FTYPE_V2DF_V4SF:
+ case V2DF_FTYPE_V2DF_DI:
+ case V2DF_FTYPE_V2DF_SI:
+ case V2SF_FTYPE_V2SF_V2SF:
+ case V1DI_FTYPE_V1DI_V1DI:
+ case V1DI_FTYPE_V8QI_V8QI:
+ case V1DI_FTYPE_V2SI_V2SI:
+ if (comparison == UNKNOWN)
+ return ix86_expand_binop_builtin (icode, exp, target);
+ nargs = 2;
+ break;
+ case V4SF_FTYPE_V4SF_V4SF_SWAP:
+ case V2DF_FTYPE_V2DF_V2DF_SWAP:
+ gcc_assert (comparison != UNKNOWN);
+ nargs = 2;
+ swap = true;
+ break;
+ case V8HI_FTYPE_V8HI_V8HI_COUNT:
+ case V8HI_FTYPE_V8HI_SI_COUNT:
+ case V4SI_FTYPE_V4SI_V4SI_COUNT:
+ case V4SI_FTYPE_V4SI_SI_COUNT:
+ case V4HI_FTYPE_V4HI_V4HI_COUNT:
+ case V4HI_FTYPE_V4HI_SI_COUNT:
+ case V2DI_FTYPE_V2DI_V2DI_COUNT:
+ case V2DI_FTYPE_V2DI_SI_COUNT:
+ case V2SI_FTYPE_V2SI_V2SI_COUNT:
+ case V2SI_FTYPE_V2SI_SI_COUNT:
+ case V1DI_FTYPE_V1DI_V1DI_COUNT:
+ case V1DI_FTYPE_V1DI_SI_COUNT:
+ nargs = 2;
+ last_arg_count = true;
+ break;
+ case UINT64_FTYPE_UINT64_UINT64:
+ case UINT_FTYPE_UINT_UINT:
+ case UINT_FTYPE_UINT_USHORT:
+ case UINT_FTYPE_UINT_UCHAR:
+ nargs = 2;
+ break;
+ case V2DI2TI_FTYPE_V2DI_INT:
+ nargs = 2;
+ rmode = V2DImode;
+ nargs_constant = 1;
+ break;
+ case V8HI_FTYPE_V8HI_INT:
+ case V4SI_FTYPE_V4SI_INT:
+ case V4HI_FTYPE_V4HI_INT:
+ case V4SF_FTYPE_V4SF_INT:
+ case V2DI_FTYPE_V2DI_INT:
+ case V2DF_FTYPE_V2DF_INT:
+ nargs = 2;
+ nargs_constant = 1;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_V16QI:
+ case V4SF_FTYPE_V4SF_V4SF_V4SF:
+ case V2DF_FTYPE_V2DF_V2DF_V2DF:
+ nargs = 3;
+ break;
+ case V16QI_FTYPE_V16QI_V16QI_INT:
+ case V8HI_FTYPE_V8HI_V8HI_INT:
+ case V4SI_FTYPE_V4SI_V4SI_INT:
+ case V4SF_FTYPE_V4SF_V4SF_INT:
+ case V2DI_FTYPE_V2DI_V2DI_INT:
+ case V2DF_FTYPE_V2DF_V2DF_INT:
+ nargs = 3;
+ nargs_constant = 1;
+ break;
+ case V2DI2TI_FTYPE_V2DI_V2DI_INT:
+ nargs = 3;
+ rmode = V2DImode;
+ nargs_constant = 1;
+ break;
+ case V1DI2DI_FTYPE_V1DI_V1DI_INT:
+ nargs = 3;
+ rmode = DImode;
+ nargs_constant = 1;
+ break;
+ case V2DI_FTYPE_V2DI_UINT_UINT:
+ nargs = 3;
+ nargs_constant = 2;
+ break;
+ case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
+ nargs = 4;
+ nargs_constant = 2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev4))
- op2 = safe_vector_operand (op2, modev4);
+ gcc_assert (nargs <= ARRAY_SIZE (args));
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
- op1 = copy_to_mode_reg (modei3, op1);
- if ((optimize && !register_operand (op2, modev4))
- || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
- op2 = copy_to_mode_reg (modev4, op2);
- if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
- op3 = copy_to_mode_reg (modei5, op3);
+ if (comparison != UNKNOWN)
+ {
+ gcc_assert (nargs == 2);
+ return ix86_expand_sse_compare (d, exp, target, swap);
+ }
- if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
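+  /* When the builtin's result mode differs from the insn's destination
+     mode, RMODE was set above; compute into an RMODE register and let
+     the insn write through a TMODE subreg of it.  */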
+ if (rmode == VOIDmode || rmode == tmode)
{
- error ("the fifth argument must be a 8-bit immediate");
- return const0_rtx;
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_p->operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ real_target = target;
+ }
+ else
+ {
+ target = gen_reg_rtx (rmode);
+ real_target = simplify_gen_subreg (tmode, target, rmode, 0);
}
- if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ for (i = 0; i < nargs; i++)
{
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
- target = gen_reg_rtx (tmode0);
+ tree arg = CALL_EXPR_ARG (exp, i);
+ rtx op = expand_normal (arg);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
- scratch1 = gen_reg_rtx (tmode1);
+ if (last_arg_count && (i + 1) == nargs)
+ {
+	  /* SIMD shift insns take either an 8-bit immediate or a
+	     register as the count argument, but the builtin functions
+	     take an int.  If the count doesn't match, put it in a
+	     register.  */
+ if (!match)
+ {
+ op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
+ if (!(*insn_p->operand[i + 1].predicate) (op, mode))
+ op = copy_to_reg (op);
+ }
+ }
+ else if ((nargs - i) <= nargs_constant)
+ {
+ if (!match)
+ switch (icode)
+ {
+ case CODE_FOR_sse4_1_roundpd:
+ case CODE_FOR_sse4_1_roundps:
+ case CODE_FOR_sse4_1_roundsd:
+ case CODE_FOR_sse4_1_roundss:
+ case CODE_FOR_sse4_1_blendps:
+ error ("the last argument must be a 4-bit immediate");
+ return const0_rtx;
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
- }
- else if (d->code == IX86_BUILTIN_PCMPESTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
- target = gen_reg_rtx (tmode1);
+ case CODE_FOR_sse4_1_blendpd:
+ error ("the last argument must be a 2-bit immediate");
+ return const0_rtx;
- scratch0 = gen_reg_rtx (tmode0);
+ default:
+ switch (nargs_constant)
+ {
+ case 2:
+ if ((nargs - i) == nargs_constant)
+ {
+ error ("the next to last argument must be an 8-bit immediate");
+ break;
+ }
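+		  /* FALLTHRU */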
+ case 1:
+ error ("the last argument must be an 8-bit immediate");
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
- }
- else
- {
- gcc_assert (d->flag);
+ /* If we aren't optimizing, only allow one memory operand to
+ be generated. */
+ if (memory_operand (op, mode))
+ num_memory++;
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
+ {
+ if (optimize || !match || num_memory > 1)
+ op = copy_to_mode_reg (mode, op);
+ }
+ else
+ {
+ op = copy_to_reg (op);
+ op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
+ }
+ }
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ args[i].op = op;
+ args[i].mode = mode;
+ }
+
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (real_target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op);
+ break;
+ default:
+ gcc_unreachable ();
}
if (! pat)
return 0;
emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((enum machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
+ return target;
}
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+/* Subroutine of ix86_expand_builtin to take care of special insns
+   with a variable number of operands.  */
static rtx
-ix86_expand_sse_pcmpistr (const struct builtin_description *d,
- tree exp, rtx target)
+ix86_expand_special_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modev3 = insn_data[d->icode].operand[3].mode;
- modeimm = insn_data[d->icode].operand[4].mode;
+ tree arg;
+ rtx pat, op;
+ unsigned int i, nargs, arg_adjust, memory;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[2];
+ enum insn_code icode = d->icode;
+ bool last_arg_constant = false;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum { load, store } class;
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev3))
- op1 = safe_vector_operand (op1, modev3);
+ switch ((enum ix86_special_builtin_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ emit_insn (GEN_FCN (icode) (target));
+ return 0;
+ case V2DI_FTYPE_PV2DI:
+ case V16QI_FTYPE_PCCHAR:
+ case V4SF_FTYPE_PCFLOAT:
+ case V2DF_FTYPE_PCDOUBLE:
+ nargs = 1;
+ class = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PDI_DI:
+ case VOID_FTYPE_PINT_INT:
+ nargs = 1;
+ class = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ nargs = 2;
+ class = load;
+ memory = 1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if ((optimize && !register_operand (op1, modev3))
- || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
- op1 = copy_to_mode_reg (modev3, op1);
+ gcc_assert (nargs <= ARRAY_SIZE (args));
- if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
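+  /* For stores the first builtin argument is the destination address:
+     wrap it in a MEM of the insn's destination mode and shift the
+     remaining arguments over by one.  */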
+ if (class == store)
{
- error ("the third argument must be a 8-bit immediate");
- return const0_rtx;
+ arg = CALL_EXPR_ARG (exp, 0);
+ op = expand_normal (arg);
+ gcc_assert (target == 0);
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ arg_adjust = 1;
+ }
+ else
+ {
+ arg_adjust = 0;
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_p->operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
}
- if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ for (i = 0; i < nargs; i++)
{
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
- target = gen_reg_rtx (tmode0);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match;
- scratch1 = gen_reg_rtx (tmode1);
+ arg = CALL_EXPR_ARG (exp, i + arg_adjust);
+ op = expand_normal (arg);
+ match = (*insn_p->operand[i + 1].predicate) (op, mode);
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
- }
- else if (d->code == IX86_BUILTIN_PCMPISTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
- target = gen_reg_rtx (tmode1);
+ if (last_arg_constant && (i + 1) == nargs)
+ {
+ if (!match)
+ switch (icode)
+ {
+ default:
+ error ("the last argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (i == memory)
+ {
+ /* This must be the memory operand. */
+ op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ }
+ else
+ {
+	      /* This must be a register.  */
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
- scratch0 = gen_reg_rtx (tmode0);
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ op = copy_to_mode_reg (mode, op);
+ }
+ }
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ args[i].op = op;
+ args[i].mode = mode;
}
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ default:
+ gcc_unreachable ();
}
if (! pat)
return 0;
-
emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((enum machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
+ return class == store ? 0 : target;
}
/* Return the integer constant in ARG. Constrain it to be in the range
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- if (!TARGET_64BIT)
- emit_insn (gen_sse3_monitor (op0, op1, op2));
- else
- emit_insn (gen_sse3_monitor64 (op0, op1, op2));
+ emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
if (d->code == fcode)
return ix86_expand_sse_comi (d, exp, target);
- for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_ptest (d, exp, target);
-
for (i = 0, d = bdesc_pcmpestr;
i < ARRAY_SIZE (bdesc_pcmpestr);
i++, d++)
rtx result;
gcc_assert (reload_completed);
- if (TARGET_RED_ZONE)
+ if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
{
result = gen_rtx_MEM (mode,
gen_rtx_PLUS (Pmode,
GEN_INT (-RED_ZONE_SIZE)));
emit_move_insn (result, operand);
}
- else if (!TARGET_RED_ZONE && TARGET_64BIT)
+ else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
{
switch (mode)
{
void
ix86_free_from_memory (enum machine_mode mode)
{
- if (!TARGET_RED_ZONE)
+ if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
{
int size;
return regclass;
}
+static enum reg_class
+ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
+ enum machine_mode mode,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+  /* QImode spills from non-QI registers require an intermediate
+     register on 32-bit targets.  */
+ if (!in_p && mode == QImode && !TARGET_64BIT
+ && (class == GENERAL_REGS
+ || class == LEGACY_REGS
+ || class == INDEX_REGS))
+ {
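+      /* Only %eax..%edx expose a QImode low part in 32-bit mode, so a
+	 QImode store from %esi, %edi or %ebp needs one of them as an
+	 intermediate.  */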
+ int regno;
+
+ if (REG_P (x))
+ regno = REGNO (x);
+ else
+ regno = -1;
+
+ if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
+ regno = true_regnum (x);
+
+ /* Return Q_REGS if the operand is in memory. */
+ if (regno == -1)
+ return Q_REGS;
+ }
+
+ return NO_REGS;
+}
+
/* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers.
symb = (*targetm.strip_name_encoding) (symb);
length = strlen (stub);
- binder_name = alloca (length + 32);
+ binder_name = XALLOCAVEC (char, length + 32);
GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
length = strlen (symb);
- symbol_name = alloca (length + 32);
+ symbol_name = XALLOCAVEC (char, length + 32);
GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
sprintf (lazy_ptr_name, "L%d$lz", label);
{
const int *parm_regs;
- if (TARGET_64BIT_MS_ABI)
+ if (ix86_function_type_abi (type) == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
/* Put the this parameter into %eax. */
xops[0] = this_param;
xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
}
else
this_reg = NULL_RTX;
xops[0] = gen_rtx_MEM (Pmode, this_reg);
xops[1] = tmp;
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Adjust the this parameter. */
xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
}
xops[1] = this_reg;
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
}
/* If necessary, drop THIS back to its stack slot. */
{
xops[0] = this_reg;
xops[1] = this_param;
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
}
xops[0] = XEXP (DECL_RTL (function), 0);
output_asm_insn ("jmp\t%P0", xops);
/* All thunks should be in the same object as their target,
and thus binds_local_p should be true. */
- else if (TARGET_64BIT_MS_ABI)
+ else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
gcc_unreachable ();
else
{
if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
return computed;
- mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
- ? get_inner_array_type (type) : type);
+ mode = TYPE_MODE (strip_array_types (type));
if (mode == DFmode || mode == DCmode
|| GET_MODE_CLASS (mode) == MODE_INT
|| GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
- if (!TARGET_64BIT_MS_ABI && flag_pic)
+ if (DEFAULT_ABI == SYSV_ABI && flag_pic)
fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
else
fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
enum machine_mode vsimode;
rtx new_target;
rtx x, tmp;
+ bool use_vector_set = false;
+
+ switch (mode)
+ {
+ case V2DImode:
+ use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
+ break;
+ case V16QImode:
+ case V4SImode:
+ case V4SFmode:
+ use_vector_set = TARGET_SSE4_1;
+ break;
+ case V8HImode:
+ use_vector_set = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
+ break;
+ default:
+ break;
+ }
+
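+  /* Start from a zeroed vector and insert the single variable element
+     with the target's vector-set pattern.  */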
+ if (use_vector_set)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
+ var = force_reg (GET_MODE_INNER (mode), var);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+ }
switch (mode)
{
break;
case V16QImode:
+ if (TARGET_SSE4_1)
+ break;
wmode = V8HImode;
goto widen;
case V8QImode:
return true;
}
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ concatenate to handle the most general case: all values variable,
+ and none identical. */
+
+static void
+ix86_expand_vector_init_concat (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode cmode, hmode = VOIDmode;
+ rtx first[4], second[2];
+ rtvec v;
+ int i, j;
+
+ switch (n)
+ {
+ case 2:
+ switch (mode)
+ {
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ case V2DImode:
+ cmode = DImode;
+ break;
+ case V2SImode:
+ cmode = SImode;
+ break;
+ case V2DFmode:
+ cmode = DFmode;
+ break;
+ case V2SFmode:
+ cmode = SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (!register_operand (ops[1], cmode))
+ ops[1] = force_reg (cmode, ops[1]);
+ if (!register_operand (ops[0], cmode))
+ ops[0] = force_reg (cmode, ops[0]);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, ops[0],
+ ops[1])));
+ break;
+
+ case 4:
+ switch (mode)
+ {
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ goto half;
+
+half:
+ /* FIXME: We process inputs backward to help RA. PR 36222. */
+ i = n - 1;
+ j = (n >> 1) - 1;
+ for (; i > 0; i -= 2, j--)
+ {
+ first[j] = gen_reg_rtx (cmode);
+ v = gen_rtvec (2, ops[i - 1], ops[i]);
+ ix86_expand_vector_init (false, first[j],
+ gen_rtx_PARALLEL (cmode, v));
+ }
+
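+      /* FIRST now holds n/2 half-width vectors; combine them pairwise
+	 until a single concatenation fills TARGET.  */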
+ n >>= 1;
+ if (n > 2)
+ {
+ gcc_assert (hmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, second, n);
+ }
+ else
+ ix86_expand_vector_init_concat (mode, target, first, n);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ interleave to handle the most general case: all values variable,
+ and none identical. */
+
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode first_imode, second_imode, third_imode;
+ int i, j;
+ rtx op0, op1;
+ rtx (*gen_load_even) (rtx, rtx, rtx);
+ rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+ rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+
+ switch (mode)
+ {
+ case V8HImode:
+ gen_load_even = gen_vec_setv8hi;
+ gen_interleave_first_low = gen_vec_interleave_lowv4si;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ first_imode = V4SImode;
+ second_imode = V2DImode;
+ third_imode = VOIDmode;
+ break;
+ case V16QImode:
+ gen_load_even = gen_vec_setv16qi;
+ gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+ gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ first_imode = V8HImode;
+ second_imode = V4SImode;
+ third_imode = V2DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
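+  /* Pack each pair of scalar elements: the even-indexed scalar becomes
+     element 0 and the odd one element 1 of a vector that is then viewed
+     in FIRST_IMODE.  */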
+ for (i = 0; i < n; i++)
+ {
+      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
+ op0 = gen_reg_rtx (SImode);
+ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
+
+ /* Insert the SImode value as low element of V4SImode vector. */
+ op1 = gen_reg_rtx (V4SImode);
+ op0 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode,
+ op0),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
+
+      /* Cast the V4SImode vector back to a vector in the original mode.  */
+ op0 = gen_reg_rtx (mode);
+ emit_move_insn (op0, gen_lowpart (mode, op1));
+
+      /* Load even elements into the second position.  */
+ emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+ const1_rtx));
+
+ /* Cast vector to FIRST_IMODE vector. */
+ ops[i] = gen_reg_rtx (first_imode);
+ emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+ }
+
+ /* Interleave low FIRST_IMODE vectors. */
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (first_imode);
+ emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
+
+ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
+ ops[j] = gen_reg_rtx (second_imode);
+ emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+ }
+
+ /* Interleave low SECOND_IMODE vectors. */
+ switch (second_imode)
+ {
+ case V4SImode:
+ for (i = j = 0; i < n / 2; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[i],
+ ops[i + 1]));
+
+ /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+ vector. */
+ ops[j] = gen_reg_rtx (third_imode);
+ emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+ }
+ second_imode = V2DImode;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ /* FALLTHRU */
+
+ case V2DImode:
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[0],
+ ops[1]));
+
+      /* Cast the SECOND_IMODE vector back to a vector in the
+	 original mode.  */
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_lowpart (mode, op0)));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* A subroutine of ix86_expand_vector_init. Handle the most general case:
all values variable, and none identical. */
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
rtx target, rtx vals)
{
- enum machine_mode half_mode = GET_MODE_INNER (mode);
- rtx op0 = NULL, op1 = NULL;
- bool use_vec_concat = false;
+ rtx ops[16];
+ int n, i;
switch (mode)
{
break;
/* FALLTHRU */
- case V2DFmode:
- case V2DImode:
- /* For the two element vectors, we always implement VEC_CONCAT. */
- op0 = XVECEXP (vals, 0, 0);
- op1 = XVECEXP (vals, 0, 1);
- use_vec_concat = true;
- break;
-
case V4SFmode:
- half_mode = V2SFmode;
- goto half;
case V4SImode:
- half_mode = V2SImode;
- goto half;
- half:
- {
- rtvec v;
-
- /* For V4SF and V4SI, we implement a concat of two V2 vectors.
- Recurse to load the two halves. */
+ case V2DFmode:
+ case V2DImode:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_concat (mode, target, ops, n);
+ return;
- op0 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
- ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+ case V16QImode:
+ if (!TARGET_SSE4_1)
+ break;
+ /* FALLTHRU */
- op1 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
- ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+ case V8HImode:
+ if (!TARGET_SSE2)
+ break;
- use_vec_concat = true;
- }
- break;
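+      /* The interleave helper consumes the scalars in adjacent pairs,
+	 so pass n / 2 as the operand count.  */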
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+ return;
- case V8HImode:
- case V16QImode:
case V4HImode:
case V8QImode:
break;
gcc_unreachable ();
}
- if (use_vec_concat)
- {
- if (!register_operand (op0, half_mode))
- op0 = force_reg (half_mode, op0);
- if (!register_operand (op1, half_mode))
- op1 = force_reg (half_mode, op1);
-
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_rtx_VEC_CONCAT (mode, op0, op1)));
- }
- else
{
int i, j, n_elts, n_words, n_elt_per_word;
enum machine_mode inner_mode;
else if (n_words == 2)
{
rtx tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
+ emit_clobber (tmp);
emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
emit_move_insn (target, tmp);
else if (n_words == 4)
{
rtx tmp = gen_reg_rtx (V4SImode);
+ gcc_assert (word_mode == SImode);
vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
emit_move_insn (target, gen_lowpart (mode, tmp));
NUM is the number of operands.
USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
NUM_MEMORY is the maximum number of memory operands to accept. */
+
bool
-ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
+ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
+ bool uses_oc0, int num_memory)
{
int mem_mask;
int mem_count;
}
}
+  /* Special case pmacsdq{l,h}, where we allow the 3rd argument to be
+     a memory operand.  */
+ if (num_memory < 0)
+ {
+ num_memory = -num_memory;
+ if ((mem_mask & (1 << (num-1))) != 0)
+ {
+ mem_mask &= ~(1 << (num-1));
+ mem_count--;
+ }
+ }
+
/* If there were no memory operations, allow the insn */
if (mem_mask == 0)
return true;
}
/* Initialize the GCC target structure. */
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
+
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost