rtx ix86_compare_emitted = NULL_RTX;
/* Size of the register save area. */
-#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
+#define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
/* Define the structure for the machine field in struct function. */
extern int ix86_force_align_arg_pointer;
static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
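+/* Generator functions for insn patterns whose form depends on whether
+   we are generating 32-bit or 64-bit code; they are initialized when
+   the target options are processed.  */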
+static rtx (*ix86_gen_leave) (void);
+static rtx (*ix86_gen_pop1) (rtx);
+static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
+static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
+static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
+static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
+
/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;
}
else
{
- /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
+ /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
use of rip-relative addressing. This eliminates fixups that
would otherwise be needed if this object is to be placed in a
DLL, and is essentially just as efficient as direct addressing. */
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
else if (TARGET_64BIT)
ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
- can be optimized to ap = __builtin_next_arg (0). */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ can be optimized to ap = __builtin_next_arg (0).
+   For per-function abi switching this will need to be corrected. */
+ if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
targetm.expand_builtin_va_start = NULL;
+
+ if (TARGET_64BIT)
+ {
+ ix86_gen_leave = gen_leave_rex64;
+ ix86_gen_pop1 = gen_popdi1;
+ ix86_gen_add3 = gen_adddi3;
+ ix86_gen_sub3 = gen_subdi3;
+ ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
+ ix86_gen_one_cmpl2 = gen_one_cmpldi2;
+ ix86_gen_monitor = gen_sse3_monitor64;
+ }
+ else
+ {
+ ix86_gen_leave = gen_leave;
+ ix86_gen_pop1 = gen_popsi1;
+ ix86_gen_add3 = gen_addsi3;
+ ix86_gen_sub3 = gen_subsi3;
+ ix86_gen_sub3_carry = gen_subsi3_carry;
+ ix86_gen_one_cmpl2 = gen_one_cmplsi2;
+ ix86_gen_monitor = gen_sse3_monitor;
+ }
+
+#ifdef USE_IX86_CLD
+ /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
+ if (!TARGET_64BIT)
+ target_flags |= MASK_CLD & ~target_flags_explicit;
+#endif
}
\f
/* Return true if this goes in large data/bss. */
if (TARGET_64BIT)
{
/* Do not warn when emulating the MS ABI. */
- if (!TARGET_64BIT_MS_ABI)
+  if (TREE_CODE (*node) != FUNCTION_TYPE
+      || ix86_function_type_abi (*node) != MS_ABI)
warning (OPT_Wattributes, "%qs attribute ignored",
IDENTIFIER_POINTER (name));
*no_add_attrs = true;
static bool error_issued;
if (TARGET_64BIT)
- return regparm;
+ {
+ if (ix86_function_type_abi (type) == DEFAULT_ABI)
+ return regparm;
+ return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
+ }
attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
if (attr)
return true;
}
+ /* TODO: The function should depend on current function ABI but
+ builtins.c would need updating then. Therefore we use the
+ default ABI. */
+
/* RAX is used as hidden argument to va_arg functions. */
- if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
+ if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
return true;
- if (TARGET_64BIT_MS_ABI)
+ if (DEFAULT_ABI == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
- for (i = 0; i < REGPARM_MAX; i++)
+ for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
+ : X86_64_REGPARM_MAX); i++)
if (regno == parm_regs[i])
return true;
return false;
&& type && TREE_CODE (type) != VECTOR_TYPE);
}
+/* Return the size, in bytes, of the area reserved for arguments passed
+   in registers for the function represented by FNDECL, which depends
+   on the abi in use.  */
+int
+ix86_reg_parm_stack_space (const_tree fndecl)
+{
+ int call_abi = 0;
+  /* For libcalls there may be no fndecl at hand.  In that case assume
+     the default abi of the target.  */
+ if (!fndecl)
+ call_abi = DEFAULT_ABI;
+ else
+ call_abi = ix86_function_abi (fndecl);
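+  /* The 64-bit MS abi requires the caller to reserve 32 bytes of stack
+     as a shadow area for the register parameters.  */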
+  if (call_abi == MS_ABI)
+ return 32;
+ return 0;
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
+   call abi used.  */
+int
+ix86_function_type_abi (const_tree fntype)
+{
+ if (TARGET_64BIT && fntype != NULL)
+ {
+ int abi;
+ if (DEFAULT_ABI == SYSV_ABI)
+ abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
+ else
+ abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
+
+ if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
+ sorry ("using sysv calling convention on target w64 is not supported");
+
+ return abi;
+ }
+ return DEFAULT_ABI;
+}
+
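+/* Return SYSV_ABI or MS_ABI, depending on FNDECL, specifying the call
+   abi used.  For a NULL FNDECL the default abi is returned.  */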
+int
+ix86_function_abi (const_tree fndecl)
+{
+ if (! fndecl)
+ return DEFAULT_ABI;
+ return ix86_function_type_abi (TREE_TYPE (fndecl));
+}
+
+/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
+   call abi used.  */
+int
+ix86_cfun_abi (void)
+{
+ if (! cfun || ! TARGET_64BIT)
+ return DEFAULT_ABI;
+ return cfun->machine->call_abi;
+}
+
+/* regclass.c */
+extern void init_regs (void);
+
+/* Implementation of the call abi switching target hook.  The call
+   register sets specific to FNDECL are set up.  See also
+   CONDITIONAL_REGISTER_USAGE for more details.
+   To prevent redundant calls of the costly function init_regs (), the
+   register usage is only reset when it does not already match the
+   selected abi.  */
+void
+ix86_call_abi_override (const_tree fndecl)
+{
+ if (fndecl == NULL_TREE)
+ cfun->machine->call_abi = DEFAULT_ABI;
+ else
+ cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
+ if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
+ {
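+      /* The 64-bit MS abi treats RSI and RDI as call-saved registers.  */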
+ if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
+ {
+ call_used_regs[4 /*RSI*/] = 0;
+ call_used_regs[5 /*RDI*/] = 0;
+ init_regs ();
+ }
+ }
+ else if (TARGET_64BIT)
+ {
+ if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
+ {
+ call_used_regs[4 /*RSI*/] = 1;
+ call_used_regs[5 /*RDI*/] = 1;
+ init_regs ();
+ }
+ }
+}
+
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
memset (cum, 0, sizeof (*cum));
+ cum->call_abi = ix86_function_type_abi (fntype);
/* Set up the number of registers to use for passing arguments. */
cum->nregs = ix86_regparm;
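+  /* A call using the non-default abi uses a different number of
+     argument registers.  */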
+ if (TARGET_64BIT)
+ {
+ if (cum->call_abi != DEFAULT_ABI)
+ cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
+ : X64_REGPARM_MAX;
+ }
if (TARGET_SSE)
- cum->sse_nregs = SSE_REGPARM_MAX;
+ {
+ cum->sse_nregs = SSE_REGPARM_MAX;
+ if (TARGET_64BIT)
+ {
+ if (cum->call_abi != DEFAULT_ABI)
+ cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX;
+ }
+ }
if (TARGET_MMX)
cum->mmx_nregs = MMX_REGPARM_MAX;
cum->warn_sse = true;
if (type)
mode = type_natural_mode (type);
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
function_arg_advance_ms_64 (cum, bytes, words);
else if (TARGET_64BIT)
function_arg_advance_64 (cum, mode, type, words);
if (mode == VOIDmode)
return GEN_INT (cum->maybe_vaarg
? (cum->sse_nregs < 0
- ? SSE_REGPARM_MAX
- : cum->sse_regno)
+ ? (cum->call_abi == DEFAULT_ABI
+ ? SSE_REGPARM_MAX
+ : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+ : X64_SSE_REGPARM_MAX))
+ : cum->sse_regno)
: -1);
return construct_container (mode, orig_mode, type, 0, cum->nregs,
if (type && TREE_CODE (type) == VECTOR_TYPE)
mode = type_natural_mode (type);
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
return function_arg_ms_64 (cum, mode, omode, named, bytes);
else if (TARGET_64BIT)
return function_arg_64 (cum, mode, omode, type);
const_tree type, bool named ATTRIBUTE_UNUSED)
{
/* See Windows x64 Software Convention. */
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
{
int msize = (int) GET_MODE_SIZE (mode);
if (type)
{
int align;
if (type)
- align = TYPE_ALIGN (type);
+ {
+	  /* Since the canonical type is used for the call, convert
+	     TYPE to its canonical type if needed.  */
+ if (!TYPE_STRUCTURAL_EQUALITY_P (type))
+ type = TYPE_CANONICAL (type);
+ align = TYPE_ALIGN (type);
+ }
else
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
return true;
case FIRST_FLOAT_REG:
- if (TARGET_64BIT_MS_ABI)
+ /* TODO: The function should depend on current function ABI but
+ builtins.c would need updating then. Therefore we use the
+ default ABI. */
+ if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
return false;
return TARGET_FLOAT_RETURNS_IN_80387;
}
ret = construct_container (mode, orig_mode, valtype, 1,
- REGPARM_MAX, SSE_REGPARM_MAX,
+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
x86_64_int_return_registers, 0);
/* For zero sized structures, construct_container returns NULL, but we
fn = fntype_or_decl;
fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
- if (TARGET_64BIT_MS_ABI)
+ if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
return function_value_ms_64 (orig_mode, mode);
else if (TARGET_64BIT)
return function_value_64 (orig_mode, mode, valtype);
/* Return true iff type is returned in memory. */
-static int
+static int ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size;
return 0;
}
-static int
+static int ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
int needed_intregs, needed_sseregs;
return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
-static int
+static int ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
HOST_WIDE_INT size = int_size_in_bytes (type);
return (size != 1 && size != 2 && size != 4 && size != 8);
}
-bool
+static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
- const enum machine_mode mode = type_natural_mode (type);
-
+#ifdef SUBTARGET_RETURN_IN_MEMORY
+ return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
+#else
+ const enum machine_mode mode = type_natural_mode (type);
+
if (TARGET_64BIT_MS_ABI)
- return return_in_memory_ms_64 (type, mode);
- else if (TARGET_64BIT)
- return return_in_memory_64 (type, mode);
- else
- return return_in_memory_32 (type, mode);
+ return return_in_memory_ms_64 (type, mode);
+ else if (TARGET_64BIT)
+ return return_in_memory_64 (type, mode);
+ else
+ return return_in_memory_32 (type, mode);
+#endif
}
/* Return false iff TYPE is returned in memory. This version is used
return size > 12;
}
-bool
-ix86_i386elf_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
-{
- return (TYPE_MODE (type) == BLKmode
- || (VECTOR_MODE_P (TYPE_MODE (type)) && int_size_in_bytes (type) == 8));
-}
-
-bool
-ix86_i386interix_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
-{
- return (TYPE_MODE (type) == BLKmode
- || (AGGREGATE_TYPE_P (type) && int_size_in_bytes(type) > 8 ));
-}
-
/* When returning SSE vector types, we have a choice of either
(1) being abi incompatible with a -march switch, or
(2) generating an error.
tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use plain pointer to argument area. */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
return build_pointer_type (char_type_node);
record = (*lang_hooks.types.make_type) (RECORD_TYPE);
rtx nsse_reg;
alias_set_type set;
int i;
+ int regparm = ix86_regparm;
+
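+  /* If the current function uses the non-default calling convention,
+     use that abi's integer register parameter count instead of
+     ix86_regparm.  */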
+  if ((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
+ regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
return;
set = get_varargs_alias_set ();
for (i = cum->regno;
- i < ix86_regparm
+ i < regparm
&& i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
i++)
{
label_ref = gen_rtx_LABEL_REF (Pmode, label);
/* Compute address to jump to :
- label - 5*eax + nnamed_sse_arguments*5 */
+ label - eax*4 + nnamed_sse_arguments*4 */
tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
tmp_reg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
plus_constant (save_area,
- 8 * REGPARM_MAX + 127)));
+ 8 * X86_64_REGPARM_MAX + 127)));
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
alias_set_type set = get_varargs_alias_set ();
int i;
- for (i = cum->regno; i < REGPARM_MAX; i++)
+ for (i = cum->regno; i < X64_REGPARM_MAX; i++)
{
rtx reg, mem;
if (stdarg_p (fntype))
function_arg_advance (&next_cum, mode, type, 1);
- if (TARGET_64BIT_MS_ABI)
+ if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
setup_incoming_varargs_ms_64 (&next_cum);
else
setup_incoming_varargs_64 (&next_cum);
tree type;
/* Only 64bit target needs something special. */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
{
std_expand_builtin_va_start (valist, nextarg);
return;
{
type = TREE_TYPE (fpr);
t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
- build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
+ build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
enum machine_mode nat_mode;
/* Only 64bit target needs something special. */
- if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
nat_mode = type_natural_mode (type);
container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
- REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
+ intreg, 0);
/* Pull the value out of the saved registers. */
if (needed_intregs)
{
t = build_int_cst (TREE_TYPE (gpr),
- (REGPARM_MAX - needed_intregs + 1) * 8);
+ (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
t = build2 (GE_EXPR, boolean_type_node, gpr, t);
t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
if (needed_sseregs)
{
t = build_int_cst (TREE_TYPE (fpr),
- (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
- + REGPARM_MAX * 8);
+ (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
+ + X86_64_REGPARM_MAX * 8);
t = build2 (GE_EXPR, boolean_type_node, fpr, t);
t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
xops[0] = gen_rtx_REG (Pmode, regno);
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
+ output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
output_asm_insn ("ret", xops);
}
xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
if (!flag_pic)
- {
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
- else
- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
- }
+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
else
output_asm_insn ("call\t%a2", xops);
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
- {
- if (TARGET_64BIT)
- output_asm_insn ("pop{q}\t%0", xops);
- else
- output_asm_insn ("pop{l}\t%0", xops);
- }
+ output_asm_insn ("pop%z0\t%0", xops);
}
else
{
return "";
if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
- {
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
- else
- output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
- }
+ output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
else
- {
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
- else
- output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
- }
+ output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
return "";
}
bool eax_live;
rtx t;
- gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
+ gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
- if (TARGET_64BIT_MS_ABI)
+ if (cfun->machine->call_abi == MS_ABI)
eax_live = false;
else
eax_live = ix86_eax_live_at_start_p ();
emit_insn (gen_prologue_use (pic_offset_table_rtx));
emit_insn (gen_blockage ());
}
+
+ /* Emit cld instruction if stringops are used in the function. */
+ if (TARGET_CLD && ix86_current_function_needs_cld)
+ emit_insn (gen_cld ());
}
/* Emit code to restore saved registers using MOV insns. First register
/* If not an i386, mov & pop is faster than "leave". */
else if (TARGET_USE_LEAVE || optimize_size
|| !cfun->machine->use_fast_prologue_epilogue)
- emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
+ emit_insn ((*ix86_gen_leave) ());
else
{
pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
const0_rtx, style);
- if (TARGET_64BIT)
- emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
- else
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+
+ emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
}
}
else
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (ix86_save_reg (regno, false))
- {
- if (TARGET_64BIT)
- emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
- else
- emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
- }
+ emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
if (frame_pointer_needed)
{
/* Leave results in shorter dependency chains on CPUs that are
able to grok it fast. */
if (TARGET_USE_LEAVE)
- emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
- else if (TARGET_64BIT)
- emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ emit_insn ((*ix86_gen_leave) ());
else
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
}
}
#endif
assemble_name (file, name);
}
- if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
+ if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
&& code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
fputs ("@PLT", file);
break;
case 8:
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
{
+ if (MEM_P (x))
+ {
#ifdef GAS_MNEMONICS
- putc ('q', file);
+ putc ('q', file);
#else
- putc ('l', file);
- putc ('l', file);
+ putc ('l', file);
+ putc ('l', file);
#endif
+ }
+ else
+ putc ('q', file);
}
else
putc ('l', file);
&& standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
- /* TDmode values are passed as TImode on the stack. TImode values
- are moved via xmm registers, and moving them to stack can result in
- unaligned memory access. Use ix86_expand_vector_move_misalign()
- if memory operand is not aligned correctly. */
+  /* We need to check memory alignment for SSE mode since an attribute
+     can make the operands unaligned.  */
if (can_create_pseudo_p ()
- && (mode == TImode) && !TARGET_64BIT
+ && SSE_REG_MODE_P (mode)
&& ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
|| (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
{
writing to the top half twice. */
if (TARGET_SSE_SPLIT_REGS)
{
- emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ emit_clobber (op0);
zero = op0;
}
else
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
emit_move_insn (op0, CONST0_RTX (mode));
else
- emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ emit_clobber (op0);
if (mode != V4SFmode)
op0 = gen_lowpart (V4SFmode, op0);
emit_insn (gen_movdi_to_sse (int_xmm, input));
else if (TARGET_SSE_SPLIT_REGS)
{
- emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
+ emit_clobber (int_xmm);
emit_move_insn (gen_lowpart (DImode, int_xmm), input);
}
else
QImode, 1, end_0_label);
/* Increment the address. */
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
emit_label (align_3_label);
}
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
end_0_label);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const1_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
mem = change_address (src, SImode, out);
emit_move_insn (scratch, mem);
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
- else
- emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
+ emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
This saves three branches inside loop and many cycles. */
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- if (TARGET_64BIT)
- emit_insn (gen_adddi3 (out, out, const2_rtx));
- else
- emit_insn (gen_addsi3 (out, out, const2_rtx));
+ emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
emit_label (end_2_label);
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
- if (TARGET_64BIT)
- emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
- else
- emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
+ emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
emit_label (end_0_label);
}
/* strlensi_unroll_1 returns the address of the zero at the end of
the string, like memchr(), so compute the length by subtracting
the start address. */
- if (TARGET_64BIT)
- emit_insn (gen_subdi3 (out, out, addr));
- else
- emit_insn (gen_subsi3 (out, out, addr));
+ emit_insn ((*ix86_gen_sub3) (out, out, addr));
}
else
{
unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
scratch4), UNSPEC_SCAS);
emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
- if (TARGET_64BIT)
- {
- emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
- emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
- }
- else
- {
- emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
- emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
- }
+ emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
+ emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
}
return 1;
}
f = GGC_CNEW (struct machine_function);
f->use_fast_prologue_epilogue_nregs = -1;
f->tls_descriptor_call_expanded_p = 0;
+ f->call_abi = DEFAULT_ABI;
return f;
}
return align;
}
-/* Compute the alignment for a local variable.
- TYPE is the data type, and ALIGN is the alignment that
- the object would ordinarily have. The value of this macro is used
- instead of that alignment to align the object. */
+/* Compute the alignment for a local variable or a stack slot. TYPE is
+ the data type, MODE is the widest mode available and ALIGN is the
+ alignment that the object would ordinarily have. The value of this
+ macro is used instead of that alignment to align the object. */
-int
-ix86_local_alignment (tree type, int align)
+unsigned int
+ix86_local_alignment (tree type, enum machine_mode mode,
+ unsigned int align)
{
+  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
+     register in MODE.  Return the larger of the XFmode and DFmode
+     alignments.  */
+ if (!type)
+ {
+ if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
+ align = GET_MODE_ALIGNMENT (DFmode);
+ return align;
+ }
+
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
to 16byte boundary. */
if (TARGET_64BIT)
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- if (!TARGET_64BIT)
- emit_insn (gen_sse3_monitor (op0, op1, op2));
- else
- emit_insn (gen_sse3_monitor64 (op0, op1, op2));
+ emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
{
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
- if (!in_p && mode == QImode && class == NON_Q_REGS
- && !TARGET_64BIT)
+ if (!in_p && mode == QImode && !TARGET_64BIT
+ && (class == GENERAL_REGS
+ || class == LEGACY_REGS
+ || class == INDEX_REGS))
{
int regno;
{
const int *parm_regs;
- if (TARGET_64BIT_MS_ABI)
+ if (ix86_function_type_abi (type) == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
/* Put the this parameter into %eax. */
xops[0] = this_param;
xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
}
else
this_reg = NULL_RTX;
xops[0] = gen_rtx_MEM (Pmode, this_reg);
xops[1] = tmp;
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Adjust the this parameter. */
xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
}
xops[1] = this_reg;
- if (TARGET_64BIT)
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
}
/* If necessary, drop THIS back to its stack slot. */
{
xops[0] = this_reg;
xops[1] = this_param;
- if (TARGET_64BIT)
- output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
- else
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
}
xops[0] = XEXP (DECL_RTL (function), 0);
output_asm_insn ("jmp\t%P0", xops);
/* All thunks should be in the same object as their target,
and thus binds_local_p should be true. */
- else if (TARGET_64BIT_MS_ABI)
+ else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
gcc_unreachable ();
else
{
if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
return computed;
- mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
- ? get_inner_array_type (type) : type);
+ mode = TYPE_MODE (strip_array_types (type));
if (mode == DFmode || mode == DCmode
|| GET_MODE_CLASS (mode) == MODE_INT
|| GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
#endif
- if (!TARGET_64BIT_MS_ABI && flag_pic)
+ if (DEFAULT_ABI == SYSV_ABI && flag_pic)
fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
else
fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
break;
case V4HImode:
use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
+ break;
default:
break;
}
break;
case V16QImode:
+ if (TARGET_SSE4_1)
+ break;
wmode = V8HImode;
goto widen;
case V8QImode:
return true;
}
-/* A subroutine of ix86_expand_vector_init. Handle the most general case:
- all values variable, and none identical. */
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ concatenate to handle the most general case: all values variable,
+ and none identical. */
static void
-ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx vals)
+ix86_expand_vector_init_concat (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
{
- enum machine_mode half_mode = GET_MODE_INNER (mode);
- rtx op0 = NULL, op1 = NULL;
- bool use_vec_concat = false;
+ enum machine_mode cmode, hmode = VOIDmode;
+ rtx first[4], second[2];
+ rtvec v;
+ int i, j;
- switch (mode)
+ switch (n)
{
- case V2SFmode:
- case V2SImode:
- if (!mmx_ok && !TARGET_SSE)
- break;
- /* FALLTHRU */
+ case 2:
+ switch (mode)
+ {
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ case V2DImode:
+ cmode = DImode;
+ break;
+ case V2SImode:
+ cmode = SImode;
+ break;
+ case V2DFmode:
+ cmode = DFmode;
+ break;
+ case V2SFmode:
+ cmode = SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
- case V2DFmode:
- case V2DImode:
- /* For the two element vectors, we always implement VEC_CONCAT. */
- op0 = XVECEXP (vals, 0, 0);
- op1 = XVECEXP (vals, 0, 1);
- use_vec_concat = true;
+ if (!register_operand (ops[1], cmode))
+ ops[1] = force_reg (cmode, ops[1]);
+ if (!register_operand (ops[0], cmode))
+ ops[0] = force_reg (cmode, ops[0]);
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_VEC_CONCAT (mode, ops[0],
+ ops[1])));
break;
- case V4SFmode:
- half_mode = V2SFmode;
- goto half;
- case V4SImode:
- half_mode = V2SImode;
+ case 4:
+ switch (mode)
+ {
+ case V4SImode:
+ cmode = V2SImode;
+ break;
+ case V4SFmode:
+ cmode = V2SFmode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
goto half;
- half:
- {
- rtvec v;
- /* For V4SF and V4SI, we implement a concat of two V2 vectors.
- Recurse to load the two halves. */
+half:
+ /* FIXME: We process inputs backward to help RA. PR 36222. */
+ i = n - 1;
+ j = (n >> 1) - 1;
+ for (; i > 0; i -= 2, j--)
+ {
+ first[j] = gen_reg_rtx (cmode);
+ v = gen_rtvec (2, ops[i - 1], ops[i]);
+ ix86_expand_vector_init (false, first[j],
+ gen_rtx_PARALLEL (cmode, v));
+ }
- op1 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
- ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+ n >>= 1;
+ if (n > 2)
+ {
+ gcc_assert (hmode != VOIDmode);
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ second[j] = gen_reg_rtx (hmode);
+ ix86_expand_vector_init_concat (hmode, second [j],
+ &first [i], 2);
+ }
+ n >>= 1;
+ ix86_expand_vector_init_concat (mode, target, second, n);
+ }
+ else
+ ix86_expand_vector_init_concat (mode, target, first, n);
+ break;
- op0 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
- ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+ default:
+ gcc_unreachable ();
+ }
+}
- use_vec_concat = true;
- }
- break;
+/* A subroutine of ix86_expand_vector_init_general. Use vector
+ interleave to handle the most general case: all values variable,
+ and none identical. */
+static void
+ix86_expand_vector_init_interleave (enum machine_mode mode,
+ rtx target, rtx *ops, int n)
+{
+ enum machine_mode first_imode, second_imode, third_imode;
+ int i, j;
+ rtx op0, op1;
+ rtx (*gen_load_even) (rtx, rtx, rtx);
+ rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
+ rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
+
+ switch (mode)
+ {
case V8HImode:
- if (TARGET_SSE2)
- {
- rtx ops[4];
- unsigned int i, j;
+ gen_load_even = gen_vec_setv8hi;
+ gen_interleave_first_low = gen_vec_interleave_lowv4si;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ first_imode = V4SImode;
+ second_imode = V2DImode;
+ third_imode = VOIDmode;
+ break;
+ case V16QImode:
+ gen_load_even = gen_vec_setv16qi;
+ gen_interleave_first_low = gen_vec_interleave_lowv8hi;
+ gen_interleave_second_low = gen_vec_interleave_lowv4si;
+ first_imode = V8HImode;
+ second_imode = V4SImode;
+ third_imode = V2DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ for (i = 0; i < n; i++)
+ {
+      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
+ op0 = gen_reg_rtx (SImode);
+ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
- for (i = 0; i < ARRAY_SIZE (ops); i++)
- {
- /* Extend the odd elment from HImode to SImode using
- a paradoxical SUBREG. */
- op0 = gen_reg_rtx (SImode);
- emit_move_insn (op0, gen_lowpart (SImode,
- XVECEXP (vals, 0,
- i + i)));
-
- /* Insert the SImode value as low element of V4SImode
- vector. */
- op1 = gen_reg_rtx (V4SImode);
- op0 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode,
- op0),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
- /* Cast the V4SImode vector back to a V8HImode vector. */
- op0 = gen_reg_rtx (mode);
- emit_move_insn (op0, gen_lowpart (mode, op1));
-
- /* Load even HI elements into the second positon. */
- emit_insn (gen_vec_setv8hi (op0, XVECEXP (vals, 0,
- i + i + 1),
- const1_rtx));
-
- /* Cast V8HImode vector to V4SImode vector. */
- ops[i] = gen_reg_rtx (V4SImode);
- emit_move_insn (ops[i], gen_lowpart (V4SImode, op0));
- }
+ /* Insert the SImode value as low element of V4SImode vector. */
+ op1 = gen_reg_rtx (V4SImode);
+ op0 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode,
+ op0),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
- /* Interleave low V4SIs. */
- for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
- {
- op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
- ops[i + 1]));
-
- /* Cast V4SImode vectors to V2DImode vectors. */
- op1 = gen_reg_rtx (V2DImode);
- emit_move_insn (op1, gen_lowpart (V2DImode, op0));
- ops[j] = op1;
- }
+      /* Cast the V4SImode vector back to a vector in the original mode.  */
+ op0 = gen_reg_rtx (mode);
+ emit_move_insn (op0, gen_lowpart (mode, op1));
+
+      /* Load the even elements into the second position.  */
+ emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
+ const1_rtx));
- /* Interleave low V2DIs. */
- op0 = gen_reg_rtx (V2DImode);
- emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
+ /* Cast vector to FIRST_IMODE vector. */
+ ops[i] = gen_reg_rtx (first_imode);
+ emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
+ }
- /* Cast the V2DImode vector back to a V8HImode vector. */
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_lowpart (mode, op0)));
- return;
- }
+ /* Interleave low FIRST_IMODE vectors. */
+ for (i = j = 0; i < n; i += 2, j++)
+ {
+ op0 = gen_reg_rtx (first_imode);
+ emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
- case V16QImode:
- if (TARGET_SSE4_1)
+ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
+ ops[j] = gen_reg_rtx (second_imode);
+ emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
+ }
+
+ /* Interleave low SECOND_IMODE vectors. */
+ switch (second_imode)
+ {
+ case V4SImode:
+ for (i = j = 0; i < n / 2; i += 2, j++)
{
- rtx ops[8];
- unsigned int i, j;
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[i],
+ ops[i + 1]));
- for (i = 0; i < ARRAY_SIZE (ops); i++)
- {
- /* Extend the odd elment from QImode to SImode using
- a paradoxical SUBREG. */
- op0 = gen_reg_rtx (SImode);
- emit_move_insn (op0, gen_lowpart (SImode,
- XVECEXP (vals, 0,
- i + i)));
-
- /* Insert the SImode value as low element of V4SImode
- vector. */
- op1 = gen_reg_rtx (V4SImode);
- op0 = gen_rtx_VEC_MERGE (V4SImode,
- gen_rtx_VEC_DUPLICATE (V4SImode,
- op0),
- CONST0_RTX (V4SImode),
- const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
-
- /* Cast the V4SImode vector back to a V16QImode vector. */
- op0 = gen_reg_rtx (mode);
- emit_move_insn (op0, gen_lowpart (mode, op1));
-
- /* Load even QI elements into the second positon. */
- emit_insn (gen_vec_setv16qi (op0, XVECEXP (vals, 0,
- i + i + 1),
- const1_rtx));
-
- /* Cast V16QImode vector to V8HImode vector. */
- ops[i] = gen_reg_rtx (V8HImode);
- emit_move_insn (ops[i], gen_lowpart (V8HImode, op0));
- }
+ /* Cast the SECOND_IMODE vector to the THIRD_IMODE
+ vector. */
+ ops[j] = gen_reg_rtx (third_imode);
+ emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
+ }
+ second_imode = V2DImode;
+ gen_interleave_second_low = gen_vec_interleave_lowv2di;
+ /* FALLTHRU */
- /* Interleave low V8HIs. */
- for (i = j = 0; i < ARRAY_SIZE (ops); i += 2, j++)
- {
- op0 = gen_reg_rtx (V8HImode);
- emit_insn (gen_vec_interleave_lowv8hi (op0, ops[i],
- ops[i + 1]));
-
- /* Cast V8HImode vector to V4SImode vector. */
- op1 = gen_reg_rtx (V4SImode);
- emit_move_insn (op1, gen_lowpart (V4SImode, op0));
- ops[j] = op1;
- }
+ case V2DImode:
+ op0 = gen_reg_rtx (second_imode);
+ emit_insn ((*gen_interleave_second_low) (op0, ops[0],
+ ops[1]));
- /* Interleave low V4SIs. */
- for (i = j = 0; i < ARRAY_SIZE (ops) / 2; i += 2, j++)
- {
- op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_interleave_lowv4si (op0, ops[i],
- ops[i + 1]));
-
- /* Cast V4SImode vectors to V2DImode vectors. */
- op1 = gen_reg_rtx (V2DImode);
- emit_move_insn (op1, gen_lowpart (V2DImode, op0));
- ops[j] = op1;
- }
+      /* Cast the SECOND_IMODE vector back to a vector in the
+	 original mode.  */
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_lowpart (mode, op0)));
+ break;
- /* Interleave low V2DIs. */
- op0 = gen_reg_rtx (V2DImode);
- emit_insn (gen_vec_interleave_lowv2di (op0, ops[0], ops[1]));
+ default:
+ gcc_unreachable ();
+ }
+}
- /* Cast the V2DImode vector back to a V8HImode vector. */
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_lowpart (mode, op0)));
- return;
- }
+/* A subroutine of ix86_expand_vector_init. Handle the most general case:
+ all values variable, and none identical. */
+
+static void
+ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx vals)
+{
+ rtx ops[16];
+ int n, i;
+
+ switch (mode)
+ {
+ case V2SFmode:
+ case V2SImode:
+ if (!mmx_ok && !TARGET_SSE)
+ break;
+ /* FALLTHRU */
+
+ case V4SFmode:
+ case V4SImode:
+ case V2DFmode:
+ case V2DImode:
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_concat (mode, target, ops, n);
+ return;
+
+ case V16QImode:
+ if (!TARGET_SSE4_1)
+ break;
+ /* FALLTHRU */
+
+ case V8HImode:
+ if (!TARGET_SSE2)
+ break;
+
+ n = GET_MODE_NUNITS (mode);
+ for (i = 0; i < n; i++)
+ ops[i] = XVECEXP (vals, 0, i);
+ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
+ return;
case V4HImode:
case V8QImode:
gcc_unreachable ();
}
- if (use_vec_concat)
- {
- if (!register_operand (op1, half_mode))
- op1 = force_reg (half_mode, op1);
- if (!register_operand (op0, half_mode))
- op0 = force_reg (half_mode, op0);
-
- emit_insn (gen_rtx_SET (VOIDmode, target,
- gen_rtx_VEC_CONCAT (mode, op0, op1)));
- }
- else
{
int i, j, n_elts, n_words, n_elt_per_word;
enum machine_mode inner_mode;
else if (n_words == 2)
{
rtx tmp = gen_reg_rtx (mode);
- emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
+ emit_clobber (tmp);
emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
emit_move_insn (target, tmp);
else if (n_words == 4)
{
rtx tmp = gen_reg_rtx (V4SImode);
+ gcc_assert (word_mode == SImode);
vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
emit_move_insn (target, gen_lowpart (mode, tmp));
}
/* Initialize the GCC target structure. */
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
+
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES