#define COSTS_N_BYTES(N) ((N) * 2)
static const
-struct processor_costs size_cost = { /* costs for tunning for size */
+struct processor_costs size_cost = { /* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of an add instruction */
COSTS_N_BYTES (3), /* cost of a lea instruction */
COSTS_N_BYTES (2), /* variable shift costs */
COSTS_N_INSNS (1), /* cost of an add instruction */
/* On all chips taken into consideration, lea is 2 cycles or more.  With
this cost, however, our current implementation of synth_mult results in
- use of unnecesary temporary registers causing regression on several
+ use of unnecessary temporary registers causing regression on several
SPECfp benchmarks. */
COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
COSTS_N_INSNS (1), /* variable shift costs */
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_GENERIC;
-const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
+const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_GENERIC;
with partial reg. dependencies used by Athlon/P4 based chips, it is better
to leave it off for generic32 for now. */
const int x86_partial_reg_stall = m_PPRO;
+const int x86_partial_flag_reg_stall = m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_GENERIC);
const int x86_use_mov0 = m_K6;
/* ix86_regparm_string as a number */
static int ix86_regparm;
+/* -mstackrealign option */
+extern int ix86_force_align_arg_pointer;
+static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
+
/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
char internal_label_prefix[16];
int internal_label_prefix_len;
-
-/* Table for BUILT_IN_NORMAL to BUILT_IN_MD mapping. */
-static GTY(()) tree ix86_builtin_function_variants[(int) END_BUILTINS];
\f
static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
-static int ix86_flags_dependant (rtx, rtx, enum attr_type);
-static int ix86_agi_dependant (rtx, rtx, enum attr_type);
+static int ix86_flags_dependent (rtx, rtx, enum attr_type);
+static int ix86_agi_dependent (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
-static void ix86_init_sse_abi_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
-static rtx ix86_expand_library_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
-#undef TARGET_EXPAND_LIBRARY_BUILTIN
-#define TARGET_EXPAND_LIBRARY_BUILTIN ix86_expand_library_builtin
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
if (ix86_tune_string)
{
if (!strcmp (ix86_tune_string, "generic")
- || !strcmp (ix86_tune_string, "i686"))
+ || !strcmp (ix86_tune_string, "i686")
+ /* As special support for cross compilers we read -mtune=native
+ as -mtune=generic.  With native compilers we won't see
+ -mtune=native, as it was changed by the driver. */
+ || !strcmp (ix86_tune_string, "native"))
{
if (TARGET_64BIT)
ix86_tune_string = "generic64";
&& ! TARGET_SSE)
error ("-msseregparm used without SSE enabled");
- /* Accept -msselibm only if at least SSE support is enabled. */
- if (TARGET_SSELIBM
- && ! TARGET_SSE2)
- error ("-msselibm used without SSE2 enabled");
-
- /* Ignore -msselibm on 64bit targets. */
- if (TARGET_SSELIBM
- && TARGET_64BIT)
- error ("-msselibm used on a 64bit target");
-
ix86_fpmath = TARGET_FPMATH_DEFAULT;
if (ix86_fpmath_string != 0)
/* Sseregparm attribute says we are using x86_64 calling conventions
for FP arguments. */
{ "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* force_align_arg_pointer says this function realigns the stack at entry. */
+ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
+ false, true, true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
{ "dllimport", 0, 0, false, false, false, handle_dll_attribute },
{ "dllexport", 0, 0, false, false, false, handle_dll_attribute },
*no_add_attrs = true;
}
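+  /* Stack realignment consumes %ecx in the prologue (see
+     ix86_internal_arg_pointer below), so force_align_arg_pointer
+     functions have only REGPARM_MAX-1 register parameters to work with.  */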
+ if (!TARGET_64BIT
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (*node))
+ && compare_tree_int (cst, REGPARM_MAX-1) > 0)
+ {
+ error ("%s functions limited to %d register parameters",
+ ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
+ }
+
return NULL_TREE;
}
&& decl_function_context (decl)
&& !DECL_NO_STATIC_CHAIN (decl))
local_regparm = 2;
+ /* If the function realigns its stack pointer, the
+ prologue will clobber %ecx. If we've already
+ generated code for the callee, the callee
+ DECL_STRUCT_FUNCTION is gone, so we fall back to
+ scanning the attributes for the self-realigning
+ property. */
+ if ((DECL_STRUCT_FUNCTION (decl)
+ && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
+ || (!DECL_STRUCT_FUNCTION (decl)
+ && lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
+ local_regparm = 2;
/* Each global register variable increases register pressure,
so the more global reg vars there are, the less the regparm
optimization helps, unless requested by the user explicitly. */
When we have only some of our vector isa extensions enabled, then there
are some modes for which vector_mode_supported_p is false. For these
modes, the generic vector support in gcc will choose some non-vector mode
- in order to implement the type. By computing the natural mode, we'll
+ in order to implement the type. By computing the natural mode, we'll
select the proper ABI location for the operand and not depend on whatever
the middle-end decides to do with these vector types. */
{
int num;
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
/* Bitfields are always classified as integer. Handle them
early, since later code would consider them to be
misaligned integers. */
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
subclasses[0] = X86_64_INTEGER_CLASS;
-
+
for (i = 0; i < words; i++)
classes[i] = subclasses[i % num];
-
+
break;
}
case UNION_TYPE:
if (TREE_CODE (field) == FIELD_DECL)
{
int num;
+
+ if (TREE_TYPE (field) == error_mark_node)
+ continue;
+
num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
TREE_TYPE (field), subclasses,
bit_offset);
return 0;
default:
gcc_assert (VECTOR_MODE_P (mode));
-
+
if (bytes > 16)
return 0;
-
+
gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
-
+
if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
classes[0] = X86_64_INTEGERSI_CLASS;
else
tree type, int in_return, int nintregs, int nsseregs,
const int *intreg, int sse_regno)
{
+ /* Static flags ensuring each of the errors below is issued only once.  */
+ static bool issued_sse_arg_error;
+ static bool issued_sse_ret_error;
+ static bool issued_x87_ret_error;
+
enum machine_mode tmpmode;
int bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
some less clueful developer tries to use floating-point anyway. */
if (needed_sseregs && !TARGET_SSE)
{
- static bool issued_error;
- if (!issued_error)
+ if (in_return)
{
- issued_error = true;
- if (in_return)
- error ("SSE register return with SSE disabled");
- else
- error ("SSE register argument with SSE disabled");
+ if (!issued_sse_ret_error)
+ {
+ error ("SSE register return with SSE disabled");
+ issued_sse_ret_error = true;
+ }
+ }
+ else if (!issued_sse_arg_error)
+ {
+ error ("SSE register argument with SSE disabled");
+ issued_sse_arg_error = true;
}
return NULL;
}
+ /* Likewise, error if the ABI requires us to return values in the
+ x87 registers and the user specified -mno-80387. */
+ if (!TARGET_80387 && in_return)
+ for (i = 0; i < n; i++)
+ if (class[i] == X86_64_X87_CLASS
+ || class[i] == X86_64_X87UP_CLASS
+ || class[i] == X86_64_COMPLEX_X87_CLASS)
+ {
+ if (!issued_x87_ret_error)
+ {
+ error ("x87 register return with x87 disabled");
+ issued_x87_ret_error = true;
+ }
+ return NULL;
+ }
+
/* First construct simple cases. Avoid SCmode, since we want to use
a single register to pass this type. */
if (n == 1 && mode != SCmode)
case QUAL_UNION_TYPE:
{
tree field;
-
+
if (TYPE_BINFO (type))
{
tree binfo, base_binfo;
int i;
-
+
for (binfo = TYPE_BINFO (type), i = 0;
BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
if (contains_128bit_aligned_vector_p
if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
return true;
break;
-
+
default:
gcc_unreachable ();
}
gcc_assert (!TARGET_64BIT);
/* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
- we prevent this case when mmx is not available. */
- if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
- return FIRST_MMX_REG;
+ we normally prevent this case when mmx is not available.  However,
+ some ABIs may require the result to be returned like DImode. */
+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
+ return TARGET_MMX ? FIRST_MMX_REG : 0;
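+  /* (Regno 0 above is %eax: without MMX the value is returned the way a
+     plain DImode integer would be.)  */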
/* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
- we prevent this case when sse is not available. */
+ we prevent this case when sse is not available.  However, some ABIs
+ may require the result to be returned like integer TImode. */
if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
- return FIRST_SSE_REG;
+ return TARGET_SSE ? FIRST_SSE_REG : 0;
/* Decimal floating point values can go in %eax, unlike other float modes. */
if (DECIMAL_FLOAT_MODE_P (mode))
HOST_WIDE_INT words, n_gpr, n_fpr;
tree f_gpr, f_fpr, f_ovf, f_sav;
tree gpr, fpr, ovf, sav, t;
+ tree type;
/* Only 64-bit targets need something special. */
if (!TARGET_64BIT)
if (cfun->va_list_gpr_size)
{
- t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
- build_int_cst (NULL_TREE, n_gpr * 8));
+ type = TREE_TYPE (gpr);
+ t = build2 (MODIFY_EXPR, type, gpr,
+ build_int_cst (type, n_gpr * 8));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
if (cfun->va_list_fpr_size)
{
- t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
- build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
+ type = TREE_TYPE (fpr);
+ t = build2 (MODIFY_EXPR, type, fpr,
+ build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Find the overflow area. */
- t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+ type = TREE_TYPE (ovf);
+ t = make_tree (type, virtual_incoming_args_rtx);
if (words != 0)
- t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
- build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
- t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ t = build2 (PLUS_EXPR, type, t,
+ build_int_cst (type, words * UNITS_PER_WORD));
+ t = build2 (MODIFY_EXPR, type, ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
{
/* Find the register save area.
The function prologue saves it right above the stack frame. */
- t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
- t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
+ type = TREE_TYPE (sav);
+ t = make_tree (type, frame_pointer_rtx);
+ t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
XFmode);
}
+/* Return 1 if MODE is a valid vector mode for SSE.  */
+static int
+standard_sse_mode_p (enum machine_mode mode)
+{
+ switch (mode)
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ case V4SFmode:
+ case V2DFmode:
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* Return 1 if X is an all-zeros constant we can load into an SSE register
without using memory; 2 (or -1 when SSE2 is unavailable) if X is an
all-ones vector constant; 0 otherwise. */
int
standard_sse_constant_p (rtx x)
{
- if (x == const0_rtx)
+ enum machine_mode mode = GET_MODE (x);
+
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
return 1;
- return (x == CONST0_RTX (GET_MODE (x)));
+ if (vector_all_ones_operand (x, mode)
+ && standard_sse_mode_p (mode))
+ return TARGET_SSE2 ? 2 : -1;
+
+ return 0;
+}
+
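+/* For example (a sketch of the contract, not exercised here):
+   CONST0_RTX (V4SFmode) yields 1, an all-ones V4SImode vector yields 2
+   when SSE2 is enabled and -1 with plain SSE, anything else yields 0.  */
+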
+/* Return the opcode of the special instruction to be used to load
+ the constant X. */
+
+const char *
+standard_sse_constant_opcode (rtx insn, rtx x)
+{
+ switch (standard_sse_constant_p (x))
+ {
+ case 1:
+ if (get_attr_mode (insn) == MODE_V4SF)
+ return "xorps\t%0, %0";
+ else if (get_attr_mode (insn) == MODE_V2DF)
+ return "xorpd\t%0, %0";
+ else
+ return "pxor\t%0, %0";
+ case 2:
+ return "pcmpeqd\t%0, %0";
+ }
+ gcc_unreachable ();
}
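+
+/* The expected caller is a move pattern's output template in the machine
+   description, along the lines of (a sketch, assuming such a pattern):
+     return standard_sse_constant_opcode (insn, operands[1]);  */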
/* Returns 1 if OP contains a symbol reference.  */
if (from == ARG_POINTER_REGNUM)
return frame.stack_pointer_offset;
-
+
gcc_assert (from == FRAME_POINTER_REGNUM);
return frame.stack_pointer_offset - frame.frame_pointer_offset;
}
static rtx
ix86_internal_arg_pointer (void)
{
- if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
- && DECL_NAME (current_function_decl)
- && MAIN_NAME_P (DECL_NAME (current_function_decl))
- && DECL_FILE_SCOPE_P (current_function_decl))
- {
+ bool has_force_align_arg_pointer =
+ (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
+ if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
+ && DECL_NAME (current_function_decl)
+ && MAIN_NAME_P (DECL_NAME (current_function_decl))
+ && DECL_FILE_SCOPE_P (current_function_decl))
+ || ix86_force_align_arg_pointer
+ || has_force_align_arg_pointer)
+ {
+ /* Nested functions can't realign the stack due to a register
+ conflict. */
+ if (DECL_CONTEXT (current_function_decl)
+ && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
+ {
+ if (ix86_force_align_arg_pointer)
+ warning (0, "-mstackrealign ignored for nested functions");
+ if (has_force_align_arg_pointer)
+ error ("%s not supported for nested functions",
+ ix86_force_align_arg_pointer_string);
+ return virtual_incoming_args_rtx;
+ }
cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
return copy_to_reg (cfun->machine->force_align_arg_pointer);
}
/* And here we cheat like madmen with the unwind info. We force the
cfa register back to sp+4, which is exactly what it was at the
start of the function. Re-pushing the return address results in
- the return at the same spot relative to the cfa, and thus is
+ the return at the same spot relative to the cfa, and thus is
correct wrt the unwind info. */
x = cfun->machine->force_align_arg_pointer;
x = gen_frame_mem (Pmode, plus_constant (x, -4));
{
if (pic_offset_table_rtx)
REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
+#if TARGET_MACHO
+ /* Mach-O doesn't support labels at the end of objects, so if
+ it looks like we might want one, insert a NOP. */
+ {
+ rtx insn = get_last_insn ();
+ while (insn
+ && NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
+ insn = PREV_INSN (insn);
+ if (insn
+ && (LABEL_P (insn)
+ || (NOTE_P (insn)
+ && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
+ fputs ("\tnop\n", file);
+ }
+#endif
+
}
\f
/* Extract the parts of an RTL expression that is a valid memory address
return false;
break;
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == TImode
+ && x != CONST0_RTX (TImode)
+ && !TARGET_64BIT)
+ return false;
+ break;
+
+ case CONST_VECTOR:
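+      /* Only the all-zeros vector is a legitimate immediate; any other
+         vector constant must come from the constant pool.  */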
+ if (x == CONST0_RTX (GET_MODE (x)))
+ return true;
+ return false;
+
default:
break;
}
static bool
ix86_cannot_force_const_mem (rtx x)
{
+ /* We can always put integral constants and vectors in memory. */
+ switch (GET_CODE (x))
+ {
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST_VECTOR:
+ return false;
+
+ default:
+ break;
+ }
return !legitimate_constant_p (x);
}
{
rtx reg;
reason_rtx = base;
-
+
if (REG_P (base))
reg = base;
else if (GET_CODE (base) == SUBREG
goto is_legitimate_pic;
reason = "64bit address unspec";
goto report_error;
-
+
case UNSPEC_GOTPCREL:
gcc_assert (flag_pic);
goto is_legitimate_pic;
{
rtx x = ix86_tls_module_base ();
- base = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, base));
-
- set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ set_unique_reg_note (get_last_insn (), REG_EQUIV,
+ gen_rtx_MINUS (Pmode, x, tp));
}
off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
off = gen_rtx_CONST (Pmode, off);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
+
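+      /* With the GNU2 (TLS descriptor) dialect, DEST so far is still an
+         offset from the thread pointer, so add TP to form the final
+         address; the REG_EQUIV note records the symbol as its value.  */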
+ if (TARGET_GNU2_TLS)
+ {
+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
+
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
+ }
+
break;
case TLS_MODEL_INITIAL_EXEC:
putc ('+', file);
output_pic_addr_const (file, XEXP (x, 1), code);
}
- else
+ else
{
gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
output_pic_addr_const (file, XEXP (x, 1), code);
/* In the name of slightly smaller debug output, and to cater to
general assembler lossage, recognize PIC+GOTOFF and turn it back
- into a direct symbol reference.
+ into a direct symbol reference.
On Darwin, this is necessary to avoid a crash, because Darwin
has a different PIC label for each routine but the DWARF debugging
if (! result)
return orig_x;
-
+
if (const_addend)
result = gen_rtx_PLUS (Pmode, result, const_addend);
if (reg_addend)
emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
slot = SLOT_CW_CEIL;
break;
-
+
case I387_CW_MASK_PM:
/* mask precision exception for nearbyint() */
emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
return "";
}
+/* Output code for x87 ffreep insn. The OPNO argument, which may only
+ have the values zero or one, indicates the ffreep insn's operand
+ from the OPERANDS array. */
+
+static const char *
+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
+{
+ if (TARGET_USE_FFREEP)
+#if HAVE_AS_IX86_FFREEP
+ return opno ? "ffreep\t%y1" : "ffreep\t%y0";
+#else
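+  /* This assembler lacks the ffreep mnemonic, so emit the raw encoding:
+     ffreep %st(N) is 0xdf 0xc0+N, i.e. a little-endian .word of 0xcNdf.  */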
+ switch (REGNO (operands[opno]))
+ {
+ case FIRST_STACK_REG + 0: return ".word\t0xc0df";
+ case FIRST_STACK_REG + 1: return ".word\t0xc1df";
+ case FIRST_STACK_REG + 2: return ".word\t0xc2df";
+ case FIRST_STACK_REG + 3: return ".word\t0xc3df";
+ case FIRST_STACK_REG + 4: return ".word\t0xc4df";
+ case FIRST_STACK_REG + 5: return ".word\t0xc5df";
+ case FIRST_STACK_REG + 6: return ".word\t0xc6df";
+ case FIRST_STACK_REG + 7: return ".word\t0xc7df";
+ }
+#endif
+
+ return opno ? "fstp\t%y1" : "fstp\t%y0";
+}
+
+
/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
should be used. UNORDERED_P is true when fucom should be used. */
if (stack_top_dies)
{
output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
- return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
+ return output_387_ffreep (operands, 1);
}
else
return "ftst\n\tfnstsw\t%0";
output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
else
output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
- return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
+ return output_387_ffreep (operands, 0);
}
else
{
#else
if (GET_CODE (op0) == MEM)
op1 = force_reg (Pmode, op1);
- else
+ else
op1 = legitimize_address (op1, op1, Pmode);
#endif /* TARGET_MACHO */
}
to handle some of them more efficiently. */
if ((reload_in_progress | reload_completed) == 0
&& register_operand (op0, mode)
- && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
+ && CONSTANT_P (op1)
+ && standard_sse_constant_p (op1) <= 0)
op1 = validize_mem (force_const_mem (mode, op1));
/* Make operand1 a register if it isn't already. */
emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
-/* Implement the movmisalign patterns for SSE. Non-SSE modes go
+/* Implement the movmisalign patterns for SSE. Non-SSE modes go
straight to ix86_expand_vector_move. */
void
if (use_sse)
mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
else
- {
- /* When not using SSE, we don't use the mask, but prefer to keep the
- same general form of the insn pattern to reduce duplication when
- it comes time to split. */
- mask = const0_rtx;
- }
+ mask = NULL_RTX;
dst = operands[0];
src = operands[1];
/* If the destination is memory, and we don't have matching source
- operands, do things in registers. */
+ operands or we're using the x87, do things in registers. */
matching_memory = false;
if (MEM_P (dst))
{
- if (rtx_equal_p (dst, src))
+ if (use_sse && rtx_equal_p (dst, src))
matching_memory = true;
else
dst = gen_reg_rtx (mode);
{
set = gen_rtx_fmt_e (code, mode, src);
set = gen_rtx_SET (VOIDmode, dst, set);
- use = gen_rtx_USE (VOIDmode, mask);
- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
+ if (mask)
+ {
+ use = gen_rtx_USE (VOIDmode, mask);
+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+ emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (3, set, use, clob)));
+ }
+ else
+ emit_insn (set);
}
if (dst != operands[0])
{
rtx tmp;
+ /* If we have emitted a compare insn, go straight to simple.
+ ix86_expand_compare won't emit anything if ix86_compare_emitted
+ is non-NULL. */
+ if (ix86_compare_emitted)
+ goto simple;
+
switch (GET_MODE (ix86_compare_op0))
{
case QImode:
enum machine_mode mode =
GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
- /* Do not handle DImode compares that go trought special path. Also we can't
+ /* Do not handle DImode compares that go through a special path.  Also we can't
deal with FP compares yet. This is possible to add. */
if (mode == (TARGET_64BIT ? TImode : DImode))
return false;
tricks to turn this into a signed comparison against 0. */
if (code == GTU)
{
+ cop0 = force_reg (mode, cop0);
+
switch (mode)
{
case V4SImode:
{
/* The only non-offsettable memories we handle are pushes. */
int ok = push_operand (operand, VOIDmode);
-
+
gcc_assert (ok);
-
+
operand = copy_rtx (operand);
PUT_MODE (operand, Pmode);
parts[0] = parts[1] = parts[2] = operand;
default:
gcc_unreachable ();
}
-
+
if (GET_MODE (part[1][0]) == SImode)
part[1][0] = part[1][1];
}
ix86_expand_clear (low[0]);
ix86_expand_clear (high[0]);
emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
-
+
d = gen_lowpart (QImode, low[0]);
d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
s = gen_rtx_EQ (QImode, flags, const0_rtx);
by DEP_INSN and nothing set by DEP_INSN. */
static int
-ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
+ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
rtx set, set2;
address with operands set by DEP_INSN. */
static int
-ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
+ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
rtx addr;
if (GET_CODE (addr) == PARALLEL)
addr = XVECEXP (addr, 0, 0);
-
+
gcc_assert (GET_CODE (addr) == SET);
-
+
addr = SET_SRC (addr);
}
else
{
case PROCESSOR_PENTIUM:
/* Address Generation Interlock adds a cycle of latency. */
- if (ix86_agi_dependant (insn, dep_insn, insn_type))
+ if (ix86_agi_dependent (insn, dep_insn, insn_type))
cost += 1;
/* ??? Compares pair with jump/setcc. */
- if (ix86_flags_dependant (insn, dep_insn, insn_type))
+ if (ix86_flags_dependent (insn, dep_insn, insn_type))
cost = 0;
/* Floating point stores require value to be ready one cycle earlier. */
if (insn_type == TYPE_FMOV
&& get_attr_memory (insn) == MEMORY_STORE
- && !ix86_agi_dependant (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
cost += 1;
break;
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependant (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
{
/* Claim moves to take one cycle, as the core can issue one load
at a time and the next load can start a cycle later. */
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependant (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
{
/* Claim moves to take one cycle, as the core can issue one load
at a time and the next load can start a cycle later. */
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependant (insn, dep_insn, insn_type))
+ && !ix86_agi_dependent (insn, dep_insn, insn_type))
{
enum attr_unit unit = get_attr_unit (insn);
int loadcost = 3;
IX86_BUILTIN_VEC_SET_V8HI,
IX86_BUILTIN_VEC_SET_V4HI,
- /* SSE2 ABI functions. */
- IX86_BUILTIN_SSE2_ACOS,
- IX86_BUILTIN_SSE2_ACOSF,
- IX86_BUILTIN_SSE2_ASIN,
- IX86_BUILTIN_SSE2_ASINF,
- IX86_BUILTIN_SSE2_ATAN,
- IX86_BUILTIN_SSE2_ATANF,
- IX86_BUILTIN_SSE2_ATAN2,
- IX86_BUILTIN_SSE2_ATAN2F,
- IX86_BUILTIN_SSE2_COS,
- IX86_BUILTIN_SSE2_COSF,
- IX86_BUILTIN_SSE2_EXP,
- IX86_BUILTIN_SSE2_EXPF,
- IX86_BUILTIN_SSE2_LOG10,
- IX86_BUILTIN_SSE2_LOG10F,
- IX86_BUILTIN_SSE2_LOG,
- IX86_BUILTIN_SSE2_LOGF,
- IX86_BUILTIN_SSE2_SIN,
- IX86_BUILTIN_SSE2_SINF,
- IX86_BUILTIN_SSE2_TAN,
- IX86_BUILTIN_SSE2_TANF,
-
IX86_BUILTIN_MAX
};
{
if (TARGET_MMX)
ix86_init_mmx_sse_builtins ();
- if (TARGET_SSE2)
- ix86_init_sse_abi_builtins ();
}
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
integer_type_node, NULL_TREE);
def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
ftype, IX86_BUILTIN_VEC_SET_V8HI);
-
+
ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
intHI_type_node,
integer_type_node, NULL_TREE);
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
ftype, IX86_BUILTIN_VEC_SET_V4HI);
}
-#undef def_builtin
-
-/* Set up all the SSE ABI builtins that we may use to override
- the normal builtins. */
-static void
-ix86_init_sse_abi_builtins (void)
-{
- tree dbl, flt, dbl2, flt2;
-
- /* Bail out in case the template definitions are not available. */
- if (! built_in_decls [BUILT_IN_SIN]
- || ! built_in_decls [BUILT_IN_SINF]
- || ! built_in_decls [BUILT_IN_ATAN2]
- || ! built_in_decls [BUILT_IN_ATAN2F])
- return;
-
- /* Build the function types as variants of the existing ones. */
- dbl = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SIN]));
- TYPE_ATTRIBUTES (dbl)
- = tree_cons (get_identifier ("sseregparm"),
- NULL_TREE, TYPE_ATTRIBUTES (dbl));
- flt = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_SINF]));
- TYPE_ATTRIBUTES (flt)
- = tree_cons (get_identifier ("sseregparm"),
- NULL_TREE, TYPE_ATTRIBUTES (flt));
- dbl2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2]));
- TYPE_ATTRIBUTES (dbl2)
- = tree_cons (get_identifier ("sseregparm"),
- NULL_TREE, TYPE_ATTRIBUTES (dbl2));
- flt2 = build_variant_type_copy (TREE_TYPE (built_in_decls [BUILT_IN_ATAN2F]));
- TYPE_ATTRIBUTES (flt2)
- = tree_cons (get_identifier ("sseregparm"),
- NULL_TREE, TYPE_ATTRIBUTES (flt2));
-
-#define def_builtin(capname, name, type) \
- ix86_builtin_function_variants [BUILT_IN_ ## capname] \
- = lang_hooks.builtin_function ("__builtin_sse2_" # name, type, \
- IX86_BUILTIN_SSE2_ ## capname, \
- BUILT_IN_NORMAL, \
- "__libm_sse2_" # name, NULL_TREE)
-
- def_builtin (ACOS, acos, dbl);
- def_builtin (ACOSF, acosf, flt);
- def_builtin (ASIN, asin, dbl);
- def_builtin (ASINF, asinf, flt);
- def_builtin (ATAN, atan, dbl);
- def_builtin (ATANF, atanf, flt);
- def_builtin (ATAN2, atan2, dbl2);
- def_builtin (ATAN2F, atan2f, flt2);
- def_builtin (COS, cos, dbl);
- def_builtin (COSF, cosf, flt);
- def_builtin (EXP, exp, dbl);
- def_builtin (EXPF, expf, flt);
- def_builtin (LOG10, log10, dbl);
- def_builtin (LOG10F, log10f, flt);
- def_builtin (LOG, log, dbl);
- def_builtin (LOGF, logf, flt);
- def_builtin (SIN, sin, dbl);
- def_builtin (SINF, sinf, flt);
- def_builtin (TAN, tan, dbl);
- def_builtin (TANF, tanf, flt);
-
-#undef def_builtin
-}
/* Errors in the source file can cause expand_expr to return const0_rtx
where we expect a vector. To avoid crashing, use one of the vector
instructions from inside the compiler, we can't allow the use of MMX
registers unless the user explicitly asks for it. So we do *not* define
vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
- we have builtins invoked by mmintrin.h that gives us license to emit
+ we have builtins invoked by mmintrin.h that give us license to emit
these sorts of instructions. */
static rtx
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
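+      /* The monitored address is a pointer and so belongs in a Pmode
+         register (DImode on 64-bit targets); the op1/op2 hints stay
+         SImode.  */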
if (!REG_P (op0))
- op0 = copy_to_mode_reg (SImode, op0);
+ op0 = copy_to_mode_reg (Pmode, op0);
if (!REG_P (op1))
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- emit_insn (gen_sse3_monitor (op0, op1, op2));
+ if (!TARGET_64BIT)
+ emit_insn (gen_sse3_monitor (op0, op1, op2));
+ else
+ emit_insn (gen_sse3_monitor64 (op0, op1, op2));
return 0;
case IX86_BUILTIN_MWAIT:
gcc_unreachable ();
}
-/* Expand an expression EXP that calls a built-in library function,
- with result going to TARGET if that's convenient
- (and in mode MODE if that's convenient).
- SUBTARGET may be used as the target for computing one of EXP's operands.
- IGNORE is nonzero if the value is to be ignored. */
-
-static rtx
-ix86_expand_library_builtin (tree exp, rtx target,
- rtx subtarget ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- int ignore)
-{
- enum built_in_function fncode;
- tree fndecl, newfn, call;
-
- /* Try expanding builtin math functions to the SSE2 ABI variants. */
- if (!TARGET_SSELIBM)
- return NULL_RTX;
-
- fncode = builtin_mathfn_code (exp);
- if (!ix86_builtin_function_variants [(int)fncode])
- return NULL_RTX;
-
- fndecl = get_callee_fndecl (exp);
- if (DECL_RTL_SET_P (fndecl))
- return NULL_RTX;
-
- /* Build the redirected call and expand it. */
- newfn = ix86_builtin_function_variants [(int)fncode];
- call = build_function_call_expr (newfn, TREE_OPERAND (exp, 1));
- return expand_call (call, target, ignore);
-}
-
/* Store OPERAND to memory after reload is completed.  This means
that we can't easily use assign_stack_local. */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
rtx result;
-
+
gcc_assert (reload_completed);
if (TARGET_RED_ZONE)
{
enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class)
{
- /* We're only allowed to return a subclass of CLASS. Many of the
+ enum machine_mode mode = GET_MODE (x);
+
+ /* We're only allowed to return a subclass of CLASS. Many of the
following checks fail for NO_REGS, so eliminate that early. */
if (class == NO_REGS)
return NO_REGS;
/* All classes can load zeros. */
- if (x == CONST0_RTX (GET_MODE (x)))
+ if (x == CONST0_RTX (mode))
return class;
+ /* Force constants into memory if we are loading a (nonzero) constant into
+ an MMX or SSE register.  This is because no MMX/SSE instruction can
+ take an immediate constant as an operand. */
+ if (CONSTANT_P (x)
+ && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
+ return NO_REGS;
+
+ /* Prefer SSE regs only if we can use them for math. */
+ if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
+ return SSE_CLASS_P (class) ? class : NO_REGS;
+
/* Floating-point constants need more complex checks. */
if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
{
zero above. We only want to wind up preferring 80387 registers if
we plan on doing computation with them. */
if (TARGET_80387
- && (TARGET_MIX_SSE_I387
- || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
&& standard_80387_constant_p (x))
{
/* Limit class to non-sse. */
return NO_REGS;
}
- if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
- return NO_REGS;
- if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
- return NO_REGS;
/* Generally when we see PLUS here, it's the function invariant
(plus soft-fp const_int), which can only be computed into general
return class;
}
+/* Discourage putting floating-point values in SSE registers unless
+ SSE math is being used, and likewise for the 387 registers. */
+enum reg_class
+ix86_preferred_output_reload_class (rtx x, enum reg_class class)
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ /* Restrict the output reload class to the register bank that we are doing
+ math on. If we would like not to return a subset of CLASS, reject this
+ alternative: if reload cannot do this, it will still use its choice. */
+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+ return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
+
+ if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
+ {
+ if (class == FP_TOP_SSE_REGS)
+ return FP_TOP_REG;
+ else if (class == FP_SECOND_SSE_REGS)
+ return FP_SECOND_REG;
+ else
+ return FLOAT_CLASS_P (class) ? class : NO_REGS;
+ }
+
+ return class;
+}
+
/* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers.
if (!TARGET_SSE2)
return true;
- /* If the target says that inter-unit moves are more expensive
+ /* If the target says that inter-unit moves are more expensive
than moving through memory, then don't generate them. */
if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
return true;
return true;
/* ??? For the cost of one register reformat penalty, we could use
- the same instructions to move SFmode and DFmode data, but the
+ the same instructions to move SFmode and DFmode data, but the
relevant move patterns don't support those alternatives. */
if (mode == SFmode || mode == DFmode)
return true;
return true;
/* Vector registers do not support subreg with nonzero offsets, which
- are otherwise valid for integer registers. Since we can't see
+ are otherwise valid for integer registers. Since we can't see
whether we have a nonzero offset from here, prohibit all
nonparadoxical subregs changing size. */
if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
else if (VALID_FP_MODE_P (mode))
return 1;
/* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
- on to use that value in smaller contexts, this can easily force a
+ on to use that value in smaller contexts, this can easily force a
pseudo to be allocated to GENERAL_REGS. Since this is no worse than
supporting DImode, allow it. */
else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
return 0;
}
-/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
tieable integer mode. */
static bool
if (mode2 == DFmode)
return mode1 == SFmode;
- /* If MODE2 is only appropriate for an SSE register, then tie with
+ /* If MODE2 is only appropriate for an SSE register, then tie with
any other mode acceptable to SSE registers. */
if (GET_MODE_SIZE (mode2) >= 8
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
if (!TARGET_SSE_MATH
|| mode == XFmode
|| (mode == DFmode && !TARGET_SSE2))
- *total = 0;
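+ /* standard_80387_constant_p is assumed to yield 1 for 0.0 (fldz),
+    2 for 1.0 (fld1), larger values for the extended fld constants,
+    and -1/0 when there is no special constant.  */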
+ /* For standard 80387 constants, raise the cost to prevent
+ compress_float_constant() from generating a load from memory. */
+ switch (standard_80387_constant_p (XEXP (x, 0)))
+ {
+ case -1:
+ case 0:
+ *total = 0;
+ break;
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ default:
+ *total = (x86_ext_80387_constants & TUNEMASK
+ || optimize_size
+ ? 1 : 0);
+ }
return false;
case ABS:
if (MACHOPIC_PURE)
{
- fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
- fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
- fprintf (file, "\tjmp *%%edx\n");
+ fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
+ fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
+ fprintf (file, "\tjmp\t*%%edx\n");
}
else
- fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
+ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
fprintf (file, "%s:\n", binder_name);
if (MACHOPIC_PURE)
{
- fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
- fprintf (file, "\tpushl %%eax\n");
+ fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
+ fprintf (file, "\tpushl\t%%eax\n");
}
else
- fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
+ fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
- fprintf (file, "\tjmp dyld_stub_binding_helper\n");
+ fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
fprintf (file, "%s:\n", lazy_ptr_name);
{
case V2SImode:
case V2SFmode:
- if (!mmx_ok && !TARGET_SSE)
+ if (!mmx_ok)
return false;
/* FALLTHRU */
wvmode = V4HImode;
goto widen;
case V8HImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend HImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V8HImode vector. */
+ tmp1 = gen_reg_rtx (V8HImode);
+ emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
+ /* Duplicate the low short through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
+ /* Cast the V8HImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode vector back to V8HImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
+ return true;
+ }
smode = HImode;
wsmode = SImode;
wvmode = V4SImode;
goto widen;
case V16QImode:
+ if (TARGET_SSE2)
+ {
+ rtx tmp1, tmp2;
+ /* Extend QImode to SImode using a paradoxical SUBREG. */
+ tmp1 = gen_reg_rtx (SImode);
+ emit_move_insn (tmp1, gen_lowpart (SImode, val));
+ /* Insert the SImode value as low element of V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ tmp1 = gen_rtx_VEC_MERGE (V4SImode,
+ gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
+ CONST0_RTX (V4SImode),
+ const1_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
+ /* Cast the V4SImode vector back to a V16QImode vector. */
+ tmp1 = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
+ /* Duplicate the low byte through the whole low SImode word. */
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
+ /* Cast the V16QImode vector back to a V4SImode vector. */
+ tmp2 = gen_reg_rtx (V4SImode);
+ emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
+ /* Replicate the low element of the V4SImode vector. */
+ emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
+ /* Cast the V4SImode vector back to V16QImode, and store in target. */
+ emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
+ return true;
+ }
smode = QImode;
wsmode = HImode;
wvmode = V8HImode;
}
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
- whose low element is VAR, and other elements are zero. Return true
+ whose ONE_VAR element is VAR, and other elements are zero. Return true
if successful. */
static bool
-ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
- rtx target, rtx var)
+ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
+ rtx target, rtx var, int one_var)
{
enum machine_mode vsimode;
- rtx x;
+ rtx new_target;
+ rtx x, tmp;
switch (mode)
{
case V2SFmode:
case V2SImode:
- if (!mmx_ok && !TARGET_SSE)
+ if (!mmx_ok)
return false;
/* FALLTHRU */
case V2DFmode:
case V2DImode:
+ if (one_var != 0)
+ return false;
var = force_reg (GET_MODE_INNER (mode), var);
x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
emit_insn (gen_rtx_SET (VOIDmode, target, x));
case V4SFmode:
case V4SImode:
+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
+ new_target = gen_reg_rtx (mode);
+ else
+ new_target = target;
var = force_reg (GET_MODE_INNER (mode), var);
x = gen_rtx_VEC_DUPLICATE (mode, var);
x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
- emit_insn (gen_rtx_SET (VOIDmode, target, x));
+ emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
+ if (one_var != 0)
+ {
+ /* We need to shuffle the value to the correct position, so
+ create a new pseudo to store the intermediate result. */
+
+ /* With SSE2, we can use the integer shuffle insns. */
+ if (mode != V4SFmode && TARGET_SSE2)
+ {
+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0 : 1),
+ GEN_INT (one_var == 3 ? 0 : 1)));
+ if (target != new_target)
+ emit_move_insn (target, new_target);
+ return true;
+ }
+
+ /* Otherwise convert the intermediate result to V4SFmode and
+ use the SSE1 shuffle instructions. */
+ if (mode != V4SFmode)
+ {
+ tmp = gen_reg_rtx (V4SFmode);
+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
+ }
+ else
+ tmp = new_target;
+
+ emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
+ GEN_INT (1),
+ GEN_INT (one_var == 1 ? 0 : 1),
+ GEN_INT (one_var == 2 ? 0+4 : 1+4),
+ GEN_INT (one_var == 3 ? 0+4 : 1+4)));
+
+ if (mode != V4SFmode)
+ emit_move_insn (target, gen_lowpart (V4SImode, tmp));
+ else if (tmp != target)
+ emit_move_insn (target, tmp);
+ }
+ else if (target != new_target)
+ emit_move_insn (target, new_target);
return true;
case V8HImode:
vsimode = V2SImode;
goto widen;
widen:
+ if (one_var != 0)
+ return false;
+
/* Zero extend the variable element to SImode and recurse. */
var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
x = gen_reg_rtx (vsimode);
- if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
+ var, one_var))
gcc_unreachable ();
emit_move_insn (target, gen_lowpart (mode, x));
if (!register_operand (op1, half_mode))
op1 = force_reg (half_mode, op1);
- emit_insn (gen_rtx_SET (VOIDmode, target,
+ emit_insn (gen_rtx_SET (VOIDmode, target,
gen_rtx_VEC_CONCAT (mode, op0, op1)));
}
else
}
}
-/* Initialize vector TARGET via VALS. Suppress the use of MMX
+/* Initialize vector TARGET via VALS. Suppress the use of MMX
instructions unless MMX_OK is true. */
void
the pool and overwritten via move later. */
if (n_var == 1)
{
- if (all_const_zero && one_var == 0
- && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
- XVECEXP (vals, 0, 0)))
+ if (all_const_zero
+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
+ XVECEXP (vals, 0, one_var),
+ one_var))
return;
if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
if (REG_P (operands[1])
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
{
- if (REGNO (operands[0]) == FIRST_STACK_REG
- && TARGET_USE_FFREEP)
- return "ffreep\t%y0";
+ if (REGNO (operands[0]) == FIRST_STACK_REG)
+ return output_387_ffreep (operands, 0);
return "fstp\t%y0";
}
if (STACK_TOP_P (operands[0]))
{
if (flag_pic)
{
-int type = DW_EH_PE_sdata8;
+ int type = DW_EH_PE_sdata8;
if (!TARGET_64BIT
|| ix86_cmodel == CM_SMALL_PIC
|| (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))