/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
instructions. */
~m_ATOM,
+
+ /* X86_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
+ at -O3. For the moment, the prefetching seems badly tuned for Intel
+ chips. */
+ m_K6_GEODE | m_AMD_MULTIPLE
};
/* Feature tests against the various architecture variations. */
return ret;
}
-/* Return TRUE if software prefetching is beneficial for the
- given CPU. */
-
-static bool
-software_prefetching_beneficial_p (void)
-{
- switch (ix86_tune)
- {
- case PROCESSOR_GEODE:
- case PROCESSOR_K6:
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- case PROCESSOR_AMDFAM10:
- case PROCESSOR_BTVER1:
- return true;
-
- default:
- return false;
- }
-}
-
/* Return true, if profiling code should be emitted before
prologue. Otherwise it returns false.
Note: For x86 with "hotfix" it is sorried. */
if (flag_prefetch_loop_arrays < 0
&& HAVE_prefetch
&& optimize >= 3
- && software_prefetching_beneficial_p ())
+ && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
flag_prefetch_loop_arrays = 1;
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
/* Free up memory allocated to hold the strings */
for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
- if (option_strings[i])
- free (option_strings[i]);
+ free (option_strings[i]);
}
return t;
return NULL_TREE;
}
- /* Can combine regparm with all attributes but fastcall. */
+ /* Can combine regparm with all attributes but fastcall and thiscall. */
if (is_attribute_p ("regparm", name))
{
tree cst;
return NULL_TREE;
}
-/* This function checks if the method-function has default __thiscall
- calling-convention for 32-bit msabi.
- It returns true if TYPE is of kind METHOD_TYPE, no stdarg function,
- and the MS_ABI 32-bit is used. Otherwise it returns false. */
+/* Determine the calling convention of function type TYPE and return it
+ as a mask of IX86_CALLCVT_* bits. On 64-bit targets the result is
+ always IX86_CALLCVT_CDECL. Otherwise the first of the cdecl, stdcall,
+ fastcall and thiscall attributes found on TYPE (checked in that order)
+ selects the convention, and the regparm/sseregparm bits are added
+ unless the convention is fastcall or thiscall. Without such an
+ attribute the default is stdcall for non-stdarg functions when
+ TARGET_RTD is set, thiscall for non-stdarg METHOD_TYPEs that use
+ MS_ABI and carry no regparm/sseregparm bits, and cdecl otherwise. */
-static bool
-ix86_is_msabi_thiscall (const_tree type)
+unsigned int
+ix86_get_callcvt (const_tree type)
{
- if (TARGET_64BIT || ix86_function_type_abi (type) != MS_ABI
- || TREE_CODE (type) != METHOD_TYPE || stdarg_p (type))
- return false;
- /* Check for different calling-conventions. */
- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (type))
- || lookup_attribute ("stdcall", TYPE_ATTRIBUTES (type))
- || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))
- || lookup_attribute ("regparm", TYPE_ATTRIBUTES (type))
- || lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))
- return false;
- return true;
-}
+ unsigned int ret = 0;
+ bool is_stdarg;
+ tree attrs;
-/* This function checks if the thiscall attribute is set for the TYPE,
- or if it is an method-type with default thiscall convention.
- It returns true if function match, otherwise false is returned. */
+ /* 64-bit targets always report cdecl; attributes are not examined. */
+ if (TARGET_64BIT)
+ return IX86_CALLCVT_CDECL;
-static bool
-ix86_is_type_thiscall (const_tree type)
-{
- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type))
- || ix86_is_msabi_thiscall (type))
- return true;
- return false;
+ attrs = TYPE_ATTRIBUTES (type);
+ if (attrs != NULL_TREE)
+ {
+ /* At most one base convention bit is set: the else-if chain stops
+ at the first matching attribute. */
+ if (lookup_attribute ("cdecl", attrs))
+ ret |= IX86_CALLCVT_CDECL;
+ else if (lookup_attribute ("stdcall", attrs))
+ ret |= IX86_CALLCVT_STDCALL;
+ else if (lookup_attribute ("fastcall", attrs))
+ ret |= IX86_CALLCVT_FASTCALL;
+ else if (lookup_attribute ("thiscall", attrs))
+ ret |= IX86_CALLCVT_THISCALL;
+
+ /* Regparm isn't allowed for thiscall and fastcall. */
+ if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
+ {
+ if (lookup_attribute ("regparm", attrs))
+ ret |= IX86_CALLCVT_REGPARM;
+ if (lookup_attribute ("sseregparm", attrs))
+ ret |= IX86_CALLCVT_SSEREGPARM;
+ }
+
+ /* An explicit convention attribute was found, so nothing is left to
+ infer. NOTE(review): this assumes IX86_BASE_CALLCVT, defined
+ elsewhere, masks out the regparm/sseregparm bits -- confirm. */
+ if (IX86_BASE_CALLCVT(ret) != 0)
+ return ret;
+ }
+
+ /* No explicit convention. With TARGET_RTD, non-stdarg functions
+ default to stdcall; any regparm/sseregparm bits in RET are kept. */
+ is_stdarg = stdarg_p (type);
+ if (TARGET_RTD && !is_stdarg)
+ return IX86_CALLCVT_STDCALL | ret;
+
+ /* Default to cdecl unless TYPE is a non-stdarg METHOD_TYPE using MS_ABI
+ with no bits collected in RET. */
+ if (ret != 0
+ || is_stdarg
+ || TREE_CODE (type) != METHOD_TYPE
+ || ix86_function_type_abi (type) != MS_ABI)
+ return IX86_CALLCVT_CDECL | ret;
+
+ /* The remaining case defaults to thiscall. */
+ return IX86_CALLCVT_THISCALL;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
- /* Check for mismatch of non-default calling convention. */
- bool is_thiscall = ix86_is_msabi_thiscall (type1);
- const char *const rtdstr = TARGET_RTD ? (is_thiscall ? "thiscall" : "cdecl") : "stdcall";
+ unsigned int ccvt1, ccvt2;
if (TREE_CODE (type1) != FUNCTION_TYPE
&& TREE_CODE (type1) != METHOD_TYPE)
return 1;
- /* Check for mismatched fastcall/regparm types. */
- if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
- || (ix86_function_regparm (type1, NULL)
- != ix86_function_regparm (type2, NULL)))
- return 0;
-
- /* Check for mismatched sseregparm types. */
- if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
+ ccvt1 = ix86_get_callcvt (type1);
+ ccvt2 = ix86_get_callcvt (type2);
+ if (ccvt1 != ccvt2)
return 0;
-
- /* Check for mismatched thiscall types. */
- if (is_thiscall && !TARGET_RTD)
- {
- if (!lookup_attribute ("cdecl", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("cdecl", TYPE_ATTRIBUTES (type2)))
- return 0;
- }
- else if (!is_thiscall || TARGET_RTD)
- {
- if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
- != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
- return 0;
- }
-
- /* Check for mismatched return types (cdecl vs stdcall). */
- if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
- != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
+ if (ix86_function_regparm (type1, NULL)
+ != ix86_function_regparm (type2, NULL))
return 0;
return 1;
{
tree attr;
int regparm;
+ unsigned int ccvt;
if (TARGET_64BIT)
return (ix86_function_type_abi (type) == SYSV_ABI
? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
-
+ ccvt = ix86_get_callcvt (type);
regparm = ix86_regparm;
- attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
- if (attr)
+
+ if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
{
- regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
- return regparm;
+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
+ if (attr)
+ {
+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
+ return regparm;
+ }
}
-
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
return 2;
-
- if (ix86_is_type_thiscall (type))
+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
return 1;
/* Use register calling convention for local functions when possible. */
static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
- int rtd;
+ unsigned int ccvt;
/* None of the 64-bit ABIs pop arguments. */
if (TARGET_64BIT)
return 0;
- rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
-
- /* Cdecl functions override -mrtd, and never pop the stack. */
- if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
- {
- /* Stdcall and fastcall functions will pop the stack if not
- variable args. */
- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
- || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
- || ix86_is_type_thiscall (funtype))
- rtd = 1;
+ ccvt = ix86_get_callcvt (funtype);
- if (rtd && ! stdarg_p (funtype))
- return size;
- }
+ if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
+ | IX86_CALLCVT_THISCALL)) != 0
+ && ! stdarg_p (funtype))
+ return size;
/* Lose any fake structure return argument if it is passed on the stack. */
if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
- if (fntype != NULL)
+ if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
{
enum calling_abi abi = ix86_abi;
if (abi == SYSV_ABI)
else look for regparm information. */
if (fntype)
{
- if (ix86_is_type_thiscall (fntype))
+ unsigned int ccvt = ix86_get_callcvt (fntype);
+ if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
{
cum->nregs = 1;
cum->fastcall = 1; /* Same first register as in fastcall. */
}
- else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
{
cum->nregs = 2;
cum->fastcall = 1;
decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
get_identifier (name),
- build_function_type (void_type_node, void_list_node));
+ build_function_type_list (void_type_node, NULL_TREE));
DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
NULL_TREE, void_type_node);
TREE_PUBLIC (decl) = 1;
/* Return 1 if we need to save REGNO. */
static int
-ix86_save_reg (unsigned int regno, int maybe_eh_return)
+ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
if (pic_offset_table_rtx
&& regno == REAL_PIC_OFFSET_TABLE_REGNUM
/* Reuse static chain register if it isn't used for parameter
passing. */
- if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
- && !lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (decl)))
- && !ix86_is_type_thiscall (TREE_TYPE (decl)))
- return CX_REG;
- else
- return DI_REG;
+ if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
+ {
+ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
+ if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
+ return CX_REG;
+ }
+ return DI_REG;
}
}
}
/* The stack has already been decremented by the instruction calling us
- so we need to probe unconditionally to preserve the protection area. */
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+ so probe if the size is non-negative to preserve the protection area. */
+ if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
{
/* We expect the registers to be saved when probes are used. */
gcc_assert (int_registers_saved);
First register is restored from CFA - CFA_OFFSET. */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
- int maybe_eh_return)
+ bool maybe_eh_return)
{
struct machine_function *m = cfun->machine;
unsigned int regno;
First register is restored from CFA - CFA_OFFSET. */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
- int maybe_eh_return)
+ bool maybe_eh_return)
{
unsigned int regno;
/* Determine if a given RTX is a valid constant. We already know this
satisfies CONSTANT_P. */
-bool
-legitimate_constant_p (rtx x)
+static bool
+ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
switch (GET_CODE (x))
{
is checked above. */
static bool
-ix86_cannot_force_const_mem (rtx x)
+ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
/* We can always put integral constants and vectors in memory. */
switch (GET_CODE (x))
default:
break;
}
- return !legitimate_constant_p (x);
+ return !ix86_legitimate_constant_p (mode, x);
}
/* Displacement is an invalid pic construct. */
return false;
#if TARGET_MACHO
- else if (MACHO_DYNAMIC_NO_PIC_P && !legitimate_constant_p (disp))
+ else if (MACHO_DYNAMIC_NO_PIC_P
+ && !ix86_legitimate_constant_p (Pmode, disp))
/* displacment must be referenced via non_lazy_pointer */
return false;
#endif
else if (GET_CODE (disp) != LABEL_REF
&& !CONST_INT_P (disp)
&& (GET_CODE (disp) != CONST
- || !legitimate_constant_p (disp))
+ || !ix86_legitimate_constant_p (Pmode, disp))
&& (GET_CODE (disp) != SYMBOL_REF
- || !legitimate_constant_p (disp)))
+ || !ix86_legitimate_constant_p (Pmode, disp)))
/* Displacement is not constant. */
return false;
else if (TARGET_64BIT
/* Load the thread pointer. If TO_REG is true, force it into a register. */
static rtx
-get_thread_pointer (int to_reg)
+get_thread_pointer (bool to_reg)
{
rtx tp, reg, insn;
return reg;
}
+/* Return the SYMBOL_REF for the tls_get_addr function. The rtx is
+ built on the first call and cached in ix86_tls_symbol, so later calls
+ return the same object. */
+
+/* Cached SYMBOL_REF; null until ix86_tls_get_addr has created it. */
+static GTY(()) rtx ix86_tls_symbol;
+
+static rtx
+ix86_tls_get_addr (void)
+{
+ if (!ix86_tls_symbol)
+ {
+ /* The three-underscore name is used only when TARGET_ANY_GNU_TLS is
+ set on a non-64-bit target; every other configuration uses the
+ two-underscore name. */
+ const char *sym
+ = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
+ ? "___tls_get_addr" : "__tls_get_addr");
+
+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
+ }
+
+ return ix86_tls_symbol;
+}
+
+/* Return the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. The rtx is
+ built on the first call, tagged with the global-dynamic TLS model, and
+ cached in ix86_tls_module_base_symbol for later calls. */
+
+/* Cached SYMBOL_REF; null until ix86_tls_module_base has created it. */
+static GTY(()) rtx ix86_tls_module_base_symbol;
+
+rtx
+ix86_tls_module_base (void)
+{
+ if (!ix86_tls_module_base_symbol)
+ {
+ ix86_tls_module_base_symbol
+ = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
+
+ /* Record TLS_MODEL_GLOBAL_DYNAMIC in the TLS field of the symbol's
+ flags. */
+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
+ }
+
+ return ix86_tls_module_base_symbol;
+}
+
/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
false if we expect this to be used for a memory address and true if
we expect to load the address into a register. */
static rtx
-legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
+legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
- rtx dest, base, off, pic, tp;
+ rtx dest, base, off;
+ rtx pic = NULL_RTX, tp = NULL_RTX;
int type;
switch (model)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
dest = gen_reg_rtx (Pmode);
- tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
- if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ if (!TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
-
- start_sequence ();
- emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
- insns = get_insns ();
- end_sequence ();
-
- RTL_CONST_CALL_P (insns) = 1;
- emit_libcall_block (insns, dest, rax, x);
+ if (flag_pic)
+ pic = pic_offset_table_rtx;
+ else
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ }
}
- else if (TARGET_64BIT && TARGET_GNU2_TLS)
- emit_insn (gen_tls_global_dynamic_64 (dest, x));
- else
- emit_insn (gen_tls_global_dynamic_32 (dest, x));
if (TARGET_GNU2_TLS)
{
+ if (TARGET_64BIT)
+ emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
+ else
+ emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
+
+ tp = get_thread_pointer (true);
dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
}
+ else
+ {
+ rtx caddr = ix86_tls_get_addr ();
+
+ if (TARGET_64BIT)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
+ insns = get_insns ();
+ end_sequence ();
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, dest, rax, x);
+ }
+ else
+ emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
+ }
break;
case TLS_MODEL_LOCAL_DYNAMIC:
base = gen_reg_rtx (Pmode);
- tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
- if (TARGET_64BIT && ! TARGET_GNU2_TLS)
+ if (!TARGET_64BIT)
{
- rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
-
- start_sequence ();
- emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
- insns = get_insns ();
- end_sequence ();
-
- note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
- note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
- RTL_CONST_CALL_P (insns) = 1;
- emit_libcall_block (insns, base, rax, note);
+ if (flag_pic)
+ pic = pic_offset_table_rtx;
+ else
+ {
+ pic = gen_reg_rtx (Pmode);
+ emit_insn (gen_set_got (pic));
+ }
}
- else if (TARGET_64BIT && TARGET_GNU2_TLS)
- emit_insn (gen_tls_local_dynamic_base_64 (base));
- else
- emit_insn (gen_tls_local_dynamic_base_32 (base));
if (TARGET_GNU2_TLS)
{
- rtx x = ix86_tls_module_base ();
+ rtx tmp = ix86_tls_module_base ();
+
+ if (TARGET_64BIT)
+ emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
+ else
+ emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
+ tp = get_thread_pointer (true);
set_unique_reg_note (get_last_insn (), REG_EQUIV,
- gen_rtx_MINUS (Pmode, x, tp));
+ gen_rtx_MINUS (Pmode, tmp, tp));
+ }
+ else
+ {
+ rtx caddr = ix86_tls_get_addr ();
+
+ if (TARGET_64BIT)
+ {
+ rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
+
+ start_sequence ();
+ emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
+ insns = get_insns ();
+ end_sequence ();
+
+ /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+ share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ RTL_CONST_CALL_P (insns) = 1;
+ emit_libcall_block (insns, base, rax, eqv);
+ }
+ else
+ emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
}
off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
}
-
break;
case TLS_MODEL_INITIAL_EXEC:
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_avx_movups (op0, op1));
+ emit_insn (gen_sse_movups (op0, op1));
return;
}
op0 = gen_lowpart (V16QImode, op0);
op1 = gen_lowpart (V16QImode, op1);
- emit_insn (gen_avx_movdqu (op0, op1));
+ emit_insn (gen_sse2_movdqu (op0, op1));
break;
case 32:
op0 = gen_lowpart (V32QImode, op0);
switch (mode)
{
case V4SFmode:
- emit_insn (gen_avx_movups (op0, op1));
+ emit_insn (gen_sse_movups (op0, op1));
break;
case V8SFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
{
op0 = gen_lowpart (V4SFmode, op0);
op1 = gen_lowpart (V4SFmode, op1);
- emit_insn (gen_avx_movups (op0, op1));
+ emit_insn (gen_sse_movups (op0, op1));
return;
}
- emit_insn (gen_avx_movupd (op0, op1));
+ emit_insn (gen_sse2_movupd (op0, op1));
break;
case V4DFmode:
ix86_avx256_split_vector_move_misalign (op0, op1);
if (req_mode == CCZmode)
return false;
/* FALLTHRU */
+ case CCZmode:
+ break;
+
case CCAmode:
case CCCmode:
case CCOmode:
case CCSmode:
- case CCZmode:
+ if (set_mode != req_mode)
+ return false;
break;
default:
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx, rtx);
- rtx se, dest;
+ rtx tmp, dest;
- switch (imode)
+ if (TARGET_SSE4_1)
{
- case V16QImode:
- if (high_p)
- unpack = gen_vec_interleave_highv16qi;
- else
- unpack = gen_vec_interleave_lowv16qi;
- break;
- case V8HImode:
- if (high_p)
- unpack = gen_vec_interleave_highv8hi;
- else
- unpack = gen_vec_interleave_lowv8hi;
- break;
- case V4SImode:
+ rtx (*unpack)(rtx, rtx);
+
+ switch (imode)
+ {
+ case V16QImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv8qiv8hi2;
+ else
+ unpack = gen_sse4_1_sign_extendv8qiv8hi2;
+ break;
+ case V8HImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ else
+ unpack = gen_sse4_1_sign_extendv4hiv4si2;
+ break;
+ case V4SImode:
+ if (unsigned_p)
+ unpack = gen_sse4_1_zero_extendv2siv2di2;
+ else
+ unpack = gen_sse4_1_sign_extendv2siv2di2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
if (high_p)
- unpack = gen_vec_interleave_highv4si;
+ {
+ /* Shift higher 8 bytes to lower 8 bytes. */
+ tmp = gen_reg_rtx (imode);
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
+ gen_lowpart (V1TImode, operands[1]),
+ GEN_INT (64)));
+ }
else
- unpack = gen_vec_interleave_lowv4si;
- break;
- default:
- gcc_unreachable ();
- }
-
- dest = gen_lowpart (imode, operands[0]);
+ tmp = operands[1];
- if (unsigned_p)
- se = force_reg (imode, CONST0_RTX (imode));
+ emit_insn (unpack (operands[0], tmp));
+ }
else
- se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
- operands[1], pc_rtx, pc_rtx);
-
- emit_insn (unpack (dest, operands[1], se));
-}
+ {
+ rtx (*unpack)(rtx, rtx, rtx);
-/* This function performs the same task as ix86_expand_sse_unpack,
- but with SSE4.1 instructions. */
+ switch (imode)
+ {
+ case V16QImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv16qi;
+ else
+ unpack = gen_vec_interleave_lowv16qi;
+ break;
+ case V8HImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv8hi;
+ else
+ unpack = gen_vec_interleave_lowv8hi;
+ break;
+ case V4SImode:
+ if (high_p)
+ unpack = gen_vec_interleave_highv4si;
+ else
+ unpack = gen_vec_interleave_lowv4si;
+ break;
+ default:
+ gcc_unreachable ();
+ }
-void
-ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
-{
- enum machine_mode imode = GET_MODE (operands[1]);
- rtx (*unpack)(rtx, rtx);
- rtx src, dest;
+ dest = gen_lowpart (imode, operands[0]);
- switch (imode)
- {
- case V16QImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv8qiv8hi2;
- else
- unpack = gen_sse4_1_sign_extendv8qiv8hi2;
- break;
- case V8HImode:
if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv4hiv4si2;
+ tmp = force_reg (imode, CONST0_RTX (imode));
else
- unpack = gen_sse4_1_sign_extendv4hiv4si2;
- break;
- case V4SImode:
- if (unsigned_p)
- unpack = gen_sse4_1_zero_extendv2siv2di2;
- else
- unpack = gen_sse4_1_sign_extendv2siv2di2;
- break;
- default:
- gcc_unreachable ();
- }
+ tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
+ operands[1], pc_rtx, pc_rtx);
- dest = operands[0];
- if (high_p)
- {
- /* Shift higher 8 bytes to lower 8 bytes. */
- src = gen_reg_rtx (imode);
- emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
- gen_lowpart (V1TImode, operands[1]),
- GEN_INT (64)));
+ emit_insn (unpack (dest, operands[1], tmp));
}
- else
- src = operands[1];
-
- emit_insn (unpack (dest, src));
}
/* Expand conditional increment or decrement using adb/sbb instructions.
ix86_stack_locals = s;
return s->rtl;
}
-
-/* Construct the SYMBOL_REF for the tls_get_addr function. */
-
-static GTY(()) rtx ix86_tls_symbol;
-rtx
-ix86_tls_get_addr (void)
-{
-
- if (!ix86_tls_symbol)
- {
- ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
- (TARGET_ANY_GNU_TLS
- && !TARGET_64BIT)
- ? "___tls_get_addr"
- : "__tls_get_addr");
- }
-
- return ix86_tls_symbol;
-}
-
-/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
-
-static GTY(()) rtx ix86_tls_module_base_symbol;
-rtx
-ix86_tls_module_base (void)
-{
-
- if (!ix86_tls_module_base_symbol)
- {
- ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
- "_TLS_MODULE_BASE_");
- SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
- }
-
- return ix86_tls_module_base_symbol;
-}
\f
/* Calculate the length of the memory address in the instruction
encoding. Does not include the one-byte modrm, opcode, or prefix. */
else
{
tree fntype;
+ unsigned int ccvt;
+
/* By default in 32-bit mode we use ECX to pass the static chain. */
regno = CX_REG;
fntype = TREE_TYPE (fndecl);
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ ccvt = ix86_get_callcvt (fntype);
+ if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
{
/* Fastcall functions use ecx/edx for arguments, which leaves
- us with EAX for the static chain. */
- regno = AX_REG;
- }
- else if (ix86_is_type_thiscall (fntype))
- {
- /* Thiscall functions use ecx for arguments, which leaves
- us with EAX for the static chain. */
+ us with EAX for the static chain.
+ Thiscall functions use ecx for arguments, which also
+ leaves us with EAX for the static chain. */
regno = AX_REG;
}
else if (ix86_function_regparm (fntype, fndecl) == 3)
if (nregs > 0 && !stdarg_p (type))
{
int regno;
+ unsigned int ccvt = ix86_get_callcvt (type);
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
regno = aggr ? DX_REG : CX_REG;
- else if (ix86_is_type_thiscall (type))
+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
{
regno = CX_REG;
if (aggr)
else
{
int tmp_regno = CX_REG;
- if (lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (function)))
- || ix86_is_type_thiscall (TREE_TYPE (function)))
+ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
+ if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
tmp_regno = AX_REG;
tmp = gen_rtx_REG (SImode, tmp_regno);
}
with old MDEP_REORGS that are not CFG based. Recompute it now. */
compute_bb_for_insn ();
+ /* Run the vzeroupper optimization if needed. */
+ if (TARGET_VZEROUPPER)
+ move_or_delete_vzeroupper ();
+
if (optimize && optimize_function_for_speed_p (cfun))
{
if (TARGET_PAD_SHORT_FUNCTION)
ix86_avoid_jump_mispredicts ();
#endif
}
-
- /* Run the vzeroupper optimization if needed. */
- if (TARGET_VZEROUPPER)
- move_or_delete_vzeroupper ();
}
/* Return nonzero when QImode register that must be represented via REX prefix
break;
case V2DImode:
- use_vec_merge = TARGET_SSE4_1;
+ use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
if (use_vec_merge)
break;
+ tmp = gen_reg_rtx (GET_MODE_INNER (mode));
+ ix86_expand_vector_extract (false, tmp, target, 1 - elt);
+ if (elt == 0)
+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
+ else
+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
+ emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
+ return;
+
case V2DFmode:
{
rtx op0, op1;
/* ... or we use the special-case patterns. */
expand_vec_perm_even_odd_1 (&d, odd);
}
+
+/* Expand an insert into a vector register through a pinsr insn.
+ OPERANDS[0] is the destination, OPERANDS[1] the size and OPERANDS[2]
+ the position of the inserted field (both read with INTVAL), and
+ OPERANDS[3] the value to insert. The position is a bit offset: a
+ SUBREG byte offset is scaled by BITS_PER_UNIT before being added to
+ it. The size is presumably in bits as well -- TODO confirm against
+ mode_for_size.
+ Emit the insn and return true on success. Return false, without
+ emitting anything, when the destination mode, the field size or the
+ enabled ISA is not handled here. */
+
+bool
+ix86_expand_pinsr (rtx *operands)
+{
+ rtx dst = operands[0];
+ rtx src = operands[3];
+
+ unsigned int size = INTVAL (operands[1]);
+ unsigned int pos = INTVAL (operands[2]);
+
+ /* Look through a SUBREG destination, folding its byte offset into POS
+ as a bit count. */
+ if (GET_CODE (dst) == SUBREG)
+ {
+ pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
+ dst = SUBREG_REG (dst);
+ }
+
+ /* Likewise strip a SUBREG from the source.
+ NOTE(review): the source's SUBREG_BYTE is not consulted here --
+ confirm callers never pass a nonzero offset. */
+ if (GET_CODE (src) == SUBREG)
+ src = SUBREG_REG (src);
+
+ /* Only these four integer vector destination modes are handled. */
+ switch (GET_MODE (dst))
+ {
+ case V16QImode:
+ case V8HImode:
+ case V4SImode:
+ case V2DImode:
+ {
+ enum machine_mode srcmode, dstmode;
+ rtx (*pinsr)(rtx, rtx, rtx, rtx);
+
+ /* The field size selects the scalar integer mode, which in turn
+ selects the vector mode to view DST in and the generator to
+ use. */
+ srcmode = mode_for_size (size, MODE_INT, 0);
+
+ switch (srcmode)
+ {
+ case QImode:
+ if (!TARGET_SSE4_1)
+ return false;
+ dstmode = V16QImode;
+ pinsr = gen_sse4_1_pinsrb;
+ break;
+
+ case HImode:
+ if (!TARGET_SSE2)
+ return false;
+ dstmode = V8HImode;
+ pinsr = gen_sse2_pinsrw;
+ break;
+
+ case SImode:
+ if (!TARGET_SSE4_1)
+ return false;
+ dstmode = V4SImode;
+ pinsr = gen_sse4_1_pinsrd;
+ break;
+
+ case DImode:
+ /* A DImode insert on a non-64-bit target trips this assertion
+ instead of returning false. */
+ gcc_assert (TARGET_64BIT);
+ if (!TARGET_SSE4_1)
+ return false;
+ dstmode = V2DImode;
+ pinsr = gen_sse4_1_pinsrq;
+ break;
+
+ default:
+ return false;
+ }
+
+ dst = gen_lowpart (dstmode, dst);
+ src = gen_lowpart (srcmode, src);
+
+ /* Turn POS from a bit offset into an index counted in SIZE-sized
+ elements. */
+ pos /= size;
+
+ /* DST is passed as both the first and second operand; the final
+ operand is the constant 1 << POS (presumably the element-select
+ mask the pinsr patterns expect -- confirm against the pattern
+ definitions). */
+ emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
\f
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
- /* Disable double precision vectorizer if needed. */
- if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
- return word_mode;
-
- if (!TARGET_AVX && !TARGET_SSE)
+ /* Without SSE no vector mode is offered for any scalar mode. */
+ if (!TARGET_SSE)
return word_mode;
switch (mode)
{
- case SFmode:
- return (TARGET_AVX && !flag_prefer_avx128) ? V8SFmode : V4SFmode;
- case DFmode:
- return (TARGET_AVX && !flag_prefer_avx128) ? V4DFmode : V2DFmode;
- case DImode:
- return V2DImode;
- case SImode:
- return V4SImode;
- case HImode:
- return V8HImode;
case QImode:
return V16QImode;
+ case HImode:
+ return V8HImode;
+ case SImode:
+ return V4SImode;
+ case DImode:
+ return V2DImode;
- default:;
- }
+ /* SFmode uses V8SFmode only with AVX and when 128-bit AVX is not
+ preferred. */
+ case SFmode:
+ if (TARGET_AVX && !flag_prefer_avx128)
+ return V8SFmode;
+ else
+ return V4SFmode;
+
+ /* DFmode is vectorized only when TARGET_VECTORIZE_DOUBLE is set.
+ V4DFmode is used under the same AVX condition as SFmode above,
+ V2DFmode needs SSE2, and anything else falls into the default
+ and yields word_mode. */
+ case DFmode:
+ if (!TARGET_VECTORIZE_DOUBLE)
+ return word_mode;
+ else if (TARGET_AVX && !flag_prefer_avx128)
+ return V4DFmode;
+ else if (TARGET_SSE2)
+ return V2DFmode;
+ /* FALLTHRU */
- return word_mode;
+ default:
+ return word_mode;
+ }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
static unsigned int
ix86_autovectorize_vector_sizes (void)
{
- return TARGET_AVX ? 32 | 16 : 0;
+ /* Report both sizes (32 | 16) only when AVX is enabled and 128-bit AVX
+ is not preferred; otherwise return 0. */
+ return (TARGET_AVX && !flag_prefer_avx128) ? 32 | 16 : 0;
}
/* Initialize the GCC target structure. */
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
+
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required