\f
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
+static bool ix86_function_value_regno_p (const unsigned int);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
if (isa && add_nl_p)
{
opts[num++][0] = isa_other;
- sprintf (isa_other, "(other isa: 0x%x)", isa);
+ sprintf (isa_other, "(other isa: %#x)", isa);
}
/* Add flag options. */
if (flags && add_nl_p)
{
opts[num++][0] = target_other;
- sprintf (target_other, "(other flags: 0x%x)", isa);
+ sprintf (target_other, "(other flags: %#x)", flags);
}
/* Add -fpmath= option. */
return true;
}
-/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
- calling convention attributes;
+/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
+ and "sseregparm" calling convention attributes;
arguments as in struct attribute_spec.handler. */
static tree
error ("fastcall and regparm attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("regparam and thiscall attributes are not compatible");
+ }
+
cst = TREE_VALUE (args);
if (TREE_CODE (cst) != INTEGER_CST)
{
{
error ("fastcall and regparm attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and thiscall attributes are not compatible");
+ }
}
/* Can combine stdcall with fastcall (redundant), regparm and
{
error ("stdcall and fastcall attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and thiscall attributes are not compatible");
+ }
}
/* Can combine cdecl with regparm and sseregparm. */
{
error ("fastcall and cdecl attributes are not compatible");
}
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("cdecl and thiscall attributes are not compatible");
+ }
+ }
+ else if (is_attribute_p ("thiscall", name))
+ {
+ if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
+ warning (OPT_Wattributes, "%qE attribute is used for none class-method",
+ name);
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("stdcall and thiscall attributes are not compatible");
+ }
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and thiscall attributes are not compatible");
+ }
+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("cdecl and thiscall attributes are not compatible");
+ }
}
/* Can combine sseregparm with all attributes. */
!= !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
return 0;
+ /* Check for mismatched thiscall types. */
+ if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
/* Check for mismatched return types (cdecl vs stdcall). */
if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
!= !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
return 2;
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
+ return 1;
+
/* Use register calling convention for local functions when possible. */
if (decl
&& TREE_CODE (decl) == FUNCTION_DECL
/* Stdcall and fastcall functions will pop the stack if not
variable args. */
if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
- || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
rtd = 1;
if (rtd && ! stdarg_p (funtype))
else look for regparm information. */
if (fntype)
{
- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 1;
+ cum->fastcall = 1; /* Same first register as in fastcall. */
+ }
+ else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
{
cum->nregs = 2;
cum->fastcall = 1;
/* Return true if N is a possible register number of function value. */
-bool
-ix86_function_value_regno_p (int regno)
+static bool
+ix86_function_value_regno_p (const unsigned int regno)
{
switch (regno)
{
{
rtx save_area, mem;
rtx label;
- rtx label_ref;
rtx tmp_reg;
rtx nsse_reg;
alias_set_type set;
SSE saves. We need some preparation work to get this working. */
label = gen_label_rtx ();
- label_ref = gen_rtx_LABEL_REF (Pmode, label);
- /* Compute address to jump to :
- label - eax*4 + nnamed_sse_arguments*4 Or
- label - eax*5 + nnamed_sse_arguments*5 for AVX. */
- tmp_reg = gen_reg_rtx (Pmode);
nsse_reg = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_MULT (Pmode, nsse_reg,
- GEN_INT (4))));
-
- /* vmovaps is one byte longer than movaps. */
- if (TARGET_AVX)
- emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
- gen_rtx_PLUS (Pmode, tmp_reg,
- nsse_reg)));
-
- if (cum->sse_regno)
- emit_move_insn
- (nsse_reg,
- gen_rtx_CONST (DImode,
- gen_rtx_PLUS (DImode,
- label_ref,
- GEN_INT (cum->sse_regno
- * (TARGET_AVX ? 5 : 4)))));
- else
- emit_move_insn (nsse_reg, label_ref);
- emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
/* Compute address of memory block we save into. We always use pointer
pointing 127 bytes after first byte to store - this is needed to keep
mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
- set_mem_align (mem, BITS_PER_WORD);
+ set_mem_align (mem, 64);
/* And finally do the dirty job! */
emit_insn (gen_sse_prologue_save (mem, nsse_reg,
- GEN_INT (cum->sse_regno), label));
+ GEN_INT (cum->sse_regno), label,
+ gen_reg_rtx (Pmode)));
}
}
int indirect_p = 0;
tree ptrtype;
enum machine_mode nat_mode;
- int arg_boundary;
+ unsigned int arg_boundary;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
size_int (-align));
t = fold_convert (TREE_TYPE (ovf), t);
+ if (crtl->stack_alignment_needed < arg_boundary)
+ crtl->stack_alignment_needed = arg_boundary;
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
&& cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
{
int count = frame->nregs;
+ struct cgraph_node *node = cgraph_node (current_function_decl);
cfun->machine->use_fast_prologue_epilogue_nregs = count;
/* The fast prologue uses move instead of push to save registers. This
slow to use many of them. */
if (count)
count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
- if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
+ if (node->frequency < NODE_FREQUENCY_NORMAL
|| (flag_branch_probabilities
- && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
+ && node->frequency < NODE_FREQUENCY_HOT))
cfun->machine->use_fast_prologue_epilogue = false;
else
cfun->machine->use_fast_prologue_epilogue
passing. */
if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
&& !lookup_attribute ("fastcall",
+ TYPE_ATTRIBUTES (TREE_TYPE (decl)))
+ && !lookup_attribute ("thiscall",
TYPE_ATTRIBUTES (TREE_TYPE (decl))))
return CX_REG;
else
rtx base_reg, index_reg;
HOST_WIDE_INT scale = 1;
rtx scale_rtx = NULL_RTX;
+ rtx tmp;
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
scale_rtx = XEXP (op, 1);
break;
+ case ASHIFT:
+ if (index)
+ return 0;
+ index = XEXP (op, 0);
+ tmp = XEXP (op, 1);
+ if (!CONST_INT_P (tmp))
+ return 0;
+ scale = INTVAL (tmp);
+ if ((unsigned HOST_WIDE_INT) scale > 3)
+ return 0;
+ scale = 1 << scale;
+ break;
+
case UNSPEC:
if (XINT (op, 1) == UNSPEC_TP
&& TARGET_TLS_DIRECT_SEG_REFS
}
else if (GET_CODE (addr) == ASHIFT)
{
- rtx tmp;
-
/* We're called for lea too, which implements ashift on occasion. */
index = XEXP (addr, 0);
tmp = XEXP (addr, 1);
return cfun->machine->some_ld_name;
for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
- if (INSN_P (insn)
+ if (NONDEBUG_INSN_P (insn)
&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
return cfun->machine->some_ld_name;
rtx prev = PREV_INSN (insn);
while (prev && distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (prev))
+ if (NONDEBUG_INSN_P (prev))
{
distance++;
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
&& prev != insn
&& distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (prev))
+ if (NONDEBUG_INSN_P (prev))
{
distance++;
for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
rtx next = NEXT_INSN (insn);
while (next && distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (next))
+ if (NONDEBUG_INSN_P (next))
{
distance++;
&& next != insn
&& distance < LEA_SEARCH_THRESHOLD)
{
- if (INSN_P (next))
+ if (NONDEBUG_INSN_P (next))
{
distance++;
: gen_x86_64_shld) (high[0], low[0], operands[2]));
}
- emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
+ emit_insn ((mode == DImode
+ ? gen_ashlsi3
+ : gen_ashldi3) (low[0], low[0], operands[2]));
if (TARGET_CMOVE && scratch)
{
ix86_expand_clear (scratch);
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
- scratch));
+ ? gen_x86_shiftsi_adj_1
+ : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_2
- : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
+ ? gen_x86_shiftsi_adj_2
+ : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
}
void
: gen_ashrdi3) (scratch, scratch,
GEN_INT (single_width - 1)));
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
- scratch));
+ ? gen_x86_shiftsi_adj_1
+ : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_3
- : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
+ ? gen_x86_shiftsi_adj_3
+ : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
}
}
{
ix86_expand_clear (scratch);
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_1
- : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
- scratch));
+ ? gen_x86_shiftsi_adj_1
+ : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
+ scratch));
}
else
emit_insn ((mode == DImode
- ? gen_x86_shift_adj_2
- : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
+ ? gen_x86_shiftsi_adj_2
+ : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
}
}
}
/* x86-64 ABI requires arrays greater than 16 bytes to be aligned
- to 16byte boundary. */
- if (TARGET_64BIT)
+ to 16byte boundary. Exact wording is:
+
+ An array uses the same alignment as its elements, except that a local or
+ global array variable of length at least 16 bytes or
+ a C99 variable-length array variable always has alignment of at least 16 bytes.
+
+ This was added to allow use of aligned SSE instructions at arrays. This
+ rule is meant for static storage (where compiler can not do the analysis
+ by itself). We follow it for automatic variables only when convenient.
+ We fully control everything in the function compiled and functions from
+ other unit can not rely on the alignment.
+
+ Exclude va_list type. It is the common case of local array where
+ we can not benefit from the alignment. */
+ if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
+ && TARGET_SSE)
{
if (AGGREGATE_TYPE_P (type)
+ && (TYPE_MAIN_VARIANT (type)
+ != TYPE_MAIN_VARIANT (va_list_type_node))
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
us with EAX for the static chain. */
regno = AX_REG;
}
+ else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
+ {
+ /* Thiscall functions use ecx for arguments, which leaves
+ us with EAX for the static chain. */
+ regno = AX_REG;
+ }
else if (ix86_function_regparm (fntype, fndecl) == 3)
{
/* For regparm 3, we have no free call-clobbered registers in
/* Returns a decl of a function that implements conversion of an integer vector
- into a floating-point vector, or vice-versa. TYPE is the type of the integer
- side of the conversion.
+ into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
+ are the types involved when converting according to CODE.
Return NULL_TREE if it is not available. */
static tree
-ix86_vectorize_builtin_conversion (unsigned int code, tree type)
+ix86_vectorize_builtin_conversion (unsigned int code,
+ tree dest_type, tree src_type)
{
- if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
+ if (! TARGET_SSE2)
return NULL_TREE;
switch (code)
{
case FLOAT_EXPR:
- switch (TYPE_MODE (type))
+ switch (TYPE_MODE (src_type))
{
case V4SImode:
- return TYPE_UNSIGNED (type)
- ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
- : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
+ switch (TYPE_MODE (dest_type))
+ {
+ case V4SFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
+ case V4DFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+ case V8SImode:
+ switch (TYPE_MODE (dest_type))
+ {
+ case V8SFmode:
+ return (TYPE_UNSIGNED (src_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
+ default:
+ return NULL_TREE;
+ }
+ break;
default:
return NULL_TREE;
}
case FIX_TRUNC_EXPR:
- switch (TYPE_MODE (type))
+ switch (TYPE_MODE (dest_type))
{
case V4SImode:
- return TYPE_UNSIGNED (type)
- ? NULL_TREE
- : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
+ switch (TYPE_MODE (src_type))
+ {
+ case V4SFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
+ case V4DFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+
+ case V8SImode:
+ switch (TYPE_MODE (src_type))
+ {
+ case V8SFmode:
+ return (TYPE_UNSIGNED (dest_type)
+ ? NULL_TREE
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
+ default:
+ return NULL_TREE;
+ }
+ break;
+
default:
return NULL_TREE;
}
+
default:
return NULL_TREE;
-
}
+
+ return NULL_TREE;
}
/* Returns a code for a target-specific builtin that implements
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
regno = aggr ? DX_REG : CX_REG;
+ else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
+ {
+ regno = CX_REG;
+ if (aggr)
+ return gen_rtx_MEM (SImode,
+ plus_constant (stack_pointer_rtx, 4));
+ }
else
{
regno = AX_REG;
{
int tmp_regno = CX_REG;
if (lookup_attribute ("fastcall",
- TYPE_ATTRIBUTES (TREE_TYPE (function))))
+ TYPE_ATTRIBUTES (TREE_TYPE (function)))
+ || lookup_attribute ("thiscall",
+ TYPE_ATTRIBUTES (TREE_TYPE (function))))
tmp_regno = AX_REG;
tmp = gen_rtx_REG (SImode, tmp_regno);
}
replace = true;
/* Empty functions get branch mispredict even when the jump destination
is not visible to us. */
- if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
+ if (!prev && !optimize_function_for_size_p (cfun))
replace = true;
}
if (replace)
/* Fastcall attribute says callee is responsible for popping arguments
if they are not variable. */
{ "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
+ /* Thiscall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
/* Cdecl attribute says the callee is a normal C declaration */
{ "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
/* Regparm attribute specifies how many integer arguments are to be
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value
+#undef TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload