/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002 Free Software Foundation, Inc.
+ 2002, 2003 Free Software Foundation, Inc.
This file is part of GNU CC.
#define CHECK_STACK_LIMIT (-1)
#endif
+/* Return the index of the given mode in the mult and division cost tables. */
+#define MODE_INDEX(mode) \
+ ((mode) == QImode ? 0 \
+ : (mode) == HImode ? 1 \
+ : (mode) == SImode ? 2 \
+ : (mode) == DImode ? 3 \
+ : 4)
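+
+/* Illustrative note: MODE_INDEX (SImode) is 2, so e.g. the SImode divide
+   cost used in ix86_rtx_costs below is ix86_cost->divide[2]; modes wider
+   than DImode fall into the catch-all slot 4.  */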
+
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tuning for size */
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE register
- parts insetad of whole registers, so we may maintain just lower part of
+ parts instead of whole registers, so we may maintain just lower part of
scalar values in proper format leaving the upper part undefined. */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes partial-register FPS special case, thus avoiding the
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
-/* In case the avreage insn count for single function invocation is
+/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code. */
#define FAST_PROLOGUE_INSN_COUNT 20
-1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
- -1, -1, -1, -1, -1, -1, -1, -1, /* extemded integer registers */
- -1, -1, -1, -1, -1, -1, -1, -1, /* extemded SSE registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
};
static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));
+static int ix86_address_cost PARAMS ((rtx));
static bool ix86_cannot_force_const_mem PARAMS ((rtx));
static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));
static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
+static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
+static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
/* Register class used for passing given 64bit part of the argument.
These represent classes as documented by the PS ABI, with the exception
of SSESF, SSEDF classes, that are basically SSE class, just gcc will
- use SF or DFmode move instead of DImode to avoid reformating penalties.
+ use SF or DFmode move instead of DImode to avoid reformatting penalties.
- Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves
+ Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
whenever possible (upper half does contain padding).
*/
enum x86_64_reg_class
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS ix86_rtx_costs
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST ix86_address_cost
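+
+/* Illustrative note: with these hooks in place, generic code reaches the
+   i386 cost model through targetm.rtx_costs and targetm.address_cost
+   rather than the historical RTX_COSTS/ADDRESS_COST target macros.  */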
+
struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;
/* Set the default values for switches whose default depends on TARGET_64BIT
- in case they weren't overwriten by command line options. */
+ in case they weren't overwritten by command line options. */
if (TARGET_64BIT)
{
if (flag_omit_frame_pointer == 2)
if (flag_asynchronous_unwind_tables == 2)
flag_asynchronous_unwind_tables = 0;
if (flag_pcc_struct_return == 2)
- flag_pcc_struct_return = 1;
+ flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
}
#ifdef SUBTARGET_OVERRIDE_OPTIONS
if (TARGET_3DNOW)
{
target_flags |= MASK_MMX;
- /* If we are targetting the Athlon architecture, enable the 3Dnow/MMX
+ /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
extensions it adds. */
if (x86_3dnow_a & (1 << ix86_arch))
target_flags |= MASK_3DNOW_A;
/* Stdcall attribute says callee is responsible for popping arguments
if they are not variable. */
{ "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
+ /* Fastcall attribute says callee is responsible for popping arguments
+ if they are not variable. */
+ { "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
/* Cdecl attribute says the callee is a normal C declaration */
{ "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
/* Regparm attribute specifies how many integer arguments are to be
{ "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
{ "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
+ { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
+ { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
{ NULL, 0, 0, false, false, false, NULL }
};
return true;
}
-/* Handle a "cdecl" or "stdcall" attribute;
+/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
arguments as in struct attribute_spec.handler. */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
IDENTIFIER_POINTER (name));
*no_add_attrs = true;
}
+ else
+ {
+ if (is_attribute_p ("fastcall", name))
+ {
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and stdcall attributes are not compatible");
+ }
+ else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
+ }
+ else if (is_attribute_p ("stdcall", name))
+ {
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and stdcall attributes are not compatible");
+ }
+ }
+ }
if (TARGET_64BIT)
{
IDENTIFIER_POINTER (name), REGPARM_MAX);
*no_add_attrs = true;
}
+
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("fastcall and regparm attributes are not compatible");
+ }
}
return NULL_TREE;
if (TREE_CODE (type1) != FUNCTION_TYPE)
return 1;
+ /* Check for mismatched fastcall types.  */
+ if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
+ return 0;
+
/* Check for mismatched return types (cdecl vs stdcall). */
if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
!= !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
/* Cdecl functions override -mrtd, and never pop the stack. */
if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
- /* Stdcall functions will pop the stack if not variable args. */
- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
+ /* Stdcall and fastcall functions will pop the stack if not variable args. */
+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
+ || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
rtd = 1;
if (rtd
}
cum->maybe_vaarg = false;
+ /* Use ecx and edx registers if the function has the fastcall attribute.  */
+ if (fntype && !TARGET_64BIT)
+ {
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
+ {
+ cum->nregs = 2;
+ cum->fastcall = 1;
+ }
+ }
+
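+  /* Illustrative (hypothetical) example:
+	int __attribute__ ((fastcall)) f (int a, int b, int c);
+     passes A in %ecx, B in %edx, and C on the stack.  */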
+
/* Determine if this function has variable arguments. This is
indicated by the last argument being 'void_type_mode' if there
are no variable arguments. If there are variable arguments, then
if (next_param == 0 && TREE_VALUE (param) != void_type_node)
{
if (!TARGET_64BIT)
- cum->nregs = 0;
+ {
+ cum->nregs = 0;
+ cum->fastcall = 0;
+ }
cum->maybe_vaarg = true;
}
}
return;
}
-/* x86-64 register passing impleemntation. See x86-64 ABI for details. Goal
+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
of this code is to classify each 8bytes of incoming argument by the register
class and assign registers accordingly. */
break;
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
- /* Merge TImodes on aligned occassions here too. */
+ /* Merge TImodes on aligned occasions here too. */
if (i * 8 + 8 > bytes)
tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
else if (class[i] == X86_64_INTEGERSI_CLASS)
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
- /* Handle an hidden AL argument containing number of registers for varargs
+ /* Handle a hidden AL argument containing number of registers for varargs
x86-64 functions. For i386 ABI just return constm1_rtx to avoid
any AL settings. */
if (mode == VOIDmode)
case HImode:
case QImode:
if (words <= cum->nregs)
- ret = gen_rtx_REG (mode, cum->regno);
+ {
+ int regno = cum->regno;
+
+ /* Fastcall allocates the first two DWORD (SImode) or
+ smaller arguments to ECX and EDX. */
+ if (cum->fastcall)
+ {
+ if (mode == BLKmode || mode == DImode)
+ break;
+
+ /* ECX not EAX is the first allocated register. */
+ if (regno == 0)
+ regno = 2;
+ }
+ ret = gen_rtx_REG (mode, regno);
+ }
break;
case TImode:
if (cum->sse_nregs)
return ret;
}
+/* A C expression that indicates when an argument must be passed by
+ reference. If nonzero for an argument, a copy of that argument is
+ made in memory and a pointer to the argument is passed instead of
+ the argument itself. The pointer is passed in whatever way is
+ appropriate for passing a pointer to that type. */
+
+int
+function_arg_pass_by_reference (cum, mode, type, named)
+ CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+ tree type;
+ int named ATTRIBUTE_UNUSED;
+{
+ if (!TARGET_64BIT)
+ return 0;
+
+ if (type && int_size_in_bytes (type) == -1)
+ {
+ if (TARGET_DEBUG_ARG)
+ fprintf (stderr, "function_arg_pass_by_reference\n");
+ return 1;
+ }
+
+ return 0;
+}
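+
+/* Note for illustration: on x86-64 this fires only for types whose size
+   is not known at compile time (int_size_in_bytes returns -1);
+   fixed-size aggregates are still passed by value.  */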
+
/* Gives the alignment boundary, in bits, of an argument with the specified mode
and type. */
rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
REGPARM_MAX, SSE_REGPARM_MAX,
x86_64_int_return_registers, 0);
- /* For zero sized structures, construct_continer return NULL, but we need
- to keep rest of compiler happy by returning meaningfull value. */
+ /* For zero-sized structures, construct_container returns NULL, but we need
+ to keep the rest of the compiler happy by returning a meaningful value. */
if (!ret)
ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
return ret;
if (next_cum.sse_nregs)
{
/* Now emit code to save SSE registers. The AX parameter contains the number
- of SSE parameter regsiters used to call this function. We use
+ of SSE parameter registers used to call this function. We use
sse_prologue_save insn template that produces computed jump across
SSE saves. We need some preparation work to get this working. */
rtx lab_false, lab_over = NULL_RTX;
rtx addr_rtx, r;
rtx container;
+ int indirect_p = 0;
/* Only 64bit target needs something special. */
if (!TARGET_64BIT)
sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
size = int_size_in_bytes (type);
+ if (size == -1)
+ {
+ /* Passed by reference. */
+ indirect_p = 1;
+ type = build_pointer_type (type);
+ size = int_size_in_bytes (type);
+ }
rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
container = construct_container (TYPE_MODE (type), type, 0,
need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
|| TYPE_ALIGN (type) > 128);
- /* In case we are passing structure, verify that it is consetuctive block
+ /* In case we are passing a structure, verify that it is a consecutive block
on the register save area. If not we need to do moves. */
if (!need_temp && !REG_P (container))
{
- /* Verify that all registers are strictly consetuctive */
+ /* Verify that all registers are strictly consecutive */
if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
{
int i;
if (container)
emit_label (lab_over);
+ if (indirect_p)
+ {
+ r = gen_rtx_MEM (Pmode, addr_rtx);
+ set_mem_alias_set (r, get_varargs_alias_set ());
+ emit_move_insn (addr_rtx, r);
+ }
+
return addr_rtx;
}
\f
return register_operand (op, mode) && !ANY_FP_REG_P (op);
}
-/* Return nonzero of OP is a register operand other than an
+/* Return nonzero if OP is a register operand other than an
i387 fp register. */
int
register_and_not_fp_reg_operand (op, mode)
&& (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
}
-/* True if this is a constant appropriate for an increment or decremenmt. */
+/* True if this is a constant appropriate for an increment or decrement. */
int
incdec_operand (op, mode)
}
}
+/* Return 1 if OP is a valid comparison operator testing whether the
+ carry flag is set. */
+int
+ix86_carry_flag_operator (op, mode)
+ register rtx op;
+ enum machine_mode mode;
+{
+ enum machine_mode inmode;
+ enum rtx_code code = GET_CODE (op);
+
+ if (mode != VOIDmode && GET_MODE (op) != mode)
+ return 0;
+ if (GET_RTX_CLASS (code) != '<')
+ return 0;
+ inmode = GET_MODE (XEXP (op, 0));
+ if (GET_CODE (XEXP (op, 0)) != REG
+ || REGNO (XEXP (op, 0)) != 17
+ || XEXP (op, 1) != const0_rtx)
+ return 0;
+
+ if (inmode == CCFPmode || inmode == CCFPUmode)
+ {
+ enum rtx_code second_code, bypass_code;
+
+ ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
+ if (bypass_code != NIL || second_code != NIL)
+ return 0;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+ else if (inmode != CCmode)
+ return 0;
+ return code == LTU;
+}
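+
+/* For illustration, the operator accepted above has the shape
+     (ltu (reg:CC 17) (const_int 0))
+   i.e. a bare carry-flag test, which the adc/sbb patterns can use
+   directly.  */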
+
/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
int
{
enum machine_mode inmode;
enum rtx_code code = GET_CODE (op);
+
if (mode != VOIDmode && GET_MODE (op) != mode)
return 0;
if (GET_RTX_CLASS (code) != '<')
if (inmode == CCFPmode || inmode == CCFPUmode)
{
enum rtx_code second_code, bypass_code;
+
ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
if (bypass_code != NIL || second_code != NIL)
return 0;
if (!register_operand (op, VOIDmode))
return 0;
- /* Be curefull to accept only registers having upper parts. */
+ /* Be careful to accept only registers having upper parts. */
regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
is significantly longer, but also executes faster as modern hardware
can execute the moves in parallel, but can't do that for push/pop.
- Be curefull about choosing what prologue to emit: When function takes
+ Be careful about choosing what prologue to emit: When function takes
many instructions to execute we may use slow version as well as in
case function is known to be outside hot spot (this is known with
feedback only). Weight the size of function by number of registers
CALL_INSN_FUNCTION_USAGE (insn)
= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
CALL_INSN_FUNCTION_USAGE (insn));
+
+ /* Don't allow the scheduling pass to move insns across the __alloca
+ call. */
+ emit_insn (gen_blockage (const0_rtx));
}
if (use_mov)
{
/* Even with accurate pre-reload life analysis, we can wind up
deleting all references to the pic register after reload.
Consider if cross-jumping unifies two sides of a branch
- controled by a comparison vs the only read from a global.
+ controlled by a comparison vs the only read from a global.
In which case, allow the set_got to be deleted, though we're
too late to do anything about the ebx save in the prologue. */
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
while this code results in LEAVE instruction (or discrete equivalent),
so it is profitable in some other cases as well. Especially when there
are no registers to restore. We also use this code when TARGET_USE_LEAVE
- and there is exactly one register to pop. This heruistic may need some
+ and there is exactly one register to pop. This heuristic may need some
tuning in future. */
if ((!sp_valid && frame.nregs <= 1)
|| (TARGET_EPILOGUE_USING_MOVE
the address into a reg and make a new pseudo. But not if the address
requires two regs - that would mean more pseudos with longer
lifetimes. */
-int
+static int
ix86_address_cost (x)
rtx x;
{
int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only in the case default branch prediction
- heruistics would fail. */
+ heuristics would fail. */
if (taken != cputaken)
{
/* We use 3e (DS) prefix for taken branches and
if (unordered_p)
return "ucomiss\t{%1, %0|%0, %1}";
else
- return "comiss\t{%1, %0|%0, %y}";
+ return "comiss\t{%1, %0|%0, %1}";
else
if (unordered_p)
return "ucomisd\t{%1, %0|%0, %1}";
else
- return "comisd\t{%1, %0|%0, %y}";
+ return "comisd\t{%1, %0|%0, %1}";
}
if (! STACK_TOP_P (cmp_op0))
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], mode)
&& CONSTANT_P (operands[1]))
- operands[1] = force_const_mem (mode, operands[1]);
+ operands[1] = validize_mem (force_const_mem (mode, operands[1]));
/* Make operand1 a register if it isn't already. */
if (!no_new_pseudos
return CCGCmode;
/* Codes doable only with sign flag when comparing
against zero, but we miss jump instruction for it
- so we need to use relational tests agains overflow
+ so we need to use relational tests against overflow
that thus needs to be zero. */
case GT: /* ZF=0 & SF=OF */
case LE: /* ZF=1 | SF<>OF */
/* Swap, force into registers, or otherwise massage the two operands
to a fp comparison. The operands are updated in place; the new
- comparsion code is returned. */
+ comparison code is returned. */
static enum rtx_code
ix86_prepare_fp_compare_args (code, pop0, pop1)
}
/* Return cost of comparison done using fcom + arithmetic operations on AX.
- All following functions do use number of instructions as an cost metrics.
+ All following functions use the number of instructions as a cost metric.
In future this should be tweaked to compute bytes for optimize_size and
take into account performance of various instructions on various CPUs. */
static int
enum rtx_code code;
{
enum rtx_code bypass_code, first_code, second_code;
- /* Return arbitarily high cost when instruction is not supported - this
+ /* Return arbitrarily high cost when instruction is not supported - this
prevents gcc from using it. */
if (!TARGET_CMOVE)
return 1024;
enum rtx_code code;
{
enum rtx_code bypass_code, first_code, second_code;
- /* Return arbitarily high cost when instruction is not preferred - this
+ /* Return arbitrarily high cost when instruction is not preferred - this
keeps gcc from using it. */
if (!TARGET_USE_SAHF && !optimize_size)
return 1024;
return 1; /* DONE */
}
-/* Expand comparison setting or clearing carry flag. Return true when sucesfull
+/* Expand comparison setting or clearing carry flag. Return true when successful
and set pop for the operation. */
bool
ix86_expand_carry_flag_compare (code, op0, op1, pop)
/* Do not handle DImode compares that go through a special path. Also we can't
deal with FP compares yet. This is possible to add. */
- if ((mode == DImode && !TARGET_64BIT) || !INTEGRAL_MODE_P (mode))
+ if ((mode == DImode && !TARGET_64BIT))
+ return false;
+ if (FLOAT_MODE_P (mode))
+ {
+ rtx second_test = NULL, bypass_test = NULL;
+ rtx compare_op, compare_seq;
+
+ /* Shortcut: the following common codes never translate into carry flag compares. */
+ if (code == EQ || code == NE || code == UNEQ || code == LTGT
+ || code == ORDERED || code == UNORDERED)
+ return false;
+
+ /* These comparisons require the zero flag; swap operands so they won't. */
+ if ((code == GT || code == UNLE || code == LE || code == UNGT)
+ && !TARGET_IEEE_FP)
+ {
+ rtx tmp = op0;
+ op0 = op1;
+ op1 = tmp;
+ code = swap_condition (code);
+ }
+
+ /* Try to expand the comparison and verify that we end up with a carry
+ flag based comparison. This fails only when we decide to expand the
+ comparison using arithmetic, which is not a common scenario. */
+ start_sequence ();
+ compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
+ &second_test, &bypass_test);
+ compare_seq = get_insns ();
+ end_sequence ();
+
+ if (second_test || bypass_test)
+ return false;
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
+ else
+ code = GET_CODE (compare_op);
+ if (code != LTU && code != GEU)
+ return false;
+ emit_insn (compare_seq);
+ *pop = compare_op;
+ return true;
+ }
+ if (!INTEGRAL_MODE_P (mode))
return false;
switch (code)
{
if (!sign_bit_compare_p)
{
+ bool fpcmp = false;
+
compare_code = GET_CODE (compare_op);
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ compare_code = ix86_fp_compare_code_to_integer (compare_code);
+ }
+
/* To simplify rest of code, restrict to the GEU case. */
if (compare_code == LTU)
{
compare_code = reverse_condition (compare_code);
code = reverse_condition (code);
}
+ else
+ {
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
diff = ct - cf;
if (reg_overlap_mentioned_p (out, ix86_compare_op0)
tmp = gen_reg_rtx (mode);
if (mode == DImode)
- emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
+ emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
else
- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp)));
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
}
else
{
/* On x86_64 the lea instruction operates on Pmode, so we need
to get arithmetic done in the proper mode to match. */
if (diff == 1)
- tmp = out;
+ tmp = copy_rtx (out);
else
{
rtx out1;
- out1 = out;
+ out1 = copy_rtx (out);
tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
nops++;
if (diff & 1)
if (!rtx_equal_p (tmp, out))
{
if (nops == 1)
- out = force_operand (tmp, out);
+ out = force_operand (tmp, copy_rtx (out));
else
emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
}
VOIDmode, ix86_compare_op0,
ix86_compare_op1);
}
- /* Similary try to manage result to be first operand of conditional
+ /* Similarly try to manage result to be first operand of conditional
move. We also don't support the NE comparison on SSE, so try to
avoid it. */
if ((rtx_equal_p (operands[0], operands[3])
return 1;
}
+/* Expand conditional increment or decrement using adc/sbb instructions.
+ The default case using setcc followed by the conditional move can be
+ done by generic code. */
+int
+ix86_expand_int_addcc (operands)
+ rtx operands[];
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+ rtx compare_op;
+ rtx val = const0_rtx;
+ bool fpcmp = false;
+ rtx pat, clob;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+ if (operands[3] != const1_rtx
+ && operands[3] != constm1_rtx)
+ return 0;
+ if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
+ ix86_compare_op1, &compare_op))
+ return 0;
+ code = GET_CODE (compare_op);
+
+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
+ || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
+ {
+ fpcmp = true;
+ code = ix86_fp_compare_code_to_integer (code);
+ }
+
+ if (code != LTU)
+ {
+ val = constm1_rtx;
+ if (fpcmp)
+ PUT_CODE (compare_op,
+ reverse_condition_maybe_unordered
+ (GET_CODE (compare_op)));
+ else
+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
+ }
+ PUT_MODE (compare_op, mode);
+
+ /* Construct either adc or sbb insn. */
+ if ((code == LTU) == (operands[3] == constm1_rtx))
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ abort ();
+ }
+ }
+ else
+ {
+ switch (GET_MODE (operands[0]))
+ {
+ case QImode:
+ emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case HImode:
+ emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case SImode:
+ emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
+ break;
+ case DImode:
+ emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
+ break;
+ default:
+ abort ();
+ }
+ }
+ return 1; /* DONE */
+}
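+
+/* As an illustrative sketch, for unsigned SImode operands "c += (a < b);"
+   can now expand to
+	cmpl	%ebx, %eax
+	adcl	$0, %ecx
+   instead of a setcc/add pair, since the LTU result is exactly the carry
+   flag.  */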
+
+
/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
works for floating point parameters and nonoffsettable memories.
For pushes, it returns just stack offsets; the values will be saved
unsigned HOST_WIDE_INT count = 0;
rtx insns;
- start_sequence ();
if (GET_CODE (align_exp) == CONST_INT)
align = INTVAL (align_exp);
align = 64;
if (GET_CODE (count_exp) == CONST_INT)
- count = INTVAL (count_exp);
+ {
+ count = INTVAL (count_exp);
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return 0;
+ }
/* Figure out proper mode for counter. For 32bits it is always SImode,
for 64bits use SImode when possible, otherwise DImode.
else
counter_mode = DImode;
+ start_sequence ();
+
if (counter_mode != SImode && counter_mode != DImode)
abort ();
able to predict the branches) and also it is friendlier to the
hardware branch prediction.
- Using loops is benefical for generic case, because we can
+ Using loops is beneficial for generic case, because we can
handle small counts using the loops. Many CPUs (such as Athlon)
have large REP prefix setup costs.
- This is quite costy. Maybe we can revisit this decision later or
+ This is quite costly. Maybe we can revisit this decision later or
add some customizability to this code. */
if (count == 0 && align < desired_alignment)
align = 32;
if (GET_CODE (count_exp) == CONST_INT)
- count = INTVAL (count_exp);
+ {
+ count = INTVAL (count_exp);
+ if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
+ return 0;
+ }
/* Figure out proper mode for counter. For 32bits it is always SImode,
for 64bits use SImode when possible, otherwise DImode.
Set count to number of bytes copied when known at compile time. */
rtx mem;
rtx tmpreg = gen_reg_rtx (SImode);
rtx scratch = gen_reg_rtx (SImode);
+ rtx cmp;
align = 0;
if (GET_CODE (align_rtx) == CONST_INT)
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
+ cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
if (TARGET_64BIT)
- emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
+ emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
else
- emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
+ emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
emit_label (end_0_label);
}
rtx set, set2;
int dep_insn_code_number;
- /* Anti and output depenancies have zero cost on all CPUs. */
+ /* Anti and output dependencies have zero cost on all CPUs. */
if (REG_NOTE_KIND (link) != 0)
return 0;
if (ix86_flags_dependant (insn, dep_insn, insn_type))
cost = 0;
- /* Floating point stores require value to be ready one cycle ealier. */
+ /* Floating point stores require value to be ready one cycle earlier. */
if (insn_type == TYPE_FMOV
&& get_attr_memory (insn) == MEMORY_STORE
&& !ix86_agi_dependant (insn, dep_insn, insn_type))
size_t i;
tree pchar_type_node = build_pointer_type (char_type_node);
+ tree pcchar_type_node = build_pointer_type (
+ build_type_variant (char_type_node, 1, 0));
tree pfloat_type_node = build_pointer_type (float_type_node);
+ tree pcfloat_type_node = build_pointer_type (
+ build_type_variant (float_type_node, 1, 0));
tree pv2si_type_node = build_pointer_type (V2SI_type_node);
tree pv2di_type_node = build_pointer_type (V2DI_type_node);
tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
= build_function_type_list (void_type_node,
V8QI_type_node, V8QI_type_node,
pchar_type_node, NULL_TREE);
- tree v4sf_ftype_pfloat
- = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
+ tree v4sf_ftype_pcfloat
+ = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type_list (V4SF_type_node,
= build_function_type_list (V2SI_type_node,
V2SF_type_node, V2SF_type_node, NULL_TREE);
tree pint_type_node = build_pointer_type (integer_type_node);
+ tree pcint_type_node = build_pointer_type (
+ build_type_variant (integer_type_node, 1, 0));
tree pdouble_type_node = build_pointer_type (double_type_node);
+ tree pcdouble_type_node = build_pointer_type (
+ build_type_variant (double_type_node, 1, 0));
tree int_ftype_v2df_v2df
= build_function_type_list (integer_type_node,
V2DF_type_node, V2DF_type_node, NULL_TREE);
tree ti_ftype_ti_ti
= build_function_type_list (intTI_type_node,
intTI_type_node, intTI_type_node, NULL_TREE);
- tree void_ftype_pvoid
- = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+ tree void_ftype_pcvoid
+ = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
tree v2di_ftype_di
= build_function_type_list (V2DI_type_node,
long_long_unsigned_type_node, NULL_TREE);
= build_function_type_list (void_type_node,
V16QI_type_node, V16QI_type_node,
pchar_type_node, NULL_TREE);
- tree v2df_ftype_pdouble
- = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
+ tree v2df_ftype_pcdouble
+ = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
tree v2df_ftype_v2df_v2df
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DF_type_node, NULL_TREE);
V16QI_type_node, V16QI_type_node, NULL_TREE);
tree int_ftype_v16qi
= build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
- tree v16qi_ftype_pchar
- = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
+ tree v16qi_ftype_pcchar
+ = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
tree void_ftype_pchar_v16qi
= build_function_type_list (void_type_node,
pchar_type_node, V16QI_type_node, NULL_TREE);
- tree v4si_ftype_pchar
- = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
- tree void_ftype_pchar_v4si
+ tree v4si_ftype_pcint
+ = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
+ tree void_ftype_pcint_v4si
= build_function_type_list (void_type_node,
- pchar_type_node, V4SI_type_node, NULL_TREE);
+ pcint_type_node, V4SI_type_node, NULL_TREE);
tree v2di_ftype_v2di
= build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
- def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
- def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
+ def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+ def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
- def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
- def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
+ def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
+ def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
+ def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
- def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
+ def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
- def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
- def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
- def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
+ def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
- def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
+ def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
/* In case of copying from general_purpose_register we may emit multiple
stores followed by single load causing memory size mismatch stall.
- Count this as arbitarily high cost of 20. */
+ Count this as arbitrarily high cost of 20. */
if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
cost += 20;
}
}
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+ix86_rtx_costs (x, code, outer_code, total)
+ rtx x;
+ int code, outer_code;
+ int *total;
+{
+ enum machine_mode mode = GET_MODE (x);
+
+ switch (code)
+ {
+ case CONST_INT:
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ if (TARGET_64BIT && !x86_64_sign_extended_value (x))
+ *total = 3;
+ else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
+ *total = 2;
+ else if (flag_pic && SYMBOLIC_CONST (x))
+ *total = 1;
+ else
+ *total = 0;
+ return true;
+
+ case CONST_DOUBLE:
+ if (mode == VOIDmode)
+ *total = 0;
+ else
+ switch (standard_80387_constant_p (x))
+ {
+ case 1: /* 0.0 */
+ *total = 1;
+ break;
+ case 2: /* 1.0 */
+ *total = 2;
+ break;
+ default:
+ /* Start with (MEM (SYMBOL_REF)), since that's where
+ it'll probably end up. Add a penalty for size. */
+ *total = (COSTS_N_INSNS (1)
+ + (flag_pic != 0)
+ + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
+ break;
+ }
+ return true;
+
+ case ZERO_EXTEND:
+ /* Zero extension is often completely free on x86_64, so make
+ it as cheap as possible. */
+ if (TARGET_64BIT && mode == DImode
+ && GET_MODE (XEXP (x, 0)) == SImode)
+ *total = 1;
+ else if (TARGET_ZERO_EXTEND_WITH_AND)
+ *total = COSTS_N_INSNS (ix86_cost->add);
+ else
+ *total = COSTS_N_INSNS (ix86_cost->movzx);
+ return false;
+
+ case SIGN_EXTEND:
+ *total = COSTS_N_INSNS (ix86_cost->movsx);
+ return false;
+
+ case ASHIFT:
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+ {
+ HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ if (value == 1)
+ {
+ *total = COSTS_N_INSNS (ix86_cost->add);
+ return false;
+ }
+ if ((value == 2 || value == 3)
+ && !TARGET_DECOMPOSE_LEA
+ && ix86_cost->lea <= ix86_cost->shift_const)
+ {
+ *total = COSTS_N_INSNS (ix86_cost->lea);
+ return false;
+ }
+ }
+ /* FALLTHRU */
+
+ case ROTATE:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ case ROTATERT:
+ if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ if (INTVAL (XEXP (x, 1)) > 32)
+ *total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
+ else
+ *total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == AND)
+ *total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
+ else
+ *total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
+ }
+ }
+ else
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ *total = COSTS_N_INSNS (ix86_cost->shift_const);
+ else
+ *total = COSTS_N_INSNS (ix86_cost->shift_var);
+ }
+ return false;
+
+ case MULT:
+ if (FLOAT_MODE_P (mode))
+ *total = COSTS_N_INSNS (ix86_cost->fmul);
+ else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
+ int nbits;
+
+ for (nbits = 0; value != 0; value >>= 1)
+ nbits++;
+
+ *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
+ + nbits * ix86_cost->mult_bit);
+ }
+ else
+ {
+ /* This is arbitrary.  */
+ *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
+ + 7 * ix86_cost->mult_bit);
+ }
+ return false;
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (FLOAT_MODE_P (mode))
+ *total = COSTS_N_INSNS (ix86_cost->fdiv);
+ else
+ *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
+ return false;
+
+ case PLUS:
+ if (FLOAT_MODE_P (mode))
+ *total = COSTS_N_INSNS (ix86_cost->fadd);
+ else if (!TARGET_DECOMPOSE_LEA
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
+ {
+ if (GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
+ && CONSTANT_P (XEXP (x, 1)))
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
+ {
+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
+ if (val == 2 || val == 4 || val == 8)
+ {
+ *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ else if (GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ *total = COSTS_N_INSNS (ix86_cost->lea);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
+ *total += rtx_cost (XEXP (x, 1), outer_code);
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ case MINUS:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = COSTS_N_INSNS (ix86_cost->fadd);
+ return false;
+ }
+ /* FALLTHRU */
+
+ case AND:
+ case IOR:
+ case XOR:
+ if (!TARGET_64BIT && mode == DImode)
+ {
+ *total = (COSTS_N_INSNS (ix86_cost->add) * 2
+ + (rtx_cost (XEXP (x, 0), outer_code)
+ << (GET_MODE (XEXP (x, 0)) != DImode))
+ + (rtx_cost (XEXP (x, 1), outer_code)
+ << (GET_MODE (XEXP (x, 1)) != DImode)));
+ return true;
+ }
+ /* FALLTHRU */
+
+ case NEG:
+ if (FLOAT_MODE_P (mode))
+ {
+ *total = COSTS_N_INSNS (ix86_cost->fchs);
+ return false;
+ }
+ /* FALLTHRU */
+
+ case NOT:
+ if (!TARGET_64BIT && mode == DImode)
+ *total = COSTS_N_INSNS (ix86_cost->add * 2);
+ else
+ *total = COSTS_N_INSNS (ix86_cost->add);
+ return false;
+
+ case FLOAT_EXTEND:
+ if (!TARGET_SSE_MATH || !VALID_SSE_REG_MODE (mode))
+ *total = 0;
+ return false;
+
+ case ABS:
+ if (FLOAT_MODE_P (mode))
+ *total = COSTS_N_INSNS (ix86_cost->fabs);
+ return false;
+
+ case SQRT:
+ if (FLOAT_MODE_P (mode))
+ *total = COSTS_N_INSNS (ix86_cost->fsqrt);
+ return false;
+
+ default:
+ return false;
+ }
+}
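+
+/* Worked example (illustrative): for (mult:SI (reg:SI 0) (const_int 10))
+   the nbits loop above computes nbits = 4 (the bit length of 10), so the
+   MULT is costed at COSTS_N_INSNS (mult_init[MODE_INDEX (SImode)]
+   + 4 * mult_bit).  */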
+
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
reg_alloc_order [pos++] = i;
- /* x87 registerts. */
+ /* x87 registers. */
if (TARGET_SSE_MATH)
for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
reg_alloc_order [pos++] = i;
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif
+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
+ struct attribute_spec.handler. */
+static tree
+ix86_handle_struct_attribute (node, name, args, flags, no_add_attrs)
+ tree *node;
+ tree name;
+ tree args ATTRIBUTE_UNUSED;
+ int flags ATTRIBUTE_UNUSED;
+ bool *no_add_attrs;
+{
+ tree *type = NULL;
+ if (DECL_P (*node))
+ {
+ if (TREE_CODE (*node) == TYPE_DECL)
+ type = &TREE_TYPE (*node);
+ }
+ else
+ type = node;
+
+ if (!(type && (TREE_CODE (*type) == RECORD_TYPE
+ || TREE_CODE (*type) == UNION_TYPE)))
+ {
+ warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ else if ((is_attribute_p ("ms_struct", name)
+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
+ || ((is_attribute_p ("gcc_struct", name)
+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
+ {
+ warning ("`%s' incompatible attribute ignored",
+ IDENTIFIER_POINTER (name));
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
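+
+/* Usage sketch (hypothetical):
+     struct __attribute__ ((ms_struct)) s { char c; int i : 4; };
+   gets the MS bitfield layout even without -mms-bitfields, while
+   gcc_struct forces the native GCC layout (see
+   ix86_ms_bitfield_layout_p below).  */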
+
static bool
ix86_ms_bitfield_layout_p (record_type)
- tree record_type ATTRIBUTE_UNUSED;
+ tree record_type;
{
- return TARGET_USE_MS_BITFIELD_LAYOUT;
+ return ((TARGET_USE_MS_BITFIELD_LAYOUT
+ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
if (!flag_pic || (*targetm.binds_local_p) (function))
output_asm_insn ("jmp\t%P0", xops);
else
+#if TARGET_MACHO
+ if (TARGET_MACHO)
+ {
+ char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
+ tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
+ tmp = gen_rtx_MEM (QImode, tmp);
+ xops[0] = tmp;
+ output_asm_insn ("jmp\t%0", xops);
+ }
+ else
+#endif /* TARGET_MACHO */
{
tmp = gen_rtx_REG (SImode, 2 /* ECX */);
output_set_got (tmp);
void
x86_function_profiler (file, labelno)
FILE *file;
- int labelno;
+ int labelno ATTRIBUTE_UNUSED;
{
if (TARGET_64BIT)
if (flag_pic)
else
{
#ifndef NO_PROFILE_COUNTERS
- fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
+ fprintf (file, "\tmovl\t$%sP%d,%%$%s\n", LPREFIX, labelno,
PROFILE_COUNT_REGISTER);
#endif
fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* Implement machine specific optimizations.
At the moment we implement single transformation: AMD Athlon works faster
- when RET is not destination of conditional jump or directly preceeded
+ when RET is not destination of conditional jump or directly preceded
by other jump instruction. We avoid the penalty by inserting NOP just
before the RET instructions in such cases. */
void