#include "real.h"
#include "insn-config.h"
#include "conditions.h"
-#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
+const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
+const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
+const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
+const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
struct rtx_def *ix86_compare_op0 = NULL_RTX;
struct rtx_def *ix86_compare_op1 = NULL_RTX;
-#define MAX_386_STACK_LOCALS 2
+#define MAX_386_STACK_LOCALS 3
+/* Size of the register save area. */
+#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
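+/* With the usual x86_64 ABI values (REGPARM_MAX == 6, UNITS_PER_WORD == 8,
+   SSE_REGPARM_MAX == 8) this comes to 6*8 + 8*16 == 176 bytes.  */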
/* Define the structure for the machine field in struct function. */
struct machine_function
{
rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
+ int save_varrargs_registers;
int accesses_prev_frame;
};
#define ix86_stack_locals (cfun->machine->stack_locals)
+#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
/* Structure describing stack frame layout.
Stack grows downward:
{
int nregs;
int padding1;
+ int va_arg_size;
HOST_WIDE_INT frame;
int padding2;
int outgoing_arguments_size;
+ int red_zone_size;
HOST_WIDE_INT to_allocate;
/* The offsets relative to ARG_POINTER. */
const char *ix86_branch_cost_string;
/* Power of two alignment for functions. */
-int ix86_align_funcs;
const char *ix86_align_funcs_string;
-
-/* Power of two alignment for loops. */
-int ix86_align_loops;
-
-/* Power of two alignment for non-loop jumps. */
-int ix86_align_jumps;
\f
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
static int ix86_safe_length_prefix PARAMS ((rtx));
static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void));
-static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int));
-static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
+static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
+static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_pentium PARAMS((rtx *, rtx *));
static void ix86_sched_reorder_ppro PARAMS((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
+static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
+static rtx ix86_zero_extend_to_Pmode PARAMS ((rtx));
+static rtx ix86_expand_aligntest PARAMS ((rtx, int));
+static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
struct ix86_address
{
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
-static int ix86_save_reg PARAMS ((int));
+static int ix86_save_reg PARAMS ((int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
+static int ix86_comp_type_attributes PARAMS ((tree, tree));
+\f
+/* Initialize the GCC target structure. */
+#undef TARGET_VALID_TYPE_ATTRIBUTE
+#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
+# define TARGET_VALID_TYPE_ATTRIBUTE i386_pe_valid_type_attribute_p
+# undef TARGET_VALID_DECL_ATTRIBUTE
+# define TARGET_VALID_DECL_ATTRIBUTE i386_pe_valid_decl_attribute_p
+# undef TARGET_MERGE_DECL_ATTRIBUTES
+# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
+#else
+# define TARGET_VALID_TYPE_ATTRIBUTE ix86_valid_type_attribute_p
+#endif
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
+
+struct gcc_target target = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
sense on a particular target machine. You can define a macro
ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
if (ix86_cmodel == CM_LARGE)
sorry ("Code model `large' not supported yet.");
+ if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
+ sorry ("%i-bit mode not compiled in.",
+ (target_flags & MASK_64BIT) ? 64 : 32);
if (ix86_arch_string != 0)
{
else
ix86_regparm = i;
}
+ else
+ if (TARGET_64BIT)
+ ix86_regparm = REGPARM_MAX;
- /* Validate -malign-loops= value, or provide default. */
- ix86_align_loops = processor_target_table[ix86_cpu].align_loop;
+ /* If the user has provided any of the -malign-* options,
+ warn and use that value only if -falign-* is not set.
+ Remove this code in GCC 3.2 or later. */
if (ix86_align_loops_string)
{
- i = atoi (ix86_align_loops_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
- else
- ix86_align_loops = i;
+ warning ("-malign-loops is obsolete, use -falign-loops");
+ if (align_loops == 0)
+ {
+ i = atoi (ix86_align_loops_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+ error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_loops = 1 << i;
+ }
}
- /* Validate -malign-jumps= value, or provide default. */
- ix86_align_jumps = processor_target_table[ix86_cpu].align_jump;
if (ix86_align_jumps_string)
{
- i = atoi (ix86_align_jumps_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
- else
- ix86_align_jumps = i;
+ warning ("-malign-jumps is obsolete, use -falign-jumps");
+ if (align_jumps == 0)
+ {
+ i = atoi (ix86_align_jumps_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_jumps = 1 << i;
+ }
}
- /* Validate -malign-functions= value, or provide default. */
- ix86_align_funcs = processor_target_table[ix86_cpu].align_func;
if (ix86_align_funcs_string)
{
- i = atoi (ix86_align_funcs_string);
- if (i < 0 || i > MAX_CODE_ALIGN)
- error ("-malign-functions=%d is not between 0 and %d",
- i, MAX_CODE_ALIGN);
- else
- ix86_align_funcs = i;
+ warning ("-malign-functions is obsolete, use -falign-functions");
+ if (align_functions == 0)
+ {
+ i = atoi (ix86_align_funcs_string);
+ if (i < 0 || i > MAX_CODE_ALIGN)
+	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
+ else
+ align_functions = 1 << i;
+ }
}
+ /* Default align_* from the processor table. */
+#define abs(n) ((n) < 0 ? -(n) : (n))
+ if (align_loops == 0)
+ align_loops = 1 << abs (processor_target_table[ix86_cpu].align_loop);
+ if (align_jumps == 0)
+ align_jumps = 1 << abs (processor_target_table[ix86_cpu].align_jump);
+ if (align_functions == 0)
+ align_functions = 1 << abs (processor_target_table[ix86_cpu].align_func);
+
/* Validate -mpreferred-stack-boundary= value, or provide default.
The default of 128 bits is for Pentium III's SSE __m128. */
ix86_preferred_stack_boundary = 128;
if (ix86_preferred_stack_boundary_string)
{
i = atoi (ix86_preferred_stack_boundary_string);
- if (i < 2 || i > 31)
- error ("-mpreferred-stack-boundary=%d is not between 2 and 31", i);
+ if (i < (TARGET_64BIT ? 3 : 2) || i > 31)
+ error ("-mpreferred-stack-boundary=%d is not between %d and 31", i,
+ TARGET_64BIT ? 3 : 2);
else
ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
}
on by -msse. */
if (TARGET_SSE)
target_flags |= MASK_MMX;
+
+ if ((x86_accumulate_outgoing_args & CPUMASK)
+ && !(target_flags & MASK_NO_ACCUMULATE_OUTGOING_ARGS)
+ && !optimize_size)
+ target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
}
\f
void
}
\f
/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
- attribute for DECL. The attributes in ATTRIBUTES have previously been
- assigned to DECL. */
-
-int
-ix86_valid_decl_attribute_p (decl, attributes, identifier, args)
- tree decl ATTRIBUTE_UNUSED;
- tree attributes ATTRIBUTE_UNUSED;
- tree identifier ATTRIBUTE_UNUSED;
- tree args ATTRIBUTE_UNUSED;
-{
- return 0;
-}
-
-/* Return nonzero if IDENTIFIER with arguments ARGS is a valid machine specific
attribute for TYPE. The attributes in ATTRIBUTES have previously been
assigned to TYPE. */
/* Stdcall attribute says callee is responsible for popping arguments
if they are not variable. */
- if (is_attribute_p ("stdcall", identifier))
+ if (is_attribute_p ("stdcall", identifier)
+ && !TARGET_64BIT)
return (args == NULL_TREE);
/* Cdecl attribute says the callee is a normal C declaration. */
- if (is_attribute_p ("cdecl", identifier))
+ if (is_attribute_p ("cdecl", identifier)
+ && !TARGET_64BIT)
return (args == NULL_TREE);
/* Regparm attribute specifies how many integer arguments are to be
are compatible, and 2 if they are nearly compatible (which causes a
warning to be generated). */
-int
+static int
ix86_comp_type_attributes (type1, type2)
tree type1;
tree type2;
}
/* Lose any fake structure return argument. */
- if (aggregate_value_p (TREE_TYPE (funtype)))
+ if (aggregate_value_p (TREE_TYPE (funtype))
+ && !TARGET_64BIT)
return GET_MODE_SIZE (Pmode);
return 0;
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ if (mode == VOIDmode)
+ return constm1_rtx;
+
switch (mode)
{
/* For now, pass fp/complex values on the stack. */
}
\f
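+/* In 64bit mode most instructions accept at most a 32bit immediate, which
+   the hardware sign extends to 64 bits; zero extension of a 32bit
+   immediate is available only for moves.  The predicates below therefore
+   behave like their generic counterparts in 32bit mode, but in 64bit mode
+   restrict immediates to values representable in one of those ways.  */
+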
+/* Return nonzero if OP is general operand representable on x86_64. */
+
+int
+x86_64_general_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (!TARGET_64BIT)
+ return general_operand (op, mode);
+ if (nonimmediate_operand (op, mode))
+ return 1;
+ return x86_64_sign_extended_value (op);
+}
+
+/* Return nonzero if OP is general operand representable on x86_64
+   as either a sign extended or zero extended constant.  */
+
+int
+x86_64_szext_general_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (!TARGET_64BIT)
+ return general_operand (op, mode);
+ if (nonimmediate_operand (op, mode))
+ return 1;
+ return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
+}
+
+/* Return nonzero if OP is nonmemory operand representable on x86_64. */
+
+int
+x86_64_nonmemory_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (!TARGET_64BIT)
+ return nonmemory_operand (op, mode);
+ if (register_operand (op, mode))
+ return 1;
+ return x86_64_sign_extended_value (op);
+}
+
+/* Return nonzero if OP is nonmemory operand acceptable by movabs patterns. */
+
+int
+x86_64_movabs_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (!TARGET_64BIT || !flag_pic)
+ return nonmemory_operand (op, mode);
+ if (register_operand (op, mode) || x86_64_sign_extended_value (op))
+ return 1;
+ if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
+ return 1;
+ return 0;
+}
+
+/* Return nonzero if OP is a nonmemory operand representable on x86_64
+   as either a sign or zero extended constant.  */
+
+int
+x86_64_szext_nonmemory_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (!TARGET_64BIT)
+ return nonmemory_operand (op, mode);
+ if (register_operand (op, mode))
+ return 1;
+ return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
+}
+
+/* Return nonzero if OP is immediate operand representable on x86_64. */
+
+int
+x86_64_immediate_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (!TARGET_64BIT)
+ return immediate_operand (op, mode);
+ return x86_64_sign_extended_value (op);
+}
+
+/* Return nonzero if OP is an immediate operand representable on x86_64
+   as a zero extended 32bit constant.  */
+
+int
+x86_64_zext_immediate_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return x86_64_zero_extended_value (op);
+}
+
/* Return nonzero if OP is (const_int 1), else return zero. */
int
int
incdec_operand (op, mode)
register rtx op;
- enum machine_mode mode;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
{
  /* On Pentium4, the inc and dec operations cause an extra dependency on
     the flags register, since the carry flag is not set.  */
if (TARGET_PENTIUM4 && !optimize_size)
return 0;
- if (op == const1_rtx || op == constm1_rtx)
- return 1;
- if (GET_CODE (op) != CONST_INT)
- return 0;
- if (mode == SImode && INTVAL (op) == (HOST_WIDE_INT) 0xffffffff)
- return 1;
- if (mode == HImode && INTVAL (op) == (HOST_WIDE_INT) 0xffff)
- return 1;
- if (mode == QImode && INTVAL (op) == (HOST_WIDE_INT) 0xff)
- return 1;
- return 0;
+ return op == const1_rtx || op == constm1_rtx;
+}
+
+/* Return nonzero if OP is acceptable as operand of DImode shift
+ expander. */
+
+int
+shiftdi_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ if (TARGET_64BIT)
+ return nonimmediate_operand (op, mode);
+ else
+ return register_operand (op, mode);
}
/* Return false if this is the stack pointer, or any other fake
|| t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
|| t == virtual_stack_dynamic_rtx)
return 0;
+ if (REG_P (t)
+ && REGNO (t) >= FIRST_VIRTUAL_REGISTER
+ && REGNO (t) <= LAST_VIRTUAL_REGISTER)
+ return 0;
return general_operand (op, mode);
}
register rtx op;
enum machine_mode mode ATTRIBUTE_UNUSED;
{
- if (GET_MODE (op) != SImode && GET_MODE (op) != HImode)
+ int regno;
+ if ((!TARGET_64BIT || GET_MODE (op) != DImode)
+ && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
+ return 0;
+
+ if (!register_operand (op, VOIDmode))
return 0;
- return register_operand (op, VOIDmode);
+
+  /* Be careful to accept only registers having upper parts: among the
+     hard registers only the first four (ax, dx, cx, bx) have QImode
+     high-half names, while a pseudo past LAST_VIRTUAL_REGISTER may still
+     be allocated to one of them.  */
+ regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
+ return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
}
/* Return 1 if this is a valid binary floating-point operation.
rtx op;
enum machine_mode mode;
{
- if (general_operand (op, mode))
+ if (nonimmediate_operand (op, mode))
return 1;
if (GET_CODE (op) == AND
else
{
HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
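+	  /* Check that the value still fits in a sign extended 32bit
+	     immediate; trunc_int_for_mode avoids assuming the host `int'
+	     is exactly 32 bits wide.  */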
- return (HOST_WIDE_INT)(int)val == val;
+ return trunc_int_for_mode (val, SImode) == val;
}
break;
{
rtx gotsym, pclab;
+ if (TARGET_64BIT)
+    abort ();
+
gotsym = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
if (TARGET_DEEP_BRANCH_PREDICTION)
emit_insn (gen_prologue_set_got (pic_offset_table_rtx, gotsym, pclab));
}
-/* Generate an SImode "push" pattern for input ARG. */
+/* Generate a "push" pattern for input ARG.  */
static rtx
gen_push (arg)
rtx arg;
{
return gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (SImode,
- gen_rtx_PRE_DEC (SImode,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PRE_DEC (Pmode,
stack_pointer_rtx)),
arg);
}
/* Return 1 if we need to save REGNO.  MAYBE_EH_RETURN is nonzero when the
   eh_return data registers should also be considered saved.  */
static int
-ix86_save_reg (regno)
- int regno;
-{
- int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
- || current_function_uses_const_pool);
- return ((regs_ever_live[regno] && !call_used_regs[regno]
- && !fixed_regs[regno]
- && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed))
- || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used));
+ix86_save_reg (regno, maybe_eh_return)
+ int regno;
+ int maybe_eh_return;
+{
+ if (flag_pic
+ && ! TARGET_64BIT
+ && regno == PIC_OFFSET_TABLE_REGNUM
+ && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool
+ || current_function_calls_eh_return))
+ return 1;
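+
+  /* The registers named by EH_RETURN_DATA_REGNO carry the exception data
+     out of an eh_return function, so they count as saved wherever that
+     return path is possible.  */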
+ if (current_function_calls_eh_return && maybe_eh_return)
+ {
+ unsigned i;
+ for (i = 0; ; i++)
+ {
+	  unsigned test = EH_RETURN_DATA_REGNO (i);
+ if (test == INVALID_REGNUM)
+ break;
+ if (test == (unsigned) regno)
+ return 1;
+ }
+ }
+
+ return (regs_ever_live[regno]
+ && !call_used_regs[regno]
+ && !fixed_regs[regno]
+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of registers to be saved on the stack. */
int regno;
for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
- if (ix86_save_reg (regno))
+ if (ix86_save_reg (regno, true))
nregs++;
return nregs;
}
/* Register save area */
offset += frame->nregs * UNITS_PER_WORD;
+ /* Va-arg area */
+ if (ix86_save_varrargs_registers)
+ {
+ offset += X86_64_VARARGS_SIZE;
+ frame->va_arg_size = X86_64_VARARGS_SIZE;
+ }
+ else
+ frame->va_arg_size = 0;
+
/* Align start of frame for local function. */
frame->padding1 = ((offset + stack_alignment_needed - 1)
& -stack_alignment_needed) - offset;
/* Size prologue needs to allocate. */
frame->to_allocate =
(size + frame->padding1 + frame->padding2
- + frame->outgoing_arguments_size);
+ + frame->outgoing_arguments_size + frame->va_arg_size);
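+  /* On x86_64 a leaf function may keep its frame in the 128 byte red zone
+     below the stack pointer that the ABI guarantees, avoiding an explicit
+     allocation.  */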
+ if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
+ && current_function_is_leaf)
+ {
+ frame->red_zone_size = frame->to_allocate;
+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
+ }
+ else
+ frame->red_zone_size = 0;
+ frame->to_allocate -= frame->red_zone_size;
+ frame->stack_pointer_offset -= frame->red_zone_size;
#if 0
fprintf (stderr, "nregs: %i\n", frame->nregs);
fprintf (stderr, "size: %i\n", size);
fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
fprintf (stderr, "padding1: %i\n", frame->padding1);
+ fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
fprintf (stderr, "padding2: %i\n", frame->padding2);
fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
+ fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
fprintf (stderr, "hard_frame_pointer_offset: %i\n",
frame->hard_frame_pointer_offset);
rtx insn;
for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
- if (ix86_save_reg (regno))
+ if (ix86_save_reg (regno, true))
+ {
+ insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Emit code to save registers using MOV insns.  The first register
+   is stored to POINTER + OFFSET.  */
+static void
+ix86_emit_save_regs_using_mov (pointer, offset)
+ rtx pointer;
+ HOST_WIDE_INT offset;
+{
+ int regno;
+ rtx insn;
+
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (ix86_save_reg (regno, true))
{
- insn = emit_insn (gen_push (gen_rtx_REG (SImode, regno)));
+ insn = emit_move_insn (adj_offsettable_operand (gen_rtx_MEM (Pmode,
+ pointer),
+ offset),
+ gen_rtx_REG (Pmode, regno));
RTX_FRAME_RELATED_P (insn) = 1;
+ offset += UNITS_PER_WORD;
}
}
ix86_expand_prologue ()
{
rtx insn;
- int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
- || current_function_uses_const_pool);
+ int pic_reg_used = (flag_pic && (current_function_uses_pic_offset_table
+ || current_function_uses_const_pool)
+ && !TARGET_64BIT);
struct ix86_frame frame;
+ int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
+ HOST_WIDE_INT allocate;
ix86_compute_frame_layout (&frame);
RTX_FRAME_RELATED_P (insn) = 1;
}
- ix86_emit_save_regs ();
+ allocate = frame.to_allocate;
+  /* When we are dealing with only a single register and an empty frame,
+     a push is equivalent to the mov+add sequence.  */
+ if (allocate == 0 && frame.nregs <= 1)
+ use_mov = 0;
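+
+  /* When saving registers with moves, their slots become part of the
+     frame allocation below instead of being pushed one by one.  */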
- if (frame.to_allocate == 0)
+ if (!use_mov)
+ ix86_emit_save_regs ();
+ else
+ allocate += frame.nregs * UNITS_PER_WORD;
+
+ if (allocate == 0)
;
- else if (! TARGET_STACK_PROBE || frame.to_allocate < CHECK_STACK_LIMIT)
+ else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
{
- if (frame_pointer_needed)
- insn = emit_insn (gen_pro_epilogue_adjust_stack
- (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-frame.to_allocate), hard_frame_pointer_rtx));
- else
- insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-frame.to_allocate)));
+ insn = emit_insn (gen_pro_epilogue_adjust_stack
+ (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-allocate)));
RTX_FRAME_RELATED_P (insn) = 1;
}
else
rtx arg0, sym;
+ if (TARGET_64BIT)
+	abort ();
+
arg0 = gen_rtx_REG (SImode, 0);
- emit_move_insn (arg0, GEN_INT (frame.to_allocate));
+ emit_move_insn (arg0, GEN_INT (allocate));
sym = gen_rtx_MEM (FUNCTION_MODE,
gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
- insn = emit_call_insn (gen_call (sym, const0_rtx));
+ insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
CALL_INSN_FUNCTION_USAGE (insn)
= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
CALL_INSN_FUNCTION_USAGE (insn));
}
+ if (use_mov)
+ {
+ if (!frame_pointer_needed || !frame.to_allocate)
+ ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+ else
+ ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
+ -frame.nregs * UNITS_PER_WORD);
+ }
#ifdef SUBTARGET_PROLOGUE
SUBTARGET_PROLOGUE;
emit_insn (gen_blockage ());
}
-/* Emit code to add TSIZE to esp value. Use POP instruction when
- profitable. */
-
-static void
-ix86_emit_epilogue_esp_adjustment (tsize)
- int tsize;
-{
- /* If a frame pointer is present, we must be sure to tie the sp
- to the fp so that we don't mis-schedule. */
- if (frame_pointer_needed)
- emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (tsize),
- hard_frame_pointer_rtx));
- else
- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (tsize)));
-}
-
/* Emit code to restore saved registers using MOV insns. First register
is restored from POINTER + OFFSET. */
static void
-ix86_emit_restore_regs_using_mov (pointer, offset)
- rtx pointer;
- int offset;
+ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
+ rtx pointer;
+ int offset;
+ int maybe_eh_return;
{
int regno;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno))
+ if (ix86_save_reg (regno, maybe_eh_return))
{
emit_move_insn (gen_rtx_REG (Pmode, regno),
adj_offsettable_operand (gen_rtx_MEM (Pmode,
/* Restore function stack, frame, and registers.  STYLE is 0 for a sibcall
   epilogue (no return emitted), 1 for a normal epilogue and 2 for an
   eh_return epilogue.  */
void
-ix86_expand_epilogue (emit_return)
- int emit_return;
+ix86_expand_epilogue (style)
+ int style;
{
int regno;
int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
ix86_compute_frame_layout (&frame);
- /* Calculate start of saved registers relative to ebp. */
- offset = -frame.nregs * UNITS_PER_WORD;
+ /* Calculate start of saved registers relative to ebp. Special care
+ must be taken for the normal return case of a function using
+ eh_return: the eax and edx registers are marked as saved, but not
+ restored along this path. */
+ offset = frame.nregs;
+ if (current_function_calls_eh_return && style != 2)
+ offset -= 2;
+ offset *= -UNITS_PER_WORD;
#ifdef FUNCTION_BLOCK_PROFILER_EXIT
if (profile_block_flag == 2)
   and there is exactly one register to pop.  This heuristic may need some
   tuning in the future.  */
if ((!sp_valid && frame.nregs <= 1)
+ || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ && (frame.nregs > 1 || frame.to_allocate))
|| (frame_pointer_needed && !frame.nregs && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
- && frame.nregs == 1))
+ && frame.nregs == 1)
+ || style == 2)
{
/* Restore registers. We can use ebp or esp to address the memory
locations. If both are available, default to ebp, since offsets
mode. */
if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
- ix86_emit_restore_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
+ ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
+ frame.to_allocate, style == 2);
else
- ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, offset);
+ ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
+ offset, style == 2);
+
+ /* eh_return epilogues need %ecx added to the stack pointer. */
+ if (style == 2)
+ {
+ rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
- if (!frame_pointer_needed)
- ix86_emit_epilogue_esp_adjustment (frame.to_allocate
- + frame.nregs * UNITS_PER_WORD);
+ if (frame_pointer_needed)
+ {
+ tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
+ tmp = plus_constant (tmp, UNITS_PER_WORD);
+ emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
+
+ tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
+ emit_move_insn (hard_frame_pointer_rtx, tmp);
+
+ emit_insn (gen_pro_epilogue_adjust_stack
+ (stack_pointer_rtx, sa, const0_rtx));
+ }
+ else
+ {
+ tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
+ tmp = plus_constant (tmp, (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD));
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
+ }
+ }
+ else if (!frame_pointer_needed)
+ emit_insn (gen_pro_epilogue_adjust_stack
+ (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate
+ + frame.nregs * UNITS_PER_WORD)));
/* If not an i386, mov & pop is faster than "leave". */
else if (TARGET_USE_LEAVE || optimize_size)
- emit_insn (gen_leave ());
+ emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else
{
emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
- const0_rtx,
- hard_frame_pointer_rtx));
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ const0_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ else
+ emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
}
}
else
abort ();
emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
- GEN_INT (offset),
- hard_frame_pointer_rtx));
+ GEN_INT (offset)));
}
else if (frame.to_allocate)
- ix86_emit_epilogue_esp_adjustment (frame.to_allocate);
+ emit_insn (gen_pro_epilogue_adjust_stack
+ (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (frame.to_allocate)));
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
- if (ix86_save_reg (regno))
- emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
+ if (ix86_save_reg (regno, false))
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
+ else
+ emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
+ }
if (frame_pointer_needed)
- emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
+ else
+ emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
+ }
}
/* Sibcall epilogues don't want a return instruction. */
- if (! emit_return)
+ if (style == 0)
return;
if (current_function_pops_args && current_function_args_size)
{
rtx ecx = gen_rtx_REG (SImode, 2);
+  /* There is no "pascal" calling convention in the 64bit ABI.  */
+ if (TARGET_64BIT)
+    abort ();
+
emit_insn (gen_popsi1 (ecx));
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
emit_jump_insn (gen_return_indirect_internal (ecx));
goto report_error;
}
- if (GET_CODE (disp) == CONST_DOUBLE)
+ if (TARGET_64BIT)
{
- reason = "displacement is a const_double";
- goto report_error;
+ if (!x86_64_sign_extended_value (disp))
+ {
+ reason = "displacement is out of range";
+ goto report_error;
+ }
+ }
+ else
+ {
+ if (GET_CODE (disp) == CONST_DOUBLE)
+ {
+ reason = "displacement is a const_double";
+ goto report_error;
+ }
}
if (flag_pic && SYMBOLIC_CONST (disp))
{
+ if (TARGET_64BIT && (index || base))
+ {
+ reason = "non-constant pic memory reference";
+ goto report_error;
+ }
if (! legitimate_pic_address_disp_p (disp))
{
reason = "displacement is an invalid pic construct";
if (ASSEMBLER_DIALECT == 0 || USER_LABEL_PREFIX[0] == 0)
putc ('%', file);
- if (code == 'w')
+ if (code == 'w' || MMX_REG_P (x))
code = 2;
else if (code == 'b')
code = 1;
code = 3;
else if (code == 'h')
code = 0;
- else if (code == 'm' || MMX_REG_P (x))
- code = 5;
else
code = GET_MODE_SIZE (GET_MODE (x));
from the normal registers. */
if (REX_INT_REG_P (x))
{
+ if (!TARGET_64BIT)
+ abort ();
switch (code)
{
- case 5:
+ case 0:
error ("Extended registers have no high halves\n");
break;
case 1:
}
switch (code)
{
- case 5:
- fputs (hi_reg_name[REGNO (x)], file);
- break;
case 3:
if (STACK_TOP_P (x))
{
case 4:
case 12:
if (! ANY_FP_REG_P (x))
- putc (code == 8 ? 'r' : 'e', file);
+ putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
case 2:
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
C -- print opcode suffix for set/cmov insn.
c -- like C, but print reversed condition
+ F,f -- likewise, but for floating-point.
R -- print the prefix for register names.
z -- print the opcode suffix for the size of the current operand.
* -- print a star (in certain assembler syntax)
w -- likewise, print the HImode name of the register.
k -- likewise, print the SImode name of the register.
q -- likewise, print the DImode name of the register.
- h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
- y -- print "st(0)" instead of "st" as a register.
- m -- print "st(n)" as an mmx register.
+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
+ y -- print "st(0)" instead of "st" as a register.
D -- print condition for SSE cmp instruction.
+ P -- if PIC, print an @PLT suffix.
+ X -- don't print any sort of PIC '@' suffix for a symbol.
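+   + -- print a branch hint as 'cs' or 'ds' prefix when profitable.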
*/
void
case 'q':
case 'h':
case 'y':
- case 'm':
case 'X':
case 'P':
break;
case 'f':
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
return;
+ case '+':
+ {
+ rtx x;
+ if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
+ return;
+
+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
+ if (x)
+ {
+ int pred_val = INTVAL (XEXP (x, 0));
+
+ if (pred_val < REG_BR_PROB_BASE * 45 / 100
+ || pred_val > REG_BR_PROB_BASE * 55 / 100)
+ {
+ int taken = pred_val > REG_BR_PROB_BASE / 2;
+ int cputaken = final_forward_branch_p (current_output_insn) == 0;
+
+		/* Emit hints only in the case the default branch prediction
+		   heuristics would fail.  */
+ if (taken != cputaken)
+ {
+ /* We use 3e (DS) prefix for taken branches and
+ 2e (CS) prefix for not taken branches. */
+ if (taken)
+ fputs ("ds ; ", file);
+ else
+ fputs ("cs ; ", file);
+ }
+ }
+ }
+ return;
+ }
default:
{
char str[50];
x = XEXP (x, 0);
if (flag_pic && CONSTANT_ADDRESS_P (x))
output_pic_addr_const (file, x, code);
+ /* Avoid (%rip) for call operands. */
+      else if (CONSTANT_ADDRESS_P (x) && code == 'P'
+ && GET_CODE (x) != CONST_INT)
+ output_addr_const (file, x);
else
output_address (x);
}
output_pic_addr_const (file, addr, 0);
else
output_addr_const (file, addr);
+
+ /* Use one byte shorter RIP relative addressing for 64bit mode. */
+ if (GET_CODE (disp) != CONST_INT && TARGET_64BIT)
+ fputs ("(%rip)", file);
}
else
{
}
else if (GET_CODE (op) == REG)
{
+ if (TARGET_64BIT)
+	abort ();
lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
}
else if (offsettable_memref_p (op))
{
- rtx lo_addr = XEXP (op, 0);
rtx hi_addr = XEXP (adj_offsettable_operand (op, 4), 0);
- lo_half[num] = change_address (op, SImode, lo_addr);
+
+ lo_half[num] = adjust_address (op, SImode, 0);
hi_half[num] = change_address (op, SImode, hi_addr);
}
else
return buf;
}
+/* Output code to initialize the control word copies used by the
+   trunc?f?i patterns.  NORMAL is set to the current control word, while
+   ROUND_DOWN is set to a copy whose rounding control bits select
+   truncation (round towards zero).  */
+void
+emit_i387_cw_initialization (normal, round_down)
+ rtx normal, round_down;
+{
+ rtx reg = gen_reg_rtx (HImode);
+
+ emit_insn (gen_x86_fnstcw_1 (normal));
+ emit_move_insn (reg, normal);
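+  /* Force the rounding control field (bits 10 and 11 of the i387 control
+     word) to 11, i.e. round towards zero, which is what the truncating
+     conversions need.  */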
+ if (!TARGET_PARTIAL_REG_STALL && !optimize_size
+ && !TARGET_64BIT)
+ emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
+ else
+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
+ emit_move_insn (round_down, reg);
+}
+
/* Output code for INSN to convert a float to a signed int. OPERANDS
are the insn operands. The output may be [HSD]Imode and the input
operand may be [SDX]Fmode. */
if (dimode_p && !stack_top_dies)
output_asm_insn ("fld\t%y1", operands);
- if (! STACK_TOP_P (operands[1]))
+ if (!STACK_TOP_P (operands[1]))
abort ();
- xops[0] = GEN_INT (12);
- xops[1] = adj_offsettable_operand (operands[2], 1);
- xops[1] = change_address (xops[1], QImode, NULL_RTX);
-
- xops[2] = operands[0];
if (GET_CODE (operands[0]) != MEM)
- xops[2] = operands[3];
-
- output_asm_insn ("fnstcw\t%2", operands);
- output_asm_insn ("mov{l}\t{%2, %4|%4, %2}", operands);
- output_asm_insn ("mov{b}\t{%0, %1|%1, %0}", xops);
- output_asm_insn ("fldcw\t%2", operands);
- output_asm_insn ("mov{l}\t{%4, %2|%2, %4}", operands);
+ abort ();
+ output_asm_insn ("fldcw\t%3", operands);
if (stack_top_dies || dimode_p)
- output_asm_insn ("fistp%z2\t%2", xops);
+ output_asm_insn ("fistp%z0\t%0", operands);
else
- output_asm_insn ("fist%z2\t%2", xops);
-
+ output_asm_insn ("fist%z0\t%0", operands);
output_asm_insn ("fldcw\t%2", operands);
- if (GET_CODE (operands[0]) != MEM)
- {
- if (dimode_p)
- {
- split_di (operands+0, 1, xops+0, xops+1);
- split_di (operands+3, 1, xops+2, xops+3);
- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
- output_asm_insn ("mov{l}\t{%3, %1|%1, %3}", xops);
- }
- else if (GET_MODE (operands[0]) == SImode)
- output_asm_insn ("mov{l}\t{%3, %0|%0, %3}", operands);
- else
- output_asm_insn ("mov{w}\t{%3, %0|%0, %3}", operands);
- }
-
return "";
}
case QImode:
case HImode:
case SImode:
+ simple:
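+      /* In 64bit mode, DImode comparisons also branch through here, since
+	 they need no splitting.  */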
tmp = ix86_expand_compare (code, NULL, NULL);
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
gen_rtx_LABEL_REF (VOIDmode, label),
}
case DImode:
+ if (TARGET_64BIT)
+ goto simple;
/* Expand DImode branch into multiple compare+branch. */
{
rtx lo[2], hi[2], label2;
rtx second_test, bypass_test;
int type;
- if (GET_MODE (ix86_compare_op0) == DImode)
+ if (GET_MODE (ix86_compare_op0) == DImode
+ && !TARGET_64BIT)
return 0; /* FAIL */
/* Three modes of generation:
HImode insns, we'd be swallowed in word prefix ops. */
if (GET_MODE (operands[0]) != HImode
+ && GET_MODE (operands[0]) != DImode
&& GET_CODE (operands[2]) == CONST_INT
&& GET_CODE (operands[3]) == CONST_INT)
{
* Size 5 - 8.
*/
if (ct)
- emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
+ emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
}
else if (cf == -1)
{
*
* Size 8.
*/
- emit_insn (gen_iorsi3 (out, out, GEN_INT (ct)));
+ emit_insn (gen_iorsi3 (tmp, tmp, GEN_INT (ct)));
}
else if (diff == -1 && ct)
{
*/
emit_insn (gen_one_cmplsi2 (tmp, tmp));
if (cf)
- emit_insn (gen_addsi3 (out, out, GEN_INT (cf)));
+ emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (cf)));
}
else
{
*
* Size 8 - 11.
*/
- emit_insn (gen_andsi3 (out, out, GEN_INT (cf - ct)));
+ emit_insn (gen_andsi3 (tmp, tmp, GEN_INT (trunc_int_for_mode
+ (cf - ct, SImode))));
if (ct)
- emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
+ emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (ct)));
}
if (tmp != out)
ix86_compare_op1, VOIDmode, 0, 1);
nops = 0;
+  /* On x86_64 the lea instruction operates on Pmode, so we need to do the
+     arithmetic in the proper mode to match.  */
if (diff == 1)
- tmp = out;
+ {
+ if (Pmode != SImode)
+ tmp = gen_lowpart (Pmode, out);
+ else
+ tmp = out;
+ }
else
{
- tmp = gen_rtx_MULT (SImode, out, GEN_INT (diff & ~1));
+ rtx out1;
+ if (Pmode != SImode)
+ out1 = gen_lowpart (Pmode, out);
+ else
+ out1 = out;
+ tmp = gen_rtx_MULT (Pmode, out1, GEN_INT (diff & ~1));
nops++;
if (diff & 1)
{
- tmp = gen_rtx_PLUS (SImode, tmp, out);
+ tmp = gen_rtx_PLUS (Pmode, tmp, out1);
nops++;
}
}
if (cf != 0)
{
- tmp = gen_rtx_PLUS (SImode, tmp, GEN_INT (cf));
+ tmp = gen_rtx_PLUS (Pmode, tmp, GEN_INT (cf));
nops++;
}
- if (tmp != out)
+ if (tmp != out
+ && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
{
- if (nops == 0)
- emit_move_insn (out, tmp);
- else if (nops == 1)
+ if (Pmode != SImode)
+ tmp = gen_rtx_SUBREG (SImode, tmp, 0);
+
+      /* ??? We ought to handle the non-lea arithmetic for the
+	 Pmode != SImode case too, but it is quite tricky and not
+	 too important, since all TARGET_64BIT machines support real
+	 conditional moves.  */
+ if (nops == 1 && Pmode == SImode)
{
rtx clob;
ix86_compare_op1, VOIDmode, 0, 1);
emit_insn (gen_addsi3 (out, out, constm1_rtx));
- emit_insn (gen_andsi3 (out, out, GEN_INT (cf-ct)));
+ emit_insn (gen_andsi3 (out, out, GEN_INT (trunc_int_for_mode
+ (cf - ct, SImode))));
if (ct != 0)
emit_insn (gen_addsi3 (out, out, GEN_INT (ct)));
if (out != operands[0])
emit_move_insn (tmp, operands[2]);
operands[2] = tmp;
}
+ if (! register_operand (operands[2], VOIDmode)
+ && ! register_operand (operands[3], VOIDmode))
+ operands[2] = force_reg (GET_MODE (operands[0]), operands[2]);
emit_insn (compare_seq);
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
if (((TARGET_SSE && GET_MODE (operands[0]) == SFmode)
|| (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))
&& GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
+      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
+ && (!TARGET_IEEE_FP
+ || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
/* We may be called from the post-reload splitter. */
&& (!REG_P (operands[0])
|| SSE_REG_P (operands[0])
ix86_compare_op1);
}
  /* Similarly, try to arrange for the result to be the first operand of the
- move. */
- if (rtx_equal_p (operands[0], operands[3]))
+     conditional move.  We also don't support the NE comparison on SSE,
+     so try to avoid it.  */
+ if ((rtx_equal_p (operands[0], operands[3])
+ && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
+ || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
{
rtx tmp = operands[2];
operands[2] = operands[3];
- operands[2] = tmp;
+ operands[3] = tmp;
operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
(GET_CODE (operands[1])),
VOIDmode, ix86_compare_op0,
rtx *parts;
enum machine_mode mode;
{
- int size = mode == TFmode ? 3 : GET_MODE_SIZE (mode) / 4;
+ int size;
+
+ if (!TARGET_64BIT)
+ size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
+ else
+ size = (GET_MODE_SIZE (mode) + 4) / 8;
if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
abort ();
if (! push_operand (operand, VOIDmode))
abort ();
- PUT_MODE (operand, SImode);
+ operand = copy_rtx (operand);
+ PUT_MODE (operand, Pmode);
parts[0] = parts[1] = parts[2] = operand;
}
- else
+ else if (!TARGET_64BIT)
{
if (mode == DImode)
split_di (&operand, 1, &parts[0], &parts[1]);
}
else if (offsettable_memref_p (operand))
{
- PUT_MODE (operand, SImode);
+ operand = adjust_address (operand, SImode, 0);
parts[0] = operand;
parts[1] = adj_offsettable_operand (operand, 4);
if (size == 3)
abort ();
}
}
+ else
+ {
+ if (mode == XFmode || mode == TFmode)
+ {
+ if (REG_P (operand))
+ {
+ if (!reload_completed)
+ abort ();
+ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
+ parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
+ }
+ else if (offsettable_memref_p (operand))
+ {
+ operand = change_address (operand, DImode, XEXP (operand, 0));
+ parts[0] = operand;
+ parts[1] = adj_offsettable_operand (operand, 8);
+ parts[1] = change_address (parts[1], SImode, XEXP (parts[1], 0));
+ }
+ else if (GET_CODE (operand) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long l[3];
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
+ REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
+ /* Do not use shift by 32 to avoid warning on 32bit systems. */
+ if (HOST_BITS_PER_WIDE_INT >= 64)
+ parts[0] = GEN_INT (l[0] + ((l[1] << 31) << 1));
+ else
+ parts[0] = immed_double_const (l[0], l[1], DImode);
+ parts[1] = GEN_INT (l[2]);
+ }
+ else
+ abort ();
+ }
+ }
return size;
}
insns have been emitted. Operands 2-4 contain the input values
int the correct order; operands 5-7 contain the output values. */
-int
-ix86_split_long_move (operands1)
- rtx operands1[];
+void
+ix86_split_long_move (operands)
+ rtx operands[];
{
rtx part[2][3];
- rtx operands[2];
- int size;
+ int nparts;
int push = 0;
int collisions = 0;
+ enum machine_mode mode = GET_MODE (operands[0]);
+
+  /* The DFmode expanders may ask us to move doubles.
+     For 64bit targets this is a single move.  By hiding the fact
+     here we simplify the i386.md splitters.  */
+ if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
+ {
+      /* Optimize constant pool references into immediates.  This is used
+	 by fp moves, which force all constants to memory to allow
+	 combining.  */
- /* Make our own copy to avoid clobbering the operands. */
- operands[0] = copy_rtx (operands1[0]);
- operands[1] = copy_rtx (operands1[1]);
+ if (GET_CODE (operands[1]) == MEM
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
+ operands[1] = get_pool_constant (XEXP (operands[1], 0));
+ if (push_operand (operands[0], VOIDmode))
+ {
+ operands[0] = copy_rtx (operands[0]);
+ PUT_MODE (operands[0], Pmode);
+ }
+ else
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ emit_move_insn (operands[0], operands[1]);
+ return;
+ }
/* The only non-offsettable memory we handle is push. */
if (push_operand (operands[0], VOIDmode))
&& ! offsettable_memref_p (operands[0]))
abort ();
- size = ix86_split_to_parts (operands[0], part[0], GET_MODE (operands1[0]));
- ix86_split_to_parts (operands[1], part[1], GET_MODE (operands1[0]));
+ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
+ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting push, take care for source operands on the stack. */
if (push && GET_CODE (operands[1]) == MEM
&& reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
{
- if (size == 3)
- part[1][1] = part[1][2];
- part[1][0] = part[1][1];
+ if (nparts == 3)
+ part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
+ XEXP (part[1][2], 0));
+ part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
+ XEXP (part[1][1], 0));
}
  /* We need to do the copy in the right order in case an address register
collisions++;
if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
collisions++;
- if (size == 3
+ if (nparts == 3
&& reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
collisions++;
/* Collision in the middle part can be handled by reordering. */
- if (collisions == 1 && size == 3
+ if (collisions == 1 && nparts == 3
&& reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
{
rtx tmp;
else if (collisions > 1)
{
collisions = 1;
- emit_insn (gen_rtx_SET (VOIDmode, part[0][size - 1],
+ emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
XEXP (part[1][0], 0)));
- part[1][0] = change_address (part[1][0], SImode, part[0][size - 1]);
- part[1][1] = adj_offsettable_operand (part[1][0], 4);
- if (size == 3)
+ part[1][0] = change_address (part[1][0],
+ TARGET_64BIT ? DImode : SImode,
+ part[0][nparts - 1]);
+ part[1][1] = adj_offsettable_operand (part[1][0],
+ UNITS_PER_WORD);
+ part[1][1] = change_address (part[1][1], GET_MODE (part[0][1]),
+ XEXP (part[1][1], 0));
+ if (nparts == 3)
part[1][2] = adj_offsettable_operand (part[1][0], 8);
}
}
if (push)
{
- if (size == 3)
+ if (!TARGET_64BIT)
{
- /* We use only first 12 bytes of TFmode value, but for pushing we
- are required to adjust stack as if we were pushing real 16byte
- value. */
- if (GET_MODE (operands1[0]) == TFmode)
- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (-4)));
- emit_insn (gen_push (part[1][2]));
+ if (nparts == 3)
+ {
+	  /* We use only the first 12 bytes of a TFmode value, but for
+	     pushing we are required to adjust the stack as if we were
+	     pushing a real 16 byte value.  */
+ if (mode == TFmode && !TARGET_64BIT)
+ emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-4)));
+ emit_move_insn (part[0][2], part[1][2]);
+ }
}
- emit_insn (gen_push (part[1][1]));
- emit_insn (gen_push (part[1][0]));
- return 1;
+ else
+ {
+      /* In 64bit mode we don't have a 32bit push available.  In case this
+	 is a register, it is OK - we will just use the larger counterpart.
+	 We also retype the memory - these operands come from an attempt to
+	 avoid a REX prefix on moving the second half of a TFmode value.  */
+ if (GET_MODE (part[1][1]) == SImode)
+ {
+ if (GET_CODE (part[1][1]) == MEM)
+ part[1][1] = adjust_address (part[1][1], DImode, 0);
+ else if (REG_P (part[1][1]))
+ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
+ else
+	    abort ();
+ if (GET_MODE (part[1][0]) == SImode)
+ part[1][0] = part[1][1];
+ }
+ }
+ emit_move_insn (part[0][1], part[1][1]);
+ emit_move_insn (part[0][0], part[1][0]);
+ return;
}
/* Choose correct order to not overwrite the source before it is copied. */
if ((REG_P (part[0][0])
&& REG_P (part[1][1])
&& (REGNO (part[0][0]) == REGNO (part[1][1])
- || (size == 3
+ || (nparts == 3
&& REGNO (part[0][0]) == REGNO (part[1][2]))))
|| (collisions > 0
&& reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
{
- if (size == 3)
+ if (nparts == 3)
{
- operands1[2] = part[0][2];
- operands1[3] = part[0][1];
- operands1[4] = part[0][0];
- operands1[5] = part[1][2];
- operands1[6] = part[1][1];
- operands1[7] = part[1][0];
+ operands[2] = part[0][2];
+ operands[3] = part[0][1];
+ operands[4] = part[0][0];
+ operands[5] = part[1][2];
+ operands[6] = part[1][1];
+ operands[7] = part[1][0];
}
else
{
- operands1[2] = part[0][1];
- operands1[3] = part[0][0];
- operands1[5] = part[1][1];
- operands1[6] = part[1][0];
+ operands[2] = part[0][1];
+ operands[3] = part[0][0];
+ operands[5] = part[1][1];
+ operands[6] = part[1][0];
}
}
else
{
- if (size == 3)
+ if (nparts == 3)
{
- operands1[2] = part[0][0];
- operands1[3] = part[0][1];
- operands1[4] = part[0][2];
- operands1[5] = part[1][0];
- operands1[6] = part[1][1];
- operands1[7] = part[1][2];
+ operands[2] = part[0][0];
+ operands[3] = part[0][1];
+ operands[4] = part[0][2];
+ operands[5] = part[1][0];
+ operands[6] = part[1][1];
+ operands[7] = part[1][2];
}
else
{
- operands1[2] = part[0][0];
- operands1[3] = part[0][1];
- operands1[5] = part[1][0];
- operands1[6] = part[1][1];
+ operands[2] = part[0][0];
+ operands[3] = part[0][1];
+ operands[5] = part[1][0];
+ operands[6] = part[1][1];
}
}
+ emit_move_insn (operands[2], operands[5]);
+ emit_move_insn (operands[3], operands[6]);
+ if (nparts == 3)
+ emit_move_insn (operands[4], operands[7]);
- return 0;
+ return;
}
void
}
}
+/* Helper function for the string operations below.  Test whether
+   (VARIABLE & VALUE) is zero, i.e. whether VARIABLE is aligned to VALUE
+   bytes; if so, jump to the returned label.  */
+static rtx
+ix86_expand_aligntest (variable, value)
+ rtx variable;
+ int value;
+{
+ rtx label = gen_label_rtx ();
+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
+ if (GET_MODE (variable) == DImode)
+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
+ else
+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
+ 1, 0, label);
+ return label;
+}
+
+/* Decrease COUNTREG by VALUE.  */
+static void
+ix86_adjust_counter (countreg, value)
+ rtx countreg;
+ HOST_WIDE_INT value;
+{
+ if (GET_MODE (countreg) == DImode)
+ emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
+ else
+ emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
+}
+
+/* Zero extend a possibly SImode EXP into a Pmode register.  */
+static rtx
+ix86_zero_extend_to_Pmode (exp)
+ rtx exp;
+{
+ rtx r;
+ if (GET_MODE (exp) == VOIDmode)
+ return force_reg (Pmode, exp);
+ if (GET_MODE (exp) == Pmode)
+ return copy_to_mode_reg (Pmode, exp);
+ r = gen_reg_rtx (Pmode);
+ emit_insn (gen_zero_extendsidi2 (r, exp));
+ return r;
+}
+
+/* Expand string move (memcpy) operation. Use i386 string operations when
+ profitable. expand_clrstr contains similar code. */
+int
+ix86_expand_movstr (dst, src, count_exp, align_exp)
+ rtx dst, src, count_exp, align_exp;
+{
+ rtx srcreg, destreg, countreg;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+ rtx insns;
+
+ start_sequence ();
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+  /* This simple hack avoids all inlining code and simplifies the code
+     below.  */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 64;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ count = INTVAL (count_exp);
+
+  /* Figure out the proper mode for the counter.  For 32bits it is always
+     SImode, for 64bits use SImode when possible, otherwise DImode.
+     COUNT was set above to the number of bytes copied when known at
+     compile time.  */
+ if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
+ || x86_64_zero_extended_value (count_exp))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ if (counter_mode != SImode && counter_mode != DImode)
+ abort ();
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
+ srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+
+ emit_insn (gen_cld ());
+
+  /* When optimizing for size, emit a simple rep ; movsb instruction for
+     counts not divisible by 4.  */
+
+ if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
+ {
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ else
+ emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ }
+
+ /* For constant aligned (or small unaligned) copies use rep movsl
+ followed by code copying the rest. For PentiumPro ensure 8 byte
+ alignment to allow rep movsl acceleration. */
+
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int)64))
+ {
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ if (count & ~(size - 1))
+ {
+ countreg = copy_to_mode_reg (counter_mode,
+ GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff)));
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+ if (size == 4)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ else
+ emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ }
+ else
+ emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
+ destreg, srcreg, countreg));
+ }
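+      /* Mop up the remaining (constant) 1 to size-1 bytes with at most
+	 one movsl, one movsw and one movsb.  */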
+ if (size == 8 && (count & 0x04))
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ if (count & 0x02)
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ if (count & 0x01)
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ }
+ /* The generic code based on the glibc implementation:
+ - align destination to 4 bytes (8 byte alignment is used for PentiumPro
+ allowing accelerated copying there)
+ - copy the data using rep movsl
+ - copy the rest. */
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+
+      /* In case we don't know anything about the alignment, default to
+	 the library version, since it is usually equally fast and results
+	 in shorter code.  */
+ if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
+ {
+ end_sequence ();
+ return 0;
+ }
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+
+ /* We don't use loops to align destination and to copy parts smaller
+ than 4 bytes, because gcc is able to optimize such code better (in
+ the case the destination or the count really is aligned, gcc is often
+ able to predict the branches) and also it is friendlier to the
+ hardware branch prediction.
+
+	 Using loops is beneficial for the generic case, because we can
+	 handle small counts using the loops.  Many CPUs (such as Athlon)
+	 have large REP prefix setup costs.
+
+	 This is quite costly.  Maybe we can revisit this decision later or
+	 add some customizability to this code.  */
+
+ if (count == 0
+ && align < (TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260)
+ ? 8 : UNITS_PER_WORD))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
+ LEU, 0, counter_mode, 1, 0, label);
+ }
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4
+ && ((TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260))
+ || TARGET_64BIT))
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
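+
+      /* Turn the byte count into a word count (shift right by 3 for
+	 64bit, by 2 for 32bit) and do the bulk of the copy with a single
+	 rep movs.  */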
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
+ destreg, srcreg, countreg2));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
+ emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
+ destreg, srcreg, countreg2));
+ }
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ if ((align <= 4 || count == 0) && TARGET_64BIT)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ emit_insn (gen_strmovsi (destreg, srcreg));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ emit_insn (gen_strmovhi (destreg, srcreg));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ emit_insn (gen_strmovqi (destreg, srcreg));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+
+ insns = get_insns ();
+ end_sequence ();
+
+ ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
+ emit_insns (insns);
+ return 1;
+}
+
+/* Expand string clear operation (bzero). Use i386 string operations when
+ profitable. expand_movstr contains similar code. */
+int
+ix86_expand_clrstr (src, count_exp, align_exp)
+ rtx src, count_exp, align_exp;
+{
+ rtx destreg, zeroreg, countreg;
+ enum machine_mode counter_mode;
+ HOST_WIDE_INT align = 0;
+ unsigned HOST_WIDE_INT count = 0;
+
+ if (GET_CODE (align_exp) == CONST_INT)
+ align = INTVAL (align_exp);
+
+  /* This simple hack avoids all inlining code and simplifies the code
+     below.  */
+ if (!TARGET_ALIGN_STRINGOPS)
+ align = 32;
+
+ if (GET_CODE (count_exp) == CONST_INT)
+ count = INTVAL (count_exp);
+  /* Figure out the proper mode for the counter.  For 32bits it is always
+     SImode, for 64bits use SImode when possible, otherwise DImode.
+     COUNT was set above to the number of bytes to clear when known at
+     compile time.  */
+ if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
+ || x86_64_zero_extended_value (count_exp))
+ counter_mode = SImode;
+ else
+ counter_mode = DImode;
+
+ destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
+
+ emit_insn (gen_cld ());
+
+  /* When optimizing for size, emit a simple rep ; stosb instruction for
+     counts not divisible by 4.  */
+
+ if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
+ {
+ countreg = ix86_zero_extend_to_Pmode (count_exp);
+ zeroreg = copy_to_mode_reg (QImode, const0_rtx);
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
+ destreg, countreg));
+ else
+ emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
+ destreg, countreg));
+ }
+ else if (count != 0
+ && (align >= 8
+ || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
+ || optimize_size || count < (unsigned int)64))
+ {
+ int size = TARGET_64BIT && !optimize_size ? 8 : 4;
+ zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
+ if (count & ~(size - 1))
+ {
+ countreg = copy_to_mode_reg (counter_mode,
+ GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff)));
+ countreg = ix86_zero_extend_to_Pmode (countreg);
+ if (size == 4)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
+ destreg, countreg));
+ else
+ emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
+ destreg, countreg));
+ }
+ else
+ emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
+ destreg, countreg));
+ }
+ if (size == 8 && (count & 0x04))
+ emit_insn (gen_strsetsi (destreg,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ if (count & 0x02)
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ if (count & 0x01)
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ }
+ else
+ {
+ rtx countreg2;
+ rtx label = NULL;
+
+      /* In case we don't know anything about the alignment, default to
+	 the library version, since it is usually equally fast and results
+	 in shorter code.  */
+ if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
+ return 0;
+
+ if (TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+
+ countreg2 = gen_reg_rtx (Pmode);
+ countreg = copy_to_mode_reg (counter_mode, count_exp);
+ zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
+
+ if (count == 0
+ && align < (TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260)
+ ? 8 : UNITS_PER_WORD))
+ {
+ label = gen_label_rtx ();
+ emit_cmp_and_jump_insns (countreg, GEN_INT (UNITS_PER_WORD - 1),
+ LEU, 0, counter_mode, 1, 0, label);
+ }
+ if (align <= 1)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 1);
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 1);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 2)
+ {
+ rtx label = ix86_expand_aligntest (destreg, 2);
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ ix86_adjust_counter (countreg, 2);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align <= 4 && TARGET_PENTIUMPRO && (count == 0
+ || count >= (unsigned int)260))
+ {
+ rtx label = ix86_expand_aligntest (destreg, 4);
+ emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
+ ? gen_rtx_SUBREG (SImode, zeroreg, 0)
+ : zeroreg)));
+ ix86_adjust_counter (countreg, 4);
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+
+ if (!TARGET_SINGLE_STRINGOP)
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
+ GEN_INT (3)));
+ emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
+ destreg, countreg2));
+ }
+ else
+ {
+ emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
+ emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
+ destreg, countreg2));
+ }
+
+ if (label)
+ {
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
+ emit_insn (gen_strsetsi (destreg,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ if (TARGET_64BIT && (align <= 4 || count == 0))
+ {
+ rtx label = ix86_expand_aligntest (countreg, 4);
+ emit_insn (gen_strsetsi (destreg,
+ gen_rtx_SUBREG (SImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 2 && count != 0 && (count & 2))
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ if (align <= 2 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 2);
+ emit_insn (gen_strsethi (destreg,
+ gen_rtx_SUBREG (HImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ if (align > 1 && count != 0 && (count & 1))
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ if (align <= 1 || count == 0)
+ {
+ rtx label = ix86_expand_aligntest (countreg, 1);
+ emit_insn (gen_strsetqi (destreg,
+ gen_rtx_SUBREG (QImode, zeroreg, 0)));
+ emit_label (label);
+ LABEL_NUSES (label) = 1;
+ }
+ }
+ return 1;
+}
+
+/* Expand strlen.  Return 1 if the operation was expanded inline,
+   0 if the caller should fall back on a library call. */
+int
+ix86_expand_strlen (out, src, eoschar, align)
+ rtx out, src, eoschar, align;
+{
+ rtx addr, scratch1, scratch2, scratch3, scratch4;
+
+ /* The generic case of the strlen expander is long.  Avoid expanding it
+ unless TARGET_INLINE_ALL_STRINGOPS. */
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !TARGET_INLINE_ALL_STRINGOPS
+ && !optimize_size
+ && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
+ return 0;
+
+ addr = force_reg (Pmode, XEXP (src, 0));
+ scratch1 = gen_reg_rtx (Pmode);
+
+ if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+ && !optimize_size)
+ {
+ /* Well it seems that some optimizer does not combine a call like
+ foo (strlen (bar), strlen (bar));
+ when the move and the subtraction are done here.  It calculates the
+ length just once when these instructions are done inside of
+ output_strlen_unroll ().  But since &bar[strlen (bar)] is often used,
+ and this uses one fewer register for the lifetime of
+ output_strlen_unroll (), this is better. */
+
+ emit_move_insn (out, addr);
+
+ ix86_expand_strlensi_unroll_1 (out, align);
+
+ /* strlensi_unroll_1 returns the address of the zero at the end of
+ the string, like memchr(), so compute the length by subtracting
+ the start address. */
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3 (out, out, addr));
+ else
+ emit_insn (gen_subsi3 (out, out, addr));
+ }
+ else
+ {
+ scratch2 = gen_reg_rtx (Pmode);
+ scratch3 = gen_reg_rtx (Pmode);
+ scratch4 = force_reg (Pmode, constm1_rtx);
+
+ emit_move_insn (scratch3, addr);
+ eoschar = force_reg (QImode, eoschar);
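+
+ /* The rep ; scasb idiom below: the count register starts at -1 and is
+ decremented for every byte scanned, including the terminator, so it
+ ends at -(len + 2); the not/add pair afterwards recovers len.  For
+ "abc" it ends at -5, ~(-5) = 4, and 4 - 1 = 3. */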
+
+ emit_insn (gen_cld ());
+ if (TARGET_64BIT)
+ {
+ emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
+ align, scratch4, scratch3));
+ emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
+ emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
+ }
+ else
+ {
+ emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
+ align, scratch4, scratch3));
+ emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
+ emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
+ }
+ }
+ return 1;
+}
+
/* Expand the appropriate insns for doing strlen if not just doing
repnz; scasb
This is just the body. It needs the initialisations mentioned above and
some address computing at the end. These things are done in i386.md. */
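+
+/* Schematically (a sketch, not exact output), the expansion is:
+
+	align OUT to a 4-byte boundary, checking bytes one at a time;
+   loop:
+	movl	(%out), %scratch
+	addl	$4, %out
+	compute (scratch - 0x01010101) & ~scratch & 0x80808080
+	loop again while the result is zero (no zero byte in the word);
+	finally locate the zero byte within the word and adjust OUT. */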
-void
-ix86_expand_strlensi_unroll_1 (out, align_rtx, scratch)
- rtx out, align_rtx, scratch;
+static void
+ix86_expand_strlensi_unroll_1 (out, align_rtx)
+ rtx out, align_rtx;
{
int align;
rtx tmp;
rtx end_0_label = gen_label_rtx ();
rtx mem;
rtx tmpreg = gen_reg_rtx (SImode);
+ rtx scratch = gen_reg_rtx (SImode);
align = 0;
if (GET_CODE (align_rtx) == CONST_INT)
/* Is there a known alignment and is it less than 4? */
if (align < 4)
{
+ rtx scratch1 = gen_reg_rtx (Pmode);
+ emit_move_insn (scratch1, out);
/* Is there a known alignment and is it not 2? */
if (align != 2)
{
align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
/* Keep just the two lower bits. */
- align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (3),
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- SImode, 1, 0, align_4_label);
+ Pmode, 1, 0, align_4_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
- SImode, 1, 0, align_2_label);
+ Pmode, 1, 0, align_2_label);
emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
- SImode, 1, 0, align_3_label);
+ Pmode, 1, 0, align_3_label);
}
else
{
/* Since the alignment is 2, we have to check 2 or 0 bytes;
check if is aligned to 4 - byte. */
- align_rtx = expand_binop (SImode, and_optab, scratch, GEN_INT (2),
+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
NULL_RTX, 0, OPTAB_WIDEN);
emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
- SImode, 1, 0, align_4_label);
+ Pmode, 1, 0, align_4_label);
}
mem = gen_rtx_MEM (QImode, out);
QImode, 1, 0, end_0_label);
/* Increment the address. */
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
/* Not needed with an alignment of 2 */
if (align != 2)
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
emit_label (align_3_label);
}
emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
QImode, 1, 0, end_0_label);
- emit_insn (gen_addsi3 (out, out, const1_rtx));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, const1_rtx));
+ else
+ emit_insn (gen_addsi3 (out, out, const1_rtx));
}
/* Generate loop to check 4 bytes at a time. It is not a good idea to
mem = gen_rtx_MEM (SImode, out);
emit_move_insn (scratch, mem);
- emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
+ else
+ emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
/* This formula yields a nonzero result iff one of the bytes is zero.
 This saves three branches inside the loop and many cycles. */
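+ /* Concretely: for V = 0x12003456, V - 0x01010101 = 0x10ff3355 and
+ ~V = 0xedffcba9; ANDing those with 0x80808080 leaves 0x00800000,
+ flagging the zero byte.  The lowest set 0x80 bit always marks the
+ first zero byte, and a word without a zero byte yields 0. */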
emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
emit_insn (gen_one_cmplsi2 (scratch, scratch));
emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
- emit_insn (gen_andsi3 (tmpreg, tmpreg, GEN_INT (0x80808080)));
+ emit_insn (gen_andsi3 (tmpreg, tmpreg,
+ GEN_INT (trunc_int_for_mode
+ (0x80808080, SImode))));
emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0,
SImode, 1, 0, align_4_label);
if (TARGET_CMOVE)
{
rtx reg = gen_reg_rtx (SImode);
+ rtx reg2 = gen_reg_rtx (Pmode);
emit_move_insn (reg, tmpreg);
emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
reg,
tmpreg)));
/* Emit lea manually to avoid clobbering of flags. */
- emit_insn (gen_rtx_SET (SImode, reg,
- gen_rtx_PLUS (SImode, out, GEN_INT (2))));
+ emit_insn (gen_rtx_SET (SImode, reg2,
+ gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
emit_insn (gen_rtx_SET (VOIDmode, out,
- gen_rtx_IF_THEN_ELSE (SImode, tmp,
- reg,
- out)));
+ gen_rtx_IF_THEN_ELSE (Pmode, tmp,
+ reg2,
+ out)));
}
else
/* Not in the first two. Move two bytes forward. */
emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
- emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
+ if (TARGET_64BIT)
+ emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
+ else
+ emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
emit_label (end_2_label);
/* Avoid branch in fixing the byte. */
tmpreg = gen_lowpart (QImode, tmpreg);
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
- emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
+ if (TARGET_64BIT)
+ emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
+ else
+ emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
emit_label (end_0_label);
}
int len;
if (GET_CODE (addr) == PRE_DEC
- || GET_CODE (addr) == POST_INC)
+ || GET_CODE (addr) == POST_INC
+ || GET_CODE (addr) == PRE_MODIFY
+ || GET_CODE (addr) == POST_MODIFY)
return 0;
if (! ix86_decompose_address (addr, &parts))
insn_type = get_attr_type (insn);
dep_insn_type = get_attr_type (dep_insn);
- /* Prologue and epilogue allocators can have a false dependency on ebp.
- This results in one cycle extra stall on Pentium prologue scheduling,
- so handle this important case manually. */
- if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
- && dep_insn_type == TYPE_ALU
- && !reg_mentioned_p (stack_pointer_rtx, insn))
- return 0;
-
switch (ix86_cpu)
{
case PROCESSOR_PENTIUM:
|| TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
return 256;
+ /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
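+ /* TYPE_SIZE is measured in bits: e.g. a 16-byte array has TYPE_SIZE
+ 128 >= 128 and is raised to 128-bit (16-byte) alignment here. */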
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
+
if (TREE_CODE (type) == ARRAY_TYPE)
{
if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
tree type;
int align;
{
+ /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
+ to a 16-byte boundary. */
+ if (TARGET_64BIT)
+ {
+ if (AGGREGATE_TYPE_P (type)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ return 128;
+ }
if (TREE_CODE (type) == ARRAY_TYPE)
{
if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
}
return align;
}
+\f
+/* Emit RTL insns to initialize the variable parts of a trampoline.
+ FNADDR is an RTX for the address of the function's pure code.
+ CXT is an RTX for the static chain value for the function. */
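+
+/* For reference, the bytes emitted below decode as follows:
+
+     32-bit:  b9 <cxt:4>          movl   $cxt, %ecx
+              e9 <rel32:4>        jmp    fnaddr
+
+     64-bit:  41 bb <fnaddr:4>    movl   $fnaddr, %r11d
+         or:  49 bb <fnaddr:8>    movabs $fnaddr, %r11
+              49 ba <cxt:8>       movabs $cxt, %r10
+              49 ff e3            jmp    *%r11  */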
+void
+x86_initialize_trampoline (tramp, fnaddr, cxt)
+ rtx tramp, fnaddr, cxt;
+{
+ if (!TARGET_64BIT)
+ {
+ /* Compute offset from the end of the jmp to the target function. */
+ rtx disp = expand_binop (SImode, sub_optab, fnaddr,
+ plus_constant (tramp, 10),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ emit_move_insn (gen_rtx_MEM (QImode, tramp),
+ GEN_INT (trunc_int_for_mode (0xb9, QImode)));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
+ GEN_INT (trunc_int_for_mode (0xe9, QImode)));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
+ }
+ else
+ {
+ int offset = 0;
+ /* Try to load the address using the shorter movl instead of movabs.
+ We may want to support movq for kernel mode, but the kernel does not
+ use trampolines at the moment. */
+ if (x86_64_zero_extended_value (fnaddr))
+ {
+ fnaddr = copy_to_mode_reg (DImode, fnaddr);
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ GEN_INT (trunc_int_for_mode (0xbb41, HImode)));
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
+ gen_lowpart (SImode, fnaddr));
+ offset += 6;
+ }
+ else
+ {
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ GEN_INT (trunc_int_for_mode (0xbb49, HImode)));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ fnaddr);
+ offset += 10;
+ }
+ /* Load static chain using movabs to r10. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ GEN_INT (trunc_int_for_mode (0xba49, HImode)));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
+ cxt);
+ offset += 10;
+ /* Jump to r11. */
+ emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
+ GEN_INT (trunc_int_for_mode (0xff49, HImode)));
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
+ GEN_INT (trunc_int_for_mode (0xe3, QImode)));
+ offset += 3;
+ if (offset > TRAMPOLINE_SIZE)
+ abort ();
+ }
+}
#define def_builtin(NAME, TYPE, CODE) \
- builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL_PTR)
+ builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL)
struct builtin_description
{
enum insn_code icode;
case IX86_BUILTIN_SETPS1:
target = assign_386_stack_local (SFmode, 0);
arg0 = TREE_VALUE (arglist);
- emit_move_insn (change_address (target, SFmode, XEXP (target, 0)),
+ emit_move_insn (adjust_address (target, SFmode, 0),
expand_expr (arg0, NULL_RTX, VOIDmode, 0));
op0 = gen_reg_rtx (V4SFmode);
- emit_insn (gen_sse_loadss (op0, change_address (target, V4SFmode,
- XEXP (target, 0))));
+ emit_insn (gen_sse_loadss (op0, adjust_address (target, V4SFmode, 0)));
emit_insn (gen_sse_shufps (op0, op0, op0, GEN_INT (0)));
return op0;
case IX86_BUILTIN_SETPS:
target = assign_386_stack_local (V4SFmode, 0);
- op0 = change_address (target, SFmode, XEXP (target, 0));
+ op0 = adjust_address (target, SFmode, 0);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
enum machine_mode mode;
rtx operand;
{
+ rtx result;
if (!reload_completed)
abort ();
- switch (mode)
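+ /* The x86-64 ABI reserves a 128-byte red zone below the stack pointer
+ that signal and interrupt handlers do not clobber, so when it is
+ enabled we can store the operand there without moving the stack
+ pointer. */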
+ if (TARGET_64BIT && TARGET_RED_ZONE)
+ {
+ result = gen_rtx_MEM (mode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (-RED_ZONE_SIZE)));
+ emit_move_insn (result, operand);
+ }
+ else if (TARGET_64BIT && !TARGET_RED_ZONE)
{
- case DImode:
+ switch (mode)
{
- rtx operands[2];
- split_di (&operand, 1, operands, operands+1);
+ case HImode:
+ case SImode:
+ operand = gen_lowpart (DImode, operand);
+ /* FALLTHRU */
+ case DImode:
emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (SImode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx)),
- operands[1]));
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (DImode,
+ gen_rtx_PRE_DEC (DImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ abort ();
+ }
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
+ }
+ else
+ {
+ switch (mode)
+ {
+ case DImode:
+ {
+ rtx operands[2];
+ split_di (&operand, 1, operands, operands + 1);
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[1]));
+ emit_insn (
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (SImode,
+ gen_rtx_PRE_DEC (Pmode,
+ stack_pointer_rtx)),
+ operands[0]));
+ }
+ break;
+ case HImode:
+ /* It is better to store HImode values as SImode. */
+ if (!TARGET_PARTIAL_REG_STALL)
+ operand = gen_lowpart (SImode, operand);
+ /* FALLTHRU */
+ case SImode:
emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (SImode,
- gen_rtx_PRE_DEC (Pmode,
- stack_pointer_rtx)),
- operands[0]));
+ gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (operand),
+ gen_rtx_PRE_DEC (SImode,
+ stack_pointer_rtx)),
+ operand));
+ break;
+ default:
+ abort ();
}
- break;
- case HImode:
- /* It is better to store HImodes as SImodes. */
- if (!TARGET_PARTIAL_REG_STALL)
- operand = gen_lowpart (SImode, operand);
- /* FALLTHRU */
- case SImode:
- emit_insn (
- gen_rtx_SET (VOIDmode,
- gen_rtx_MEM (GET_MODE (operand),
- gen_rtx_PRE_DEC (SImode,
- stack_pointer_rtx)),
- operand));
- break;
- default:
- abort();
+ result = gen_rtx_MEM (mode, stack_pointer_rtx);
}
- return gen_rtx_MEM (mode, stack_pointer_rtx);
+ return result;
}
/* Free operand from the memory. */
ix86_free_from_memory (mode)
enum machine_mode mode;
{
- /* Use LEA to deallocate stack space. In peephole2 it will be converted
- to pop or add instruction if registers are available. */
- emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- GEN_INT (mode == DImode
- ? 8
- : mode == HImode && TARGET_PARTIAL_REG_STALL
- ? 2
- : 4))));
+ if (!TARGET_64BIT || !TARGET_RED_ZONE)
+ {
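+ /* On x86-64 everything was pushed as DImode by ix86_force_to_memory,
+ so always free 8 bytes there. */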
+ int size;
+
+ if (mode == DImode || TARGET_64BIT)
+ size = 8;
+ else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
+ size = 2;
+ else
+ size = 4;
+ /* Use LEA to deallocate stack space.  In peephole2 it will be
+ converted to a pop or add instruction if a register is available. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (size))));
+ }
}
/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
stall. Count this as an arbitrarily high cost of 20.
if (ix86_secondary_memory_needed (class1, class2, mode, 0))
{
+ int add_cost = 0;
if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
- return 10;
+ add_cost = 20;
return (MEMORY_MOVE_COST (mode, class1, 0)
- + MEMORY_MOVE_COST (mode, class2, 1));
+ + MEMORY_MOVE_COST (mode, class2, 1) + add_cost);
}
- /* Moves between SSE/MMX and integer unit are expensive.
- ??? We should make this cost CPU specific. */
+ /* Moves between SSE/MMX and integer unit are expensive. */
if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
|| SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
return ix86_cost->mmxsse_to_integer;