/* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
in basic block BB. Delete it if the upper 128 bits of the AVX registers
are unused. If it isn't deleted, move it to just before a jump insn.
-
+
STATE is the state of the upper 128 bits of the AVX registers at entry. */
static void
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
the auto-vectorizer. */
- m_BDVER
+ m_BDVER
};
/* Feature tests against the various architecture variations. */
{ "-mmmx", OPTION_MASK_ISA_MMX },
{ "-mabm", OPTION_MASK_ISA_ABM },
{ "-mbmi", OPTION_MASK_ISA_BMI },
+ { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
{ "-mtbm", OPTION_MASK_ISA_TBM },
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
PTA_RDRND = 1 << 25,
PTA_F16C = 1 << 26,
PTA_BMI = 1 << 27,
- PTA_TBM = 1 << 28
+ PTA_TBM = 1 << 28,
+ PTA_LZCNT = 1 << 29
/* if this reaches 32, need to widen struct pta flags below */
};
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
+ {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
+ | PTA_RDRND | PTA_F16C},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
if (processor_alias_table[i].flags & PTA_BMI
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
ix86_isa_flags |= OPTION_MASK_ISA_BMI;
+ if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
+ ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
if (processor_alias_table[i].flags & PTA_TBM
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
ix86_isa_flags |= OPTION_MASK_ISA_TBM;
if (TARGET_SSE4_2 || TARGET_ABM)
ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
+ /* Turn on the lzcnt instruction for -mabm. */
+ if (TARGET_ABM)
+ ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
+
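The `& ~ix86_isa_flags_explicit` mask is what lets an explicit -mno-lzcnt override the implication from -mabm. A minimal standalone C sketch of the idiom (not from the patch; the mask value is hypothetical):

    #include <stdio.h>

    #define OPTION_MASK_ISA_LZCNT 0x1   /* hypothetical bit value */

    int main (void)
    {
      int isa_flags = 0;
      /* Pretend the user passed -mabm -mno-lzcnt: the explicit set
         records the LZCNT bit, so the implication is masked out.  */
      int isa_flags_explicit = OPTION_MASK_ISA_LZCNT;

      isa_flags |= OPTION_MASK_ISA_LZCNT & ~isa_flags_explicit;
      printf ("LZCNT enabled: %d\n", (isa_flags & OPTION_MASK_ISA_LZCNT) != 0);
      return 0;
    }

This prints "LZCNT enabled: 0": the implied bit defers to the user's explicit choice.
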
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
target_flags |= MASK_PREFER_AVX128;
}
}
- else
+ else
{
/* Disable vzeroupper pass if TARGET_AVX is disabled. */
target_flags &= ~MASK_VZEROUPPER;
IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
IX86_ATTR_ISA ("abm", OPT_mabm),
IX86_ATTR_ISA ("bmi", OPT_mbmi),
+ IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
IX86_ATTR_ISA ("tbm", OPT_mtbm),
IX86_ATTR_ISA ("aes", OPT_maes),
IX86_ATTR_ISA ("avx", OPT_mavx),
optimize any indirect call, or a direct call to a global function,
as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
if (!TARGET_MACHO
- && !TARGET_64BIT
- && flag_pic
+ && !TARGET_64BIT
+ && flag_pic
&& (!decl || !targetm.binds_local_p (decl)))
return false;
return gen_rtx_REG (mode, AX_REG);
}
}
+ else if (POINTER_TYPE_P (valtype))
+ {
+ /* Pointers are always returned in Pmode. */
+ mode = Pmode;
+ }
ret = construct_container (mode, orig_mode, valtype, 1,
X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
+/* Pointer function arguments and return values are promoted to Pmode. */
+
+static enum machine_mode
+ix86_promote_function_mode (const_tree type, enum machine_mode mode,
+ int *punsignedp, const_tree fntype,
+ int for_return)
+{
+ if (type != NULL_TREE && POINTER_TYPE_P (type))
+ {
+ *punsignedp = POINTERS_EXTEND_UNSIGNED;
+ return Pmode;
+ }
+ return default_promote_function_mode (type, mode, punsignedp, fntype,
+ for_return);
+}
+
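The hook matters most when ptr_mode and Pmode differ (the ILP32-on-64-bit case): pointer arguments and return values are then widened, and POINTERS_EXTEND_UNSIGNED makes that widening a zero extension. A standalone C illustration of the extension rule (not from the patch):

    #include <stdint.h>
    #include <stdio.h>

    int main (void)
    {
      uint32_t ptr32 = 0x80001000u;      /* a 32-bit (SImode) pointer value */
      uint64_t reg = (uint64_t) ptr32;   /* zero-extend into the wide register */

      printf ("%#llx\n", (unsigned long long) reg);  /* 0x80001000, never sign-extended */
      return 0;
    }
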
rtx
ix86_libcall_value (enum machine_mode mode)
{
alias_set_type set = get_varargs_alias_set ();
int i;
+ /* Reset to zero, as there might be a SYSV va_arg used
+ before. */
+ ix86_varargs_gpr_size = 0;
+ ix86_varargs_fpr_size = 0;
+
for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
{
rtx reg, mem;
ovf_rtx = cfun->machine->split_stack_varargs_pointer;
t = make_tree (type, ovf_rtx);
if (words != 0)
- t = build2 (POINTER_PLUS_EXPR, type, t,
- size_int (words * UNITS_PER_WORD));
+ t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
t = build2 (MODIFY_EXPR, type, ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
type = TREE_TYPE (sav);
t = make_tree (type, frame_pointer_rtx);
if (!ix86_varargs_gpr_size)
- t = build2 (POINTER_PLUS_EXPR, type, t,
- size_int (-8 * X86_64_REGPARM_MAX));
+ t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
t = build2 (MODIFY_EXPR, type, sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (needed_intregs)
{
/* int_addr = gpr + sav; */
- t = fold_convert (sizetype, gpr);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ t = fold_build_pointer_plus (sav, gpr);
gimplify_assign (int_addr, t, pre_p);
}
if (needed_sseregs)
{
/* sse_addr = fpr + sav; */
- t = fold_convert (sizetype, fpr);
- t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
+ t = fold_build_pointer_plus (sav, fpr);
gimplify_assign (sse_addr, t, pre_p);
}
if (need_temp)
src_offset = REGNO (reg) * 8;
}
src_addr = fold_convert (addr_type, src_addr);
- src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
- size_int (src_offset));
+ src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
dest_addr = fold_convert (daddr_type, addr);
- dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
- size_int (prev_size));
+ dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
if (cur_size == GET_MODE_SIZE (mode))
{
src = build_va_arg_indirect_ref (src_addr);
else
{
HOST_WIDE_INT align = arg_boundary / 8;
- t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
- size_int (align - 1));
- t = fold_convert (sizetype, t);
+ t = fold_build_pointer_plus_hwi (ovf, align - 1);
t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
- size_int (-align));
- t = fold_convert (TREE_TYPE (ovf), t);
+ build_int_cst (TREE_TYPE (t), -align));
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
gimplify_assign (addr, t, pre_p);
- t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
- size_int (rsize * UNITS_PER_WORD));
+ t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
gimplify_assign (unshare_expr (ovf), t, pre_p);
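The rewritten sequence still computes addr = (ovf + align - 1) & -align before bumping ovf past the argument; with illustrative numbers, ovf == 0x1004 and a 128-bit arg_boundary give align == 16, so (0x1004 + 15) & -16 == 0x1010.
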
if (container)
rtx xops[2];
int regno;
-#ifdef TARGET_SOLARIS
- solaris_code_end ();
-#endif
-
for (regno = AX_REG; regno <= SP_REG; regno++)
{
char name[32];
cfun->machine->use_fast_prologue_epilogue
= !expensive_function_p (count);
}
- if (TARGET_PROLOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue)
- frame->save_regs_using_mov = true;
- else
- frame->save_regs_using_mov = false;
- /* If static stack checking is enabled and done with probes, the registers
- need to be saved before allocating the frame. */
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- frame->save_regs_using_mov = false;
+ frame->save_regs_using_mov
+ = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
/* Skip return address. */
offset = UNITS_PER_WORD;
return len;
}
-
+
/* Return an RTX that points to CFA_OFFSET within the stack frame.
The valid base registers are taken from CFUN->MACHINE->FS. */
emit_insn (gen_cld ());
/* SEH requires that the prologue end within 256 bytes of the start of
- the function. Prevent instruction schedules that would extend that. */
+ the function. Prevent instruction schedules that would extend that.
+ Further, prevent alloca modifications to the stack pointer from being
+ combined with prologue modifications. */
if (TARGET_SEH)
- emit_insn (gen_blockage ());
+ emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn. */
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx insn, mem;
-
+
mem = choose_baseaddr (cfa_offset);
mem = gen_frame_mem (Pmode, mem);
insn = emit_move_insn (reg, mem);
if (TARGET_VZEROUPPER
&& !TREE_THIS_VOLATILE (cfun->decl)
&& !cfun->machine->caller_return_avx256_p)
- emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+ emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
if (crtl->args.pops_args && crtl->args.size)
{
}
}
\f
+/* Determine if OP is suitable for use as the inner register of an address SUBREG. */
+
+static bool
+ix86_address_subreg_operand (rtx op)
+{
+ enum machine_mode mode;
+
+ if (!REG_P (op))
+ return false;
+
+ mode = GET_MODE (op);
+
+ if (GET_MODE_CLASS (mode) != MODE_INT)
+ return false;
+
+ /* Don't allow SUBREGs that span more than a word. They can lead to spill
+ failures when the register is one word out of a two-word structure. */
+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+ return false;
+
+ /* Allow only SUBREGs of non-eliminable hard registers. */
+ return register_no_elim_operand (op, mode);
+}
+
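For illustration, the shapes this check separates (a hedged sketch of the 64-bit case with 8-byte words; the register choices are made up):

    (subreg:SI (reg:DI ax) 0)    ;; accepted: word-sized, non-eliminable hard reg
    (subreg:SI (reg:TI 90) 8)    ;; rejected: inner register spans two words
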
/* Extract the parts of an RTL expression that is a valid memory address
for an instruction. Return 0 if the structure of the address is
grossly off. Return -1 if the address contains ASHIFT, so it is not
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
- if (REG_P (addr) || GET_CODE (addr) == SUBREG)
+ /* Allow zero-extended SImode addresses;
+ they will be emitted with the addr32 prefix. */
+ if (TARGET_64BIT
+ && GET_CODE (addr) == ZERO_EXTEND
+ && GET_MODE (addr) == DImode
+ && GET_MODE (XEXP (addr, 0)) == SImode)
+ addr = XEXP (addr, 0);
+
+ if (REG_P (addr))
base = addr;
+ else if (GET_CODE (addr) == SUBREG)
+ {
+ if (ix86_address_subreg_operand (SUBREG_REG (addr)))
+ base = addr;
+ else
+ return 0;
+ }
else if (GET_CODE (addr) == PLUS)
{
rtx addends[4], op;
return 0;
break;
- case REG:
case SUBREG:
+ if (!ix86_address_subreg_operand (SUBREG_REG (op)))
+ return 0;
+ /* FALLTHRU */
+
+ case REG:
if (!base)
base = op;
else if (!index)
else
disp = addr; /* displacement */
+ if (index)
+ {
+ if (REG_P (index))
+ ;
+ else if (GET_CODE (index) == SUBREG
+ && ix86_address_subreg_operand (SUBREG_REG (index)))
+ ;
+ else
+ return 0;
+ }
+
/* Extract the integral value of scale. */
if (scale_rtx)
{
disp = parts.disp;
scale = parts.scale;
- /* Validate base register.
-
- Don't allow SUBREG's that span more than a word here. It can lead to spill
- failures when the base is one word out of a two word structure, which is
- represented internally as a DImode int. */
-
+ /* Validate base register. */
if (base)
{
rtx reg;
if (REG_P (base))
reg = base;
- else if (GET_CODE (base) == SUBREG
- && REG_P (SUBREG_REG (base))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (base);
+ else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
+ reg = SUBREG_REG (base);
else
/* Base is not a register. */
return false;
- if (GET_MODE (base) != Pmode)
- /* Base is not in Pmode. */
+ if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
return false;
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
return false;
}
- /* Validate index register.
-
- Don't allow SUBREG's that span more than a word here -- same as above. */
-
+ /* Validate index register. */
if (index)
{
rtx reg;
if (REG_P (index))
reg = index;
- else if (GET_CODE (index) == SUBREG
- && REG_P (SUBREG_REG (index))
- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
- <= UNITS_PER_WORD)
- reg = SUBREG_REG (index);
+ else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
+ reg = SUBREG_REG (index);
else
/* Index is not a register. */
return false;
- if (GET_MODE (index) != Pmode)
- /* Index is not in Pmode. */
+ if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
return false;
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
return false;
}
+ /* Index and base should have the same mode. */
+ if (base && index
+ && GET_MODE (base) != GET_MODE (index))
+ return false;
+
/* Validate scale factor. */
if (scale != 1)
{
static rtx
get_thread_pointer (bool to_reg)
{
- rtx tp, reg, insn;
- tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
- if (!to_reg)
- return tp;
- reg = gen_reg_rtx (Pmode);
- insn = gen_rtx_SET (VOIDmode, reg, tp);
- insn = emit_insn (insn);
- return reg;
+ rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+ if (GET_MODE (tp) != Pmode)
+ tp = convert_to_mode (Pmode, tp, 1);
+ if (to_reg)
+ tp = copy_addr_to_reg (tp);
+ return tp;
}
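For the case this rewrite anticipates, where ptr_mode is narrower than Pmode, the thread pointer is read in SImode and widened; an illustrative (hedged) sequence for the 64-bit %fs-based TLS convention, actual output depending on the TLS model:

    movl %fs:0, %eax    # SImode thread-pointer read
                        # then used zero-extended as a DImode address
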
/* Construct the SYMBOL_REF for the tls_get_addr function. */
rtx temp = gen_reg_rtx (Pmode);
rtx val = force_operand (XEXP (x, 1), temp);
if (val != temp)
- emit_move_insn (temp, val);
+ {
+ if (GET_MODE (val) != Pmode)
+ val = convert_to_mode (Pmode, val, 1);
+ emit_move_insn (temp, val);
+ }
XEXP (x, 1) = temp;
return x;
rtx temp = gen_reg_rtx (Pmode);
rtx val = force_operand (XEXP (x, 0), temp);
if (val != temp)
- emit_move_insn (temp, val);
+ {
+ if (GET_MODE (val) != Pmode)
+ val = convert_to_mode (Pmode, val, 1);
+ emit_move_insn (temp, val);
+ }
XEXP (x, 0) = temp;
return x;
|| !MEM_P (orig_x))
return ix86_delegitimize_tls_address (orig_x);
x = XVECEXP (XEXP (x, 0), 0, 0);
- if (GET_MODE (orig_x) != Pmode)
+ if (GET_MODE (orig_x) != GET_MODE (x))
{
- x = simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
+ x = simplify_gen_subreg (GET_MODE (orig_x), x,
+ GET_MODE (x), 0);
if (x == NULL_RTX)
return orig_x;
}
gcc_assert (ok);
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.base);
+ parts.base = simplify_subreg (GET_MODE (parts.base),
+ tmp, GET_MODE (tmp), 0);
+ }
+
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.index);
+ parts.index = simplify_subreg (GET_MODE (parts.index),
+ tmp, GET_MODE (tmp), 0);
+ }
+
base = parts.base;
index = parts.index;
disp = parts.disp;
}
else
{
+ int code = 0;
+
+ /* Print SImode registers for zero-extended addresses to force
+ the addr32 prefix. Otherwise print DImode registers to avoid it. */
+ if (TARGET_64BIT)
+ code = (GET_CODE (addr) == ZERO_EXTEND) ? 'l' : 'q';
+
if (ASSEMBLER_DIALECT == ASM_ATT)
{
if (disp)
putc ('(', file);
if (base)
- print_reg (base, 0, file);
+ print_reg (base, code, file);
if (index)
{
putc (',', file);
- print_reg (index, 0, file);
+ print_reg (index, code, file);
if (scale != 1)
fprintf (file, ",%d", scale);
}
putc ('[', file);
if (base)
{
- print_reg (base, 0, file);
+ print_reg (base, code, file);
if (offset)
{
if (INTVAL (offset) >= 0)
if (index)
{
putc ('+', file);
- print_reg (index, 0, file);
+ print_reg (index, code, file);
if (scale != 1)
fprintf (file, "*%d", scale);
}
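The practical effect, with illustrative operands: a ZERO_EXTEND address prints 32-bit registers and therefore picks up the 0x67 addr32 prefix, while a plain DImode address keeps 64-bit registers:

    movl (%eax), %edx   # zero-extended address, addr32-prefixed
    movl (%rax), %edx   # ordinary 64-bit address, no prefix
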
const char *directive = ASM_LONG;
#ifdef ASM_QUAD
- if (TARGET_64BIT)
+ if (TARGET_LP64)
directive = ASM_QUAD;
#else
gcc_assert (!TARGET_64BIT);
op1 = force_operand (op1, op0);
if (op1 == op0)
return;
+ if (GET_MODE (op1) != mode)
+ op1 = convert_to_mode (mode, op1, 1);
}
else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op1))
op0, 1, OPTAB_DIRECT);
if (tmp == op0)
return;
+ if (GET_MODE (tmp) != mode)
+ op1 = convert_to_mode (mode, tmp, 1);
}
}
- if ((flag_pic || MACHOPIC_INDIRECT)
- && mode == Pmode && symbolic_operand (op1, Pmode))
+ if ((flag_pic || MACHOPIC_INDIRECT)
+ && symbolic_operand (op1, mode))
{
if (TARGET_MACHO && !TARGET_64BIT)
{
else
{
if (MEM_P (op0))
- op1 = force_reg (Pmode, op1);
- else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
+ op1 = force_reg (mode, op1);
+ else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
{
rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
op1 = legitimize_pic_address (op1, reg);
if (op0 == op1)
return;
+ if (GET_MODE (op1) != mode)
+ op1 = convert_to_mode (mode, op1, 1);
}
}
}
insn = emit_move_insn (operands[1], tmp1);
else
{
- /* Need a new scratch register since the old one has result
+ /* Need a new scratch register since the old one has result
of 8bit divide. */
scratch = gen_reg_rtx (mode);
emit_move_insn (scratch, tmp1);
rtx destexp;
rtx srcexp;
rtx countreg;
+ HOST_WIDE_INT rounded_count;
/* If the size is known, it is shorter to use rep movs. */
if (mode == QImode && CONST_INT_P (count)
}
if (CONST_INT_P (count))
{
- count = GEN_INT (INTVAL (count)
+ rounded_count = (INTVAL (count)
& ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
destmem = shallow_copy_rtx (destmem);
srcmem = shallow_copy_rtx (srcmem);
- set_mem_size (destmem, count);
- set_mem_size (srcmem, count);
+ set_mem_size (destmem, rounded_count);
+ set_mem_size (srcmem, rounded_count);
}
else
{
- if (MEM_SIZE (destmem))
- set_mem_size (destmem, NULL_RTX);
- if (MEM_SIZE (srcmem))
- set_mem_size (srcmem, NULL_RTX);
+ if (MEM_SIZE_KNOWN_P (destmem))
+ clear_mem_size (destmem);
+ if (MEM_SIZE_KNOWN_P (srcmem))
+ clear_mem_size (srcmem);
}
emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
destexp, srcexp));
{
rtx destexp;
rtx countreg;
+ HOST_WIDE_INT rounded_count;
if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
if (orig_value == const0_rtx && CONST_INT_P (count))
{
- count = GEN_INT (INTVAL (count)
+ rounded_count = (INTVAL (count)
& ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
destmem = shallow_copy_rtx (destmem);
- set_mem_size (destmem, count);
+ set_mem_size (destmem, rounded_count);
}
- else if (MEM_SIZE (destmem))
- set_mem_size (destmem, NULL_RTX);
+ else if (MEM_SIZE_KNOWN_P (destmem))
+ clear_mem_size (destmem);
emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
int desired_align, int align_bytes)
{
rtx src = *srcp;
- rtx src_size, dst_size;
+ rtx orig_dst = dst;
+ rtx orig_src = src;
int off = 0;
int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
if (src_align_bytes >= 0)
src_align_bytes = desired_align - src_align_bytes;
- src_size = MEM_SIZE (src);
- dst_size = MEM_SIZE (dst);
if (align_bytes & 1)
{
dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
set_mem_align (src, src_align * BITS_PER_UNIT);
}
- if (dst_size)
- set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
- if (src_size)
- set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
+ if (MEM_SIZE_KNOWN_P (orig_dst))
+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
+ if (MEM_SIZE_KNOWN_P (orig_src))
+ set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
*srcp = src;
return dst;
}
int desired_align, int align_bytes)
{
int off = 0;
- rtx dst_size = MEM_SIZE (dst);
+ rtx orig_dst = dst;
if (align_bytes & 1)
{
dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
set_mem_align (dst, desired_align * BITS_PER_UNIT);
- if (dst_size)
- set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
+ if (MEM_SIZE_KNOWN_P (orig_dst))
+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
return dst;
}
rtx callarg2,
rtx pop, bool sibcall)
{
+ /* We need to represent that SI and DI registers are clobbered
+ by SYSV calls. */
+ static int clobbered_registers[] = {
+ XMM6_REG, XMM7_REG, XMM8_REG,
+ XMM9_REG, XMM10_REG, XMM11_REG,
+ XMM12_REG, XMM13_REG, XMM14_REG,
+ XMM15_REG, SI_REG, DI_REG
+ };
+ rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
rtx use = NULL, call;
+ unsigned int vec_len;
if (pop == const0_rtx)
pop = NULL;
? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
: !call_insn_operand (XEXP (fnaddr, 0), Pmode))
{
- fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
- fnaddr = gen_rtx_MEM (QImode, fnaddr);
+ fnaddr = XEXP (fnaddr, 0);
+ if (GET_MODE (fnaddr) != Pmode)
+ fnaddr = convert_to_mode (Pmode, fnaddr, 1);
+ fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
}
+ vec_len = 0;
call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
if (retval)
call = gen_rtx_SET (VOIDmode, retval, call);
+ vec[vec_len++] = call;
+
if (pop)
{
pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
+ vec[vec_len++] = pop;
}
+
if (TARGET_64BIT_MS_ABI
&& (!callarg2 || INTVAL (callarg2) != -2))
{
- /* We need to represent that SI and DI registers are clobbered
- by SYSV calls. */
- static int clobbered_registers[] = {
- XMM6_REG, XMM7_REG, XMM8_REG,
- XMM9_REG, XMM10_REG, XMM11_REG,
- XMM12_REG, XMM13_REG, XMM14_REG,
- XMM15_REG, SI_REG, DI_REG
- };
- unsigned int i;
- rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
- rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
- UNSPEC_MS_TO_SYSV_CALL);
- vec[0] = call;
- vec[1] = unspec;
- for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
- vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
- ? TImode : DImode,
- gen_rtx_REG
- (SSE_REGNO_P (clobbered_registers[i])
- ? TImode : DImode,
- clobbered_registers[i]));
- call = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
- + 2, vec));
+ unsigned i;
+ vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_MS_TO_SYSV_CALL);
+ for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
+ vec[vec_len++]
+ = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
+ ? TImode : DImode,
+ clobbered_registers[i]));
}
/* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
if (TARGET_VZEROUPPER)
{
- rtx unspec;
int avx256;
-
if (cfun->machine->callee_pass_avx256_p)
{
if (cfun->machine->callee_return_avx256_p)
if (reload_completed)
emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
else
- {
- unspec = gen_rtx_UNSPEC (VOIDmode,
- gen_rtvec (1, GEN_INT (avx256)),
- UNSPEC_CALL_NEEDS_VZEROUPPER);
- call = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, call, unspec));
- }
+ vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
+ gen_rtvec (1, GEN_INT (avx256)),
+ UNSPEC_CALL_NEEDS_VZEROUPPER);
}
+ if (vec_len > 1)
+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
call = emit_call_insn (call);
if (use)
CALL_INSN_FUNCTION_USAGE (call) = use;
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
- rtx call = XVECEXP (PATTERN (insn), 0, 0);
+ rtx pat = PATTERN (insn);
+ rtvec vec = XVEC (pat, 0);
+ int len = GET_NUM_ELEM (vec) - 1;
+
+ /* Strip off the last entry of the parallel. */
+ gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
+ gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
+ if (len == 1)
+ pat = RTVEC_ELT (vec, 0);
+ else
+ pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));
+
emit_insn (gen_avx_vzeroupper (vzeroupper));
- emit_call_insn (call);
+ emit_call_insn (pat);
}
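Schematically (a hedged sketch, operands elided), the splitter rewrites

    (parallel [(call ...)
               (unspec [(const_int 1)] UNSPEC_CALL_NEEDS_VZEROUPPER)])

as a vzeroupper insn followed by the bare call, or by the shorter parallel when other elements (a stack pop, clobbers) remain.
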
/* Output the assembly for a call instruction. */
}
\f
/* Calculate the length of the memory address in the instruction
- encoding. Does not include the one-byte modrm, opcode, or prefix. */
+ encoding. Includes the addr32 prefix; does not include the one-byte modrm,
+ opcode, or other prefixes. */
int
memory_address_length (rtx addr)
base = parts.base;
index = parts.index;
disp = parts.disp;
- len = 0;
+
+ /* Add length of addr32 prefix. */
+ len = (GET_CODE (addr) == ZERO_EXTEND);
/* Rule of thumb:
- esp as the base always wants an index,
case AX_REG:
opcode = 0xb8; break;
case CX_REG:
- opcode = 0xb9; break;
+ opcode = 0xb9; break;
default:
gcc_unreachable ();
}
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
- { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
+ { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
/* BMI */
{ OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
ix86_init_mmx_sse_builtins ();
- if (TARGET_64BIT)
+ if (TARGET_LP64)
ix86_init_builtins_va_builtins_abi ();
#ifdef SUBTARGET_INIT_BUILTINS
op = expand_normal (arg);
gcc_assert (target == 0);
if (memory)
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ {
+ if (GET_MODE (op) != Pmode)
+ op = convert_to_mode (Pmode, op, 1);
+ target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
+ }
else
target = force_reg (tmode, op);
arg_adjust = 1;
if (i == memory)
{
/* This must be the memory operand. */
- op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ if (GET_MODE (op) != Pmode)
+ op = convert_to_mode (Pmode, op, 1);
+ op = gen_rtx_MEM (mode, force_reg (Pmode, op));
gcc_assert (GET_MODE (op) == mode
|| GET_MODE (op) == VOIDmode);
}
mode1 = insn_data[icode].operand[1].mode;
mode2 = insn_data[icode].operand[2].mode;
- op0 = force_reg (Pmode, op0);
- op0 = gen_rtx_MEM (mode1, op0);
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));
if (!insn_data[icode].operand[0].predicate (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
op0 = expand_normal (arg0);
icode = CODE_FOR_sse2_clflush;
if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
emit_insn (gen_sse2_clflush (op0));
return 0;
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!REG_P (op0))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
if (!REG_P (op1))
op1 = copy_to_mode_reg (SImode, op1);
if (!REG_P (op2))
op0 = expand_normal (arg0);
icode = CODE_FOR_lwp_llwpcb;
if (!insn_data[icode].operand[0].predicate (op0, Pmode))
- op0 = copy_to_mode_reg (Pmode, op0);
+ {
+ if (GET_MODE (op0) != Pmode)
+ op0 = convert_to_mode (Pmode, op0, 1);
+ op0 = force_reg (Pmode, op0);
+ }
emit_insn (gen_lwp_llwpcb (op0));
return 0;
arg0 = CALL_EXPR_ARG (exp, 0);
op1 = expand_normal (arg0);
if (!address_operand (op1, VOIDmode))
- op1 = copy_addr_to_reg (op1);
+ {
+ op1 = convert_memory_address (Pmode, op1);
+ op1 = copy_addr_to_reg (op1);
+ }
emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
op1 = gen_reg_rtx (SImode);
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
- enum machine_mode mode,
- secondary_reload_info *sri ATTRIBUTE_UNUSED)
+ enum machine_mode mode, secondary_reload_info *sri)
{
+ /* Double-word spills from general registers to non-offsettable memory
+ references (zero-extended addresses) require special handling. */
+ if (TARGET_64BIT
+ && MEM_P (x)
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && rclass == GENERAL_REGS
+ && !offsettable_memref_p (x))
+ {
+ sri->icode = (in_p
+ ? CODE_FOR_reload_noff_load
+ : CODE_FOR_reload_noff_store);
+ /* Add the cost of move to a temporary. */
+ sri->extra_cost = 1;
+
+ return NO_REGS;
+ }
+
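The case in point, as a hedged RTL sketch: a double-word spill such as

    (set (mem:TI (zero_extend:DI (reg:SI ...))) (reg:TI ...))

needs word-sized moves at offsets 0 and 8, but no offset can be folded into the zero-extended address form, so the reload_noff_load/reload_noff_store patterns route the address through a temporary (hence the extra_cost of 1).
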
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
if (!TARGET_64BIT
/* This condition handles corner case where an expression involving
pointers gets vectorized. We're trying to use the address of a
- stack slot as a vector initializer.
+ stack slot as a vector initializer.
(set (reg:V2DI 74 [ vect_cst_.2 ])
(vec_duplicate:V2DI (reg/f:DI 20 frame)))
return inline_secondary_memory_needed (class1, class2, mode, strict);
}
+/* Implement the TARGET_CLASS_MAX_NREGS hook.
+
+ On the 80386, this is the size of MODE in words,
+ except in the FP regs, where a single reg is always enough. */
+
+static unsigned char
+ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
+{
+ if (MAYBE_INTEGER_CLASS_P (rclass))
+ {
+ if (mode == XFmode)
+ return (TARGET_64BIT ? 2 : 3);
+ else if (mode == XCmode)
+ return (TARGET_64BIT ? 4 : 6);
+ else
+ return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
+ }
+ else
+ {
+ if (COMPLEX_MODE_P (mode))
+ return 2;
+ else
+ return 1;
+ }
+}
+
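Worked results of the new hook (illustrative; 32-bit mode with 4-byte words):

    ix86_class_max_nregs (GENERAL_REGS, XFmode)   /* 3: 12 bytes in word pieces */
    ix86_class_max_nregs (GENERAL_REGS, XCmode)   /* 6 */
    ix86_class_max_nregs (FLOAT_REGS, XFmode)     /* 1: a single x87 reg suffices */
    ix86_class_max_nregs (FLOAT_REGS, XCmode)     /* 2: one reg per complex part */
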
/* Return true if the registers in CLASS cannot represent the change from
modes FROM to TO. */
/* In case of copying from general_purpose_register we may emit multiple
stores followed by single load causing memory size mismatch stall.
Count this as arbitrarily high cost of 20. */
- if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
+ if (targetm.class_max_nregs (class1, mode)
+ > targetm.class_max_nregs (class2, mode))
cost += 20;
/* In the case of FP/MMX moves, the registers actually overlap, and we
*no_add_attrs = true;
return NULL_TREE;
}
- if (!TARGET_64BIT)
- {
- warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
- name);
- *no_add_attrs = true;
- return NULL_TREE;
- }
/* Can combine regparm with all attributes but fastcall. */
if (is_attribute_p ("ms_abi", name))
/* Count the minimum number of instructions in BB. Return 4 if the
number of instructions >= 4. */
-static int
+static int
ix86_count_insn_bb (basic_block bb)
{
rtx insn;
}
-/* Count the minimum number of instructions in code path in BB.
+/* Count the minimum number of instructions in code path in BB.
Return 4 if the number of instructions >= 4. */
-static int
+static int
ix86_count_insn (basic_block bb)
{
edge e;
return clobbers;
}
-/* Implements target vector targetm.asm.encode_section_info. This
- is not used by netware. */
+/* Implements target vector targetm.asm.encode_section_info. */
static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
-#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
+#undef TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
+
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
+#undef TARGET_CLASS_MAX_NREGS
+#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
+
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS