/* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
in basic block BB. Delete it if upper 128bit AVX registers are
unused. If it isn't deleted, move it to just before a jump insn.
-
+
STATE is state of the upper 128bits of AVX registers at entry. */
static void
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
the auto-vectorizer. */
- m_BDVER
+ m_BDVER
};
/* Feature tests against the various architecture variations. */
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
+ {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
+ | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
+ | PTA_RDRND | PTA_F16C},
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
target_flags |= MASK_PREFER_AVX128;
}
}
- else
+ else
{
/* Disable vzeroupper pass if TARGET_AVX is disabled. */
target_flags &= ~MASK_VZEROUPPER;
optimize any indirect call, or a direct call to a global function,
as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
if (!TARGET_MACHO
- && !TARGET_64BIT
- && flag_pic
+ && !TARGET_64BIT
+ && flag_pic
&& (!decl || !targetm.binds_local_p (decl)))
return false;
alias_set_type set = get_varargs_alias_set ();
int i;
+ /* Reset to zero, as there might be a sysv vaarg used
+ before. */
+ ix86_varargs_gpr_size = 0;
+ ix86_varargs_fpr_size = 0;
+
for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
{
rtx reg, mem;
cfun->machine->use_fast_prologue_epilogue
= !expensive_function_p (count);
}
- if (TARGET_PROLOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue)
- frame->save_regs_using_mov = true;
- else
- frame->save_regs_using_mov = false;
- /* If static stack checking is enabled and done with probes, the registers
- need to be saved before allocating the frame. */
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- frame->save_regs_using_mov = false;
+ frame->save_regs_using_mov
+ = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
/* Skip return address. */
offset = UNITS_PER_WORD;
return len;
}
-
+
/* Return an RTX that points to CFA_OFFSET within the stack frame.
The valid base registers are taken from CFUN->MACHINE->FS. */
emit_insn (gen_cld ());
/* SEH requires that the prologue end within 256 bytes of the start of
- the function. Prevent instruction schedules that would extend that. */
+ the function. Prevent instruction schedules that would extend that.
+ Further, prevent alloca modifications to the stack pointer from being
+ combined with prologue modifications. */
if (TARGET_SEH)
- emit_insn (gen_blockage ());
+ emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn. */
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx insn, mem;
-
+
mem = choose_baseaddr (cfa_offset);
mem = gen_frame_mem (Pmode, mem);
insn = emit_move_insn (reg, mem);
if (TARGET_VZEROUPPER
&& !TREE_THIS_VOLATILE (cfun->decl)
&& !cfun->machine->caller_return_avx256_p)
- emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
+ emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
if (crtl->args.pops_args && crtl->args.size)
{
int retval = 1;
enum ix86_address_seg seg = SEG_DEFAULT;
+ /* Allow zero-extended SImode addresses,
+ they will be emitted with addr32 prefix. */
+ if (TARGET_64BIT
+ && GET_CODE (addr) == ZERO_EXTEND
+ && GET_MODE (addr) == DImode
+ && GET_MODE (XEXP (addr, 0)) == SImode)
+ addr = XEXP (addr, 0);
+
if (REG_P (addr))
base = addr;
else if (GET_CODE (addr) == SUBREG)
gcc_assert (ok);
+ if (parts.base && GET_CODE (parts.base) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.base);
+ parts.base = simplify_subreg (GET_MODE (parts.base),
+ tmp, GET_MODE (tmp), 0);
+ }
+
+ if (parts.index && GET_CODE (parts.index) == SUBREG)
+ {
+ rtx tmp = SUBREG_REG (parts.index);
+ parts.index = simplify_subreg (GET_MODE (parts.index),
+ tmp, GET_MODE (tmp), 0);
+ }
+
base = parts.base;
index = parts.index;
disp = parts.disp;
}
else
{
- /* Print DImode registers on 64bit targets to avoid addr32 prefixes. */
- int code = TARGET_64BIT ? 'q' : 0;
+ int code = 0;
+
+ /* Print SImode registers for zero-extended addresses to force
+ addr32 prefix. Otherwise print DImode registers to avoid it. */
+ if (TARGET_64BIT)
+ code = (GET_CODE (addr) == ZERO_EXTEND) ? 'l' : 'q';
if (ASSEMBLER_DIALECT == ASM_ATT)
{
}
}
- if ((flag_pic || MACHOPIC_INDIRECT)
+ if ((flag_pic || MACHOPIC_INDIRECT)
&& symbolic_operand (op1, mode))
{
if (TARGET_MACHO && !TARGET_64BIT)
insn = emit_move_insn (operands[1], tmp1);
else
{
- /* Need a new scratch register since the old one has result
+ /* Need a new scratch register since the old one has result
of 8bit divide. */
scratch = gen_reg_rtx (mode);
emit_move_insn (scratch, tmp1);
}
\f
/* Calculate the length of the memory address in the instruction
- encoding. Does not include the one-byte modrm, opcode, or prefix. */
+ encoding. Includes addr32 prefix, does not include the one-byte modrm,
+ opcode, or other prefixes. */
int
memory_address_length (rtx addr)
base = parts.base;
index = parts.index;
disp = parts.disp;
- len = 0;
+
+ /* Add length of addr32 prefix. */
+ len = (GET_CODE (addr) == ZERO_EXTEND);
/* Rule of thumb:
- esp as the base always wants an index,
case AX_REG:
opcode = 0xb8; break;
case CX_REG:
- opcode = 0xb9; break;
+ opcode = 0xb9; break;
default:
gcc_unreachable ();
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
- enum machine_mode mode,
- secondary_reload_info *sri ATTRIBUTE_UNUSED)
+ enum machine_mode mode, secondary_reload_info *sri)
{
+ /* Double-word spills from general registers to non-offsettable memory
+ references (zero-extended addresses) require special handling. */
+ if (TARGET_64BIT
+ && MEM_P (x)
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD
+ && rclass == GENERAL_REGS
+ && !offsettable_memref_p (x))
+ {
+ sri->icode = (in_p
+ ? CODE_FOR_reload_noff_load
+ : CODE_FOR_reload_noff_store);
+ /* Add the cost of move to a temporary. */
+ sri->extra_cost = 1;
+
+ return NO_REGS;
+ }
+
/* QImode spills from non-QI registers require
intermediate register on 32bit targets. */
if (!TARGET_64BIT
/* This condition handles corner case where an expression involving
pointers gets vectorized. We're trying to use the address of a
- stack slot as a vector initializer.
+ stack slot as a vector initializer.
(set (reg:V2DI 74 [ vect_cst_.2 ])
(vec_duplicate:V2DI (reg/f:DI 20 frame)))
/* Count the minimum number of instructions in BB. Return 4 if the
number of instructions >= 4. */
-static int
+static int
ix86_count_insn_bb (basic_block bb)
{
rtx insn;
}
-/* Count the minimum number of instructions in code path in BB.
+/* Count the minimum number of instructions in code path in BB.
Return 4 if the number of instructions >= 4. */
-static int
+static int
ix86_count_insn (basic_block bb)
{
edge e;
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
-#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global