if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
return true;
+ /* Win64 SEH, very large frames need a frame-pointer as maximum stack
+ allocation is 4GB. */
+ if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
+ return true;
+
/* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
turns off the frame pointer by default. Turn it back on now if
we've not got a leaf function. */
offset += frame->nregs * UNITS_PER_WORD;
frame->reg_save_offset = offset;
+ /* On SEH target, registers are pushed just before the frame pointer
+ location. */
+ if (TARGET_SEH)
+ frame->hard_frame_pointer_offset = offset;
+
/* Align and set SSE register save area. */
if (frame->nsseregs)
{
{
HOST_WIDE_INT diff;
- /* If we can leave the frame pointer where it is, do so. */
+ /* If we can leave the frame pointer where it is, do so. Also, returns
+ the establisher frame for __builtin_frame_address (0). */
diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
- if (diff > 240 || (diff & 15) != 0)
+ if (diff <= SEH_MAX_FRAME_SIZE
+ && (diff > 240 || (diff & 15) != 0)
+ && !crtl->accesses_prior_frames)
{
/* Ideally we'd determine what portion of the local stack frame
(within the constraint of the lowest 240) is most heavily used.
struct ix86_frame frame;
HOST_WIDE_INT allocate;
bool int_registers_saved;
+ bool sse_registers_saved;
ix86_finalize_stack_realign_flags ();
m->fs.realigned = true;
}
+ int_registers_saved = (frame.nregs == 0);
+ sse_registers_saved = (frame.nsseregs == 0);
+
if (frame_pointer_needed && !m->fs.fp_valid)
{
/* Note: AT&T enter does NOT have reversed args. Enter is probably
insn = emit_insn (gen_push (hard_frame_pointer_rtx));
RTX_FRAME_RELATED_P (insn) = 1;
+ /* Push registers now, before setting the frame pointer
+ on SEH target. */
+ if (!int_registers_saved
+ && TARGET_SEH
+ && !frame.save_regs_using_mov)
+ {
+ ix86_emit_save_regs ();
+ int_registers_saved = true;
+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+ }
+
if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
{
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
}
}
- int_registers_saved = (frame.nregs == 0);
-
if (!int_registers_saved)
{
/* If saving registers via PUSH, do so now. */
current_function_static_stack_size = stack_size;
}
+ /* On SEH target with very large frame size, allocate an area to save
+ SSE registers (as the very large allocation won't be described). */
+ if (TARGET_SEH
+ && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
+ && !sse_registers_saved)
+ {
+ HOST_WIDE_INT sse_size =
+ frame.sse_reg_save_offset - frame.reg_save_offset;
+
+ gcc_assert (int_registers_saved);
+
+ /* No need to do stack checking as the area will be immediately
+ written. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (-sse_size), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ allocate -= sse_size;
+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
+ sse_registers_saved = true;
+ }
+
/* The stack has already been decremented by the instruction calling us
so probe if the size is non-negative to preserve the protection area. */
if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
if (!int_registers_saved)
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- if (frame.nsseregs)
+ if (!sse_registers_saved)
ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
pic_reg_used = false;
}
/* First step is to deallocate the stack frame so that we can
- pop the registers. */
- if (!m->fs.sp_valid)
+ pop the registers. Also do it on SEH target for very large
+ frame as the emitted instructions aren't allowed by the ABI in
+ epilogues. */
+ if (!m->fs.sp_valid
+ || (TARGET_SEH
+ && (m->fs.sp_offset - frame.reg_save_offset
+ >= SEH_MAX_FRAME_SIZE)))
{
pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
GEN_INT (m->fs.fp_offset
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
- emit_insn (gen_avx2_permvarv8si (t2, op0, mask));
+ emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
goto merge_two;
}
return;
case V4SFmode:
t1 = gen_reg_rtx (V8SFmode);
- t2 = gen_reg_rtx (V8SFmode);
- mask = gen_lowpart (V4SFmode, mask);
+ t2 = gen_reg_rtx (V8SImode);
+ mask = gen_lowpart (V4SImode, mask);
emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
- emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
return;
arg_adjust = 0;
if (optimize
|| target == 0
- || GET_MODE (target) != tmode
- || !insn_p->operand[0].predicate (target, tmode))
+ || !register_operand (target, tmode)
+ || GET_MODE (target) != tmode)
target = gen_reg_rtx (tmode);
}
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
else
- emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
+ emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
}
else
{