+struct scratch_reg {
+ rtx reg;
+ bool saved;
+};
+
+/* Return a short-lived scratch register for use on function entry.
+ In 32-bit mode, it is valid only after the registers are saved
+ in the prologue. This register must be released by means of
+ release_scratch_register_on_entry once it is dead. */
+
+static void
+get_scratch_register_on_entry (struct scratch_reg *sr)
+{
+ int regno;
+
+ sr->saved = false;
+
+ if (TARGET_64BIT)
+ {
+ /* We always use R11 in 64-bit mode. */
+ regno = R11_REG;
+ }
+ else
+ {
+ tree decl = current_function_decl, fntype = TREE_TYPE (decl);
+ bool fastcall_p
+ = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
+ bool static_chain_p = DECL_STATIC_CHAIN (decl);
+ int regparm = ix86_function_regparm (fntype, decl);
+ int drap_regno
+ = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
+
+ /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
+ for the static chain register. */
+ if ((regparm < 1 || (fastcall_p && !static_chain_p))
+ && drap_regno != AX_REG)
+ regno = AX_REG;
+ else if (regparm < 2 && drap_regno != DX_REG)
+ regno = DX_REG;
+ /* ecx is the static chain register. */
+ else if (regparm < 3 && !fastcall_p && !static_chain_p
+ && drap_regno != CX_REG)
+ regno = CX_REG;
+ else if (ix86_save_reg (BX_REG, true))
+ regno = BX_REG;
+ /* esi is the static chain register. */
+ else if (!(regparm == 3 && static_chain_p)
+ && ix86_save_reg (SI_REG, true))
+ regno = SI_REG;
+ else if (ix86_save_reg (DI_REG, true))
+ regno = DI_REG;
+ else
+ {
+ regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
+ sr->saved = true;
+ }
+ }
+
+ sr->reg = gen_rtx_REG (Pmode, regno);
+ if (sr->saved)
+ {
+ rtx insn = emit_insn (gen_push (sr->reg));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+}
+
+/* Release a scratch register obtained from the preceding function. */
+
+static void
+release_scratch_register_on_entry (struct scratch_reg *sr)
+{
+ if (sr->saved)
+ {
+ rtx x, insn = emit_insn (gen_pop (sr->reg));
+
+ /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
+ RTX_FRAME_RELATED_P (insn) = 1;
+ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
+ x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
+ }
+}
+
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
+
+static void
+ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
+{
+ /* We skip the probe for the first interval + a small dope of 4 words and
+ probe that many bytes past the specified size to maintain a protection
+ area at the botton of the stack. */
+ const int dope = 4 * UNITS_PER_WORD;
+ rtx size_rtx = GEN_INT (size);
+
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 11 insns in the
+ generic case while the compile-time loop is made up of 3+2*(n-1) insns
+ for n # of intervals. */
+ if (size <= 5 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i, adjust;
+ bool first_probe = true;
+
+ /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it exceeds SIZE. If only one probe is
+ needed, this will not generate any code. Then adjust and probe
+ to PROBE_INTERVAL + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ {
+ if (first_probe)
+ {
+ adjust = 2 * PROBE_INTERVAL + dope;
+ first_probe = false;
+ }
+ else
+ adjust = PROBE_INTERVAL;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ if (first_probe)
+ adjust = size + PROBE_INTERVAL + dope;
+ else
+ adjust = size + PROBE_INTERVAL - i;
+
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx, -adjust)));
+ emit_stack_probe (stack_pointer_rtx);
+
+ /* Adjust back to account for the additional first interval. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* SP = SP_0 + PROBE_INTERVAL. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ - (PROBE_INTERVAL + dope))));
+
+ /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
+ emit_move_insn (sr.reg, GEN_INT (-rounded_size));
+ emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
+ gen_rtx_PLUS (Pmode, sr.reg,
+ stack_pointer_rtx)));
+
+
+ /* Step 3: the loop
+
+ while (SP != LAST_ADDR)
+ {
+ SP = SP + PROBE_INTERVAL
+ probe at SP
+ }
+
+ adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
+ values of N from 1 until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
+
+
+ /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
+ assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ rounded_size - size)));
+ emit_stack_probe (stack_pointer_rtx);
+ }
+
+ /* Adjust back to account for the additional first interval. */
+ emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ plus_constant (stack_pointer_rtx,
+ PROBE_INTERVAL + dope)));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
+ cfun->machine->fs.sp_offset += size;
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Adjust the stack pointer up to REG while probing it. */
+
+const char *
+output_adjust_stack_and_probe (rtx reg)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[2];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if SP == LAST_ADDR. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* SP = SP + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at SP. */
+ xops[1] = const0_rtx;
+ output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+
+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
+ inclusive. These are offsets from the current stack pointer. */
+
+static void
+ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+{
+ /* See if we have a constant small number of probes to generate. If so,
+ that's the easy case. The run-time loop is made up of 7 insns in the
+ generic case while the compile-time loop is made up of n insns for n #
+ of intervals. */
+ if (size <= 7 * PROBE_INTERVAL)
+ {
+ HOST_WIDE_INT i;
+
+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+ it exceeds SIZE. If only one probe is needed, this will not
+ generate any code. Then probe at FIRST + SIZE. */
+ for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
+
+ emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
+ }
+
+ /* Otherwise, do the same as above, but in a loop. Note that we must be
+ extra careful with variables wrapping around because we might be at
+ the very top (or the very bottom) of the address space and we have
+ to be able to handle this case properly; in particular, we use an
+ equality test for the loop condition. */
+ else
+ {
+ HOST_WIDE_INT rounded_size, last;
+ struct scratch_reg sr;
+
+ get_scratch_register_on_entry (&sr);
+
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ rounded_size = size & -PROBE_INTERVAL;
+
+
+ /* Step 2: compute initial and final value of the loop counter. */
+
+ /* TEST_OFFSET = FIRST. */
+ emit_move_insn (sr.reg, GEN_INT (-first));
+
+ /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
+ last = first + rounded_size;
+
+
+ /* Step 3: the loop
+
+ while (TEST_ADDR != LAST_ADDR)
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
+
+ emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
+
+
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. */
+
+ if (size != rounded_size)
+ emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ sr.reg),
+ rounded_size - size));
+
+ release_scratch_register_on_entry (&sr);
+ }
+
+ /* Make sure nothing is scheduled before we are done. */
+ emit_insn (gen_blockage ());
+}
+
+/* Probe a range of stack addresses from REG to END, inclusive. These are
+ offsets from the current stack pointer. */
+
+const char *
+output_probe_stack_range (rtx reg, rtx end)
+{
+ static int labelno = 0;
+ char loop_lab[32], end_lab[32];
+ rtx xops[3];
+
+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
+ ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
+
+ /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
+ xops[0] = reg;
+ xops[1] = end;
+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
+ fputs ("\tje\t", asm_out_file);
+ assemble_name_raw (asm_out_file, end_lab);
+ fputc ('\n', asm_out_file);
+
+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
+ xops[1] = GEN_INT (PROBE_INTERVAL);
+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
+
+ /* Probe at TEST_ADDR. */
+ xops[0] = stack_pointer_rtx;
+ xops[1] = reg;
+ xops[2] = const0_rtx;
+ output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
+
+ fprintf (asm_out_file, "\tjmp\t");
+ assemble_name_raw (asm_out_file, loop_lab);
+ fputc ('\n', asm_out_file);
+
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
+
+ return "";
+}
+