struct bfin_cpu bfin_cpus[] =
{
+ {"bf522", BFIN_CPU_BF522, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf522", BFIN_CPU_BF522, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf523", BFIN_CPU_BF523, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf523", BFIN_CPU_BF523, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf524", BFIN_CPU_BF524, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf524", BFIN_CPU_BF524, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf525", BFIN_CPU_BF525, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf525", BFIN_CPU_BF525, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf526", BFIN_CPU_BF526, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf526", BFIN_CPU_BF526, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf527", BFIN_CPU_BF527, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf527", BFIN_CPU_BF527, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
WA_SPECULATIVE_LOADS | WA_RETS},
{"bf538", BFIN_CPU_BF538, 0x0003,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf538", BFIN_CPU_BF538, 0x0002,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf539", BFIN_CPU_BF539, 0x0004,
WA_SPECULATIVE_LOADS | WA_RETS},
{"bf539", BFIN_CPU_BF539, 0x0002,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf542", BFIN_CPU_BF542, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf542", BFIN_CPU_BF542, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf544", BFIN_CPU_BF544, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf544", BFIN_CPU_BF544, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf547", BFIN_CPU_BF547, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf547", BFIN_CPU_BF547, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf548", BFIN_CPU_BF548, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf548", BFIN_CPU_BF548, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
+ {"bf549", BFIN_CPU_BF549, 0x0001,
+ WA_SPECULATIVE_LOADS | WA_RETS},
{"bf549", BFIN_CPU_BF549, 0x0000,
WA_SPECULATIVE_LOADS | WA_RETS},
}
if (crtl->limit_stack
- || TARGET_STACK_CHECK_L1)
+ || (TARGET_STACK_CHECK_L1
+ && !DECL_NO_LIMIT_STACK (current_function_decl)))
{
HOST_WIDE_INT offset
= bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
not need to reload P5 in the prologue, but the sibcall wil pop P5 in the
sibcall epilogue, and we end up with the wrong value in P5. */
+ if (!decl)
+ /* Not enough information. */
+ return false;
this_func = cgraph_local_info (current_function_decl);
called_func = cgraph_local_info (decl);
bfin_workarounds |= bfin_cpus[i].workarounds;
}
- if (bfin_cpu_type == BFIN_CPU_BF561)
- warning (0, "bf561 support is incomplete yet.");
-
return true;
}
/* Used for communication between {push,pop}_multiple_operation (which
we use not only as a predicate) and the corresponding output functions. */
static int first_preg_to_save, first_dreg_to_save;
+static int n_regs_to_save;
int
push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
lastpreg++;
}
}
+ n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
return 1;
}
}
first_dreg_to_save = lastdreg;
first_preg_to_save = lastpreg;
+ n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
return 1;
}
{
gcc_assert (slot[1] != NULL_RTX);
+ /* Don't add extra NOPs if optimizing for size. */
+ if (optimize_size
+ && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
+ return false;
+
/* Verify that we really can do the multi-issue. */
if (slot[0])
{
}
}
\f
+/* On some silicon revisions, functions shorter than a certain number of cycles
+ can cause unpredictable behaviour. Work around this by adding NOPs as
+ needed. */
+static void
+workaround_rts_anomaly (void)
+{
+ rtx insn, first_insn = NULL_RTX;
+ int cycles = 4;
+
+ if (! ENABLE_WA_RETS)
+ return;
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (BARRIER_P (insn))
+ return;
+
+ if (NOTE_P (insn) || LABEL_P (insn))
+ continue;
+
+ if (first_insn == NULL_RTX)
+ first_insn = insn;
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (CALL_P (insn))
+ return;
+
+ if (JUMP_P (insn))
+ {
+ if (recog_memoized (insn) == CODE_FOR_return_internal)
+ break;
+
+ /* Nothing to worry about for direct jumps. */
+ if (!any_condjump_p (insn))
+ return;
+ if (cycles <= 1)
+ return;
+ cycles--;
+ }
+ else if (INSN_P (insn))
+ {
+ rtx pat = PATTERN (insn);
+ int this_cycles = 1;
+
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ if (push_multiple_operation (pat, VOIDmode)
+ || pop_multiple_operation (pat, VOIDmode))
+ this_cycles = n_regs_to_save;
+ }
+ else
+ {
+ enum insn_code icode = recog_memoized (insn);
+ if (icode == CODE_FOR_link)
+ this_cycles = 4;
+ else if (icode == CODE_FOR_unlink)
+ this_cycles = 3;
+ else if (icode == CODE_FOR_mulsi3)
+ this_cycles = 5;
+ }
+ if (this_cycles >= cycles)
+ return;
+
+ cycles -= this_cycles;
+ }
+ }
+ while (cycles > 0)
+ {
+ emit_insn_before (gen_nop (), first_insn);
+ cycles--;
+ }
+}
+
/* Return an insn type for INSN that can be used by the caller for anomaly
workarounds. This differs from plain get_attr_type in that it handles
SEQUENCEs. */
return NULL_RTX;
}
-/* We use the machine specific reorg pass for emitting CSYNC instructions
- after conditional branches as needed.
-
- The Blackfin is unusual in that a code sequence like
- if cc jump label
- r0 = (p0)
- may speculatively perform the load even if the condition isn't true. This
- happens for a branch that is predicted not taken, because the pipeline
- isn't flushed or stalled, so the early stages of the following instructions,
- which perform the memory reference, are allowed to execute before the
- jump condition is evaluated.
- Therefore, we must insert additional instructions in all places where this
- could lead to incorrect behavior. The manual recommends CSYNC, while
- VDSP seems to use NOPs (even though its corresponding compiler option is
- named CSYNC).
-
- When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
- When optimizing for size, we turn the branch into a predicted taken one.
- This may be slower due to mispredicts, but saves code size. */
-
static void
-bfin_reorg (void)
+workaround_speculation (void)
{
rtx insn, next;
rtx last_condjump = NULL_RTX;
int cycles_since_jump = INT_MAX;
- /* We are freeing block_for_insn in the toplev to keep compatibility
- with old MDEP_REORGS that are not CFG based. Recompute it now. */
- compute_bb_for_insn ();
-
- if (bfin_flag_schedule_insns2)
- {
- splitting_for_sched = 1;
- split_all_insns ();
- splitting_for_sched = 0;
-
- timevar_push (TV_SCHED2);
- schedule_insns ();
- timevar_pop (TV_SCHED2);
-
- /* Examine the schedule and insert nops as necessary for 64-bit parallel
- instructions. */
- bfin_gen_bundles ();
- }
-
- df_analyze ();
-
- /* Doloop optimization */
- if (cfun->machine->has_hardware_loops)
- bfin_reorg_loops (dump_file);
-
if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS)
return;
if (! ENABLE_WA_SPECULATIVE_SYNCS)
return;
- if (! ENABLE_WA_RETS)
- return;
-
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
+ int cycles_since_jump;
if (JUMP_P (insn)
&& any_condjump_p (insn)
&& (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
}
}
}
+}
+
+/* We use the machine specific reorg pass for emitting CSYNC instructions
+ after conditional branches as needed.
+
+ The Blackfin is unusual in that a code sequence like
+ if cc jump label
+ r0 = (p0)
+ may speculatively perform the load even if the condition isn't true. This
+ happens for a branch that is predicted not taken, because the pipeline
+ isn't flushed or stalled, so the early stages of the following instructions,
+ which perform the memory reference, are allowed to execute before the
+ jump condition is evaluated.
+ Therefore, we must insert additional instructions in all places where this
+ could lead to incorrect behavior. The manual recommends CSYNC, while
+ VDSP seems to use NOPs (even though its corresponding compiler option is
+ named CSYNC).
+
+ When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
+ When optimizing for size, we turn the branch into a predicted taken one.
+ This may be slower due to mispredicts, but saves code size. */
+
+static void
+bfin_reorg (void)
+{
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (bfin_flag_schedule_insns2)
+ {
+ splitting_for_sched = 1;
+ split_all_insns ();
+ splitting_for_sched = 0;
+
+ timevar_push (TV_SCHED2);
+ schedule_insns ();
+ timevar_pop (TV_SCHED2);
+
+ /* Examine the schedule and insert nops as necessary for 64-bit parallel
+ instructions. */
+ bfin_gen_bundles ();
+ }
+
+ df_analyze ();
+
+ /* Doloop optimization */
+ if (cfun->machine->has_hardware_loops)
+ bfin_reorg_loops (dump_file);
+
+ workaround_speculation ();
if (bfin_flag_var_tracking)
{
reorder_var_tracking_notes ();
timevar_pop (TV_VAR_TRACKING);
}
+
df_finish_pass (false);
+
+ workaround_rts_anomaly ();
}
\f
/* Handle interrupt_handler, exception_handler and nmi_handler function