+}
+
+/* Called just before the final scheduling pass. If we need to insert NOPs
+ later on to work around speculative loads, insert special placeholder
+ insns that cause loads to be delayed for as many cycles as necessary
+ (and possible). This reduces the number of NOPs we need to add.
+ The dummy insns we generate are later removed by bfin_gen_bundles. */
+static void
+add_sched_insns_for_speculation (void)
+{
+ rtx insn;
+
+ if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+ && ! ENABLE_WA_INDIRECT_CALLS)
+ return;
+
+ /* First pass: find predicted-false branches; if something after them
+ needs nops, insert them or change the branch to predict true. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ continue;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ {
+ if (any_condjump_p (insn)
+ && !cbranch_predicted_taken_p (insn))
+ {
+ rtx n = next_real_insn (insn);
+ emit_insn_before (gen_stall (GEN_INT (3)), n);
+ }
+ }
+ }
+
+ /* Second pass: for predicted-true branches, see if anything at the
+ branch destination needs extra nops. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (JUMP_P (insn)
+ && any_condjump_p (insn)
+ && (cbranch_predicted_taken_p (insn)))
+ {
+ rtx target = JUMP_LABEL (insn);
+ rtx next = next_real_insn (target);
+
+ if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
+ && get_attr_type (next) == TYPE_STALL)
+ continue;
+ emit_insn_before (gen_stall (GEN_INT (1)), next);
+ }
+ }
+}
+
+/* We use the machine specific reorg pass for emitting CSYNC instructions
+ after conditional branches as needed.
+
+ The Blackfin is unusual in that a code sequence like
+ if cc jump label
+ r0 = (p0)
+ may speculatively perform the load even if the condition isn't true. This
+ happens for a branch that is predicted not taken, because the pipeline
+ isn't flushed or stalled, so the early stages of the following instructions,
+ which perform the memory reference, are allowed to execute before the
+ jump condition is evaluated.
+ Therefore, we must insert additional instructions in all places where this
+ could lead to incorrect behavior. The manual recommends CSYNC, while
+ VDSP seems to use NOPs (even though its corresponding compiler option is
+ named CSYNC).
+
+ When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
+ When optimizing for size, we turn the branch into a predicted taken one.
+ This may be slower due to mispredicts, but saves code size. */
+
+static void
+bfin_reorg (void)
+{
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (bfin_flag_schedule_insns2)
+ {
+ splitting_for_sched = 1;
+ split_all_insns ();
+ splitting_for_sched = 0;
+
+ add_sched_insns_for_speculation ();
+
+ timevar_push (TV_SCHED2);
+ schedule_insns ();
+ timevar_pop (TV_SCHED2);
+
+ /* Examine the schedule and insert nops as necessary for 64-bit parallel
+ instructions. */
+ bfin_gen_bundles ();
+ }
+
+ df_analyze ();
+
+ /* Doloop optimization */
+ if (cfun->machine->has_hardware_loops)
+ bfin_reorg_loops (dump_file);
+
+ workaround_speculation ();