+2009-09-07 Bernd Schmidt <bernd.schmidt@analog.com>
+
+ * config/bfin/bfin.md (UNSPEC_VOLATILE_STALL): New constant.
+ (attr "addrtype"): New member "spreg".
+ Use it if mem_spfp_address_operand is true for the address.
+ (attr "type"): New entry "stall".
+ (cpu_unit "load"): New.
+ (insn_reservations "load32", "loadp", "loadi"): Add reservation of
+ "load".
+ (insn_reservation "loadsp"): New.
+ (insn_reservation "load_stall1"): New.
+ (insn_reservation "load_stall3"): New.
+ (stall): New insn.
+ * config/bfin/predicates.md (const1_operand, const3_operand): New.
+ (mem_p_address_operand): Exclude stack and frame pointer based
+ addresses.
+ (mem_spfp_address_operand): New; match them here.
+ * config/bfin/bfin.c (add_sched_insns_for_speculation): New function.
+ (bfin_reorg): Call it if scheduling insns.
+ (bfin_gen_bundles): Remove dummy insns created by
+ add_sched_insns_for_speculation.
+
2009-09-07 Martin Jambor <mjambor@suse.cz>
PR middle-end/41282
for (insn = BB_HEAD (bb);; insn = next)
{
int at_end;
+ rtx delete_this = NULL_RTX;
+
if (INSN_P (insn))
{
- if (get_attr_type (insn) == TYPE_DSP32)
- slot[0] = insn;
- else if (slot[1] == NULL_RTX)
- slot[1] = insn;
+ enum attr_type type = get_attr_type (insn);
+
+ if (type == TYPE_STALL)
+ {
+ gcc_assert (n_filled == 0);
+ delete_this = insn;
+ }
else
- slot[2] = insn;
- n_filled++;
+ {
+ if (type == TYPE_DSP32)
+ slot[0] = insn;
+ else if (slot[1] == NULL_RTX)
+ slot[1] = insn;
+ else
+ slot[2] = insn;
+ n_filled++;
+ }
}
next = NEXT_INSN (insn);
/* BB_END can change due to emitting extra NOPs, so check here. */
at_end = insn == BB_END (bb);
- if (at_end || GET_MODE (next) == TImode)
+ if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
{
if ((n_filled < 2
|| !gen_one_bundle (slot))
n_filled = 0;
slot[0] = slot[1] = slot[2] = NULL_RTX;
}
+ if (delete_this != NULL_RTX)
+ delete_insn (delete_this);
if (at_end)
break;
}
}
}
+/* Called just before the final scheduling pass. If we need to insert NOPs
+ later on to work around speculative loads, insert special placeholder
+ insns that cause loads to be delayed for as many cycles as necessary
+ (and possible). This reduces the number of NOPs we need to add.
+ The dummy insns we generate are later removed by bfin_gen_bundles. */
+static void
+add_sched_insns_for_speculation (void)
+{
+ rtx insn;
+
+ if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
+ && ! ENABLE_WA_INDIRECT_CALLS)
+ return;
+
+ /* First pass: find predicted-false branches; if something after them
+ needs nops, insert them or change the branch to predict true. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx pat;
+
+ if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ continue;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (JUMP_P (insn))
+ {
+ if (any_condjump_p (insn)
+ && !cbranch_predicted_taken_p (insn))
+ {
+ rtx n = next_real_insn (insn);
+ emit_insn_before (gen_stall (GEN_INT (3)), n);
+ }
+ }
+ }
+
+ /* Second pass: for predicted-true branches, see if anything at the
+ branch destination needs extra nops. */
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (JUMP_P (insn)
+ && any_condjump_p (insn)
+ && (cbranch_predicted_taken_p (insn)))
+ {
+ rtx target = JUMP_LABEL (insn);
+ rtx next = next_real_insn (target);
+
+ if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
+ && get_attr_type (next) == TYPE_STALL)
+ continue;
+ emit_insn_before (gen_stall (GEN_INT (1)), next);
+ }
+ }
+}
+
/* We use the machine specific reorg pass for emitting CSYNC instructions
after conditional branches as needed.
split_all_insns ();
splitting_for_sched = 0;
+ add_sched_insns_for_speculation ();
+
timevar_push (TV_SCHED2);
schedule_insns ();
timevar_pop (TV_SCHED2);
(UNSPEC_VOLATILE_SSYNC 2)
(UNSPEC_VOLATILE_LOAD_FUNCDESC 3)
(UNSPEC_VOLATILE_STORE_EH_HANDLER 4)
- (UNSPEC_VOLATILE_DUMMY 5)])
+ (UNSPEC_VOLATILE_DUMMY 5)
+ (UNSPEC_VOLATILE_STALL 6)])
(define_constants
[(MACFLAG_NONE 0)
(MACFLAG_IH 11)])
(define_attr "type"
- "move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy"
+ "move,movcc,mvi,mcld,mcst,dsp32,mult,alu0,shft,brcc,br,call,misc,sync,compare,dummy,stall"
(const_string "misc"))
-(define_attr "addrtype" "32bit,preg,ireg"
+(define_attr "addrtype" "32bit,preg,spreg,ireg"
(cond [(and (eq_attr "type" "mcld")
(and (match_operand 0 "d_register_operand" "")
(match_operand 1 "mem_p_address_operand" "")))
(const_string "preg")
(and (eq_attr "type" "mcld")
(and (match_operand 0 "d_register_operand" "")
+ (match_operand 1 "mem_spfp_address_operand" "")))
+ (const_string "spreg")
+ (and (eq_attr "type" "mcld")
+ (and (match_operand 0 "d_register_operand" "")
(match_operand 1 "mem_i_address_operand" "")))
(const_string "ireg")
(and (eq_attr "type" "mcst")
(const_string "preg")
(and (eq_attr "type" "mcst")
(and (match_operand 1 "d_register_operand" "")
+ (match_operand 0 "mem_spfp_address_operand" "")))
+ (const_string "spreg")
+ (and (eq_attr "type" "mcst")
+ (and (match_operand 1 "d_register_operand" "")
(match_operand 0 "mem_i_address_operand" "")))
(const_string "ireg")]
(const_string "32bit")))
(define_cpu_unit "store" "bfin")
(define_cpu_unit "pregs" "bfin")
+;; A dummy unit used to delay scheduling of loads after a conditional
+;; branch.
+(define_cpu_unit "load" "bfin")
+
(define_reservation "core" "slot0+slot1+slot2")
(define_insn_reservation "alu" 1
(define_insn_reservation "load32" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "32bit")))
- "core")
+ "core+load")
(define_insn_reservation "loadp" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "preg")))
+ "(slot1|slot2)+pregs+load")
+
+(define_insn_reservation "loadsp" 1
+ (and (not (eq_attr "seq_insns" "multi"))
+ (and (eq_attr "type" "mcld") (eq_attr "addrtype" "spreg")))
"(slot1|slot2)+pregs")
(define_insn_reservation "loadi" 1
(and (not (eq_attr "seq_insns" "multi"))
(and (eq_attr "type" "mcld") (eq_attr "addrtype" "ireg")))
- "(slot1|slot2)")
+ "(slot1|slot2)+load")
(define_insn_reservation "store32" 1
(and (not (eq_attr "seq_insns" "multi"))
(define_insn_reservation "storep" 1
(and (not (eq_attr "seq_insns" "multi"))
- (and (eq_attr "type" "mcst") (eq_attr "addrtype" "preg")))
+ (and (eq_attr "type" "mcst")
+ (ior (eq_attr "addrtype" "preg") (eq_attr "addrtype" "spreg"))))
"(slot1|slot2)+pregs+store")
(define_insn_reservation "storei" 1
(eq_attr "seq_insns" "multi")
"core")
+(define_insn_reservation "load_stall1" 1
+ (and (eq_attr "type" "stall")
+ (match_operand 0 "const1_operand" ""))
+ "core+load*2")
+
+(define_insn_reservation "load_stall3" 1
+ (and (eq_attr "type" "stall")
+ (match_operand 0 "const3_operand" ""))
+ "core+load*4")
+
(absence_set "slot0" "slot1,slot2")
(absence_set "slot1" "slot2")
gcc_unreachable ();
})
+;; When used at a location where CC contains 1, causes a speculative load
+;; that is later cancelled. This is used for certain workarounds in
+;; interrupt handler prologues.
(define_insn "dummy_load"
[(unspec_volatile [(match_operand 0 "register_operand" "a")
(match_operand 1 "register_operand" "C")]
(set_attr "length" "4")
(set_attr "seq_insns" "multi")])
+;; A placeholder insn inserted before the final scheduling pass. It is used
+;; to improve scheduling of loads when workarounds for speculative loads are
+;; needed, by not placing them in the first few cycles after a conditional
+;; branch.
+(define_insn "stall"
+ [(unspec_volatile [(match_operand 0 "const_int_operand" "P1P3")]
+ UNSPEC_VOLATILE_STALL)]
+ ""
+ ""
+ [(set_attr "type" "stall")])
+
(define_insn "csync"
[(unspec_volatile [(const_int 0)] UNSPEC_VOLATILE_CSYNC)]
""
(and (match_code "const_int")
(match_test "op == const0_rtx || op == const1_rtx")))
+(define_predicate "const1_operand"
+ (and (match_code "const_int")
+ (match_test "op == const1_rtx")))
+
+(define_predicate "const3_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 3")))
+
(define_predicate "vec_shift_operand"
(ior (and (match_code "const_int")
(match_test "INTVAL (op) >= -16 && INTVAL (op) < 15"))
(define_predicate "bfin_direct_comparison_operator"
(match_code "eq,lt,le,leu,ltu"))
-;; The following two are used to compute the addrtype attribute. They return
+;; The following three are used to compute the addrtype attribute. They return
;; true if passed a memory address usable for a 16-bit load or store using a
;; P or I register, respectively. If neither matches, we know we have a
;; 32-bit instruction.
+;; We subdivide the P case into normal P registers, and SP/FP. We can assume
+;; that speculative loads through SP and FP are no problem, so this has
+;; an effect on the anomaly workaround code.
+
(define_predicate "mem_p_address_operand"
(match_code "mem")
{
if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
op = XEXP (op, 0);
gcc_assert (REG_P (op));
- return PREG_P (op);
+ return PREG_P (op) && op != stack_pointer_rtx && op != frame_pointer_rtx;
+})
+
+(define_predicate "mem_spfp_address_operand"
+ (match_code "mem")
+{
+ if (effective_address_32bit_p (op, mode))
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) == PLUS || GET_RTX_CLASS (GET_CODE (op)) == RTX_AUTOINC)
+ op = XEXP (op, 0);
+ gcc_assert (REG_P (op));
+ return op == stack_pointer_rtx || op == frame_pointer_rtx;
})
(define_predicate "mem_i_address_operand"