rtx insn, next;
rtx last_condjump = NULL_RTX;
int cycles_since_jump = INT_MAX;
+ int delay_added = 0;
if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS)
return;
for (insn = get_insns (); insn; insn = next)
{
rtx pat;
+ int delay_needed = 0;
next = find_next_insn_start (insn);
&& ! cbranch_predicted_taken_p (insn))
{
last_condjump = insn;
+ delay_added = 0;
cycles_since_jump = 0;
}
else
{
rtx load_insn = find_load (insn);
enum attr_type type = type_for_anomaly (insn);
- int delay_needed = 0;
+
if (cycles_since_jump < INT_MAX)
cycles_since_jump++;
if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
{
if (trapping_loads_p (load_insn))
- delay_needed = 3;
+ delay_needed = 4;
}
else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
- delay_needed = 4;
+ delay_needed = 3;
+ }
- if (delay_needed > cycles_since_jump)
- {
- rtx pat;
- int num_clobbers;
- rtx *op = recog_data.operand;
+ if (delay_needed > cycles_since_jump
+ && (delay_needed - cycles_since_jump) > delay_added)
+ {
+ rtx pat1;
+ int num_clobbers;
+ rtx *op = recog_data.operand;
- delay_needed -= cycles_since_jump;
+ delay_needed -= cycles_since_jump;
- extract_insn (last_condjump);
- if (optimize_size)
- {
- pat = gen_cbranch_predicted_taken (op[0], op[1], op[2],
- op[3]);
- cycles_since_jump = INT_MAX;
- }
- else
- /* Do not adjust cycles_since_jump in this case, so that
- we'll increase the number of NOPs for a subsequent insn
- if necessary. */
- pat = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
- GEN_INT (delay_needed));
- PATTERN (last_condjump) = pat;
- INSN_CODE (last_condjump) = recog (pat, insn, &num_clobbers);
+ extract_insn (last_condjump);
+ if (optimize_size)
+ {
+ pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
+ op[3]);
+ cycles_since_jump = INT_MAX;
+ }
+ else
+ {
+ /* Do not adjust cycles_since_jump in this case, so that
+ we'll increase the number of NOPs for a subsequent insn
+ if necessary. */
+ pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
+ GEN_INT (delay_needed));
+ delay_added = delay_needed;
}
+ PATTERN (last_condjump) = pat1;
+ INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
+ }
+ if (CALL_P (insn))
+ {
+ cycles_since_jump = INT_MAX;
+ delay_added = 0;
}
}
+
/* Second pass: for predicted-true branches, see if anything at the
branch destination needs extra nops. */
- if (! ENABLE_WA_SPECULATIVE_SYNCS)
- return;
-
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
int cycles_since_jump;
{
rtx target = JUMP_LABEL (insn);
rtx label = target;
+ rtx next_tgt;
+
cycles_since_jump = 0;
- for (; target && cycles_since_jump < 3; target = NEXT_INSN (target))
+ for (; target && cycles_since_jump < 3; target = next_tgt)
{
rtx pat;
+ next_tgt = find_next_insn_start (target);
+
if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
continue;
if (INSN_P (target))
{
+ rtx load_insn = find_load (target);
enum attr_type type = type_for_anomaly (target);
int delay_needed = 0;
if (cycles_since_jump < INT_MAX)
cycles_since_jump++;
- if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
+ if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
+ {
+ if (trapping_loads_p (load_insn))
+ delay_needed = 2;
+ }
+ else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
delay_needed = 2;
if (delay_needed > cycles_since_jump)