+ /* The successor block of the loop. This is the one the loop_end insn
+ falls into. */
+ basic_block successor;
+
+ /* The last instruction in the tail. */
+ rtx last_insn;
+
+ /* The loop_end insn. */
+ rtx loop_end;
+
+ /* The iteration register. */
+ rtx iter_reg;
+
+ /* The new initialization insn. */
+ rtx init;
+
+ /* The new initialization instruction. */
+ rtx loop_init;
+
+ /* The new label placed at the beginning of the loop. */
+ rtx start_label;
+
+ /* The new label placed at the end of the loop. */
+ rtx end_label;
+
+ /* The length of the loop. */
+ int length;
+
+ /* The nesting depth of the loop. */
+ int depth;
+
+ /* Nonzero if we can't optimize this loop. */
+ int bad;
+
+ /* True if we have visited this loop. */
+ int visited;
+
+ /* True if this loop body clobbers any of LC0, LT0, or LB0. */
+ int clobber_loop0;
+
+ /* True if this loop body clobbers any of LC1, LT1, or LB1. */
+ int clobber_loop1;
+
+ /* Next loop in the graph. */
+ struct loop_info *next;
+
+ /* Immediate outer loop of this loop. */
+ struct loop_info *outer;
+
+ /* Vector of blocks only within the loop, including those within
+ inner loops. */
+ VEC (basic_block,heap) *blocks;
+
+ /* Same information in a bitmap. */
+ bitmap block_bitmap;
+
+ /* Vector of inner loops within this loop */
+ VEC (loop_info,heap) *loops;
+};
+
+/* Print a one-line description of every loop on the list LOOPS to
+   dump_file: its number, a "(bad)" marker when set, the index of its head
+   block, its depth, the indices of its blocks and the numbers of its
+   inner loops.  */
+
+static void
+bfin_dump_loops (loop_info loops)
+{
+  loop_info cur = loops;
+
+  while (cur)
+    {
+      loop_info nested;
+      basic_block blk;
+      unsigned pos;
+
+      fprintf (dump_file, ";; loop %d: ", cur->loop_no);
+      if (cur->bad)
+        fprintf (dump_file, "(bad) ");
+      fprintf (dump_file, "{head:%d, depth:%d}", cur->head->index, cur->depth);
+
+      /* Member blocks.  */
+      fprintf (dump_file, " blocks: [ ");
+      pos = 0;
+      while (VEC_iterate (basic_block, cur->blocks, pos, blk))
+        {
+          fprintf (dump_file, "%d ", blk->index);
+          pos++;
+        }
+      fprintf (dump_file, "] ");
+
+      /* Loops nested inside this one.  */
+      fprintf (dump_file, " inner loops: [ ");
+      pos = 0;
+      while (VEC_iterate (loop_info, cur->loops, pos, nested))
+        {
+          fprintf (dump_file, "%d ", nested->loop_no);
+          pos++;
+        }
+      fprintf (dump_file, "]\n");
+
+      cur = cur->next;
+    }
+  fprintf (dump_file, "\n");
+}
+
+/* Return true if basic block BB is recorded in LOOP's block bitmap,
+   i.e. if BB's index is set in LOOP->block_bitmap.  */
+
+static bool
+bfin_bb_in_loop (loop_info loop, basic_block bb)
+{
+  bitmap members = loop->block_bitmap;
+  int block_no = bb->index;
+
+  return bitmap_bit_p (members, block_no);
+}
+
+/* Walk every insn of every block recorded in LOOP->blocks and return true
+   as soon as one of them mentions REG in its pattern.  The insn LOOP_END
+   itself is never counted.  Return false if no insn mentions REG.  */
+
+static bool
+bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
+{
+  unsigned pos = 0;
+  basic_block blk;
+
+  while (VEC_iterate (basic_block, loop->blocks, pos, blk))
+    {
+      /* First insn past the end of this block; nothing in this walk
+         changes the insn chain, so it can be computed once.  */
+      rtx stop = NEXT_INSN (BB_END (blk));
+      rtx cur;
+
+      for (cur = BB_HEAD (blk); cur != stop; cur = NEXT_INSN (cur))
+        if (INSN_P (cur)
+            && cur != loop_end
+            && reg_mentioned_p (reg, PATTERN (cur)))
+          return true;
+
+      pos++;
+    }
+  return false;
+}
+
+/* Return a conservative estimate of the length of INSN.  On top of the
+   insn's length attribute (only added for real insns), extra room is
+   reserved for conditional jumps when not optimizing for size (8 with
+   TARGET_CSYNC_ANOMALY, otherwise 6 with TARGET_SPECLD_ANOMALY) and for
+   labels (4 with TARGET_CSYNC_ANOMALY).  */
+
+static int
+length_for_loop (rtx insn)
+{
+  int extra = 0;
+
+  if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
+    extra = TARGET_CSYNC_ANOMALY ? 8 : (TARGET_SPECLD_ANOMALY ? 6 : 0);
+  else if (LABEL_P (insn) && TARGET_CSYNC_ANOMALY)
+    extra = 4;
+
+  if (!INSN_P (insn))
+    return extra;
+
+  return extra + get_attr_length (insn);
+}
+
+/* Optimize LOOP: try to turn it into a hardware loop.
+
+   Inner loops are optimized first (recursively), which also yields LOOP's
+   nesting depth and the hardware loop registers already claimed by the
+   loops inside it.  LOOP is then rejected if it was already marked bad,
+   nests too deeply, keeps its counter outside a D or P register, lies too
+   far from its entry or is too long, uses the counter in its body, leaves
+   no free set of loop registers, or has no acceptable last instruction.
+
+   On success an LSETUP sequence is emitted at the loop entry, the loop_end
+   insn is deleted and the end label is placed before the last insn.
+
+   On failure LOOP->bad is set and, if the counter lives in a D or P
+   register, the loop_end insn is replaced by an explicit decrement,
+   compare and conditional branch back to the start label.  */
+
+static void
+bfin_optimize_loop (loop_info loop)
+{
+  basic_block bb;
+  loop_info inner;
+  rtx insn, init_insn, last_insn, nop_insn;
+  rtx loop_init, start_label, end_label;
+  rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
+  rtx iter_reg;
+  rtx lc_reg, lt_reg, lb_reg;
+  rtx seq, seq_end;
+  int length;
+  unsigned ix;
+  int inner_depth = 0;
+
+  if (loop->visited)
+    return;
+
+  loop->visited = 1;
+
+  if (loop->bad)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Every loop contains in its list of inner loops every loop nested inside
+     it, even if there are intermediate loops.  This works because we're doing
+     a depth-first search here and never visit a loop more than once.  */
+  for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++)
+    {
+      bfin_optimize_loop (inner);
+
+      if (!inner->bad && inner_depth < inner->depth)
+        {
+          inner_depth = inner->depth;
+
+          loop->clobber_loop0 |= inner->clobber_loop0;
+          loop->clobber_loop1 |= inner->clobber_loop1;
+        }
+    }
+
+  loop->depth = inner_depth + 1;
+  if (loop->depth > MAX_LOOP_DEPTH)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Get the loop iteration register.  */
+  iter_reg = loop->iter_reg;
+
+  if (!DPREG_P (iter_reg))
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d iteration count NOT in PREG or DREG\n",
+                 loop->loop_no);
+      goto bad_loop;
+    }
+
+  if (loop->incoming_src)
+    {
+      /* Make sure the predecessor is before the loop start label, as required by
+         the LSETUP instruction.  */
+      length = 0;
+      for (insn = BB_END (loop->incoming_src);
+           insn && insn != loop->start_label;
+           insn = NEXT_INSN (insn))
+        length += length_for_loop (insn);
+
+      if (!insn)
+        {
+          if (dump_file)
+            fprintf (dump_file, ";; loop %d lsetup not before loop_start\n",
+                     loop->loop_no);
+          goto bad_loop;
+        }
+
+      if (length > MAX_LSETUP_DISTANCE)
+        {
+          if (dump_file)
+            fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no);
+          goto bad_loop;
+        }
+    }
+
+  /* Check if start_label appears before loop_end and calculate the
+     offset between them.  We calculate the length of instructions
+     conservatively.  */
+  length = 0;
+  for (insn = loop->start_label;
+       insn && insn != loop->loop_end;
+       insn = NEXT_INSN (insn))
+    length += length_for_loop (insn);
+
+  if (!insn)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
+                 loop->loop_no);
+      goto bad_loop;
+    }
+
+  loop->length = length;
+  if (loop->length > MAX_LOOP_LENGTH)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Scan all the blocks to make sure they don't use iter_reg.  */
+  if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* Scan all the insns to see if the loop body clobbers
+     any hardware loop registers.  */
+
+  reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
+  reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
+  reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
+  reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
+  reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
+  reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
+
+  for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
+    {
+      rtx insn;
+
+      for (insn = BB_HEAD (bb);
+           insn != NEXT_INSN (BB_END (bb));
+           insn = NEXT_INSN (insn))
+        {
+          if (!INSN_P (insn))
+            continue;
+
+          if (reg_set_p (reg_lc0, insn)
+              || reg_set_p (reg_lt0, insn)
+              || reg_set_p (reg_lb0, insn))
+            loop->clobber_loop0 = 1;
+
+          if (reg_set_p (reg_lc1, insn)
+              || reg_set_p (reg_lt1, insn)
+              || reg_set_p (reg_lb1, insn))
+            loop->clobber_loop1 = 1;
+        }
+    }
+
+  if ((loop->clobber_loop0 && loop->clobber_loop1)
+      || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
+    {
+      loop->depth = MAX_LOOP_DEPTH + 1;
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d no loop reg available\n",
+                 loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* There should be an instruction before the loop_end instruction
+     in the same basic block.  And the instruction must not be
+     - JUMP
+     - CONDITIONAL BRANCH
+     - CALL
+     - CSYNC
+     - SSYNC
+     - Returns (RTS, RTN, etc.)  */
+
+  bb = loop->tail;
+  last_insn = PREV_INSN (loop->loop_end);
+
+  /* Search backwards for a real insn, stepping into the single predecessor
+     block whenever the current block holds none.  */
+  while (1)
+    {
+      for (; last_insn != PREV_INSN (BB_HEAD (bb));
+           last_insn = PREV_INSN (last_insn))
+        if (INSN_P (last_insn))
+          break;
+
+      if (last_insn != PREV_INSN (BB_HEAD (bb)))
+        break;
+
+      if (single_pred_p (bb)
+          && single_pred (bb) != ENTRY_BLOCK_PTR)
+        {
+          bb = single_pred (bb);
+          last_insn = BB_END (bb);
+          continue;
+        }
+      else
+        {
+          last_insn = NULL_RTX;
+          break;
+        }
+    }
+
+  if (!last_insn)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d has no last instruction\n",
+                 loop->loop_no);
+      goto bad_loop;
+    }
+
+  if (JUMP_P (last_insn))
+    {
+      loop_info inner = bb->aux;
+      if (inner
+          && inner->outer == loop
+          && inner->loop_end == last_insn
+          && inner->depth == 1)
+        /* This jump_insn is the exact loop_end of an inner loop
+           and to be optimized away.  So use the inner's last_insn.  */
+        last_insn = inner->last_insn;
+      else
+        {
+          if (dump_file)
+            fprintf (dump_file, ";; loop %d has bad last instruction\n",
+                     loop->loop_no);
+          goto bad_loop;
+        }
+    }
+  else if (CALL_P (last_insn)
+           || (GET_CODE (PATTERN (last_insn)) != SEQUENCE
+               && get_attr_type (last_insn) == TYPE_SYNC)
+           || recog_memoized (last_insn) == CODE_FOR_return_internal)
+    {
+      if (dump_file)
+        fprintf (dump_file, ";; loop %d has bad last instruction\n",
+                 loop->loop_no);
+      goto bad_loop;
+    }
+
+  /* An asm or a multi-insn pattern cannot itself carry the end label;
+     append a nop and use that as the last insn instead.  */
+  if (GET_CODE (PATTERN (last_insn)) == ASM_INPUT
+      || asm_noperands (PATTERN (last_insn)) >= 0
+      || (GET_CODE (PATTERN (last_insn)) != SEQUENCE
+          && get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI))
+    {
+      nop_insn = emit_insn_after (gen_nop (), last_insn);
+      last_insn = nop_insn;
+    }
+
+  loop->last_insn = last_insn;
+
+  /* The loop is good for replacement.  */
+  start_label = loop->start_label;
+  end_label = gen_label_rtx ();
+  iter_reg = loop->iter_reg;
+
+  /* Pick the register set: an innermost loop takes LC1/LT1/LB1 when they
+     are free, everything else takes LC0/LT0/LB0.  */
+  if (loop->depth == 1 && !loop->clobber_loop1)
+    {
+      lc_reg = reg_lc1;
+      lt_reg = reg_lt1;
+      lb_reg = reg_lb1;
+      loop->clobber_loop1 = 1;
+    }
+  else
+    {
+      lc_reg = reg_lc0;
+      lt_reg = reg_lt0;
+      lb_reg = reg_lb0;
+      loop->clobber_loop0 = 1;
+    }
+
+  /* If iter_reg is a DREG, we need generate an instruction to load
+     the loop count into LC register.  */
+  if (D_REGNO_P (REGNO (iter_reg)))
+    {
+      init_insn = gen_movsi (lc_reg, iter_reg);
+      loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
+                                               lb_reg, end_label,
+                                               lc_reg);
+    }
+  else if (P_REGNO_P (REGNO (iter_reg)))
+    {
+      init_insn = NULL_RTX;
+      loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
+                                            lb_reg, end_label,
+                                            lc_reg, iter_reg);
+    }
+  else
+    gcc_unreachable ();
+
+  loop->init = init_insn;
+  loop->end_label = end_label;
+  loop->loop_init = loop_init;
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; replacing loop %d initializer with\n",
+               loop->loop_no);
+      print_rtl_single (dump_file, loop->loop_init);
+      fprintf (dump_file, ";; replacing loop %d terminator with\n",
+               loop->loop_no);
+      print_rtl_single (dump_file, loop->loop_end);
+    }
+
+  start_sequence ();
+
+  if (loop->init != NULL_RTX)
+    emit_insn (loop->init);
+  seq_end = emit_insn (loop->loop_init);
+
+  seq = get_insns ();
+  end_sequence ();
+
+  if (loop->incoming_src)
+    {
+      /* All entries come from one block: put the setup at its end, in
+         front of the final jump if the loop is not entered solely by a
+         fallthrough edge.  */
+      rtx prev = BB_END (loop->incoming_src);
+      if (VEC_length (edge, loop->incoming) > 1
+          || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
+        {
+          gcc_assert (JUMP_P (prev));
+          prev = PREV_INSN (prev);
+        }
+      emit_insn_after (seq, prev);
+    }
+  else
+    {
+      basic_block new_bb;
+      edge e;
+      edge_iterator ei;
+
+      if (loop->head != loop->incoming_dest)
+        {
+          FOR_EACH_EDGE (e, ei, loop->head->preds)
+            {
+              if (e->flags & EDGE_FALLTHRU)
+                {
+                  rtx newjump = gen_jump (loop->start_label);
+                  emit_insn_before (newjump, BB_HEAD (loop->head));
+                  new_bb = create_basic_block (newjump, newjump, loop->head->prev_bb);
+                  /* The new block must have been linked in directly in
+                     front of the loop head.  This is a comparison; it was
+                     previously written as an assignment, which made the
+                     assertion vacuous.  */
+                  gcc_assert (new_bb == loop->head->prev_bb);
+                  break;
+                }
+            }
+        }
+
+      emit_insn_before (seq, BB_HEAD (loop->head));
+      seq = emit_label_before (gen_label_rtx (), seq);
+
+      new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb);
+      FOR_EACH_EDGE (e, ei, loop->incoming)
+        {
+          if (!(e->flags & EDGE_FALLTHRU)
+              || e->dest != loop->head)
+            redirect_edge_and_branch_force (e, new_bb);
+          else
+            redirect_edge_succ (e, new_bb);
+        }
+    }
+
+  delete_insn (loop->loop_end);
+  /* Insert the loop end label before the last instruction of the loop.  */
+  emit_label_before (loop->end_label, loop->last_insn);
+
+  return;
+
+ bad_loop:
+
+  if (dump_file)
+    fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
+
+  loop->bad = 1;
+
+  if (DPREG_P (loop->iter_reg))
+    {
+      /* If loop->iter_reg is a DREG or PREG, we can split it here
+         without scratch register.  */
+      rtx insn;
+
+      emit_insn_before (gen_addsi3 (loop->iter_reg,
+                                    loop->iter_reg,
+                                    constm1_rtx),
+                        loop->loop_end);
+
+      emit_insn_before (gen_cmpsi (loop->iter_reg, const0_rtx),
+                        loop->loop_end);
+
+      insn = emit_jump_insn_before (gen_bne (loop->start_label),
+                                    loop->loop_end);
+
+      JUMP_LABEL (insn) = loop->start_label;
+      LABEL_NUSES (loop->start_label)++;
+      delete_insn (loop->loop_end);
+    }
+}
+
+/* Called from bfin_discover_loops when a potential loop end is found.  LOOP
+   is a newly set up structure describing the loop, it is this function's
+   responsibility to fill most of it.  TAIL_BB and TAIL_INSN point to the
+   loop_end insn and its enclosing basic block.
+
+   The function first collects the loop's blocks by following successor
+   edges from the head for as long as the iteration register is live on
+   entry to the successor; reaching the exit block marks the loop bad.  It
+   then records every edge entering the loop from outside and tries to find
+   a single source block or a single destination block shared by all of
+   them.  If neither exists, forwarder blocks feeding the loop are pulled
+   into it and the search is retried once; if that fails too, the loop is
+   marked bad.  */
+
+static void
+bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn)
+{
+  unsigned dwork = 0;
+  basic_block bb;
+  VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20);
+
+  loop->tail = tail_bb;
+  loop->head = BRANCH_EDGE (tail_bb)->dest;
+  loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
+  loop->loop_end = tail_insn;
+  loop->last_insn = NULL_RTX;
+  loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1));
+  loop->depth = loop->length = 0;
+  loop->visited = 0;
+  loop->clobber_loop0 = loop->clobber_loop1 = 0;
+  loop->outer = NULL;
+  loop->loops = NULL;
+  loop->incoming = VEC_alloc (edge, gc, 2);
+  /* The caller hands us uncleared memory.  Give the entry fields a defined
+     value so that a loop without any incoming edge does not leave them
+     uninitialized for later readers.  */
+  loop->incoming_src = NULL;
+  loop->incoming_dest = NULL;
+  loop->init = loop->loop_init = NULL_RTX;
+  loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
+  loop->end_label = NULL_RTX;
+  loop->bad = 0;
+
+  VEC_safe_push (basic_block, heap, works, loop->head);
+
+  while (VEC_iterate (basic_block, works, dwork++, bb))
+    {
+      edge e;
+      edge_iterator ei;
+      if (bb == EXIT_BLOCK_PTR)
+        {
+          /* We've reached the exit block.  The loop must be bad.  */
+          if (dump_file)
+            fprintf (dump_file,
+                     ";; Loop is bad - reached exit block while scanning\n");
+          loop->bad = 1;
+          break;
+        }
+
+      if (bitmap_bit_p (loop->block_bitmap, bb->index))
+        continue;
+
+      /* We've not seen this block before.  Add it to the loop's
+         list and then add each successor to the work list.  */
+
+      VEC_safe_push (basic_block, heap, loop->blocks, bb);
+      bitmap_set_bit (loop->block_bitmap, bb->index);
+
+      if (bb != tail_bb)
+        {
+          FOR_EACH_EDGE (e, ei, bb->succs)
+            {
+              basic_block succ = e->dest;
+              /* Only follow edges along which the counter is still live.  */
+              if (!REGNO_REG_SET_P (df_get_live_in (succ),
+                                    REGNO (loop->iter_reg)))
+                continue;
+              if (!VEC_space (basic_block, works, 1))
+                {
+                  /* Reuse the already processed front of the work list
+                     before growing it.  */
+                  if (dwork)
+                    {
+                      VEC_block_remove (basic_block, works, 0, dwork);
+                      dwork = 0;
+                    }
+                  else
+                    VEC_reserve (basic_block, heap, works, 1);
+                }
+              VEC_quick_push (basic_block, works, succ);
+            }
+        }
+    }
+
+  /* Find the predecessor, and make sure nothing else jumps into this loop.  */
+  if (!loop->bad)
+    {
+      int pass, retry;
+      for (dwork = 0; VEC_iterate (basic_block, loop->blocks, dwork, bb); dwork++)
+        {
+          edge e;
+          edge_iterator ei;
+          FOR_EACH_EDGE (e, ei, bb->preds)
+            {
+              basic_block pred = e->src;
+
+              if (!bfin_bb_in_loop (loop, pred))
+                {
+                  if (dump_file)
+                    fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
+                             loop->loop_no, pred->index,
+                             e->dest->index);
+                  VEC_safe_push (edge, gc, loop->incoming, e);
+                }
+            }
+        }
+
+      for (pass = 0, retry = 1; retry && pass < 2; pass++)
+        {
+          edge e;
+          edge_iterator ei;
+          bool first = true;
+          retry = 0;
+
+          /* Look for a source or destination block common to all
+             incoming edges.  */
+          FOR_EACH_EDGE (e, ei, loop->incoming)
+            {
+              if (first)
+                {
+                  loop->incoming_src = e->src;
+                  loop->incoming_dest = e->dest;
+                  first = false;
+                }
+              else
+                {
+                  if (e->dest != loop->incoming_dest)
+                    loop->incoming_dest = NULL;
+                  if (e->src != loop->incoming_src)
+                    loop->incoming_src = NULL;
+                }
+              if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
+                {
+                  if (pass == 0)
+                    {
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 ";; retrying loop %d with forwarder blocks\n",
+                                 loop->loop_no);
+                      retry = 1;
+                      break;
+                    }
+                  loop->bad = 1;
+                  if (dump_file)
+                    fprintf (dump_file,
+                             ";; can't find suitable entry for loop %d\n",
+                             loop->loop_no);
+                  goto out;
+                }
+            }
+          if (retry)
+            {
+              /* Only retry if a forwarder block could actually be added.  */
+              retry = 0;
+              FOR_EACH_EDGE (e, ei, loop->incoming)
+                {
+                  if (forwarder_block_p (e->src))
+                    {
+                      edge e2;
+                      edge_iterator ei2;
+
+                      if (dump_file)
+                        fprintf (dump_file,
+                                 ";; Adding forwarder block %d to loop %d and retrying\n",
+                                 e->src->index, loop->loop_no);
+                      VEC_safe_push (basic_block, heap, loop->blocks, e->src);
+                      bitmap_set_bit (loop->block_bitmap, e->src->index);
+                      FOR_EACH_EDGE (e2, ei2, e->src->preds)
+                        VEC_safe_push (edge, gc, loop->incoming, e2);
+                      VEC_unordered_remove (edge, loop->incoming, ei.index);
+                      retry = 1;
+                      break;
+                    }
+                }
+            }
+        }
+    }
+
+ out:
+  VEC_free (basic_block, heap, works);
+}
+
+/* Find the candidate hardware loops of the current function and work out
+   how they nest.  Bitmaps are allocated on STACK; progress is reported to
+   DUMP_FILE when it is non-null.  Every basic block's aux field is reset,
+   and set to the new loop_info for blocks that end in a loop_end insn.
+   Returns the list of all loops found, linked through their next field;
+   loops that cannot be used have their bad flag set.  */
+static loop_info
+bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file)
+{
+  loop_info found = NULL;
+  loop_info cur;
+  basic_block bb;
+  bitmap common;
+  int count = 0;
+
+  /* Find all the possible loop tails.  This means searching for every
+     loop_end instruction.  For each one found, create a loop_info
+     structure and let bfin_discover_loop fill it in.  */
+  FOR_EACH_BB (bb)
+    {
+      rtx tail;
+
+      /* Step back over trailing notes to the last non-note insn.  */
+      for (tail = BB_END (bb); GET_CODE (tail) == NOTE; tail = PREV_INSN (tail))
+        ;
+
+      bb->aux = NULL;
+
+      if (!INSN_P (tail) || recog_memoized (tail) != CODE_FOR_loop_end)
+        continue;
+
+      /* A possible loop end.  New loops go on the front of the list.  */
+      cur = XNEW (struct loop_info);
+      cur->next = found;
+      found = cur;
+      cur->loop_no = count++;
+      cur->blocks = VEC_alloc (basic_block, heap, 20);
+      cur->block_bitmap = BITMAP_ALLOC (stack);
+      bb->aux = cur;
+
+      if (dump_file)
+        {
+          fprintf (dump_file, ";; potential loop %d ending at\n",
+                   cur->loop_no);
+          print_rtl_single (dump_file, tail);
+        }
+
+      bfin_discover_loop (cur, bb, tail);
+    }
+
+  /* Compute loop nestings by intersecting the block sets of every pair of
+     usable loops.  */
+  common = BITMAP_ALLOC (stack);
+  for (cur = found; cur; cur = cur->next)
+    {
+      loop_info peer;
+
+      if (cur->bad)
+        continue;
+
+      for (peer = cur->next; peer; peer = peer->next)
+        {
+          if (peer->bad)
+            continue;
+
+          bitmap_and (common, peer->block_bitmap, cur->block_bitmap);
+          if (bitmap_empty_p (common))
+            /* Disjoint loops are unrelated.  */
+            continue;
+
+          if (bitmap_equal_p (common, peer->block_bitmap))
+            {
+              /* PEER lies entirely within CUR.  */
+              peer->outer = cur;
+              VEC_safe_push (loop_info, heap, cur->loops, peer);
+            }
+          else if (bitmap_equal_p (common, cur->block_bitmap))
+            {
+              /* CUR lies entirely within PEER.  */
+              cur->outer = peer;
+              VEC_safe_push (loop_info, heap, peer->loops, cur);
+            }
+          else
+            {
+              /* Partial overlap: neither loop can be used.  */
+              if (dump_file)
+                fprintf (dump_file,
+                         ";; can't find suitable nesting for loops %d and %d\n",
+                         cur->loop_no, peer->loop_no);
+              cur->bad = peer->bad = 1;
+            }
+        }
+    }
+  BITMAP_FREE (common);
+
+  return found;
+}
+
+/* Release every loop_info on the list LOOPS together with its inner-loop
+   vector, its block vector and its block bitmap.  */
+static void
+free_loops (loop_info loops)
+{
+  loop_info cur, following;
+
+  for (cur = loops; cur; cur = following)
+    {
+      /* Fetch the link before the node is released.  */
+      following = cur->next;
+
+      VEC_free (loop_info, heap, cur->loops);
+      VEC_free (basic_block, heap, cur->blocks);
+      BITMAP_FREE (cur->block_bitmap);
+      XDELETE (cur);
+    }
+}
+
+/* Read back the block order index that bfin_reorder_loops stores in
+   BB->aux.  The conversion goes through size_t so that the pointer is not
+   cast directly to a possibly narrower integer type.  */
+#define BB_AUX_INDEX(BB) ((unsigned) (size_t) (BB)->aux)
+
+/* The taken-branch edge from the loop end can actually go forward.  Since the
+   Blackfin's LSETUP instruction requires that the loop end be after the loop
+   start, try to reorder a loop's basic blocks when we find such a case.
+
+   For every usable loop on LOOPS whose head is not laid out before its
+   tail, the head block is moved directly in front of the first of its
+   successors that belongs to the loop and precedes the tail.  Messages go
+   to DUMP_FILE when it is non-null.  The new order is committed through
+   cfg_layout_finalize and dataflow information is recomputed.  */
+static void
+bfin_reorder_loops (loop_info loops, FILE *dump_file)
+{
+  basic_block bb;
+  loop_info loop;
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+  cfg_layout_initialize (0);
+
+  for (loop = loops; loop; loop = loop->next)
+    {
+      unsigned index;
+      basic_block bb;
+      edge e;
+      edge_iterator ei;
+
+      if (loop->bad)
+        continue;
+
+      /* Recreate an index for basic blocks that represents their order.
+         This is redone for each loop because an earlier iteration may
+         have moved a block.  */
+      for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0;
+           bb != EXIT_BLOCK_PTR;
+           bb = bb->next_bb, index++)
+        bb->aux = (PTR) (size_t) index;
+
+      /* Nothing to do if the head already precedes the tail.  */
+      if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail))
+        continue;
+
+      FOR_EACH_EDGE (e, ei, loop->head->succs)
+        {
+          if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
+              && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
+            {
+              basic_block start_bb = e->dest;
+              basic_block start_prev_bb = start_bb->prev_bb;
+
+              if (dump_file)
+                fprintf (dump_file, ";; Moving block %d before block %d\n",
+                         loop->head->index, start_bb->index);
+              /* Unlink the head from its current position...  */
+              loop->head->prev_bb->next_bb = loop->head->next_bb;
+              loop->head->next_bb->prev_bb = loop->head->prev_bb;
+
+              /* ...and relink it directly in front of START_BB.  */
+              loop->head->prev_bb = start_prev_bb;
+              loop->head->next_bb = start_bb;
+              start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
+              break;
+            }
+        }
+    }
+
+  /* Record the resulting block order in the aux fields before
+     finalizing the layout.  */
+  FOR_EACH_BB (bb)
+    {
+      if (bb->next_bb != EXIT_BLOCK_PTR)
+        bb->aux = bb->next_bb;
+      else
+        bb->aux = NULL;
+    }
+  cfg_layout_finalize ();
+  df_analyze ();
+}
+
+/* Run from machine_dependent_reorg, this pass looks for doloop_end insns
+   and tries to rewrite the RTL of these loops so that proper Blackfin
+   hardware loops are generated.
+
+   Loops are discovered twice: the first result is only used to reorder
+   basic blocks, after which the loops are discovered again and each one is
+   passed to bfin_optimize_loop.  Diagnostics go to DUMP_FILE when it is
+   non-null.  All basic block aux fields are cleared on exit.  */
+
+static void
+bfin_reorg_loops (FILE *dump_file)
+{
+  loop_info loops = NULL;
+  loop_info loop;
+  basic_block bb;
+  bitmap_obstack stack;
+
+  bitmap_obstack_initialize (&stack);
+
+  if (dump_file)
+    fprintf (dump_file, ";; Find loops, first pass\n\n");
+
+  loops = bfin_discover_loops (&stack, dump_file);
+
+  if (dump_file)
+    bfin_dump_loops (loops);
+
+  bfin_reorder_loops (loops, dump_file);
+  free_loops (loops);
+
+  if (dump_file)
+    fprintf (dump_file, ";; Find loops, second pass\n\n");
+
+  loops = bfin_discover_loops (&stack, dump_file);
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; All loops found:\n\n");
+      bfin_dump_loops (loops);
+    }
+
+  /* Now apply the optimizations.  */
+  for (loop = loops; loop; loop = loop->next)
+    bfin_optimize_loop (loop);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, ";; After hardware loops optimization:\n\n");
+      bfin_dump_loops (loops);
+    }
+
+  free_loops (loops);
+
+  /* Every bitmap allocated on STACK has been freed by now; give the
+     obstack's own memory back as well so it is not leaked on each call.  */
+  bitmap_obstack_release (&stack);
+
+  if (dump_file)
+    print_rtl (dump_file, get_insns ());
+
+  FOR_EACH_BB (bb)
+    bb->aux = NULL;
+}
+\f
+/* Try to turn the insns in SLOT into one three-insn bundle.  SLOT[1] must
+   be set.  The bundle is refused (returning false, nothing changed) if
+   anything other than deleted-insn notes sits between two filled slots.
+   Otherwise an empty SLOT[0] or SLOT[2] is filled with a nop, the insn
+   locators of slots 1 and 2 are aligned with slot 0, the first two insns
+   are given SImode and the last one QImode, and true is returned.  */
+static bool
+gen_one_bundle (rtx slot[3])
+{
+  rtx t;
+  int i;
+
+  gcc_assert (slot[1] != NULL_RTX);
+
+  /* Verify that we really can do the multi-issue: only deleted notes may
+     separate the slot insns.  */
+  if (slot[0])
+    for (t = NEXT_INSN (slot[0]); t != slot[1]; t = NEXT_INSN (t))
+      if (!(GET_CODE (t) == NOTE && NOTE_KIND (t) == NOTE_INSN_DELETED))
+        return false;
+
+  if (slot[2])
+    for (t = NEXT_INSN (slot[1]); t != slot[2]; t = NEXT_INSN (t))
+      if (!(GET_CODE (t) == NOTE && NOTE_KIND (t) == NOTE_INSN_DELETED))
+        return false;
+
+  /* Pad missing slots with nops.  */
+  if (slot[0] == NULL_RTX)
+    {
+      slot[0] = emit_insn_before (gen_mnop (), slot[1]);
+      df_insn_rescan (slot[0]);
+    }
+  if (slot[2] == NULL_RTX)
+    {
+      slot[2] = emit_insn_after (gen_forced_nop (), slot[1]);
+      df_insn_rescan (slot[2]);
+    }
+
+  /* Avoid line number information being printed inside one bundle.  */
+  for (i = 1; i <= 2; i++)
+    if (INSN_LOCATOR (slot[i])
+        && INSN_LOCATOR (slot[i]) != INSN_LOCATOR (slot[0]))
+      INSN_LOCATOR (slot[i]) = INSN_LOCATOR (slot[0]);
+
+  /* Terminate them with "|| " instead of ";" in the output.  */
+  PUT_MODE (slot[0], SImode);
+  PUT_MODE (slot[1], SImode);
+  /* Terminate the bundle, for the benefit of reorder_var_tracking_notes.  */
+  PUT_MODE (slot[2], QImode);
+  return true;
+}
+
+/* Go through all insns, and use the information generated during scheduling
+ to mark bundles of instructions issued simultaneously.
+
+ Within each basic block, insns are collected into SLOT: a TYPE_DSP32 insn
+ goes into slot 0, any other insn into slot 1 if that is still free and
+ into slot 2 otherwise. A group is closed at the end of the block or when
+ the next insn has TImode (presumably the scheduler's marker for the start
+ of a new issue group -- TODO confirm). A closed group holding at least
+ two insns is handed to gen_one_bundle. */
+
+static void
+bfin_gen_bundles (void)
+{
+ basic_block bb;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn, next;
+ rtx slot[3];
+ /* Number of insns placed into SLOT for the current group. */
+ int n_filled = 0;
+
+ slot[0] = slot[1] = slot[2] = NULL_RTX;
+ for (insn = BB_HEAD (bb);; insn = next)
+ {
+ int at_end;
+ if (INSN_P (insn))
+ {
+ if (get_attr_type (insn) == TYPE_DSP32)
+ slot[0] = insn;
+ else if (slot[1] == NULL_RTX)
+ slot[1] = insn;
+ else
+ slot[2] = insn;
+ n_filled++;
+ }
+
+ /* Advance to the next insn that is neither a non-insn nor a USE or
+ CLOBBER pattern, without moving past the end of the block. */
+ next = NEXT_INSN (insn);
+ while (next && insn != BB_END (bb)
+ && !(INSN_P (next)
+ && GET_CODE (PATTERN (next)) != USE
+ && GET_CODE (PATTERN (next)) != CLOBBER))
+ {
+ insn = next;
+ next = NEXT_INSN (insn);
+ }
+
+ /* BB_END can change due to emitting extra NOPs, so check here. */
+ at_end = insn == BB_END (bb);
+ if (at_end || GET_MODE (next) == TImode)
+ {
+ /* If no bundle was formed (fewer than two insns collected, or
+ gen_one_bundle refused) and slot 0 holds a SET whose source is
+ an UNSPEC_32BIT unspec, replace that source by the unspec's
+ first operand, reset INSN_CODE to -1 and rescan the insn. */
+ if ((n_filled < 2
+ || !gen_one_bundle (slot))
+ && slot[0] != NULL_RTX)
+ {
+ rtx pat = PATTERN (slot[0]);
+ if (GET_CODE (pat) == SET
+ && GET_CODE (SET_SRC (pat)) == UNSPEC
+ && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
+ {
+ SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
+ INSN_CODE (slot[0]) = -1;
+ df_insn_rescan (slot[0]);
+ }
+ }
+ /* Start a fresh group. */
+ n_filled = 0;
+ slot[0] = slot[1] = slot[2] = NULL_RTX;
+ }
+ if (at_end)
+ break;
+ }
+ }
+}
+
+/* Ensure that no var tracking notes are emitted in the middle of a
+ three-instruction bundle.
+
+ An SImode insn marks being inside a bundle and a QImode insn marks its
+ last instruction. NOTE_INSN_VAR_LOCATION notes met while inside a bundle
+ are unlinked from the insn chain and queued; when the QImode insn is
+ reached, the queued notes are linked back in directly after it, in their
+ original relative order. The walk over each block stops before BB_END,
+ so the block's final insn is not examined. */
+
+static void
+reorder_var_tracking_notes (void)
+{
+ basic_block bb;
+ FOR_EACH_BB (bb)
+ {
+ rtx insn, next;
+ /* Most recently queued note; older ones are chained through PREV_INSN. */
+ rtx queue = NULL_RTX;
+ bool in_bundle = false;
+
+ for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
+ {
+ /* Fetch the successor first, INSN may be unlinked below. */
+ next = NEXT_INSN (insn);
+
+ if (INSN_P (insn))
+ {
+ /* Emit queued up notes at the last instruction of a bundle. */
+ if (GET_MODE (insn) == QImode)
+ {
+ while (queue)
+ {
+ /* Splice QUEUE in between INSN and its current successor. */
+ rtx next_queue = PREV_INSN (queue);
+ PREV_INSN (NEXT_INSN (insn)) = queue;
+ NEXT_INSN (queue) = NEXT_INSN (insn);
+ NEXT_INSN (insn) = queue;
+ PREV_INSN (queue) = insn;
+ queue = next_queue;
+ }
+ in_bundle = false;
+ }
+ else if (GET_MODE (insn) == SImode)
+ in_bundle = true;
+ }
+ else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
+ {
+ if (in_bundle)
+ {
+ /* Unlink the note from the chain... */
+ rtx prev = PREV_INSN (insn);
+ PREV_INSN (next) = prev;
+ NEXT_INSN (prev) = next;
+
+ /* ...and push it onto the queue. */
+ PREV_INSN (insn) = queue;
+ queue = insn;
+ }
+ }
+ }
+ }
+}
+\f
+/* Return the insn type of INSN for use by the anomaly workarounds.  For an
+   ordinary insn this is just get_attr_type.  For a SEQUENCE, elements 1
+   and 2 are inspected in that order: if either is TYPE_MCLD that type is
+   returned, otherwise TYPE_MCST.  */
+
+static enum attr_type
+type_for_anomaly (rtx insn)
+{
+  rtx body = PATTERN (insn);
+  int i;
+
+  if (GET_CODE (body) != SEQUENCE)
+    return get_attr_type (insn);
+
+  for (i = 1; i <= 2; i++)
+    {
+      enum attr_type kind = get_attr_type (XVECEXP (body, 0, i));
+
+      if (kind == TYPE_MCLD)
+        return kind;
+    }
+  return TYPE_MCST;
+}
+
+/* Return true if INSN contains a load that may trap.  For a SEQUENCE,
+   elements 1 and 2 are checked in that order and only TYPE_MCLD elements
+   count; for any other insn the source of its single set is tested.  */
+
+static bool
+trapping_loads_p (rtx insn)
+{
+  rtx body = PATTERN (insn);
+  int i;
+
+  if (GET_CODE (body) != SEQUENCE)
+    return may_trap_p (SET_SRC (single_set (insn)));
+
+  for (i = 1; i <= 2; i++)
+    {
+      rtx member = XVECEXP (body, 0, i);
+
+      if (get_attr_type (member) == TYPE_MCLD
+          && may_trap_p (SET_SRC (PATTERN (member))))
+        return true;
+    }
+  return false;
+}
+
+/* Like NEXT_INSN, but aware of three-insn bundles: if INSN has SImode
+   (it opens a bundle), first advance to the QImode insn that closes the
+   bundle, then return whatever follows it.  */
+static rtx
+find_next_insn_start (rtx insn)
+{
+  rtx last;
+
+  if (GET_MODE (insn) != SImode)
+    return NEXT_INSN (insn);
+
+  for (last = insn; GET_MODE (last) != QImode; last = NEXT_INSN (last))
+    ;
+
+  return NEXT_INSN (last);
+}
+
+/* Return INSN if it is of TYPE_MCLD.  Otherwise, if INSN has SImode (it
+   opens a bundle), walk the following insns up to and including the one
+   with QImode and return the first SImode or QImode insn among them that
+   is of TYPE_MCLD.  Return NULL_RTX if no load is found.  */
+static rtx
+find_load (rtx insn)
+{
+  rtx cur = insn;
+
+  if (get_attr_type (cur) == TYPE_MCLD)
+    return cur;
+  if (GET_MODE (cur) != SImode)
+    return NULL_RTX;
+
+  for (;;)
+    {
+      enum machine_mode mode;
+
+      cur = NEXT_INSN (cur);
+      mode = GET_MODE (cur);
+
+      if (mode == SImode || mode == QImode)
+        {
+          if (get_attr_type (cur) == TYPE_MCLD)
+            return cur;
+        }
+
+      /* The QImode insn closes the bundle.  */
+      if (mode == QImode)
+        break;
+    }
+  return NULL_RTX;
+}
+
+/* We use the machine specific reorg pass for emitting CSYNC instructions
+ after conditional branches as needed.
+
+ The Blackfin is unusual in that a code sequence like
+ if cc jump label
+ r0 = (p0)
+ may speculatively perform the load even if the condition isn't true. This
+ happens for a branch that is predicted not taken, because the pipeline
+ isn't flushed or stalled, so the early stages of the following instructions,
+ which perform the memory reference, are allowed to execute before the
+ jump condition is evaluated.
+ Therefore, we must insert additional instructions in all places where this
+ could lead to incorrect behavior. The manual recommends CSYNC, while
+ VDSP seems to use NOPs (even though its corresponding compiler option is
+ named CSYNC).
+
+ When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
+ When optimizing for size, we turn the branch into a predicted taken one.
+ This may be slower due to mispredicts, but saves code size. */
+
+static void
+bfin_reorg (void)
+{
+ rtx insn, next;
+ rtx last_condjump = NULL_RTX;
+ int cycles_since_jump = INT_MAX;
+
+ /* We are freeing block_for_insn in the toplev to keep compatibility
+ with old MDEP_REORGS that are not CFG based. Recompute it now. */
+ compute_bb_for_insn ();
+
+ if (bfin_flag_schedule_insns2)
+ {
+ splitting_for_sched = 1;
+ split_all_insns ();
+ splitting_for_sched = 0;
+
+ timevar_push (TV_SCHED2);
+ schedule_insns ();
+ timevar_pop (TV_SCHED2);
+
+ /* Examine the schedule and insert nops as necessary for 64-bit parallel
+ instructions. */
+ bfin_gen_bundles ();
+ }
+
+ df_analyze ();
+
+ /* Doloop optimization */
+ if (cfun->machine->has_hardware_loops)
+ bfin_reorg_loops (dump_file);
+
+ if (! TARGET_SPECLD_ANOMALY && ! TARGET_CSYNC_ANOMALY)
+ return;
+
+ /* First pass: find predicted-false branches; if something after them
+ needs nops, insert them or change the branch to predict true. */
+ for (insn = get_insns (); insn; insn = next)
+ {
+ rtx pat;
+
+ next = find_next_insn_start (insn);
+
+ if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
+ continue;
+
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
+ || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
+ || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
+ continue;
+
+ if (JUMP_P (insn))