#include "debug.h"
#include "dwarf2out.h"
#include "sched-int.h"
+#include "sbitmap.h"
+#include "fibheap.h"
enum upper_128bits_state
{
bool unchanged;
/* TRUE if block has been processed. */
bool processed;
+ /* TRUE if block has been scanned. */
+ bool scanned;
+ /* Previous state of the upper 128bits of AVX registers at entry. */
+ enum upper_128bits_state prev;
} *block_info;
#define BLOCK_INFO(B) ((block_info) (B)->aux)
return;
}
+ if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
+ {
+ if (dump_file)
+ fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
+ bb->index, BLOCK_INFO (bb)->state);
+ return;
+ }
+
+ BLOCK_INFO (bb)->prev = state;
+
if (dump_file)
fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
bb->index, state);
BLOCK_INFO (bb)->state = state;
BLOCK_INFO (bb)->unchanged = unchanged;
+ BLOCK_INFO (bb)->scanned = true;
if (dump_file)
fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
/* Helper function for move_or_delete_vzeroupper. Process vzeroupper
in BLOCK and check its predecessor blocks. Treat UNKNOWN state
- as USED if UNKNOWN_IS_UNUSED is true. */
+ as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
+ state is changed. */
-static void
+static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
edge e;
block->index, BLOCK_INFO (block)->processed);
if (BLOCK_INFO (block)->processed)
- return;
+ return false;
state = unused;
/* Need to rescan if the upper 128bits of AVX registers are changed
to USED at exit. */
- if (new_state != old_state && new_state == used)
- cfun->machine->rescan_vzeroupper_p = 1;
+ if (new_state != old_state)
+ {
+ if (new_state == used)
+ cfun->machine->rescan_vzeroupper_p = 1;
+ return true;
+ }
+ else
+ return false;
}
/* Go through the instruction stream looking for vzeroupper. Delete
edge e;
edge_iterator ei;
basic_block bb;
- unsigned int count;
+ fibheap_t worklist, pending, fibheap_swap;
+ sbitmap visited, in_worklist, in_pending, sbitmap_swap;
+ int *bb_order;
+ int *rc_order;
+ int i;
/* Set up block info for each basic block. */
alloc_aux_for_blocks (sizeof (struct block_info_def));
- /* Process successor blocks of all entry points. */
+ /* Process outgoing edges of entry point. */
if (dump_file)
- fprintf (dump_file, "Process all entry points\n");
+ fprintf (dump_file, "Process outgoing edges of entry point\n");
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
{
BLOCK_INFO (e->dest)->processed = true;
}
- /* Process all basic blocks. */
- count = 0;
- do
+ /* Compute reverse completion order of depth first search of the CFG
+ so that the data-flow runs faster. */
+ rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
+ bb_order = XNEWVEC (int, last_basic_block);
+ pre_and_rev_post_order_compute (NULL, rc_order, false);
+ for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
+ bb_order[rc_order[i]] = i;
+ free (rc_order);
+
+ worklist = fibheap_new ();
+ pending = fibheap_new ();
+ visited = sbitmap_alloc (last_basic_block);
+ in_worklist = sbitmap_alloc (last_basic_block);
+ in_pending = sbitmap_alloc (last_basic_block);
+ sbitmap_zero (in_worklist);
+
+ /* Don't check outgoing edges of entry point. */
+ sbitmap_ones (in_pending);
+ FOR_EACH_BB (bb)
+ if (BLOCK_INFO (bb)->processed)
+ RESET_BIT (in_pending, bb->index);
+ else
+ {
+ move_or_delete_vzeroupper_1 (bb, false);
+ fibheap_insert (pending, bb_order[bb->index], bb);
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "Check remaining basic blocks\n");
+
+ while (!fibheap_empty (pending))
{
- if (dump_file)
- fprintf (dump_file, "Process all basic blocks: trip %d\n",
- count);
+ fibheap_swap = pending;
+ pending = worklist;
+ worklist = fibheap_swap;
+ sbitmap_swap = in_pending;
+ in_pending = in_worklist;
+ in_worklist = sbitmap_swap;
+
+ sbitmap_zero (visited);
+
cfun->machine->rescan_vzeroupper_p = 0;
- FOR_EACH_BB (bb)
- move_or_delete_vzeroupper_1 (bb, false);
+
+ while (!fibheap_empty (worklist))
+ {
+ bb = (basic_block) fibheap_extract_min (worklist);
+ RESET_BIT (in_worklist, bb->index);
+ gcc_assert (!TEST_BIT (visited, bb->index));
+ if (!TEST_BIT (visited, bb->index))
+ {
+ edge_iterator ei;
+
+ SET_BIT (visited, bb->index);
+
+ if (move_or_delete_vzeroupper_1 (bb, false))
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (e->dest == EXIT_BLOCK_PTR
+ || BLOCK_INFO (e->dest)->processed)
+ continue;
+
+ if (TEST_BIT (visited, e->dest->index))
+ {
+ if (!TEST_BIT (in_pending, e->dest->index))
+ {
+ /* Send E->DEST to next round. */
+ SET_BIT (in_pending, e->dest->index);
+ fibheap_insert (pending,
+ bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ else if (!TEST_BIT (in_worklist, e->dest->index))
+ {
+ /* Add E->DEST to current round. */
+ SET_BIT (in_worklist, e->dest->index);
+ fibheap_insert (worklist, bb_order[e->dest->index],
+ e->dest);
+ }
+ }
+ }
+ }
+
+ if (!cfun->machine->rescan_vzeroupper_p)
+ break;
}
- while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
- /* FIXME: Is 20 big enough? */
- if (count >= 20)
- gcc_unreachable ();
+ free (bb_order);
+ fibheap_delete (worklist);
+ fibheap_delete (pending);
+ sbitmap_free (visited);
+ sbitmap_free (in_worklist);
+ sbitmap_free (in_pending);
if (dump_file)
- fprintf (dump_file, "Process all basic blocks\n");
+ fprintf (dump_file, "Process remaining basic blocks\n");
FOR_EACH_BB (bb)
move_or_delete_vzeroupper_1 (bb, true);