OSDN Git Service

Visit basic blocks using the work-list based algorithm.
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 24 Jan 2011 17:29:58 +0000 (17:29 +0000)
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 24 Jan 2011 17:29:58 +0000 (17:29 +0000)
2011-01-24  H.J. Lu  <hongjiu.lu@intel.com>

PR target/46519
* config/i386/i386.c: Include sbitmap.h and fibheap.h.
(block_info): Add scanned and prev.
(move_or_delete_vzeroupper_2): Return if the basic block
has been scanned and the upper 128bit state is unchanged
from the last scan.
(move_or_delete_vzeroupper_1): Return true if the exit
state is changed.
(move_or_delete_vzeroupper): Visit basic blocks using the
work-list based algorithm based on vt_find_locations in
var-tracking.c.

* config/i386/t-i386: Also depend on sbitmap.h and $(FIBHEAP_H).

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@169173 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/config/i386/t-i386

index 2007a76..c181a54 100644 (file)
@@ -1,3 +1,19 @@
+2011-01-24  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/46519
+       * config/i386/i386.c: Include sbitmap.h and fibheap.h.
+       (block_info): Add scanned and prev.
+       (move_or_delete_vzeroupper_2): Return if the basic block
+       has been scanned and the upper 128bit state is unchanged
+       from the last scan.
+       (move_or_delete_vzeroupper_1): Return true if the exit
+       state is changed.
+       (move_or_delete_vzeroupper): Visit basic blocks using the
+       work-list based algorithm based on vt_find_locations in
+       var-tracking.c.
+
+       * config/i386/t-i386: Also depend on sbitmap.h and $(FIBHEAP_H).
+
 2011-01-24  Nick Clifton  <nickc@redhat.com>
 
        * config/v850/v850.opt (mv850es): New option - alias for -mv850e1.
index 1484dea..ee1790f 100644 (file)
@@ -56,6 +56,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "debug.h"
 #include "dwarf2out.h"
 #include "sched-int.h"
+#include "sbitmap.h"
+#include "fibheap.h"
 
 enum upper_128bits_state
 {
@@ -73,6 +75,10 @@ typedef struct block_info_def
   bool unchanged;
   /* TRUE if block has been processed.  */
   bool processed;
+  /* TRUE if block has been scanned.  */
+  bool scanned;
+  /* Previous state of the upper 128bits of AVX registers at entry.  */
+  enum upper_128bits_state prev;
 } *block_info;
 
 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
@@ -135,6 +141,16 @@ move_or_delete_vzeroupper_2 (basic_block bb,
       return;
     }
 
+  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
+    {
+      if (dump_file)
+       fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
+                bb->index, BLOCK_INFO (bb)->state);
+      return;
+    }
+
+  BLOCK_INFO (bb)->prev = state;
+
   if (dump_file)
     fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);
@@ -264,6 +280,7 @@ move_or_delete_vzeroupper_2 (basic_block bb,
 
   BLOCK_INFO (bb)->state = state;
   BLOCK_INFO (bb)->unchanged = unchanged;
+  BLOCK_INFO (bb)->scanned = true;
 
   if (dump_file)
     fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
@@ -273,9 +290,10 @@ move_or_delete_vzeroupper_2 (basic_block bb,
 
 /* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
    in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
-   as USED if UNKNOWN_IS_UNUSED is true.  */
+   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
+   state is changed.  */
 
-static void
+static bool
 move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
 {
   edge e;
@@ -288,7 +306,7 @@ move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
             block->index, BLOCK_INFO (block)->processed);
 
   if (BLOCK_INFO (block)->processed)
-    return;
+    return false;
 
   state = unused;
 
@@ -324,8 +342,14 @@ done:
 
   /* Need to rescan if the upper 128bits of AVX registers are changed
      to USED at exit.  */
-  if (new_state != old_state && new_state == used)
-    cfun->machine->rescan_vzeroupper_p = 1;
+  if (new_state != old_state)
+    {
+      if (new_state == used)
+       cfun->machine->rescan_vzeroupper_p = 1;
+      return true;
+    }
+  else
+    return false;
 }
 
 /* Go through the instruction stream looking for vzeroupper.  Delete
@@ -338,14 +362,18 @@ move_or_delete_vzeroupper (void)
   edge e;
   edge_iterator ei;
   basic_block bb;
-  unsigned int count;
+  fibheap_t worklist, pending, fibheap_swap;
+  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
+  int *bb_order;
+  int *rc_order;
+  int i;
 
   /* Set up block info for each basic block.  */
   alloc_aux_for_blocks (sizeof (struct block_info_def));
 
-  /* Process successor blocks of all entry points.  */
+  /* Process outgoing edges of entry point.  */
   if (dump_file)
-    fprintf (dump_file, "Process all entry points\n");
+    fprintf (dump_file, "Process outgoing edges of entry point\n");
 
   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
     {
@@ -355,25 +383,102 @@ move_or_delete_vzeroupper (void)
       BLOCK_INFO (e->dest)->processed = true;
     }
 
-  /* Process all basic blocks.  */
-  count = 0;
-  do
+  /* Compute reverse completion order of depth first search of the CFG
+     so that the data-flow runs faster.  */
+  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
+  bb_order = XNEWVEC (int, last_basic_block);
+  pre_and_rev_post_order_compute (NULL, rc_order, false);
+  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
+    bb_order[rc_order[i]] = i;
+  free (rc_order);
+
+  worklist = fibheap_new ();
+  pending = fibheap_new ();
+  visited = sbitmap_alloc (last_basic_block);
+  in_worklist = sbitmap_alloc (last_basic_block);
+  in_pending = sbitmap_alloc (last_basic_block);
+  sbitmap_zero (in_worklist);
+
+  /* Don't check outgoing edges of entry point.  */
+  sbitmap_ones (in_pending);
+  FOR_EACH_BB (bb)
+    if (BLOCK_INFO (bb)->processed)
+      RESET_BIT (in_pending, bb->index);
+    else
+      {
+       move_or_delete_vzeroupper_1 (bb, false);
+       fibheap_insert (pending, bb_order[bb->index], bb);
+      }
+
+  if (dump_file)
+    fprintf (dump_file, "Check remaining basic blocks\n");
+
+  while (!fibheap_empty (pending))
     {
-      if (dump_file)
-       fprintf (dump_file, "Process all basic blocks: trip %d\n",
-                count);
+      fibheap_swap = pending;
+      pending = worklist;
+      worklist = fibheap_swap;
+      sbitmap_swap = in_pending;
+      in_pending = in_worklist;
+      in_worklist = sbitmap_swap;
+
+      sbitmap_zero (visited);
+
       cfun->machine->rescan_vzeroupper_p = 0;
-      FOR_EACH_BB (bb)
-       move_or_delete_vzeroupper_1 (bb, false);
+
+      while (!fibheap_empty (worklist))
+       {
+         bb = (basic_block) fibheap_extract_min (worklist);
+         RESET_BIT (in_worklist, bb->index);
+         gcc_assert (!TEST_BIT (visited, bb->index));
+         if (!TEST_BIT (visited, bb->index))
+           {
+             edge_iterator ei;
+
+             SET_BIT (visited, bb->index);
+
+             if (move_or_delete_vzeroupper_1 (bb, false))
+               FOR_EACH_EDGE (e, ei, bb->succs)
+                 {
+                   if (e->dest == EXIT_BLOCK_PTR
+                       || BLOCK_INFO (e->dest)->processed)
+                     continue;
+
+                   if (TEST_BIT (visited, e->dest->index))
+                     {
+                       if (!TEST_BIT (in_pending, e->dest->index))
+                         {
+                           /* Send E->DEST to next round.  */
+                           SET_BIT (in_pending, e->dest->index);
+                           fibheap_insert (pending,
+                                           bb_order[e->dest->index],
+                                           e->dest);
+                         }
+                     }
+                   else if (!TEST_BIT (in_worklist, e->dest->index))
+                     {
+                       /* Add E->DEST to current round.  */
+                       SET_BIT (in_worklist, e->dest->index);
+                       fibheap_insert (worklist, bb_order[e->dest->index],
+                                       e->dest);
+                     }
+                 }
+           }
+       }
+
+      if (!cfun->machine->rescan_vzeroupper_p)
+       break;
     }
-  while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
 
-  /* FIXME: Is 20 big enough?  */
-  if (count >= 20)
-    gcc_unreachable ();
+  free (bb_order);
+  fibheap_delete (worklist);
+  fibheap_delete (pending);
+  sbitmap_free (visited);
+  sbitmap_free (in_worklist);
+  sbitmap_free (in_pending);
 
   if (dump_file)
-    fprintf (dump_file, "Process all basic blocks\n");
+    fprintf (dump_file, "Process remaining basic blocks\n");
 
   FOR_EACH_BB (bb)
     move_or_delete_vzeroupper_1 (bb, true);
index 6c801a5..1c658a1 100644 (file)
@@ -23,7 +23,7 @@ i386.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
   $(RECOG_H) $(EXPR_H) $(OPTABS_H) toplev.h $(BASIC_BLOCK_H) \
   $(GGC_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h $(CGRAPH_H) \
   $(TREE_GIMPLE_H) $(DWARF2_H) $(DF_H) tm-constrs.h $(PARAMS_H) \
-  i386-builtin-types.inc debug.h dwarf2out.h
+  i386-builtin-types.inc debug.h dwarf2out.h sbitmap.h $(FIBHEAP_H)
 
 i386-c.o: $(srcdir)/config/i386/i386-c.c \
   $(srcdir)/config/i386/i386-protos.h $(CONFIG_H) $(SYSTEM_H) coretypes.h \