OSDN Git Service

Repeat processing all basic blocks for vzeroupper optimization.
authorhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 30 Dec 2010 13:12:02 +0000 (13:12 +0000)
committerhjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4>
Thu, 30 Dec 2010 13:12:02 +0000 (13:12 +0000)
gcc/

2010-12-30  H.J. Lu  <hongjiu.lu@intel.com>

PR target/46519
* config/i386/i386.c (block_info_def): Remove referenced, count
and rescanned.
(move_or_delete_vzeroupper_2): Updated.
(move_or_delete_vzeroupper_1): Rewritten to avoid recursive call.
(rescan_move_or_delete_vzeroupper): Removed.
(move_or_delete_vzeroupper): Repeat processing all basic blocks
until no basic block state is changed to used at exit.

gcc/testsuite/

2010-12-30  H.J. Lu  <hongjiu.lu@intel.com>

PR target/46519
* gfortran.dg/pr46519-2.f90: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@168342 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gfortran.dg/pr46519-2.f90 [new file with mode: 0644]

index a3e7fae..cc0f6fe 100644 (file)
@@ -1,3 +1,14 @@
+2010-12-30  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/46519
+       * config/i386/i386.c (block_info_def): Remove referenced, count
+       and rescanned.
+       (move_or_delete_vzeroupper_2): Updated.
+       (move_or_delete_vzeroupper_1): Rewritten to avoid recursive call.
+       (rescan_move_or_delete_vzeroupper): Removed.
+       (move_or_delete_vzeroupper): Repeat processing all basic blocks
+       until no basic block state is changed to used at exit.
+
 2010-12-30  Paul Koning  <ni1d@arrl.net>
 
        * config/pdp11/pdp11.md (movmemhi, movmemhi1): Correct
index 9156d89..2fb1dfa 100644 (file)
@@ -68,14 +68,8 @@ typedef struct block_info_def
 {
   /* State of the upper 128bits of any AVX registers at exit.  */
   enum upper_128bits_state state;
-  /* If the upper 128bits of any AVX registers are referenced.  */
-  enum upper_128bits_state referenced;
-  /* Number of vzerouppers in this block.  */
-  unsigned int count;
   /* TRUE if block has been processed.  */
   bool processed;
-  /* TRUE if block has been rescanned.  */
-  bool rescanned;
 } *block_info;
 
 #define BLOCK_INFO(B)   ((block_info) (B)->aux)
@@ -127,8 +121,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
   rtx vzeroupper_insn = NULL_RTX;
   rtx pat;
   int avx256;
-  enum upper_128bits_state referenced = BLOCK_INFO (bb)->referenced;
-  int count = BLOCK_INFO (bb)->count;
 
   if (dump_file)
     fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
@@ -191,24 +183,20 @@ move_or_delete_vzeroupper_2 (basic_block bb,
              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
-                 count--;
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
-         else if (state != used && referenced != unused)
+         else if (state != used)
            {
              /* No need to call note_stores if the upper 128bits of
                 AVX registers are never referenced.  */
              note_stores (pat, check_avx256_stores, &state);
-             if (state == used)
-               referenced = used;
            }
          continue;
        }
 
       /* Process vzeroupper intrinsic.  */
-      count++;
       avx256 = INTVAL (XVECEXP (pat, 0, 0));
 
       if (state == unused)
@@ -226,7 +214,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
-         count--;
          delete_insn (insn);
        }
       else
@@ -246,7 +233,6 @@ move_or_delete_vzeroupper_2 (basic_block bb,
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
-             count--;
              delete_insn (insn);
            }
          else
@@ -256,30 +242,22 @@ move_or_delete_vzeroupper_2 (basic_block bb,
 
   BLOCK_INFO (bb)->state = state;
 
-  if (BLOCK_INFO (bb)->referenced == unknown)
-    {
-      /* The upper 128bits of AVX registers are never referenced if
-        REFERENCED isn't updated.  */
-      if (referenced == unknown)
-       referenced = unused;
-      BLOCK_INFO (bb)->referenced = referenced;
-      BLOCK_INFO (bb)->count = count;
-    }
-
   if (dump_file)
     fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
             bb->index, state);
 }
 
 /* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
-   in BLOCK and its predecessor blocks recursively.  */
+   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
+   as USED if UNKNOWN_IS_UNUSED is true.  */
 
 static void
-move_or_delete_vzeroupper_1 (basic_block block)
+move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
 {
   edge e;
   edge_iterator ei;
-  enum upper_128bits_state state;
+  enum upper_128bits_state state, old_state, new_state;
+  bool seen_unknown;
 
   if (dump_file)
     fprintf (dump_file, " Process [bb %i]: status: %d\n",
@@ -288,83 +266,42 @@ move_or_delete_vzeroupper_1 (basic_block block)
   if (BLOCK_INFO (block)->processed)
     return;
 
-  BLOCK_INFO (block)->processed = true;
-
-  state = unknown;
+  state = unused;
 
-  /* Process all predecessor edges of this block.  */
+  /* Check all predecessor edges of this block.  */
+  seen_unknown = false;
   FOR_EACH_EDGE (e, ei, block->preds)
     {
       if (e->src == block)
        continue;
-      move_or_delete_vzeroupper_1 (e->src);
       switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
-         if (state == unused)
-           state = unknown;
+         if (!unknown_is_unused)
+           seen_unknown = true;
+       case unused:
          break;
        case used:
          state = used;
-         break;
-       case unused:
-         break;
+         goto done;
        }
     }
 
-  /* If state of any predecessor edges is unknown, we need to rescan.  */
-  if (state == unknown)
-    cfun->machine->rescan_vzeroupper_p = 1;
+  if (seen_unknown)
+    state = unknown;
 
-  /* Process this block.  */
+done:
+  old_state = BLOCK_INFO (block)->state;
   move_or_delete_vzeroupper_2 (block, state);
-}
-
-/* Helper function for move_or_delete_vzeroupper.  Rescan vzeroupper
-   in BLOCK and its predecessor blocks recursively.  */
-
-static void
-rescan_move_or_delete_vzeroupper (basic_block block)
-{
-  edge e;
-  edge_iterator ei;
-  enum upper_128bits_state state;
-
-  if (dump_file)
-    fprintf (dump_file, " Rescan [bb %i]: status: %d\n",
-            block->index, BLOCK_INFO (block)->rescanned);
-
-  if (BLOCK_INFO (block)->rescanned)
-    return;
-
-  BLOCK_INFO (block)->rescanned = true;
-
-  state = unused;
+  new_state = BLOCK_INFO (block)->state;
 
-  /* Rescan all predecessor edges of this block.  */
-  FOR_EACH_EDGE (e, ei, block->preds)
-    {
-      if (e->src == block)
-       continue;
-      rescan_move_or_delete_vzeroupper (e->src);
-      /* For rescan, UKKNOWN state is treated as UNUSED.  */
-      if (BLOCK_INFO (e->src)->state == used)
-       state = used;
-    }
+  if (state != unknown || new_state == used)
+    BLOCK_INFO (block)->processed = true;
 
-  /* Rescan this block only if there are vzerouppers or the upper
-     128bits of AVX registers are referenced.  */
-  if (BLOCK_INFO (block)->count == 0
-      && (state == used || BLOCK_INFO (block)->referenced != used))
-    {
-      if (state == used)
-       BLOCK_INFO (block)->state = state;
-      if (dump_file)
-       fprintf (dump_file, " [bb %i] exit: upper 128bits: %d\n",
-                block->index, BLOCK_INFO (block)->state);
-    }
-  else
-    move_or_delete_vzeroupper_2 (block, state);
+  /* Need to rescan if the upper 128bits of AVX registers are changed
+     to USED at exit.  */
+  if (new_state != old_state && new_state == used)
+    cfun->machine->rescan_vzeroupper_p = 1;
 }
 
 /* Go through the instruction stream looking for vzeroupper.  Delete
@@ -377,7 +314,7 @@ move_or_delete_vzeroupper (void)
   edge e;
   edge_iterator ei;
   basic_block bb;
-  unsigned int count = 0;
+  unsigned int count;
 
   /* Set up block info for each basic block.  */
   alloc_aux_for_blocks (sizeof (struct block_info_def));
@@ -392,28 +329,30 @@ move_or_delete_vzeroupper (void)
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
       BLOCK_INFO (e->dest)->processed = true;
-      BLOCK_INFO (e->dest)->rescanned = true;
     }
 
   /* Process all basic blocks.  */
-  if (dump_file)
-    fprintf (dump_file, "Process all basic blocks\n");
-
-  FOR_EACH_BB (bb)
-    {
-      move_or_delete_vzeroupper_1 (bb);
-      count += BLOCK_INFO (bb)->count;
-    }
-
-  /* Rescan all basic blocks if needed.  */
-  if (count && cfun->machine->rescan_vzeroupper_p)
+  count = 0;
+  do
     {
       if (dump_file)
-       fprintf (dump_file, "Rescan all basic blocks\n");
-
+       fprintf (dump_file, "Process all basic blocks: trip %d\n",
+                count);
+      cfun->machine->rescan_vzeroupper_p = 0;
       FOR_EACH_BB (bb)
-       rescan_move_or_delete_vzeroupper (bb);
+       move_or_delete_vzeroupper_1 (bb, false);
     }
+  while (cfun->machine->rescan_vzeroupper_p && count++ < 20);
+
+  /* FIXME: Is 20 big enough?  */
+  if (count >= 20)
+    gcc_unreachable ();
+
+  if (dump_file)
+    fprintf (dump_file, "Process all basic blocks\n");
+
+  FOR_EACH_BB (bb)
+    move_or_delete_vzeroupper_1 (bb, true);
 
   free_aux_for_blocks ();
 }
index 7941751..69ab495 100644 (file)
@@ -1,3 +1,8 @@
+2010-12-30  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/46519
+       * gfortran.dg/pr46519-2.f90: New.
+
 2010-12-30  Janus Weil  <janus@gcc.gnu.org>
 
        PR fortran/47085
diff --git a/gcc/testsuite/gfortran.dg/pr46519-2.f90 b/gcc/testsuite/gfortran.dg/pr46519-2.f90
new file mode 100644 (file)
index 0000000..b4d6980
--- /dev/null
@@ -0,0 +1,31 @@
+! { dg-do compile { target i?86-*-* x86_64-*-* } }
+! { dg-options "-O3 -mavx -mvzeroupper -mtune=generic -dp" }
+
+  SUBROUTINE func(kts, kte, qrz, qiz, rho)
+  IMPLICIT NONE
+  INTEGER, INTENT(IN)               :: kts, kte
+  REAL,    DIMENSION(kts:kte), INTENT(INOUT) :: qrz, qiz, rho
+  INTEGER                              :: k
+  REAL, DIMENSION(kts:kte)    ::  praci, vtiold
+  REAL                          :: fluxout
+  INTEGER                       :: min_q, max_q, var
+  do k=kts,kte
+    praci(k)=0.0
+  enddo
+  min_q=kte
+  max_q=kts-1
+  DO var=1,20
+    do k=max_q,min_q,-1
+       fluxout=rho(k)*qrz(k)
+    enddo
+    qrz(min_q-1)=qrz(min_q-1)+fluxout
+  ENDDO
+  DO var=1,20
+    do k=kts,kte-1
+      vtiold(k)= (rho(k))**0.16
+    enddo
+  ENDDO
+  STOP
+  END SUBROUTINE func
+
+! { dg-final { scan-assembler "avx_vzeroupper" } }