Fix off-by-one error in vrev for Neon.

[pf3gnuchains/gcc-fork.git] / libgomp / work.c
diff --git a/libgomp/work.c b/libgomp/work.c

index cd20c9d..6bd9c24 100644 (file)
--- a/libgomp/work.c
+++ b/libgomp/work.c
@@ -1,67 +1,163 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
     Contributed by Richard Henderson <rth@redhat.com>.
  
     This file is part of the GNU OpenMP Library (libgomp).
  
     Libgomp is free software; you can redistribute it and/or modify it
-   under the terms of the GNU Lesser General Public License as published by
-   the Free Software Foundation; either version 2.1 of the License, or
-   (at your option) any later version.
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
  
     Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     more details.
  
-   You should have received a copy of the GNU Lesser General Public License 
-   along with libgomp; see the file COPYING.LIB.  If not, write to the
-   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-   MA 02110-1301, USA.  */
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
  
-/* As a special exception, if you link this library with other files, some
-   of which are compiled with GCC, to produce an executable, this library
-   does not by itself cause the resulting executable to be covered by the
-   GNU General Public License.  This exception does not however invalidate
-   any other reasons why the executable file might be covered by the GNU
-   General Public License.  */
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
  
  /* This file contains routines to manage the work-share queue for a team
     of threads.  */
  
  #include "libgomp.h"
+#include <stddef.h>
  #include <stdlib.h>
  #include <string.h>
  
  
-/* Create a new work share structure.  */
+/* Allocate a new work share structure, preferably from current team's
+   free gomp_work_share cache.  Does not call gomp_init_work_share.  */
  
-struct gomp_work_share *
-gomp_new_work_share (bool ordered, unsigned nthreads)
+static struct gomp_work_share *
+alloc_work_share (struct gomp_team *team)
  {
    struct gomp_work_share *ws;
-  size_t size;
+  unsigned int i;
  
-  size = sizeof (*ws);
-  if (ordered)
-    size += nthreads * sizeof (ws->ordered_team_ids[0]);
+  /* This is called in a critical section.  */
+  if (team->work_share_list_alloc != NULL)
+    {
+      ws = team->work_share_list_alloc;  /* Pop the alloc list head.  */
+      team->work_share_list_alloc = ws->next_free;
+      return ws;
+    }
  
-  ws = gomp_malloc_cleared (size);
-  gomp_mutex_init (&ws->lock);
-  ws->ordered_owner = -1;
+#ifdef HAVE_SYNC_BUILTINS
+  ws = team->work_share_list_free;
+  /* We need atomic read from work_share_list_free,
+     as free_work_share can be called concurrently.  */
+  __asm ("" : "+r" (ws));
+
+  if (ws && ws->next_free)  /* Only taken with two or more entries.  */
+    {
+      struct gomp_work_share *next = ws->next_free;
+      ws->next_free = NULL;  /* The head entry stays on the free list.  */
+      team->work_share_list_alloc = next->next_free;  /* Keep the rest.  */
+      return next;
+    }
+#else
+  gomp_mutex_lock (&team->work_share_list_free_lock);
+  ws = team->work_share_list_free;
+  if (ws)  /* Take over the entire free list.  */
+    {
+      team->work_share_list_alloc = ws->next_free;
+      team->work_share_list_free = NULL;
+      gomp_mutex_unlock (&team->work_share_list_free_lock);
+      return ws;
+    }
+  gomp_mutex_unlock (&team->work_share_list_free_lock);
+#endif
  
+  team->work_share_chunk *= 2;  /* Fall back to a new, doubled chunk.  */
+  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
+  ws->next_alloc = team->work_shares[0].next_alloc;  /* Chain the chunk.  */
+  team->work_shares[0].next_alloc = ws;
+  team->work_share_list_alloc = &ws[1];  /* ws[0] is returned below.  */
+  for (i = 1; i < team->work_share_chunk - 1; i++)
+    ws[i].next_free = &ws[i + 1];
+  ws[i].next_free = NULL;  /* Last entry terminates the list.  */
    return ws;
  }
  
+/* Initialize an already allocated struct gomp_work_share WS for NTHREADS
+   threads; ORDERED selects ordered setup.  Must not touch next_alloc.  */
+
+void
+gomp_init_work_share (struct gomp_work_share *ws, bool ordered,
+                     unsigned nthreads)
+{
+  gomp_mutex_init (&ws->lock);
+  if (__builtin_expect (ordered, 0))  /* Hint: ordered is unlikely.  */
+    {
+#define INLINE_ORDERED_TEAM_IDS_CNT \
+  ((sizeof (struct gomp_work_share) \
+    - offsetof (struct gomp_work_share, inline_ordered_team_ids)) \
+   / sizeof (((struct gomp_work_share *) 0)->inline_ordered_team_ids[0]))
+      /* Ids that fit between inline_ordered_team_ids and struct end.  */
+      if (nthreads > INLINE_ORDERED_TEAM_IDS_CNT)  /* Inline is too small.  */
+       ws->ordered_team_ids
+         = gomp_malloc (nthreads * sizeof (*ws->ordered_team_ids));
+      else
+       ws->ordered_team_ids = ws->inline_ordered_team_ids;
+      memset (ws->ordered_team_ids, '\0',
+             nthreads * sizeof (*ws->ordered_team_ids));
+      ws->ordered_num_used = 0;
+      ws->ordered_owner = -1;
+      ws->ordered_cur = 0;
+    }
+  else
+    ws->ordered_team_ids = NULL;  /* Other ordered_* fields are not set.  */
+  gomp_ptrlock_init (&ws->next_ws, NULL);  /* next_ws starts out NULL.  */
+  ws->threads_completed = 0;
+}
  
-/* Free a work share structure.  */
+/* Do any needed destruction of WS's fields before it is put back into
+   the free gomp_work_share cache or freed.  WS itself is not freed.  */
  
-static void
-free_work_share (struct gomp_work_share *ws)
+void
+gomp_fini_work_share (struct gomp_work_share *ws)
  {
    gomp_mutex_destroy (&ws->lock);
-  free (ws);
+  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
+    free (ws->ordered_team_ids);  /* Separate array, or NULL (no-op).  */
+  gomp_ptrlock_destroy (&ws->next_ws);
  }
  
+/* Release work share WS.  If TEAM is NULL (orphaned construct), free it;
+   otherwise push it onto TEAM's free gomp_work_share cache.  */
+
+static inline void
+free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
+{
+  gomp_fini_work_share (ws);
+  if (__builtin_expect (team == NULL, 0))  /* Orphaned case.  */
+    free (ws);
+  else
+    {
+      struct gomp_work_share *next_ws;
+#ifdef HAVE_SYNC_BUILTINS
+      do  /* Retry until the compare-and-swap installs ws as head.  */
+       {
+         next_ws = team->work_share_list_free;
+         ws->next_free = next_ws;
+       }
+      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
+                                           next_ws, ws));
+#else
+      gomp_mutex_lock (&team->work_share_list_free_lock);
+      next_ws = team->work_share_list_free;
+      ws->next_free = next_ws;
+      team->work_share_list_free = ws;  /* ws is the new list head.  */
+      gomp_mutex_unlock (&team->work_share_list_free_lock);
+#endif
+    }
+}
  
  /* The current thread is ready to begin the next work sharing construct.
     In all cases, thr->ts.work_share is updated to point to the new
@@ -74,71 +170,34 @@ gomp_work_share_start (bool ordered)
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
    struct gomp_work_share *ws;
-  unsigned ws_index, ws_gen;
  
    /* Work sharing constructs can be orphaned.  */
    if (team == NULL)
      {
-      ws = gomp_new_work_share (ordered, 1);
+      ws = gomp_malloc (sizeof (*ws));
+      gomp_init_work_share (ws, ordered, 1);
        thr->ts.work_share = ws;
-      thr->ts.static_trip = 0;
-      gomp_mutex_lock (&ws->lock);
-      return true;
+      return ws;
      }
  
-  gomp_mutex_lock (&team->work_share_lock);
-
-  /* This thread is beginning its next generation.  */
-  ws_gen = ++thr->ts.work_share_generation;
-
-  /* If this next generation is not newer than any other generation in
-     the team, then simply reference the existing construct.  */
-  if (ws_gen - team->oldest_live_gen < team->num_live_gen)
+  ws = thr->ts.work_share;
+  thr->ts.last_work_share = ws;
+  ws = gomp_ptrlock_get (&ws->next_ws);
+  if (ws == NULL)
      {
-      ws_index = ws_gen & team->generation_mask;
-      ws = team->work_shares[ws_index];
+      /* This thread encountered a new ws first.  */
+      struct gomp_work_share *ws = alloc_work_share (team);
+      gomp_init_work_share (ws, ordered, team->nthreads);
        thr->ts.work_share = ws;
-      thr->ts.static_trip = 0;
-
-      gomp_mutex_lock (&ws->lock);
-      gomp_mutex_unlock (&team->work_share_lock);
-
-      return false;
+      return true;
      }
-
-  /* Resize the work shares queue if we've run out of space.  */
-  if (team->num_live_gen++ == team->generation_mask)
+  else
      {
-      team->work_shares = gomp_realloc (team->work_shares,
-                                       2 * team->num_live_gen
-                                       * sizeof (*team->work_shares));
-
-      /* Unless oldest_live_gen is zero, the sequence of live elements
-        wraps around the end of the array.  If we do nothing, we break
-        lookup of the existing elements.  Fix that by unwrapping the
-        data from the front to the end.  */
-      if (team->oldest_live_gen > 0)
-       memcpy (team->work_shares + team->num_live_gen,
-               team->work_shares,
-               (team->oldest_live_gen & team->generation_mask)
-               * sizeof (*team->work_shares));
-
-      team->generation_mask = team->generation_mask * 2 + 1;
+      thr->ts.work_share = ws;
+      return false;
      }
-
-  ws_index = ws_gen & team->generation_mask;
-  ws = gomp_new_work_share (ordered, team->nthreads);
-  thr->ts.work_share = ws;
-  thr->ts.static_trip = 0;
-  team->work_shares[ws_index] = ws;
-
-  gomp_mutex_lock (&ws->lock);
-  gomp_mutex_unlock (&team->work_share_lock);
-
-  return true;
  }
  
-
  /* The current thread is done with its current work sharing construct.
     This version does imply a barrier at the end of the work-share.  */
  
@@ -147,36 +206,28 @@ gomp_work_share_end (void)
  {
    struct gomp_thread *thr = gomp_thread ();
    struct gomp_team *team = thr->ts.team;
-  struct gomp_work_share *ws = thr->ts.work_share;
-  bool last;
-
-  thr->ts.work_share = NULL;
+  gomp_barrier_state_t bstate;
  
    /* Work sharing constructs can be orphaned.  */
    if (team == NULL)
      {
-      free_work_share (ws);
+      free_work_share (NULL, thr->ts.work_share);
+      thr->ts.work_share = NULL;
        return;
      }
  
-  last = gomp_barrier_wait_start (&team->barrier);
+  bstate = gomp_barrier_wait_start (&team->barrier);
  
-  if (last)
+  if (gomp_barrier_last_thread (bstate))
      {
-      unsigned ws_index;
-
-      ws_index = thr->ts.work_share_generation & team->generation_mask;
-      team->work_shares[ws_index] = NULL;
-      team->oldest_live_gen++;
-      team->num_live_gen = 0;
-
-      free_work_share (ws);
+      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
+       free_work_share (team, thr->ts.last_work_share);
      }
  
-  gomp_barrier_wait_end (&team->barrier, last);
+  gomp_team_barrier_wait_end (&team->barrier, bstate);
+  thr->ts.last_work_share = NULL;
  }
  
-
  /* The current thread is done with its current work sharing construct.
     This version does NOT imply a barrier at the end of the work-share.  */
  
@@ -188,15 +239,17 @@ gomp_work_share_end_nowait (void)
    struct gomp_work_share *ws = thr->ts.work_share;
    unsigned completed;
  
-  thr->ts.work_share = NULL;
-
    /* Work sharing constructs can be orphaned.  */
    if (team == NULL)
      {
-      free_work_share (ws);
+      free_work_share (NULL, ws);
+      thr->ts.work_share = NULL;
        return;
      }
  
+  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
+    return;
+
  #ifdef HAVE_SYNC_BUILTINS
    completed = __sync_add_and_fetch (&ws->threads_completed, 1);
  #else
@@ -206,18 +259,6 @@ gomp_work_share_end_nowait (void)
  #endif
  
    if (completed == team->nthreads)
-    {
-      unsigned ws_index;
-
-      gomp_mutex_lock (&team->work_share_lock);
-
-      ws_index = thr->ts.work_share_generation & team->generation_mask;
-      team->work_shares[ws_index] = NULL;
-      team->oldest_live_gen++;
-      team->num_live_gen--;
-
-      gomp_mutex_unlock (&team->work_share_lock);
-
-      free_work_share (ws);
-    }
+    free_work_share (team, thr->ts.last_work_share);
+  thr->ts.last_work_share = NULL;
  }