libstdc++/stl/pthread_alloc.h

   1 /*
   2  * Copyright (c) 1996
   3  * Silicon Graphics Computer Systems, Inc.
   4  *
   5  * Permission to use, copy, modify, distribute and sell this software
   6  * and its documentation for any purpose is hereby granted without fee,
   7  * provided that the above copyright notice appear in all copies and
   8  * that both that copyright notice and this permission notice appear
   9  * in supporting documentation.  Silicon Graphics makes no
  10  * representations about the suitability of this software for any
  11  * purpose.  It is provided "as is" without express or implied warranty.
  12  */
  13
  14 #ifndef __PTHREAD_ALLOC_H
  15 #define __PTHREAD_ALLOC_H
  16
  17 // Pthread-specific node allocator.
  18 // This is similar to the default allocator, except that free-list
  19 // information is kept separately for each thread, avoiding locking.
  20 // This should be reasonably fast even in the presence of threads.
  21 // The down side is that storage may not be well-utilized.
  22 // It is not an error to allocate memory in thread A and deallocate
  23 // it n thread B.  But this effectively transfers ownership of the memory,
  24 // so that it can only be reallocated by thread B.  Thus this can effectively
  25 // result in a storage leak if it's done on a regular basis.
  26 // It can also result in frequent sharing of
  27 // cache lines among processors, with potentially serious performance
  28 // consequences.
  29
  30
  31 #include <stddef.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34 #include <pthread.h>
  35 #include <alloc.h>
  36 #ifndef __RESTRICT
  37 #  define __RESTRICT
  38 #endif
  39
  40 // Note that this class has nonstatic members.  We instantiate it once
  41 // per thread.
  42 template <bool dummy>
  43 class __pthread_alloc_template {
  44
  45 private:
  46   enum {ALIGN = 8};
  47   enum {MAX_BYTES = 128};  // power of 2
  48   enum {NFREELISTS = MAX_BYTES/ALIGN};
  49
  50   union obj {
  51         union obj * free_list_link;
  52         char client_data[ALIGN];    /* The client sees this.        */
  53   };
  54
  55   // Per instance state
  56   obj* volatile free_list[NFREELISTS];
  57   __pthread_alloc_template<dummy>* next;        // Free list link
  58
  59   static size_t ROUND_UP(size_t bytes) {
  60         return (((bytes) + ALIGN-1) & ~(ALIGN - 1));
  61   }
  62   static size_t FREELIST_INDEX(size_t bytes) {
  63         return (((bytes) + ALIGN-1)/ALIGN - 1);
  64   }
  65
  66   // Returns an object of size n, and optionally adds to size n free list.
  67   void *refill(size_t n);
  68   // Allocates a chunk for nobjs of size size.  nobjs may be reduced
  69   // if it is inconvenient to allocate the requested number.
  70   static char *chunk_alloc(size_t size, int &nobjs);
  71
  72   // Chunk allocation state. And other shared state.
  73   // Protected by chunk_allocator_lock.
  74   static pthread_mutex_t chunk_allocator_lock;
  75   static char *start_free;
  76   static char *end_free;
  77   static size_t heap_size;
  78   static __pthread_alloc_template<dummy>* free_allocators;
  79   static pthread_key_t key;
  80   static bool key_initialized;
  81         // Pthread key under which allocator is stored.
  82         // Allocator instances that are currently unclaimed by any thread.
  83   static void destructor(void *instance);
  84         // Function to be called on thread exit to reclaim allocator
  85         // instance.
  86   static __pthread_alloc_template<dummy> *new_allocator();
  87         // Return a recycled or new allocator instance.
  88   static __pthread_alloc_template<dummy> *get_allocator_instance();
  89         // ensure that the current thread has an associated
  90         // allocator instance.
  91   class lock {
  92       public:
  93         lock () { pthread_mutex_lock(&chunk_allocator_lock); }
  94         ~lock () { pthread_mutex_unlock(&chunk_allocator_lock); }
  95   };
  96   friend class lock;
  97
  98
  99 public:
 100
 101   __pthread_alloc_template() : next(0)
 102   {
 103     memset((void *)free_list, 0, NFREELISTS * sizeof(obj *));
 104   }
 105
 106   /* n must be > 0      */
 107   static void * allocate(size_t n)
 108   {
 109     obj * volatile * my_free_list;
 110     obj * __RESTRICT result;
 111     __pthread_alloc_template<dummy>* a;
 112
 113     if (n > MAX_BYTES) {
 114         return(malloc(n));
 115     }
 116     if (!key_initialized ||
 117         !(a = (__pthread_alloc_template<dummy>*)
 118                 pthread_getspecific(key))) {
 119         a = get_allocator_instance();
 120     }
 121     my_free_list = a -> free_list + FREELIST_INDEX(n);
 122     result = *my_free_list;
 123     if (result == 0) {
 124         void *r = a -> refill(ROUND_UP(n));
 125         return r;
 126     }
 127     *my_free_list = result -> free_list_link;
 128     return (result);
 129   };
 130
 131   /* p may not be 0 */
 132   static void deallocate(void *p, size_t n)
 133   {
 134     obj *q = (obj *)p;
 135     obj * volatile * my_free_list;
 136     __pthread_alloc_template<dummy>* a;
 137
 138     if (n > MAX_BYTES) {
 139         free(p);
 140         return;
 141     }
 142     if (!key_initialized ||
 143         !(a = (__pthread_alloc_template<dummy>*)
 144                 pthread_getspecific(key))) {
 145         a = get_allocator_instance();
 146     }
 147     my_free_list = a->free_list + FREELIST_INDEX(n);
 148     q -> free_list_link = *my_free_list;
 149     *my_free_list = q;
 150   }
 151
 152   static void * reallocate(void *p, size_t old_sz, size_t new_sz);
 153
 154 } ;
 155
 156 typedef __pthread_alloc_template<false> pthread_alloc;
 157
 158
 159 template <bool dummy>
 160 void __pthread_alloc_template<dummy>::destructor(void * instance)
 161 {
 162     __pthread_alloc_template<dummy>* a =
 163         (__pthread_alloc_template<dummy>*)instance;
 164     a -> next = free_allocators;
 165     free_allocators = a;
 166 }
 167
 168 template <bool dummy>
 169 __pthread_alloc_template<dummy>*
 170 __pthread_alloc_template<dummy>::new_allocator()
 171 {
 172     if (0 != free_allocators) {
 173         __pthread_alloc_template<dummy>* result = free_allocators;
 174         free_allocators = free_allocators -> next;
 175         return result;
 176     } else {
 177         return new __pthread_alloc_template<dummy>;
 178     }
 179 }
 180
 181 template <bool dummy>
 182 __pthread_alloc_template<dummy>*
 183 __pthread_alloc_template<dummy>::get_allocator_instance()
 184 {
 185     __pthread_alloc_template<dummy>* result;
 186     if (!key_initialized) {
 187         /*REFERENCED*/
 188         lock lock_instance;
 189         if (!key_initialized) {
 190             if (pthread_key_create(&key, destructor)) {
 191                 abort();  // failed
 192             }
 193             key_initialized = true;
 194         }
 195     }
 196     result = new_allocator();
 197     if (pthread_setspecific(key, result)) abort();
 198     return result;
 199 }
 200
 201 /* We allocate memory in large chunks in order to avoid fragmenting     */
 202 /* the malloc heap too much.                                            */
 203 /* We assume that size is properly aligned.                             */
 204 template <bool dummy>
 205 char *__pthread_alloc_template<dummy>
 206 ::chunk_alloc(size_t size, int &nobjs)
 207 {
 208   {
 209     char * result;
 210     size_t total_bytes;
 211     size_t bytes_left;
 212     /*REFERENCED*/
 213     lock lock_instance;         // Acquire lock for this routine
 214
 215     total_bytes = size * nobjs;
 216     bytes_left = end_free - start_free;
 217     if (bytes_left >= total_bytes) {
 218         result = start_free;
 219         start_free += total_bytes;
 220         return(result);
 221     } else if (bytes_left >= size) {
 222         nobjs = bytes_left/size;
 223         total_bytes = size * nobjs;
 224         result = start_free;
 225         start_free += total_bytes;
 226         return(result);
 227     } else {
 228         size_t bytes_to_get = 2 * total_bytes + ROUND_UP(heap_size >> 4);
 229         // Try to make use of the left-over piece.
 230         if (bytes_left > 0) {
 231             __pthread_alloc_template<dummy>* a =
 232                 (__pthread_alloc_template<dummy>*)pthread_getspecific(key);
 233             obj * volatile * my_free_list =
 234                         a->free_list + FREELIST_INDEX(bytes_left);
 235
 236             ((obj *)start_free) -> free_list_link = *my_free_list;
 237             *my_free_list = (obj *)start_free;
 238         }
 239 #       ifdef _SGI_SOURCE
 240           // Try to get memory that's aligned on something like a
 241           // cache line boundary, so as to avoid parceling out
 242           // parts of the same line to different threads and thus
 243           // possibly different processors.
 244           {
 245             const int cache_line_size = 128;  // probable upper bound
 246             bytes_to_get &= ~(cache_line_size-1);
 247             start_free = (char *)memalign(cache_line_size, bytes_to_get);
 248             if (0 == start_free) {
 249               start_free = (char *)malloc_alloc::allocate(bytes_to_get);
 250             }
 251           }
 252 #       else  /* !SGI_SOURCE */
 253           start_free = (char *)malloc_alloc::allocate(bytes_to_get);
 254 #       endif
 255         heap_size += bytes_to_get;
 256         end_free = start_free + bytes_to_get;
 257     }
 258   }
 259   // lock is released here
 260   return(chunk_alloc(size, nobjs));
 261 }
 262
 263
 264 /* Returns an object of size n, and optionally adds to size n free list.*/
 265 /* We assume that n is properly aligned.                                */
 266 /* We hold the allocation lock.                                         */
 267 template <bool dummy>
 268 void *__pthread_alloc_template<dummy>
 269 ::refill(size_t n)
 270 {
 271     int nobjs = 128;
 272     char * chunk = chunk_alloc(n, nobjs);
 273     obj * volatile * my_free_list;
 274     obj * result;
 275     obj * current_obj, * next_obj;
 276     int i;
 277
 278     if (1 == nobjs)  {
 279         return(chunk);
 280     }
 281     my_free_list = free_list + FREELIST_INDEX(n);
 282
 283     /* Build free list in chunk */
 284       result = (obj *)chunk;
 285       *my_free_list = next_obj = (obj *)(chunk + n);
 286       for (i = 1; ; i++) {
 287         current_obj = next_obj;
 288         next_obj = (obj *)((char *)next_obj + n);
 289         if (nobjs - 1 == i) {
 290             current_obj -> free_list_link = 0;
 291             break;
 292         } else {
 293             current_obj -> free_list_link = next_obj;
 294         }
 295       }
 296     return(result);
 297 }
 298
 299 template <bool dummy>
 300 void *__pthread_alloc_template<dummy>
 301 ::reallocate(void *p, size_t old_sz, size_t new_sz)
 302 {
 303     void * result;
 304     size_t copy_sz;
 305
 306     if (old_sz > MAX_BYTES && new_sz > MAX_BYTES) {
 307         return(realloc(p, new_sz));
 308     }
 309     if (ROUND_UP(old_sz) == ROUND_UP(new_sz)) return(p);
 310     result = allocate(new_sz);
 311     copy_sz = new_sz > old_sz? old_sz : new_sz;
 312     memcpy(result, p, copy_sz);
 313     deallocate(p, old_sz);
 314     return(result);
 315 }
 316
 317 template <bool dummy>
 318 __pthread_alloc_template<dummy> *
 319 __pthread_alloc_template<dummy>::free_allocators = 0;
 320
 321 template <bool dummy>
 322 pthread_key_t __pthread_alloc_template<dummy>::key;
 323
 324 template <bool dummy>
 325 bool __pthread_alloc_template<dummy>::key_initialized = false;
 326
 327 template <bool dummy>
 328 pthread_mutex_t __pthread_alloc_template<dummy>::chunk_allocator_lock
 329 = PTHREAD_MUTEX_INITIALIZER;
 330
 331 template <bool dummy>
 332 char *__pthread_alloc_template<dummy>
 333 ::start_free = 0;
 334
 335 template <bool dummy>
 336 char *__pthread_alloc_template<dummy>
 337 ::end_free = 0;
 338
 339 template <bool dummy>
 340 size_t __pthread_alloc_template<dummy>
 341 ::heap_size = 0;
 342
 343
 344 #endif /* __NODE_ALLOC_H */