libitm/method-gl.cc

   1 /* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
   2    Contributed by Torvald Riegel <triegel@redhat.com>.
   3
   4    This file is part of the GNU Transactional Memory Library (libitm).
   5
   6    Libitm is free software; you can redistribute it and/or modify it
   7    under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
  12    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  13    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  14    more details.
  15
  16    Under Section 7 of GPL version 3, you are granted additional
  17    permissions described in the GCC Runtime Library Exception, version
  18    3.1, as published by the Free Software Foundation.
  19
  20    You should have received a copy of the GNU General Public License and
  21    a copy of the GCC Runtime Library Exception along with this program;
  22    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  23    <http://www.gnu.org/licenses/>.  */
  24
  25 #include "libitm_i.h"
  26
  27 using namespace GTM;
  28
  29 namespace {
  30
  31 // This group consists of all TM methods that synchronize via just a single
  32 // global lock (or ownership record).
  33 struct gl_mg : public method_group
  34 {
  35   static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  36   // We can't use the full bitrange because ~0 in gtm_thread::shared_state has
  37   // special meaning.
  38   static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1;
  39   static bool is_locked(gtm_word l) { return l & LOCK_BIT; }
  40   static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; }
  41   static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }
  42
  43   // The global ownership record.
  44   atomic<gtm_word> orec;
  45
  46   virtual void init()
  47   {
  48     // This store is only executed while holding the serial lock, so relaxed
  49     // memory order is sufficient here.
  50     orec.store(0, memory_order_relaxed);
  51   }
  52   virtual void fini() { }
  53 };
  54
  55 // The file-local gl_mg instance; its orec member is the global ownership
     // record that the code in this file reads and updates.
     // TODO cacheline padding
  56 static gl_mg o_gl_mg;
  57
  58
  59 // The global lock, write-through TM method.
  60 // Acquires the orec eagerly before the first write, and then writes through.
  61 // Reads abort if the global orec's version number changed or if it is locked.
  62 // Currently, writes require undo-logging to prevent deadlock between the
  63 // serial lock and the global orec (writer txn acquires orec, reader txn
  64 // upgrades to serial and waits for all other txns, writer tries to upgrade to
  65 // serial too but cannot, writer cannot abort either, deadlock). We could
  66 // avoid this if the serial lock would allow us to prevent other threads from
  67 // going to serial mode, but this probably is too much additional complexity
  68 // just to optimize this TM method.
  69 // gtm_thread::shared_state is used to store a transaction's current
  70 // snapshot time (or commit time). The serial lock uses ~0 for inactive
  71 // transactions and 0 for active ones. Thus, we always have a meaningful
  72 // timestamp in shared_state that can be used to implement quiescence-based
  73 // privatization safety. This even holds if a writing transaction has the
  74 // lock bit set in its shared_state because this is fine for both the serial
  75 // lock (the value will be smaller than ~0) and privatization safety (we
  76 // validate that no other update transaction committed before we acquired the
  77 // orec, so we have the most recent timestamp and no other transaction can
  78 // commit until we have committed).
  79 // However, we therefore cannot use this method for a serial transaction
  80 // (because shared_state needs to remain at ~0) and we have to be careful
  81 // when switching to serial mode (see the special handling in trycommit() and
  82 // rollback()).
  83 // ??? This sharing adds some complexity wrt. serial mode. Just use a separate
  84 // state variable?
  85 class gl_wt_dispatch : public abi_dispatch
  86 {
  87 protected:
       // Prepares a transactional write to the LEN bytes at ADDR.  Unless TX's
       // shared_state already carries the lock bit, the global orec is acquired
       // first; the transaction is restarted if the version number reached
       // VERSION_MAX, if the orec no longer equals the snapshot, or if the CAS
       // fails.  In all cases the region is then recorded in the undo log.
  88   static void pre_write(const void *addr, size_t len,
  89       gtm_thread *tx = gtm_thr())
  90   {
  91     gtm_word v = tx->shared_state.load(memory_order_relaxed);
  92     if (unlikely(!gl_mg::is_locked(v)))
  93       {
  94         // Check for and handle version number overflow.
  95         if (unlikely(v >= gl_mg::VERSION_MAX))
  96           tx->restart(RESTART_INIT_METHOD_GROUP);
  97
  98         // This validates that we have a consistent snapshot, which is also
  99         // for making privatization safety work (see the class' comments).
 100         // Note that this check here will be performed by the subsequent CAS
 101         // again, so relaxed memory order is fine.
 102         gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
 103         if (now != v)
 104           tx->restart(RESTART_VALIDATE_WRITE);
 105
 106         // CAS global orec from our snapshot time to the locked state.
 107         // We need acquire memory order here to synchronize with other
 108         // (ownership) releases of the orec.  We do not need acq_rel order
 109         // because whenever another thread reads from this CAS'
 110         // modification, then it will abort anyway and does not rely on
 111         // any further happens-before relation to be established.
 112         // Also note that unlike in ml_wt's increase of the global time
 113         // base (remember that the global orec is used as time base), we do
 114         // not need release memory order here because we do not need to make
 115         // prior orec acquisitions visible to other threads that try to
 116         // extend their snapshot time.
 117         if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
 118                                                    memory_order_acquire))
 119           tx->restart(RESTART_LOCKED_WRITE);
 120
 121         // We use an explicit fence here to avoid having to use release
 122         // memory order for all subsequent data stores.  This fence will
 123         // synchronize with loads of the data with acquire memory order.  See
 124         // validate() for why this is necessary.
 125         // Adding release memory order to the prior CAS is not sufficient,
 126         // at least according to the Batty et al. formalization of the
 127         // memory model.
 128         atomic_thread_fence(memory_order_release);
 129
 130         // Set shared_state to new value.
 131         tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
 132       }
 133
 134     tx->undolog.log(addr, len);
 135   }
 136
       // Restarts the transaction with RESTART_VALIDATE_READ unless the global
       // orec still equals the snapshot value stored in TX's shared_state.
 137   static void validate(gtm_thread *tx = gtm_thr())
 138   {
 139     // Check that snapshot is consistent.  We expect the previous data load to
 140     // have acquire memory order, or be atomic and followed by an acquire
 141     // fence.
 142     // As a result, the data load will synchronize with the release fence
 143     // issued by the transactions whose data updates the data load has read
 144     // from.  This forces the orec load to read from a visible sequence of side
 145     // effects that starts with the other updating transaction's store that
 146     // acquired the orec and set it to locked.
 147     // We therefore either read a value with the locked bit set (and restart)
 148     // or read an orec value that was written after the data had been written.
 149     // Either will allow us to detect inconsistent reads because it will have
 150     // a higher/different value.
 151     gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
 152     if (l != tx->shared_state.load(memory_order_relaxed))
 153       tx->restart(RESTART_VALIDATE_READ);
 154   }
 155
       // Transactional load of *ADDR.  RfW first goes through pre_write(), RaW
       // reads the location directly, and every other modifier reads the value,
       // issues an acquire fence and validates the snapshot before returning.
 156   template <typename V> static V load(const V* addr, ls_modifier mod)
 157   {
 158     // Read-for-write should be unlikely, but we need to handle it or will
 159     // break later WaW optimizations.
 160     if (unlikely(mod == RfW))
 161       {
 162         pre_write(addr, sizeof(V));
 163         return *addr;
 164       }
 165     if (unlikely(mod == RaW))
 166       return *addr;
 167
 168     // We have not acquired the orec, so we need to load a value and then
 169     // validate that this was consistent.
 170     // This needs to have acquire memory order (see validate()).
 171     // Alternatively, we can put an acquire fence after the data load but this
 172     // is probably less efficient.
 173     // FIXME We would need an atomic load with acquire memory order here but
 174     // we can't just forge an atomic load for nonatomic data because this
 175     // might not work on all implementations of atomics.  However, we need
 176     // the acquire memory order and we can only establish this if we link
 177     // it to the matching release using a reads-from relation between atomic
 178     // loads.  Also, the compiler is allowed to optimize nonatomic accesses
 179     // differently than atomic accesses (e.g., if the load would be moved to
 180     // after the fence, we potentially don't synchronize properly anymore).
 181     // Instead of the following, just use an ordinary load followed by an
 182     // acquire fence, and hope that this is good enough for now:
 183     // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
 184     V v = *addr;
 185     atomic_thread_fence(memory_order_acquire);
 186     validate();
 187     return v;
 188   }
 189
       // Transactional store of VALUE to *ADDR.  pre_write() is called first
       // unless MOD is WaW; the data is then written in place.
 190   template <typename V> static void store(V* addr, const V value,
 191       ls_modifier mod)
 192   {
 193     if (likely(mod != WaW))
 194       pre_write(addr, sizeof(V));
 195     // FIXME We would need an atomic store here but we can't just forge an
 196     // atomic load for nonatomic data because this might not work on all
 197     // implementations of atomics.  However, we need this store to link the
 198     // release fence in pre_write() to the acquire operation in load, which
 199     // is only guaranteed if we have a reads-from relation between atomic
 200     // accesses.  Also, the compiler is allowed to optimize nonatomic accesses
 201     // differently than atomic accesses (e.g., if the store would be moved
 202     // to before the release fence in pre_write(), things could go wrong).
 203     // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
 204     *addr = value;
 205   }
 206
 207 public:
       // Copies SIZE bytes from SRC to DST, with memmove() if MAY_OVERLAP is set
       // and memcpy() otherwise.  DST goes through pre_write() unless DST_MOD is
       // WaW or NONTXNAL, and SRC does so if SRC_MOD is RfW.  The snapshot is
       // validated after the copy unless SRC_MOD is RfW, RaW or NONTXNAL, or
       // DST_MOD is WaW.
 208   static void memtransfer_static(void *dst, const void* src, size_t size,
 209       bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
 210   {
 211     gtm_thread *tx = gtm_thr();
 212     if (dst_mod != WaW && dst_mod != NONTXNAL)
 213       pre_write(dst, size, tx);
 214     // We need at least undo-logging for an RfW src region because we might
 215     // subsequently write there with WaW.
 216     if (src_mod == RfW)
 217       pre_write(src, size, tx);
 218
 219     // FIXME We should use atomics here (see store()).  Let's just hope that
 220     // memcpy/memmove are good enough.
 221     if (!may_overlap)
 222       ::memcpy(dst, src, size);
 223     else
 224       ::memmove(dst, src, size);
 225
 226     if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
 227         && dst_mod != WaW)
 228       validate(tx);
 229   }
 230
       // Fills SIZE bytes at DST with C via memset(), calling pre_write() on the
       // region first unless MOD is WaW.
 231   static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
 232   {
 233     if (mod != WaW)
 234       pre_write(dst, size);
 235     // FIXME We should use atomics here (see store()).  Let's just hope that
 236     // memset is good enough.
 237     ::memset(dst, c, size);
 238   }
 239
       // Obtains the snapshot time for an outermost transaction.  Nested
       // transactions return NO_RESTART right away.  Otherwise this spins until
       // the global orec is unlocked and stores the value read in shared_state;
       // RESTART_VALIDATE_READ is returned instead once the number of loads
       // that saw a locked orec exceeds gtm_spin_count_var.
 240   virtual gtm_restart_reason begin_or_restart()
 241   {
 242     // We don't need to do anything for nested transactions.
 243     gtm_thread *tx = gtm_thr();
 244     if (tx->parent_txns.size() > 0)
 245       return NO_RESTART;
 246
 247     // Spin until global orec is not locked.
 248     // TODO This is not necessary if there are no pure loads (check txn props).
 249     unsigned i = 0;
 250     gtm_word v;
 251     while (1)
 252       {
 253         // We need acquire memory order here so that this load will
 254         // synchronize with the store that releases the orec in trycommit().
 255         // In turn, this makes sure that subsequent data loads will read from
 256         // a visible sequence of side effects that starts with the most recent
 257         // store to the data right before the release of the orec.
 258         v = o_gl_mg.orec.load(memory_order_acquire);
 259         if (!gl_mg::is_locked(v))
 260           break;
 261         // TODO need method-specific max spin count
 262         if (++i > gtm_spin_count_var)
 263           return RESTART_VALIDATE_READ;
 264         cpu_relax();
 265       }
 266
 267     // Everything is okay, we have a snapshot time.
 268     // We don't need to enforce any ordering for the following store. There
 269     // are no earlier data loads in this transaction, so the store cannot
 270     // become visible before those (which could lead to the violation of
 271     // privatization safety). The store can become visible after later loads
 272     // but this does not matter because the previous value will have been
 273     // smaller or equal (the serial lock will set shared_state to zero when
 274     // marking the transaction as active, and restarts enforce immediate
 275     // visibility of a smaller or equal value with a barrier (see
 276     // rollback()).
 277     tx->shared_state.store(v, memory_order_relaxed);
 278     return NO_RESTART;
 279   }
 280
       // Commits the transaction; always returns true.  If the orec is held
       // (lock bit set), it is released with the version number incremented and
       // that new version is stored in PRIV_TIME.  PRIV_TIME is left untouched
       // otherwise.
 281   virtual bool trycommit(gtm_word& priv_time)
 282   {
 283     gtm_thread* tx = gtm_thr();
 284     gtm_word v = tx->shared_state.load(memory_order_relaxed);
 285
 286     // Special case: If shared_state is ~0, then we have acquired the
 287     // serial lock (tx->state is not updated yet). In this case, the previous
 288     // value isn't available anymore, so grab it from the global lock, which
 289     // must have a meaningful value because no other transactions are active
 290     // anymore. In particular, if it is locked, then we are an update
 291     // transaction, which is all we care about for commit.
 292     if (v == ~(typeof v)0)
 293       v = o_gl_mg.orec.load(memory_order_relaxed);
 294
 295     // Release the orec but do not reset shared_state, which will be modified
 296     // by the serial lock right after our commit anyway. Also, resetting
 297     // shared state here would interfere with the serial lock's use of this
 298     // location.
 299     if (gl_mg::is_locked(v))
 300       {
 301         // Release the global orec, increasing its version number / timestamp.
 302         // See begin_or_restart() for why we need release memory order here.
 303         v = gl_mg::clear_locked(v) + 1;
 304         o_gl_mg.orec.store(v, memory_order_release);
 305
 306         // Need to ensure privatization safety. Every other transaction must
 307         // have a snapshot time that is at least as high as our commit time
 308         // (i.e., our commit must be visible to them).
 309         priv_time = v;
 310       }
 311     return true;
 312   }
 313
       // Rolls back an outermost transaction; nothing is done if CP is nonzero.
       // If the orec is held, it is released with an incremented version
       // number, which is also published in shared_state unless shared_state
       // was ~0 (serial transaction), followed by a seq_cst fence.
 314   virtual void rollback(gtm_transaction_cp *cp)
 315   {
 316     // We don't do anything for rollbacks of nested transactions.
 317     if (cp != 0)
 318       return;
 319
 320     gtm_thread *tx = gtm_thr();
 321     gtm_word v = tx->shared_state.load(memory_order_relaxed);
 322     // Special case: If shared_state is ~0, then we have acquired the
 323     // serial lock (tx->state is not updated yet). In this case, the previous
 324     // value isn't available anymore, so grab it from the global lock, which
 325     // must have a meaningful value because no other transactions are active
 326     // anymore. In particular, if it is locked, then we are an update
 327     // transaction, which is all we care about for rollback.
 328     bool is_serial = v == ~(typeof v)0;
 329     if (is_serial)
 330       v = o_gl_mg.orec.load(memory_order_relaxed);
 331
 332     // Release lock and increment version number to prevent dirty reads.
 333     // Also reset shared state here, so that begin_or_restart() can expect a
 334     // value that is correct wrt. privatization safety.
 335     if (gl_mg::is_locked(v))
 336       {
 337         // Release the global orec, increasing its version number / timestamp.
 338         // See begin_or_restart() for why we need release memory order here.
 339         v = gl_mg::clear_locked(v) + 1;
 340         o_gl_mg.orec.store(v, memory_order_release);
 341
 342         // Also reset the timestamp published via shared_state.
 343         // Special case: Only do this if we are not a serial transaction
 344         // because otherwise, we would interfere with the serial lock.
 345         if (!is_serial)
 346           tx->shared_state.store(v, memory_order_release);
 347
 348         // We need a store-load barrier after this store to prevent it
 349         // from becoming visible after later data loads because the
 350         // previous value of shared_state has been higher than the actual
 351         // snapshot time (the lock bit had been set), which could break
 352         // privatization safety. We do not need a barrier before this
 353         // store (see pre_write() for an explanation).
 354         // ??? What is the precise reasoning in the C++11 model?
 355         atomic_thread_fence(memory_order_seq_cst);
 356       }
 357
 358   }
 359
       // NOTE(review): these macros are defined outside this file; presumably
       // they generate the dispatch entry points on top of load(), store(),
       // memtransfer_static() and memset_static() -- confirm against their
       // definition.
 360   CREATE_DISPATCH_METHODS(virtual, )
 361   CREATE_DISPATCH_METHODS_MEM()
 362
       // Passes &o_gl_mg to the abi_dispatch base, presumably as this method's
       // method group.  NOTE(review): the meaning of the four boolean flags is
       // determined by abi_dispatch's constructor, which is not visible here.
 363   gl_wt_dispatch() : abi_dispatch(false, true, false, false, &o_gl_mg)
 364   { }
 365 };
 366
 367 } // anon namespace
 368
 369 static const gl_wt_dispatch o_gl_wt_dispatch;  // See dispatch_gl_wt().
 370
 371 abi_dispatch *
 372 GTM::dispatch_gl_wt ()
 373 {
 374   // Hand out the file's gl_wt_dispatch object.  It is declared const, so the
 375   // qualifier has to be cast away to match the non-const return type.
 376   const gl_wt_dispatch *instance = &o_gl_wt_dispatch;
 377   return const_cast<gl_wt_dispatch *>(instance);
 378 }