/* TILE atomics.
   Copyright (C) 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include "tconfig.h"
#include "coretypes.h"
#include "atomic.h"
/* This code should be inlined by the compiler, but for now support
   it as out-of-line methods in libgcc.  */
static void
pre_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
}
static void
post_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
}
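/* For illustration, a seq_cst operation built on these helpers is
   bracketed by both fences, while a relaxed one falls through both
   switches and fences nothing:

     pre_atomic_barrier (__ATOMIC_SEQ_CST);    // fence emitted
     ... atomic read-modify-write ...
     post_atomic_barrier (__ATOMIC_SEQ_CST);   // fence emitted

   With __ATOMIC_ACQUIRE only the post-barrier fences, and with
   __ATOMIC_RELEASE only the pre-barrier does.  */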
#define __unused __attribute__((unused))
#define __fetch_and_do(proto, type, size, opname, top, bottom)	\
proto								\
{								\
  top;								\
  type rv = arch_atomic_##opname(p, i);				\
  bottom;							\
  return rv;							\
}

#define __atomic_fetch_and_do(type, size, opname)		\
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
		 type, size, opname,				\
		 pre_atomic_barrier(model),			\
		 post_atomic_barrier(model))
__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)
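/* For reference, the instantiation above for (int, 4, add) expands
   to roughly:

     int __atomic_fetch_add_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = arch_atomic_add (p, i);
       post_atomic_barrier (model);
       return rv;
     }
*/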
#define __sync_fetch_and_do(type, size, opname)			\
  __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
		 type, size, opname,				\
		 arch_atomic_write_barrier(),			\
		 arch_atomic_read_barrier())
__sync_fetch_and_do (int, 4, add)
__sync_fetch_and_do (int, 4, sub)
__sync_fetch_and_do (int, 4, or)
__sync_fetch_and_do (int, 4, and)
__sync_fetch_and_do (int, 4, xor)
__sync_fetch_and_do (int, 4, nand)
__sync_fetch_and_do (long long, 8, add)
__sync_fetch_and_do (long long, 8, sub)
__sync_fetch_and_do (long long, 8, or)
__sync_fetch_and_do (long long, 8, and)
__sync_fetch_and_do (long long, 8, xor)
__sync_fetch_and_do (long long, 8, nand)
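/* The __sync_* variants take no memory model; they approximate a
   full barrier by unconditionally pairing a write barrier before the
   operation with a read barrier after it, e.g.:

     int old = __sync_fetch_and_add_4 (&counter, 1);   // old value before the add
*/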
#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom) \
proto								\
{								\
  top;								\
  type rv = op2 (arch_atomic_##opname(p, i) op i);		\
  bottom;							\
  return rv;							\
}

#define __atomic_do_and_fetch(type, size, opname, op, op2)	\
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
		 type, size, opname, op, op2,			\
		 pre_atomic_barrier(model),			\
		 post_atomic_barrier(model))
__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)
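/* The op2 parameter exists so nand can reuse the template: for
   (int, 4, nand, &, ~) the returned value is computed as

     int rv = ~ (arch_atomic_nand (p, i) & i);

   i.e. the post-operation value ~(oldval & i) that
   __atomic_nand_fetch is defined to return.  */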
#define __sync_do_and_fetch(type, size, opname, op, op2)	\
  __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
		 type, size, opname, op, op2,			\
		 arch_atomic_write_barrier(),			\
		 arch_atomic_read_barrier())
__sync_do_and_fetch (int, 4, add, +, )
__sync_do_and_fetch (int, 4, sub, -, )
__sync_do_and_fetch (int, 4, or, |, )
__sync_do_and_fetch (int, 4, and, &, )
__sync_do_and_fetch (int, 4, xor, ^, )
__sync_do_and_fetch (int, 4, nand, &, ~)
__sync_do_and_fetch (long long, 8, add, +, )
__sync_do_and_fetch (long long, 8, sub, -, )
__sync_do_and_fetch (long long, 8, or, |, )
__sync_do_and_fetch (long long, 8, and, &, )
__sync_do_and_fetch (long long, 8, xor, ^, )
__sync_do_and_fetch (long long, 8, nand, &, ~)
#define __atomic_exchange_methods(type, size)			\
bool								\
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \
				 type newval, bool weak __unused, \
				 int models, int modelf __unused) \
{								\
  type oldval = *oldvalp;					\
  pre_atomic_barrier(models);					\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  post_atomic_barrier(models);					\
  bool success = (retval == oldval);				\
  *oldvalp = retval;						\
  return success;						\
}								\
								\
type								\
__atomic_exchange_##size(volatile type* ptr, type val, int model) \
{								\
  pre_atomic_barrier(model);					\
  type retval = arch_atomic_exchange(ptr, val);			\
  post_atomic_barrier(model);					\
  return retval;						\
}
__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)
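/* Example of the C11-style failure protocol: on a miss the observed
   value is stored back through oldvalp so the caller can retry:

     int expected = 0;
     bool ok = __atomic_compare_exchange_4 (&lock, &expected, 1, false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
     // if !ok, expected now holds the value actually seen in lock
*/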
#define __sync_exchange_methods(type, size)			\
type								\
__sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{								\
  arch_atomic_write_barrier();					\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();					\
  return retval;						\
}								\
								\
bool								\
__sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{								\
  arch_atomic_write_barrier();					\
  bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();					\
  return retval;						\
}								\
								\
type								\
__sync_lock_test_and_set_##size(type* ptr, type val)		\
{								\
  type retval = arch_atomic_exchange(ptr, val);			\
  arch_atomic_acquire_barrier_value(retval);			\
  return retval;						\
}
__sync_exchange_methods (int, 4)
__sync_exchange_methods (long long, 8)
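/* Note that __sync_lock_test_and_set is only an acquire barrier, so
   unlike the other __sync_* methods above it issues no write barrier
   before the exchange.  A typical spinlock acquire is:

     while (__sync_lock_test_and_set_4 (&lock, 1))
       ;  // spin until the previous value was 0
*/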
#ifdef __LITTLE_ENDIAN__
#define BIT_OFFSET(n, type) ((n) * 8)
#else
#define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
#endif
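/* Worked example: a u16 at byte offset 2 of its word has
   BIT_OFFSET(2, u16) == 16 on little endian and
   (4 - 2 - 2) * 8 == 0 on big endian; either way the shifted field
   covers exactly the two bytes the pointer addresses.  */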
/* Subword methods require the same approach for both TILEPro and
   TILE-Gx.  We load the background data for the word, insert the
   desired subword piece, then compare-and-exchange it into place.  */
#define u8 unsigned char
#define u16 unsigned short
#define __subword_cmpxchg_body(type, size, ptr, guess, val)	\
  ({								\
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;	\
    const unsigned int bgmask = ~(valmask << shift);		\
    unsigned int oldword = *p;					\
    type oldval = (oldword >> shift) & valmask;			\
    if (__builtin_expect((oldval == guess), 1)) {		\
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      oldword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
      oldval = (oldword >> shift) & valmask;			\
    }								\
    oldval;							\
  })
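/* Worked example (little endian): for a u8 at address 0x1002,
   p = (unsigned int *) 0x1000, shift = 16, valmask = 0xff and
   bgmask = 0xff00ffff, so `word' keeps the three background bytes of
   `oldword' and splices the new byte into bits 23:16 before the
   word-wide compare-and-exchange.  */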
#define __atomic_subword_cmpxchg(type, size)			\
  bool								\
  __atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr, \
				   type val, bool weak __unused, int models, \
				   int modelf __unused)		\
  {								\
    pre_atomic_barrier(models);					\
    type guess = *guess_ptr;					\
    type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \
    post_atomic_barrier(models);				\
    bool success = (oldval == guess);				\
    *guess_ptr = oldval;					\
    return success;						\
  }
__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)
#define __sync_subword_cmpxchg(type, size)			\
  type								\
  __sync_val_compare_and_swap_##size(type* ptr, type guess, type val) \
  {								\
    arch_atomic_write_barrier();				\
    type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \
    arch_atomic_read_barrier();					\
    return oldval;						\
  }								\
								\
  bool								\
  __sync_bool_compare_and_swap_##size(type* ptr, type guess, type val) \
  {								\
    type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val); \
    return oldval == guess;					\
  }
__sync_subword_cmpxchg (u8, 1)
__sync_subword_cmpxchg (u16, 2)
/* For the atomic-update subword methods, we use the same approach as
   above, but we retry until we succeed if the compare-and-exchange
   fails.  */
#define __subword(type, proto, top, expr, bottom)		\
  proto								\
  {								\
    top								\
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;	\
    const unsigned int bgmask = ~(valmask << shift);		\
    unsigned int oldword, xword = *p;				\
    type val, oldval;						\
    do {							\
      oldword = xword;						\
      oldval = (oldword >> shift) & valmask;			\
      val = (expr);						\
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      xword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
    } while (__builtin_expect(xword != oldword, 0));		\
    bottom							\
  }
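/* The loop exits only when the word-wide compare-and-exchange
   returns the oldword the update was computed from, i.e. no other
   thread changed any byte of the containing word in between; a
   racing store to *any* byte of the word, not just this subword,
   forces another iteration.  */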
#define __atomic_subword_fetch(type, funcname, expr, retval)	\
  __subword(type,						\
	    type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
	    pre_atomic_barrier(model);,				\
	    expr,						\
	    post_atomic_barrier(model); return retval;)
__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)

__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)

__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)

__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)
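/* Example of the two return conventions: with *p == 5,
   __atomic_fetch_add_1 (p, 2, model) returns 5 (the `oldval'
   binding) while __atomic_add_fetch_1 (p, 2, model) returns 7 (the
   `val' binding); both leave *p == 7.  */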
#define __sync_subword_fetch(type, funcname, expr, retval)	\
  __subword(type,						\
	    type __sync_ ## funcname(type *ptr, type i),	\
	    arch_atomic_read_barrier();,			\
	    expr,						\
	    arch_atomic_write_barrier(); return retval;)
__sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
__sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
__sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
__sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
__sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
__sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)

__sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
__sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
__sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
__sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
__sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
__sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)

__sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
__sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
__sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
__sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
__sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
__sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)

__sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
__sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
__sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
__sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
__sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
__sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)
#define __atomic_subword_lock(type, size)			\
  __subword(type,						\
	    type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
	    pre_atomic_barrier(model);,				\
	    nval,						\
	    post_atomic_barrier(model); return oldval;)
__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)
#define __sync_subword_lock(type, size)				\
  __subword(type,						\
	    type __sync_lock_test_and_set_##size(type* ptr, type nval), \
	    ,							\
	    nval,						\
	    arch_atomic_acquire_barrier_value(oldval); return oldval;)
__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)
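/* Usage sketch: even for a one-byte flag the routine above retries
   on the containing 32-bit word, e.g.:

     static u8 flag;
     while (__sync_lock_test_and_set_1 (&flag, 1))
       ;  // spin until the previous value was 0
*/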