/* automatically generated by memory-auto.sh, do not edit! */

/*
 * Copyright (c) 2005, 2006 Matt Fredette
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Matt Fredette.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

_TME_RCSID("$Id: memory-auto.sh,v 1.2 2010/02/15 15:16:28 fredette Exp $");

/* macros: */

/* the plain partial read internal macro: */
#define _tme_memory_read(type_whole, type_part, mem, offset) \
  (((type_whole) \
    *((_tme_const type_part *) \
      (_tme_cast_pointer_const(tme_uint8_t *, type_whole *, mem) \
       + (offset)))) \
   << (8 * (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG \
            ? (sizeof(type_whole) \
               - ((offset) + sizeof(type_part))) \
            : (offset))))

/* the plain partial write internal macro: */
#define _tme_memory_write(type_whole, type_part, mem, offset, x) \
  do { \
    *((type_part *) \
      (_tme_cast_pointer(tme_uint8_t *, type_whole *, mem) \
       + (offset))) \
      = (type_part) \
        (((type_whole) (x)) \
         >> (8 * (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG \
                  ? (sizeof(type_whole) \
                     - ((offset) + sizeof(type_part))) \
                  : (offset)))); \
  } while (/* CONSTCOND */ 0)
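
/* an illustrative sketch (exposition only, not part of the generated
   header): assembling a 32-bit value from four byte-sized partial
   reads.  each part is shifted into place by the endian-aware shift
   above, so the same expression yields the host value whether
   TME_ENDIAN_NATIVE is big or little: */
#if 0
static tme_uint32_t
example_read32_by_bytes(_tme_const tme_uint32_t *mem)
{
  return (_tme_memory_read(tme_uint32_t, tme_uint8_t, mem, 0)
          | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, 1)
          | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, 2)
          | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, 3));
}
#endif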

/* this tests bits in a memory address: */
#define _tme_memory_address_test(mem, bits, align_min) \
  (((bits) & ~((align_min) - 1)) & ((unsigned long) (mem)))

/* this returns a mask of all-bits-one in a given type: */
#define _tme_memory_type_mask(type, shift) \
  ((type) ((((type) 0) - ((type) 1)) shift))
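
/* an illustrative sketch (exposition only): the shift argument is
   pasted in as tokens, so a caller passes an operator together with
   a shift count.  for example, with a 32-bit type: */
#if 0
/* all bits except the low eight, i.e. 0xffffff00: */
static _tme_const tme_uint32_t example_mask
  = _tme_memory_type_mask(tme_uint32_t, << 8);
#endif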

/* the bus 16-bit read slow function: */
tme_uint16_t tme_memory_bus_read16 _TME_P((_tme_const tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 16-bit write slow function: */
void tme_memory_bus_write16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 32-bit read slow function: */
tme_uint32_t tme_memory_bus_read32 _TME_P((_tme_const tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 32-bit write slow function: */
void tme_memory_bus_write32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int, unsigned int));

#ifdef TME_HAVE_INT64_T

/* the bus 64-bit read slow function: */
tme_uint64_t tme_memory_bus_read64 _TME_P((_tme_const tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int, unsigned int));

/* the bus 64-bit write slow function: */
void tme_memory_bus_write64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int, unsigned int));

#endif /* TME_HAVE_INT64_T */

/* the bus read buffer function and default macro implementation: */
void tme_memory_bus_read_buffer _TME_P((_tme_const tme_shared tme_uint8_t *, tme_uint8_t *, unsigned long, tme_rwlock_t *, unsigned int, unsigned int));
#define tme_memory_bus_read_buffer(mem, buffer, count, rwlock, align_min, bus_boundary) \
  do { \
    if (TME_THREADS_COOPERATIVE) { \
      memcpy((buffer), ((_tme_const tme_uint8_t *) (mem)), (count)); \
    } \
    else { \
      tme_memory_bus_read_buffer(((_tme_const tme_shared tme_uint8_t *) (mem)), ((tme_uint8_t *) _tme_audit_pointer(buffer)), (count), (rwlock), (align_min), (bus_boundary)); \
    } \
  } while (/* CONSTCOND */ 0)

/* the bus write buffer function and default macro implementation: */
void tme_memory_bus_write_buffer _TME_P((tme_shared tme_uint8_t *, _tme_const tme_uint8_t *, unsigned long, tme_rwlock_t *, unsigned int, unsigned int));
#define tme_memory_bus_write_buffer(mem, buffer, count, rwlock, align_min, bus_boundary) \
  do { \
    if (TME_THREADS_COOPERATIVE) { \
      memcpy((tme_uint8_t *) (mem), (buffer), (count)); \
    } \
    else { \
      tme_memory_bus_write_buffer(((tme_shared tme_uint8_t *) _tme_audit_pointer_shared(mem)), ((_tme_const tme_uint8_t *) _tme_audit_pointer_const(buffer)), (count), (rwlock), (align_min), (bus_boundary)); \
    } \
  } while (/* CONSTCOND */ 0)
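
/* an illustrative sketch (exposition only): copying a small frame out
   of shared device memory.  under cooperative threads the macro above
   is a plain memcpy; otherwise it calls the out-of-line function,
   which honors the rwlock, the minimum alignment, and the emulated
   bus boundary: */
#if 0
static void
example_copy_frame(_tme_const tme_shared tme_uint8_t *regs,
                   tme_rwlock_t *rwlock)
{
  tme_uint8_t frame[6];
  tme_memory_bus_read_buffer(regs, frame, sizeof(frame), rwlock,
                             sizeof(tme_uint8_t), sizeof(tme_uint16_t));
}
#endif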

/* the 8-bit atomic operations: */
tme_uint8_t tme_memory_atomic_add8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_sub8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_mul8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_div8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_and8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_or8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_xor8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_not8 _TME_P((tme_shared tme_uint8_t *, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_neg8 _TME_P((tme_shared tme_uint8_t *, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_xchg8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_rwlock_t *, unsigned int));
tme_uint8_t tme_memory_atomic_cx8 _TME_P((tme_shared tme_uint8_t *, tme_uint8_t, tme_uint8_t, tme_rwlock_t *, unsigned int));
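
/* an illustrative sketch (exposition only, and it assumes that these
   operations return the value the location held before they ran, and
   that a plain dereference of a single byte is itself atomic):
   setting a bit with one atomic or, and the equivalent
   compare-and-exchange loop: */
#if 0
static void
example_set_bit(tme_shared tme_uint8_t *status, tme_rwlock_t *rwlock)
{
  tme_uint8_t value_old;

  /* one call; the previous contents are returned and discarded: */
  (void) tme_memory_atomic_or8(status, 0x80, rwlock, sizeof(tme_uint8_t));

  /* the same effect with cx8, which swaps in the new value only if
     the location still equals value_old, and returns the previous
     contents either way: */
  do {
    value_old = *status;
  } while (tme_memory_atomic_cx8(status, value_old,
                                 (tme_uint8_t) (value_old | 0x80),
                                 rwlock, sizeof(tme_uint8_t))
           != value_old);
}
#endif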

/* the default 16-bit memory plain read macro: */
#define tme_memory_read16(mem, align_min) \
  ( \
   /* if we know at compile time that the memory is aligned \
      enough to read directly, do the single direct read. \
      \
      otherwise, if we know at compile time that the memory \
      is less aligned than the smallest acceptable parts size, \
      test if the memory is aligned enough to read directly, \
      and do the single direct read if it is: */ \
   (__tme_predict_true((_TME_ALIGNOF_INT16_T == 1 \
                        || (align_min) >= _TME_ALIGNOF_INT16_T) \
                       || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint16_t) \
                           && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0))) \
   ? \
   _tme_memory_read(tme_uint16_t, tme_uint16_t, mem, 0) \
   : \
   (_tme_memory_read(tme_uint16_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint16_t, tme_uint8_t, mem, (8 / 8))) \
  )
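
/* an illustrative sketch (exposition only): with an align_min of
   sizeof(tme_uint16_t) the alignment test folds away at compile time
   and the macro is a single 16-bit load; with an align_min of 1 on a
   host that needs 2-byte alignment, it becomes two byte loads merged
   by the shifts in _tme_memory_read.  the two reads are combined here
   only so both expansions appear in one function: */
#if 0
static tme_uint16_t
example_read16(_tme_const tme_uint16_t *mem_aligned,
               _tme_const tme_uint16_t *mem_maybe_unaligned)
{
  return (tme_memory_read16(mem_aligned, sizeof(tme_uint16_t))
          ^ tme_memory_read16(mem_maybe_unaligned, sizeof(tme_uint8_t)));
}
#endif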

/* the default 16-bit memory plain write macro: */
#define tme_memory_write16(mem, x, align_min) \
  do { \
    if \
      /* if we know at compile time that the memory is aligned \
         enough to write directly, do the single direct write. \
         \
         otherwise, if we know at compile time that the memory \
         is less aligned than the smallest acceptable parts size, \
         test if the memory is aligned enough to write directly, \
         and do the single direct write if it is: */ \
      (__tme_predict_true((_TME_ALIGNOF_INT16_T == 1 \
                           || (align_min) >= _TME_ALIGNOF_INT16_T) \
                          || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint16_t) \
                              && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0))) \
      { \
        _tme_memory_write(tme_uint16_t, tme_uint16_t, mem, 0, x); \
      } \
    else \
      { \
        _tme_memory_write(tme_uint16_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint16_t, tme_uint8_t, mem, (8 / 8), x); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 16-bit memory atomic read macro: */
#define tme_memory_atomic_read16(mem, lock, align_min) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read16((_tme_const tme_uint16_t *) _tme_audit_type(mem, tme_uint16_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, and we can \
      make direct 16-bit accesses, and this memory is aligned \
      enough to make a single direct atomic access, do the single \
      direct atomic read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                       && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint16_t *)) \
   /* otherwise, we must do a slow indirect atomic read: */ \
   : \
   tme_memory_atomic_read16(mem, lock, align_min) \
  )
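
/* an illustrative sketch (exposition only): reading a 16-bit device
   register.  when TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) is
   nonzero and the register is aligned to it, the macro above reduces
   to one direct load; otherwise it falls through to the out-of-line
   tme_memory_atomic_read16, which takes the rwlock: */
#if 0
static tme_uint16_t
example_read_csr(_tme_const tme_shared tme_uint16_t *csr,
                 tme_rwlock_t *rwlock)
{
  return (tme_memory_atomic_read16(csr, rwlock, sizeof(tme_uint16_t)));
}
#endif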

/* the default 16-bit memory atomic write macro: */
#define tme_memory_atomic_write16(mem, x, lock, align_min) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write16((tme_uint16_t *) _tme_cast_pointer_shared(tme_uint16_t *, tme_uint16_t *, mem), x, align_min); \
      /* otherwise, if we aren't locking for all memory accesses, and we can \
         make direct 16-bit accesses, and this memory is aligned \
         enough to make a single direct atomic access, do the single \
         direct atomic write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                          && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint16_t *)) \
          = (x); \
      /* otherwise, we must do a slow indirect atomic write: */ \
      } \
    else \
      { \
        tme_memory_atomic_write16(mem, x, lock, align_min); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 16-bit memory bus read macro: */
#define tme_memory_bus_read16(mem, lock, align_min, bus_boundary) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read16((_tme_const tme_uint16_t *) _tme_audit_type(mem, tme_uint16_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, the \
      host supports misaligned 16-bit accesses, the host's bus \
      boundary is greater than or equal to the emulated bus \
      boundary, and this memory is aligned enough, do a single \
      direct bus read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && _TME_ALIGNOF_INT16_T < sizeof(tme_uint16_t) \
                       && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                       && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint16_t *)) \
   /* otherwise, if we're locking for all memory accesses, or \
      if this memory must cross at least one host bus boundary \
      and the host bus boundary is less than the emulated bus \
      boundary, do a slow indirect atomic read: */ \
   : \
   (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                        || (sizeof(tme_uint16_t) > TME_MEMORY_BUS_BOUNDARY \
                            && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
   ? \
   tme_memory_atomic_read16(mem, lock, align_min) \
   /* otherwise, if the memory is not larger than the emulated \
      bus boundary, or if size-alignment would mean an atomic \
      host access and it is size-aligned, do a single atomic \
      read, which may be direct or slow: */ \
   : \
   (__tme_predict_true((sizeof(tme_uint16_t) <= (bus_boundary) \
                        || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                            && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) <= sizeof(tme_uint16_t))) \
                       && _tme_memory_address_test(mem, sizeof(tme_uint16_t) - 1, align_min) == 0)) \
   ? \
   tme_memory_atomic_read16(mem, lock, sizeof(tme_uint16_t)) \
   /* otherwise, we must do a slow bus read: */ \
   : \
   tme_memory_bus_read16(mem, lock, align_min, bus_boundary) \
  )
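
/* an illustrative sketch (exposition only): a 16-bit read on behalf
   of an emulated bus with a 32-bit boundary.  the macro above
   resolves, largely at compile time, to one of four forms: a direct
   load, a slow atomic read at the caller's alignment, an atomic read
   promoted to size-alignment, or the out-of-line slow bus read: */
#if 0
static tme_uint16_t
example_bus_read16(_tme_const tme_shared tme_uint16_t *mem,
                   tme_rwlock_t *rwlock)
{
  return (tme_memory_bus_read16(mem, rwlock,
                                sizeof(tme_uint8_t),
                                sizeof(tme_uint32_t)));
}
#endif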

/* the default 16-bit memory bus write macro: */
#define tme_memory_bus_write16(mem, x, lock, align_min, bus_boundary) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write16((tme_uint16_t *) _tme_cast_pointer_shared(tme_uint16_t *, tme_uint16_t *, mem), x, align_min); \
      /* otherwise, if we aren't locking for all memory accesses, the \
         host supports misaligned 16-bit accesses, the host's bus \
         boundary is greater than or equal to the emulated bus \
         boundary, and this memory is aligned enough, do a single \
         direct bus write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && _TME_ALIGNOF_INT16_T < sizeof(tme_uint16_t) \
                          && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                          && _tme_memory_address_test(mem, _TME_ALIGNOF_INT16_T - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint16_t *)) \
          = (x); \
      /* otherwise, if we're locking for all memory accesses, or \
         if this memory must cross at least one host bus boundary \
         and the host bus boundary is less than the emulated bus \
         boundary, do a slow indirect atomic write: */ \
      } \
    else if \
      (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                           || (sizeof(tme_uint16_t) > TME_MEMORY_BUS_BOUNDARY \
                               && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
      { \
        tme_memory_atomic_write16(mem, x, lock, align_min); \
      /* otherwise, if the memory is not larger than the emulated \
         bus boundary, or if size-alignment would mean an atomic \
         host access and it is size-aligned, do a single atomic \
         write, which may be direct or slow: */ \
      } \
    else if \
      (__tme_predict_true((sizeof(tme_uint16_t) <= (bus_boundary) \
                           || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) != 0 \
                               && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint16_t) <= sizeof(tme_uint16_t))) \
                          && _tme_memory_address_test(mem, sizeof(tme_uint16_t) - 1, align_min) == 0)) \
      { \
        tme_memory_atomic_write16(mem, x, lock, sizeof(tme_uint16_t)); \
      /* otherwise, we must do a slow bus write: */ \
      } \
    else \
      { \
        tme_memory_bus_write16(mem, x, lock, align_min, bus_boundary); \
      } \
  } while (/* CONSTCOND */ 0)

/* the 16-bit atomic operations: */
tme_uint16_t tme_memory_atomic_add16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_sub16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_mul16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_div16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_and16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_or16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_xor16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_not16 _TME_P((tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_neg16 _TME_P((tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_xchg16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_cx16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_uint16_t, tme_rwlock_t *, unsigned int));
tme_uint16_t tme_memory_atomic_read16 _TME_P((_tme_const tme_shared tme_uint16_t *, tme_rwlock_t *, unsigned int));
void tme_memory_atomic_write16 _TME_P((tme_shared tme_uint16_t *, tme_uint16_t, tme_rwlock_t *, unsigned int));

/* the default 32-bit memory plain read macro: */
#define tme_memory_read32(mem, align_min) \
  ( \
   /* if we know at compile time that the memory is aligned \
      enough to read directly, do the single direct read. \
      \
      otherwise, if we know at compile time that the memory \
      is less aligned than the smallest acceptable parts size, \
      test if the memory is aligned enough to read directly, \
      and do the single direct read if it is: */ \
   (__tme_predict_true((_TME_ALIGNOF_INT32_T == 1 \
                        || (align_min) >= _TME_ALIGNOF_INT32_T) \
                       || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) \
                           && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0))) \
   ? \
   _tme_memory_read(tme_uint32_t, tme_uint32_t, mem, 0) \
   : \
   ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) <= sizeof(tme_uint8_t)) \
    && ((align_min) <= sizeof(tme_uint8_t))) \
   ? \
   (_tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (24 / 8))) \
   : \
   (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
   ? \
   (_tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint16_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, (24 / 8))) \
   : \
   (_tme_memory_read(tme_uint32_t, tme_uint16_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint32_t, tme_uint16_t, mem, (16 / 8))) \
  )
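
/* an illustrative sketch (exposition only): how the macro above
   splits a misaligned 32-bit read.  an odd address gets byte, 16-bit,
   byte parts at byte offsets 0, 1, and 3; an even address that is not
   4-byte-aligned gets two 16-bit parts at offsets 0 and 2: */
#if 0
static tme_uint32_t
example_read32_odd(_tme_const tme_uint32_t *mem)
{
  return (_tme_memory_read(tme_uint32_t, tme_uint8_t, mem, 0)
          | _tme_memory_read(tme_uint32_t, tme_uint16_t, mem, 1)
          | _tme_memory_read(tme_uint32_t, tme_uint8_t, mem, 3));
}
#endif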

/* the default 32-bit memory plain write macro: */
#define tme_memory_write32(mem, x, align_min) \
  do { \
    if \
      /* if we know at compile time that the memory is aligned \
         enough to write directly, do the single direct write. \
         \
         otherwise, if we know at compile time that the memory \
         is less aligned than the smallest acceptable parts size, \
         test if the memory is aligned enough to write directly, \
         and do the single direct write if it is: */ \
      (__tme_predict_true((_TME_ALIGNOF_INT32_T == 1 \
                           || (align_min) >= _TME_ALIGNOF_INT32_T) \
                          || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) \
                              && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0))) \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint32_t, mem, 0, x); \
      } \
    else if \
      ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint32_t) <= sizeof(tme_uint8_t)) \
       && ((align_min) <= sizeof(tme_uint8_t))) \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (24 / 8), x); \
      } \
    else if \
      (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint16_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint8_t, mem, (24 / 8), x); \
      } \
    else \
      { \
        _tme_memory_write(tme_uint32_t, tme_uint16_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint32_t, tme_uint16_t, mem, (16 / 8), x); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 32-bit memory atomic read macro: */
#define tme_memory_atomic_read32(mem, lock, align_min) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read32((_tme_const tme_uint32_t *) _tme_audit_type(mem, tme_uint32_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, and we can \
      make direct 32-bit accesses, and this memory is aligned \
      enough to make a single direct atomic access, do the single \
      direct atomic read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                       && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint32_t *)) \
   /* otherwise, we must do a slow indirect atomic read: */ \
   : \
   tme_memory_atomic_read32(mem, lock, align_min) \
  )

/* the default 32-bit memory atomic write macro: */
#define tme_memory_atomic_write32(mem, x, lock, align_min) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write32((tme_uint32_t *) _tme_cast_pointer_shared(tme_uint32_t *, tme_uint32_t *, mem), x, align_min); \
      /* otherwise, if we aren't locking for all memory accesses, and we can \
         make direct 32-bit accesses, and this memory is aligned \
         enough to make a single direct atomic access, do the single \
         direct atomic write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                          && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint32_t *)) \
          = (x); \
      /* otherwise, we must do a slow indirect atomic write: */ \
      } \
    else \
      { \
        tme_memory_atomic_write32(mem, x, lock, align_min); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 32-bit memory bus read macro: */
#define tme_memory_bus_read32(mem, lock, align_min, bus_boundary) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read32((_tme_const tme_uint32_t *) _tme_audit_type(mem, tme_uint32_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, the \
      host supports misaligned 32-bit accesses, the host's bus \
      boundary is greater than or equal to the emulated bus \
      boundary, and this memory is aligned enough, do a single \
      direct bus read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && _TME_ALIGNOF_INT32_T < sizeof(tme_uint32_t) \
                       && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                       && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint32_t *)) \
   /* otherwise, if we're locking for all memory accesses, or \
      if this memory must cross at least one host bus boundary \
      and the host bus boundary is less than the emulated bus \
      boundary, do a slow indirect atomic read: */ \
   : \
   (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                        || (sizeof(tme_uint32_t) > TME_MEMORY_BUS_BOUNDARY \
                            && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
   ? \
   tme_memory_atomic_read32(mem, lock, align_min) \
   /* otherwise, if the memory is not larger than the emulated \
      bus boundary, or if size-alignment would mean an atomic \
      host access and it is size-aligned, do a single atomic \
      read, which may be direct or slow: */ \
   : \
   (__tme_predict_true((sizeof(tme_uint32_t) <= (bus_boundary) \
                        || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                            && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) <= sizeof(tme_uint32_t))) \
                       && _tme_memory_address_test(mem, sizeof(tme_uint32_t) - 1, align_min) == 0)) \
   ? \
   tme_memory_atomic_read32(mem, lock, sizeof(tme_uint32_t)) \
   /* otherwise, we must do a slow bus read: */ \
   : \
   tme_memory_bus_read32(mem, lock, align_min, bus_boundary) \
  )

/* the default 32-bit memory bus write macro: */
#define tme_memory_bus_write32(mem, x, lock, align_min, bus_boundary) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write32((tme_uint32_t *) _tme_cast_pointer_shared(tme_uint32_t *, tme_uint32_t *, mem), x, align_min); \
      /* otherwise, if we aren't locking for all memory accesses, the \
         host supports misaligned 32-bit accesses, the host's bus \
         boundary is greater than or equal to the emulated bus \
         boundary, and this memory is aligned enough, do a single \
         direct bus write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && _TME_ALIGNOF_INT32_T < sizeof(tme_uint32_t) \
                          && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                          && _tme_memory_address_test(mem, _TME_ALIGNOF_INT32_T - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint32_t *)) \
          = (x); \
      /* otherwise, if we're locking for all memory accesses, or \
         if this memory must cross at least one host bus boundary \
         and the host bus boundary is less than the emulated bus \
         boundary, do a slow indirect atomic write: */ \
      } \
    else if \
      (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                           || (sizeof(tme_uint32_t) > TME_MEMORY_BUS_BOUNDARY \
                               && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
      { \
        tme_memory_atomic_write32(mem, x, lock, align_min); \
      /* otherwise, if the memory is not larger than the emulated \
         bus boundary, or if size-alignment would mean an atomic \
         host access and it is size-aligned, do a single atomic \
         write, which may be direct or slow: */ \
      } \
    else if \
      (__tme_predict_true((sizeof(tme_uint32_t) <= (bus_boundary) \
                           || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) != 0 \
                               && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint32_t) <= sizeof(tme_uint32_t))) \
                          && _tme_memory_address_test(mem, sizeof(tme_uint32_t) - 1, align_min) == 0)) \
      { \
        tme_memory_atomic_write32(mem, x, lock, sizeof(tme_uint32_t)); \
      /* otherwise, we must do a slow bus write: */ \
      } \
    else \
      { \
        tme_memory_bus_write32(mem, x, lock, align_min, bus_boundary); \
      } \
  } while (/* CONSTCOND */ 0)

/* the 32-bit atomic operations: */
tme_uint32_t tme_memory_atomic_add32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_sub32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_mul32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_div32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_and32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_or32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_xor32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_not32 _TME_P((tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_neg32 _TME_P((tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_xchg32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_cx32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_uint32_t, tme_rwlock_t *, unsigned int));
tme_uint32_t tme_memory_atomic_read32 _TME_P((_tme_const tme_shared tme_uint32_t *, tme_rwlock_t *, unsigned int));
void tme_memory_atomic_write32 _TME_P((tme_shared tme_uint32_t *, tme_uint32_t, tme_rwlock_t *, unsigned int));

#ifdef TME_HAVE_INT64_T

/* the default 64-bit memory plain read macro: */
#define tme_memory_read64(mem, align_min) \
  ( \
   /* if we know at compile time that the memory is aligned \
      enough to read directly, do the single direct read. \
      \
      otherwise, if we know at compile time that the memory \
      is less aligned than the smallest acceptable parts size, \
      test if the memory is aligned enough to read directly, \
      and do the single direct read if it is: */ \
   (__tme_predict_true((_TME_ALIGNOF_INT64_T == 1 \
                        || (align_min) >= _TME_ALIGNOF_INT64_T) \
                       || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) \
                           && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0))) \
   ? \
   _tme_memory_read(tme_uint64_t, tme_uint64_t, mem, 0) \
   : \
   ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint8_t)) \
    && ((align_min) <= sizeof(tme_uint8_t))) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (24 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (32 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (40 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (48 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (56 / 8))) \
   : \
   (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (8 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (24 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (40 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint8_t, mem, (56 / 8))) \
   : \
   ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint16_t)) \
    && ((align_min) <= sizeof(tme_uint16_t))) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (32 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (48 / 8))) \
   : \
   (_tme_memory_address_test(mem, sizeof(tme_uint16_t), align_min) != 0) \
   ? \
   (_tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint32_t, mem, (16 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint16_t, mem, (48 / 8))) \
   : \
   (_tme_memory_read(tme_uint64_t, tme_uint32_t, mem, (0 / 8)) \
    | _tme_memory_read(tme_uint64_t, tme_uint32_t, mem, (32 / 8))) \
  )

/* the default 64-bit memory plain write macro: */
#define tme_memory_write64(mem, x, align_min) \
  do { \
    if \
      /* if we know at compile time that the memory is aligned \
         enough to write directly, do the single direct write. \
         \
         otherwise, if we know at compile time that the memory \
         is less aligned than the smallest acceptable parts size, \
         test if the memory is aligned enough to write directly, \
         and do the single direct write if it is: */ \
      (__tme_predict_true((_TME_ALIGNOF_INT64_T == 1 \
                           || (align_min) >= _TME_ALIGNOF_INT64_T) \
                          || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) \
                              && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0))) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint64_t, mem, 0, x); \
      } \
    else if \
      ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint8_t)) \
       && ((align_min) <= sizeof(tme_uint8_t))) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (24 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (32 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (40 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (48 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (56 / 8), x); \
      } \
    else if \
      (_tme_memory_address_test(mem, sizeof(tme_uint8_t), align_min) != 0) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (8 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (24 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (40 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint8_t, mem, (56 / 8), x); \
      } \
    else if \
      ((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint64_t) <= sizeof(tme_uint16_t)) \
       && ((align_min) <= sizeof(tme_uint16_t))) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (32 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (48 / 8), x); \
      } \
    else if \
      (_tme_memory_address_test(mem, sizeof(tme_uint16_t), align_min) != 0) \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint32_t, mem, (16 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint16_t, mem, (48 / 8), x); \
      } \
    else \
      { \
        _tme_memory_write(tme_uint64_t, tme_uint32_t, mem, (0 / 8), x); \
        _tme_memory_write(tme_uint64_t, tme_uint32_t, mem, (32 / 8), x); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 64-bit memory atomic read macro: */
#define tme_memory_atomic_read64(mem, lock, align_min) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read64((_tme_const tme_uint64_t *) _tme_audit_type(mem, tme_uint64_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, and we can \
      make direct 64-bit accesses, and this memory is aligned \
      enough to make a single direct atomic access, do the single \
      direct atomic read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                       && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint64_t *)) \
   /* otherwise, we must do a slow indirect atomic read: */ \
   : \
   tme_memory_atomic_read64(mem, lock, align_min) \
  )

/* the default 64-bit memory atomic write macro: */
#define tme_memory_atomic_write64(mem, x, lock, align_min) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write64((tme_uint64_t *) _tme_cast_pointer_shared(tme_uint64_t *, tme_uint64_t *, mem), x, align_min); \
      /* otherwise, if we aren't locking for all memory accesses, and we can \
         make direct 64-bit accesses, and this memory is aligned \
         enough to make a single direct atomic access, do the single \
         direct atomic write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                          && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint64_t *)) \
          = (x); \
      /* otherwise, we must do a slow indirect atomic write: */ \
      } \
    else \
      { \
        tme_memory_atomic_write64(mem, x, lock, align_min); \
      } \
  } while (/* CONSTCOND */ 0)

/* the default 64-bit memory bus read macro: */
#define tme_memory_bus_read64(mem, lock, align_min, bus_boundary) \
  ( \
   /* if threads are cooperative, do a plain read: */ \
   (TME_THREADS_COOPERATIVE) \
   ? \
   tme_memory_read64((_tme_const tme_uint64_t *) _tme_audit_type(mem, tme_uint64_t *), align_min) \
   /* otherwise, if we aren't locking for all memory accesses, the \
      host supports misaligned 64-bit accesses, the host's bus \
      boundary is greater than or equal to the emulated bus \
      boundary, and this memory is aligned enough, do a single \
      direct bus read: */ \
   : \
   (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                       && _TME_ALIGNOF_INT64_T < sizeof(tme_uint64_t) \
                       && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                       && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0)) \
   ? \
   (*_tme_audit_type(mem, tme_uint64_t *)) \
   /* otherwise, if we're locking for all memory accesses, or \
      if this memory must cross at least one host bus boundary \
      and the host bus boundary is less than the emulated bus \
      boundary, do a slow indirect atomic read: */ \
   : \
   (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                        || (sizeof(tme_uint64_t) > TME_MEMORY_BUS_BOUNDARY \
                            && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
   ? \
   tme_memory_atomic_read64(mem, lock, align_min) \
   /* otherwise, if the memory is not larger than the emulated \
      bus boundary, or if size-alignment would mean an atomic \
      host access and it is size-aligned, do a single atomic \
      read, which may be direct or slow: */ \
   : \
   (__tme_predict_true((sizeof(tme_uint64_t) <= (bus_boundary) \
                        || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                            && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) <= sizeof(tme_uint64_t))) \
                       && _tme_memory_address_test(mem, sizeof(tme_uint64_t) - 1, align_min) == 0)) \
   ? \
   tme_memory_atomic_read64(mem, lock, sizeof(tme_uint64_t)) \
   /* otherwise, we must do a slow bus read: */ \
   : \
   tme_memory_bus_read64(mem, lock, align_min, bus_boundary) \
  )

/* the default 64-bit memory bus write macro: */
#define tme_memory_bus_write64(mem, x, lock, align_min, bus_boundary) \
  do { \
    if \
      /* if threads are cooperative, do a plain write: */ \
      (TME_THREADS_COOPERATIVE) \
      { \
        tme_memory_write64((tme_uint64_t *) _tme_cast_pointer_shared(tme_uint64_t *, tme_uint64_t *, mem), x, align_min); \
      /* otherwise, if we aren't locking for all memory accesses, the \
         host supports misaligned 64-bit accesses, the host's bus \
         boundary is greater than or equal to the emulated bus \
         boundary, and this memory is aligned enough, do a single \
         direct bus write: */ \
      } \
    else if \
      (__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \
                          && _TME_ALIGNOF_INT64_T < sizeof(tme_uint64_t) \
                          && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \
                          && _tme_memory_address_test(mem, _TME_ALIGNOF_INT64_T - 1, align_min) == 0)) \
      { \
        (*_tme_audit_type(mem, tme_uint64_t *)) \
          = (x); \
      /* otherwise, if we're locking for all memory accesses, or \
         if this memory must cross at least one host bus boundary \
         and the host bus boundary is less than the emulated bus \
         boundary, do a slow indirect atomic write: */ \
      } \
    else if \
      (__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \
                           || (sizeof(tme_uint64_t) > TME_MEMORY_BUS_BOUNDARY \
                               && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \
      { \
        tme_memory_atomic_write64(mem, x, lock, align_min); \
      /* otherwise, if the memory is not larger than the emulated \
         bus boundary, or if size-alignment would mean an atomic \
         host access and it is size-aligned, do a single atomic \
         write, which may be direct or slow: */ \
      } \
    else if \
      (__tme_predict_true((sizeof(tme_uint64_t) <= (bus_boundary) \
                           || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) != 0 \
                               && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint64_t) <= sizeof(tme_uint64_t))) \
                          && _tme_memory_address_test(mem, sizeof(tme_uint64_t) - 1, align_min) == 0)) \
      { \
        tme_memory_atomic_write64(mem, x, lock, sizeof(tme_uint64_t)); \
      /* otherwise, we must do a slow bus write: */ \
      } \
    else \
      { \
        tme_memory_bus_write64(mem, x, lock, align_min, bus_boundary); \
      } \
  } while (/* CONSTCOND */ 0)

/* the 64-bit atomic operations: */
tme_uint64_t tme_memory_atomic_add64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_sub64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_mul64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_div64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_and64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_or64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_xor64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_not64 _TME_P((tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_neg64 _TME_P((tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_xchg64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_cx64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_uint64_t, tme_rwlock_t *, unsigned int));
tme_uint64_t tme_memory_atomic_read64 _TME_P((_tme_const tme_shared tme_uint64_t *, tme_rwlock_t *, unsigned int));
void tme_memory_atomic_write64 _TME_P((tme_shared tme_uint64_t *, tme_uint64_t, tme_rwlock_t *, unsigned int));

#endif /* TME_HAVE_INT64_T */