mirror of
https://github.com/phabrics/Run-Sun3-SunOS-4.1.1.git
synced 2026-04-29 11:02:59 -04:00
1347 lines · 50 KiB · Bash
#! /bin/sh
|
|
|
|
# $Id: memory-auto.sh,v 1.2 2010/02/15 15:16:28 fredette Exp $
|
|
|
|
# libtme/memory-auto.sh - automatically generates C code for
|
|
# memory support:
|
|
|
|
#
|
|
# Copyright (c) 2005, 2006 Matt Fredette
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions
|
|
# are met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# 3. All advertising materials mentioning features or use of this software
|
|
# must display the following acknowledgement:
|
|
# This product includes software developed by Matt Fredette.
|
|
# 4. The name of the author may not be used to endorse or promote products
|
|
# derived from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
# DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
|
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
|
|
# whether we are generating the header (.h) version of the output
# instead of the C source version; set by the --header option:
header=false

# parse the command-line options:
for option
do
case "$option" in
--header) header=true ;;
esac
done

# the name of this script, used in the "automatically generated"
# banner.  quoted and passed with -- so paths containing spaces or
# a leading dash cannot break basename:
PROG=$(basename -- "$0")
|
|
# emit the "automatically generated" banner and the copyright
# comment for the generated C file.  this here-document is
# deliberately unquoted so that $PROG expands; anything else that
# the shell would expand must be escaped.  in particular the
# ``AS IS'' backquotes must be written \`\` -- bare adjacent
# backquotes are an empty command substitution and would vanish
# from the output:
cat <<EOF
/* automatically generated by $PROG, do not edit! */

/*
* Copyright (c) 2005, 2006 Matt Fredette
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Matt Fredette.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR \`\`AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
EOF

# unless we are generating the header file, emit the includes:
if $header; then :; else
cat <<EOF

/* includes: */
#include <tme/memory.h>

EOF
fi

# emit the RCS ID of this generator into the output.  the leading
# backslash keeps the first \$Id from being expanded by the shell:
cat <<EOF

_TME_RCSID("\$Id: memory-auto.sh,v 1.2 2010/02/15 15:16:28 fredette Exp $");
EOF
|
|
# when generating the header, emit the internal helper macros that
# the memory access macros build on.  the whole body is a single
# here-document; the doubled backslashes (\\) produce the literal
# line-continuation backslashes of the emitted C macros:
if $header; then
cat <<EOF

/* macros: */

/* the plain partial read internal macro: */
#define _tme_memory_read(type_whole, type_part, mem, offset) \\
(((type_whole) \\
*((_tme_const type_part *) \\
(_tme_cast_pointer_const(tme_uint8_t *, type_whole *, mem) \\
+ (offset)))) \\
<< (8 * (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG \\
? (sizeof(type_whole) \\
- ((offset) + sizeof(type_part))) \\
: (offset))))

/* the plain partial write internal macro: */
#define _tme_memory_write(type_whole, type_part, mem, offset, x) \\
do { \\
*((type_part *) \\
(_tme_cast_pointer(tme_uint8_t *, type_whole *, mem) \\
+ (offset))) \\
= (type_part) \\
(((type_whole) (x)) \\
>> (8 * (TME_ENDIAN_NATIVE == TME_ENDIAN_BIG \\
? (sizeof(type_whole) \\
- ((offset) + sizeof(type_part))) \\
: (offset)))); \\
} while (/* CONSTCOND */ 0)

/* this tests bits in a memory address: */
#define _tme_memory_address_test(mem, bits, align_min) \\
(((bits) & ~((align_min - 1))) & ((unsigned long) (mem)))

/* this returns a mask of all-bits-one in given type: */
#define _tme_memory_type_mask(type, shift) \\
((type) ((((type) 0) - ((type) 1)) shift))

EOF
fi
|
|
|
|
# the access sizes, in bits, that this script generates code for.
# any size greater than or equal to the ifdef threshold is only
# available on hosts that have the matching integer type, so its
# generated code gets wrapped in a TME_HAVE_INTn_T ifdef:
#
sizes="8 16 32 64"
size_ifdef=64

# the widest host bus boundary, in bits, that the generated code
# will ever test for:
#
host_boundary_largest=64
|
|
|
|
# permute for the different sizes:
#
# for each access size in ${sizes}, emit the out-of-line ("slow")
# tme_memory_bus_read/write functions (or, with --header, just
# their prototypes).  each emitted function breaks one emulated-bus
# access into one or more atomic host-boundary-sized accesses:
for size in ${sizes}; do

# we don't need bus eight-bit read and write slow functions:
#
if test ${size} = 8; then continue; fi

# sizes at or above ${size_ifdef} only exist on hosts with the
# matching integer type, so open an ifdef for them:
if test `expr ${size} \>= ${size_ifdef}` = 1; then
echo ""
echo "#ifdef TME_HAVE_INT${size}_T"
fi

# emit the bus read and write slow functions:
#
for op in read write; do

# dispatch on the operation:
#
if test ${op} = read; then
op_return_type="tme_uint${size}_t"
op_cap=READ
op_const="_tme_const "
op_proto_operand=
op_operand=
else
op_return_type=void
op_cap=WRITE
op_const=
op_proto_operand=", tme_uint${size}_t"
op_operand="${op_proto_operand} x"
fi

# if we're making the header, just emit a prototype:
#
if $header; then
echo ""
echo "/* the bus ${size}-bit ${op} slow function: */"
echo "${op_return_type} tme_memory_bus_${op}${size} _TME_P((${op_const}tme_shared tme_uint${size}_t *${op_proto_operand}, tme_rwlock_t *, unsigned int, unsigned int));"
continue
fi

# otherwise, emit the function definition, which must first undo
# the macro of the same name:
echo ""
echo "/* undefine the macro version of tme_memory_bus_${op}${size}: */"
echo "#undef tme_memory_bus_${op}${size}"
echo ""
echo "/* the bus ${size}-bit ${op} slow function: */"
echo ${op_return_type}
echo "tme_memory_bus_${op}${size}(${op_const}tme_shared tme_uint${size}_t *mem${op_operand}, tme_rwlock_t *rwlock, unsigned int align_min, unsigned int bus_boundary)"
echo "{"
echo " const unsigned int host_boundary = TME_MEMORY_BUS_BOUNDARY;"
echo " unsigned int size_skip;"
echo " unsigned int size_done;"
if test ${op} = read; then
echo " tme_uint${size}_t x;"
fi

# emit the locals for the possible host boundaries:
#
host_boundary=${host_boundary_largest}
while test ${host_boundary} != 4; do
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo "#ifdef TME_HAVE_INT${host_boundary}_T"
fi
echo " ${op_const}tme_shared tme_uint${host_boundary}_t *parts${host_boundary};"
echo " tme_uint${host_boundary}_t part${host_boundary};"
if test ${op} = write; then
echo " tme_uint${host_boundary}_t part${host_boundary}_cmp;"
fi
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo "#endif /* TME_HAVE_INT${host_boundary}_T */"
fi
host_boundary=`expr ${host_boundary} / 2`
done
echo ""
echo -n " assert (bus_boundary != 0 && bus_boundary <= host_boundary);"

# loop over the possible host boundaries:
#
host_boundary=${host_boundary_largest}
while test ${host_boundary} != 4; do

# calculate the worst number of host boundaries that an
# access of this size could cross:
#
# NOTE(review): the "${size} - 16" term presumably assumes a
# minimum 16-bit bus boundary -- confirm against the upstream
# version of this script:
host_boundaries_worst=`expr ${size} - 16`
host_boundaries_worst=`expr ${host_boundaries_worst} / ${host_boundary}`
host_boundaries_worst=`expr ${host_boundaries_worst} + 1`

# open this host boundary:
#
if test ${host_boundary} != 8; then
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo ""
echo ""
echo "#ifdef TME_HAVE_INT${host_boundary}_T"
echo ""
echo -n " "
fi
echo -n " if (host_boundary == sizeof(tme_uint${host_boundary}_t))"
fi
echo " {"

echo ""
echo " /* prepare to ${op} the first ${host_boundary}-bit part of the memory: */"
echo " parts${host_boundary} = (${op_const}tme_shared tme_uint${host_boundary}_t *) (((unsigned long) mem) & (((unsigned long) 0) - (${host_boundary} / 8)));"
echo " size_skip = (((unsigned int) (unsigned long) mem) % (${host_boundary} / 8)) * 8;"
echo " size_done = 0;"

echo ""
echo " /* ${op} the first ${host_boundary}-bit part of the memory: */"

# emit two accesses. if the worst number of boundaries
# that this access could cross is one, the second access
# is inside an if, else it's in a for loop:
#
# these shell variables hold either literal values or the *names*
# of C variables to splice into the emitted code; the first pass
# uses the literals, later passes the C variable names:
indent0=
size_done=0
size_skip=size_skip
access_or=
while true; do

# read this memory part:
#
echo "${indent0} part${host_boundary} = tme_memory_atomic_read${host_boundary}(parts${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t));"

# if this is a read:
#
if test ${op} = read; then

echo ""
echo "${indent0} /* on a little-endian host, we shift off the skip"
echo "${indent0} data on the right, and shift the remaining data"
echo "${indent0} up into position in the result: */"
echo "${indent0} if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE) {"
echo "${indent0} x ${access_or}= (((tme_uint${size}_t) (part${host_boundary} >> ${size_skip})) << ${size_done});"
echo "${indent0} }"
echo ""
echo "${indent0} /* on a big-endian host, we shift off the skip data"
echo "${indent0} on the left, and shift the remaining data down"
echo "${indent0} into position in the result: */"
#
# NB: on a big-endian host, because the skip data
# is on the left, the type of what we shift
# depends on how this host boundary size compares
# to the access size:
#
echo "${indent0} else {"
echo -n "${indent0} x ${access_or}= "
if test `expr ${host_boundary} \> ${size}` = 1; then
echo "((part${host_boundary} << ${size_skip}) >> ((${host_boundary} - ${size}) + ${size_done}));"
else
echo "((((tme_uint${size}_t) part${host_boundary}) << ((${size} - ${host_boundary}) + ${size_skip})) >> ${size_done});"
fi
echo "${indent0} }"

# otherwise, this is a write:
#
else

# start the compare-and-exchange do loop:
#
echo "${indent0} do {"
echo "${indent0} part${host_boundary}_cmp = part${host_boundary};"
echo ""
echo "${indent0} /* on a little-endian host, we clear with zeroes"
echo "${indent0} shifted up past the skip data, and then we"
echo "${indent0} insert the data shifted up past the skip data: */"
echo "${indent0} if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE) {"
echo "${indent0} part${host_boundary} &= (_tme_memory_type_mask(tme_uint${host_boundary}_t, + 0) ^ (((tme_uint${host_boundary}_t) _tme_memory_type_mask(tme_uint${size}_t, << ${size_done})) << ${size_skip}));"
echo "${indent0} part${host_boundary} |= (((tme_uint${host_boundary}_t) x) << ${size_skip});"
echo "${indent0} }"
echo ""
echo "${indent0} /* on a big-endian host, we clear with zeroes"
echo "${indent0} shifted down past the skip data, and then we"
echo "${indent0} insert the data shifted down past the skip data: */"
#
# NB: on a big-endian host, because the skip data
# is on the left, exactly how we shift depends on
# how this host boundary size compares to the
# access size:
#
echo "${indent0} else {"
if test `expr ${host_boundary} \> ${size}` = 1; then
echo "${indent0} part${host_boundary} &= ~((((tme_uint${host_boundary}_t) _tme_memory_type_mask(tme_uint${size}_t, + 0)) << ((${host_boundary} - ${size}) + ${size_done})) >> ${size_skip});"
echo "${indent0} part${host_boundary} |= ((((tme_uint${host_boundary}_t) x) << (${host_boundary} - ${size})) >> ${size_skip});"
else
echo "${indent0} part${host_boundary} &= ~(_tme_memory_type_mask(tme_uint${host_boundary}_t, << ${size_done}) >> ${size_skip});"
echo "${indent0} part${host_boundary} |= (x >> ((${size} - ${host_boundary}) + ${size_skip}));"
fi
echo "${indent0} }"
echo ""
echo "${indent0} /* loop until we can atomically update this part: */"
echo "${indent0} part${host_boundary} = tme_memory_atomic_cx${host_boundary}(parts${host_boundary}, part${host_boundary}_cmp, part${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t));"
echo "${indent0} } while (part${host_boundary} != part${host_boundary}_cmp);"
# after a write, emit code to shift the remaining data in x into
# position for the next part (skipped when no next part is possible):
if test ${host_boundaries_worst} != 1 || test ${size_skip} != 0; then
echo "${indent0} if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE) {"
echo "${indent0} x >>= (${host_boundary} - ${size_skip});"
echo "${indent0} }"
echo "${indent0} else {"
echo "${indent0} x <<= (${host_boundary} - ${size_skip});"
echo "${indent0} }"
fi
fi

# if this was the first access:
#
if test ${size_done} = 0; then

echo " size_done = ${host_boundary} - size_skip;"
size_done=size_done
size_skip=0
access_or='|'

# if the worst number of boundaries that this
# access could cross is more than one, we will do
# the remaining accesses in a loop, otherwise we
# will do one more access, if necessary:
#
if test ${host_boundaries_worst} = 1; then
access_if=true
else
access_if=false

# as an optimization, writes of full parts are
# done directly:
#
if test ${op} = write; then
if test `expr ${host_boundary} \< ${size}` = 1; then
echo ""
if test ${host_boundaries_worst} = 2; then
echo " /* try to write one full ${host_boundary}-bit part of memory: */"
echo -n " if (__tme_predict_true(size_done <= (${size} - ${host_boundary})))"
else
echo " /* write as many full ${host_boundary}-bit parts of the memory as we can: */"
echo -n " for (; size_done <= (${size} - ${host_boundary}); )"
fi
echo " {"
echo ""
echo " /* make a boundary: */"
echo " tme_memory_barrier(mem, (${size} / 8), TME_MEMORY_BARRIER_${op_cap}_BEFORE_${op_cap});"
echo ""
echo " /* write a full ${host_boundary}-bit part of memory: */"
echo " part${host_boundary} = (x >> ((TME_ENDIAN_NATIVE == TME_ENDIAN_BIG) * (${size} - ${host_boundary})));"
echo " parts${host_boundary}++;"
echo " tme_memory_atomic_write${host_boundary}(parts${host_boundary}, part${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t));"
echo " size_done += ${host_boundary};"
echo " if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE) {"
echo " x >>= ${host_boundary};"
echo " }"
echo " else {"
echo " x <<= ${host_boundary};"
echo " }"
echo " }"
access_if=true
fi
fi
fi
echo ""
if $access_if; then
echo " /* ${op} at most one remaining ${host_boundary}-bit part of the memory: */"
echo -n " if (__tme_predict_false(size_done < ${size}))"
else
echo " /* ${op} any remaining ${host_boundary}-bit parts of the memory: */"
echo -n " for (; size_done < ${size}; size_done += ${host_boundary})"
fi
echo " {"

echo ""
echo " /* make a boundary: */"
echo " tme_memory_barrier(mem, (${size} / 8), TME_MEMORY_BARRIER_${op_cap}_BEFORE_${op_cap});"
echo ""
echo " /* ${op} the next ${host_boundary}-bit part of the memory: */"
echo " parts${host_boundary}++;"
indent0=" "

# otherwise, this was the loop or second access:
#
else
echo " }"
break
fi

done

echo " }"

# close this host boundary:
#
if test ${host_boundary} != 8; then
echo ""
echo -n " else"
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo ""
echo ""
echo -n "#endif /* TME_HAVE_INT${host_boundary}_T */"
if test ${host_boundary} = ${size_ifdef}; then
echo ""
echo ""
echo -n " "
fi
fi
fi

# advance:
#
host_boundary=`expr ${host_boundary} / 2`
done
# finish the function body:
if test ${op} = read; then
echo ""
echo " /* return the value read: */"
echo " return (x);"
fi
echo "}"
done

# close the size ifdef opened above:
if test `expr ${size} \>= ${size_ifdef}` = 1; then
echo ""
echo "#endif /* TME_HAVE_INT${size}_T */"
fi

done
|
|
|
|
# emit the bus read and write buffer functions:
#
# these functions copy a whole byte buffer to or from shared memory.
# with --header we instead emit their prototypes plus default macro
# wrappers that degrade to memcpy under cooperative threading:
for op in read write; do

# dispatch on the operation:
#
if test ${op} = read; then
op_cap=READ
op_rwlock=rd
op_const_mem="_tme_const "
op_const_buffer=
op_audit_mem=
op_audit_buffer=_tme_audit_pointer
op_memcpy="(buffer), ((_tme_const tme_uint8_t *) (mem))"
op_copy="*buffer = *part_buffer"
else
op_cap=WRITE
op_rwlock=wr
op_const_mem=
op_const_buffer="_tme_const "
op_audit_mem=_tme_audit_pointer_shared
op_audit_buffer=_tme_audit_pointer_const
op_memcpy="(tme_uint8_t *) (mem), (buffer)"
op_copy="*part_buffer = *buffer"
fi

# if we're making the header:
#
if $header; then

echo ""
echo "/* the bus ${op} buffer function and default macro implementation: */"

# emit the prototype:
#
echo "void tme_memory_bus_${op}_buffer _TME_P((${op_const_mem}tme_shared tme_uint8_t *, ${op_const_buffer}tme_uint8_t *, unsigned long, tme_rwlock_t *, unsigned int, unsigned int));"

# emit the default macro definition:
#
echo "#define tme_memory_bus_${op}_buffer(mem, buffer, count, rwlock, align_min, bus_boundary) \\"
echo " do { \\"
echo " if (TME_THREADS_COOPERATIVE) { \\"
echo " memcpy(${op_memcpy}, (count)); \\"
echo " } \\"
echo " else { \\"
echo " tme_memory_bus_${op}_buffer(((${op_const_mem}tme_shared tme_uint8_t *) ${op_audit_mem}(mem)), ((${op_const_buffer}tme_uint8_t *) ${op_audit_buffer}(buffer)), (count), (rwlock), (align_min), (bus_boundary)); \\"
echo " } \\"
echo " } while (/* CONSTCOND */ 0)"

continue
fi

# start the function:
#
echo ""
echo "/* undefine the macro version of tme_memory_bus_${op}_buffer: */"
echo "#undef tme_memory_bus_${op}_buffer"
echo ""
echo "/* the bus ${op} buffer function: */"
echo "void"
echo "tme_memory_bus_${op}_buffer(${op_const_mem}tme_shared tme_uint8_t *mem, ${op_const_buffer}tme_uint8_t *buffer, unsigned long count, tme_rwlock_t *rwlock, unsigned int align_min, unsigned int bus_boundary)"
echo "{"
echo " const unsigned int host_boundary = TME_MEMORY_BUS_BOUNDARY;"
echo " ${op_const_mem}tme_uint8_t *part_buffer;"
echo " unsigned int count_done;"
echo " unsigned int count_misaligned;"
echo " unsigned int bits_misaligned;"

# emit the locals for the possible host boundaries:
#
host_boundary=${host_boundary_largest}
while test ${host_boundary} != 4; do
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo "#ifdef TME_HAVE_INT${host_boundary}_T"
fi
echo " ${op_const_mem}tme_shared tme_uint${host_boundary}_t *parts${host_boundary};"
echo " tme_uint${host_boundary}_t part${host_boundary}_buffer;"
echo " tme_uint${host_boundary}_t part${host_boundary};"
echo " tme_uint${host_boundary}_t part${host_boundary}_next;"
if test ${op} = write; then
echo " tme_uint${host_boundary}_t part${host_boundary}_mask;"
echo " tme_uint${host_boundary}_t part${host_boundary}_cmp;"
fi
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo "#endif /* TME_HAVE_INT${host_boundary}_T */"
fi
host_boundary=`expr ${host_boundary} / 2`
done
echo ""
echo " assert (count != 0);"
echo " assert (bus_boundary != 0);"

# emit the two memcpy fallbacks (locking for all accesses, and an
# emulated bus boundary wider than the host's):
echo ""
echo " /* if we are locking for all memory accesses, lock memory"
echo " around a memcpy: */"
echo " if (TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0) {"
echo " tme_rwlock_${op_rwlock}lock(rwlock);"
echo " memcpy(${op_memcpy}, (count));"
echo " tme_rwlock_unlock(rwlock);"
echo " }"
echo ""
echo " /* otherwise, if the emulated bus boundary is greater than the"
echo " host's bus boundary, we are forced to stop all other threads"
echo " around a memcpy: */"
echo " else if (__tme_predict_false(bus_boundary == 0"
echo " || bus_boundary > host_boundary)) {"
echo " tme_thread_suspend_others();"
echo " memcpy(${op_memcpy}, (count) + (0 && align_min));"
echo " tme_thread_resume_others();"
echo " }"

# loop over the possible host boundaries:
#
host_boundary=${host_boundary_largest}
indent0=
while test ${host_boundary} != 4; do

# open this host boundary:
#
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo ""
echo "#ifdef TME_HAVE_INT${host_boundary}_T"
fi
echo ""
echo -n " else"
if test ${host_boundary} != 8; then
echo -n " if (host_boundary == sizeof(tme_uint${host_boundary}_t))"
fi
echo " {"

# for a write, a part "read" comes from the buffer and a part
# "write" goes atomically to memory; for a read it is the reverse:
if test ${op} = write; then
op_part_read="tme_memory_read${host_boundary}((const tme_uint${host_boundary}_t *) buffer, sizeof(tme_uint${host_boundary}_t))"
op_part_write="tme_memory_atomic_write${host_boundary}(parts${host_boundary}, part${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t))"
else
op_part_read="tme_memory_atomic_read${host_boundary}(parts${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t))"
op_part_write="tme_memory_write${host_boundary}((tme_uint${host_boundary}_t *) buffer, part${host_boundary}, sizeof(tme_uint${host_boundary}_t))"
fi

echo ""
echo " /* make a ${host_boundary}-bit pointer to the memory: */"
echo " parts${host_boundary} = (${op_const_mem}tme_shared tme_uint${host_boundary}_t *) mem;"
echo ""
echo " /* if this pointer is not ${host_boundary}-bit aligned: */"
echo " if (__tme_predict_false((((unsigned long) parts${host_boundary}) % sizeof(tme_uint${host_boundary}_t)) != 0)) {"
echo ""
echo " /* get the misalignment from the previous ${host_boundary}-bit boundary: */"
echo " count_misaligned = ((unsigned long) parts${host_boundary}) % sizeof(tme_uint${host_boundary}_t);"
echo ""
echo " /* truncate this pointer to the previous ${host_boundary}-bit boundary: */"
echo " parts${host_boundary} = (${op_const_mem}tme_shared tme_uint${host_boundary}_t *) (((unsigned long) parts${host_boundary}) & (((unsigned long) 0) - sizeof(tme_uint${host_boundary}_t)));"
echo ""
echo " /* get the number of bytes to ${op} in the first ${host_boundary}-bit memory part: */"
echo " count_done = sizeof(tme_uint${host_boundary}_t) - count_misaligned;"
echo " if (__tme_predict_false(count_done > count)) {"
echo " count_done = count;"
echo " }"

if test ${op} = write; then
echo ""
echo " /* make a mask that clears for the data to write in the"
echo " first ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_mask = 1;"
echo " part${host_boundary}_mask = (part${host_boundary}_mask << (count_done * 8)) - 1;"
echo " part${host_boundary}_mask"
echo " <<= (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE"
echo " ? (count_misaligned * 8)"
echo " : (${host_boundary} - ((count_misaligned + count_done) * 8)));"
echo " part${host_boundary}_mask = ~part${host_boundary}_mask;"
echo ""
echo " /* copy from the buffer the bytes to write in the first"
echo " ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_buffer = 0;"
else
echo ""
echo " /* read the first ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_buffer = ${op_part_read};"
echo " parts${host_boundary}++;"
echo ""
echo " /* copy to the buffer the bytes to read in the first"
echo " ${host_boundary}-bit memory part: */"
fi
echo " part_buffer = ((tme_uint8_t *) &part${host_boundary}_buffer) + count_misaligned;"
echo " count -= count_done;"
echo " do {"
echo " ${op_copy};"
echo " part_buffer++;"
echo " buffer++;"
echo " } while (--count_done != 0);"

if test ${op} = write; then
echo ""
echo " /* compare-and-exchange the first ${host_boundary}-bit memory part: */"
# NOTE(review): this seeds the cx loop with ${op_part_read}, which
# for a write reads the *buffer* rather than the current memory
# contents -- confirm this is intended:
echo " part${host_boundary} = ${op_part_read};"
echo " do {"
echo " part${host_boundary}_cmp = part${host_boundary};"
echo " part${host_boundary} = (part${host_boundary} & part${host_boundary}_mask) | part${host_boundary}_buffer;"
echo " part${host_boundary} = tme_memory_atomic_cx${host_boundary}(parts${host_boundary}, part${host_boundary}_cmp, part${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t));"
echo " } while (part${host_boundary} != part${host_boundary}_cmp);"
echo " parts${host_boundary}++;"
fi
echo " }"

echo ""
echo " /* if we have full ${host_boundary}-bit parts to ${op}: */"
echo " if (__tme_predict_true(count >= sizeof(tme_uint${host_boundary}_t))) {"
echo ""
echo " /* if the buffer is ${host_boundary}-bit aligned: */"
echo " if (__tme_predict_true((((unsigned long) buffer) % sizeof(tme_uint${host_boundary}_t)) == 0)) {"
echo ""
echo " /* ${op} full ${host_boundary}-bit parts without shifting: */"
echo " do {"
echo " part${host_boundary} = ${op_part_read};"
echo " ${op_part_write};"
echo ""
echo " /* advance: */"
echo " parts${host_boundary}++;"
echo " buffer += sizeof(tme_uint${host_boundary}_t);"
echo " count -= sizeof(tme_uint${host_boundary}_t);"
echo " } while (count >= sizeof(tme_uint${host_boundary}_t));"
echo " }"
echo ""
echo " /* otherwise, the buffer is not ${host_boundary}-bit aligned: */"
echo " else {"
echo ""
echo " /* get the misalignment to the next ${host_boundary}-bit boundary: */"
echo " count_misaligned = (sizeof(tme_uint${host_boundary}_t) - ((unsigned int) (unsigned long) buffer)) % sizeof(tme_uint${host_boundary}_t);"
if test ${op} = write; then
echo ""
echo " /* copy from the buffer until it is aligned: */"
echo " part${host_boundary}_buffer = 0;"
else
echo ""
echo " /* read the next ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_buffer = ${op_part_read};"
echo " parts${host_boundary}++;"
echo ""
echo " /* copy to the buffer until it is aligned: */"
fi
echo " part_buffer = ((${op_const_mem}tme_uint8_t *) &part${host_boundary}_buffer);"
echo " count_done = count_misaligned;"
echo " count -= count_misaligned;"
echo " do {"
echo " ${op_copy};"
echo " part_buffer++;"
echo " buffer++;"
echo " } while (--count_done != 0);"

echo ""
echo " /* ${op} full ${host_boundary}-bit words with shifting: */"
echo " bits_misaligned = count_misaligned * 8;"
# op_shift/op_shift_next are C expressions spliced into the emitted
# code; which direction gets the complemented shift depends on the
# operation:
if test ${op} = write; then
op_shift=bits_misaligned
op_shift_next="(${host_boundary} - bits_misaligned)"
echo " part${host_boundary} = part${host_boundary}_buffer;"
else
op_shift="(${host_boundary} - bits_misaligned)"
op_shift_next=bits_misaligned
echo " part${host_boundary}"
echo " = (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE"
echo " ? (part${host_boundary}_buffer >> ${op_shift_next})"
echo " : (part${host_boundary}_buffer << ${op_shift_next}));"
fi
echo " for (; count >= sizeof(tme_uint${host_boundary}_t); ) {"
echo " part${host_boundary}_next = ${op_part_read};"
echo " if (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE) {"
echo " part${host_boundary} |= (part${host_boundary}_next << ${op_shift});"
echo " ${op_part_write};"
echo " part${host_boundary} = (part${host_boundary}_next >> ${op_shift_next});"
echo " }"
echo " else {"
echo " part${host_boundary} |= (part${host_boundary}_next >> ${op_shift});"
echo " ${op_part_write};"
echo " part${host_boundary} = (part${host_boundary}_next << ${op_shift_next});"
echo " }"
echo ""
echo " /* advance: */"
echo " parts${host_boundary}++;"
echo " buffer += sizeof(tme_uint${host_boundary}_t);"
echo " count -= sizeof(tme_uint${host_boundary}_t);"
echo " }"

echo ""
echo " /* calculate how many more bytes there are to ${op} in this"
echo " ${host_boundary}-bit memory part: */"
echo " count_done = sizeof(tme_uint${host_boundary}_t) - count_misaligned;"
echo " part${host_boundary}_buffer = part${host_boundary};"
if test ${op} = write; then
echo ""
echo " /* if we can't write one more full ${host_boundary}-bit memory part: */"
echo " if (count_done > count) {"
echo ""
echo " /* we will reread this data to write below: */"
echo " buffer -= count_misaligned;"
echo " count += count_misaligned;"
echo " }"
echo ""
echo " /* otherwise, we can write one more full ${host_boundary}-bit memory part: */"
echo " else {"
echo ""
echo " /* copy from the buffer until we have the full ${host_boundary}-bit part: */"
echo " part_buffer = ((${op_const_mem}tme_uint8_t *) &part${host_boundary}_buffer) + count_misaligned;"
echo " count -= count_done;"
echo " do {"
echo " ${op_copy};"
echo " part_buffer++;"
echo " buffer++;"
echo " } while (--count_done != 0);"
echo ""
echo " /* write the last full ${host_boundary}-bit memory part: */"
echo " part${host_boundary} = part${host_boundary}_buffer;"
echo " ${op_part_write};"
echo " }"
else
echo ""
echo " /* copy to the buffer the remaining bytes in this ${host_boundary}-bit part: */"
echo " if (count_done > count) {"
echo " count_done = count;"
echo " }"
echo " part_buffer = ((${op_const_mem}tme_uint8_t *) &part${host_boundary}_buffer);"
echo " count -= count_done;"
echo " do {"
echo " ${op_copy};"
echo " part_buffer++;"
echo " buffer++;"
echo " } while (--count_done != 0);"
fi
echo " }"
echo " }"

echo ""
echo " /* if we still have bytes to ${op}: */"
echo " if (__tme_predict_false(count > 0)) {"
echo ""
echo " /* we must have less than a full ${host_boundary}-bit part to ${op}: */"
echo " assert (count < sizeof(tme_uint${host_boundary}_t));"
if test ${op} = write; then
echo ""
echo " /* make a mask that clears for the data to write in the last"
echo " ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_mask"
echo " = (TME_ENDIAN_NATIVE == TME_ENDIAN_LITTLE"
echo " ? _tme_memory_type_mask(tme_uint${host_boundary}_t, << (count * 8))"
echo " : _tme_memory_type_mask(tme_uint${host_boundary}_t, >> (count * 8)));"
echo ""
echo " /* copy from the buffer the bytes to write in the last"
echo " ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_buffer = 0;"
else
echo ""
echo " /* read the last ${host_boundary}-bit memory part: */"
echo " part${host_boundary}_buffer = ${op_part_read};"
echo ""
echo " /* copy to the buffer the bytes to read in the first"
echo " ${host_boundary}-bit memory part: */"
fi
echo " part_buffer = ((${op_const_mem}tme_uint8_t *) &part${host_boundary}_buffer);"
echo " count_done = count;"
echo " do {"
echo " ${op_copy};"
echo " part_buffer++;"
echo " buffer++;"
echo " } while (--count_done != 0);"
if test ${op} = write; then
echo ""
echo " /* compare-and-exchange the last ${host_boundary}-bit memory part: */"
echo " part${host_boundary} = ${op_part_read};"
echo " do {"
echo " part${host_boundary}_cmp = part${host_boundary};"
echo " part${host_boundary} = (part${host_boundary} & part${host_boundary}_mask) | part${host_boundary}_buffer;"
echo " part${host_boundary} = tme_memory_atomic_cx${host_boundary}(parts${host_boundary}, part${host_boundary}_cmp, part${host_boundary}, rwlock, sizeof(tme_uint${host_boundary}_t));"
echo " } while (part${host_boundary} != part${host_boundary}_cmp);"
fi
echo " }"

# close this host boundary:
#
echo ""
echo " }"
if test `expr ${host_boundary} \>= ${size_ifdef}` = 1; then
echo ""
echo "#endif /* TME_HAVE_INT${host_boundary}_T */"
fi

# advance:
#
host_boundary=`expr ${host_boundary} / 2`
done

echo "}"
done
|
|
|
|
# permute for the different sizes:
|
|
#
|
|
# generate code for each supported size in ${sizes}; the matching "done"
# is at the end of this section:
#
for size in ${sizes}; do

# sizes at or above ${size_ifdef} may not exist on every host, so wrap
# their code in the corresponding TME_HAVE_INTnn_T guard:
#
if test `expr ${size} \>= ${size_ifdef}` = 1; then
echo ""
echo "#ifdef TME_HAVE_INT${size}_T"
fi
|
|
|
|
# permute for the different types of memory read and write macros.
|
|
# all of these macros work with memory of any alignment, but for
|
|
# best performance they accept the minimum alignment of the memory
|
|
# known at compile time.
|
|
#
|
|
# the "plain" macros are used to read and write memory that is not
|
|
# shared.
|
|
#
|
|
# the "atomic" macros are used to read and write memory that is
|
|
# shared. the total read or write is always atomic.
|
|
#
|
|
# the "bus" macros are used to read and write memory that is
|
|
# shared, with the access split across the given bus boundary into
|
|
# atomic partials.
|
|
#
|
|
# generate the default plain, atomic, and bus read/write macros for this
# size. NB(review): partial output lines are emitted with printf(1)
# instead of "echo -n"; under "#! /bin/sh" POSIX echo takes no options
# and "echo -n" is implementation-defined (some shells print a literal
# "-n"), which would corrupt the generated C:
#
for type_user in plain atomic bus; do

# permute over read and write:
#
for op in read write; do

# these macros are for the header file, and there are
# hand-coded macros for 8-bit values:
#
if $header; then :; else continue; fi
if test ${size} = 8; then continue; fi

# characterize this macro:
#
type="_${type_user}"
type_lock=", lock"
type_part=_atomic
type_bus_boundary=
case ${type_user} in
plain)
type=
type_lock=
type_part=
;;
bus)
type_bus_boundary=", bus_boundary"
;;
esac

# set up the emission helpers: a read macro expands to one parenthesized
# conditional expression, a write macro to a do/while(0) statement with
# if/else branches. '@' characters in these strings become newlines
# below, via tr:
#
if test ${op} = read; then
op_x=
op_open=" ( \\"
op_indent0=" "
op_indent1=${op_indent0}
op_then="${op_indent1}? \\"
op_else_if="${op_indent1}: \\"
op_else=${op_else_if}
op_indent2="${op_indent1} "
op_semi=
op_close=" )"
else
op_x=", x"
op_open=" do { \\@ if \\"
op_indent0=" "
op_indent1=${op_indent0}
op_then=" { \\"
op_else_if=" } \\@ else if \\"
op_else=" } \\@ else \\@${op_then}"
op_indent2=" "
op_semi=';'
op_close=" } \\@ } while (/* CONSTCOND */ 0)"
fi

# start the macro:
#
echo ""
echo "/* the default ${size}-bit memory ${type_user} ${op} macro: */"
echo "#define tme_memory${type}_${op}${size}(mem${op_x}${type_lock}, align_min${type_bus_boundary}) \\"
echo "${op_open}" | tr '@' '\n'

# dispatch on the macro type:
#
case ${type_user} in

plain)
echo "${op_indent0}/* if we know at compile time that the memory is aligned \\"
echo "${op_indent0} enough to ${op} directly, do the single direct ${op}. \\"
echo "${op_indent0}\\"
echo "${op_indent0} otherwise, if we know at compile time that the memory \\"
echo "${op_indent0} is less aligned than the smallest acceptable parts size, \\"
echo "${op_indent0} test if the memory is aligned enough to ${op} directly, \\"
echo "${op_indent0} and do the single direct ${op} if it is: */ \\"
echo "${op_indent0}(__tme_predict_true((_TME_ALIGNOF_INT${size}_T == 1 \\"
echo "${op_indent0} || (align_min) >= _TME_ALIGNOF_INT${size}_T) \\"
echo "${op_indent0} || ((align_min) < TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint${size}_t) \\"
echo "${op_indent0} && _tme_memory_address_test(mem, _TME_ALIGNOF_INT${size}_T - 1, align_min) == 0))) \\"
echo "${op_then}"
echo "${op_indent2}_tme_memory_${op}(tme_uint${size}_t, tme_uint${size}_t, mem, 0${op_x})${op_semi} \\"

# loop over the possible part sizes:
#
size_part=8
while test ${size_part} != ${size}; do

# at this point, we know that the memory is at
# least aligned to the part size:
#

# if this is the last possible part size:
#
if test `expr ${size_part} \* 2` = ${size}; then

# we will just access all parts of this size:
#
echo "${op_else}" | tr '@' '\n'

# otherwise, this is not the last possible part
# size:
#
else

# if we know at compile time that accessing
# all parts of this size is acceptable over
# further testing of the address, and the
# memory is not more aligned than this part
# size, we will just access all parts of this
# size:
#
echo "${op_else_if}" | tr '@' '\n'
echo "${op_indent1}((TME_MEMORY_ALIGNMENT_ACCEPT(tme_uint${size}_t) <= sizeof(tme_uint${size_part}_t)) \\"
echo "${op_indent1} && ((align_min) <= sizeof(tme_uint${size_part}_t))) \\"
echo "${op_then}"
fi

# we always emit one set of partial accesses, all
# of this part size.
#
# then, if this isn't the last possible part size,
# we test if the address is aligned only to this
# part size. if it is, we emit a second set of partial
# accesses, to transfer this part size, some number
# of parts of the next part size, and one part of this
# part size:
#
size_now=${size_part}
misaligned=false
while true; do
size_done=0
while test ${size_done} != ${size}; do

# emit one partial transfer; a read is one term of a
# parenthesized OR expression, so its delimiters differ:
#
printf '%s' "${op_indent2}"
op_delim=${op_semi}
if test ${op} = read; then
if test ${size_done} = 0; then printf '%s' '('; else printf '%s' ' | '; fi
if test `expr ${size_done} + ${size_now}` = ${size}; then op_delim=')'; fi
fi
echo "_tme_memory${type}_${op}(tme_uint${size}_t, tme_uint${size_now}_t, mem, (${size_done} / 8)${op_x})${op_delim} \\"

# advance:
#
size_done=`expr ${size_done} + ${size_now}`
size_now=`expr ${size} - ${size_done}`
if test `expr ${size_now} \> ${size_part}` = 1; then
size_now=${size_part}
fi
done

# if we have already done the misaligned set,
# stop now:
#
if ${misaligned}; then
break
fi
misaligned=true

# advance to test and do the misaligned set:
#
size_now=${size_part}
size_part=`expr ${size_part} \* 2`
if test ${size_part} = ${size}; then
break
fi
echo "${op_else_if}" | tr '@' '\n'
echo "${op_indent1}(_tme_memory_address_test(mem, sizeof(tme_uint${size_now}_t), align_min) != 0) \\"
echo "${op_then}"
done
done
;;

atomic)
echo "${op_indent0}/* if threads are cooperative, do a plain ${op}: */ \\"
echo "${op_indent0}(TME_THREADS_COOPERATIVE) \\"
echo "${op_then}"
printf '%s' "${op_indent2}tme_memory_${op}${size}("
# this strips off the tme_shared qualifier:
#
if test ${op} = read; then
printf '%s' "(_tme_const tme_uint${size}_t *) _tme_audit_type(mem, tme_uint${size}_t *)"
else
printf '%s' "(tme_uint${size}_t *) _tme_cast_pointer_shared(tme_uint${size}_t *, tme_uint${size}_t *, mem)"
fi
echo "${op_x}, align_min)${op_semi} \\"

echo "${op_indent1}/* otherwise, if we aren't locking for all memory accesses, and we can \\"
echo "${op_indent1} make direct ${size}-bit accesses, and this memory is aligned \\"
echo "${op_indent1} enough to make a single direct atomic access, do the single \\"
echo "${op_indent1} direct atomic ${op}: */ \\"
echo "${op_else_if}" | tr '@' '\n'
echo "${op_indent1}(__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \\"
echo "${op_indent1} && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint${size}_t) != 0 \\"
echo "${op_indent1} && _tme_memory_address_test(mem, TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint${size}_t) - 1, align_min) == 0)) \\"
echo "${op_then}"
echo "${op_indent2}(*_tme_audit_type(mem, tme_uint${size}_t *)) \\"
if test ${op} = write; then
echo "${op_indent2} = (x); \\"
fi
echo "${op_indent1}/* otherwise, we must do a slow indirect atomic ${op}: */ \\"
echo "${op_else}" | tr '@' '\n'
echo "${op_indent2}tme_memory${type}_${op}${size}(mem${op_x}${type_lock}, align_min)${op_semi} \\"
;;

bus)
echo "${op_indent0}/* if threads are cooperative, do a plain ${op}: */ \\"
echo "${op_indent0}(TME_THREADS_COOPERATIVE) \\"
echo "${op_then}"
printf '%s' "${op_indent2}tme_memory_${op}${size}("
# this strips off the tme_shared qualifier:
#
if test ${op} = read; then
printf '%s' "(_tme_const tme_uint${size}_t *) _tme_audit_type(mem, tme_uint${size}_t *)"
else
printf '%s' "(tme_uint${size}_t *) _tme_cast_pointer_shared(tme_uint${size}_t *, tme_uint${size}_t *, mem)"
fi
echo "${op_x}, align_min)${op_semi} \\"

echo "${op_indent1}/* otherwise, if we aren't locking for all memory accesses, the \\"
echo "${op_indent1} host supports misaligned ${size}-bit accesses, the host's bus \\"
echo "${op_indent1} boundary is greater than or equal to the emulated bus \\"
echo "${op_indent1} boundary, and this memory is aligned enough, do a single \\"
echo "${op_indent1} direct bus ${op}: */ \\"
echo "${op_else_if}" | tr '@' '\n'
echo "${op_indent1}(__tme_predict_true(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) != 0 \\"
echo "${op_indent1} && _TME_ALIGNOF_INT${size}_T < sizeof(tme_uint${size}_t) \\"
echo "${op_indent1} && TME_MEMORY_BUS_BOUNDARY >= (bus_boundary) \\"
echo "${op_indent1} && _tme_memory_address_test(mem, _TME_ALIGNOF_INT${size}_T - 1, align_min) == 0)) \\"
echo "${op_then}"
echo "${op_indent2}(*_tme_audit_type(mem, tme_uint${size}_t *)) \\"
if test ${op} = write; then
echo "${op_indent2} = (x); \\"
fi

echo "${op_indent1}/* otherwise, if we're locking for all memory accesses, or \\"
echo "${op_indent1} if this memory must cross at least one host bus boundary \\"
echo "${op_indent1} and the host bus boundary is less than the emulated bus \\"
echo "${op_indent1} boundary, do a slow indirect atomic ${op}: */ \\"
echo "${op_else_if}" | tr '@' '\n'
echo "${op_indent1}(__tme_predict_false(TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0 \\"
echo "${op_indent1} || (sizeof(tme_uint${size}_t) > TME_MEMORY_BUS_BOUNDARY \\"
echo "${op_indent1} && TME_MEMORY_BUS_BOUNDARY < (bus_boundary)))) \\"
echo "${op_then}"
echo "${op_indent2}tme_memory_atomic_${op}${size}(mem${op_x}, lock, align_min)${op_semi} \\"

echo "${op_indent1}/* otherwise, if the memory is not larger than the emulated \\"
echo "${op_indent1} bus boundary, or if size-alignment would mean an atomic \\"
echo "${op_indent1} host access and it is size-aligned, do a single atomic \\"
echo "${op_indent1} ${op}, which may be direct or slow: */ \\"
echo "${op_else_if}" | tr '@' '\n'
echo "${op_indent1}(__tme_predict_true((sizeof(tme_uint${size}_t) <= (bus_boundary) \\"
echo "${op_indent1} || (TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint${size}_t) != 0 \\"
echo "${op_indent1} && TME_MEMORY_ALIGNMENT_ATOMIC(tme_uint${size}_t) <= sizeof(tme_uint${size}_t))) \\"
echo "${op_indent1} && _tme_memory_address_test(mem, sizeof(tme_uint${size}_t) - 1, align_min) == 0)) \\"
echo "${op_then}"
echo "${op_indent2}tme_memory_atomic_${op}${size}(mem${op_x}, lock, sizeof(tme_uint${size}_t))${op_semi} \\"
echo "${op_indent1}/* otherwise, we must do a slow bus ${op}: */ \\"
echo "${op_else}" | tr '@' '\n'
echo "${op_indent2}tme_memory${type}_${op}${size}(mem${op_x}${type_lock}, align_min, bus_boundary)${op_semi} \\"
;;

esac

# close this macro:
#
echo "${op_close}" | tr '@' '\n'
done
done
|
|
|
|
echo ""
echo "/* the ${size}-bit atomic operations: */"

# the atomic operations. NB that cx, read, and write are
# deliberately at the end and in that order, to allow all earlier
# default implementations to still use any host CPU-specific cx,
# read and write macros:
#
for op in add sub mul div and or xor not neg xchg cx read write; do

# dispatch on the operation. NB that we don't need to
# generate 8-bit atomic read and write operations:
#
op_rval="tme_uint${size}_t"
op_const=
op_proto_operand=", tme_uint${size}_t"
op_operand=operand
op_from=to
op_operator=
op_operation=
op_indent=
case ${op} in
read)
if test ${size} = 8; then continue; fi
op_const="_tme_const "
op_from=from
op_proto_operand=
;;
write)
if test ${size} = 8; then continue; fi
op_rval=void
op_operand=value_written
;;
cx)
op_proto_operand=", tme_uint${size}_t, tme_uint${size}_t"
op_operand=value_written
op_indent=" "
;;
add) op_operator='+' ;;
sub) op_operator='-' ;;
mul) op_operator='*' ;;
div) op_operator='/' ;;
and) op_operator='&' ;;
or) op_operator='|' ;;
xor) op_operator='^' ;;
not) op_operation='~value_read' ; op_proto_operand= ;;
neg) op_operation='0 - value_read' ; op_proto_operand= ;;
xchg) op_operand=value_written
esac

# the binary operations all read-modify-write with the operand:
#
if test "x${op_operator}" != x; then
op_operation="value_read ${op_operator} ${op_operand}"
fi

# if we're making the header, just emit a prototype:
#
if $header; then
echo "${op_rval} tme_memory_atomic_${op}${size} _TME_P((${op_const}tme_shared tme_uint${size}_t *${op_proto_operand}, tme_rwlock_t *, unsigned int));"
continue
fi

# otherwise, emit the out-of-line C function definition:
#
echo ""
echo "/* undefine any macro version of tme_memory_atomic_${op}${size}: */"
echo "#undef tme_memory_atomic_${op}${size}"
echo ""
echo "/* the ${size}-bit atomic ${op} function: */"
echo "${op_rval}"
echo "tme_memory_atomic_${op}${size}(${op_const}tme_shared tme_uint${size}_t *memory,"
if test ${op} = cx; then
echo " tme_uint${size}_t value_cmp,"
fi
if test "x${op_proto_operand}" != x; then
echo " tme_uint${size}_t ${op_operand},"
fi
echo " tme_rwlock_t *rwlock,"
echo " unsigned int align_min)"
echo "{"
if test ${op} != write; then
echo " tme_uint${size}_t value_read;"
fi
if test "x${op_operation}" != x; then
echo " tme_uint${size}_t value_written;"
fi
if test ${op} = read || test ${op} = write || test ${op} = cx; then :; else
echo " tme_uint${size}_t value_read_verify;"
fi
echo ""

# emit the under-lock path (read lock for reads, write lock otherwise):
#
echo " /* if we can't make direct accesses at all, all atomic"
echo " accesses must be done under lock. (when threads are"
echo " cooperative the actual locking isn't needed): */"
echo " if (TME_MEMORY_ALIGNMENT_ATOMIC(TME_MEMORY_TYPE_COMMON) == 0) {"
echo " if (!TME_THREADS_COOPERATIVE) {"
if test ${op} = read; then
echo " tme_rwlock_rdlock(rwlock);"
else
echo " tme_rwlock_wrlock(rwlock);"
fi
echo " }"
if test ${op} != write; then
echo " value_read = tme_memory_read${size}((_tme_const tme_uint${size}_t *) memory, align_min);"
fi
if test ${op} = cx; then
echo " if (value_read == value_cmp) {"
fi
if test "x${op_operation}" != x; then
echo " value_written = ${op_operation};"
fi
if test ${op} != read; then
echo "${op_indent} tme_memory_write${size}((tme_uint${size}_t *) memory, value_written, align_min);"
fi
if test "x${op_indent}" != x; then
echo " }"
fi
echo " if (!TME_THREADS_COOPERATIVE) {"
echo " tme_rwlock_unlock(rwlock);"
echo " }"
echo " }"
echo ""
echo " /* otherwise, threads are not cooperative and this host CPU"
echo " can make atomic accesses to at least the most common memory"
echo " size."
echo ""
echo " in that case, the only reason this function should get"
echo " called is if the host CPU can't do an atomic ${size}-bit"
echo " ${op} at all, or if it can't do it at this alignment."

# cx, read, and write are emulated by suspending all other threads;
# every other operation is emulated with a compare-and-exchange loop:
#
if test ${op} = read || test ${op} = write || test ${op} = cx; then
echo ""
echo " we assume that these problematic atomic ${op}s are rare,"
echo " and to emulate them we simply stop all other threads while"
echo " doing the ${op}: */"
echo " else {"
echo " tme_thread_suspend_others();"
if test ${op} != write; then
echo " value_read = tme_memory_read${size}((_tme_const tme_uint${size}_t *) memory, align_min);"
fi
if test ${op} = cx; then
echo " if (value_read == value_cmp) {"
fi
if test ${op} != read; then
echo "${op_indent} tme_memory_write${size}((tme_uint${size}_t *) memory, value_written, align_min);"
fi
if test "x${op_indent}" != x; then
echo " }"
fi
echo " tme_thread_resume_others();"
echo " }"
else
echo ""
echo " we emulate the atomic ${size}-bit ${op} with a compare-and-exchange: */"
echo " else {"
echo ""
echo " /* do an atomic read of the memory: */"
echo " value_read = tme_memory_atomic_read${size}(memory, rwlock, align_min);"
echo ""
echo " /* spin the ${op} in a compare-and-exchange loop: */"
echo " for (;;) {"
if test "x${op_operation}" != x; then
echo ""
echo " /* make the value to write: */"
echo " value_written = ${op_operation};"
fi
echo ""
echo " /* try the compare-and-exchange: */"
echo " value_read_verify = tme_memory_atomic_cx${size}(memory, value_read, value_written, rwlock, align_min);"
echo ""
echo " /* if the compare-and-exchange failed: */"
echo " if (__tme_predict_false(value_read_verify != value_read)) {"
echo ""
echo " /* loop with the new value read from the memory: */"
echo " value_read = value_read_verify;"
echo " continue;"
echo " }"
echo ""
echo " /* stop now: */"
echo " break;"
echo " }"
echo " }"
fi

# everything except write returns the value read:
#
if test ${op} != write; then
echo ""
echo " /* return the value read: */"
echo " return (value_read);"
fi
echo "}"
done
|
|
|
|
# close the TME_HAVE_INTnn_T guard opened at the top of this size's
# section, then advance to the next size:
#
if test `expr ${size} \>= ${size_ifdef}` = 1; then
echo ""
echo "#endif /* TME_HAVE_INT${size}_T */"
fi

done
|
|
|
|
# done:
#
# all output has been written to stdout; exit successfully.
exit 0
|