mirror of
https://github.com/phabrics/Run-Sun3-SunOS-4.1.1.git
synced 2026-04-29 19:12:58 -04:00
297 lines
10 KiB
ArmAsm
297 lines
10 KiB
ArmAsm
/* $Id: rc-x86-subs-asm.S,v 1.2 2009/09/07 15:25:23 fredette Exp $ */
|
|
|
|
/* libtme/host/x86/rc-x86-subs-asm.S - hand-coded x86 host recode subs: */
|
|
|
|
/*
|
|
* Copyright (c) 2007 Matt Fredette
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by Matt Fredette.
|
|
* 4. The name of the author may not be used to endorse or promote products
|
|
* derived from this software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
|
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
.text
|
|
|
|
.ascii "$Id: rc-x86-subs-asm.S,v 1.2 2009/09/07 15:25:23 fredette Exp $"
|
|
|
|
# concatenation:
|
|
#
|
|
#if ((defined(__STDC__) || defined(__cplusplus) || defined(c_plusplus)) && !defined(UNIXCPP)) || defined(ANSICPP)
|
|
#define __TME_CONCAT(a,b) a ## b
|
|
#define _TME_CONCAT(a,b) __TME_CONCAT(a,b)
|
|
#else
|
|
#define _TME_CONCAT(a,b) a/**/b
|
|
#endif
|
|
|
|
#ifdef __x86_64__
|
|
#define TME_RECODE_SIZE_HOST 6
|
|
#define TME_RECODE_BITS_HOST(x) _TME_CONCAT(x, 64)
|
|
#define TME_RECODE_BITS_DOUBLE_HOST(x) _TME_CONCAT(x, 128)
|
|
#define TME_RECODE_X86_OPN(x) _TME_CONCAT(x, q)
|
|
#define TME_RECODE_X86_REGN(x) _TME_CONCAT(%r, x)
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_DST_N %r12
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_DST_L %r12d
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_N %rbp
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_L %ebp
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_W %bp
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_B %bpl
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_P1_N %rax
|
|
#else /* !__x86_64__ */
|
|
#define TME_RECODE_SIZE_HOST 5
|
|
#define TME_RECODE_BITS_HOST(x) _TME_CONCAT(x, 32)
|
|
#define TME_RECODE_BITS_DOUBLE_HOST(x) _TME_CONCAT(x, 64)
|
|
#define TME_RECODE_X86_OPN(x) _TME_CONCAT(x, l)
|
|
#define TME_RECODE_X86_REGN(x) _TME_CONCAT(%e, x)
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_DST_N %edi
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_DST_L %edi
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_N %ebp
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_L %ebp
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_W %bp
|
|
#define TME_RECODE_X86_REG_HOST_SUBS_SRC1_P1_N %eax
|
|
#endif /* !__x86_64__ */
|
|
|
|
# this macro does a double-host-size shift:
|
|
#
|
|
.macro tme_recode_x86_double_shift name shift shiftd reg_first reg_second arith=0
|
|
.align 16
|
|
.globl \name
|
|
\name:
|
|
|
|
# branch if the most-significant half of the shift count in
|
|
# src1 is nonzero, swapping the least-significant half of the
|
|
# shift count in src1 with the scratch c register in the
|
|
# meantime. we will swap them back after the shift.
|
|
#
|
|
# NB: we do the swaps with the c register instead of just
|
|
# overwriting it, to cooperate with subs that keep the
|
|
# most-significant half of src0 in the c register:
|
|
#
|
|
TME_RECODE_X86_OPN(test) TME_RECODE_X86_REG_HOST_SUBS_SRC1_P1_N, TME_RECODE_X86_REG_HOST_SUBS_SRC1_P1_N
|
|
TME_RECODE_X86_OPN(xchg) TME_RECODE_X86_REG_HOST_SUBS_SRC1_N, TME_RECODE_X86_REGN(cx)
|
|
jnz .L_double_shift_2_\@
|
|
|
|
# branch if the shift count is greater than the host size:
|
|
#
|
|
TME_RECODE_X86_OPN(cmp) $(1 << TME_RECODE_SIZE_HOST), TME_RECODE_X86_REGN(cx)
|
|
jae .L_double_shift_1_\@
|
|
|
|
# do the double-precision shift:
|
|
#
|
|
\shiftd %cl, \reg_second, \reg_first
|
|
\shift %cl, \reg_second
|
|
TME_RECODE_X86_OPN(xchg) TME_RECODE_X86_REG_HOST_SUBS_SRC1_N, TME_RECODE_X86_REGN(cx)
|
|
ret
|
|
|
|
.L_double_shift_1_\@:
|
|
|
|
# branch if the shift count is greater than or equal to the
|
|
# double-host size:
|
|
#
|
|
TME_RECODE_X86_OPN(cmp) $2*(1 << TME_RECODE_SIZE_HOST), TME_RECODE_X86_REGN(cx)
|
|
jae .L_double_shift_2_\@
|
|
|
|
# first do a host-size shift by doing a register move and
|
|
# then a clear for a logical shift, or a copy of the most
|
|
# significant bit of the second register down into all other
|
|
# bits:
|
|
#
|
|
TME_RECODE_X86_OPN(mov) \reg_second, \reg_first
|
|
.if \arith
|
|
TME_RECODE_X86_OPN(shl) $1, \reg_second
|
|
TME_RECODE_X86_OPN(sbb) \reg_second, \reg_second
|
|
.else
|
|
TME_RECODE_X86_OPN(xor) \reg_second, \reg_second
|
|
.endif
|
|
|
|
# do the remainder of the shift as a host-size shift (which
|
|
# masks the count in %cl to the host size):
|
|
#
|
|
\shift %cl, \reg_first
|
|
TME_RECODE_X86_OPN(xchg) TME_RECODE_X86_REG_HOST_SUBS_SRC1_N, TME_RECODE_X86_REGN(cx)
|
|
ret
|
|
|
|
# the shift count is greater than the double-host-size. for a
|
|
# logical shift, clear both registers. for an arithmetic shift,
|
|
# copy the most significant bit of the second register down into
|
|
# all bits in both registers:
|
|
#
|
|
.L_double_shift_2_\@:
|
|
.if \arith
|
|
TME_RECODE_X86_OPN(shl) $1, \reg_second
|
|
TME_RECODE_X86_OPN(sbb) \reg_second, \reg_second
|
|
TME_RECODE_X86_OPN(mov) \reg_second, \reg_first
|
|
.else
|
|
TME_RECODE_X86_OPN(xor) \reg_first, \reg_first
|
|
TME_RECODE_X86_OPN(xor) \reg_second, \reg_second
|
|
.endif
|
|
TME_RECODE_X86_OPN(xchg) TME_RECODE_X86_REG_HOST_SUBS_SRC1_N, TME_RECODE_X86_REGN(cx)
|
|
ret
|
|
.endm
|
|
|
|
# this macro does a host-size or smaller shift:
|
|
#
|
|
.macro tme_recode_x86_shift insn size shift
|
|
.align 16
|
|
.globl tme_recode_x86_\insn\size
|
|
tme_recode_x86_\insn\size:
|
|
|
|
# _if this is a right shift smaller than host size, first
|
|
# zero-extend or sign-extend the destination to host size,
|
|
# so we can do a 32-bit shift, or branch to
|
|
# _tme_recode_x86_shift_arithmetic_all (which assumes that
|
|
# the destination is host size):
|
|
#
|
|
.ifnc \insn,shll
|
|
.if \size < TME_RECODE_BITS_HOST(/**/)
|
|
|
|
# _if this is an 8-bit shift on an ia32 host, we can't encode
|
|
# %dil for a movzbl or movsbl, so we do an and for a movzbl and
|
|
# a movsbl through the c register:
|
|
#
|
|
.ifeq (TME_RECODE_BITS_HOST(/**/) - 32) | (\size - 8)
|
|
.ifc \insn,shra
|
|
movl %edi, %ecx
|
|
movsbl %cl, %edi
|
|
.else
|
|
andl $0xff, %edi
|
|
.endif
|
|
.else
|
|
|
|
# otherwise, emit a movz or movs instruction to extend
|
|
# the destination in TME_RECODE_X86_REG_HOST_SUBS_DST:
|
|
#
|
|
.ifeq \size - 32
|
|
# the x86_64 32-bit extensions are different:
|
|
#
|
|
.ifc \insn,shra
|
|
movslq TME_RECODE_X86_REG_HOST_SUBS_DST_L, TME_RECODE_X86_REG_HOST_SUBS_DST_N
|
|
.else
|
|
movl TME_RECODE_X86_REG_HOST_SUBS_DST_L, TME_RECODE_X86_REG_HOST_SUBS_DST_L
|
|
.endif
|
|
.else
|
|
#ifdef __x86_64__
|
|
.byte 0x48 + (1 << 0) + (1 << 2) # TME_RECODE_X86_REX_R(TME_RECODE_SIZE_64, %r12) + TME_RECODE_X86_REX_B(0, %r12)
|
|
#endif /* __x86_64__ */
|
|
.byte 0x0f # TME_RECODE_X86_OPCODE_ESC_0F
|
|
.ifc \insn,shra
|
|
.byte 0xbf - ((\size / 8) & 1) # TME_RECODE_X86_OPCODE0F_MOVS_Ew_Gv or TME_RECODE_X86_OPCODE0F_MOVS_Eb_Gv
|
|
.else
|
|
.byte 0xb7 - ((\size / 8) & 1) # TME_RECODE_X86_OPCODE0F_MOVZ_Ew_Gv or TME_RECODE_X86_OPCODE0F_MOVZ_Eb_Gv
|
|
.endif
|
|
#ifdef __x86_64__
|
|
.byte (0xc0 + (12 % 8)) + ((12 % 8) << 3) # %r12, %r12
|
|
#else /* !__x86_64__ */
|
|
.byte (0xc0 + 7) + (7 << 3) # %edi, %edi
|
|
#endif /* __x86_64__ */
|
|
.endif
|
|
.endif
|
|
.endif
|
|
.endif
|
|
|
|
# compare the shift count in TME_RECODE_X86_REG_HOST_SUBS_SRC1
|
|
# to the size:
|
|
#
|
|
.ifeq \size - 64
|
|
cmpq $\size, TME_RECODE_X86_REG_HOST_SUBS_SRC1_N
|
|
.else
|
|
.ifeq \size - 32
|
|
cmpl $\size, TME_RECODE_X86_REG_HOST_SUBS_SRC1_L
|
|
.else
|
|
.ifeq \size - 16
|
|
cmpw $\size, TME_RECODE_X86_REG_HOST_SUBS_SRC1_W
|
|
.else
|
|
.ifeq \size - 8
|
|
#ifdef TME_RECODE_X86_REG_HOST_SUBS_SRC1_B
|
|
cmpb $\size, TME_RECODE_X86_REG_HOST_SUBS_SRC1_B
|
|
#else
|
|
movl TME_RECODE_X86_REG_HOST_SUBS_SRC1_L, %ecx
|
|
cmpb $\size, %cl
|
|
#endif
|
|
.endif
|
|
.endif
|
|
.endif
|
|
.endif
|
|
|
|
# put the shift count into the c register. this has already
|
|
# been done if this is an 8-bit shift on an ia32 host:
|
|
#
|
|
.ifne (TME_RECODE_BITS_HOST(/**/) - 32) | (\size - 8)
|
|
movl TME_RECODE_X86_REG_HOST_SUBS_SRC1_L, %ecx
|
|
.endif
|
|
|
|
# _if the shift count is greater than or equal to the size,
|
|
# for an arithmetic shift copy the most-significant bit down
|
|
# into all other bits, otherwise do a clear:
|
|
#
|
|
.ifc \insn,shra
|
|
jae _tme_recode_x86_shift_arithmetic_all
|
|
.else
|
|
jae _tme_recode_x86_shift_logical_all
|
|
.endif
|
|
|
|
# otherwise, do the shift:
|
|
#
|
|
.ifeq (\size - 64)
|
|
\shift %cl, TME_RECODE_X86_REG_HOST_SUBS_DST_N
|
|
.else
|
|
\shift %cl, TME_RECODE_X86_REG_HOST_SUBS_DST_L
|
|
.endif
|
|
ret
|
|
.endm
|
|
|
|
# the shifts:
|
|
#
|
|
#ifdef __x86_64__
|
|
tme_recode_x86_double_shift tme_recode_x86_shll128 shlq shldq %r13 %r12
|
|
tme_recode_x86_double_shift tme_recode_x86_shrl128 shrq shrdq %r12 %r13
|
|
tme_recode_x86_double_shift tme_recode_x86_shra128 sarq shrdq %r12 %r13 1
|
|
tme_recode_x86_shift shll 64 shlq
|
|
tme_recode_x86_shift shrl 64 shrq
|
|
tme_recode_x86_shift shra 64 sarq
|
|
#else /* !__x86_64__ */
|
|
tme_recode_x86_double_shift tme_recode_x86_shll64 shll shldl %esi %edi
|
|
tme_recode_x86_double_shift tme_recode_x86_shrl64 shrl shrdl %edi %esi
|
|
tme_recode_x86_double_shift tme_recode_x86_shra64 sarl shrdl %edi %esi 1
|
|
#endif /* !__x86_64__ */
|
|
tme_recode_x86_shift shll 32 shll
|
|
tme_recode_x86_shift shrl 32 shrl
|
|
tme_recode_x86_shift shra 32 sarl
|
|
tme_recode_x86_shift shll 16 shll
|
|
tme_recode_x86_shift shrl 16 shrl
|
|
tme_recode_x86_shift shra 16 sarl
|
|
tme_recode_x86_shift shll 8 shll
|
|
tme_recode_x86_shift shrl 8 shrl
|
|
tme_recode_x86_shift shra 8 sarl
|
|
|
|
_tme_recode_x86_shift_arithmetic_all:
|
|
TME_RECODE_X86_OPN(add) TME_RECODE_X86_REG_HOST_SUBS_DST_N, TME_RECODE_X86_REG_HOST_SUBS_DST_N
|
|
TME_RECODE_X86_OPN(sbb) TME_RECODE_X86_REG_HOST_SUBS_DST_N, TME_RECODE_X86_REG_HOST_SUBS_DST_N
|
|
ret
|
|
|
|
_tme_recode_x86_shift_logical_all:
|
|
TME_RECODE_X86_OPN(xor) TME_RECODE_X86_REG_HOST_SUBS_DST_N, TME_RECODE_X86_REG_HOST_SUBS_DST_N
|
|
ret
|