*******************************************************************************
* *
* SUBROUTINE #2 *
* 03/06/09 (dkc) *
* *
* This C64 subroutine performs the following function: *
* *
* div64_32(K, T, 3); // k/3 *
* U[0]=T[0]; *
* U[1]=T[1]; *
* add64(T, U); *
* add64(T, U); // (k/3)*3 *
* sub64(K, U); // k-(k/3)*3 *
* if ((U[0]==0)&&(U[1]==0)) *
* goto askip; *
* *
* The calling sequence of the subroutine is: *
* *
* K[0]=>a4 *
* K[1]=>b4 *
* address of product=>a6 *
* a4<=1 if k-(k/3)*3 is zero, otherwise 0 *
* *
*******************************************************************************
.global _subr2
.global _mul64_64
.text
* _subr2 - computes D = K - 3*T in 32-bit word arithmetic and reports whether
* D is zero.  T is the two-word value read back from the product address after
* _mul64_64 has run (per the file header, T stands for k/3).
*
* In:   a4 = K[0], b4 = K[1], a6 = address of product, b3 = return address.
* Out:  a4 = 1 if both words of D are zero, otherwise 0.
* Regs written here: a0-a4, a6, a8, a9, a16, a17, b3, b6, b7, b16, b17
*       (in addition to whatever _mul64_64 itself modifies).
*
* Names used in the comments below:
*   T0:T1 = words [0] and [1] at the product address
*   M0:M1 = 3*T (high word : low word)
*   D     = K - M, formed as K + (~M + 1)
*
* NOTE(review): this routine relies on _mul64_64 returning through b3 and on
* a16, a17, b16 and b17 surviving that call - confirm against _mul64_64.
* NOTE(review): a8 presumably carries the product address into _mul64_64 and
* a6:b6 the multiplier - confirm against _mul64_64's calling sequence.
_subr2:
b.s2 _mul64_64 ; call _mul64_64; the next five execute packets run in its delay slots
|| mvkl.s1 0x55555555, a6 ; a6 = 0x55555555, lower half (upper half set by mvkh below)
|| mv.l1 a6, a8 ; a8 = incoming a6 (address of product), read before a6 is overwritten
|| addab.d1 a6, 0, a17 ; a17 = same address, kept for the loads at askip
mvkh.s1 0x55555555, a6 ; a6 = 0x55555555 complete
|| mvkl.s2 0x55555556, b6 ; b6 = 0x55555556, lower half (upper half set by mvkh below)
|| mv.l1 a4, a16 ; a16 = K[0]
|| mv.l2 b4, b16 ; b16 = K[1]
mvkh.s2 0x55555556, b6 ; b6 complete; a6:b6 = 0x5555555555555556 = (2^64+2)/3
mvkl.s2 askip, b3 ; b3 = askip (lower half), so a return through b3 lands at askip
|| mv.l2 b3, b17 ; b17 = caller's return address (old b3, read before the write)
mvkh.s2 askip, b3 ; b3 = askip complete
nop ; fifth and last delay slot of the branch
*
* Continuation after _mul64_64: read T back, form M = 3*T, then D = K - M.
*
askip ldw.d1 *+a17[1], a0 ; a0 = T1 (word [1] at the product address)
ldw.d1 *a17, b6 ; b6 = T0 (word [0] at the product address)
nop 2 ; wait out the load delay slots
zero.s1 a1 ; a1 = 0, so a1:a0 holds T1 as a long
addu.l1 a1:a0, a0, a1:a0 ; a1:a0 = T1 + T1, carry out in a1
|| mv.s1 a0, a2 ; a2 = T1 (old a0, read before the add result is written)
and.l1 a1, 1, a3 ; a3 = carry out of 2*T1
|| zero.s1 a1 ; clear a1 for the next long add
|| add.l2 b6, b6, b7 ; b7 = 2*T0
addu.l1 a1:a0, a2, a1:a0 ; a0 = M1 (low word of 3*T), carry out in a1
|| add.l2 b7, b6, b7 ; b7 = 3*T0
|| mpy.m1 a9, 0, a9 ; a9 = 0 (multiply by zero); high part of a9:a8 used below
add.s2x b7, a3, b6 ; b6 = 3*T0 + first carry
|| and.l1 a1, 1, a1 ; a1 = carry out of the second add
|| not.s1 a0, a8 ; a8 = ~M1
|| mv.d1x b16, a0 ; a0 = K[1] (the not above still reads the old a0)
add.l2x b6, a1, b6 ; b6 = M0 (high word of 3*T, both carries included)
|| mv.s1 a16, a6 ; a6 = K[0]
|| add.l1 a9:a8, 1, a9:a8 ; a9:a8 = ~M1 + 1: a8 = low word of -M, carry in a9
not.l2 b6, b6 ; b6 = ~M0
|| and.l1 a9, 1, a9 ; a9 = carry out of ~M1 + 1
|| zero.s1 a1 ; clear a1 for the long add below
|| b.s2 b17 ; return to the caller; the remaining five packets are its delay slots
addu.l1 a1:a0, a8, a1:a0 ; a0 = K[1] + low word of -M = low word of D, carry in a1
|| add.l2x b6, a9, b6 ; b6 = ~M0 + carry = high word of -M
and.s1 a1, 1, a1 ; a1 = carry out of the low-word add
|| add.l2x b6, a6, b6 ; b6 = K[0] + high word of -M
|| cmpeq.l1 a0, 0, a2 ; a2 = 1 if the low word of D is zero
add.l1x b6, a1, a1 ; a1 = high word of D (carry added)
cmpeq.l1 a1, 0, a1 ; a1 = 1 if the high word of D is zero
and.l1 a1, a2, a4 ; a4 = 1 only if both words of D are zero (result)
.end