﻿ C driver and C64 assembly language
```/*****************************************************************************/
/*									     */
/*  FACTOR (a**p + b**p)/(a + b)					     */
/*  11/03/06 (dkc)							     */
/*									     */
/*  This C program finds a and b such that (a**p + b**p)/(a + b) is a cube   */
/*  or p times a cube.	The "weak" Furtwangler conditions must be fulfilled. */
/*  p is set to 3.                    				             */
/*									     */
/*  Note:  The maximum value of d**2-d*e+e**2 is (3/4)*d**2 if d is even or  */
/*  (3/4)*(d**2-1)+1 if d is odd.					     */
/*									     */
/*****************************************************************************/
#include <math.h>
void sum(unsigned int *addend, unsigned int *augend);
void bigprod(unsigned int a, unsigned int b, unsigned int c, unsigned int *p);
void quotient(unsigned int *a, unsigned int *b, unsigned int);
void midprod(unsigned int a, unsigned int b, unsigned int *output);
unsigned int dloop(unsigned int a, unsigned int b, unsigned int c,
unsigned int d, unsigned int *e);

int main ()
{
/* search parameters */
unsigned int dbeg=1000;          /* starting d, handed to dloop() */
unsigned int tflag=0;            /* 0: no refinement; non-zero: 3 divide-by-7 passes; 2: 6 passes */

extern far unsigned short table3[];
extern far unsigned int output[];
extern far unsigned int error[];
extern far unsigned int d;
extern far unsigned int e;
unsigned int t3size=2556;        /* number of table3 entries searched */
unsigned int outsiz=1999*3;      /* size argument forwarded to dloop() */
unsigned int n=0;                /* index at which the output terminator is written */
unsigned int pos,rem,kmin,lim,pass,passes;
unsigned int one[2],bound[2],prod[3];
double croot2=1.259921;          /* cube root of 2 to six decimals */
double croot4=1.587401;          /* cube root of 4 to six decimals */

/*****************************************/
/* find minimum index into look-up table */
/*****************************************/
/* bound = 3*dbeg*dbeg/4, kept as two 32-bit words (see the header note  */
/* on the maximum value of d**2-d*e+e**2)                                */
midprod(dbeg, dbeg, bound);
bigprod(bound[0], bound[1], 3, prod);
bound[0]=prod[1];
bound[1]=prod[2];
quotient(bound, bound, 4);

/* optional refinement: each pass divides the bound by 7 and then calls  */
/* sum() with the two-word constant {0,1}                                */
if (tflag!=0) {
   one[0]=0;
   one[1]=1;
   passes=(tflag==2) ? 6 : 3;
   for (pass=0; pass<passes; pass++) {
      quotient(bound, bound, 7);
      sum(one, bound);
   }
}

/* lim = number of significant bits in the two-word bound */
if (bound[0]!=0)
   lim = 64 - _lmbd(1, bound[0]);
else
   lim = 32 - _lmbd(1, bound[1]);

/* turn the bit count into 2**(bits/3), scaled by the cube root of 2 or  */
/* 4 according to bits mod 3, then add one                               */
rem=lim%3;
lim = 1 << (lim/3);
if (rem==1)
   lim=(int)(((double)(lim))*croot2);
else if (rem==2)
   lim=(int)(((double)(lim))*croot4);
lim=lim+1;

if (lim>table3[t3size-1]) {
   /* the limit lies beyond the last table entry: report and skip the search */
   error[0]=5;
}
else {
   /* kmin = index of the last leading table entry that is below lim */
   kmin=0;
   for (pos=0; pos<t3size && table3[pos]<lim; pos++)
      kmin=pos;
   /***********************************/
   /*  factor (d**p + e**p)/(d + e)   */
   /***********************************/
   d=0;
   e=0;
   error[0]=0;	// clear error array
   n=dloop(dbeg, kmin, n, outsiz, output);
}

/* terminate the output list */
output[n]=0xffffffff;
return(0);
}
*******************************************************************************
*									      *
*  COMPUTE DIFFERENCES							      *
*  02/01/99 (dkc)							      *
*									      *
*     address of minuend (A[0], A[1], A[2], A[3]) => a4			      *
*     address of subtrahend (B[0], B[1], B[2], B[3]) => b4		      *
*     (the code negates B and adds A; the difference is stored over B)	      *
*									      *
*******************************************************************************
.global _bigbigd
.text
; _bigbigd: difference of two four-word (4 x 32-bit) unsigned values.
;   a4 = address of A[0..3], b4 = address of B[0..3].  Word [3] is the least
;   significant one: carries are propagated from [3] toward [0].
;   Every word of B is inverted and 1 is added to the inverted B[3] (i.e. B
;   is negated), then the words of A are added, so the result is A - B.
;   The four result words are written back through b4, overwriting B.
;   The return address is copied from b3 to b9 on entry because b3 is reused
;   below as the upper half of the b3:b2 accumulator.
_bigbigd:
ldw.d1 *+a4[3], a0	  ;  load A[3]
||	ldw.d2 *+b4[3], b0	  ;  load B[3]
||	mv.l2 b3, b9		  ;  save return address

ldw.d1 *+a4[2], a2	  ;  load A[2]
||	ldw.d2 *+b4[2], b2	  ;  load B[2]

ldw.d1 *+a4[1], a6	  ;  load A[1]
||	ldw.d2 *+b4[1], b6	  ;  load B[1]

ldw.d1 *a4, a8		  ;  load A[0]
||	ldw.d2 *b4, b8		  ;  load B[0]

nop

not.s2 b0, b0		  ;  invert B[3]
||	zero.l2 b1		  ;  load zero

not.s2 b2, b2		  ;  invert B[2]
||	add.l2 b1:b0, 1, b1:b0	  ;  -B[3]
||	subab.d2 b3, b3, b3	  ;  load zero

not.s2 b6, b6		  ;  invert B[1]
||	subab.d2 b7, b7, b7	  ;  load zero

not.s2 b8, b8		  ;  invert B[0]
||	zero.s1 a1		  ;  load 0
||	subab.d2 b3, b3, b3	  ;  load 0

; NOTE(review): the execute packet below starts with "||" but this listing
; has no leading instruction for it -- a source line appears to be missing.
; The carry left in b1 by the "-B[3]" add above is also not read by any
; visible instruction; confirm both against the original source.
||	addu.l1x a1:a0, b0, a1:a0 ;  A[3] + B[3]
||	addu.l2x b3:b2, a2, b3:b2 ;  A[2] + B[2] - carry
||	subab.d1 a7, a7, a7	  ;  load 0

addu.l1x a7:a6, b6, a7:a6 ;  A[1] + B[1] - carry
||	addu.l2x b3:b2, a1, b3:b2 ;  A[2] + B[2]
||	b.s2 b9 		  ;  return (the five packets below are its delay slots)

addu.l1x a7:a6, b3, a7:a6 ;  A[1] + B[1]
||	add.s2x b8, a8, b8	  ;  A[0] + B[0] - carry

add.l2x b8, a7, b8	  ;  A[0] + B[0]
||	stw.d2 a0, *+b4[3]	  ;  store A[3]+B[3]

stw.d2 b2, *+b4[2]	  ;  store A[2]+B[2]

stw.d2 a6, *+b4[1]	  ;  store A[1]+B[1]

stw.d2 b8, *b4		  ;  store A[0]+B[0]
.end
*******************************************************************************
*									      *
*  COMPUTE d*m (64*64)							      *
*  01/30/99 (dkc)							      *
*									      *
*     multiplicand (d[0], d[1]) => a4, b4				      *
*     multiplier (m[0], m[1]) => a6, b6 				      *
*     address of product => a8						      *
*									      *
*******************************************************************************
.def _bigbigp
.text
; _bigbigp: 64 x 64 -> 128-bit unsigned multiply.
;   a4:b4 = multiplicand d (a4 = most significant word)
;   a6:b6 = multiplier m   (a6 = most significant word)
;   a8    = address of the four-word product; word [3] is least significant
;   In the comments d0..d3 / m0..m3 are the 16-bit halves, index 0 lowest:
;   d1:d0 = b4, d3:d2 = a4, m1:m0 = b6, m3:m2 = a6.
;   The first half forms d * (m1:m0), the second half forms d * (m3:m2) and
;   adds it in one word higher.
;   Saves and restores a10, a11, b10, b11, b12, b13 on the stack (b15).
;
;   Fixes relative to the listing this was taken from:
;     1. the routine returns with "b b7", but b7 was never written and b3 is
;        overwritten below; the return address is now copied to b7 on entry.
;     2. a3 is reused for the d3*m1 / d3*m3 shifts, so it no longer held the
;        A-side stack pointer when the restore loads used it; it is rebuilt
;        from b15 just before the restores.
;     3. the "d1 * m2" product multiplied by b6 (m0); it now uses b12, the
;        B-side copy of a6 made in the second packet.
_bigbigp:

sub.s1x b15, 8, a3		;  load sp-2
||	mv.l2 b3, b7			;  save return address (b3 is reused below)

mpyhlu.m1x a4, b6, a0		;  d3 * m0
||	mpyhlu.m2 b4, b6, b0		;  d1 * m0
||	mv.l1 a8, a7			;  save address of product
||	mv.l2x a6, b12			;  load m[0]
||	stw.d2 b12, *b15--		;  save b12

mpyu.m1x a4, b6, a8		;  d2 * m0
||	mpyu.m2 b4, b6, b8		;  d0 * m0
||	stw.d2 a10, *b15--[2]		;  save a10
||	stw.d1 b10, *a3--[2]		;  save b10

shl.s1 a0, 16, a1		;  d3*m0 << 16
||	shl.s2 b0, 16, b1		;  d1*m0 << 16
||	zero.l1 a9			;  zero odd register of pair
||	zero.l2 b9			;  zero odd register of pair
||	mpyhu.m1x a4, b6, a2		;  d3 * m1
||	mpyhu.m2 b4, b6, b2		;  d1 * m1
||	stw.d2 a11, *b15--[2]		;  save a11
||	stw.d1 b11, *a3--[2]		;  save b11

addu.l1 a9:a8, a1, a9:a8	;  d3*m0<<16 + d2*m0
||	addu.l2 b9:b8, b1, b9:b8	;  p0 = d1*m0<<16 + d0*m0
||	shru.s1 a0, 16, a0		;  p2 = d3*m0 >> 16
||	shru.s2 b0, 16, b0		;  d1*m0 >> 16
||	subab.d2 b1, b1, b1		;  load 0
||	mpylhu.m1x a4, b6, a10		;  d2 * m1
||	mpylhu.m2 b4, b6, b10		;  d0 * m1

addab.d1 a0, a9, a0		;  p2 = p2 + carry
||	addu.l2 b1:b0, b9, b1:b0	;  d1*m0>>16 + carry
||	mv.s2x a8, b9			;  load d3*m0<<16 + d2*m0
||	shl.s1 a2, 16, a3		;  d3*m1 << 16 (a3 stops being a stack pointer here)
||	mpy.m2 b11, 0, b11		;  zero odd register of pair
||	zero.l1 a11			;  zero odd register of pair

addu.l2 b1:b0, b9, b1:b0	;  p1 = d1*m0>>16+carry+d3*m0<<16+d2*mp
||	shl.s2 b2, 16, b3		;  d1*m1 << 16
||	shru.s1 a2, 16, a2		;  p2' = d3*m1 >> 16

add.s1x a0, b1, a0		;  p2 = p2 + carry
||	addu.l1 a11:a10, a3, a11:a10	;  d3*m1<<16 + d2*m1
||	addu.l2 b11:b10, b3, b11:b10	;  p0' = d1*m1<<16 + d0*m1
||	shru.s2 b2, 16, b2		;  d1*m1 >> 16
||	subab.d2 b3, b3, b3		;  load 0

addab.d1 a2, a11, a2		;  p2' = p2' + carry
||	addu.l2 b3:b2, b11, b3:b2	;  d1*m1>>16 + carry
||	mv.s2x a10, b11 		;  load d3*m1<<16 + d2*m1
||	mpy.m2 b9, 0, b9		;  load 0

addu.l2 b3:b2, b11, b3:b2	;  p1' = d1*m0>>16+carry+d3*m0<<16+d2*mp
||	shl.s2 b10, 16, b11		;  p0' << 16
||	mpy.m2 b1, 0, b1		;  load 0

add.s1x a2, b3, a2		;  p2' = p2' + carry
||	addu.l2 b9:b8, b11, b9:b8	;  P0 = p0 + p0'<<16
||	shru.s2 b10, 16, b10		;  p0' >> 16

addu.l2 b1:b0, b9, b1:b0	;  p1 + carry
||	shl.s2 b2, 16, b9		;  p1' << 16
||	shl.s1 a2, 16, a2		;  p2' << 16

addu.l2 b1:b0, b10, b1:b0	;  p1 + carry + p0'>>16
||	shru.s2 b2, 16, b2		;  p1' >> 16
||	add.l1 a0, a2, a0		;  p2 + p2'<<16

; The B-side cross path is taken by the add below (it reads a0), so the
; upper multiplier word is read from its B-side copy in b12 rather than a6.
addu.l2 b1:b0, b9, b1:b0	;  P1 = p1'<<16 + p1 + carry + p0'>>16
||	add.s2x a0, b2, b2		;  P2 = p2 + p2'<<16 + p1'>>16
||	mpyhlu.m1 a4, a6, a0		;  d3 * m2
||	mpyhlu.m2 b4, b12, b0		;  d1 * m2 (was b6, i.e. m0)

add.s2 b2, b1, b13		;  P2 = P2 + carry
||	stw.d2 b13, *b15--		;  save b13
||	mpyu.m1 a4, a6, a8		;  d2 * m2
||	mpyu.m2x b4, a6, b8		;  d0 * m2
||	mv.l2 b0, b12			;  save P1 (b0 still holds P1; the mpy result lands later)

shl.s1 a0, 16, a1		;  d3*m2 << 16
||	shl.s2 b0, 16, b1		;  d1*m2 << 16
||	zero.l1 a9			;  zero odd register of pair
||	zero.l2 b9			;  zero odd register of pair
||	mpyhu.m1 a4, a6, a2		;  d3 * m3
||	mpyhu.m2x b4, a6, b2		;  d1 * m3
||	stw.d1 b8, *+a7[3]		;  store P0

addu.l1 a9:a8, a1, a9:a8	;  d3*m2<<16 + d2*m2
||	addu.l2 b9:b8, b1, b9:b8	;  p0 = d1*m2<<16 + d0*m2
||	shru.s1 a0, 16, a0		;  p2 = d3*m2 >> 16
||	shru.s2 b0, 16, b0		;  d1*m2 >> 16
||	subab.d2 b1, b1, b1		;  load 0
||	mpylhu.m1 a4, a6, a10		;  d2 * m3
||	mpylhu.m2x b4, a6, b10		;  d0 * m3

addab.d1 a0, a9, a0		;  p2 = p2 + carry
||	addu.l2 b1:b0, b9, b1:b0	;  d1*m2>>16 + carry
||	mv.s2x a8, b9			;  load d3*m2<<16 + d2*m2
||	shl.s1 a2, 16, a3		;  d3*m3 << 16
||	mpy.m2 b11, 0, b11		;  zero odd register of pair
||	zero.l1 a11			;  zero odd register of pair

addu.l2 b1:b0, b9, b1:b0	;  p1 = d1*m2>>16+carry+d3*m2<<16+d2*mp
||	shl.s2 b2, 16, b3		;  d1*m3 << 16
||	shru.s1 a2, 16, a2		;  p2' = d3*m3 >> 16

add.s1x a0, b1, a0		;  p2 = p2 + carry
||	addu.l1 a11:a10, a3, a11:a10	;  d3*m3<<16 + d2*m3
||	addu.l2 b11:b10, b3, b11:b10	;  p0' = d1*m3<<16 + d0*m3
||	shru.s2 b2, 16, b2		;  d1*m3 >> 16
||	subab.d2 b3, b3, b3		;  load 0

addab.d1 a2, a11, a2		;  p2' = p2' + carry
||	addu.l2 b3:b2, b11, b3:b2	;  d1*m3>>16 + carry
||	mv.s2x a10, b11 		;  load d3*m3<<16 + d2*m3
||	mpy.m2 b9, 0, b9		;  load 0

addu.l2 b3:b2, b11, b3:b2	;  p1' = d1*m2>>16+carry+d3*m2<<16+d2*mp
||	shl.s2 b10, 16, b11		;  p0' << 16
||	mpy.m2 b1, 0, b1		;  load 0

add.s1x a2, b3, a2		;  p2' = p2' + carry
||	addu.l2 b9:b8, b11, b9:b8	;  P0 = p0 + p0'<<16
||	shru.s2 b10, 16, b10		;  p0' >> 16

addu.l2 b1:b0, b9, b1:b0	;  p1 + carry
||	shl.s2 b2, 16, b9		;  p1' << 16
||	shl.s1 a2, 16, a2		;  p2' << 16

addu.l2 b1:b0, b10, b1:b0	;  p1 + carry + p0'>>16
||	shru.s2 b2, 16, b2		;  p1' >> 16
||	add.l1 a0, a2, a0		;  p2 + p2'<<16
||	mpy.m2 b9, 0, b9		;  load 0

addu.l2 b1:b0, b9, b1:b0	;  P1 = p1'<<16 + p1 + carry + p0'>>16
||	add.l1x a0, b2, a0		;  P2 = p2 + p2'<<16 + p1'>>16
||	mpy.m2 b1, 0, b1		;  load 0

add.s1x a0, b1, a0		;  P2 = P2 + carry
||	addu.l2 b9:b8, b12, b9:b8	;  P0 + old P1

; b15 is sp-24 here (six words pushed), so a3 = sp-20: one word below the
; saved b11, matching the pre-increment restore loads that follow.
addu.l2 b1:b0, b9, b1:b0	;  P1 + carry
||	add.s1x 4, b15, a3		;  rebuild A-side restore pointer

addu.l2 b1:b0, b13, b1:b0	;  P1 + carry + old P2

add.l2x a0, b1, b1		;  P2 + carry
||	ldw.d2 *++b15[1], b13		;  restore b13
||	ldw.d1 *++a3[1], a0		;  restore b11 (via a0, moved to b11 below)

ldw.d2 *++b15[2], a11		;  restore a11
||	ldw.d1 *++a3[2], b10		;  restore b10
||	b.s2 b7 			;  return (the five packets below are its delay slots)

ldw.d2 *++b15[2], a10		;  restore a10
||	ldw.d1 *++a3[2], b12		;  restore b12

stw.d1 b8, *+a7[2]		;  store P0
||	addaw.d2 b15, 1, b15		;  pop stack

stw.d1 b0, *+a7[1]		;  store P1

stw.d1 b1, *a7			;  store P2
||	mv.l2x a0, b11			;  load b11

nop
.end

*******************************************************************************
*									      *
*  COMPUTE QUOTIENTS (128/64)						      *
*  01/31/99 (dkc)							      *
*									      *
*     dividend (A[0], A[1], A[2], A[3]) => a4, b4, a6, b6		      *
*     address of quotient => a8 					      *
*     divisor (D[0], D[1]) => b8, a10					      *
*									      *
*******************************************************************************
.global _bigbigq
.text
; _bigbigq: quotient of a four-word dividend by a two-word divisor (per the
; file header: dividend words in a4, b4, a6, b6; divisor in b8, a10; address
; of the quotient in a8).
;
; Outline of the visible code:
;   1. lmbd counts locate the leading bits of divisor and dividend; their
;      difference is the alignment shift.  If the compare flag a2 is set the
;      routine branches to zskip with the working registers cleared.
;   2. Up to label askip the divisor is shifted left by that amount, handled
;      in steps of 32 bits (shift < 32, < 64, < 96, otherwise); b12 counts
;      down from 3 to record which case applied.
;   3. The negated, aligned divisor is formed word by word (not + add 1 with
;      carry propagation).
;   4. aloop runs a shift-and-subtract loop; b2 is the loop counter and is
;      loaded with the shift count.  Each pass adds the negated divisor to
;      the working value, tests the sign of the top word (a2) and keeps
;      either the shifted difference with a 1 shifted in or the shifted
;      original.
;   5. After the loop, extu with the packed (127-shift)::(127-shift) field
;      from b9 is applied to the word selected by b12, higher words are
;      cleared, and the four words are stored.
;   Callee-saved a12, a13, a14, a15, b10, b11, b12 are pushed on the stack
;   through b15 and restored before returning through b3.
;
; NOTE(review): as listed, several registers are read before any visible
; instruction writes them (a13 and a12 are read as A[2]/A[3], a6 is treated
; as D[3] although the header passes A[2] in it, a5 is used as A[0] in the
; loop), and the result is stored through b4 although the header passes the
; quotient address in a8, which is overwritten with the shift count.  Source
; lines appear to be missing from this listing; confirm against the original
; before relying on the register descriptions above.
_bigbigq:
mv.l1 a4, a1	      ;  load A[0]
||	mv.s1x b4, a4	      ;  load A[1]
||	mvk.s2 32, b9	      ;  load 32
||	stw.d2 a13, *b15--    ;  save a13

lmbd.l2 1, b8, b5     ;  left-most bit detection
||	lmbd.l1 1, a10, a5    ;  left-most bit detection
||	mv.s1x b6, a3	      ;  load A[3]
||	mv.s2x a13, b2	      ;  load A[2]

[!b1] add.l2x b5, a5, b5    ;  divisor left-most bit detection
||	cmpltu.l1x a13, b8, a2	;  compare A[2] to D[2]
||	stw.d2 a12, *b15--    ;  save a12
||	or.s1 a1, a4, a9      ;  A[0] | A[1]

lmbd.l1 1, a5, a0     ;  left-most bit detection (A[0])
||	lmbd.l2x 1, a4, b0    ;  left-most bit detection (A[1])
||	sub.s2 b2, b8, b2     ;  compare A[2] to D[2]
||	mv.s1x b5, a7	      ;  load divisor left-most bit detection
||	stw.d2 b11, *b15--    ;  save b11
||	mpy.m2 b11, 0, b11    ;  zero D[0]

[!a1] add.s1x b0, a0, a0    ;  left-most bit detection (A[0] and A[1])
||[!b2] cmpltu.l1 a12, a6, a2  ;  compare A[3] to D[3]
||	lmbd.l2x 1, a13, b5   ;  left-most bit detection (A[2])
||	stw.d2 b10, *b15--    ;  save b10
||	mpy.m2 b10, 0, b10    ;  zero D[1]

[!a1] add.s1x a0, b5, a0    ;  left-most bit detection (A[0], A[1], and A[2])
||	or.l1 a1, a13, a1     ;  A[0] | A[1] | A[2]
||	lmbd.l2x 1, a12, b5   ;  left-most bit detection (A[3])
||	stw.d2 b12, *b15--    ;  save b12
|| [a1] subab.d1 a2, a2, a2   ;  load zero

[!a1] add.l1x a0, b5, a0    ;  dividend left-most bit detection
||	addk.s1 64, a7	      ;  divisor left-most bit detection + 64
|| [a2] subab.d1 a4, a4, a4   ;  load 0
|| [a2] mpy.m1 a13, 0, a13    ;  load 0

subab.d1 a7, a0, a8   ;  shift = lmbd(1,x2) - lmbd(1,x1)
||	mv.l1x b8, a7	      ;  load D[2]
|| [a2] b.s2 zskip	      ;  return zero
|| [a2] mpy.m1 a5, 0, a5      ;  load 0
||	mvk.s1 32, a3	      ;  load 32

[!a2] cmplt.l1 a8, a3, a2   ;  compare shift to 32
|| [a2] subab.d1 a2, a2, a2   ;  load zero
||	sub.l2x b9, a8, b0    ;  32 - shift
||	sub.s1 a8, a3, a9     ;  shift - 32
||	mvk.s2 32, b5	      ;  load 32
|| [a2] mpy.m1 a12, 0, a12    ;  load 0

[!a2] addab.d1 a6, 0, a7    ;  D[2] = D[3]
||[!a2] mpy.m1 a6, 0, a6      ;  clear D[3]
||[!a2] addab.d2 b10, 0, b11  ;  D[0] = D[1]
||[!a2] mv.l2 b8, b10	      ;  D[1] = D[2]
|| [a2] shru.s2x a6, b0, b2   ;  D[2] = D[3] >> (32-shift)
|| [a2] b.s1 askip	      ;  branch if shift < 32

[a2] shl.s1 a7, a8, a7     ;  D[2] << shift
|| [a2] shru.s2 b10, b0, b1   ;  D[0] = D[1] >> (32-shift)
||	mv.l2x a8, b8	      ;  load shift
|| [a2] addab.d1 a7, 0, a0    ;  save D[2]
|| [a2] mv.l1x b0, a1	      ;  load 32-shift

[a2] or.l1x a7, b2, a7     ;  load D[2]
|| [a2] shl.s2 b11, b8, b11   ;  D[0] << shift
|| [a2] shru.s1 a0, a1, a0    ;  D[1] = D[2] >> (32-shift)

[a2] or.l2 b11, b1, b11    ;  load D[0]
|| [a2] shl.s2 b10, b8, b10   ;  D[1] << shift
|| [a2] shl.s1 a6, a8, a6     ;  D[3] << shift

[a2] or.l2x b10, a0, b10   ;  load D[1]
||	cmplt.l1 a9, a3, a2   ;  compare shift-32 to 32
||[!a2] add.s2 b0, b5, b0     ;  64 - shift
||[!a2] sub.s1 a9, a3, a9     ;  shift - 64
||[!a2] subab.d2 b8, b5, b8   ;  shift - 32

mvk.s2 3, b12	      ;  load 3
*
[!a2] mpy.m1 a7, 0, a7      ;  clear D[2]
||[!a2] addab.d2 b10, 0, b11  ;  D[0] = D[1]
||[!a2] mv.l2x a7, b10	      ;  D[1] = D[2]
|| [a2] shru.s2 b10, b0, b1   ;  D[0] = D[1] >> (64-shift)
|| [a2] b.s1 askip	      ;  branch if shift < 64
|| [a2] mv.l1x b0, a1	      ;  load 64-shift

[a2] shl.s2 b11, b8, b11   ;  D[0] << (shift-32)
|| [a2] shru.s1 a7, a1, a0    ;  D[1] = D[2] >> (64-shift)
|| [a2] mv.l1x b8, a1	      ;  load shift - 32

[a2] or.l2 b11, b1, b11    ;  load D[0]
|| [a2] shl.s2 b10, b8, b10   ;  D[1] << (shift-32)
|| [a2] shl.s1 a7, a1, a7     ;  D[2] << (shift-32)

[a2] or.l2x b10, a0, b10   ;  load D[1]
||	cmplt.l1 a9, a3, a2   ;  compare shift-64 to 32
||[!a2] add.s2 b0, b5, b0     ;  96 - shift
||[!a2] sub.s1 a9, a3, a9     ;  shift - 96
||[!a2] subab.d2 b8, b5, b8   ;  shift - 64

mv.l1x b8, a0	      ;  load shift - 64
||	subab.d2 b12, 1, b12  ;  load 2

nop
*
[!a2] addab.d2 b10, 0, b11  ;  D[0] = D[1]
||[!a2] mpy.m2 b10, 0, b10    ;  D[1] = 0
|| [a2] shru.s2 b10, b0, b1   ;  D[0] = D[1] >> (96-shift)
||	sub.l2 b8, b5, b2     ;  shift - 96

[a2] shl.s1x b11, a0, a0   ;  D[0] << (shift-64)
||[!a2] shl.s2 b11, b2, b11   ;  D[0] << (shift-96)
||	subab.d2 b12, 1, b12  ;  load 1
||[!a2] add.l2 b0, b5, b0     ;  128 - shift

[a2] or.l2x b1, a0, b11    ;  load D[0]
|| [a2] shl.s2 b10, b8, b10   ;  D[1] << (shift-64)
||[!a2] subab.d2 b12, 1, b12  ;  load 0

; Divisor is aligned: negate it word by word and set up the loop.
; b2 receives the shift count and serves as the aloop counter; b9 packs
; (127-shift) into two adjacent 5-bit fields for the extu after the loop.
askip	not.l1 a7, a0	      ;  invert D[2]
||	not.s1 a6, a8	      ;  invert D[3]
||	subab.d1 a9, a9, a9   ;  load 0
||	mpy.m1 a1, 0, a1      ;  load 0
||	mv.l2x a8, b2	      ;  load shift
||	sub.s2 b0, 1, b0      ;  127-shift

add.l1 a9:a8, 1, a9:a8	;  -D[3]
||	mpy.m1 a3, 0, a3      ;  load 0
||	shl.s2 b0, 5, b9      ;  (127-shift) << 5

add.l1 a1:a0, a9, a1:a0  ;  -D[2]
||	not.s1x b10, a2       ;  invert D[1]
||	or.l2 b9, b0, b9      ;  (127-shift)::(127-shift)

add.l1 a3:a2, a1, a3:a2  ;  -D[1]
||	not.s1x b11, a0       ;  invert D[0]

add.l1 a0, a3, a0     ;  -D[0]
||	mv.l2x a2, b10	      ;  load -D[1]
||	mv.s1 a13, a14	      ;  save A[2]
||	mpy.m1 a13, 0, a13    ;  load 0
||	stw.d2 a14, *b15--    ;  save a14

mv.l2x a0, b11	      ;  load -D[0]
||	zero.s1 a1	      ;  load 0
||	stw.d2 a15, *b15--    ;  save a15
*****************
*  begin loop	*
*****************
; One quotient bit per pass.  The branch back to aloop is issued in the
; fifth packet so that the remaining packets of the body fill its delay
; slots; b2 is decremented in the last packet of the body.
aloop	addu.l1 a13:a12, a8, a1:a0  ;  A[3] - D[3]
||	subab.d1 a15, a15, a15	 ;  load 0
||	shru.s2x a14, 31, b7  ;  isolate MSB of A[2]
||	shru.s1 a12, 31, a3   ;  isolate MSB of A[3]

addu.l1 a15:a14, a1, a15:a14  ;  A[2] + carry
||	shru.s2x a0, 31, b0   ;  isolate MSB of delta[3]
||	addab.d1 a0, a0, a0   ;  (A[3]-D[3]) << 1
||	shl.s1 a14, 1, a13    ;  A[2] << 1
||	mpy.m1 a7, 0, a7      ;  load 0

addu.l1 a15:a14, a9, a15:a14  ;  A[2] - D[2]
||	or.s1 a0, 1, a0       ;  (A[3]-D[3])<<1 | 1
||	shru.s2x a4, 31, b5   ;  isolate MSB of A[1]

addu.l1 a7:a6, a15, a7:a6  ;  A[1] + carry
||	addab.d1 a14, a14, a1 ;  (A[2]-D[2]) << 1
||	shru.s2x a14, 31, b1   ;  isolate MSB of delta[2]
||	mv.s1x b10, a14       ;  load D[1]

addu.l1 a7:a6, a14, a7:a6   ;  A[1] - D[1]
||	or.s1x a1, b0, a1     ;  (A[2]-D[2])<<1 | LSB
||	addab.d1 a12, a12, a12	;  A[3] << 1
|| [b2] b.s2 aloop	      ;  conditional branch to loop beginning

add.l1 a5, a7, a7     ;  A[0] + carry
||	shru.s1 a6, 31, a5    ;  isolate MSB of delta[1]
||	addab.d1 a6, a6, a6   ;  (A[1]-D[1]) << 1
||	shl.s2x a5, 1, b6     ;  A[0] << 1

add.l1x a7, b11, a7   ;  A[0] - D[0]
||	or.s1 a13, a3, a14    ;  (A[2]<<1) | LSB
||	addab.d1 a4, a4, a4   ;  A[1] << 1
||	or.l2 b6, b5, b6      ;  (A[0]<<1) | LSB

cmplt.l1 a7, 0, a2    ;  compare delta to 0
||	addab.d1 a7, a7, a7   ;  (A[0]-D[0]) << 1
||	or.s1x a6, b1, a6     ;  (A[1]-D[1])<<1 | LSB
||	or.l2x b7, a4, b7     ;  (A[1]<<1) | LSB
||	mpy.m1 a1, 0, a1      ;  load 0

[!a2] or.l1 a7, a5, a5      ;  A[0] = (A[0]-D[0])<<1 | LSB
|| [a2] mv.s1x b7, a4	      ;  A[1] = (A[1]<<1) | LSB
||[!a2] addab.d1 a1, 0, a14   ;  A[2] = (A[2]-D[2])<<1 | LSB
||	mpy.m1 a13, 0, a13    ;  load 0

[!a2] addab.d1 a0, 0, a12   ;  A[3] = (A[3]-D[3])<<1
||[!a2] mv.l1 a6, a4	      ;  A[1] = (A[1]-D[1])<<1 | LSB
|| [a2] mv.s1x b6, a5	      ;  A[0] = (A[0]<<1) | LSB
|| [b2] sub.l2 b2, 1, b2      ;  decrement loop count
*****************
*  end loop	*
*****************
; b12 (3..0) selects which word receives the extu bit-field extraction;
; all more significant words are cleared.
cmpeq.l2 b12, 3, b1   ;  compare flag to 3
||	mv.l1x b9, a0	      ;  load shift
||	mv.s1 a14, a13	      ;  load A[2]

[b1] extu.s1 a12, a0, a12  ;  A[3] << shift
|| [b1] zero.l1 a13	      ;  A[2] = 0
|| [b1] subab.d1 a4, a4, a4   ;  A[1] = 0
|| [b1] mpy.m1 a5, 0, a5      ;  A[0] = 0
||	cmpeq.l2 b12, 2, b1   ;  compare flag to 2

[b1] extu.s1 a13, a0, a13  ;  A[2] << shift
|| [b1] zero.l1 a4	      ;  A[1] = 0
|| [b1] subab.d1 a5, a5, a5   ;  A[0] = 0
||	cmpeq.l2 b12, 1, b1   ;  compare flag to 1

[b1] extu.s1 a4, a0, a4    ;  A[1] << shift
|| [b1] zero.l1 a5	      ;  A[0] = 0
||	cmpeq.l2 b12, 0, b1   ;  compare flag to 0
||	ldw.d2 *++b15[1], a15 ;  restore a15

[b1] extu.s1 a5, a0, a5    ;  A[0] << shift
||	ldw.d2 *++b15[1], a14 ;  restore a14

; zskip is also the target of the early "return zero" branch, which is
; taken before a14/a15 are pushed, so only the first five saves are popped
; from here on.
zskip	ldw.d2 *++b15[1], b12 ;  restore b12

ldw.d2 *++b15[1], b10 ;  restore b10

ldw.d2 *++b15[1], b11 ;  restore b11

ldw.d2 *++b15[1], a12 ;  restore a12
||	b.s2 b3               ;  return (the five packets below are its delay slots)

ldw.d2 *++b15[1], a13 ;  restore a13

stw.d2 a5, *b4	      ;  store quotient

stw.d2 a4, *+b4[1]    ;  store quotient

stw.d2 a13, *+b4[2]   ;  store quotient
||	mv.l1 a12, a0	      ;  save quotient

stw.d2 a0, *+b4[3]    ;  store quotient
.end
*******************************************************************************
*									      *
*  COMPUTE SUMS 							      *
*  02/01/99 (dkc)							      *
*									      *
*     address of augend (A[0], A[1], A[2], A[3]) => a4			      *
*     address of addend (B[0], B[1], B[2], B[3]) => b4 (sum is stored over B) *
*									      *
*******************************************************************************
.global _bigbigs
.text
; _bigbigs: sum of two four-word (4 x 32-bit) unsigned values.
;   a4 = address of A[0..3], b4 = address of B[0..3].  Word [3] is the least
;   significant one: carries are propagated from [3] toward [0].
;   The four words of A + B are written back through b4, overwriting B; a
;   carry out of word [0] is discarded.
;   The return address is copied from b3 to b9 on entry because b3 is reused
;   as the upper half of the b3:b2 accumulator.
;
;   Fix relative to the listing this was taken from: the packet that clears
;   the carry registers had lost its first instruction, leaving a1 (the
;   upper half of a1:a0) undefined when it is used as the carry out of
;   word [3].  The packet now clears a1 as well as b3.
_bigbigs:
ldw.d1 *+a4[3], a0	  ;  load A[3]
||	ldw.d2 *+b4[3], b0	  ;  load B[3]
||	mv.l2 b3, b9		  ;  save return address

ldw.d1 *+a4[2], a2	  ;  load A[2]
||	ldw.d2 *+b4[2], b2	  ;  load B[2]

ldw.d1 *+a4[1], a6	  ;  load A[1]
||	ldw.d2 *+b4[1], b6	  ;  load B[1]

ldw.d1 *a4, a8		  ;  load A[0]
||	ldw.d2 *b4, b8		  ;  load B[0]

nop 2			  ;  wait for the first loads to complete

zero.s1 a1		  ;  load 0 (upper half of a1:a0 must be clear)
||	zero.s2 b3		  ;  load 0

addu.l1x a1:a0, b0, a1:a0 ;  A[3] + B[3]
||	addu.l2x b3:b2, a2, b3:b2 ;  A[2] + B[2] - carry
||	subab.d1 a7, a7, a7	  ;  load 0

addu.l1x a7:a6, b6, a7:a6 ;  A[1] + B[1] - carry
||	addu.l2x b3:b2, a1, b3:b2 ;  A[2] + B[2]
||	b.s2 b9 		  ;  return (the five packets below are its delay slots)

addu.l1x a7:a6, b3, a7:a6 ;  A[1] + B[1]
||	add.s2x b8, a8, b8	  ;  A[0] + B[0] - carry

add.l2x b8, a7, b8	  ;  A[0] + B[0]
||	stw.d2 a0, *+b4[3]	  ;  store A[3]+B[3]

stw.d2 b2, *+b4[2]	  ;  store A[2]+B[2]

stw.d2 a6, *+b4[1]	  ;  store A[1]+B[1]

stw.d2 b8, *b4		  ;  store A[0]+B[0]
.end
*******************************************************************************
*									      *
*  COMPUTE d*m (64*32)							      *
*  01/30/99 (dkc)							      *
*									      *
*     multiplicand (d[0], d[1]) => a4, b4				      *
*     multiplier (m[0]) => a6						      *
*     address of product => b6						      *
*									      *
*******************************************************************************
.def _bigprod
.text
; _bigprod: 64 x 32 -> 96-bit unsigned multiply.
;   a4:b4 = multiplicand d (a4 = most significant word)
;   a6    = multiplier m
;   b6    = address of the three-word product; word [2] is least significant
;   In the comments d0..d3 are the 16-bit halves of d (d1:d0 = b4,
;   d3:d2 = a4) and m0, m1 are the low and high halves of a6.
;   a10, a11, b10 and b11 are saved on the stack (b15) and restored.
;
;   Fix relative to the listing this was taken from: the routine returns
;   with "b b7", but b7 was never written and b3 is overwritten below
;   (d1*m1 << 16).  The return address is now copied to b7 in the first
;   packet.  The final word is also labelled P2 instead of P0 in the
;   comments.
_bigprod:
mpyhlu.m1 a4, a6, a0		;  d3 * m0
||	mpyhlu.m2x b4, a6, b0		;  d1 * m0
||	sub.s1x b15, 4, a3		;  load sp-1
||	mv.l2 b3, b7			;  save return address (b3 is reused below)

mpyu.m1 a4, a6, a8		;  d2 * m0
||	mpyu.m2x b4, a6, b8		;  d0 * m0
||	stw.d2 a10, *b15--[2]		;  save a10
||	stw.d1 b10, *a3--[2]		;  save b10

shl.s1 a0, 16, a1		;  d3*m0 << 16
||	shl.s2 b0, 16, b1		;  d1*m0 << 16
||	zero.l1 a9			;  zero odd register of pair
||	zero.l2 b9			;  zero odd register of pair
||	mpyhu.m1 a4, a6, a2		;  d3 * m1
||	mpyhu.m2x b4, a6, b2		;  d1 * m1
||	stw.d2 a11, *b15--[2]		;  save a11
||	stw.d1 b11, *a3--[2]		;  save b11

addu.l1 a9:a8, a1, a9:a8	;  d3*m0<<16 + d2*m0
||	addu.l2 b9:b8, b1, b9:b8	;  p0 = d1*m0<<16 + d0*m0
||	shru.s1 a0, 16, a0		;  p2 = d3*m0 >> 16
||	shru.s2 b0, 16, b0		;  d1*m0 >> 16
||	subab.d2 b1, b1, b1		;  load 0
||	mpylhu.m1 a4, a6, a10		;  d2 * m1
||	mpylhu.m2x b4, a6, b10		;  d0 * m1

addab.d1 a0, a9, a0		;  p2 = p2 + carry
||	addu.l2 b1:b0, b9, b1:b0	;  d1*m0>>16 + carry
||	mv.s2x a8, b9			;  load d3*m0<<16 + d2*m0
||	shl.s1 a2, 16, a3		;  d3*m1 << 16 (a3 is no longer needed as a stack pointer)
||	mpy.m2 b11, 0, b11		;  zero odd register of pair
||	zero.l1 a11			;  zero odd register of pair

addu.l2 b1:b0, b9, b1:b0	;  p1 = d1*m0>>16+carry+d3*m0<<16+d2*mp
||	shl.s2 b2, 16, b3		;  d1*m1 << 16
||	shru.s1 a2, 16, a2		;  p2' = d3*m1 >> 16

add.s1x a0, b1, a0		;  p2 = p2 + carry
||	addu.l1 a11:a10, a3, a11:a10	;  d3*m1<<16 + d2*m1
||	addu.l2 b11:b10, b3, b11:b10	;  p0' = d1*m1<<16 + d0*m1
||	shru.s2 b2, 16, b2		;  d1*m1 >> 16
||	subab.d2 b3, b3, b3		;  load 0

addab.d1 a2, a11, a2		;  p2' = p2' + carry
||	addu.l2 b3:b2, b11, b3:b2	;  d1*m1>>16 + carry
||	mv.s2x a10, b11 		;  load d3*m1<<16 + d2*m1
||	mpy.m2 b9, 0, b9		;  load 0

addu.l2 b3:b2, b11, b3:b2	;  p1' = d1*m0>>16+carry+d3*m0<<16+d2*mp
||	shl.s2 b10, 16, b11		;  p0' << 16
||	mpy.m2 b1, 0, b1		;  load 0

add.s1x a2, b3, a2		;  p2' = p2' + carry
||	addu.l2 b9:b8, b11, b9:b8	;  P0 = p0 + p0'<<16
||	shru.s2 b10, 16, b10		;  p0' >> 16
||	ldw.d2 *++b15[1], b11		;  restore b11

addu.l2 b1:b0, b9, b1:b0	;  p1 + carry
||	shl.s2 b2, 16, b9		;  p1' << 16
||	shl.s1 a2, 16, a2		;  p2' << 16

addu.l2 b1:b0, b10, b1:b0	;  p1 + carry + p0'>>16
||	shru.s2 b2, 16, b2		;  p1' >> 16
||	add.l1 a0, a2, a0		;  p2 + p2'<<16
||	ldw.d2 *++b15[1], a11		;  restore a11

addu.l2 b1:b0, b9, b1:b0	;  P1 = p1'<<16 + p1 + carry + p0'>>16
||	add.l1x a0, b2, a0		;  P2 = p2 + p2'<<16 + p1'>>16
||	b.s2 b7 			;  return (the five packets below are its delay slots)
||	ldw.d2 *++b15[1], b10		;  restore b10

ldw.d2 *++b15[1], a10		;  restore a10

stw.d2 b8, *+b6[2]		;  store P0

add.l1x a0, b1, a0		;  P2 = P2 + carry
||	stw.d2 b0, *+b6[1]		;  store P1

stw.d2 a0, *b6			;  store P2

nop
.end

*******************************************************************************
*									      *
*  COMPUTE X**((Q-1)/P)=1(MOD Q)					      *
*  06/25/99 (dkc)							      *
*									      *
*     (q-1)/p => a4, b4 						      *
*     q => a6, b6							      *
*     address of output => a8						      *
*     x => b8								      *
*									      *
*******************************************************************************
.global _bigresx
.global _bigbigp, _bigbigq
.bss save, 64*8, 4
.bss temp, 4*4, 4
.bss temp1, 4*4, 4
.text
; _bigresx: power-residue computation x**((q-1)/p) mod q (per the file
; header: (q-1)/p in a4:b4, q in a6:b6, output address in a8, x in b8).
;
; Outline of the visible code:
;   prologue  - saves a10-a15, b10-b14, the output address and b3 on the
;               stack, and keeps the arguments in a10:b10 ((q-1)/p),
;               a11:b11 (q), a13:b13 (running power, starting at 0:x) and
;               a15:a14 (P, starting at 0:1).
;   aloop     - stores the current power through b14, doubles P, squares the
;               power with _bigbigp and reduces it with _bigbigq; the
;               quotient is multiplied back by q and subtracted from the
;               product, and the upper two difference words are tested
;               (non-zero branches to "error", which spins).
;   bloop     - walks back down through the saved powers (b14 is
;               post-decremented) while P is halved, multiplying and
;               reducing whenever P does not exceed the remaining count in
;               a10:b10, until the count reaches zero.
;   epilogue  - restores the saved registers, stores a13 and b13 as the
;               two-word residue through a8 and returns through b3.
;
; NOTE(review): this listing looks incomplete.  Several execute packets
; begin with "||" and have no leading instruction, and no visible
; instruction initialises b14 (pointer into "save"), a12 (pointer to the
; product buffer) or b3 before the branches to _bigbigp/_bigbigq.  The
; labels pskip, qskip, wskip, xskip and yskip are presumably the return
; points loaded into b3 by the missing lines -- confirm against the
; original source.
_bigresx:
mv.l1 a4, a10	     ;	save MSW of (q-1)/p
||	stw.d2 a10, *b15--   ;	save a10

mv.l2 b4, b10	     ;	save LSW of (q-1)/p
||	stw.d2 b10, *b15--   ;	save b10

mv.l1 a6, a11	     ;	save MSW of q
||	stw.d2 a11, *b15--   ;	save a11

mv.l2 b6, b11	     ;	save LSW of q
||	stw.d2 b11, *b15--   ;	save b11

; NOTE(review): leading instruction of the next two packets is missing.
||	stw.d2 a12, *b15--   ;	save a12

||	mv.l2 b8, b12	     ;	save x
||	stw.d2 b12, *b15--   ;	save b12

stw.d2 a13, *b15--   ;	save a13
||	zero.l1 a13	     ;	load 0

stw.d2 b13, *b15--   ;	save b13
||	mv.l2 b8, b13	     ;	load x

stw.d2 a14, *b15--   ;	save a14
||	mvk.s1 1, a14	     ;	load P[1]

stw.d2 b14, *b15--   ;	save b14

stw.d2 a15, *b15--   ;	save a15
||	zero.s1 a15	     ;	load P[0]

stw.d2 a8, *b15--    ;	save address of output

stw.d2 b3, *b15--    ;	save return address
||	zero.s1 a2	     ;	load 0
*****************
*  begin loop	*
*****************
aloop:
[!a2] b.s2 _bigbigp	     ;	call product subroutine
||	mv.l1 a13, a4	     ;	A[0] = MSW(x**(2**n))
||	mv.l2 b13, b4	     ;	A[1] = LSW(x**(2**n))
||	addab.d1 a13, 0, a6  ;	M[0] = MSW(x**(2**n))
||	addab.d2 b13, 0, b6  ;	M[1] = LSW(x**(2**n))

shl.s1 a15:a14, 1, a15:a14   ;	P[0]:P[1] = P[0]:P[1]*2
||	add.l1 a15, a15, a0  ;	P[0] << 1

and.l1 a15, 1, a15   ;	isolate carry bit
||	stw.d2 a13, *b14++   ;	save MSW(x**(2**n))

or.l1 a15, a0, a15   ;	P[0]:P[1]
||	stw.d2 b13, *b14++   ;	save LSW(x**(2**n))

nop 2
*
pskip	cmpgtu.l1 a15, a10, a1	;  compare P[0] to MSW (q-1)/p
||	sub.s1 a15, a10, a2  ;	compare P[0] to MSW (q-1)/p
||	ldw.d1 *a12, a4      ;	load product[0]

[!a2] cmpgtu.l1x a14, b10, a1 ; compare P[1] to LSW (q-1)/p
||	ldw.d1 *+a12[1], b4  ;	load product[1]

[a1] b.s2 askip	     ;	exit loop
||	ldw.d1 *+a12[2], a6  ;	load product[2]
|| [a1] shru.s1 a15:a14, 1, a15:a14 ;  P[1] >> 1
|| [a1] mv.l1 a15, a0	     ;	save P[0]

[!a1] b.s2 _bigbigq	     ;	call quotient subroutine
||	ldw.d1 *+a12[3], b6  ;	load product[3]
||	mv.l2x a11, b8	      ;  load q[0]
|| [a1] shru.s1 a0, 1, a15   ;	P[0] >> 1

; NOTE(review): leading instruction of the next three packets is missing.
||[!a1] mv.l1x b11, a10      ;	load q[1]
||[!a1] stw.d2 a10, *b15--   ;	save MSW (q-1)/p

|| [a1] subaw.d2 b14, 3, b14 ;	decrement pointer

||[!a1] stw.d2 a11, *b15--   ;	save q[0]

nop
*
nop
*
qskip	b.s2 _bigbigp	     ;	call product subroutine
||	ldw.d1 *+a11[2], b0  ;	load quotient[0]
||	ldw.d2 *++b15[1], a11  ;  restore q[0]

ldw.d1 *+a11[3], b4  ;	load quotient[1]
||	ldw.d2 *++b15[1], a10  ;  restore MSW (q-1)/p
||	mv.l2 b11, b6	     ;	load q[1]

; NOTE(review): leading instruction of the next packet is missing.
||	stw.d2 b11, *b15--   ;	save q[1]

nop 2

mv.l1 a11, a6	     ;	load q[0]
||	mv.s1x b0, a4	     ;	load quotient[0]
*
; NOTE(review): leading instruction of the next packet is missing.
||	ldw.d1 *+a12[3], a7  ;	load proda[3]
||	ldw.d2 *++b15[1], b11  ;  restore q[1]

ldw.d1 *+a12[2], a6  ;	load proda[2]
||	ldw.d2 *+b9[3], b7   ;	load prodb[3]

ldw.d1 *+a12[1], a5  ;	load proda[1]
||	ldw.d2 *+b9[2], b6   ;	load prodb[2]

ldw.d1 *a12, a4      ;	load proda[0]
||	ldw.d2 *+b9[1], b5   ;	load prodb[1]

ldw.d2 *b9, b4	     ;	load prodb[0]

nop

; differ = proda - prodb, formed as proda + (-prodb) word by word with
; carry propagation from word [3] upward; differ[3] becomes the new LSW of
; the running power (b13).
not.s2 b7, b8	     ;	not(prodb[3])
||	zero.l2 b9	     ;	load zero
||	mpy.m2 b7, 0, b7     ;	load zero

not.s2 b6, b6	     ;	not(prodb[2])
||	add.l2 b9:b8, 1, b9:b8 ;  -prodb[3]
||	mv.l1 a7, a8	     ;	load proda[3]

not.s2 b5, b8	     ;	not(prodb[1])
||	addu.l2 b7:b6, b9, b7:b6 ;  -prodb[2]
||	mv.l1x b8, a3	     ;	load -prodb[3]
||	zero.s1 a9	     ;	load zero
||	subab.d2 b9, b9, b9  ;	load zero
||	mpy.m1 a7, 0, a7     ;	load zero

not.s2 b4, b4	     ;	not(prodb[0])
||	addu.l2 b9:b8, b7, b9:b8  ;  -prodb[1]
||	addu.l1 a9:a8, a3, a9:a8  ;  differ[3]
||	mv.s1x b6, a2	     ;	load -prodb[2]
||	mpy.m1 a9, 0, a9     ;	load zero

addu.l1 a7:a6, a9, a7:a6  ;  proda[2] + carry
||	add.l2 b4, b9, b4    ;	- prodb[0]
||	mv.s2x a8, b13	     ;	load differ[3]
||	mpy.m1 a3, 0, a3     ;	load zero

mv.s1x b8, a1	     ;	load -prodb[1]
||	addu.l1 a7:a6, a2, a7:a6  ;  differ[2]

; NOTE(review): leading instruction of the next packet is missing.
||	b.s2 aloop	     ;	branch to loop beginning
||	addu.l1 a3:a2, a7, a3:a2  ;  proda[1] + carry
||	mv.s1x b8, a5	     ;	load -prodb[1]

addu.l1 a3:a2, a5, a3:a2  ;  differ[1]
||	zero.s1 a5	     ;	load zero

addu.l1 a5:a4, a3, a5:a4  ;  proda[0] + carry

add.l1x a4, b4, a4   ;	differ[0]

or.l1 a4, a2, a2     ;	differ[0] | differ[1]

[a2] b.s1 error	     ;	branch to error if non-zero
*
error	b.s2 error	     ;	spin if error

nop 5
*****************
*  end loop	*
*****************
; NOTE(review): leading instruction of the next packet is missing; the
; label askip targeted by the loop exit above is not defined in the
; visible text and was presumably on the missing line.
||	not.l2x a14, b4      ;	not(LSW((q-1)/p))
||	zero.s2 b5	     ;	load zero

add.l2 b5:b4, 1, b5:b4 ;  -LSW((q-1)/p))
||	not.s1 a15, a0	     ;	not(MSW((q-1)/p))
||	mpy.m1 a5, 0, a5     ;	load zero

add.l2x a0, b5, b5   ;	-MSW((q-1)/p)
||	mv.l1x b4, a0	     ;	load -LSW((q-1)/p))
||	shru.s1 a15:a14, 1, a15:a14  ;	P[1] >> 1
||	addab.d1 a15, 0, a2  ;	save P[0]

addu.l1 a5:a4, a0, a5:a4  ;  LSW((q-1)/p)) - P[1]
||	shru.s1 a2, 1, a15   ;	P[0] >> 1

add.l1 a10, a5, a10  ;	MSW((q-1)/p)) + carry
||	mv.l2x a4, b10	     ;	load LSW((q-1)/p)) - P[1]

add.l1x a10, b5, a10 ;	MSW((q-1)/p)) - P[0]

or.l1x a10, b10, a2  ;	count[0] | count[1]

[!a2] b.s2 zskip	     ;	branch if count=0

nop 3
*****************
*  begin loop	*
*****************
; a10:b10 holds the remaining count; each pass halves P (a15:a14) and, when
; P does not exceed the count, subtracts P from it and multiplies in the
; next saved power.
bloop	cmpgtu.l1 a15, a10, a1	;  compare P[0] to MSW (q-1)/p
||	subab.d1 a15, a10, a2  ;  compare P[0] to MSW (q-1)/p
||	mv.s1x b10, a4	     ;	load LSW (q-1)/p
||	not.l2x a14, b4      ;	not(LSW((q-1)/p))
||	zero.s2 b5	     ;	load zero

[!a2] cmpgtu.l1 a14, a4, a1  ; compare P[1] to LSW (q-1)/p
||	add.l2 b5:b4, 1, b5:b4 ;  -LSW((q-1)/p))
||	not.s1 a15, a0	     ;	not(MSW((q-1)/p))
||	mpy.m1 a5, 0, a5     ;	load zero

add.l2x a0, b5, b5   ;	-MSW((q-1)/p)
||	mv.l1x b4, a0	     ;	load -LSW((q-1)/p))
||	shru.s1 a15:a14, 1, a15:a14  ;	P[1] >> 1
||	addab.d1 a15, 0, a2  ;	save P[0]
||	ldw.d2 *b14--, b6    ;	load LSW(x**(2**n))
||[!a1] b.s2 _bigbigp	     ;	call product subroutine

[!a1] addu.l1 a5:a4, a0, a5:a4  ;  LSW((q-1)/p)) - P[1]
||	shru.s1 a2, 1, a15   ;	P[0] >> 1
||	ldw.d2 *b14--, a6    ;	load MSW(x**(2**n))
|| [a1] b.s2 zskip	     ;	conditional branch

[!a1] add.l1 a10, a5, a10  ;	MSW((q-1)/p)) + carry
||[!a1] mv.l2x a4, b10	     ;	load LSW((q-1)/p)) - P[1]
||	mv.s1 a13, a4	     ;	load MSW(x**(2**n))

[!a1] add.l1x a10, b5, a10  ;  MSW((q-1)/p)) - P[0]

nop 2
*
nop
*
wskip	ldw.d1 *a12, a4      ;	load product[0]
||	mv.l2x a11, b8	     ;	load q[0]
||	mv.l1 a10, a13	     ;	save count[0]

ldw.d1 *+a12[1], b4  ;	load product[1]
||	mv.l1x b11, a10      ;	load q[1]

b.s2 _bigbigq	     ;	call division subroutine
||	ldw.d1 *+a12[2], a6  ;	load product[2]
||	mv.l2x a8, b13	     ;	save address of quotient

ldw.d1 *+a12[3], b6  ;	load product[3]

nop 4
*
xskip	mv.l1 a13, a10	     ;	load count[0]
||	ldw.d2 *+b13[2], a4  ;	load quotient[2]
||	b.s1 _bigbigp	     ;	call multiplication subroutine

ldw.d2 *+b13[3], b4  ;	load quotient[3]
||	mv.l2 b11, b6	     ;	load q[1]

mv.l2x a8, b13	     ;	save address of product

nop 2
*
yskip	ldw.d1 *+a12[3], a7  ;	load proda[3]

ldw.d1 *+a12[2], a6  ;	load proda[2]
||	ldw.d2 *+b13[3], b7  ;	load prodb[3]

ldw.d1 *+a12[1], a5  ;	load proda[1]
||	ldw.d2 *+b13[2], b6  ;	load prodb[2]

ldw.d1 *a12, a4      ;	load proda[0]
||	ldw.d2 *+b13[1], b5  ;	load prodb[1]

ldw.d2 *b13, b4      ;	load prodb[0]

nop

; Same proda - prodb difference as in the first loop.
not.s2 b7, b8	     ;	not(prodb[3])
||	zero.l2 b9	     ;	load zero
||	mpy.m2 b7, 0, b7     ;	load zero

not.s2 b6, b6	     ;	not(prodb[2])
||	add.l2 b9:b8, 1, b9:b8 ;  -prodb[3]
||	mv.l1 a7, a8	     ;	load proda[3]

not.s2 b5, b8	     ;	not(prodb[1])
||	addu.l2 b7:b6, b9, b7:b6 ;  -prodb[2]
||	mv.l1x b8, a3	     ;	load -prodb[3]
||	zero.s1 a9	     ;	load zero
||	subab.d2 b9, b9, b9  ;	load zero
||	mpy.m1 a7, 0, a7     ;	load zero

not.s2 b4, b4	     ;	not(prodb[0])
||	addu.l2 b9:b8, b7, b9:b8  ;  -prodb[1]
||	addu.l1 a9:a8, a3, a9:a8  ;  differ[3]
||	mv.s1x b6, a2	     ;	load -prodb[2]
||	mpy.m1 a9, 0, a9     ;	load zero

addu.l1 a7:a6, a9, a7:a6  ;  proda[2] + carry
||	add.l2 b4, b9, b4    ;	- prodb[0]
||	mv.s2x a8, b13	     ;	load differ[3]
||	mpy.m1 a3, 0, a3     ;	load zero

mv.s1x b8, a1	     ;	load -prodb[1]
||	addu.l1 a7:a6, a2, a7:a6  ;  differ[2]

; NOTE(review): leading instruction of the next packet is missing.
||	addu.l1 a3:a2, a7, a3:a2  ;  proda[1] + carry
||	mv.s1x b8, a5	     ;	load -prodb[1]

addu.l1 a3:a2, a5, a3:a2  ;  differ[1]
||	zero.s1 a5	     ;	load zero

addu.l1 a5:a4, a3, a5:a4  ;  proda[0] + carry

add.l1x a4, b4, a4   ;	differ[0]

or.l1 a4, a2, a2     ;	differ[0] | differ[1]

[a2] b.s1 error	     ;	branch to error if non-zero
|| [a2] zero.l1 a10	     ;	load 0
|| [a2] zero.l2 b10	     ;	load 0

zskip	or.l1x a10, b10, a2  ;	count[0] | count[1]

[a2] b.s2 bloop	     ;	branch if not zero

nop 5
*****************
*  end loop	*
*****************
; Pop the saved registers in the reverse order of the prologue pushes.
ldw.d2 *++b15, b3    ;	restore return address

ldw.d2 *++b15, a8    ;	restore address of output

ldw.d2 *++b15, a15   ;	restore a15

ldw.d2 *++b15, b14   ;	restore b14

ldw.d2 *++b15, a14   ;	restore a14

ldw.d2 *++b15, b13   ;	restore b13

ldw.d2 *++b15, a13   ;	restore a13

; The stores below read a13/b13 before the restoring loads issued above
; have written them back, so they still hold the computed residue.
ldw.d2 *++b15, b12   ;	restore b12
||	stw.d1 a13, *a8      ;	store residue

ldw.d2 *++b15, a12   ;	restore a12
||	stw.d1 b13, *+a8[1]  ;	store residue

ldw.d2 *++b15, b11   ;	restore b11

ldw.d2 *++b15, a11   ;	restore a11

ldw.d2 *++b15, b10   ;	restore b10
||	b.s2 b3 	     ;	return

ldw.d2 *++b15, a10   ;	restore a10

nop 4
.end
******************************************************************************
*									     *
*  CHECK FOR COMMON FACTORS						     *
*  09/23/05 (dkc)							     *
*									     *
******************************************************************************
;  _comfact -- common-factor check on d and e.
;
;  Registers read on entry (as used by the code below):
;     a4 = d, b4 = e, a6 = pass count (a6-1 is kept in a11 as the loop count)
;  Result:
;     a4 = flag (copied from b13): 1 if, in some pass, the two-word value T
;	   was zero after the d computation and after the e computation;
;	   otherwise 0.
;  a10-a13, b10-b13 and b3 are pushed on the stack (b15) and popped on exit.
;
;  Each pass calls _midprod twice and _differ once for d, then the same for
;  e, stepping b11 through two words of recip per pass.
;
;  NOTE(review): a0, a1 and b1 are copied to b11/a12/b12 as the addresses of
;  recip, X and T, but nothing in this listing loads them, and several "||"
;  lines follow a blank line.  Some instructions (probably the address and
;  return-label loads) appear to be missing from this copy -- confirm
;  against the original source before assembling.
.global _comfact
.global _midprod, _differ, _recip
.bss T, 2*4, 4
.bss U, 2*4, 4
.bss X, 2*4, 4
.text
_comfact:
stw.d2 a10, *b15--	 ;  save a10
||	mv.l1 a4, a10		 ;  save d
 
stw.d2 b10, *b15--	 ;  save b10
||	mv.l2 b4, b10		 ;  save e
 
stw.d2 a11, *b15--	 ;  save a11
||	sub.l1 a6, 1, a11	 ;  loop count = a6 - 1
 
stw.d2 b11, *b15--	 ;  save b11
||	mv.l2x a0, b11		 ;  save address of recip
 
stw.d2 a12, *b15--	 ;  save a12
||	mv.l1 a1, a12		 ;  save address of X
 
stw.d2 b12, *b15--	 ;  save b12
||	mv.l2 b1, b12		 ;  save address of T
 
stw.d2 a13, *b15--	 ;  save a13
 
stw.d2 b13, *b15--	 ;  save b13
||	zero.l2 b13		 ;  flag=0
 
stw.d2 b3, *b15--	 ;  save return address
*****************
*  begin loop	*
*****************
aloop	b.s2 _midprod		 ;  call product subroutine
||	mv.l2x a10, b4		 ;  load d
||	ldw.d2 *b11, a4 	 ;  load recip[2*i]
 
nop 3
 
*
askip	b.s2 _midprod		 ;  call product subroutine
||	ldw.d2 *+b11[1], a4	 ;  load recip[2*i+1]
 
ldw.d1 *a12, b4 	 ;  load X[0]
 
nop 2
 
*
bskip	ldw.d1 *+a12[1], a1	 ;  load X[1]
||	b.s2 _differ		 ;  call difference subroutine
 
||	zero.l1 a2		 ;  load 0
 
stw.d2 a2, *b4		 ;  T[0]=0
 
stw.d1 a2, *a4		 ;  U[0]=0
 
stw.d1 a10, *+a4[1]	 ;  U[1]=d
 
stw.d2 a1, *+b4[1]	 ;  T[1]=X[1]
*
cskip	ldw.d2 *b12, a0 	 ;  load T[0]
||	mvk.s1 1, a13		 ;  flag0=1 (assume T is non-zero for d)
 
ldw.d2 *+b12[1], a1	 ;  load T[1]
 
b.s2 _midprod		 ;  call product subroutine
||	mv.l2 b10, b4		 ;  load e
 
ldw.d2 *b11++, a4	 ;  load recip[2*i], advance recip pointer
 
nop 2
 
||	or.l1 a0, a1, a2	 ;  a2 = T[0] | T[1]
 
||[!a2] zero.s1 a13		 ;  if ((T[0]==0)&&(T[1]==0)) flag0=0
*
dskip	b.s2 _midprod		 ;  call product subroutine
||	ldw.d2 *b11++, a4	 ;  load recip[2*i+1], advance recip pointer
 
ldw.d1 *a12, b4 	 ;  load X[0]
 
nop 2
 
*
eskip	ldw.d1 *+a12[1], a1	 ;  load X[1]
||	b.s2 _differ		 ;  call difference subroutine
 
||	zero.l1 a2		 ;  load 0
 
stw.d2 a2, *b4		 ;  T[0]=0
 
stw.d1 a2, *a4		 ;  U[0]=0
 
stw.d1 b10, *+a4[1]	 ;  U[1]=e
 
stw.d2 a1, *+b4[1]	 ;  T[1]=X[1]
 
fskip	ldw.d2 *b12, a0 	 ;  load T[0]
 
ldw.d2 *+b12[1], a1	 ;  load T[1]
 
nop 4
 
or.l1 a0, a1, a2	 ;  a2 = T[0] | T[1] (result for e)
 
or.l1 a13, a2, a1	 ;  a1 = flag0 | a2; zero only if T was zero for d and e
 
[!a1] zero.l1 a11		 ;  break if both remainders are zero
||[!a1] mvk.s2 1, b13		 ;  set flag if both remainders are zero
 
mv.l1 a11, a1		 ;  load loop count
 
[a1] b.s2 aloop		 ;  conditional branch to loop beginning
|| [a1] sub.l1 a1, 1, a1	 ;  decrement loop count
 
mv.l1 a1, a11		 ;  save loop count
 
nop 4
*****************
*  end loop	*
*****************
ldw.d2 *++b15, b3	 ;  restore return address
||     mv.l1x b13, a4		 ;  return flag
 
ldw.d2 *++b15, b13	 ;  restore b13
 
ldw.d2 *++b15, a13	 ;  restore a13
 
ldw.d2 *++b15, b12	 ;  restore b12
 
ldw.d2 *++b15, a12	 ;  restore a12
 
ldw.d2 *++b15, b11	 ;  restore b11
 
ldw.d2 *++b15, a11	 ;  restore a11
 
ldw.d2 *++b15, b10	 ;  restore b10
||     b.s2 b3			 ;  return
 
ldw.d2 *++b15, a10	 ;  restore a10
 
nop 4
.end
*******************************************************************************
*									      *
*  COMPUTE (D**P+E**P)/(D+E)						      *
*  09/25/05 (dkc)							      *
*									      *
*  Note:  Currently, p is set to 3.					      *
*									      *
*******************************************************************************
;  _depower -- compute (d**p + e**p)/(d + e) in multi-word arithmetic.
;
;  Registers read on entry (as used by the code below):
;     a4 = d, b4 = e, a6 = p, b6 = address of S (copied to b13)
;  Working storage: Y and Z hold the running powers of d and e (four words
;  each, initialised to 0,0,0,d and 0,0,0,e); W and X receive the products.
;  The power loop uses a13 = p-2 as its count; each pass calls _bigprod
;  for Y*d and for Z*e and copies W[0..2]/X[0..2] into Y[1..3]/Z[1..3].
;  After the loop it calls _bigbigs, then _powquot on d+e with index 0,
;  multiplies the divisor by p when ((d+e)/p)*p == d+e, calls _bigbigq, and
;  finally stores two words to S[0], S[1].
;  a10-a14, b10-b13 and b3 are pushed on the stack (b15) and popped on exit
;  (a13 is popped into b1 and moved back in the branch delay slots).
;
;  NOTE(review): a10, b10, a12, b12 and a0 are used as the addresses of
;  Y, Z, W, X and S, but nothing in this listing loads them, and the entry
;  sequence contains "||" lines that follow blank lines (two .d2 stores
;  cannot share an execute packet).  Instructions appear to be missing
;  from this copy -- confirm against the original source.
.global _depower
.global _bigprod, _bigbigs, _bigbigq, _powquot
.bss W, 3*4, 4
.bss X, 3*4, 4
.bss Y, 4*4, 4
.bss Z, 4*4, 4
.text
_depower stw.d2 a10, *b15--	  ;  save a10
 
||	 stw.d2 b10, *b15--	  ;  save b10
 
||	 mv.l1 a4, a11		  ;  save d
||	 stw.d2 a11, *b15--	  ;  save a11
 
||	 mv.l2 b4, b11		  ;  save e
||	 stw.d2 b11, *b15--	  ;  save b11
 
||	 sub.l1 a6, 2, a13	  ;  save p-2
||	 stw.d2 a13, *b15--	  ;  save a13
 
||	 stw.d2 a12, *b15--	  ;  save a12
 
||	 stw.d2 b12, *b15--	  ;  save b12
 
mv.l1 a6, a14		  ;  save p
||	 stw.d2 a14, *b15--	  ;  save a14
 
mv.l2 b6, b13		  ;  save address of S
||	 stw.d2 b13, *b15--	  ;  save b13
 
stw.d2 b3, *b15--	  ;  save return address
||	 zero.l1 a0		  ;  load 0
||	 zero.l2 b0		  ;  load 0
 
stw.d1 a0, *a10	  ;  Y[0]=0
||	 stw.d2 b0, *b10	  ;  Z[0]=0
 
stw.d1 a0, *+a10[1]	  ;  Y[1]=0
||	 stw.d2 b0, *+b10[1]	  ;  Z[1]=0
 
stw.d1 a0, *+a10[2]	  ;  Y[2]=0
||	 stw.d2 b0, *+b10[2]	  ;  Z[2]=0
 
stw.d1 a4, *+a10[3]	  ;  Y[3]=d
||	 stw.d2 b4, *+b10[3]	  ;  Z[3]=e
*****************
*  begin loop	*
*****************
aloop	 b.s1 _bigprod		  ;  call product subroutine
||	 mv.l1 a11, a6		  ;  load d
||	 ldw.d1 *+a10[2], a4	  ;  load Y[2]
 
ldw.d1 *+a10[3], b4	  ;  load Y[3]
 
nop 2
 
*
askip	 b.s1 _bigprod		  ;  call product subroutine
||	 mv.l1x b11, a6 	  ;  load e
||	 ldw.d2 *+b10[2], a4	  ;  load Z[2]
 
ldw.d2 *+b10[3], b4	  ;  load Z[3]
 
nop 2
 
*
bskip	 ldw.d1 *a12, a5	  ;  load W[0]
||	 ldw.d2 *b12, b5	  ;  load X[0]
 
ldw.d1 *+a12[1], a7	  ;  load W[1]
||	 ldw.d2 *+b12[1], b7	  ;  load X[1]
||	 mv.l1 a13, a2		  ;  load loop count
 
ldw.d1 *+a12[2], a9	  ;  load W[2]
||	 ldw.d2 *+b12[2], b9	  ;  load X[2]
||  [a2] sub.l1 a2, 1, a2	  ;  decrement loop count
||  [a2] b.s2 aloop		  ;  conditional branch to loop beginning
 
nop 2
 
stw.d1 a5, *+a10[1]	  ;  Y[1]=W[0]
||	 stw.d2 b5, *+b10[1]	  ;  Z[1]=X[0]
||	 mv.l1 a2, a13		  ;  save loop count
 
stw.d1 a7, *+a10[2]	  ;  Y[2]=W[1]
||	 stw.d2 b7, *+b10[2]	  ;  Z[2]=X[1]
 
stw.d1 a9, *+a10[3]	  ;  Y[3]=W[2]
||	 stw.d2 b9, *+b10[3]	  ;  Z[3]=X[2]
*****************
*  end loop	*
*****************
b.s2 _bigbigs		  ;  call sum subroutine
 
nop 3
 
*
cskip	 add.l1x a11, b11, a4	  ;  d+e
||	 zero.l2 b4		  ;  clear index
||	 b.s2 _powquot		  ;  call divide subroutine
 
nop 3
 
||	 mv.l1 a4, a11		  ;  save d+e
 
*
dskip	 mpyhlu.m1 a4, a14, a0	  ;  hi((d+e)/p)*p
 
mpyu.m1 a4, a14, a1	  ;  lo((d+e)/p)*p
 
shl.s1 a0, 16, a0	  ;  align hi((d+e)/p)*p
 
add.l1 a1, a0, a0	  ;  ((d+e)/p)*p
 
cmpeq.l1 a11, a0, a1	  ;  a1 = 1 if p divides (d+e)
 
mpyhlu.m1 a11, a14, a0   ;  hi((d+e))*p
 
mpyu.m1 a11, a14, a2	  ;  lo((d+e))*p
 
shl.s1 a0, 16, a0	  ;  align hi(temp)*p
||	 ldw.d2 *b10, a4	  ;  load Z[0]
 
[a1] add.l1 a2, a0, a11	  ;  if p divides (d+e), temp=(d+e)*p
||	 ldw.d2 *+b10[1], b4	  ;  load Z[1]
 
b.s2 _bigbigq		  ;  call quotient subroutine
||	 mv.s1 a11, a10 	  ;  load temp
||	 ldw.d2 *+b10[2], a6	  ;  load Z[2]
 
ldw.d2 *+b10[3], b6	  ;  load Z[3]
||	 zero.l2 b8		  ;  load 0
 
nop 2
 
*
eskip	 ldw.d1 *+a11[2], a5	  ;  load Y[2]
||	 ldw.d2 *++b15, b3	  ;  restore b3
 
ldw.d1 *+a11[3], a7	  ;  load Y[3]
||	 ldw.d2 *++b15, b13	  ;  restore b13
 
ldw.d2 *++b15, a14	  ;  restore a14
 
ldw.d2 *++b15, b12	  ;  restore b12
 
ldw.d2 *++b15, a12	  ;  restore a12
 
stw.d1 a5, *a0 	  ;  S[0]=Y[2]
||	 ldw.d2 *++b15, b1	  ;  pop saved a13 into b1 (moved to a13 below)
 
stw.d1 a7, *+a0[1]	  ;  S[1]=Y[3]
||	 ldw.d2 *++b15, b11	  ;  restore b11
 
ldw.d2 *++b15, a11	  ;  restore a11
 
b.s2 b3		  ;  return
||	 ldw.d2 *++b15, b10	  ;  restore b10
 
ldw.d2 *++b15, a10	  ;  restore a10
 
mv.l1x b1, a13 	  ;  restore a13
 
nop 3
.end
*******************************************************************************
*									      *
*  COMPUTE DIFFERENCES							      *
*  02/01/99 (dkc)							      *
*									      *
*     address of minuend (A[0], A[1]) => a4				      *
*     address of subtrahend (B[0], B[1]) => b4				      *
*									      *
*******************************************************************************
;  _differ -- two-word (64-bit) subtraction, A - B.
;
;  Registers read on entry:
;     a4 = address of minuend A (A[0] = MSW, A[1] = LSW)
;     b4 = address of subtrahend B (B[0] = MSW, B[1] = LSW)
;     b3 = return address
;  Result: the difference is written back over B (stores go through b4);
;  A is not modified.	Nothing is returned in a register.
;  The subtraction is done as A + (~B + 1), propagating the carries from
;  the low word into the high word by hand.
;  The last five execute packets sit in the delay slots of the return
;  branch, so their order must not be changed.
.global _differ
.text
_differ:
ldw.d1 *+a4[1], a0	  ;  load A[1]
||	ldw.d2 *+b4[1], b0	  ;  load B[1]
 
ldw.d1 *a4, a6		  ;  load A[0]
||	ldw.d2 *b4, b6		  ;  load B[0]
 
nop 3
 
not.l1x b0, a8		  ;  not(B[1])
||	zero.s1 a9		  ;  load 0
 
not.l2 b6, b6		  ;  not(B[0])
||	add.l1 a9:a8, 1, a9:a8	  ;  not(B[1]) + 1
||	b.s2 b3 		  ;  return
 
and.l1 a9, 1, a9	  ;  isolate carry bit
||	zero.s1 a1		  ;  load 0
 
addu.l1 a1:a0, a8, a1:a0  ;  A[1] - B[1]
||	add.l2x b6, a9, b6	  ;  -B[0]::B[1]
 
and.l1 a1, 1, a1	  ;  isolate carry bit
||	add.l2x b6, a6, b6	  ;  A[0] - B[0] - carry
||	stw.d2 a0, *+b4[1]	  ;  store A[1]-B[1]
 
add.l2x b6, a1, b6	  ;  A[0] - B[0]
 
stw.d2 b6, *b4		  ;  store A[0]-B[0]
.end
*******************************************************************************
*									      *
*  OUTER LOOP								      *
*  09/26/05 (dkc)							      *
*									      *
*******************************************************************************
;  _dloop -- outer loop over d.
;
;  Registers read on entry (as used by the code below):
;     a4 = d, b4 = k, a6 = n, b6 = outsiz, a8 = address of output
;  Result: a4 = n (copied from a11 on exit).
;  The loop count (b12) starts at d-2.  Each pass calls _test with d; when
;  the returned flag is 0 it calls _eloop with d, k, n and takes the value
;  returned in a4 as the new n.  If n > outsiz (unsigned compare) the loop
;  count is cleared and 6 is stored through b0.  d is decremented and
;  stored through a0 on every pass.
;  a10-a12, b10-b12 and b3 are pushed on the stack (b15) and popped on exit.
;
;  NOTE(review): b0 and a0 are used as store addresses (presumably _error
;  and _d) but nothing in this listing loads them, and a0 is overwritten
;  with 6 just before it is used as a pointer.  Address-load instructions
;  appear to be missing from this copy -- confirm against the original.
.global _dloop
.global _test, _eloop, _error, _d
.text
_dloop	 stw.d2 a10, *b15--	  ;  save a10
||	 mv.l1 a4, a10		  ;  save d
 
stw.d2 b10, *b15--	  ;  save b10
||	 mv.l2 b4, b10		  ;  save k
 
stw.d2 a11, *b15--	  ;  save a11
||	 mv.l1 a6, a11		  ;  save n
 
stw.d2 b11, *b15--	  ;  save b11
||	 mv.l2 b6, b11		  ;  save outsize
 
stw.d2 a12, *b15--	  ;  save a12
||	 mv.l1 a8, a12		  ;  save address of output
 
stw.d2 b12, *b15--	  ;  save b12
||	 sub.l2x a4, 2, b12	  ;  loop count = d - 2
 
stw.d2 b3, *b15--	  ;  save return address
*****************
*  begin loop	*
*****************
aloop	 b.s2 _test		  ;  call Furtwangler subroutine
||	 mv.l1 a10, a4		  ;  load d
 
nop 3
 
*
askip	 cmpeq.l1 a4, 0, a1	  ;  compare flag to 0
 
[!a1] b.s2 zskip		  ;  if flag!=0, continue
 
[a1] b.s2 _eloop		  ;  call inner loop subroutine
||	 mv.l1 a10, a4		  ;  load d
||	 mv.l2 b10, b4		  ;  load k
||	 mv.s1 a11, a6		  ;  load n
 
nop 3
 
*
*
bskip	 mv.s1 a4, a11		  ;  n = value returned by _eloop
||	 cmpgtu.l1x a4, b11, a1   ;  compare n to outsiz
 
[a1] zero.l2 b12		  ;  if n>outsiz, break
||	 mvk.s1 6, a0		  ;  load 6
 
[a1] stw.d2 a0, *b0 	  ;  if n>outsize, error[0]=6
 
zskip	 mv.l2 b12, b2		  ;  load loop count
||	 sub.l1 a10, 1, a10	  ;  decrement d
 
[b2] b.s2 aloop		  ;  conditional branch to loop beginning
||  [b2] sub.l2 b2, 1, b2	  ;  decrement loop count
 
mv.l2 b2, b12		  ;  save loop count
||	 stw.d1 a10, *a0	  ;  store d
 
nop 4
*****************
*  end loop	*
*****************
ldw.d2 *++b15, b3	  ;  restore return address
||	 mv.l1 a11, a4		  ;  return n
 
ldw.d2 *++b15, b12	  ;  restore b12
 
ldw.d2 *++b15, a12	  ;  restore a12
 
ldw.d2 *++b15, b11	  ;  restore b11
 
ldw.d2 *++b15, a11	  ;  restore a11
 
ldw.d2 *++b15, b10	  ;  restore b10
||	 b.s2 b3		  ;  return
 
ldw.d2 *++b15, a10	  ;  restore a10
 
nop 4
.end
;  _dummy -- empty routine: branches straight back to the address in b3.
;  No registers are read or written other than the branch through b3.
.global _dummy
.text
_dummy	b.s2 b3		;  return immediately
nop 5			;  fill the five branch delay slots
.end
*******************************************************************************
*									      *
*  INNER LOOP								      *
*  09/26/05 (dkc)							      *
*									      *
*******************************************************************************
;  _eloop -- inner loop over e for a fixed d.
;
;  Registers read on entry (as used by the code below):
;     a4 = d, b4 = k, a6 = n, a8 = address of output, b6 = outsiz
;  Result: a4 = n (copied from a12 on exit).
;  e starts at d-1 and is decremented every pass; the loop count (a11)
;  starts at d-2.  A pass is abandoned (branch to zskip) as soon as one of
;  these checks fails, in this order:
;     1. d and e are not both even
;     2. _test(e) returns 0
;     3. _comfact(d, e, 4) returns 0
;     4. _euclid(d, e) returns 1
;     5. _furcond(d, e) returns 0
;  Otherwise _depower(d, e, 3) and _search(k) are called.  If _search
;  returns a non-zero count, d, e and the count are stored at
;  output[n], output[n+1], output[n+2] and n is advanced by 3.  The loop
;  count is cleared when n > outsiz (unsigned compare).
;  a10-a13, b10-b12 and b3 are pushed on the stack (b15) and popped on exit.
;
;  NOTE(review): the store "stw.d1 b10, *a0" uses a0 as an address
;  (presumably _e) that nothing in this listing loads, and a .d1 store of a
;  B-file register has no cross-path marker; the branch to _depower is
;  followed by a single nop before the next call.  Instructions appear to
;  be missing from this copy -- confirm against the original source.
.global _eloop
.global _test, _comfact, _euclid, _furcond, _depower, _search, _e
.bss S, 2*4, 4
.text
_eloop	 stw.d2 a10, *b15--	;  save a10
||	 mv.l1 a4, a10		;  save d
 
stw.d2 b10, *b15--	;  save b10
||	 sub.l2x a4, 1, b10	;  e = d - 1
 
stw.d2 a11, *b15--	;  save a11
||	 sub.l1 a4, 2, a11	;  loop count = d - 2
 
stw.d2 b11, *b15--	;  save b11
||	 mv.l2 b4, b11		;  save k
 
stw.d2 a12, *b15--	;  save a12
||	 mv.l1 a6, a12		;  save n
 
stw.d2 b12, *b15--	;  save b12
||	 mv.l2x a8, b12 	 ;  save address of output
 
stw.d2 a13, *b15--	;  save a13
||	 mv.l1x b6, a13 	;  save outsiz
 
stw.d2 b3, *b15--	;  save return address
*****************
*  begin loop	*
*****************
aloop	 and.l1 a10, 1, a1	;  check if d is odd
||	 and.l2 b10, 1, b1	;  check if e is odd
 
or.l1x a1, b1, a1	;  "or" conditions
 
[!a1] b.s2 zskip		;  continue if both even
***********************
*  Furtwangler test   *
***********************
[a1] b.s2 _test		;  call Furtwangler subroutine
||	 mv.l1x b10, a4 	;  load e
 
nop 3
 
*
*
askip	 cmpeq.l1 a4, 0, a1	;  compare flag to 0
 
[!a1] b.s2 zskip		;  continue if flag!=0
*************************
*  common factor test	*
*************************
[a1] b.s2 _comfact		;  call common factor subroutine
||	 mv.l1 a10, a4		;  load d
||	 mv.l2 b10, b4		;  load e
||	 mvk.s1 4, a6		;  load count
 
nop 3
 
*
*
bskip	 cmpeq.l1 a4, 0, a1	;  compare flag to 0
 
[!a1] b.s2 zskip		;  continue if flag!=0
***********************
*  Euclidean G.C.D.   *
***********************
[a1] b.s2 _euclid		;  call Euclidean G.C.D. subroutine
||	 mv.l1 a10, a4		;  load d
||	 mv.l2 b10, b4		;  load e
 
nop 3
 
*
*
cskip	 cmpeq.l1 a4, 1, a1	;  compare G.C.D. to 1
 
[!a1] b.s2 zskip		;  continue if G.C.D.!=1
***********************
*  Furtwangler test   *
***********************
[a1] b.s2 _furcond		;  call Furtwangler subroutine
||	 mv.l1 a10, a4		;  load d
||	 mv.l2 b10, b4		;  load e
 
nop 3
 
*
*
dskip	 cmpeq.l1 a4, 0, a1	;  compare flag to 0
 
[!a1] b.s2 zskip		;  continue if flag!=0
***********************
*  (d**p+e**p)/(d+e)  *
***********************
[a1] b.s2 _depower		;  call (d**p+e**p)/(d+e) subroutine
||	 mv.l1 a10, a4		;  load d
||	 mv.l2 b10, b4		;  load e
||	 mvk.s1 3, a6		;  load p
 
nop
 
*
**************************
*  search for solutions  *
**************************
eskip	 b.s2 _search		;  call search subroutine
||	 mv.l2 b11, b4		;  load k
 
nop 2
 
*
fskip	 cmpeq.l1 a4, 0, a1	;  compare count to 0
||	 mv.l2x a12, b0 	;  load n
 
[!a1] stw.d2 a10, *+b12[b0]	;  if count!=0, output[n]=d
||	 add.l2 b0, 1, b0	;  n+1
|| [!a1] add.l1 a12, 3, a12	;  if count!=0, n=n+3
 
[!a1] stw.d2 b10, *+b12[b0]	;  if count!=0, output[n+1]=e
||	 add.l2 b0, 1, b0	;  n+2
||	 cmpgtu.l1 a12, a13, a2 ;  compare n to outsiz
 
[!a1] stw.d2 a4, *+b12[b0]	;  if count!=0, output[n+2]=count
||  [a2] zero.l1 a11		;  if n>outsiz, break
 
zskip	 mv.l1 a11, a2		;  load loop count
||	 sub.l2 b10, 1, b10	;  e=e-1
 
[a2] b.s2 aloop		;  conditional branch to loop beginning
||  [a2] sub.l1 a2, 1, a2	;  decrement loop count
 
mv.l1 a2, a11		;  save loop count
||	 stw.d1 b10, *a0	;  store e
 
nop 4
*****************
*  end loop	*
*****************
ldw.d2 *++b15, b3	;  restore return address
||	 mv.l1 a12, a4		;  return n
 
ldw.d2 *++b15, a13	;  restore a13
 
ldw.d2 *++b15, b12	;  restore b12
 
ldw.d2 *++b15, a12	;  restore a12
 
ldw.d2 *++b15, b11	;  restore b11
 
ldw.d2 *++b15, a11	;  restore a11
 
ldw.d2 *++b15, b10	;  restore b10
||	 b.s2 b3		;  return
 
ldw.d2 *++b15, a10	;  restore a10
 
nop 4
.end
*******************************************************************************
*									      *
*  EUCLIDEAN G.C.D.							      *
*  09/25/05 (dkc)							      *
*									      *
*******************************************************************************
;  _euclid -- greatest common divisor by the Euclidean algorithm.
;
;  Registers read on entry: a4 = a, b4 = b (swapped first if b > a,
;  unsigned compare), b3 = return address.
;  Result: a4 = the divisor of the final division step, i.e. the last
;  non-zero remainder (the G.C.D.).
;  Each pass calls __divu on (a, b), rebuilds (a/b)*b from 16-bit partial
;  products, and forms temp = a - (a/b)*b.  Then a = b, b = temp, and the
;  loop repeats while temp != 0.
;  a10 and b10 are pushed on entry and popped on exit; b3 is pushed and
;  popped around every __divu call.
;
;  NOTE(review): no zero check on b is visible before the first __divu
;  call; the caller presumably guarantees b != 0 -- confirm.
;  NOTE(review): only one instruction and "nop 2" follow the branch to
;  __divu before askip; the return-label load into b3 appears to be
;  missing from this copy -- confirm against the original source.
.global _euclid
.global __divu
.text
_euclid: cmpgtu.l1x b4, a4, a2	  ;  compare b to a
||	 stw.d2 b10, *b15--	  ;  save b10
 
[a2] mv.l1x b4, a4		  ;  if greater than, a=b
||  [a2] mv.l2x a4, b4		  ;  if greater than, b=a
||	 stw.d2 a10, *b15--	  ;  save a10
*****************
*  begin loop	*
*****************
aloop:	 b.s2 __divu		  ;  call integer divide subroutine
||	 mv.l1 a4, a10		  ;  save a
||	 mv.l2 b4, b10		  ;  save b
 
stw.d2 b3, *b15--	  ;  save return address
 
nop 2
 
*
askip	 mpylhu.m1x a4, b10, a0   ;  lo(a/b)*hi(b)
||	 mpylhu.m2x b10, a4, b6   ;  lo(b)*hi(a/b)
||	 ldw.d2 *++b15, b3	  ;  restore return address
 
mpyu.m1x b10, a4, a3	  ;  lo(b)*lo(a/b)
 
add.l1x b6, a0, a0	  ;  lo(a/b)*hi(b)+lo(b)*hi(a/b)
 
shl.s1 a0, 16, a0	  ;  align lo(a/b)*hi(b)+lo(b)*hi(a/b)
 
add.l1 a3, a0, a0	  ;  (a/b)*b
 
sub.l1 a10, a0, a2	  ;  temp=a-(a/b)*b
 
mv.l1x b10, a4 	  ;  a=b (also the return value when temp==0)
||	 mv.l2x a2, b4		  ;  b=temp
||  [a2] b.s2 aloop		  ;  branch if temp!=0
|| [!a2] ldw.d2 *++b15, a10	  ;  restore a10
 
[!a2] b.s2 b3		  ;  return
|| [!a2] ldw.d2 *++b15, b10	  ;  restore b10
 
nop 4
*****************
*  end loop	*
*****************
nop
.end
*******************************************************************************
*									      *
*  FURTWANGLER CONDITIONS						      *
*  09/25/05 (dkc)							      *
*									      *
*******************************************************************************
;  _furcond -- Furtwangler-condition check on d and e.
;
;  Registers read on entry: a4 = d, b4 = e, b3 = return address.
;  Result in a4:
;     0  if neither d nor e equals 6*(its _powquot index-2 quotient), or
;	 if both (d-e) and (d+e) leave a remainder of 1 or 8 after
;	 subtracting 9*(their _powquot index-1 quotient);
;     1  otherwise.
;  The return address is parked in the NRP control register across the
;  _powquot calls; a10, b10 and a11 are pushed on the stack and popped on
;  exit.
;
;  NOTE(review): _powquot is presumably floor division by 2*p = 6 for
;  index 2 and by p*p = 9 for index 1 (its body is not visible here) --
;  confirm.  One "||" line follows a blank line in the entry sequence, so
;  an instruction may be missing from this copy.
.global _furcond
.global _powquot
.text
_furcond:
b.s1 _powquot		    ;  call divide subroutine
||	 mvc.s2 b3, nrp 	    ;  save return address
||	 zero.l2 b4		    ;  load 0
||	 addab.d2 b4, 0, b0	    ;  save e
 
mv.l1 a4, a10		    ;  save d
||	 add.l2 b4, 2, b4	    ;  index=2
||	 stw.d2 a10, *b15--	    ;  save a10
 
nop 2
 
mv.l2 b0, b10		    ;  save e
||	 stw.d2 b10, *b15--	    ;  save b10
 
||	 stw.d2 a11, *b15--	    ;  save a11
*
askip	 b.s1 _powquot		    ;  call divide subroutine
||	 mv.l1x b10, a4 	    ;  load e
||	 addab.d1 a4, 0, a11	    ;  save d/p2
||	 mvk.s2 2, b4		    ;  index=2
 
nop 3
 
*
bskip	 shl.s1 a11, 2, a0	    ;  (d/p2)*4
||	 addab.d1 a11, a11, a1	    ;  (d/p2)*2
||	 shl.s2x a4, 2, b0	    ;  (e/p2)*4
||	 add.l1 a4, a4, a2	    ;  (e/p2)*2
 
add.l1 a0, a1, a0	    ;  (d/p2)*p2
||	 add.l2x b0, a2, b0	    ;  (e/p2)*p2
 
cmpeq.l1 a10, a0, a1	    ;  compare d to (d/p2)*p2
||	 cmpeq.l2 b10, b0, b1	    ;  compare e to (e/p2)*p2
 
or.l1x a1, b1, a2	    ;  "or" conditions
 
[!a2] b.s2 zskip		    ;  return
|| [!a2] zero.l1 a4		    ;  return 0
 
[a2] b.s1 _powquot		    ;  call divide subroutine
||  [a2] sub.l1x a10, b10, a4	    ;  a-b
||	 mvk.s2 1, b4		    ;  index=1
 
nop 3
 
*
*
cskip	 shl.s1 a4, 3, a0	    ;  8*(a-b)/ps
||	 sub.l1x a10, b10, a1	    ;  a-b
 
add.l1 a4, a0, a0	    ;  ((a-b)/ps)*ps
 
sub.l1 a1, a0, a0	    ;  remdif=(a-b)-((a-b)/ps)*ps
 
cmpeq.l1 a0, 1, a2	    ;  compare remdif to 1
||	 cmpeq.l2x a0, 8, b2	    ;  compare remdif to 8
 
or.l1x a2, b2, a2	    ;  "or" conditions
 
[!a2] b.s2 zskip		    ; return
||	 mvk.s1 1, a4		    ;  set return value
 
[a2] b.s1 _powquot		    ;  call divide subroutine
||  [a2] add.l1x a10, b10, a4	    ;  a+b
||	 mvk.s2 1, b4		    ;  index=1
 
nop 3
 
*
*
dskip	 shl.s1 a4, 3, a0	    ;  8*(a+b)/ps
||	 add.l1x a10, b10, a1	    ;  a+b
 
add.l1 a4, a0, a0	    ;  ((a+b)/ps)*ps
 
sub.l1 a1, a0, a0	    ;  remsum=(a+b)-((a+b)/ps)*ps
 
cmpeq.l1 a0, 1, a2	    ;  compare remsum to 1
||	 cmpeq.l2x a0, 8, b2	    ;  compare remsum to 8
 
or.l1x a2, b2, a2	    ;  "or" conditions
 
[!a2] mvk.s1 1, a4		    ;  set return value
||  [a2] zero.l1 a4		    ;  clear return value
 
zskip	 mvc.s2 nrp, b3 	    ;  restore return address
 
ldw.d2 *++b15, a11	    ;  restore a11
 
ldw.d2 *++b15, b10	    ;  restore b10
||	 b.s2 b3		    ;  return
 
ldw.d2 *++b15, a10	    ;  restore a10
 
nop 4
.end

*******************************************************************************
*									      *
*  TEST 								      *
*  09/23/05 (dkc)							      *
*									      *
*******************************************************************************
;  _test -- residue test on a value derived from d.
;
;  Registers read on entry: a4 = d, b3 = return address.
;  Result in a4: 0 if remd is 1 or 8, otherwise 1, where
;  remd = f - 9*(quotient returned by the second _powquot call) and f is
;  the value selected from d by the conditional instructions after askip
;  (see the inline comments).
;  The return address is kept in b10 (the old b10 is pushed first) and the
;  routine returns with a branch through b10.  The final five execute
;  packets sit in that branch's delay slots.
;
;  NOTE(review): several "||" lines follow blank lines between askip and
;  bskip, so the packet structure that selects f cannot be fully
;  reconstructed from this copy -- confirm against the original source.
.global _test
.global _powquot
.text
_test:	 stw.d2 b10, *b15--	;  save b10
||	 mv.l2 b3, b10		;  save return address
 
b.s2 _powquot		;  call powquot
||	 zero.l2 b4		;  index=0
||	 stw.d2 a10, *b15--	;  save a10
||	 mv.s1 a4, a10		;  save d
 
nop 3
 
||	 stw.d2 a11, *b15--	;  save a11
||	 mvk.s1 1, a11		;  load 1
 
*
askip	 b.s2 _powquot		;  call powquot subroutine
||	 zero.l2 b4		;  load 0
||	 shl.s1 a4, 1, a0	;  quotient*2
||	 and.l1 a10, 1, a1	;  check if d is odd
||	 ldw.d2 *++b15, a11	;  restore a11
 
add.l1 a4, a0, a0	;  quotient*3
||	 add.l2 b4, 1, b4	;  index=1
||	 addab.d1 a10, 1, a3	;  d+1
|| [!a1] shl.s1 a10, 3, a4	;  d*8
 
[!a1] cmpeq.l1 a10, a0, a2	;  if d is even, check if quotient*3 equals d
||  [a1] mvk.s1 1, a2		;  set condition
|| [!a1] addab.d1 a3, a4, a4	;  if d is even, f=d*ps+1
 
[!a2] shru.s1 a10, 1, a4	;  if d is even and (quotient*p!=d), f=d/2
||  [a1] cmpeq.l1 a10, a0, a2	;  if d is odd, check if quotient*3 equals d
|| [!a1] addab.d1 a11, 0, a2	;  set condition
 
|| [!a2] mv.l1 a10, a4		;  if d is odd and (quotient*p!=d), f=d
 
||	 mv.l1 a4, a10		;  save f
*
bskip	 shl.s1 a4, 3, a0	;  quotient*8
||	 b.s2 b10		;  return
||	 ldw.d2 *++b15, a10	;  restore a10
 
add.l1 a4, a0, a0	;  quotient*9
||	 ldw.d2 *++b15, b10	;  restore b10
 
sub.l1 a10, a0, a0	;  remd=f-quotient*ps
 
cmpeq.l1 a0, 1, a1	;  compare remd to 1
||	 mvk.s1 1, a4		;  set flag
 
[a1] zero.s1 a4		;  if remd==1, flag=0
||	 cmpeq.l1 a0, 8, a1	;  compare remd to 8
 
[a1] zero.s1 a4		;  if remd==8, flag=0
 
.end
*******************************************************************************
*									      *
*  PRODUCT (128x16)							      *
*  12/04/00 (dkc)							      *
*									      *
*  (a[0], a[1], a[2], a[3]) => a4, b4, a6, b6				      *
*  address of product => a8						      *
*  b => b8								      *
*									      *
*******************************************************************************
;  _hugeprod -- multiply a four-word value by a 16-bit value.
;
;  Registers read on entry:
;     a4, b4, a6, b6 = a[0] (most significant) .. a[3] (least significant)
;     b8 = multiplier b (only its low half-word enters the mpyhlu/mpyu
;	   products)
;     a8 = address of the four-word result
;  Result: sum[0..3] stored at a8[0..3].	The top word is accumulated with
;  plain 32-bit adds, so any carry out of sum[0] is not kept.
;  a11 and b11 are pushed and popped inside the routine; b0-b3, b5, b7, b9,
;  a0-a2, a5, a7 and a9 are overwritten.
;
;  NOTE(review): the routine returns with "b.s2 b8" while b3 is zeroed and
;  reused as a sum register; b8 is documented as the multiplier and no
;  instruction in this listing copies a return address into it.  Either an
;  instruction is missing from this copy or the return target is wrong --
;  confirm against the original source.
.global _hugeprod
.text
_hugeprod:
mpyhlu.m2 b6, b8, b1	  ;  hi(a[3]) * b
||	mpyhlu.m1x a6, b8, a1	  ;  hi(a[2]) * b
||	stw.d2 b11, *b15--	  ;  save b11
 
mpyhlu.m2 b4, b8, b11	  ;  hi(a[1]) * b
||	mpyhlu.m1x a4, b8, a11	  ;  hi(a[0]) * b
||	stw.d2 a11, *b15--	  ;  save a11
 
mpyu.m2 b6, b8, b0	  ;  lo(a[3]) * b
||	mpyu.m1x a6, b8, a0	  ;  lo(a[2]) * b
||	shl.s2 b1, 16, b5	  ;  align hi(a[3])*b
||	shl.s1 a1, 16, a5	  ;  align hi(a[2])*b
||	ldw.d2 *++b15, a11	  ;  restore a11
 
mpyu.m2 b4, b8, b2	  ;  lo(a[1]) * b
||	mpyu.m1x a4, b8, a2	  ;  lo(a[0]) * b
||	shru.s2 b1, 16, b7	  ;  align hi(a[3])*b
||	shru.s1 a1, 16, a7	  ;  align hi(a[2])*b
||	zero.l2 b1		  ;  clear sum
||	zero.l1 a1		  ;  clear sum
 
addu.l2 b1:b0, b5, b1:b0  ;  sum[3]
||	addu.l1 a1:a0, a5, a1:a0  ;  sum[2] - carry
||	shl.s2 b11, 16, b9	  ;  align hi(a[1])*b
||	shl.s1 a11, 16, a9	  ;  align hi(a[0])*b
||	zero.d2 b3		  ;  clear sum
 
addu.l1x a1:a0, b7, a1:a0 ;  sum[2] - carry
||	addu.l2x b3:b2, a7, b3:b2 ;  sum[1] - carry
||	add.s1 a2, a9, a2	  ;  sum[0] - carry
||	b.s2 b8 		  ;  return
||	ldw.d2 *++b15, b11	  ;  restore b11
 
addu.l1x a1:a0, b1, a1:a0 ;  sum[2]
||	shru.s2 b11, 16, b11	  ;  align hi(a1[1])*b
||	stw.d1 b0, *+a8[3]	  ;  store sum[3]
 
addu.l2x b3:b2, a1, b3:b2 ;  sum[1] - carry
||	add.l1x a2, b11, a2	  ;  sum[0]
||	stw.d1 a0, *+a8[2]	  ;  store sum[2]
 
addu.l2 b3:b2, b9, b3:b2  ;  sum[1]
 
add.l1x a2, b3, a2	  ;  sum[0]
||	stw.d1 b2, *+a8[1]	  ;  store sum[1]
 
stw.d1 a2, *a8		  ;  store sum[0]
.end
*******************************************************************************
*									      *
*  COMPUTE X**((Q-1)/P)=1(MOD Q)					      *
*  03/06/02 (dkc)							      *
*									      *
*     (q-1)/p => a4, b4 						      *
*     q => a6, b6							      *
*     address of output => a8						      *
*     x => b8, a10							      *
*									      *
*******************************************************************************
;  _hugeres -- modular power residue, x**((q-1)/p) mod q, on two-word values.
;
;  Registers read on entry (as used by the code below):
;     a4:b4  = (q-1)/p (MSW:LSW)	a6:b6 = q (MSW:LSW)
;     a8     = address of output	b8:a10 = x (MSW:LSW)
;  Result: two words stored at a8[0] and a8[1] on exit (from a13 and b13).
;  Structure visible in the code:
;     aloop  -- squares the running power with _bigbigp, reduces it with
;		_bigbigq / _bigbigp and a multi-word subtraction, and pushes
;		each power through b14, doubling P[0]:P[1] each pass until it
;		exceeds (q-1)/p;
;     bloop  -- walks back down the saved powers (b14 post-decrement),
;		halving P each pass and multiplying in a saved power when P
;		fits in the remaining count;
;     error  -- a branch-to-self that is entered when the upper two words
;		of a reduction difference are non-zero.
;  a10-a15, b10-b14, a8 and b3 are pushed on the stack (b15) and popped on
;  exit.
;
;  NOTE(review): a12, b14, b9 and the label "askip" are used but never
;  loaded or defined in this listing, and many "||" lines follow blank
;  lines.  Address-load, return-label and label lines appear to be missing
;  from this copy, so the packet structure cannot be fully reconstructed
;  here -- confirm against the original source before assembling.
.global _hugeres
.global _bigbigp, _bigbigq
.bss save, 64*8, 4
.bss temp, 4*4, 4
.bss temp1, 4*4, 4
.text
_hugeres:
mv.l1 a4, a10	     ;	save MSW of (q-1)/p
||	mv.s1 a10, a0	     ;	save LSW of x
||	stw.d2 a10, *b15--   ;	save a10
 
mv.l2 b4, b10	     ;	save LSW of (q-1)/p
||	stw.d2 b10, *b15--   ;	save b10
 
mv.l1 a6, a11	     ;	save MSW of q
||	stw.d2 a11, *b15--   ;	save a11
 
mv.l2 b6, b11	     ;	save LSW of q
||	stw.d2 b11, *b15--   ;	save b11
 
||	stw.d2 a12, *b15--   ;	save a12
 
||	stw.d2 b12, *b15--   ;	save b12
 
stw.d2 a13, *b15--   ;	save a13
||	mv.l1x b8, a13	     ;	load MSW of x
 
stw.d2 b13, *b15--   ;	save b13
||	mv.l2x a0, b13	     ;	load LSW of x
 
stw.d2 a14, *b15--   ;	save a14
||	mvk.s1 1, a14	     ;	load P[1]
 
stw.d2 b14, *b15--   ;	save b14
 
stw.d2 a15, *b15--   ;	save a15
||	zero.s1 a15	     ;	load P[0]
 
stw.d2 a8, *b15--    ;	save address of output
||	mvk.s1 1, a1	     ;	load 1
 
stw.d2 b3, *b15--    ;	save return address
||	zero.s1 a2	     ;	load 0
||	zero.l1 a6	     ;	load 0
||	mvk.s2 1, b6	     ;	load 1
*****************
*  begin loop	*
*****************
aloop:
[!a2] b.s2 _bigbigp	     ;	call product subroutine
||	mv.l1 a13, a4	     ;	A[0] = MSW(x**(2**n))
||	mv.l2 b13, b4	     ;	A[1] = LSW(x**(2**n))
||[!a1] addab.d1 a13, 0, a6  ;	M[0] = MSW(x**(2**n))
||[!a1] addab.d2 b13, 0, b6  ;	M[1] = LSW(x**(2**n))
 
[!a1] shl.s1 a15:a14, 1, a15:a14   ;	P[0]:P[1] = P[0]:P[1]*2
||	add.l1 a15, a15, a0  ;	P[0] << 1
 
and.l1 a15, 1, a15   ;	isolate carry bit
||[!a1] stw.d2 a13, *b14++   ;	save MSW(x**(2**n))
 
[!a1] or.l1 a15, a0, a15   ;	P[0]:P[1]
||[!a1] stw.d2 b13, *b14++   ;	save LSW(x**(2**n))
 
nop 2
*
pskip	cmpgtu.l1 a15, a10, a1	;  compare P[0] to MSW (q-1)/p
||	sub.s1 a15, a10, a2  ;	compare P[0] to MSW (q-1)/p
||	ldw.d1 *a12, a4      ;	load product[0]
 
[!a2] cmpgtu.l1x a14, b10, a1 ; compare P[1] to LSW (q-1)/p
||	ldw.d1 *+a12[1], b4  ;	load product[1]
 
[a1] b.s2 askip	     ;	exit loop
||	ldw.d1 *+a12[2], a6  ;	load product[2]
|| [a1] shru.s1 a15:a14, 1, a15:a14 ;  P[1] >> 1
|| [a1] mv.l1 a15, a0	     ;	save P[0]
 
[!a1] b.s2 _bigbigq	     ;	call quotient subroutine
||	ldw.d1 *+a12[3], b6  ;	load product[3]
||	mv.l2x a11, b8	      ;  load q[0]
|| [a1] shru.s1 a0, 1, a15   ;	P[0] >> 1
 
||[!a1] mv.l1x b11, a10      ;	load q[1]
||[!a1] stw.d2 a10, *b15--   ;	save MSW (q-1)/p
 
|| [a1] subaw.d2 b14, 3, b14 ;	decrement pointer
 
||[!a1] stw.d2 a11, *b15--   ;	save q[0]
 
nop
*
nop
*
qskip	b.s2 _bigbigp	     ;	call product subroutine
||	ldw.d1 *+a11[2], b0  ;	load quotient[0]
||	ldw.d2 *++b15[1], a11  ;  restore q[0]
 
ldw.d1 *+a11[3], b4  ;	load quotient[1]
||	ldw.d2 *++b15[1], a10  ;  restore MSW (q-1)/p
||	mv.l2 b11, b6	     ;	load q[1]
 
||	stw.d2 b11, *b15--   ;	save q[1]
 
nop 2
 
mv.l1 a11, a6	     ;	load q[0]
||	mv.s1x b0, a4	     ;	load quotient[0]
*
||	ldw.d1 *+a12[3], a7  ;	load proda[3]
||	ldw.d2 *++b15[1], b11  ;  restore q[1]
 
ldw.d1 *+a12[2], a6  ;	load proda[2]
||	ldw.d2 *+b9[3], b7   ;	load prodb[3]
 
ldw.d1 *+a12[1], a5  ;	load proda[1]
||	ldw.d2 *+b9[2], b6   ;	load prodb[2]
 
ldw.d1 *a12, a4      ;	load proda[0]
||	ldw.d2 *+b9[1], b5   ;	load prodb[1]
 
ldw.d2 *b9, b4	     ;	load prodb[0]
 
nop
 
not.s2 b7, b8	     ;	not(prodb[3])
||	zero.l2 b9	     ;	load zero
||	mpy.m2 b7, 0, b7     ;	load zero
 
not.s2 b6, b6	     ;	not(prodb[2])
||	add.l2 b9:b8, 1, b9:b8 ;  -prodb[3]
||	mv.l1 a7, a8	     ;	load proda[3]
 
not.s2 b5, b8	     ;	not(prodb[1])
||	addu.l2 b7:b6, b9, b7:b6 ;  -prodb[2]
||	mv.l1x b8, a3	     ;	load -prodb[3]
||	zero.s1 a9	     ;	load zero
||	subab.d2 b9, b9, b9  ;	load zero
||	mpy.m1 a7, 0, a7     ;	load zero
 
not.s2 b4, b4	     ;	not(prodb[0])
||	addu.l2 b9:b8, b7, b9:b8  ;  -prodb[1]
||	addu.l1 a9:a8, a3, a9:a8  ;  differ[3]
||	mv.s1x b6, a2	     ;	load -prodb[2]
||	mpy.m1 a9, 0, a9     ;	load zero
 
addu.l1 a7:a6, a9, a7:a6  ;  proda[2] + carry
||	add.l2 b4, b9, b4    ;	- prodb[0]
||	mv.s2x a8, b13	     ;	load differ[3]
||	mpy.m1 a3, 0, a3     ;	load zero
 
addu.l1 a7:a6, a2, a7:a6  ;  differ[2]
 
||	b.s2 aloop	     ;	branch to loop beginning
||	addu.l1 a3:a2, a7, a3:a2  ;  proda[1] + carry
||	mv.s1x b8, a5	     ;	load -prodb[1]
 
addu.l1 a3:a2, a5, a3:a2  ;  differ[1]
||	zero.s1 a5	     ;	load zero
 
addu.l1 a5:a4, a3, a5:a4  ;  proda[0] + carry
 
add.l1x a4, b4, a4   ;	differ[0]
 
or.l1 a4, a2, a2     ;	differ[0] | differ[1]
 
[a2] b.s1 error	     ;	branch to error if non-zero
||	zero.l1 a1	     ;	clear flag
*
error	b.s2 error	     ;	spin if error
 
nop 5
*****************
*  end loop	*
*****************
||	not.l2x a14, b4      ;	not(LSW((q-1)/p))
||	zero.s2 b5	     ;	load zero
 
add.l2 b5:b4, 1, b5:b4 ;  -LSW((q-1)/p))
||	not.s1 a15, a0	     ;	not(MSW((q-1)/p))
||	mpy.m1 a5, 0, a5     ;	load zero
 
add.l2x a0, b5, b5   ;	-MSW((q-1)/p)
||	mv.l1x b4, a0	     ;	load -LSW((q-1)/p))
||	shru.s1 a15:a14, 1, a15:a14  ;	P[1] >> 1
||	addab.d1 a15, 0, a2  ;	save P[0]
 
addu.l1 a5:a4, a0, a5:a4  ;  LSW((q-1)/p)) - P[1]
||	shru.s1 a2, 1, a15   ;	P[0] >> 1
 
add.l1 a10, a5, a10  ;	MSW((q-1)/p)) + carry
||	mv.l2x a4, b10	     ;	load LSW((q-1)/p)) - P[1]
 
add.l1x a10, b5, a10 ;	MSW((q-1)/p)) - P[0]
 
or.l1x a10, b10, a2  ;	count[0] | count[1]
 
[!a2] b.s2 zskip	     ;	branch if count=0
 
nop 3
*****************
*  begin loop	*
*****************
bloop	cmpgtu.l1 a15, a10, a1	;  compare P[0] to MSW (q-1)/p
||	subab.d1 a15, a10, a2  ;  compare P[0] to MSW (q-1)/p
||	mv.s1x b10, a4	     ;	load LSW (q-1)/p
||	not.l2x a14, b4      ;	not(LSW((q-1)/p))
||	zero.s2 b5	     ;	load zero
 
[!a2] cmpgtu.l1 a14, a4, a1  ; compare P[1] to LSW (q-1)/p
||	add.l2 b5:b4, 1, b5:b4 ;  -LSW((q-1)/p))
||	not.s1 a15, a0	     ;	not(MSW((q-1)/p))
||	mpy.m1 a5, 0, a5     ;	load zero
 
add.l2x a0, b5, b5   ;	-MSW((q-1)/p)
||	mv.l1x b4, a0	     ;	load -LSW((q-1)/p))
||	shru.s1 a15:a14, 1, a15:a14  ;	P[1] >> 1
||	addab.d1 a15, 0, a2  ;	save P[0]
||	ldw.d2 *b14--, b6    ;	load LSW(x**(2**n))
||[!a1] b.s2 _bigbigp	     ;	call product subroutine
 
[!a1] addu.l1 a5:a4, a0, a5:a4  ;  LSW((q-1)/p)) - P[1]
||	shru.s1 a2, 1, a15   ;	P[0] >> 1
||	ldw.d2 *b14--, a6    ;	load MSW(x**(2**n))
|| [a1] b.s2 zskip	     ;	conditional branch
 
[!a1] add.l1 a10, a5, a10  ;	MSW((q-1)/p)) + carry
||[!a1] mv.l2x a4, b10	     ;	load LSW((q-1)/p)) - P[1]
||	mv.s1 a13, a4	     ;	load MSW(x**(2**n))
 
[!a1] add.l1x a10, b5, a10  ;  MSW((q-1)/p)) - P[0]
 
nop 2
*
nop
*
wskip	ldw.d1 *a12, a4      ;	load product[0]
||	mv.l2x a11, b8	     ;	load q[0]
||	mv.l1 a10, a13	     ;	save count[0]
 
ldw.d1 *+a12[1], b4  ;	load product[1]
||	mv.l1x b11, a10      ;	load q[1]
 
b.s2 _bigbigq	     ;	call division subroutine
||	ldw.d1 *+a12[2], a6  ;	load product[2]
||	mv.l2x a8, b13	     ;	save address of quotient
 
ldw.d1 *+a12[3], b6  ;	load product[3]
 
nop 4
*
xskip	mv.l1 a13, a10	     ;	load count[0]
||	ldw.d2 *+b13[2], a4  ;	load quotient[2]
||	b.s1 _bigbigp	     ;	call multiplication subroutine
 
ldw.d2 *+b13[3], b4  ;	load quotient[3]
||	mv.l2 b11, b6	     ;	load q[1]
 
mv.l2x a8, b13	     ;	save address of product
 
nop 2
*
yskip	ldw.d1 *+a12[3], a7  ;	load proda[3]
 
ldw.d1 *+a12[2], a6  ;	load proda[2]
||	ldw.d2 *+b13[3], b7  ;	load prodb[3]
 
ldw.d1 *+a12[1], a5  ;	load proda[1]
||	ldw.d2 *+b13[2], b6  ;	load prodb[2]
 
ldw.d1 *a12, a4      ;	load proda[0]
||	ldw.d2 *+b13[1], b5  ;	load prodb[1]
 
ldw.d2 *b13, b4      ;	load prodb[0]
 
nop
 
not.s2 b7, b8	     ;	not(prodb[3])
||	zero.l2 b9	     ;	load zero
||	mpy.m2 b7, 0, b7     ;	load zero
 
not.s2 b6, b6	     ;	not(prodb[2])
||	add.l2 b9:b8, 1, b9:b8 ;  -prodb[3]
||	mv.l1 a7, a8	     ;	load proda[3]
 
not.s2 b5, b8	     ;	not(prodb[1])
||	addu.l2 b7:b6, b9, b7:b6 ;  -prodb[2]
||	mv.l1x b8, a3	     ;	load -prodb[3]
||	zero.s1 a9	     ;	load zero
||	subab.d2 b9, b9, b9  ;	load zero
||	mpy.m1 a7, 0, a7     ;	load zero
 
not.s2 b4, b4	     ;	not(prodb[0])
||	addu.l2 b9:b8, b7, b9:b8  ;  -prodb[1]
||	addu.l1 a9:a8, a3, a9:a8  ;  differ[3]
||	mv.s1x b6, a2	     ;	load -prodb[2]
||	mpy.m1 a9, 0, a9     ;	load zero
 
addu.l1 a7:a6, a9, a7:a6  ;  proda[2] + carry
||	add.l2 b4, b9, b4    ;	- prodb[0]
||	mv.s2x a8, b13	     ;	load differ[3]
||	mpy.m1 a3, 0, a3     ;	load zero
 
mv.s1x b8, a1	     ;	load -prodb[1]
||	addu.l1 a7:a6, a2, a7:a6  ;  differ[2]
 
||	addu.l1 a3:a2, a7, a3:a2  ;  proda[1] + carry
||	mv.s1x b8, a5	     ;	load -prodb[1]
 
addu.l1 a3:a2, a5, a3:a2  ;  differ[1]
||	zero.s1 a5	     ;	load zero
 
addu.l1 a5:a4, a3, a5:a4  ;  proda[0] + carry
 
add.l1x a4, b4, a4   ;	differ[0]
 
or.l1 a4, a2, a2     ;	differ[0] | differ[1]
 
[a2] b.s1 error	     ;	branch to error if non-zero
|| [a2] zero.l1 a10	     ;	load 0
|| [a2] zero.l2 b10	     ;	load 0
 
zskip	or.l1x a10, b10, a2  ;	count[0] | count[1]
 
[a2] b.s2 bloop	     ;	branch if not zero
 
nop 5
*****************
*  end loop	*
*****************
ldw.d2 *++b15, b3    ;	restore return address
 
ldw.d2 *++b15, a8    ;	restore address of output
 
ldw.d2 *++b15, a15   ;	restore a15
 
ldw.d2 *++b15, b14   ;	restore b14
 
ldw.d2 *++b15, a14   ;	restore a14
 
ldw.d2 *++b15, b13   ;	restore b13
 
ldw.d2 *++b15, a13   ;	restore a13
 
ldw.d2 *++b15, b12   ;	restore b12
||	stw.d1 a13, *a8      ;	store residue
 
ldw.d2 *++b15, a12   ;	restore a12
||	stw.d1 b13, *+a8[1]  ;	store residue
 
ldw.d2 *++b15, b11   ;	restore b11
 
ldw.d2 *++b15, a11   ;	restore a11
 
ldw.d2 *++b15, b10   ;	restore b10
||	b.s2 b3 	     ;	return
 
ldw.d2 *++b15, a10   ;	restore a10
 
nop 4
.end
*******************************************************************************
*									      *
*  32x32 SIGNED MULTIPLY						      *
*  03/30/02 (dkc)							      *
*									      *
*  This C callable C60 subroutine does a 32x32 signed multiply.  The calling  *
*  sequence of the subroutine is;					      *
*									      *
*     multiplicand => a4						      *
*     multiplier => b4							      *
*     address of product (a two-word array) => a6			      *
*									      *
*******************************************************************************
; _midprod -- 32x32 multiply producing a 64-bit result from four 16x16
; partial products.
;   a4 = multiplicand, b4 = multiplier, a6 = address of a two-word result
;   The word at *a6 receives the upper 32 bits, the word at a6[1] the lower.
; The upper halfwords are multiplied as signed values (mpyhslu, mpyh) and the
; lower halfwords as unsigned (mpyu).
; NOTE(review): the C driver prototypes the operands as unsigned int; the
; signed handling only differs when bit 31 of an operand is set -- confirm
; that callers never pass such values.
.def _midprod
.text
_midprod:
mpyhslu.m1x a4, b4, a0		;  A_hi * B_lo
||	mpyhslu.m2x b4, a4, b0		;  B_hi * A_lo

mpyu.m1x a4, b4, a1		;  A_lo * B_lo
||	mpyh.m2x b4, a4, b1		;  A_hi * B_hi

; The return branch is issued in the next packet; the five packets after it
; are its delay slots, so the remaining arithmetic and both stores complete
; before control reaches the caller.
shl.s1 a0, 16, a4		;  A_hi*B_lo << 16
||	zero.l1 a5			;  zero odd register of pair (a5 collects carries)
||	b.s2 b3 			;  return

; addu reads a1 (A_lo*B_lo) in the same cycle that shl overwrites it for the
; following packet.
shl.s1x b0, 16, a1		;  B_hi*A_lo << 16
||	shr.s2x a0, 16, b2		;  A_hi*B_lo >> 16 (arithmetic shift)
||	addu.l1 a5:a4, a1, a5:a4	;  sum_lo = A_hi*B_lo<<16 + A_lo*B_lo

add.l2 b1, b2, b1		;  sum_hi = A_hi*B_lo>>16 + A_hi*B_hi
||	shr.s2 b0, 16, b0		;  B_hi*A_lo >> 16 (arithmetic shift)
||	addu.l1 a5:a4, a1, a5:a4	;  sum_lo = sum_lo + B_hi*A_lo<<16

add.l2 b1, b0, b1		;  sum_hi = sum_hi + B_hi*A_lo>>16
||	stw.d1 a4, *+a6[1]		;  store LSW of product

add.l2x b1, a5, b1		;  sum_hi = sum_hi + carries out of sum_lo

stw.d1 b1, *a6			;  store MSW of product
.end
******************************************************************************
*									     *
*  COMPUTE QUOTIENT FOR DIVISORS OF P, P*P, OR 2*P			     *
*  09/23/05 (dkc)							     *
*									     *
******************************************************************************
; _powquot -- divide by a small constant by multiplying with a reciprocal
; taken from _recip1 (via _midprod) and returning the first product word.
;   a4 = value to divide, b4 = word index into the reciprocal table
;   returns a4 = X[0], the word loaded from the address saved in a10
; NOTE(review): this listing appears to have lost lines.  The _powquot entry
; label is not visible, two packets below begin with "||" and have no first
; instruction, and nothing visible loads b0 (table base), a6 (address of X)
; or b3 (return address for the _midprod call).  Restore these from the
; original source before assembling.
.global _powquot
.global _midprod, _recip1
.bss X, 2*4, 4
.text
||	 stw.d2 a10, *b15--	 ;  save a10

; b10 is pushed and, in the same packet, overwritten with the caller's return
; address (the store reads the old b10).
||	 mv.l2 b3, b10		 ;  save return address
||	 stw.d2 b10, *b15--	 ;  save b10
||	 b.s1 _midprod		 ;  call product subroutine

; The load uses the incoming b4 as index while mv replaces b4 with the value
; to multiply (both read their sources before either result is written).
ldw.d2 *+b0[b4], a4	 ;  load recip1[index]
||	 mv.l2x a4, b4		 ;  load a

mv.l1 a6, a10		 ;  save address of X

nop

*
; Return point from _midprod: fetch the result word and return through b10.
askip	 ldw.d2 *++b15, b10	 ;  restore b10
||	 ldw.d1 *a10, a4	 ;  load X[0]
||	 b.s2 b10		 ;  return (reads b10 before the restore lands)

ldw.d2 *++b15, a10	 ;  restore a10

nop 4
.end
*******************************************************************************
*									      *
*  COMPUTE d**p (64x16 unsigned product)				      *
*  01/30/99 (dkc)							      *
*									      *
*     address of multiplicand vector (d[0], d[1]) => a4 		      *
*     multiplier => b4							      *
*     number of iterations => a6					      *
*									      *
*******************************************************************************
; _product -- multiply a 64-bit value by a 16-bit multiplier, repeated a6
; times, and write the result back over the input.
;   a4 = address of the two-word vector (word 0 = upper half, word 1 = lower)
;   b4 = multiplier; only its low 16 bits are used (mpyu / mpyhlu)
;   a6 = number of iterations; the loop body runs (a6-1)+1 times, so a6 must
;        be at least 1
; Bits carried out of the upper word are discarded.
.def _product
.text
_product:
sub.l2x a6, 1, b2		;  load decremented loop count
||	mv.s2 b4, b6			;  load multiplier
||	mv.s1x b4, a6			;  load multiplier
||	mv.l1 a4, a8			;  save address of input vector
||	ldw.d1 *+a4[1], b4		;  load input[1]

ldw.d1 *a4, a4			;  load input[0]

nop 4
*****************
*  begin loop	*
*****************
; d3..d0 are the four 16-bit digits of the 64-bit value (d3 most significant).
; The loop branch sits in the second packet so that the five packets after
; it fill its delay slots.
aloop	mpyhlu.m1 a4, a6, a0		;  d3 * p
||	mpyhlu.m2 b4, b6, b0		;  d1 * p

mpyu.m1 a4, a6, a4		;  d2 * p
||	mpyu.m2 b4, b6, b4		;  d0 * p
|| [b2] b.s2 aloop			;  conditional branch to loop beginning
|| [b2] sub.l2 b2, 1, b2		;  decrement loop count

shl.s1 a0, 16, a0		;  d3*p << 16
||	shl.s2 b0, 16, b1		;  d1*p << 16
||	zero.l2 b5			;  zero odd register of pair

add.l1 a4, a0, a4		;  d3::d2 = d3*p<<16 + d2*p
||	addu.l2 b5:b4, b1, b5:b4	;  d1::d0 = d1*p<<16 + d0*p

and.l2 b5, 1, b5		;  isolate carry bit
||	shru.s2 b0, 16, b0		;  d1*p >> 16

add.l2 b5, b0, b5		;  d1*p>>16 + carry

add.l1x a4, b5, a4		;  d3::d2 = d3::d2+(d1*p>>16 + carry)
*****************
*  end loop	*
*****************
; The stores and the nop 4 below occupy the delay slots of the return branch.
b.s2 b3 			;  return
||	stw.d1 a4, *a8			;  store MSW of product

stw.d1 b4, *+a8[1]		;  store LSW of product

nop 4
.end
*******************************************************************************
*									      *
*  COMPUTE QUOTIENTS							      *
*  01/31/99 (dkc)							      *
*									      *
*     address of dividend (A[0], A[1]) => a4				      *
*     address of quotient (B[0], B[1]) => b4				      *
*     divisor => a6							      *
*									      *
*******************************************************************************
; _quotient -- divide a 64-bit value by a 32-bit divisor using a bit-serial
; shift-and-subtract loop.
;   a4 = address of dividend A (A[0] = upper word, A[1] = lower word)
;   b4 = address of quotient B (B[0] = upper word, B[1] = lower word)
;   a6 = divisor
; Method: shift = lmbd(divisor)+32 - lmbd(dividend) aligns the divisor's
; leading one with the dividend's.  D = divisor << shift is formed as a
; 64-bit value, and the loop runs shift+1 times.  Each pass either shifts A
; left by one (difference negative) or replaces A with ((A-D)<<1)|1, so the
; quotient bits collect in the low end of A.  After the loop the quotient is
; isolated with extu.
; If A[0] is zero and A[1] < divisor the routine branches to askip with both
; result words cleared.
; a10 and a11 are saved on the stack and restored.
; NOTE(review): the packet that stores B[0] starts with "||" and the askip
; label and return branch are not visible -- a line appears to have been lost
; from this listing.
.global _quotient
.text
_quotient:
ldw.d1 *a4, a1	      ;  load A[0]
||	lmbd.l1 1, a6, a5     ;  left-most bit detection
||	mvk.s1 32, a3	      ;  load 32

ldw.d1 *+a4[1], a4    ;  load A[1]
||	add.l1 a5, a3, a5     ;  divisor left-most bit detection

nop 4

lmbd.l1 1, a1, a0     ;  left-most bit detection
||	lmbd.l2x 1, a4, b0    ;  left-most bit detection
||	zero.s1 a2	      ;  clear flag

[!a1] add.s1x b0, a3, a0    ;  dividend left-most bit detection
||[!a1] cmpltu.l1 a4, a6, a2  ;  compare A[1] to divisor

sub.l1 a5, a0, a8     ;  shift = lmbd(1,x2) - lmbd(1,x1)
||	mv.l2x a3, b0	      ;  load 32
||	mpy.m1 a7, 0, a7      ;  clear D[0]
|| [a2] b.s2 askip	      ;  return zero
||	mv.s1 a1, a5	      ;  load A[0]

; The [a2] predicates below still see the early-out flag; cmplt replaces a2
; only at the end of this packet.
cmplt.l1 a8, a3, a2   ;  compare shift to 32
||	sub.l2x b0, a8, b0    ;  32 - shift
||	sub.s1 a8, a3, a9     ;  shift - 32
|| [a2] mpy.m1 a5, 0, a5      ;  load 0
|| [a2] subab.d1 a4, a4, a4   ;  load 0
||	mvk.s2 32, b5	      ;  load 32

; Build D = divisor << shift in a7 (upper) : a6 (lower).
[!a2] mv.l1 a6, a7	      ;  D[0] = D[1]
||[!a2] mpy.m1 a6, 0, a6      ;  clear D[1]
|| [a2] shl.s1 a6, a8, a6     ;  D[1] << shift
|| [a2] shru.s2x a6, b0, b0   ;  D[0] = D[1] >> (32-shift)

[!a2] shl.s1 a7, a9, a7     ;  D[0] = D[0] << (shift-32)
|| [a2] mv.l1x b0, a7	      ;  load D[0]
||	mvk.s2 63, b0	      ;  load 63
||	mv.l2x a8, b2	      ;  load shift
||	subab.d1 a9, a9, a9   ;  load 0

not.l1 a7, a0	      ;  invert D[0]
||	not.s1 a6, a8	      ;  invert D[1]
||	mv.l2x a8, b2	      ;  load shift as loop count (loop runs shift+1 times)
||	subab.d2 b0, b2, b0   ;  63 - shift
||	mvk.s2 31, b6	      ;  load 31

; NOTE(review): forming -D from ~D needs +1, but in the visible code a1 still
; holds A[0] at this point (loaded at entry, not rewritten since).  Check the
; original source for an instruction that sets a1 to 1 before this addu.
cmplt.l2 b0, b5, b1   ;  b1 = (63-shift < 32)
||	shl.s2 b0, 5, b9      ;  (63-shift) << 5
||	subab.d2 b6, b2, b6   ;  31 - shift
||	addu.l1 a9:a8, a1, a9:a8  ;   -D[0]::D[1]
*
and.l1 a9, 1, a9      ;  isolate carry bit
||	mv.s1 a5, a10	      ;  save A[0]
||	mpy.m1 a5, 0, a5      ;  load 0
||	or.l2 b9, b0, b9      ;  (63-shift)::(63-shift), extu shift-field pair
||	shl.s2 b6, 5, b7      ;  (31-shift) << 5
||	stw.d2 a10, *b15--    ;  save a10

add.l1 a0, a9, a9     ;  -D[0]::D[1]
||	cmpgt.l2 b0, b5, b0   ;  b0 = (63-shift > 32)
||	or.s2 b6, b7, b6      ;  (31-shift)::(31-shift), extu shift-field pair
||	zero.s1 a1	      ;  load 0
||	stw.d2 a11, *b15--    ;  save a11
*****************
*  begin loop	*
*****************
; Working value: a10 = A[0] (upper), a4 = A[1] (lower); a9:a8 = negated D.
; The branch is in the first packet; the other five packets are its delay
; slots.
aloop	addu.l1 a5:a4, a8, a1:a0  ;  A[1] - D[1]
||	shru.s1 a4, 31, a3    ;  isolate MSB of A[1]
||	addab.d1 a10, a10, a6 ;  A[0] << 1
|| [b2] b.s2 aloop	      ;  conditional branch to loop beginning
|| [b2] sub.l2 b2, 1, b2      ;  decrement loop count

and.l1 a1, 1, a7      ;  isolate carry bit
||	addab.d1 a10, a9, a1  ;  A[0] - D[0] - carry
||	shl.s1 a4, 1, a11     ;  A[1] << 1

add.l1 a1, a7, a1     ;  A[0] - D[0]
||	or.s1 a3, a6, a3      ;  A[0] << 1 | LSB

cmplt.l1 a1, 0, a2    ;  compare A[1]::A[0] - D[1]::D[0] to zero
||	shl.s1 a1:a0, 1, a1:a0	;  (A[1] - D[1]) << 1
||	addab.d1 a1, a1, a7   ;  (A[0] - D[0]) << 1

; mpy stages the extu field pair for the first post-loop packet; its result
; reaches a0 one cycle after the "or" result that the next packet reads.
[a2] addab.d1 a3, 0, a10   ;  if less than, A[0] = A[0] << 1
|| [a2] mv.s1 a11, a4	      ;  if less than, A[1] = A[1] << 1
||	or.l1 a0, 1, a0       ;  ((A[1] - D[1]) << 1) | 1
||	and.l2x a1, 1, b7     ;  isolate LSB of (A[0] - D[0])
||	mpy.m1x b9, 1, a0     ;  load (63-shift)::(63-shift)

[!a2] or.l1x a7, b7, a10    ;  if greater than or equal, A[0] = (delta<<1)|1
||[!a2] mv.s1 a0, a4	      ;  if greater than or equal, A[1] = (delta<<1)|1
||	subab.d1 a1, a1, a1   ;  load 0
*****************
*  end loop	*
*****************
; Keep only the quotient bits.  extu with equal left/right shift amounts
; clears the bits above the quotient.
[b1] extu.s1 a10, a0, a5   ;  upper quotient word: low 32-(63-shift) bits of A[0]
||	mv.l1x b6, a0	      ;  (31-shift)::(31-shift)
||	ldw.d2 *++b15[1], a11 ;  restore a11

[!b1] zero.l1 a5	      ;  zero A[0]
|| [b0] extu.s1 a4, a0, a4    ;  lower quotient word: low 32-(31-shift) bits of A[1]
||	ldw.d2 *++b15[1], a10 ;  restore a10

||	stw.d2 a5, *b4	      ;  store quotient

stw.d2 a4, *+b4[1]    ;  store quotient

nop 4
.end
*******************************************************************************
*									      *
*  SEARCH								      *
*  09/25/05 (dkc)							      *
*									      *
*******************************************************************************
; _search -- strip prime factors from the 64-bit value S and check that each
; factor found occurs a multiple-of-3 number of times.
;   a4 = address of S (S[0] = upper word, S[1] = lower word); S is updated
;        in place as factors are divided out
;   returns a4 = number of table primes that divided S, provided every such
;        prime divided it a multiple-of-3 number of times and S was reduced
;        to exactly 1; otherwise 0
; For each table entry l: _quotient then _bigprod are called and X[1]:X[2]
; is compared with S (presumably V = S / l and X = V * l, so equality means
; l divides S).  While equal, S is replaced by V and m counts the divisions.
; _powquot(m, 0) supplies m/3 for the multiple-of-3 test.
; FIX: the table entry is now read with ldhu.  _table3 is assembled from
; .half (16-bit) entries and the C driver declares it unsigned short, so the
; previous word load fetched two entries at once and advanced the pointer by
; four bytes.
; NOTE(review): this listing appears to have lost lines.  The _search entry
; label, the bloop label, and the first instruction of every packet that
; begins with "||" are not visible, nor is the set-up of b10 (table pointer,
; presumably inside _table3), b11 (V), b12 (X), a13 (loop count) and of the
; return addresses for the calls.  Restore them from the original source.
.global _search
.global _quotient, _bigprod, _powquot, _table3
.bss T, 2*4, 4
.bss V, 2*4, 4
.bss X, 3*4, 4
.text
||	 stw.d2 b10, *b15--	   ;  save b10

||	 mv.l1 a4, a10		   ;  save address of S
||	 stw.d2 a10, *b15--	   ;  save a10

||	 stw.d2 b11, *b15--	   ;  save b11

||	 stw.d2 a11, *b15--	   ;  save a11

||	 stw.d2 b12, *b15--	   ;  save b12

||	 stw.d2 a12, *b15--	   ;  save a12

stw.d2 b13, *b15--	   ;  save b13
||	 zero.l2 b13		   ;  count=0

stw.d2 a13, *b15--	   ;  save a13

stw.d2 b3, *b15--	   ;  save return address
*****************
*  begin loop	*
*****************
; The loaded l arrives in a6 before _quotient's first packet executes (the
; load's delay slots overlap the branch delay slots).
aloop	 ldhu.d2 *b10++, a6	   ;  l=table[i] (16-bit entry, pointer += 2)
||	 b.s2 _quotient 	   ;  call quotient subroutine

nop 3

||	 mv.l1 a6, a11		   ;  save l
*
||	 mv.l1 a11, a6		   ;  load l
||	 b.s2 _bigprod		   ;  call product subroutine

ldw.d2 *+b11[1], b4	   ;  load V[1]

nop 2

*
bskip	 ldw.d1 *a10, a5	   ;  load S[0]
||	 ldw.d2 *+b12[1], b5	   ;  load X[1]

ldw.d1 *+a10[1], a7	   ;  load S[1]
||	 ldw.d2 *+b12[2], b7	   ;  load X[2]

ldw.d2 *b11, a9	   ;  load V[0]

ldw.d2 *+b11[1], b9	   ;  load V[1]

nop

cmpeq.l1x a5, b5, a1	   ;  compare S[0] to X[1]

cmpeq.l1x a7, b7, a2	   ;  compare S[1] to X[2]

and.l1 a1, a2, a2	   ;  "and" conditions

[!a2] b.s2 yskip		   ;  continue if ((S[0]!=X[1])||(S[1]!=X[2]))
||  [a2] stw.d1 a9, *a10	   ;  S[0]=V[0]
||	 zero.s1 a12		   ;  m=0

[a2] stw.d1 b9, *+a10[1]	   ;  S[1]=V[1]

nop 4
***********************
*  begin inner loop   *
***********************
||	 add.s1 a12, 1, a12	   ;  m=m+1
||	 b.s2 _quotient 	   ;  call quotient subroutine

nop 3

*
cskip	 ldw.d2 *b11, a4	   ;  load V[0]
||	 mv.l1 a11, a6		   ;  load l
||	 b.s2 _bigprod		   ;  call product subroutine

ldw.d2 *+b11[1], b4	   ;  load V[1]

nop 2

*
dskip	 ldw.d1 *a10, a5	   ;  load S[0]
||	 ldw.d2 *+b12[1], b5	   ;  load X[1]

ldw.d1 *+a10[1], a7	   ;  load S[1]
||	 ldw.d2 *+b12[2], b7	   ;  load X[2]

ldw.d2 *b11, a9	   ;  load V[0]

ldw.d2 *+b11[1], b9	   ;  load V[1]

nop

cmpeq.l1x a5, b5, a1	   ;  compare S[0] to X[1]

cmpeq.l1x a7, b7, a2	   ;  compare S[1] to X[2]

and.l1 a1, a2, a2	   ;  "and" conditions

[a2] b.s2 bloop		   ;  loop if ((S[0]==X[1])&&(S[1]==X[2]))
||  [a2] stw.d1 a9, *a10	   ;  S[0]=V[0]

[a2] stw.d1 b9, *+a10[1]	   ;  S[1]=V[1]

nop 4
*********************
*  end inner loop   *
*********************
; m = number of times l divided S.  Index 0 selects the first reciprocal
; table entry, so _powquot returns m/3.
b.s2 _powquot		   ;  call _powquot for m/3
||	 mv.l1 a12, a4		   ;  load m
||	 zero.l2 b4		   ;  zero index

nop 3

*
eskip	 shl.s1 a4, 1, a0	   ;  (m/3)*2
||	 ldw.d1 *a10, a5	   ;  load S[0]

add.l1 a4, a0, a4	   ;  (m/3)*3
||	 ldw.d1 *+a10[1], a7	   ;  load S[1]

cmpeq.l1 a4, a12, a2	   ;  compare (m/3)*3 to m

; When the branch to zskip is taken, the five packets below (through the
; conditional zero of a13) still execute in its delay slots.
[!a2] b.s2 zskip		   ;  if ((m/3)*3!=m) break;
|| [!a2] subab.d2 b13, b13, b13    ;  if ((m/3)*3!=m) count=0
||  [a2] add.l2 b13, 1, b13	   ;  if ((m/3)*3==m) count=count+1

nop

cmpeq.l1 a5, 0, a1	   ;  compare S[0] to 0

cmpeq.l1 a7, 1, a2	   ;  compare S[1] to 1

and.l1 a1, a2, a2	   ;  "and" conditions

[a2] zero.l1 a13		   ;  if ((S[0]==0)&&(S[1]==1)) break;
*
yskip	 mv.l1 a13, a2		   ;  load loop count

[a2] b.s2 aloop		   ;  next table entry while loop count != 0
||  [a2] sub.l1 a13, 1, a13	   ;  decrement loop count

nop 5
*****************
*  end loop	*
*****************
; Epilogue: restore registers in reverse order of the saves while testing
; whether S ended up equal to 1.
zskip	 ldw.d1 *a10, a5	   ;  load S[0]
||	 mv.l1x b13, a4 	   ;  return count

ldw.d1 *+a10[1], a7	   ;  load S[1]

ldw.d2 *++b15, b3	   ;  restore return address

ldw.d2 *++b15, a13	   ;  restore a13

ldw.d2 *++b15, b13	   ;  restore b13

ldw.d2 *++b15, a12	   ;  restore a12
||	 cmpeq.l1 a5, 0, a1	   ;  compare S[0] to 0

ldw.d2 *++b15, b12	   ;  restore b12
||	 cmpeq.l1 a7, 1, a2	   ;  compare S[1] to 1

ldw.d2 *++b15, a11	   ;  restore a11
||	 and.l1 a1, a2, a2	   ;  "and" conditions

ldw.d2 *++b15, b11	   ;  restore b11
|| [!a2] zero.l1 a4		   ;  if ((S[0]!=0)||(S[1]!=1)) count=0

ldw.d2 *++b15, a10	   ;  restore a10
||	 b.s2 b3		   ;  return

ldw.d2 *++b15, b10	   ;  restore b10

nop 4
.end
*******************************************************************************
*									      *
*  SEARCH								      *
*  09/25/05 (dkc)							      *
*									      *
*******************************************************************************
; _search -- strip prime factors from the 64-bit value S and check that each
; factor found occurs a multiple-of-3 number of times.
;   a4 = address of S (S[0] = upper word, S[1] = lower word); S is updated
;        in place as factors are divided out
;   returns a4 = number of table primes that divided S, provided every such
;        prime divided it a multiple-of-3 number of times and S was reduced
;        to exactly 1; otherwise 0
; For each 16-bit table entry l: _quotient then _bigprod are called and
; X[1]:X[2] is compared with S (presumably V = S / l and X = V * l, so
; equality means l divides S).  While equal, S is replaced by V and m counts
; the divisions.  _powquot(m, 0) supplies m/3 for the multiple-of-3 test.
; NOTE(review): this listing appears to have lost lines.  The _search entry
; label, the bloop label, and the first instruction of every packet that
; begins with "||" are not visible, nor is the set-up of b10 (table pointer,
; presumably inside _table3), b11 (V), b12 (X), a13 (loop count) and of the
; return addresses for the calls.  Restore them from the original source.
.global _search
.global _quotient, _bigprod, _powquot, _table3
.bss T, 2*4, 4
.bss V, 2*4, 4
.bss X, 3*4, 4
.text
||	 stw.d2 b10, *b15--	   ;  save b10

||	 mv.l1 a4, a10		   ;  save address of S
||	 stw.d2 a10, *b15--	   ;  save a10

||	 stw.d2 b11, *b15--	   ;  save b11

||	 stw.d2 a11, *b15--	   ;  save a11

||	 stw.d2 b12, *b15--	   ;  save b12

||	 stw.d2 a12, *b15--	   ;  save a12

stw.d2 b13, *b15--	   ;  save b13
||	 zero.l2 b13		   ;  count=0

stw.d2 a13, *b15--	   ;  save a13

stw.d2 b3, *b15--	   ;  save return address
*****************
*  begin loop	*
*****************
; The loaded l arrives in a6 before _quotient's first packet executes (the
; load's delay slots overlap the branch delay slots).
aloop	 ldhu.d2 *b10++, a6	   ;  l=table[i] (16-bit entry, pointer += 2)
||	 b.s2 _quotient 	   ;  call quotient subroutine

nop 3

||	 mv.l1 a6, a11		   ;  save l
*
||	 mv.l1 a11, a6		   ;  load l
||	 b.s2 _bigprod		   ;  call product subroutine

ldw.d2 *+b11[1], b4	   ;  load V[1]

nop 2

*
bskip	 ldw.d1 *a10, a5	   ;  load S[0]
||	 ldw.d2 *+b12[1], b5	   ;  load X[1]

ldw.d1 *+a10[1], a7	   ;  load S[1]
||	 ldw.d2 *+b12[2], b7	   ;  load X[2]

ldw.d2 *b11, a9	   ;  load V[0]

ldw.d2 *+b11[1], b9	   ;  load V[1]

nop

cmpeq.l1x a5, b5, a1	   ;  compare S[0] to X[1]

cmpeq.l1x a7, b7, a2	   ;  compare S[1] to X[2]

and.l1 a1, a2, a2	   ;  "and" conditions

[!a2] b.s2 yskip		   ;  continue if ((S[0]!=X[1])||(S[1]!=X[2]))
||  [a2] stw.d1 a9, *a10	   ;  S[0]=V[0]
||	 zero.s1 a12		   ;  m=0

[a2] stw.d1 b9, *+a10[1]	   ;  S[1]=V[1]

nop 4
***********************
*  begin inner loop   *
***********************
||	 add.s1 a12, 1, a12	   ;  m=m+1
||	 b.s2 _quotient 	   ;  call quotient subroutine

nop 3

*
cskip	 ldw.d2 *b11, a4	   ;  load V[0]
||	 mv.l1 a11, a6		   ;  load l
||	 b.s2 _bigprod		   ;  call product subroutine

ldw.d2 *+b11[1], b4	   ;  load V[1]

nop 2

*
dskip	 ldw.d1 *a10, a5	   ;  load S[0]
||	 ldw.d2 *+b12[1], b5	   ;  load X[1]

ldw.d1 *+a10[1], a7	   ;  load S[1]
||	 ldw.d2 *+b12[2], b7	   ;  load X[2]

ldw.d2 *b11, a9	   ;  load V[0]

ldw.d2 *+b11[1], b9	   ;  load V[1]

nop

cmpeq.l1x a5, b5, a1	   ;  compare S[0] to X[1]

cmpeq.l1x a7, b7, a2	   ;  compare S[1] to X[2]

and.l1 a1, a2, a2	   ;  "and" conditions

[a2] b.s2 bloop		   ;  loop if ((S[0]==X[1])&&(S[1]==X[2]))
||  [a2] stw.d1 a9, *a10	   ;  S[0]=V[0]

[a2] stw.d1 b9, *+a10[1]	   ;  S[1]=V[1]

nop 4
*********************
*  end inner loop   *
*********************
; m = number of times l divided S.  Index 0 selects the first reciprocal
; table entry, so _powquot returns m/3.
b.s2 _powquot		   ;  call _powquot for m/3
||	 mv.l1 a12, a4		   ;  load m
||	 zero.l2 b4		   ;  zero index

nop 3

*
eskip	 shl.s1 a4, 1, a0	   ;  (m/3)*2
||	 ldw.d1 *a10, a5	   ;  load S[0]

add.l1 a4, a0, a4	   ;  (m/3)*3
||	 ldw.d1 *+a10[1], a7	   ;  load S[1]

cmpeq.l1 a4, a12, a2	   ;  compare (m/3)*3 to m

; When the branch to zskip is taken, the five packets below (through the
; conditional zero of a13) still execute in its delay slots.
[!a2] b.s2 zskip		   ;  if ((m/3)*3!=m) break;
|| [!a2] subab.d2 b13, b13, b13    ;  if ((m/3)*3!=m) count=0
||  [a2] add.l2 b13, 1, b13	   ;  if ((m/3)*3==m) count=count+1

nop

cmpeq.l1 a5, 0, a1	   ;  compare S[0] to 0

cmpeq.l1 a7, 1, a2	   ;  compare S[1] to 1

and.l1 a1, a2, a2	   ;  "and" conditions

[a2] zero.l1 a13		   ;  if ((S[0]==0)&&(S[1]==1)) break;
*
yskip	 mv.l1 a13, a2		   ;  load loop count

[a2] b.s2 aloop		   ;  next table entry while loop count != 0
||  [a2] sub.l1 a13, 1, a13	   ;  decrement loop count

nop 5
*****************
*  end loop	*
*****************
; Epilogue: restore registers in reverse order of the saves while testing
; whether S ended up equal to 1.
zskip	 ldw.d1 *a10, a5	   ;  load S[0]
||	 mv.l1x b13, a4 	   ;  return count

ldw.d1 *+a10[1], a7	   ;  load S[1]

ldw.d2 *++b15, b3	   ;  restore return address

ldw.d2 *++b15, a13	   ;  restore a13

ldw.d2 *++b15, b13	   ;  restore b13

ldw.d2 *++b15, a12	   ;  restore a12
||	 cmpeq.l1 a5, 0, a1	   ;  compare S[0] to 0

ldw.d2 *++b15, b12	   ;  restore b12
||	 cmpeq.l1 a7, 1, a2	   ;  compare S[1] to 1

ldw.d2 *++b15, a11	   ;  restore a11
||	 and.l1 a1, a2, a2	   ;  "and" conditions

ldw.d2 *++b15, b11	   ;  restore b11
|| [!a2] zero.l1 a4		   ;  if ((S[0]!=0)||(S[1]!=1)) count=0

ldw.d2 *++b15, a10	   ;  restore a10
||	 b.s2 b3		   ;  return

ldw.d2 *++b15, b10	   ;  restore b10

nop 4
.end
*******************************************************************************
*									      *
*  LEFT SHIFT BY 16							      *
*  12/07/00 (dkc)							      *
*									      *
*     d[0] => a4							      *
*     d[1] => b4							      *
*     d[2] => a6							      *
*     d[3] => b6							      *
*     address of output => a8						      *
*									      *
*******************************************************************************
; _shift16 -- shift a 128-bit value, passed as four words d[0] (most
; significant) .. d[3], left by 16 bits and store the four result words.
;   a4 = d[0], b4 = d[1], a6 = d[2], b6 = d[3], a8 = address of output
;   out[0] = (d[0]<<16)|(d[1]>>16)    out[1] = (d[1]<<16)|(d[2]>>16)
;   out[2] = (d[2]<<16)|(d[3]>>16)    out[3] =  d[3]<<16
; The upper 16 bits of d[0] are shifted out and lost.
.global _shift16
.text
_shift16:
shl.s1 a4, 16, a4	;  d[0] << 16
||	shru.s2 b4, 16, b0	;  d[1] >> 16

; Return branch: the five packets that follow are its delay slots.
b.s2 b3			;  return

or.l1x a4, b0, a4	;  out[0]
||	shl.s2 b4, 16, b4	;  d[1] << 16
||	shru.s1 a6, 16, a0	;  d[2] >> 16

or.l2x b4, a0, b4	;  out[1]
||	shl.s1 a6, 16, a6	;  d[2] << 16
||	shru.s2 b6, 16, b0	;  d[3] >> 16
||	stw.d1 a4, *a8		;  store out[0]

or.l1x a6, b0, a6	;  out[2]
||	shl.s2 b6, 16, b6	;  out[3] = d[3] << 16
||	stw.d1 b4, *+a8[1]	;  store out[1]

stw.d1 a6, *+a8[2]	;  store out[2]

stw.d1 b6, *+a8[3]	;  store out[3]
.end
*******************************************************************************
*									      *
*  COMPUTE SUMS 							      *
*  02/01/99 (dkc)							      *
*									      *
*     address of addend (A[0], A[1]) => a4				      *
*     address of augend (B[0], B[1]) => b4; the sum is stored here	      *
*									      *
*******************************************************************************
; _sum -- 64-bit addition B = A + B on two-word values (word 0 = upper half,
; word 1 = lower half).
;   a4 = address of A (read only)
;   b4 = address of B (read, then overwritten with the sum)
; A carry out of the upper word is discarded.
.global _sum
.text
_sum	ldw.d1 *+a4[1], a0	  ;  load A[1]
||	ldw.d2 *+b4[1], b0	  ;  load B[1]

ldw.d1 *a4, a6		  ;  load A[0]
||	ldw.d2 *b4, b6		  ;  load B[0]

nop 2

; Return branch: the five packets that follow are its delay slots.
b.s2 b3 		  ;  return

mv.l1x b0, a9		  ;  load B[1]
||	zero.s1 a1		  ;  load 0

addu.l1 a1:a0, a9, a1:a0  ;  A[1] + B[1], carry out lands in a1
||	add.l2x b6, a6, b6	  ;  A[0] + B[0] (carry not yet added)

and.l1 a1, 1, a1	  ;  isolate carry bit
||	stw.d2 a0, *+b4[1]	  ;  store A[1]+B[1]

add.l2x b6, a1, b6	  ;  A[0] + B[0] + carry

stw.d2 b6, *b4		  ;  store A[0]+B[0]
.end
*******************************************************************************
*									      *
*  PRIME LOOK-UP TABLE							      *
*  01/30/99 (dkc)							      *
*									      *
*  This table consists of primes of the form p*k+1 for p=3.  The least	      *
*  residue table is modulus p**2.					      *
*									      *
*******************************************************************************
; Shared data: uninitialised buffers in .bss plus the constant tables used by
; the assembly routines and the C driver.
.global _table3, _output, _error, _residue, _count, _recip, _recip1
.global _d, _e
.bss _error, 10*4, 4		; 10 words, word aligned
.bss _count, 4, 4		; one word
.bss _d, 4, 4			; one word
.bss _e, 4, 4			; one word
.bss _output, 2001*3*4, 4	; 2001 records of three words each
.data
.align 2
; Two halfwords, 1 and 8.
; NOTE(review): presumably the residues modulo p**2 = 9 that the file header
; refers to -- confirm against the code that reads _residue.
_residue .half 1
.half 8
.align 4
; Word pairs (reciprocal, n).  The first word equals ceil(2**32 / n) for the
; small primes n = 3 .. 31 (spot-checked for n = 3, 5, 7, 11).
_recip:
.word 0x55555556, 0x00000003
.word 0x33333334, 0x00000005
.word 0x24924925, 0x00000007
.word 0x1745d175, 0x0000000b
.word 0x13b13b14, 0x0000000d
.word 0x0f0f0f10, 0x00000011
.word 0x0d79435f, 0x00000013
.word 0x0b21642d, 0x00000017
.word 0x08d3dcb1, 0x0000001d
.word 0x08421085, 0x0000001f

; Reciprocals scaled by 2**32 and rounded up, indexed by word: 0 -> 1/3,
; 1 -> 1/9, 2 -> 1/6.
_recip1:
.word 0x55555556   ; 1/3
.word 0x1c71c71d   ; 1/9
.word 0x2aaaaaab   ; 1/6

.align 2
_table3:
.half 7
.half 13
.half 19
.half 31
.half 37
.half 43
.half 61
.half 67
.half 73
.half 79
.half 97
.half 103
.half 109
.half 127
.half 139
.half 151
.half 157
.half 163
.half 181
.half 193
.half 199
.half 211
.half 223
.half 229
.half 241
.half 271
.half 277
.half 283
.half 307
.half 313
.half 331
.half 337
.half 349
.half 367
.half 373
.half 379
.half 397
.half 409
.half 421
.half 433
.half 439
.half 457
.half 463
.half 487
.half 499
.half 523
.half 541
.half 547
.half 571
.half 577
.half 601
.half 607
.half 613
.half 619
.half 631
.half 643
.half 661
.half 673
.half 691
.half 709
.half 727
.half 733
.half 739
.half 751
.half 757
.half 769
.half 787
.half 811
.half 823
.half 829
.half 853
.half 859
.half 877
.half 883
.half 907
.half 919
.half 937
.half 967
.half 991
.half 997
.half 1009
.half 1021
.half 1033
.half 1039
.half 1051
.half 1063
.half 1069
.half 1087
.half 1093
.half 1117
.half 1123
.half 1129
.half 1153
.half 1171
.half 1201
.half 1213
.half 1231
.half 1237
.half 1249
.half 1279
.half 1291
.half 1297
.half 1303
.half 1321
.half 1327
.half 1381
.half 1399
.half 1423
.half 1429
.half 1447
.half 1453
.half 1459
.half 1471
.half 1483
.half 1489
.half 1531
.half 1543
.half 1549
.half 1567
.half 1579
.half 1597
.half 1609
.half 1621
.half 1627
.half 1657
.half 1663
.half 1669
.half 1693
.half 1699
.half 1723
.half 1741
.half 1747
.half 1753
.half 1759
.half 1777
.half 1783
.half 1789
.half 1801
.half 1831
.half 1861
.half 1867
.half 1873
.half 1879
.half 1933
.half 1951
.half 1987
.half 1993
.half 1999
.half 2011
.half 2017
.half 2029
.half 2053
.half 2083
.half 2089
.half 2113
.half 2131
.half 2137
.half 2143
.half 2161
.half 2179
.half 2203
.half 2221
.half 2239
.half 2251
.half 2269
.half 2281
.half 2287
.half 2293
.half 2311
.half 2341
.half 2347
.half 2371
.half 2377
.half 2383
.half 2389
.half 2437
.half 2467
.half 2473
.half 2503
.half 2521
.half 2539
.half 2551
.half 2557
.half 2593
.half 2617
.half 2647
.half 2659
.half 2671
.half 2677
.half 2683
.half 2689
.half 2707
.half 2713
.half 2719
.half 2731
.half 2749
.half 2767
.half 2791
.half 2797
.half 2803
.half 2833
.half 2851
.half 2857
.half 2887
.half 2917
.half 2953
.half 2971
.half 3001
.half 3019
.half 3037
.half 3049
.half 3061
.half 3067
.half 3079
.half 3109
.half 3121
.half 3163
.half 3169
.half 3181
.half 3187
.half 3217
.half 3229
.half 3253
.half 3259
.half 3271
.half 3301
.half 3307
.half 3313
.half 3319
.half 3331
.half 3343
.half 3361
.half 3373
.half 3391
.half 3433
.half 3457
.half 3463
.half 3469
.half 3499
.half 3511
.half 3517
.half 3529
.half 3541
.half 3547
.half 3559
.half 3571
.half 3583
.half 3607
.half 3613
.half 3631
.half 3637
.half 3643
.half 3673
.half 3691
.half 3697
.half 3709
.half 3727
.half 3733
.half 3739
.half 3769
.half 3793
.half 3823
.half 3847
.half 3853
.half 3877
.half 3889
.half 3907
.half 3919
.half 3931
.half 3943
.half 3967
.half 4003
.half 4021
.half 4027
.half 4051
.half 4057
.half 4093
.half 4099
.half 4111
.half 4129
.half 4153
.half 4159
.half 4177
.half 4201
.half 4219
.half 4231
.half 4243
.half 4261
.half 4273
.half 4297
.half 4327
.half 4339
.half 4357
.half 4363
.half 4423
.half 4441
.half 4447
.half 4483
.half 4507
.half 4513
.half 4519
.half 4549
.half 4561
.half 4567
.half 4591
.half 4597
.half 4603
.half 4621
.half 4639
.half 4651
.half 4657
.half 4663
.half 4723
.half 4729
.half 4759
.half 4783
.half 4789
.half 4801
.half 4813
.half 4831
.half 4861
.half 4903
.half 4909
.half 4933
.half 4951
.half 4957
.half 4969
.half 4987
.half 4993
.half 4999
.half 5011
.half 5023
.half 5059
.half 5077
.half 5101
.half 5107
.half 5113
.half 5119
.half 5167
.half 5179
.half 5197
.half 5209
.half 5227
.half 5233
.half 5281
.half 5323
.half 5347
.half 5407
.half 5413
.half 5419
.half 5431
.half 5437
.half 5443
.half 5449
.half 5479
.half 5503
.half 5521
.half 5527
.half 5557
.half 5563
.half 5569
.half 5581
.half 5623
.half 5641
.half 5647
.half 5653
.half 5659
.half 5683
.half 5689
.half 5701
.half 5737
.half 5743
.half 5749
.half 5779
.half 5791
.half 5821
.half 5827
.half 5839
.half 5851
.half 5857
.half 5869
.half 5881
.half 5923
.half 5953
.half 6007
.half 6037
.half 6043
.half 6067
.half 6073
.half 6079
.half 6091
.half 6121
.half 6133
.half 6151
.half 6163
.half 6199
.half 6211
.half 6217
.half 6229
.half 6247
.half 6271
.half 6277
.half 6301
.half 6337
.half 6343
.half 6361
.half 6367
.half 6373
.half 6379
.half 6397
.half 6421
.half 6427
.half 6451
.half 6469
.half 6481
.half 6529
.half 6547
.half 6553
.half 6571
.half 6577
.half 6607
.half 6619
.half 6637
.half 6661
.half 6673
.half 6679
.half 6691
.half 6703
.half 6709
.half 6733
.half 6763
.half 6781
.half 6793
.half 6823
.half 6829
.half 6841
.half 6871
.half 6883
.half 6907
.half 6949
.half 6961
.half 6967
.half 6991
.half 6997
.half 7027
.half 7039
.half 7057
.half 7069
.half 7129
.half 7159
.half 7177
.half 7207
.half 7213
.half 7219
.half 7237
.half 7243
.half 7297
.half 7309
.half 7321
.half 7333
.half 7351
.half 7369
.half 7393
.half 7411
.half 7417
.half 7459
.half 7477
.half 7489
.half 7507
.half 7537
.half 7549
.half 7561
.half 7573
.half 7591
.half 7603
.half 7621
.half 7639
.half 7669
.half 7681
.half 7687
.half 7699
.half 7717
.half 7723
.half 7741
.half 7753
.half 7759
.half 7789
.half 7867
.half 7873
.half 7879
.half 7927
.half 7933
.half 7951
.half 7963
.half 7993
.half 8011
.half 8017
.half 8053
.half 8059
.half 8089
.half 8101
.half 8161
.half 8167
.half 8179
.half 8191
.half 8209
.half 8221
.half 8233
.half 8263
.half 8269
.half 8287
.half 8293
.half 8311
.half 8317
.half 8329
.half 8353
.half 8377
.half 8389
.half 8419
.half 8431
.half 8443
.half 8461
.half 8467
.half 8521
.half 8527
.half 8539
.half 8563
.half 8581
.half 8599
.half 8623
.half 8629
.half 8641
.half 8647
.half 8677
.half 8689
.half 8707
.half 8713
.half 8719
.half 8731
.half 8737
.half 8761
.half 8779
.half 8803
.half 8821
.half 8839
.half 8863
.half 8887
.half 8893
.half 8923
.half 8929
.half 8941
.half 8971
.half 9001
.half 9007
.half 9013
.half 9043
.half 9049
.half 9067
.half 9091
.half 9103
.half 9109
.half 9127
.half 9133
.half 9151
.half 9157
.half 9181
.half 9187
.half 9199
.half 9241
.half 9277
.half 9283
.half 9319
.half 9337
.half 9343
.half 9349
.half 9391
.half 9397
.half 9403
.half 9421
.half 9433
.half 9439
.half 9463
.half 9511
.half 9547
.half 9601
.half 9613
.half 9619
.half 9631
.half 9643
.half 9649
.half 9661
.half 9679
.half 9697
.half 9721
.half 9733
.half 9739
.half 9769
.half 9781
.half 9787
.half 9811
.half 9817
.half 9829
.half 9859
.half 9871
.half 9883
.half 9901
.half 9907
.half 9931
.half 9949
.half 9967
.half 9973
.half 10009
.half 10039
.half 10069
.half 10093
.half 10099
.half 10111
.half 10141
.half 10159
.half 10177
.half 10243
.half 10267
.half 10273
.half 10303
.half 10321
.half 10333
.half 10357
.half 10369
.half 10399
.half 10429
.half 10453
.half 10459
.half 10477
.half 10501
.half 10513
.half 10531
.half 10567
.half 10597
.half 10627
.half 10639
.half 10651
.half 10657
.half 10663
.half 10687
.half 10711
.half 10723
.half 10729
.half 10753
.half 10771
.half 10789
.half 10831
.half 10837
.half 10861
.half 10867
.half 10891
.half 10903
.half 10909
.half 10939
.half 10957
.half 10987
.half 10993
.half 11047
.half 11059
.half 11071
.half 11083
.half 11113
.half 11119
.half 11131
.half 11149
.half 11161
.half 11173
.half 11197
.half 11239
.half 11251
.half 11257
.half 11287
.half 11299
.half 11311
.half 11317
.half 11329
.half 11353
.half 11383
.half 11437
.half 11443
.half 11467
.half 11491
.half 11497
.half 11503
.half 11527
.half 11551
.half 11587
.half 11593
.half 11617
.half 11677
.half 11689
.half 11701
.half 11719
.half 11731
.half 11743
.half 11779
.half 11821
.half 11827
.half 11833
.half 11839
.half 11863
.half 11887
.half 11923
.half 11941
.half 11953
.half 11959
.half 11971
.half 12007
.half 12037
.half 12043
.half 12049
.half 12073
.half 12097
.half 12109
.half 12157
.half 12163
.half 12211
.half 12241
.half 12253
.half 12277
.half 12289
.half 12301
.half 12343
.half 12373
.half 12379
.half 12391
.half 12409
.half 12421
.half 12433
.half 12451
.half 12457
.half 12487
.half 12511
.half 12517
.half 12541
.half 12547
.half 12553
.half 12577
.half 12583
.half 12589
.half 12601
.half 12613
.half 12619
.half 12637
.half 12697
.half 12703
.half 12721
.half 12739
.half 12757
.half 12763
.half 12781
.half 12799
.half 12823
.half 12829
.half 12841
.half 12853
.half 12889
.half 12907
.half 12919
.half 12967
.half 12973
.half 12979
.half 13003
.half 13009
.half 13033
.half 13063
.half 13093
.half 13099
.half 13147
.half 13159
.half 13171
.half 13177
.half 13183
.half 13219
.half 13249
.half 13267
.half 13291
.half 13297
.half 13309
.half 13327
.half 13339
.half 13381
.half 13399
.half 13411
.half 13417
.half 13441
.half 13477
.half 13513
.half 13537
.half 13567
.half 13591
.half 13597
.half 13627
.half 13633
.half 13669
.half 13681
.half 13687
.half 13693
.half 13711
.half 13723
.half 13729
.half 13759
.half 13789
.half 13807
.half 13831
.half 13873
.half 13879
.half 13903
.half 13921
.half 13933
.half 13963
.half 13999
.half 14011
.half 14029
.half 14071
.half 14083
.half 14107
.half 14143
.half 14149
.half 14173
.half 14197
.half 14221
.half 14251
.half 14281
.half 14293
.half 14323
.half 14341
.half 14347
.half 14389
.half 14401
.half 14407
.half 14419
.half 14431
.half 14437
.half 14449
.half 14461
.half 14479
.half 14503
.half 14533
.half 14551
.half 14557
.half 14563
.half 14593
.half 14629
.half 14653
.half 14683
.half 14713
.half 14731
.half 14737
.half 14767
.half 14779
.half 14797
.half 14821
.half 14827
.half 14851
.half 14869
.half 14887
.half 14923
.half 14929
.half 14947
.half 14983
.half 15013
.half 15031
.half 15061
.half 15073
.half 15091
.half 15121
.half 15139
.half 15187
.half 15193
.half 15199
.half 15217
.half 15241
.half 15259
.half 15271
.half 15277
.half 15289
.half 15307
.half 15313
.half 15319
.half 15331
.half 15349
.half 15361
.half 15373
.half 15391
.half 15427
.half 15439
.half 15451
.half 15493
.half 15511
.half 15541
.half 15559
.half 15583
.half 15601
.half 15607
.half 15619
.half 15643
.half 15649
.half 15661
.half 15667
.half 15679
.half 15727
.half 15733
.half 15739
.half 15787
.half 15817
.half 15823
.half 15859
.half 15877
.half 15889
.half 15901
.half 15907
.half 15913
.half 15919
.half 15937
.half 15973
.half 15991
.half 16033
.half 16057
.half 16063
.half 16069
.half 16087
.half 16111
.half 16141
.half 16183
.half 16189
.half 16231
.half 16249
.half 16267
.half 16273
.half 16333
.half 16339
.half 16363
.half 16369
.half 16381
.half 16411
.half 16417
.half 16447
.half 16453
.half 16477
.half 16519
.half 16561
.half 16567
.half 16573
.half 16603
.half 16633
.half 16651
.half 16657
.half 16693
.half 16699
.half 16729
.half 16741
.half 16747
.half 16759
.half 16831
.half 16843
.half 16879
.half 16903
.half 16921
.half 16927
.half 16963
.half 16981
.half 16987
.half 16993
.half 17011
.half 17029
.half 17041
.half 17047
.half 17053
.half 17077
.half 17107
.half 17137
.half 17167
.half 17191
.half 17203
.half 17209
.half 17239
.half 17257
.half 17293
.half 17299
.half 17317
.half 17341
.half 17359
.half 17377
.half 17383
.half 17389
.half 17401
.half 17419
.half 17431
.half 17443
.half 17449
.half 17467
.half 17491
.half 17497
.half 17509
.half 17539
.half 17551
.half 17569
.half 17581
.half 17599
.half 17623
.half 17659
.half 17683
.half 17707
.half 17713
.half 17737
.half 17749
.half 17761
.half 17791
.half 17827
.half 17839
.half 17851
.half 17863
.half 17881
.half 17911
.half 17923
.half 17929
.half 17959
.half 17971
.half 17977
.half 17989
.half 18013
.half 18043
.half 18049
.half 18061
.half 18097
.half 18121
.half 18127
.half 18133
.half 18169
.half 18181
.half 18199
.half 18211
.half 18217
.half 18223
.half 18229
.half 18253
.half 18289
.half 18301
.half 18307
.half 18313
.half 18367
.half 18379
.half 18397
.half 18427
.half 18433
.half 18439
.half 18451
.half 18457
.half 18481
.half 18493
.half 18517
.half 18523
.half 18541
.half 18553
.half 18583
.half 18637
.half 18661
.half 18679
.half 18691
.half 18757
.half 18787
.half 18793
.half 18859
.half 18913
.half 18919
.half 18973
.half 18979
.half 19009
.half 19051
.half 19069
.half 19081
.half 19087
.half 19141
.half 19183
.half 19207
.half 19213
.half 19219
.half 19231
.half 19237
.half 19249
.half 19267
.half 19273
.half 19309
.half 19333
.half 19381
.half 19387
.half 19417
.half 19423
.half 19429
.half 19441
.half 19447
.half 19471
.half 19477
.half 19483
.half 19489
.half 19501
.half 19507
.half 19531
.half 19543
.half 19597
.half 19603
.half 19609
.half 19681
.half 19687
.half 19699
.half 19717
.half 19753
.half 19759
.half 19777
.half 19801
.half 19813
.half 19819
.half 19843
.half 19861
.half 19867
.half 19891
.half 19927
.half 19963
.half 19993
.half 20011
.half 20023
.half 20029
.half 20047
.half 20071
.half 20089
.half 20101
.half 20107
.half 20113
.half 20143
.half 20149
.half 20161
.half 20173
.half 20233
.half 20269
.half 20287
.half 20323
.half 20341
.half 20347
.half 20353
.half 20359
.half 20389
.half 20407
.half 20431
.half 20443
.half 20479
.half 20509
.half 20521
.half 20533
.half 20551
.half 20563
.half 20593
.half 20599
.half 20611
.half 20641
.half 20707
.half 20719
.half 20731
.half 20743
.half 20749
.half 20773
.half 20809
.half 20857
.half 20887
.half 20899
.half 20929
.half 20947
.half 20959
.half 20983
.half 21001
.half 21013
.half 21019
.half 21031
.half 21061
.half 21067
.half 21121
.half 21139
.half 21157
.half 21163
.half 21169
.half 21187
.half 21193
.half 21211
.half 21247
.half 21277
.half 21283
.half 21313
.half 21319
.half 21379
.half 21391
.half 21397
.half 21433
.half 21481
.half 21487
.half 21493
.half 21499
.half 21517
.half 21523
.half 21529
.half 21559
.half 21577
.half 21589
.half 21601
.half 21613
.half 21649
.half 21661
.half 21673
.half 21727
.half 21739
.half 21751
.half 21757
.half 21787
.half 21799
.half 21817
.half 21841
.half 21859
.half 21871
.half 21937
.half 21943
.half 21961
.half 21991
.half 21997
.half 22003
.half 22027
.half 22039
.half 22051
.half 22063
.half 22093
.half 22111
.half 22123
.half 22129
.half 22147
.half 22153
.half 22159
.half 22171
.half 22189
.half 22273
.half 22279
.half 22291
.half 22303
.half 22369
.half 22381
.half 22441
.half 22447
.half 22453
.half 22483
.half 22501
.half 22531
.half 22543
.half 22549
.half 22567
.half 22573
.half 22621
.half 22639
.half 22651
.half 22669
.half 22699
.half 22717
.half 22741
.half 22777
.half 22783
.half 22807
.half 22861
.half 22921
.half 22963
.half 22993
.half 23011
.half 23017
.half 23029
.half 23041
.half 23053
.half 23059
.half 23071
.half 23131
.half 23143
.half 23167
.half 23173
.half 23197
.half 23203
.half 23209
.half 23227
.half 23251
.half 23269
.half 23293
.half 23311
.half 23371
.half 23431
.half 23473
.half 23497
.half 23509
.half 23539
.half 23557
.half 23563
.half 23581
.half 23593
.half 23599
.half 23623
.half 23629
.half 23671
.half 23677
.half 23689
.half 23719
.half 23743
.half 23761
.half 23767
.half 23773
.half 23827
.half 23833
.half 23857
.half 23869
.half 23887
.half 23893
.half 23899
.half 23911
.half 23917
.half 23929
.half 23971
.half 23977
.half 24001
.half 24007
.half 24019
.half 24043
.half 24049
.half 24061
.half 24091
.half 24097
.half 24103
.half 24109
.half 24121
.half 24133
.half 24151
.half 24169
.half 24181
.half 24223
.half 24229
.half 24247
.half 24337
.half 24373
.half 24379
.half 24391
.half 24421
.half 24439
.half 24469
.half 24481
.half 24499
.half 24517
.half 24547
.half 24571
.half 24631
.half 24691
.half 24697
.half 24709
.half 24733
.half 24763
.half 24781
.half 24793
.half 24799
.half 24841
.half 24847
.half 24859
.half 24877
.half 24889
.half 24907
.half 24919
.half 24943
.half 24967
.half 24979
.half 25033
.half 25057
.half 25087
.half 25111
.half 25117
.half 25147
.half 25153
.half 25171
.half 25183
.half 25189
.half 25219
.half 25237
.half 25243
.half 25261
.half 25303
.half 25309
.half 25321
.half 25339
.half 25357
.half 25411
.half 25423
.half 25447
.half 25453
.half 25471
.half 25537
.half 25561
.half 25579
.half 25603
.half 25609
.half 25621
.half 25633
.half 25639
.half 25657
.half 25693
.half 25717
.half 25741
.half 25747
.half 25759
.half 25771
.half 25801
.half 25819
.half 25849
.half 25867
.half 25873
.half 25903
.half 25933
.half 25939
.half 25951
.half 25969
.half 25981
.half 25999
.half 26017
.half 26029
.half 26041
.half 26053
.half 26083
.half 26107
.half 26113
.half 26119
.half 26161
.half 26203
.half 26209
.half 26227
.half 26251
.half 26263
.half 26293
.half 26317
.half 26347
.half 26371
.half 26407
.half 26431
.half 26437
.half 26449
.half 26479
.half 26497
.half 26539
.half 26557
.half 26641
.half 26647
.half 26683
.half 26701
.half 26713
.half 26731
.half 26737
.half 26821
.half 26833
.half 26839
.half 26863
.half 26881
.half 26893
.half 26947
.half 26953
.half 26959
.half 27031
.half 27043
.half 27061
.half 27067
.half 27073
.half 27091
.half 27103
.half 27109
.half 27127
.half 27211
.half 27241
.half 27253
.half 27259
.half 27271
.half 27277
.half 27283
.half 27337
.half 27361
.half 27367
.half 27397
.half 27409
.half 27427
.half 27457
.half 27481
.half 27487
.half 27529
.half 27541
.half 27583
.half 27631
.half 27673
.half 27691
.half 27697
.half 27733
.half 27739
.half 27751
.half 27763
.half 27793
.half 27799
.half 27817
.half 27823
.half 27847
.half 27883
.half 27901
.half 27919
.half 27943
.half 27961
.half 27967
.half 27997
.half 28027
.half 28051
.half 28057
.half 28069
.half 28081
.half 28087
.half 28099
.half 28111
.half 28123
.half 28183
.half 28201
.half 28219
.half 28279
.half 28297
.half 28309
.half 28351
.half 28387
.half 28393
.half 28411
.half 28429
.half 28447
.half 28477
.half 28513
.half 28537
.half 28549
.half 28573
.half 28579
.half 28591
.half 28597
.half 28603
.half 28621
.half 28627
.half 28657
.half 28663
.half 28669
.half 28687
.half 28711
.half 28723
.half 28729
.half 28753
.half 28759
.half 28771
.half 28789
.half 28807
.half 28813
.half 28837
.half 28843
.half 28867
.half 28879
.half 28909
.half 28921
.half 28927
.half 28933
.half 29017
.half 29023
.half 29059
.half 29077
.half 29101
.half 29131
.half 29137
.half 29167
.half 29173
.half 29179
.half 29191
.half 29209
.half 29221
.half 29251
.half 29269
.half 29287
.half 29311
.half 29347
.half 29383
.half 29389
.half 29401
.half 29437
.half 29443
.half 29473
.half 29527
.half 29569
.half 29581
.half 29587
.half 29599
.half 29611
.half 29629
.half 29641
.half 29671
.half 29683
.half 29761
.half 29803
.half 29833
.half 29851
.half 29863
.half 29881
.half 29917
.half 29947
.half 29959
.half 29983
.half 29989
.half 30013
.half 30091
.half 30097
.half 30103
.half 30109
.half 30133
.half 30139
.half 30169
.half 30181
.half 30187
.half 30211
.half 30223
.half 30241
.half 30253
.half 30259
.half 30271
.half 30307
.half 30313
.half 30319
.half 30367
.half 30391
.half 30403
.half 30427
.half 30469
.half 30493
.half 30517
.half 30529
.half 30553
.half 30559
.half 30577
.half 30631
.half 30637
.half 30643
.half 30649
.half 30661
.half 30697
.half 30703
.half 30727
.half 30757
.half 30763
.half 30781
.half 30817
.half 30829
.half 30841
.half 30853
.half 30859
.half 30871
.half 30931
.half 30937
.half 30949
.half 31033
.half 31039
.half 31051
.half 31063
.half 31069
.half 31081
.half 31123
.half 31147
.half 31153
.half 31159
.half 31177
.half 31183
.half 31189
.half 31219
.half 31231
.half 31237
.half 31249
.half 31267
.half 31321
.half 31327
.half 31333
.half 31357
.half 31387
.half 31393
.half 31477
.half 31489
.half 31513
.half 31531
.half 31543
.half 31567
.half 31573
.half 31627
.half 31657
.half 31663
.half 31687
.half 31699
.half 31723
.half 31729
.half 31741
.half 31771
.half 31849
.half 31873
.half 31891
.half 31957
.half 31963
.half 31981
.half 32029
.half 32059
.half 32077
.half 32083
.half 32089
.half 32119
.half 32143
.half 32173
.half 32191
.half 32203
.half 32233
.half 32251
.half 32257
.half 32299
.half 32323
.half 32341
.half 32353
.half 32359
.half 32371
.half 32377
.half 32401
.half 32413
.half 32443
.half 32467
.half 32479
.half 32491
.half 32497
.half 32503
.half 32533
.half 32563
.half 32569
.half 32587
.half 32611
.half 32647
.half 32653
.half 32707
.half 32713
.half 32719
.half 32749
.half 32779
.half 32797
.half 32803
.half 32833
.half 32839
.half 32869
.half 32887
.half 32911
.half 32917
.half 32941
.half 32971
.half 32983
.half 33013
.half 33037
.half 33049
.half 33073
.half 33091
.half 33151
.half 33181
.half 33199
.half 33211
.half 33223
.half 33247
.half 33289
.half 33301
.half 33331
.half 33343
.half 33349
.half 33391
.half 33403
.half 33409
.half 33427
.half 33457
.half 33469
.half 33487
.half 33493
.half 33529
.half 33547
.half 33577
.half 33589
.half 33601
.half 33613
.half 33619
.half 33637
.half 33679
.half 33703
.half 33721
.half 33739
.half 33751
.half 33757
.half 33769
.half 33811
.half 33829
.half 33871
.half 33889
.half 33931
.half 33937
.half 33961
.half 33967
.half 33997
.half 34033
.half 34039
.half 34057
.half 34123
.half 34129
.half 34141
.half 34147
.half 34159
.half 34171
.half 34183
.half 34213
.half 34231
.half 34261
.half 34267
.half 34273
.half 34297
.half 34303
.half 34327
.half 34351
.half 34369
.half 34381
.half 34429
.half 34471
.half 34483
.half 34501
.half 34513
.half 34519
.half 34537
.half 34543
.half 34549
.half 34591
.half 34603
.half 34651
.half 34687
.half 34693
.half 34729
.half 34747
.half 34759
.half 34807
.half 34819
.half 34843
.half 34849
.half 34897
.half 34939
.half 34963
.half 34981
.half 35023
.half 35053
.half 35059
.half 35083
.half 35089
.half 35107
.half 35149
.half 35221
.half 35227
.half 35251
.half 35257
.half 35281
.half 35311
.half 35317
.half 35323
.half 35353
.half 35401
.half 35407
.half 35419
.half 35437
.half 35449
.half 35461
.half 35491
.half 35509
.half 35521
.half 35527
.half 35533
.half 35569
.half 35593
.half 35617
.half 35671
.half 35677
.half 35731
.half 35797
.half 35803
.half 35809
.half 35839
.half 35851
.half 35863
.half 35869
.half 35899
.half 35911
.half 35923
.half 35977
.half 35983
.half 36007
.half 36013
.half 36037
.half 36061
.half 36067
.half 36073
.half 36097
.half 36109
.half 36151
.half 36187
.half 36217
.half 36229
.half 36241
.half 36277
.half 36307
.half 36313
.half 36319
.half 36343
.half 36373
.half 36433
.half 36451
.half 36457
.half 36469
.half 36493
.half 36523
.half 36529
.half 36541
.half 36559
.half 36571
.half 36583
.half 36607
.half 36637
.half 36643
.half 36691
.half 36697
.half 36709
.half 36721
.half 36739
.half 36781
.half 36787
.half 36793
.half 36847
.half 36871
.half 36877
.half 36901
.half 36913
.half 36919
.half 36931
.half 36943
.half 36973
.half 36979
.half 36997
.half 37003
.half 37021
.half 37039
.half 37057
.half 37087
.half 37117
.half 37123
.half 37159
.half 37171
.half 37189
.half 37201
.half 37243
.half 37273
.half 37309
.half 37321
.half 37339
.half 37357
.half 37363
.half 37369
.half 37423
.half 37441
.half 37447
.half 37483
.half 37489
.half 37501
.half 37507
.half 37537
.half 37549
.half 37561
.half 37567
.half 37573
.half 37579
.half 37591
.half 37633
.half 37657
.half 37663
.half 37693
.half 37699
.half 37717
.half 37747
.half 37783
.half 37813
.half 37831
.half 37861
.half 37879
.half 37897
.half 37951
.half 37957
.half 37963
.half 37987
.half 37993
.half 38011
.half 38047
.half 38053
.half 38083
.half 38113
.half 38119
.half 38149
.half 38167
.half 38197
.half 38239
.half 38281
.half 38287
.half 38299
.half 38317
.half 38329
.half 38371
.half 38377
.half 38431
.half 38449
.half 38461
.half 38557
.half 38569
.half 38593
.half 38611
.half 38629
.half 38653
.half 38671
.half 38677
.half 38707
.half 38713
.half 38737
.half 38749
.half 38767
.half 38791
.half 38803
.half 38821
.half 38833
.half 38839
.half 38851
.half 38917
.half 38923
.half 38953
.half 38959
.half 38971
.half 38977
.half 39019
.half 39043
.half 39079
.half 39097
.half 39103
.half 39133
.half 39139
.half 39157
.half 39163
.half 39181
.half 39199
.half 39217
.half 39229
.half 39241
.half 39301
.half 39313
.half 39343
.half 39367
.half 39373
.half 39397
.half 39409
.half 39439
.half 39451
.half 39499
.half 39511
.half 39541
.half 39607
.half 39619
.half 39631
.half 39667
.half 39679
.half 39703
.half 39709
.half 39727
.half 39733
.half 39769
.half 39799
.half 39829
.half 39841
.half 39847
.half 39877
.half 39883
.half 39901
.half 39937
.half 39979
.half 40009
.half 40039
.half 40063
.half 40087
.half 40093
.half 40099
.half 40111
.half 40123
.half 40129
.half 40153
.half 40177
.half 40189
.half 40213
.half 40231
.half 40237
.half 40351
.half 40357
.half 40387
.half 40423
.half 40429
.half 40459
.half 40471
.half 40483
.half 40507
.half 40519
.half 40531
.half 40543
.half 40591
.half 40597
.half 40609
.half 40627
.half 40639
.half 40693
.half 40699
.half 40759
.half 40771
.half 40801
.half 40813
.half 40819
.half 40849
.half 40867
.half 40879
.half 40897
.half 40903
.half 40927
.half 40933
.half 40939
.half 40993
.half 41011
.half 41017
.half 41023
.half 41047
.half 41077
.half 41113
.half 41131
.half 41143
.half 41149
.half 41161
.half 41179
.half 41203
.half 41221
.half 41227
.half 41233
.half 41257
.half 41263
.half 41269
.half 41281
.half 41299
.half 41341
.half 41389
.half 41413
.half 41443
.half 41467
.half 41479
.half 41491
.half 41521
.half 41539
.half 41593
.half 41611
.half 41617
.half 41641
.half 41647
.half 41659
.half 41719
.half 41737
.half 41761
.half 41809
.half 41851
.half 41863
.half 41887
.half 41893
.half 41911
.half 41941
.half 41947
.half 41953
.half 41959
.half 41983
.half 42013
.half 42019
.half 42043
.half 42061
.half 42073
.half 42139
.half 42157
.half 42169
.half 42181
.half 42187
.half 42193
.half 42223
.half 42283
.half 42307
.half 42331
.half 42337
.half 42349
.half 42373
.half 42379
.half 42391
.half 42397
.half 42403
.half 42409
.half 42433
.half 42451
.half 42457
.half 42463
.half 42487
.half 42499
.half 42571
.half 42577
.half 42589
.half 42643
.half 42649
.half 42667
.half 42697
.half 42703
.half 42709
.half 42727
.half 42751
.half 42787
.half 42793
.half 42829
.half 42841
.half 42853
.half 42859
.half 42901
.half 42937
.half 42943
.half 42961
.half 42967
.half 42979
.half 43003
.half 43051
.half 43063
.half 43093
.half 43117
.half 43159
.half 43177
.half 43189
.half 43201
.half 43207
.half 43237
.half 43261
.half 43291
.half 43321
.half 43399
.half 43411
.half 43441
.half 43543
.half 43573
.half 43579
.half 43591
.half 43597
.half 43609
.half 43627
.half 43633
.half 43651
.half 43669
.half 43711
.half 43717
.half 43753
.half 43759
.half 43777
.half 43783
.half 43789
.half 43801
.half 43867
.half 43891
.half 43933
.half 43951
.half 43963
.half 43969
.half 43987
.half 44017
.half 44029
.half 44041
.half 44053
.half 44059
.half 44071
.half 44089
.half 44101
.half 44119
.half 44131
.half 44179
.half 44203
.half 44221
.half 44257
.half 44263
.half 44269
.half 44281
.half 44293
.half 44371
.half 44383
.half 44389
.half 44449
.half 44491
.half 44497
.half 44533
.half 44563
.half 44587
.half 44617
.half 44623
.half 44641
.half 44647
.half 44683
.half 44701
.half 44773
.half 44797
.half 44809
.half 44839
.half 44851
.half 44887
.half 44893
.half 44917
.half 44953
.half 44959
.half 44971
.half 44983
.half 45007
.half 45013
.half 45061
.half 45121
.half 45127
.half 45139
.half 45181
.half 45247
.half 45259
.half 45289
.half 45307
.half 45319
.half 45337
.half 45343
.half 45361
.half 45403
.half 45427
.half 45433
.half 45439
.half 45481
.half 45523
.half 45541
.half 45553
.half 45589
.half 45613
.half 45631
.half 45667
.half 45673
.half 45691
.half 45697
.half 45751
.half 45757
.half 45763
.half 45817
.half 45823
.half 45841
.half 45853
.half 45943
.half 45949
.half 45979
.half 46021
.half 46027
.half 46051
.half 46093
.half 46099
.half 46141
.half 46147
.half 46153
.half 46171
.half 46183
.half 46219
.half 46237
.half 46261
.half 46273
.half 46279
.half 46309
.half 46327
.half 46351
.half 46381
.half 46399
.half 46411
.half 46441
.half 46447
.half 46471
.half 46477
.half 46489
.half 46507
.half 46549
.half 46567
.half 46573
.half 46591
.half 46633
.half 46639
.half 46663
.half 46681
.half 46687
.half 46723
.half 46747
.half 46771
.half 46807
.half 46819
.half 46831
.half 46861
.half 46867
.half 46933
.half 46957
.half 46993
.half 47017
.half 47041
.half 47059
.half 47119
.half 47137
.half 47143
.half 47149
.half 47161
.half 47221
.half 47251
.half 47269
.half 47287
.half 47293
.half 47317
.half 47353
.half 47389
.half 47407
.half 47419
.half 47431
.half 47491
.half 47497
.half 47521
.half 47527
.half 47533
.half 47563
.half 47569
.half 47581
.half 47599
.half 47623
.half 47629
.half 47653
.half 47659
.half 47701
.half 47713
.half 47737
.half 47743
.half 47779
.half 47791
.half 47797
.half 47809
.half 47857
.half 47869
.half 47881
.half 47911
.half 47917
.half 47947
.half 47977
.half 48049
.half 48073
.half 48079
.half 48091
.half 48109
.half 48121
.half 48157
.half 48163
.half 48187
.half 48193
.half 48247
.half 48259
.half 48271
.half 48313
.half 48337
.half 48397
.half 48409
.half 48463
.half 48481
.half 48487
.half 48523
.half 48541
.half 48571
.half 48589
.half 48619
.half 48649
.half 48661
.half 48673
.half 48679
.half 48733
.half 48751
.half 48757
.half 48781
.half 48787
.half 48799
.half 48817
.half 48823
.half 48847
.half 48859
.half 48871
.half 48883
.half 48889
.half 48907
.half 48973
.half 48991
.half 49003
.half 49009
.half 49033
.half 49057
.half 49069
.half 49081
.half 49117
.half 49123
.half 49171
.half 49177
.half 49201
.half 49207
.half 49261
.half 49279
.half 49297
.half 49333
.half 49339
.half 49363
.half 49369
.half 49393
.half 49411
.half 49417
.half 49429
.half 49459
.half 49477
.half 49531
.half 49537
.half 49549
.half 49597
.half 49603
.half 49627
.half 49633
.half 49639
.half 49663
.half 49669
.half 49681
.half 49711
.half 49741
.half 49747
.half 49783
.half 49789
.half 49801
.half 49807
.half 49831
.half 49843
.half 49891
.half 49921
.half 49927
.half 49939
.half 49957
.half 49993
.half 49999
.end

//
// batch file
//
rem Assemble each hand-written module for the C6400 target, in the original
rem order.  (%%m is the batch-file form of the loop variable.)
for %%m in (sum differ bigprod quotient bigbigs bigbigq dummy midprod furtest euclid search1 depower furcond eloop dloop powquot comfact table0) do asm6x -v6400 %%m.asm
rem Compile the C driver, then link everything as described by test1p.cmd.
cl6x -mv6400 test1p.c
lnk6x test1p.cmd

//
// linker command file (test1p.cmd)
//
/* Input object files: the compiled C driver (test1p.obj) followed by the   */
/* assembled modules produced by the build batch file.  The order is left   */
/* exactly as supplied, since link order can influence section placement.   */
test1p.obj
table0.obj
sum.obj
differ.obj
bigprod.obj
quotient.obj
bigbigs.obj
bigbigq.obj
dummy.obj
comfact.obj
midprod.obj
furtest.obj
powquot.obj
euclid.obj
search1.obj
depower.obj
furcond.obj
eloop.obj
dloop.obj

/* Linker options (meanings per the TI C6000 linker documentation; confirm  */
/* against the installed tool version):                                     */
/*   -cr     C autoinitialization with variables initialized at load time   */
/*   -stack  size of the C system stack                                     */
/*   -l      run-time-support library to search (path is machine-specific)  */
/*   -o      name of the linked output file                                 */
/*   -m      name of the link map file                                      */
-cr
-stack 0x600
-l c:\CCStudio\c6000\cgtools\lib\rts6400.lib
-o test1p.out
-m test1p.map

/* Target memory map (all values in hexadecimal):                 */
/*   VECS  0x00000000 - 0x000003bf   holds the "vectors" section  */
/*   PMEM  0x000003c0 - 0x0000ffff   program memory               */
/*   DMEM  0x00080000 - 0x000fffff   data memory                  */
MEMORY
{
    VECS: origin = 0x00000000, length = 0x000003c0
    PMEM: origin = 0x000003c0, length = 0x0000fc40
    DMEM: origin = 0x00080000, length = 0x00080000
}

/* Output-section placement.                                                */
/* The first five entries are the original assignments, kept in their       */
/* original order.  The remaining entries name the other sections the       */
/* C6000 compiler and run-time-support library can generate; placing them   */
/* explicitly keeps the linker's default allocation from dropping them into */
/* VECS or PMEM.  An entry whose section has no input is simply empty.      */
SECTIONS
{
    .text:   > PMEM
    .data:   > DMEM
    .bss:    > DMEM
    .stack:  > DMEM
    vectors: > VECS

    .cinit:  > DMEM    /* C initialization tables (used with -c / -cr)      */
    .const:  > DMEM    /* const data and string literals                    */
    .switch: > DMEM    /* jump tables for large switch statements           */
    .far:    > DMEM    /* objects declared "far"                            */
    .sysmem: > DMEM    /* heap used by malloc and friends                   */
    .cio:    > DMEM    /* C I/O buffer of the run-time-support library      */
}
```