*******************************************************************************
*									      *
*  DO JUMPS								      *
*  04/21/13 (dkc)							      *
*									      *
*  This C64 subroutine does a succession of jumps.  The calling sequence of   *
*  the subroutine is:							      *
*									      *
*     K[0] (MSW of odd sequence value) => a4				      *
*     K[1] (LSW of odd sequence value) => b4				      *
*     iters (number of jumps) => a6					      *
*     c => b6								      *
*     max (maximum allowable value of first word) => a8 		      *
*     address to store output (K[0], K[1]) => b8			      *
*									      *
*******************************************************************************
       .global _jumps
       .text
*******************************************************************************
*  _jumps - overview of the code below
*
*  Register roles (as used in this routine):
*     b16:b4   working 64-bit value, b16 = K[0] (MSW), b4 = K[1] (LSW)
*     b6       c
*     b7       count = lmbd(1, bitr(K[1])) taken after K += c, i.e. the number
*              of low-order zero bits of the LSW of K+c
*     b1       inner-loop branch counter (starts at count-1)
*     a2       inner loop: copy of count-1, predicates every x3 step after the
*              first;  outer loop: remaining iteration count (from a6)
*     b2       bit 31 of K[0] (sign), sampled after K += c
*     a1/a0    a1 = (K[0] > max) for the current pass, a0 = sticky OR of a1
*     b5       carry/borrow word of the 40-bit addu/subu results
*     b17, b18, b30  temporaries;  b9 = 32;  b31 = 0
*     a4       return value, preset to 1
*     b3       return address
*
*  One pass of the outer loop ("aloop"):
*     1. K = (K + c) >> count   (the add is done before entry and again in the
*        tail of the loop for the next pass)
*     2. if the sign bit was set, negate K
*     3. K = K*3, repeated count times in total; K[0] is compared (signed)
*        against max along the way
*     4. if the sign bit was set, negate K back
*     5. K = (K - c) >> 1
*     6. if bit 0 of the result is clear, zero the return value and stop
*
*  Exits:
*     - K[0] exceeded max: return with a4 = 0.  That return is issued right
*       after the inner loop and completes before the stw packets are reached,
*       so nothing is written through b8 on this path.
*     - iteration count exhausted, or result not odd: K[0], K[1] are stored at
*       b8[0], b8[1] and the routine returns (a4 = 0 in the "not odd" case,
*       otherwise a4 is still 1).
*     - Because a2 is only decremented when nonzero, iters = 0 still performs
*       one jump.
*
*  Scheduling notes.  The code is hand-scheduled and relies on C64x latencies:
*  a branch takes effect after five delay slots, and mpy/bitr/rotl results
*  arrive one cycle later than single-cycle results.  Instructions in the same
*  execute packet (joined by ||) all read register values from before the
*  packet.  Do not reorder packets without re-checking these dependencies.
*
*  NOTE(review): subab/addab are used as plain subtract/add.  The two
*  "subab b7, 1, b1" uses have b7 as base operand, which presumably requires
*  the AMR to select linear addressing for b7 - confirm against the caller's
*  environment.
*  NOTE(review): if K[1]+c is odd, count is 0 and b1/a2 start at -1, so the
*  inner loop would not terminate by count.  The header says K is odd, so
*  presumably c is odd as well - confirm with the caller.
*******************************************************************************
_jumps:
       mv.l2x a4, b16		 ; b16 = K[0]
||     mvk.s2 32, b9		 ; b9 = 32 (constant)
||     mvk.s1 1, a4		 ; preset return value to 1
||     mpy.m2 b31, 0, b31	 ; b31 = 0 (constant; result arrives one cycle late)

       addu.l2 b4, b6, b5:b4	 ; K[1]=K[1]+c, carry out lands in b5

       add.l2 b16, b5, b16	 ; K[0]=K[0]+carry
||     bitr.m2 b4, b0		 ; bit-reverse K[1] so lmbd can count low-order zeros

       shru.s2 b16, 31, b2	 ; b2 = sign bit of K[0]

       lmbd.l2 1, b0, b7	 ; count = position of first 1 in reversed K[1]

       shru.s2 b4, b7, b4	 ; K[1]>>count
||     sub.l2 b9, b7, b0	 ; 32-count
||     subab.d2 b7, 1, b1	 ; b1 = count-1 (inner loop counter)

       shl.s2 b16, b0, b0	 ; K[0]<<(32-count)
**********************
*  begin outer loop  *
**********************
* On entry: b4 = K[1]>>count, b0 = K[0]<<(32-count), b16 = K[0] (unshifted),
* b7 = count, b1 = count-1, b2 = sign bit.
aloop: or.l2 b4, b0, b4 	 ; K[1]=(K[1]>>count)|(K[0]<<(32-count))
||     shr.s2 b16, b7, b16	 ; K[0]=K[0]>>count (arithmetic)
||     mv.l1x b1, a2		 ; a2 = count-1, predicate for the extra x3 steps
||     mpy.m1 a0, 0, a0 	 ; a0 = 0 (clear sticky over-max flag)

  [b2] subu.l2 b31, b4, b5:b4	 ; if negative: K[1]=0-K[1], borrow in b5
||     neg.s2 b16, b17		 ; -K[0]
||[b1] subab.d2 b1, 1, b1	 ; decrement loop count

       shru.s2 b5, 7, b5	 ; reduce borrow word to 0/1 (only consumed when b2 is set)

  [b2] subab.d2 b17, b5, b16	 ; K[0]=-K[0]-carry
||     mv.s2 b4, b18		 ; save K[1]
||     addu.l2 b4, b4, b5:b4	 ; K[1]*2 (40-bit, carry in b5)

* First x3 step is unconditional; the branch below starts the pipelined loop
* and takes effect five packets later, i.e. after the first pass through bloop.
       addu.l2 b5:b4, b18, b5:b4 ; K[1]=K[1]*3
||     addab.d2 b16, b16, b17	 ; K[0]*2
||[b1] b.s2 bloop		 ; conditional branch to loop beginning

       addab.d2 b16, b17, b16	 ; K[0]*3
||     rotl.m2 b4, 0, b18	 ; save K[1] (rotate by 0 = copy, done on the .M unit)

       addab.d2 b16, b5, b16	 ; K[0]=K[0]*3+carry
||[a2] addu.l2 b4, b4, b5:b4	 ; K[1]*2 (start of next x3 step, only if count-1 != 0)
||[b1] sub.s2 b1, 1, b1 	 ; decrement loop count
****************
*  begin loop  *
****************
* Three-packet loop body, one x3 step per pass.  All arithmetic is predicated
* on a2 so that the pass is a no-op when count is 1.  The last packet already
* doubles K[1] for a possible next pass, which is why the finished K[1] is
* copied to b30 one packet earlier.
bloop:
  [a2] addu.l2 b5:b4, b18, b5:b4 ; K[1]=K[1]*3
||[a2] addab.d2 b16, b16, b17	 ; K[0]*2
||[b1] b.s2 bloop		 ; conditional branch to loop beginning

  [a2] addab.d2 b16, b17, b16	 ; K[0]*3
||[a2] rotl.m2 b4, 0, b18	 ; save K[1]
||[b1] sub.s2 b1, 1, b1 	 ; decrement loop count
||     mv.l2 b4, b30		 ; b30 = completed K[1], before the speculative doubling below
||     cmpgt.l1x b16, a8, a1	 ; a1 = (K[0] > max), signed compare

  [a2] addab.d2 b16, b5, b16	 ; K[0]=K[0]*3+carry
||[a2] addu.l2 b4, b4, b5:b4	 ; K[1]*2
||[a1] zero.s2 b1		 ; over max: clear loop count so no further loop branch is issued
||     or.l1 a0, a1, a0 	 ; accumulate over-max flag
****************
*  end loop    *
****************
* Over-max exit: the return below completes after the next five packets, i.e.
* before the branch/stw packet, so no output is stored on that path.
  [a0] b.s2 b3			 ; return
||[a0] mpy.m1 a4, 0, a4 	 ; clear return value
||[b2] subu.l2 b31, b30, b5:b4	 ; if negative: K[1]=0-K[1] (undo the earlier negation)
||[!b2] addab.d2 b30, 0, b4	 ; otherwise K[1] = saved value
||     mv.l1 a6, a2		 ; a2 = remaining iteration count

       subab.d2 b31, b16, b17	 ; -K[0]
||     shru.s2 b5, 7, b7	 ; borrow from the negation, reduced to 0/1
||     subu.l2 b4, b6, b5:b4	 ; K[1]=K[1]-c
||[a2] sub.l1 a2, 1, a2 	 ; decrement iteration count (stays at 0 once it reaches 0)

  [b2] subab.d2 b17, b7, b16	 ; K[0]=-K[0]-carry
||     shru.s2 b5, 7, b7	 ; borrow from the subtraction of c, reduced to 0/1
||     mv.l1 a2, a6		 ; save iteration count back to a6

       sub.l2 b16, b7, b16	 ; K[0]=K[0]-carry
||     shru.s2 b4, 1, b4	 ; K[1]>>1

       shl.s2 b16, 31, b17	 ; K[0]<<31
||     and.l2 b4, 1, b0 	 ; b0 = bit 0 of the halved K[1] (odd check)

       shr.s2 b16, 1, b16	 ; K[0]=K[0]>>1
||     or.l2 b4, b17, b4	 ; K[1]=(K[1]>>1)|(K[0]<<31)
||[!b0] zero.s1 a2		 ; result not odd: stop iterating
||[!b0] zero.l1 a4		 ; result not odd: clear return value

* Tail: either loop again (a2 != 0) or store and return (a2 == 0).  The stw
* instructions read b4/b16 from before the same-packet add of c, so the value
* stored is the jump result itself.  The remaining packets recompute count,
* sign and shifts for the next pass while sitting in the branch delay slots.
  [a2] b.s1 aloop		 ; conditional branch to loop beginning
||     addu.l2 b4, b6, b5:b4	 ; K[1]=K[1]+c
||[!a2] stw.d2 b4, *+b8[1]	 ; save K[1]

       add.l2 b16, b5, b16	 ; K[0]=K[0]+carry
||     bitr.m2 b4, b0		 ; bit-reverse K[1]
||[!a2] b.s2 b3 		 ; return
||[!a2] stw.d2 b16, *b8 	 ; save K[0]

       shru.s2 b16, 31, b2	 ; isolate sign

       lmbd.l2 1, b0, b7	 ; count bits

       shru.s2 b4, b7, b4	 ; K[1]>>count
||     sub.l2 b9, b7, b0	 ; 32-count
||     subab.d2 b7, 1, b1	 ; b1 = count-1 (inner loop counter)

       shl.s2 b16, b0, b0	 ; K[0]<<(32-count)
*********************
*  end outer loop   *
*********************
       nop			 ; fifth delay slot of the final return branch
       .end