*  REGENERATE 3N+C CYCLES						      *
*  04/19/13 (dkc)							      *
*									      *
*  This C64 subroutine regenerates a 3n+c cycle given an odd element and finds*
*  the minimum element and the number of even and odd elements.  The calling  *
*  sequence of the subroutine is:					      *
*									      *
*     K[0] (MSW of odd element) => a4					      *
*     K[1] (LSW of odd element) => b4					      *
*     address to store output minimum (MIN[0],MIN[1]), (L+2*K,K), etc. => a6  *
*     c => b6								      *
*									      *
*  If an attachment point is found, a value of 1 is returned.  Eleven	      *
*  instruction cycles are required to compute the next odd element in the     *
*  sequence and compare this odd element to the minimum.		      *
*									      *
*******************************************************************************
       .global _regen
       .text
*
*  _regen - walk a 3n+c cycle starting from the odd element K[0]:K[1].
*
*  Inputs (as read by the code below):
*     a4 = K[0] (MSW), b4 = K[1] (LSW), a6 = output address, b6 = c,
*     b3 = return address.
*
*  Words written at a6:
*     [0] = MIN[0], [1] = MIN[1]      (written when the cycle closes)
*     [2] = ((g+o)<<16)|o	      (written when the cycle closes)
*     [3],[4] = MSW,LSW of the first 3*K+c value whose low three bits are
*		zero (written only when such a value is seen)
*
*  Register roles:
*     a2       attachment-point flag, copied to a4 on return
*     a17      g, running sum of the shift counts
*     a19      o, number of odd elements processed
*     a18      packed ((g+o)<<16)|o
*     a21:a20  copy of the starting element, used to detect cycle closure
*     b16,b20  current element MSW,LSW (b17,b18,b19,b21 are temporaries)
*     b23:b22  copy of the current odd element for the minimum update
*     b31:b30  running minimum MIN[0]:MIN[1]
*     b29      shift count, b0/b1/b2 scratch and predicates
*
*  Scheduling notes: each group of lines joined by "||" is one execute
*  packet.  The code relies on the one-delay-slot latency of .M-unit results
*  (rotl, bitr, mpy) and on the five delay slots that follow a branch, so
*  the order of the packets must not be changed.  addab/subab are used only
*  as plain add/subtract on the .D unit; none of them uses b4-b7 as the base
*  operand (NOTE(review): so circular addressing should not apply - confirm).
*
*  Prologue: form 3*K+c for the starting element, test it for an attachment
*  point and prepare the first shift so the loop can start with the merge.
*
_regen:zero.l2 b19			; b19 = 0 (upper word of long b19:b18)
       mv.d2 b4, b18			; b18 = K[1]
||     mvk.s1 0, a2			; clear attachment-point flag
||     rotl.m1 a4, 0, a21		; save starting K[0] (rotate by 0 = copy)
||     mv.l1x b4, a20			; save starting K[1]
||     rotl.m2 b4, 0, b30		; MIN[1]=K[1]

       addu.l2 b19:b18, b18, b21:b20	; K[1]*2 (carry collects in b21)
||     rotl.m2x a4, 0, b16		; b16 = K[0]
||     mvk.s1 0, a17			; g=0
||     mpy.m1 a19, 0, a19		; o=0 (a19*0)

       addu.l2 b21:b20, b18, b21:b20	; K[1]*3
||     rotl.m2x a4, 0, b31		; MIN[0]=K[0]

       addu.l2 b21:b20, b6, b21:b20	; K[1]=K[1]*3+c, b21 = carry out
||     addab.d2 b16, b16, b17		; K[0]*2

       bitr.m2 b20, b0			; bit-reverse K[1] (for trailing-zero count)
||     and.l2 b20, 7, b1		; b1 = low 3 bits; 0 marks an attachment point
||     addab.d2 b17, b16, b16		; K[0]*3

  [!b1] mvk.s1 1, a2			; set flag
||[!b1] stw.d1 b20, *+a6[4]		; save K[1] of the attachment point
||     addab.d2 b16, b21, b16		; K[0]=K[0]*3+carry

       lmbd.l2 1, b0, b29		; count = leading zeros of reversed K[1]
||     mvk.s2 32, b2			; load 32
||[!b1] stw.d1 b16, *+a6[3]		; save K[0] of the attachment point

       subab.d2 b2, b29, b2		; 32-count
||     shru.s2 b20, b29, b20		; K[1]>>count

       shl.s2 b16, b2, b2		; K[0]<<(32-count): bits moving into K[1]
||     add.l1x b29, a17, a17		; g=g+count
***************
* begin loop  *
***************
*  Eleven execute packets per iteration.
aloop:
*  cycle 1: finish the 64-bit right shift; count the new odd element
       shr.s2 b16, b29, b16		; K[0]=K[0]>>count
||     or.l2 b20, b2, b20		; K[1]=K[1]|(K[0]<<(32-count))
||     mpy.m2 b19, 0, b19		; b19 = 0 for the long adds below
||     add.l1 a19, 1, a19		; o=o+1

*  cycle 2: start MIN-K, keep copies of K for the minimum update
       subu.l2 b30, b20, b1:b0		; MIN[1]-K[1] (40-bit result)
||     sub.s2 b31, b16, b2		; MIN[0]-K[0]
||     addab.d2 b20, 0, b18		; b18 = K[1] (operand for 3*K)
||     add.l1 a19, a17, a18		; g+o
||     rotl.m2 b20, 0, b22		; save K[1]

*  cycle 3: extract the borrow, begin the comparison with the start element
       shru.s2 b1, 7, b1		; borrow = bit 39 of the 40-bit difference
||     cmpeq.l1x b20, a20, a1		; a1 = (K[1] == starting K[1])
||     shl.s1 a18, 16, a18		; (g+o)<<16
||     rotl.m2 b16, 0, b23		; save K[0]

*  cycle 4
       sub.s2 b2, b1, b2		; MIN[0]-K[0]-borrow
||     addu.l2 b19:b18, b18, b21:b20	; K[1]*2
||[a1] cmpeq.l1x b16, a21, a1		; if LSW matched, a1 = (K[0] == starting K[0])
||     or.s1 a18, a19, a18		; ((g+o)<<16)|o

*  cycle 5: b2 == 0 below means "replace MIN with K"
 [!a1] shr.s2 b2, 31, b2		; sign of MIN-K: 0 if MIN>=K, else -1
||[a1] mvk.d2 1, b2			; cycle closed: suppress the MIN update
||     addu.l2 b21:b20, b18, b21:b20	; K[1]*3

*  cycle 6: loop branch issued here; the next five packets are its delay slots
 [!a1] b.s1 aloop
||     addu.l2 b21:b20, b6, b21:b20	; K[1]=K[1]*3+c, b21 = carry out
||[!b2] addab.d2 b23, 0, b31		; MIN[0]=K[0]
||[!b2] rotl.m2 b22, 0, b30		; MIN[1]=K[1]
||     mvk.s2 1, b1			; default b1=1 (no attachment point)

*  cycle 7: return branch issued here when the cycle has closed
       addab.d2 b16, b16, b17		; K[0]*2
||[!a2] and.l2 b20, 7, b1		; test low 3 bits unless flag already set
||     bitr.m2 b20, b0			; bit-reverse K[1]
||[a1] b.s2 b3				; return
||[a1] stw.d1 b31, *a6			; save MIN[0]

*  cycle 8
       addab.d2 b17, b16, b16		; K[0]*3
||[!b1] mvk.s1 1, a2			; set flag
||[!b1] stw.d1 b20, *+a6[4]		; save K[1] of the attachment point

*  cycle 9
       addab.d2 b16, b21, b16		; K[0]=K[0]*3+carry
||     lmbd.l2 1, b0, b29		; count = leading zeros of reversed K[1]
||     mvk.s2 32, b2			; load 32
||[a1] stw.d1 b30, *+a6[1]		; save MIN[1]

*  cycle 10
       subab.d2 b2, b29, b2		; 32-count
||     shru.s2 b20, b29, b20		; K[1]>>count
||[a1] stw.d1 a18, *+a6[2]		; store ((g+o)<<16)|o

*  cycle 11: last delay slot of the loop branch
       shl.s2 b16, b2, b2		; K[0]<<(32-count): bits moving into K[1]
||     add.l1x b29, a17, a17		; g=g+count
||[!b1] stw.d1 b16, *+a6[3]		; save K[0] of the attachment point
****************
*  end loop    *
****************
*  Reached only on the return path, as the fifth delay slot of "b b3"; when
*  the loop branch is taken, control is already back at aloop.
       mv.l1 a2, a4			; return flag
       .end