*******************************************************************************
*
*  FIND LIMB
*  09/03/10 (dkc)
*
*  This C64 subroutine finds a limb in S given its parity vector.
*
*  Calling sequence (register contents on entry):
*
*     I[0] (order/2+2)                    => a4
*     I[1] (order/2+2)                    => b4
*     amount to shift off LSB's of order  => a6
*     inner loop count                    => b6
*     delta                               => a8
*     address of G                        => b8
*     return address                      => b3
*
*  Note:  The shift amount must be small enough so that there are significant
*  bits left in the upper word of the order.
*
*  Word order:  I, G and H are two-word values.  Word [1] is the low word and
*  word [0] the high word (the carry out of every word [1] addition is added
*  into word [0]).
*
*  Results:
*     a4 = 1   exit through cskip.  The saved G[0], G[1] (a21, a20) and
*              H[0], H[1] (a31, a30) are stored to the four consecutive words
*              starting at the address in b8.
*     a4 = 0   the outer loop ended without taking the cskip exit.  Nothing
*              is stored.
*
*  External data:  _sv, the parity vector, read one word at a time.
*
*  Registers written:  a0-a7, a17-a21, a23, a27-a31, b0, b2, b5, b6, b9,
*  b16, b18, b26.
*
*  NOTE(review):  the code is hand-scheduled.  Results of mpy/rotl and ldw
*  arrive after their delay slots and branches take effect after theirs, so
*  an instruction must not be moved to another execute packet without
*  re-checking every read of the registers involved.  Instructions joined by
*  "||" belong to the same execute packet.
*
*******************************************************************************
        .global _limb
        .ref _sv
        .text
*
*  Set-up:  b16 = address of _sv, G = I, H = I + delta, flags cleared/set,
*  then the first two packets of an inner-loop pass are issued so that the
*  loop can be entered at cloop.
*
_limb: mvkl.s2 _sv, b16                 ; address of parity vector (low half)
     || sub.l2 b6, 1, b6                ; j-1
     || mvk.d1 1, a3                    ; c=1 (the +1 added on odd steps)

        mvkh.s2 _sv, b16                ; address of parity vector (high half)
     || mv.d1 a6, a17                   ; load order (shift amount)
     || mpy.m1 a31, 0, a31              ; clear carry

        mv.s1x b4, a30                  ; H[1]=I[1]
     || mv.l1 a4, a5                    ; G[0]=I[0]
     || ldw.d2 *b16, a19                ; load sv[index], first word of _sv

        mpy.m1 a23, 0, a23              ; oldg=0
     || mv.s1 a30, a4                   ; G[1]=H[1]
     || addu.l1 a31:a30, a8, a31:a30    ; H=H+delta (low word, carry to a31)
     || mv.l2 b6, b2                    ; load j-1

        mv.d2 b16, b18                  ; load address of parity vector
     || add.l1 a5, a31, a31             ; H=H+delta (H[0]=G[0]+carry)

        mpy.m1 a1, 0, a1                ; clear flag (a1 is set by "compare to 2")

        and.l1 a4, 1, a0                ; G[1]&1
     || mpy.m1 a7, 0, a7                ; clear upper word
     || shl.s2x a5, 1, b9               ; G[0]+G[0]
     || mv.s1 a4, a6                    ; load G[1]
     || mvk.d2 1, b26                   ; set flag

  [!a0] shru.s1 a4, 1, a4               ; G[1]>>1
     || xor.l1 a19, a23, a2             ; sv[index]^oldg
||[!a0] mvk.d1 1, a23                   ; oldg=1
|| [a0] mpy.m1 a23, 0, a23              ; oldg=0
||[!a0] addaw.d2 b18, 1, b18            ; index=index+1
     || mvk.s2 1, b0                    ; set flag ("solution" flag, see below)

  [!a0] shl.s1 a5, 31, a6               ; G[0]<<31
||[!a2] zero.s2 b2                      ; clear inner loop count
|| [a0] add.d1x a5, b9, a5              ; G[0]+G[0]+G[0]
|| [a0] addu.l1 a7:a6, a4, a7:a6        ; G[1]+G[1]
|| [a2] ldw.d2 *b18, a19                ; load sv[index]
***********************
* begin outer loop    *
***********************
***********************
* begin inner loop    *
***********************
*
*  One pass advances G by one step, selected by a0 = G[1]&1:
*     a0 != 0 (odd):   G = G + G + G + 1   (carry from word [1] kept in a7)
*     a0 == 0 (even):  G = G >> 1          (bit 0 of G[0] moves into G[1])
*  On an even step a2 = sv[index]^oldg is recomputed, oldg becomes 1 and
*  index is advanced; on an odd step oldg becomes 0.  A zero a2 clears the
*  inner loop count (b2) and the flag b0.
*  a1 is set once (G[0] >> shift amount) > 2; it then clears b2 and b0 and
*  freezes the saved copies of G in a20/a21.
*  The pass in which b2 is already zero does not branch back; in it a0 is
*  forced to 1 and G[1]&1 (a18) is copied to b26, which is and-ed into b0.
*
cloop: [!a0] or.d1 a4, a6, a4           ; G[1]=G[1]|(G[0]<<31)
||[!a0] shru.s1 a5, 1, a5               ; G[0]=G[0]>>1
|| [a0] addu.l1 a7:a6, a4, a7:a6        ; G[1]+G[1]+G[1]
||[!a2] zero.l2 b0                      ; clear flag

   [a0] addu.l1 a7:a6, a3, a7:a6        ; G[1]+G[1]+G[1]+1
     || shru.s1 a31, a17, a29           ; shift off LSB's (of H[0])
||[!a0] and.d1 a4, 1, a18               ; check if G[1] is even
     || mpy.m1 a7, 0, a7                ; clear upper word
|| [b2] b.s2 cloop                      ; conditional branch to loop beginning

   [a0] add.s1 a7, a5, a5               ; G[0] (add carry from G[1])
|| [a0] mv.l1 a6, a4                    ; load G[1]
|| [a0] and.d1 a6, 1, a18               ; check if G[1] is even
||[!b2] mpy.m1 a27, 0, a27              ; clear MSB's
*
   [b2] shru.s1 a5, a17, a27            ; shift off LSB's (of G[0])
||[!a1] rotl.m1 a4, 0, a20              ; save G[1]
|| [b2] and.l1 a4, 1, a0                ; G[1]&1
||[!b2] mvk.d1 1, a0                    ; set flag

  [!a1] cmpgtu.l1 a27, 2, a1            ; compare to 2
     || shl.s1 a5, 1, a28               ; G[0]+G[0]
     || rotl.m1 a4, 0, a6               ; load G[1]
||[!b2] rotl.m2x a18, 0, b26            ; save G[1]&1
|| [b2] sub.l2 b2, 1, b2                ; decrement loop count

  [!a0] shru.s1 a4, 1, a4               ; G[1]>>1
||[!a0] xor.l1 a19, a23, a2             ; sv[index]^oldg
||[!a0] mvk.d1 1, a23                   ; oldg=1
||[!a0] addaw.d2 b18, 1, b18            ; index=index+1
||[!a1] rotl.m1 a5, 0, a21              ; save G[0]
|| [a1] mpy.m2 b2, 0, b2                ; exit loop
|| [a1] zero.s2 b0                      ; clear flag

  [!a0] shl.s1 a5, 31, a6               ; G[0]<<31
||[!a2] zero.s2 b2                      ; clear inner loop count
|| [a0] add.d1 a5, a28, a5              ; G[0]+G[0]+G[0]
|| [a0] addu.l1 a7:a6, a4, a7:a6        ; G[1]+G[1]
|| [a0] mpy.m1 a23, 0, a23              ; oldg=0
|| [a2] ldw.d2 *b18, a19                ; load sv[index]
||[!a1] and.l2 b0, b26, b0              ; "and" conditions
     || rotl.m2x a5, 0, b5              ; save G[0]
*********************
* end inner loop    *
*********************
*
*  b0 != 0:  branch to cskip (solution found); a2 is forced to 1 so that the
*            branch back to cloop is not taken.
*  b0 == 0:  a2 = (a29 > 2), a29 being H[0] >> shift amount.  While a2 is
*            zero the next candidate is started (G = H, H = H + delta,
*            index and oldg reset, b2 = j-1) and cloop is re-entered;
*            otherwise execution falls through to the "return 0" exit.
*  The packets after the two branches run in their delay slots; the last
*  three repeat the three packets that precede cloop in the set-up code.
*
   [b0] b.s2 cskip                      ; solution found
|| [b0] mvk.s1 1, a2                    ; set outer loop count
||[!b0] cmpgtu.l1 a29, 2, a2            ; compare to order
||[!b0] ldw.d2 *b16, a19                ; load sv[index], first word of _sv

  [!a2] b.s2 cloop                      ; conditional branch to loop beginning
     || mv.s1 a31, a5                   ; G[0]=H[0]
     || mv.d1 a30, a4                   ; G[1]=H[1]
     || mpy.m1 a23, 0, a23              ; oldg=0
||[!b0] zero.l1 a31                     ; clear carry bit
     || mv.l2 b6, b2                    ; load j-1

  [!b0] addu.l1 a31:a30, a8, a31:a30    ; H=H+delta (low word, carry to a31)
     || mv.l2 b16, b18                  ; load address of parity vector

  [!b0] add.l1 a5, a31, a31             ; H=H+delta (H[0]=G[0]+carry)
     || mpy.m1 a1, 0, a1                ; clear flag

        and.l1 a4, 1, a0                ; G[1]&1
     || mpy.m1 a7, 0, a7                ; clear upper word
     || shl.s2x a5, 1, b9               ; G[0]+G[0]
     || mv.s1 a4, a6                    ; load G[1]
     || mvk.d2 1, b26                   ; set flag

  [!a0] shru.s1 a4, 1, a4               ; G[1]>>1
     || xor.l1 a19, a23, a2             ; sv[index]^oldg
||[!a0] mvk.d1 1, a23                   ; oldg=1
|| [a0] mpy.m1 a23, 0, a23              ; oldg=0
||[!a0] addaw.d2 b18, 1, b18            ; index=index+1
     || mvk.s2 1, b0                    ; set flag

  [!a0] shl.s1 a5, 31, a6               ; G[0]<<31
||[!a2] zero.s2 b2                      ; clear inner loop count
|| [a0] add.d1x a5, b9, a5              ; G[0]+G[0]+G[0]
|| [a0] addu.l1 a7:a6, a4, a7:a6        ; G[1]+G[1]
|| [a2] ldw.d2 *b18, a19                ; load sv[index]
********************
* end outer loop   *
********************
*
*  Reached when neither branch above was taken:  return 0 in a4.
*
        b.s2 b3                         ; return
     || mvk.d1 0, a4                    ; clear flag (return value 0)

        nop 5
*************
* return    *
*************
*
*  Solution exit:  store G and H to the buffer addressed by b8 and return 1
*  in a4.  The stores and the nop fill the delay slots of the return branch.
*
cskip: b.s2 b3                          ; return
     || stw.d2 a21, *b8                 ; store G[0]

        stw.d2 a20, *+b8[1]             ; store G[1]
     || mvk.d1 1, a4                    ; set flag (return value 1)

        stw.d2 a31, *+b8[2]             ; store H[0]

        stw.d2 a30, *+b8[3]             ; store H[1]

        nop 2
        .end