*******************************************************************************
*									      *
*  64-BIT SUBTRACTION							      *
*  01/30/07 (dkc)							      *
*									      *
*  This C64 subroutine does 64-bit subtraction.  The calling sequence of the  *
*  subroutine is;							      *
*									      *
*     address of minuend (A[0], A[1]) => a4				      *
*     address of subtrahend (B[0], B[1]) => b4				      *
*     The difference A - B is written back over B.			      *
*									      *
*******************************************************************************
	.global _sub64
	.text
;------------------------------------------------------------------------------
;  _sub64 - 64-bit subtraction, B = A - B
;
;  In:	   a4 = address of A (two 32-bit words, A[0] and A[1])
;	   b4 = address of B (two 32-bit words, B[0] and B[1])
;	   b3 = return address
;  Out:    B[0], B[1] overwritten with A - B; A is only read
;  Writes: a0, a1, a6, a8, a9, b0, b6
;
;  Word [1] is the less significant half: carries produced from word [1]
;  are added into word [0].
;
;  Method: A - B is formed as A + (~B + 1).  Each 32-bit addition whose
;  carry is needed is done into a register pair (a9:a8, a1:a0) so that the
;  carry-out appears in bit 0 of the upper register of the pair.
;
;  Scheduling: the packet order below is load-bearing.  The loads are
;  consumed only after their delay slots have elapsed, and everything after
;  the branch runs in the branch's delay slots, so do not insert, remove or
;  reorder execute packets without recounting.
;------------------------------------------------------------------------------
_sub64:
	ldw.d1 *+a4[1], a0	  ;  a0 = A[1] (low word of A)
||	ldw.d2 *+b4[1], b0	  ;  b0 = B[1] (low word of B)

	ldw.d1 *a4, a6		  ;  a6 = A[0] (high word of A)
||	ldw.d2 *b4, b6		  ;  b6 = B[0] (high word of B)

	nop 3			  ;  wait out the load delay slots before b0 is read

	not.l1x b0, a8		  ;  a8 = ~B[1]
||	zero.s1 a9		  ;  a9 = 0, upper register of the a9:a8 pair

	not.l2 b6, b6		  ;  b6 = ~B[0]
||	add.l1 a9:a8, 1, a9:a8	  ;  a9:a8 = ~B[1] + 1 (low word of -B, carry in a9)
||	b.s2 b3 		  ;  return; the next five packets fill the branch delay slots

	and.l1 a9, 1, a9	  ;  a9 = carry out of ~B[1] + 1
||	zero.s1 a1		  ;  a1 = 0, upper register of the a1:a0 pair

	addu.l1 a1:a0, a8, a1:a0  ;  a1:a0 = A[1] + low word of -B; a0 = low word of A - B
||	add.l2x b6, a9, b6	  ;  b6 = ~B[0] + carry = high word of -B

	and.l1 a1, 1, a1	  ;  a1 = carry out of the low-word addition
||	add.l2x b6, a6, b6	  ;  b6 = A[0] + high word of -B (low-word carry not yet added)
||	stw.d2 a0, *+b4[1]	  ;  B[1] = low word of A - B

	add.l2x b6, a1, b6	  ;  b6 += low-word carry => high word of A - B

	stw.d2 b6, *b4		  ;  B[0] = high word of A - B (last delay-slot packet)
	.end