Difference between revisions of "Z80 Routines:Math:Division"
m (correct some typos)  | 
				 (Added an optimized 32/16)  | 
				||
| (7 intermediate revisions by 3 users not shown) | |||
| Line 41: | Line 41: | ||
    add	hl, hl  |     add	hl, hl  | ||
    rla  |     rla  | ||
| + |    jr	c, $+5  | ||
    cp	c  |     cp	c  | ||
    jr	c, $+4  |     jr	c, $+4  | ||
| + | |||
    sub	c  |     sub	c  | ||
    inc	l  |     inc	l  | ||
| Line 87: | Line 89: | ||
    rl	e  |     rl	e  | ||
    rla  |     rla  | ||
| + |    jr	c, $+5  | ||
    cp	d  |     cp	d  | ||
    jr	c, $+4  |     jr	c, $+4  | ||
| + | |||
    sub	d  |     sub	d  | ||
    inc	l  |     inc	l  | ||
| Line 111: | Line 115: | ||
    rl	d  |     rl	d  | ||
    rla  |     rla  | ||
| + |    jr	c, $+5  | ||
    cp	c  |     cp	c  | ||
    jr	c, $+4  |     jr	c, $+4  | ||
| + | |||
    sub	c  |     sub	c  | ||
    inc	l  |     inc	l  | ||
| Line 118: | Line 124: | ||
    djnz	_loop  |     djnz	_loop  | ||
| + |    ret  | ||
| + |  </nowiki>  | ||
| + | == 32/16 division ==  | ||
| + | |||
| + | The following routine divides acix by de and places the quotient in acix and the remainder in hl  | ||
| + | |||
| + |  <nowiki>  | ||
| + | Div32By16:  | ||
| + | ; IN:	ACIX=dividend, DE=divisor  | ||
| + | ; OUT:	ACIX=quotient, DE=divisor, HL=remainder, B=0  | ||
| + | 	ld	hl,0  | ||
| + | 	ld	b,32  | ||
| + | Div32By16_Loop:  | ||
| + | 	add	ix,ix  | ||
| + | 	rl	c  | ||
| + | 	rla  | ||
| + | 	adc	hl,hl  | ||
| + | 	jr	c,Div32By16_Overflow  | ||
| + | 	sbc	hl,de  | ||
| + | 	jr	nc,Div32By16_SetBit  | ||
| + | 	add	hl,de  | ||
| + | 	djnz	Div32By16_Loop  | ||
| + | 	ret  | ||
| + | Div32By16_Overflow:  | ||
| + | 	or	a  | ||
| + | 	sbc	hl,de  | ||
| + | Div32By16_SetBit:  | ||
| + | 	.db	$DD,$2C		; inc ixl, change to inc ix to avoid undocumented  | ||
| + | 	djnz	Div32By16_Loop  | ||
| + | 	ret  | ||
| + |  </nowiki>  | ||
| + | |||
| + | == 32/16 division, Speed Optimized ==  | ||
| + | |||
| + | At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!  | ||
| + | |||
| + |  <nowiki>  | ||
| + | div32_16:  | ||
| + | ;DEIX/BC -> HLIX remainder DE  | ||
| + | ;170+4*div32_16_sub8  | ||
| + | ;min: 2182cc  | ||
| + | ;max: 2790cc  | ||
| + | ;avg: 2462cc  | ||
| + | ;60 bytes  | ||
| + | |||
| + | ; Negate BC to allow add instead of sbc  | ||
| + |   xor a      ; 4  | ||
| + | ; Need to set HL to 0 anyways, so save 2cc and a byte  | ||
| + |   ld h,a     ; 4  | ||
| + |   ld l,a     ; 4  | ||
| + |   sub c      ; 4  | ||
| + |   ld c,a     ; 4  | ||
| + |   sbc a,a    ; 4  | ||
| + |   sub b      ; 4  | ||
| + |   ld b,a     ; 4  | ||
| + | |||
| + | |||
| + |   ld a,d              ; 4  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld d,a              ; 4  | ||
| + | |||
| + |   ld a,e              ; 4  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld e,a              ; 4  | ||
| + | |||
| + |   ld a,ixh            ; 8  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld ixh,a            ; 8  | ||
| + | |||
| + |   ld a,ixl            ; 8  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld ixl,a            ; 8  | ||
| + | |||
| + |   ex de,hl   ; 4  | ||
| + |   ret        ; 10  | ||
| + | |||
| + | div32_16_sub8:  | ||
| + | ;119+8*div32_16_sub  | ||
| + | ;min: 503cc  | ||
| + | ;max: 655cc  | ||
| + | ;avg: 573cc  | ||
| + |   call +_  | ||
| + | _:  | ||
| + | ;17+2(17+2(div32_16_sub)))  | ||
| + |   call +_  | ||
| + | _:  | ||
| + | ;17+2(div32_16_sub)  | ||
| + |   call div32_16_sub  | ||
| + | div32_16_sub:  | ||
| + | ;48+{8,0+{0,19}}  | ||
| + | ;min: 48cc  | ||
| + | ;max: 67cc  | ||
| + | ;avg: 56.75cc  | ||
| + |   rla        ; 4  | ||
| + |   adc hl,hl  ; 15  | ||
| + |   jr c,+_    ;12/7  | ||
| + |   add hl,bc  ; 11  | ||
| + |   ret c      ;11/5  | ||
| + |   sbc hl,bc  ; 15  | ||
| + |   ret        ; 10  | ||
| + | _:  | ||
| + |   add hl,bc  ; 11  | ||
| + |   scf        ; 4  | ||
| + |   ret        ; 10  | ||
| + |  </nowiki>  | ||
| + | |||
| + | == Rounded 16/8 division ==  | ||
| + | |||
| + | The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.  | ||
| + | |||
| + |  <nowiki>  | ||
| + | RoundHL_Div_C:  | ||
| + |    xor	a  | ||
| + |    ld	b, 16  | ||
| + | |||
| + | _loop:  | ||
| + |    add	hl, hl  | ||
| + |    rla  | ||
| + |    jr	c, $+5  | ||
| + |    cp	c  | ||
| + |    jr	c, $+4  | ||
| + |    sub	c  | ||
| + |    inc	l     | ||
| + |    djnz	_loop  | ||
| + | ;This part is the rounding  | ||
| + |    add a,a  | ||
| + |    cp	c  | ||
| + |    ret	c  | ||
| + |    inc hl  | ||
    ret  |     ret  | ||
  </nowiki>  |   </nowiki>  | ||
Latest revision as of 15:18, 30 September 2019
Contents
Introduction
All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed. They can easily be unrolled to gain some speed.
8/8 division
The following routine divides d by e and places the quotient in d and the remainder in a
div_d_e: xor a ld b, 8 _loop: sla d rla cp e jr c, $+4 sub e inc d djnz _loop ret
16/8 division
The following routine divides hl by c and places the quotient in hl and the remainder in a
div_hl_c: xor a ld b, 16 _loop: add hl, hl rla jr c, $+5 cp c jr c, $+4 sub c inc l djnz _loop ret
16/16 division
The following routine divides ac by de and places the quotient in ac and the remainder in hl
div_ac_de: ld hl, 0 ld b, 16 _loop: sll c rla adc hl, hl sbc hl, de jr nc, $+4 add hl, de dec c djnz _loop ret
24/8 division
The following routine divides ehl by d and places the quotient in ehl and the remainder in a
div_ehl_d: xor a ld b, 24 _loop: add hl, hl rl e rla jr c, $+5 cp d jr c, $+4 sub d inc l djnz _loop ret
32/8 division
The following routine divides dehl by c and places the quotient in dehl and the remainder in a
div_dehl_c: xor a ld b, 32 _loop: add hl, hl rl e rl d rla jr c, $+5 cp c jr c, $+4 sub c inc l djnz _loop ret
32/16 division
The following routine divides acix by de and places the quotient in acix and the remainder in hl
Div32By16: ; IN: ACIX=dividend, DE=divisor ; OUT: ACIX=quotient, DE=divisor, HL=remainder, B=0 ld hl,0 ld b,32 Div32By16_Loop: add ix,ix rl c rla adc hl,hl jr c,Div32By16_Overflow sbc hl,de jr nc,Div32By16_SetBit add hl,de djnz Div32By16_Loop ret Div32By16_Overflow: or a sbc hl,de Div32By16_SetBit: .db $DD,$2C ; inc ixl, change to inc ix to avoid undocumented djnz Div32By16_Loop ret
32/16 division, Speed Optimized
At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!
div32_16:
;DEIX/BC -> HLIX remainder DE
;170+4*div32_16_sub8
;min: 2182cc
;max: 2790cc
;avg: 2462cc
;60 bytes
; Negate BC to allow add instead of sbc
  xor a      ; 4
; Need to set HL to 0 anyways, so save 2cc and a byte
  ld h,a     ; 4
  ld l,a     ; 4
  sub c      ; 4
  ld c,a     ; 4
  sbc a,a    ; 4
  sub b      ; 4
  ld b,a     ; 4
  ld a,d              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld d,a              ; 4
  ld a,e              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld e,a              ; 4
  ld a,ixh            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixh,a            ; 8
  ld a,ixl            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixl,a            ; 8
  ex de,hl   ; 4
  ret        ; 10
div32_16_sub8:
;119+8*div32_16_sub
;min: 503cc
;max: 655cc
;avg: 573cc
  call +_
_:
;17+2(17+2(div32_16_sub)))
  call +_
_:
;17+2(div32_16_sub)
  call div32_16_sub
div32_16_sub:
;48+{8,0+{0,19}}
;min: 48cc
;max: 67cc
;avg: 56.75cc
  rla        ; 4
  adc hl,hl  ; 15
  jr c,+_    ;12/7
  add hl,bc  ; 11
  ret c      ;11/5
  sbc hl,bc  ; 15
  ret        ; 10
_:
  add hl,bc  ; 11
  scf        ; 4
  ret        ; 10
 
Rounded 16/8 division
The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.
RoundHL_Div_C: xor a ld b, 16 _loop: add hl, hl rla jr c, $+5 cp c jr c, $+4 sub c inc l djnz _loop ;This part is the rounding add a,a cp c ret c inc hl ret