Z80 Routines:Math:Division

Introduction

All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed. They can easily be unrolled to gain some speed.

8/8 division

The following routine divides d by e and places the quotient in d and the remainder in a

div_d_e:
xor	a
ld	b, 8

_loop:
sla	d
rla
cp	e
jr	c, \$+4
sub	e
inc	d

djnz	_loop

ret

16/8 division

The following routine divides hl by c and places the quotient in hl and the remainder in a

div_hl_c:
xor	a
ld	b, 16

_loop:
rla
jr	c, \$+5
cp	c
jr	c, \$+4

sub	c
inc	l

djnz	_loop

ret

16/16 division

The following routine divides ac by de and places the quotient in ac and the remainder in hl

div_ac_de:
ld	hl, 0
ld	b, 16

_loop:
sll	c
rla
sbc	hl, de
jr	nc, \$+4
dec	c

djnz	_loop

ret

24/8 division

The following routine divides ehl by d and places the quotient in ehl and the remainder in a

div_ehl_d:
xor	a
ld	b, 24

_loop:
rl	e
rla
jr	c, \$+5
cp	d
jr	c, \$+4

sub	d
inc	l

djnz	_loop

ret

32/8 division

The following routine divides dehl by c and places the quotient in dehl and the remainder in a

div_dehl_c:
xor	a
ld	b, 32

_loop:
rl	e
rl	d
rla
jr	c, \$+5
cp	c
jr	c, \$+4

sub	c
inc	l

djnz	_loop

ret

32/16 division

The following routine divides acix by de and places the quotient in acix and the remainder in hl

Div32By16:
; IN:	ACIX=dividend, DE=divisor
; OUT:	ACIX=quotient, DE=divisor, HL=remainder, B=0
ld	hl,0
ld	b,32
Div32By16_Loop:
rl	c
rla
jr	c,Div32By16_Overflow
sbc	hl,de
jr	nc,Div32By16_SetBit
djnz	Div32By16_Loop
ret
Div32By16_Overflow:
or	a
sbc	hl,de
Div32By16_SetBit:
.db	\$DD,\$2C		; inc ixl, change to inc ix to avoid undocumented
djnz	Div32By16_Loop
ret

32/16 division, Speed Optimized

At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!

div32_16:
;DEIX/BC -> HLIX remainder DE
;170+4*div32_16_sub8
;min: 2182cc
;max: 2790cc
;avg: 2462cc
;60 bytes

; Negate BC to allow add instead of sbc
xor a      ; 4
; Need to set HL to 0 anyways, so save 2cc and a byte
ld h,a     ; 4
ld l,a     ; 4
sub c      ; 4
ld c,a     ; 4
sbc a,a    ; 4
sub b      ; 4
ld b,a     ; 4

ld a,d              ; 4
call div32_16_sub8  ; 17
rla                 ; 4
ld d,a              ; 4

ld a,e              ; 4
call div32_16_sub8  ; 17
rla                 ; 4
ld e,a              ; 4

ld a,ixh            ; 8
call div32_16_sub8  ; 17
rla                 ; 4
ld ixh,a            ; 8

ld a,ixl            ; 8
call div32_16_sub8  ; 17
rla                 ; 4
ld ixl,a            ; 8

ex de,hl   ; 4
ret        ; 10

div32_16_sub8:
;119+8*div32_16_sub
;min: 503cc
;max: 655cc
;avg: 573cc
call +_
_:
;17+2(17+2(div32_16_sub)))
call +_
_:
;17+2(div32_16_sub)
call div32_16_sub
div32_16_sub:
;48+{8,0+{0,19}}
;min: 48cc
;max: 67cc
;avg: 56.75cc
rla        ; 4
adc hl,hl  ; 15
jr c,+_    ;12/7
add hl,bc  ; 11
ret c      ;11/5
sbc hl,bc  ; 15
ret        ; 10
_:
add hl,bc  ; 11
scf        ; 4
ret        ; 10

Rounded 16/8 division

The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.

RoundHL_Div_C:
xor	a
ld	b, 16

_loop: