Difference between revisions of "Z80 Routines:Math:Division"

Latest revision as of 16:18, 30 September 2019

Introduction

All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed. They can easily be unrolled to gain some speed.

8/8 division

The following routine divides d by e and places the quotient in d and the remainder in a

div_d_e:
   xor	a
   ld	b, 8

_loop:
   sla	d
   rla
   cp	e
   jr	c, $+4
   sub	e
   inc	d
   
   djnz	_loop
   
   ret

16/8 division

The following routine divides hl by c and places the quotient in hl and the remainder in a

div_hl_c:
   xor	a
   ld	b, 16

_loop:
   add	hl, hl
   rla
   jr	c, $+5
   cp	c
   jr	c, $+4

   sub	c
   inc	l
   
   djnz	_loop
   
   ret

16/16 division

The following routine divides ac by de and places the quotient in ac and the remainder in hl

div_ac_de:
   ld	hl, 0
   ld	b, 16

_loop:
   sll	c
   rla
   adc	hl, hl
   sbc	hl, de
   jr	nc, $+4
   add	hl, de
   dec	c
   
   djnz	_loop
   
   ret

24/8 division

The following routine divides ehl by d and places the quotient in ehl and the remainder in a

div_ehl_d:
   xor	a
   ld	b, 24

_loop:
   add	hl, hl
   rl	e
   rla
   jr	c, $+5
   cp	d
   jr	c, $+4

   sub	d
   inc	l
   
   djnz	_loop
   
   ret

32/8 division

The following routine divides dehl by c and places the quotient in dehl and the remainder in a

div_dehl_c:
   xor	a
   ld	b, 32

_loop:
   add	hl, hl
   rl	e
   rl	d
   rla
   jr	c, $+5
   cp	c
   jr	c, $+4

   sub	c
   inc	l
   
   djnz	_loop
   
   ret

32/16 division

The following routine divides acix by de and places the quotient in acix and the remainder in hl

Div32By16:
; IN:	ACIX=dividend, DE=divisor
; OUT:	ACIX=quotient, DE=divisor, HL=remainder, B=0
	ld	hl,0
	ld	b,32
Div32By16_Loop:
	add	ix,ix
	rl	c
	rla
	adc	hl,hl
	jr	c,Div32By16_Overflow
	sbc	hl,de
	jr	nc,Div32By16_SetBit
	add	hl,de
	djnz	Div32By16_Loop
	ret
Div32By16_Overflow:
	or	a
	sbc	hl,de
Div32By16_SetBit:
	.db	$DD,$2C		; inc ixl, change to inc ix to avoid undocumented
	djnz	Div32By16_Loop
	ret

32/16 division, Speed Optimized

At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!

div32_16:
;DEIX/BC -> HLIX remainder DE
;170+4*div32_16_sub8
;min: 2182cc
;max: 2790cc
;avg: 2462cc
;60 bytes

; Negate BC to allow add instead of sbc
  xor a      ; 4
; Need to set HL to 0 anyways, so save 2cc and a byte
  ld h,a     ; 4
  ld l,a     ; 4
  sub c      ; 4
  ld c,a     ; 4
  sbc a,a    ; 4
  sub b      ; 4
  ld b,a     ; 4


  ld a,d              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld d,a              ; 4

  ld a,e              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld e,a              ; 4

  ld a,ixh            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixh,a            ; 8

  ld a,ixl            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixl,a            ; 8

  ex de,hl   ; 4
  ret        ; 10

div32_16_sub8:
;119+8*div32_16_sub
;min: 503cc
;max: 655cc
;avg: 573cc
  call +_
_:
;17+2(17+2(div32_16_sub)))
  call +_
_:
;17+2(div32_16_sub)
  call div32_16_sub
div32_16_sub:
;48+{8,0+{0,19}}
;min: 48cc
;max: 67cc
;avg: 56.75cc
  rla        ; 4
  adc hl,hl  ; 15
  jr c,+_    ;12/7
  add hl,bc  ; 11
  ret c      ;11/5
  sbc hl,bc  ; 15
  ret        ; 10
_:
  add hl,bc  ; 11
  scf        ; 4
  ret        ; 10

Rounded 16/8 division

The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.

RoundHL_Div_C:
   xor	a
   ld	b, 16

_loop:
   add	hl, hl
   rla
   jr	c, $+5
   cp	c
   jr	c, $+4
   sub	c
   inc	l   
   djnz	_loop
;This part is the rounding
   add a,a
   cp	c
   ret	c
   inc hl
   ret

Difference between revisions of "Z80 Routines:Math:Division"

Latest revision as of 16:18, 30 September 2019

Contents

Introduction

8/8 division

16/8 division

16/16 division

24/8 division

32/8 division

32/16 division

32/16 division, Speed Optimized

Rounded 16/8 division

Navigation menu

Views

Personal tools

Navigation

Search

Tools

@@ Line 4: / Line 4: @@
 == Introduction ==
-All these routines use the restoring divison algorithm, adapted to the z80 architecture to maximize speed.
+All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed.
 They can easily be unrolled to gain some speed.
 == 8/8 division ==
-The following routine multiplies d by e and places the quotient in d and the remainder in a
+The following routine divides d by e and places the quotient in d and the remainder in a
   <nowiki>
@@ Line 31: / Line 31: @@
 == 16/8 division ==
-The following routine multiplies hl by c and places the quotient in hl and the remainder in a
+The following routine divides hl by c and places the quotient in hl and the remainder in a
   <nowiki>
@@ Line 41: / Line 41: @@
     add	hl, hl
     rla
+   jr	c, $+5
     cp	c
     jr	c, $+4
     sub	c
     inc	l
@@ Line 87: / Line 89: @@
     rl	e
     rla
+   jr	c, $+5
     cp	d
     jr	c, $+4
     sub	d
     inc	l
@@ Line 111: / Line 115: @@
     rl	d
     rla
+   jr	c, $+5
     cp	c
     jr	c, $+4
     sub	c
     inc	l
@@ Line 118: / Line 124: @@
     djnz	_loop
+   ret
+ </nowiki>
+== 32/16 division ==
+The following routine divides acix by de and places the quotient in acix and the remainder in hl
+ <nowiki>
+Div32By16:
+; IN:	ACIX=dividend, DE=divisor
+; OUT:	ACIX=quotient, DE=divisor, HL=remainder, B=0
+	ld	hl,0
+	ld	b,32
+Div32By16_Loop:
+	add	ix,ix
+	rl	c
+	rla
+	adc	hl,hl
+	jr	c,Div32By16_Overflow
+	sbc	hl,de
+	jr	nc,Div32By16_SetBit
+	add	hl,de
+	djnz	Div32By16_Loop
+	ret
+Div32By16_Overflow:
+	or	a
+	sbc	hl,de
+Div32By16_SetBit:
+	.db	$DD,$2C		; inc ixl, change to inc ix to avoid undocumented
+	djnz	Div32By16_Loop
+	ret
+ </nowiki>
+== 32/16 division, Speed Optimized ==
+At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!
+ <nowiki>
+div32_16:
+;DEIX/BC -> HLIX remainder DE
+;170+4*div32_16_sub8
+;min: 2182cc
+;max: 2790cc
+;avg: 2462cc
+;60 bytes
+; Negate BC to allow add instead of sbc
+  xor a      ; 4
+; Need to set HL to 0 anyways, so save 2cc and a byte
+  ld h,a     ; 4
+  ld l,a     ; 4
+  sub c      ; 4
+  ld c,a     ; 4
+  sbc a,a    ; 4
+  sub b      ; 4
+  ld b,a     ; 4
+  ld a,d              ; 4
+  call div32_16_sub8  ; 17
+  rla                 ; 4
+  ld d,a              ; 4
+  ld a,e              ; 4
+  call div32_16_sub8  ; 17
+  rla                 ; 4
+  ld e,a              ; 4
+  ld a,ixh            ; 8
+  call div32_16_sub8  ; 17
+  rla                 ; 4
+  ld ixh,a            ; 8
+  ld a,ixl            ; 8
+  call div32_16_sub8  ; 17
+  rla                 ; 4
+  ld ixl,a            ; 8
+  ex de,hl   ; 4
+  ret        ; 10
+div32_16_sub8:
+;119+8*div32_16_sub
+;min: 503cc
+;max: 655cc
+;avg: 573cc
+  call +_
+_:
+;17+2(17+2(div32_16_sub)))
+  call +_
+_:
+;17+2(div32_16_sub)
+  call div32_16_sub
+div32_16_sub:
+;48+{8,0+{0,19}}
+;min: 48cc
+;max: 67cc
+;avg: 56.75cc
+  rla        ; 4
+  adc hl,hl  ; 15
+  jr c,+_    ;12/7
+  add hl,bc  ; 11
+  ret c      ;11/5
+  sbc hl,bc  ; 15
+  ret        ; 10
+_:
+  add hl,bc  ; 11
+  scf        ; 4
+  ret        ; 10
+ </nowiki>
+== Rounded 16/8 division ==
+The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.
+ <nowiki>
+RoundHL_Div_C:
+   xor	a
+   ld	b, 16
+_loop:
+   add	hl, hl
+   rla
+   jr	c, $+5
+   cp	c
+   jr	c, $+4
+   sub	c
+   inc	l
+   djnz	_loop
+;This part is the rounding
+   add a,a
+   cp	c
+   ret	c
+   inc hl
     ret
   </nowiki>