Difference between revisions of "Z80 Routines:Math:Division"

From WikiTI
Jump to: navigation, search
(added categorization)
(Added an optimized 32/16)
 
(8 intermediate revisions by 4 users not shown)
Line 4: Line 4:
 
== Introduction ==
 
== Introduction ==
  
All these routines use the restoring divison algorithm, adapted to the z80 architecture to maximize speed.
+
All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed.
 
They can easily be unrolled to gain some speed.
 
They can easily be unrolled to gain some speed.
  
 
== 8/8 division ==
 
== 8/8 division ==
  
The following routine multiplies d by e and places the quotient in d and the remainder in a
+
The following routine divides d by e and places the quotient in d and the remainder in a
  
 
  <nowiki>
 
  <nowiki>
Line 31: Line 31:
 
== 16/8 division ==
 
== 16/8 division ==
  
The following routine multiplies hl by c and places the quotient in hl and the remainder in a
+
The following routine divides hl by c and places the quotient in hl and the remainder in a
  
 
  <nowiki>
 
  <nowiki>
Line 41: Line 41:
 
   add hl, hl
 
   add hl, hl
 
   rla
 
   rla
 +
  jr c, $+5
 
   cp c
 
   cp c
 
   jr c, $+4
 
   jr c, $+4
 +
 
   sub c
 
   sub c
 
   inc l
 
   inc l
Line 87: Line 89:
 
   rl e
 
   rl e
 
   rla
 
   rla
 +
  jr c, $+5
 
   cp d
 
   cp d
 
   jr c, $+4
 
   jr c, $+4
 +
 
   sub d
 
   sub d
 
   inc l
 
   inc l
Line 111: Line 115:
 
   rl d
 
   rl d
 
   rla
 
   rla
 +
  jr c, $+5
 
   cp c
 
   cp c
 
   jr c, $+4
 
   jr c, $+4
 +
 
   sub c
 
   sub c
 
   inc l
 
   inc l
Line 118: Line 124:
 
   djnz _loop
 
   djnz _loop
 
    
 
    
 +
  ret
 +
</nowiki>
 +
== 32/16 division ==
 +
 +
The following routine divides acix by de and places the quotient in acix and the remainder in hl
 +
 +
<nowiki>
 +
Div32By16:
 +
; IN: ACIX=dividend, DE=divisor
 +
; OUT: ACIX=quotient, DE=divisor, HL=remainder, B=0
 +
ld hl,0
 +
ld b,32
 +
Div32By16_Loop:
 +
add ix,ix
 +
rl c
 +
rla
 +
adc hl,hl
 +
jr c,Div32By16_Overflow
 +
sbc hl,de
 +
jr nc,Div32By16_SetBit
 +
add hl,de
 +
djnz Div32By16_Loop
 +
ret
 +
Div32By16_Overflow:
 +
or a
 +
sbc hl,de
 +
Div32By16_SetBit:
 +
.db $DD,$2C ; inc ixl, change to inc ix to avoid undocumented
 +
djnz Div32By16_Loop
 +
ret
 +
</nowiki>
 +
 +
== 32/16 division, Speed Optimized ==
 +
 +
At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!
 +
 +
<nowiki>
 +
div32_16:
 +
;DEIX/BC -> HLIX remainder DE
 +
;170+4*div32_16_sub8
 +
;min: 2182cc
 +
;max: 2790cc
 +
;avg: 2462cc
 +
;60 bytes
 +
 +
; Negate BC to allow add instead of sbc
 +
  xor a      ; 4
 +
; Need to set HL to 0 anyways, so save 2cc and a byte
 +
  ld h,a    ; 4
 +
  ld l,a    ; 4
 +
  sub c      ; 4
 +
  ld c,a    ; 4
 +
  sbc a,a    ; 4
 +
  sub b      ; 4
 +
  ld b,a    ; 4
 +
 +
 +
  ld a,d              ; 4
 +
  call div32_16_sub8  ; 17
 +
  rla                ; 4
 +
  ld d,a              ; 4
 +
 +
  ld a,e              ; 4
 +
  call div32_16_sub8  ; 17
 +
  rla                ; 4
 +
  ld e,a              ; 4
 +
 +
  ld a,ixh            ; 8
 +
  call div32_16_sub8  ; 17
 +
  rla                ; 4
 +
  ld ixh,a            ; 8
 +
 +
  ld a,ixl            ; 8
 +
  call div32_16_sub8  ; 17
 +
  rla                ; 4
 +
  ld ixl,a            ; 8
 +
 +
  ex de,hl  ; 4
 +
  ret        ; 10
 +
 +
div32_16_sub8:
 +
;119+8*div32_16_sub
 +
;min: 503cc
 +
;max: 655cc
 +
;avg: 573cc
 +
  call +_
 +
_:
 +
;17+2(17+2(div32_16_sub)))
 +
  call +_
 +
_:
 +
;17+2(div32_16_sub)
 +
  call div32_16_sub
 +
div32_16_sub:
 +
;48+{8,0+{0,19}}
 +
;min: 48cc
 +
;max: 67cc
 +
;avg: 56.75cc
 +
  rla        ; 4
 +
  adc hl,hl  ; 15
 +
  jr c,+_    ;12/7
 +
  add hl,bc  ; 11
 +
  ret c      ;11/5
 +
  sbc hl,bc  ; 15
 +
  ret        ; 10
 +
_:
 +
  add hl,bc  ; 11
 +
  scf        ; 4
 +
  ret        ; 10
 +
</nowiki>
 +
 +
== Rounded 16/8 division ==
 +
 +
The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.
 +
 +
<nowiki>
 +
RoundHL_Div_C:
 +
  xor a
 +
  ld b, 16
 +
 +
_loop:
 +
  add hl, hl
 +
  rla
 +
  jr c, $+5
 +
  cp c
 +
  jr c, $+4
 +
  sub c
 +
  inc l 
 +
  djnz _loop
 +
;This part is the rounding
 +
  add a,a
 +
  cp c
 +
  ret c
 +
  inc hl
 
   ret
 
   ret
 
  </nowiki>
 
  </nowiki>

Latest revision as of 16:18, 30 September 2019


Introduction

All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed. They can easily be unrolled to gain some speed.

8/8 division

The following routine divides d by e and places the quotient in d and the remainder in a

div_d_e:
   xor	a
   ld	b, 8

_loop:
   sla	d
   rla
   cp	e
   jr	c, $+4
   sub	e
   inc	d
   
   djnz	_loop
   
   ret
 

16/8 division

The following routine divides hl by c and places the quotient in hl and the remainder in a

div_hl_c:
   xor	a
   ld	b, 16

_loop:
   add	hl, hl
   rla
   jr	c, $+5
   cp	c
   jr	c, $+4

   sub	c
   inc	l
   
   djnz	_loop
   
   ret
 

16/16 division

The following routine divides ac by de and places the quotient in ac and the remainder in hl

div_ac_de:
   ld	hl, 0
   ld	b, 16

_loop:
   sll	c
   rla
   adc	hl, hl
   sbc	hl, de
   jr	nc, $+4
   add	hl, de
   dec	c
   
   djnz	_loop
   
   ret
 

24/8 division

The following routine divides ehl by d and places the quotient in ehl and the remainder in a

div_ehl_d:
   xor	a
   ld	b, 24

_loop:
   add	hl, hl
   rl	e
   rla
   jr	c, $+5
   cp	d
   jr	c, $+4

   sub	d
   inc	l
   
   djnz	_loop
   
   ret
 

32/8 division

The following routine divides dehl by c and places the quotient in dehl and the remainder in a

div_dehl_c:
   xor	a
   ld	b, 32

_loop:
   add	hl, hl
   rl	e
   rl	d
   rla
   jr	c, $+5
   cp	c
   jr	c, $+4

   sub	c
   inc	l
   
   djnz	_loop
   
   ret
 

32/16 division

The following routine divides acix by de and places the quotient in acix and the remainder in hl

Div32By16:
; IN:	ACIX=dividend, DE=divisor
; OUT:	ACIX=quotient, DE=divisor, HL=remainder, B=0
	ld	hl,0
	ld	b,32
Div32By16_Loop:
	add	ix,ix
	rl	c
	rla
	adc	hl,hl
	jr	c,Div32By16_Overflow
	sbc	hl,de
	jr	nc,Div32By16_SetBit
	add	hl,de
	djnz	Div32By16_Loop
	ret
Div32By16_Overflow:
	or	a
	sbc	hl,de
Div32By16_SetBit:
	.db	$DD,$2C		; inc ixl, change to inc ix to avoid undocumented
	djnz	Div32By16_Loop
	ret
 

32/16 division, Speed Optimized

At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!

div32_16:
;DEIX/BC -> HLIX remainder DE
;170+4*div32_16_sub8
;min: 2182cc
;max: 2790cc
;avg: 2462cc
;60 bytes

; Negate BC to allow add instead of sbc
  xor a      ; 4
; Need to set HL to 0 anyways, so save 2cc and a byte
  ld h,a     ; 4
  ld l,a     ; 4
  sub c      ; 4
  ld c,a     ; 4
  sbc a,a    ; 4
  sub b      ; 4
  ld b,a     ; 4


  ld a,d              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld d,a              ; 4

  ld a,e              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld e,a              ; 4

  ld a,ixh            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixh,a            ; 8

  ld a,ixl            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixl,a            ; 8

  ex de,hl   ; 4
  ret        ; 10

div32_16_sub8:
;119+8*div32_16_sub
;min: 503cc
;max: 655cc
;avg: 573cc
  call +_
_:
;17+2(17+2(div32_16_sub)))
  call +_
_:
;17+2(div32_16_sub)
  call div32_16_sub
div32_16_sub:
;48+{8,0+{0,19}}
;min: 48cc
;max: 67cc
;avg: 56.75cc
  rla        ; 4
  adc hl,hl  ; 15
  jr c,+_    ;12/7
  add hl,bc  ; 11
  ret c      ;11/5
  sbc hl,bc  ; 15
  ret        ; 10
_:
  add hl,bc  ; 11
  scf        ; 4
  ret        ; 10
 

Rounded 16/8 division

The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.

RoundHL_Div_C:
   xor	a
   ld	b, 16

_loop:
   add	hl, hl
   rla
   jr	c, $+5
   cp	c
   jr	c, $+4
   sub	c
   inc	l   
   djnz	_loop
;This part is the rounding
   add a,a
   cp	c
   ret	c
   inc hl
   ret