Difference between revisions of "Z80 Routines:Math:Division"
 (Fixed 24/8 and 32/8.)  | 
				 (Added an optimized 32/16)  | 
				||
| (2 intermediate revisions by one other user not shown) | |||
| Line 154: | Line 154: | ||
	djnz	Div32By16_Loop  | 	djnz	Div32By16_Loop  | ||
	ret  | 	ret  | ||
| + |  </nowiki>  | ||
| + | |||
| + | == 32/16 division, Speed Optimized ==  | ||
| + | |||
| + | At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!  | ||
| + | |||
| + |  <nowiki>  | ||
| + | div32_16:  | ||
| + | ;DEIX/BC -> HLIX remainder DE  | ||
| + | ;170+4*div32_16_sub8  | ||
| + | ;min: 2182cc  | ||
| + | ;max: 2790cc  | ||
| + | ;avg: 2462cc  | ||
| + | ;60 bytes  | ||
| + | |||
| + | ; Negate BC to allow add instead of sbc  | ||
| + |   xor a      ; 4  | ||
| + | ; Need to set HL to 0 anyways, so save 2cc and a byte  | ||
| + |   ld h,a     ; 4  | ||
| + |   ld l,a     ; 4  | ||
| + |   sub c      ; 4  | ||
| + |   ld c,a     ; 4  | ||
| + |   sbc a,a    ; 4  | ||
| + |   sub b      ; 4  | ||
| + |   ld b,a     ; 4  | ||
| + | |||
| + | |||
| + |   ld a,d              ; 4  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld d,a              ; 4  | ||
| + | |||
| + |   ld a,e              ; 4  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld e,a              ; 4  | ||
| + | |||
| + |   ld a,ixh            ; 8  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld ixh,a            ; 8  | ||
| + | |||
| + |   ld a,ixl            ; 8  | ||
| + |   call div32_16_sub8  ; 17  | ||
| + |   rla                 ; 4  | ||
| + |   ld ixl,a            ; 8  | ||
| + | |||
| + |   ex de,hl   ; 4  | ||
| + |   ret        ; 10  | ||
| + | |||
| + | div32_16_sub8:  | ||
| + | ;119+8*div32_16_sub  | ||
| + | ;min: 503cc  | ||
| + | ;max: 655cc  | ||
| + | ;avg: 573cc  | ||
| + |   call +_  | ||
| + | _:  | ||
| + | ;17+2(17+2(div32_16_sub)))  | ||
| + |   call +_  | ||
| + | _:  | ||
| + | ;17+2(div32_16_sub)  | ||
| + |   call div32_16_sub  | ||
| + | div32_16_sub:  | ||
| + | ;48+{8,0+{0,19}}  | ||
| + | ;min: 48cc  | ||
| + | ;max: 67cc  | ||
| + | ;avg: 56.75cc  | ||
| + |   rla        ; 4  | ||
| + |   adc hl,hl  ; 15  | ||
| + |   jr c,+_    ;12/7  | ||
| + |   add hl,bc  ; 11  | ||
| + |   ret c      ;11/5  | ||
| + |   sbc hl,bc  ; 15  | ||
| + |   ret        ; 10  | ||
| + | _:  | ||
| + |   add hl,bc  ; 11  | ||
| + |   scf        ; 4  | ||
| + |   ret        ; 10  | ||
  </nowiki>  |   </nowiki>  | ||
| Line 168: | Line 246: | ||
    add	hl, hl  |     add	hl, hl  | ||
    rla  |     rla  | ||
| + |    jr	c, $+5  | ||
    cp	c  |     cp	c  | ||
    jr	c, $+4  |     jr	c, $+4  | ||
Latest revision as of 15:18, 30 September 2019
Contents
Introduction
All these routines use the restoring division algorithm, adapted to the z80 architecture to maximize speed. They can easily be unrolled to gain some speed.
8/8 division
The following routine divides d by e and places the quotient in d and the remainder in a
div_d_e: xor a ld b, 8 _loop: sla d rla cp e jr c, $+4 sub e inc d djnz _loop ret
16/8 division
The following routine divides hl by c and places the quotient in hl and the remainder in a
div_hl_c: xor a ld b, 16 _loop: add hl, hl rla jr c, $+5 cp c jr c, $+4 sub c inc l djnz _loop ret
16/16 division
The following routine divides ac by de and places the quotient in ac and the remainder in hl
div_ac_de: ld hl, 0 ld b, 16 _loop: sll c rla adc hl, hl sbc hl, de jr nc, $+4 add hl, de dec c djnz _loop ret
24/8 division
The following routine divides ehl by d and places the quotient in ehl and the remainder in a
div_ehl_d: xor a ld b, 24 _loop: add hl, hl rl e rla jr c, $+5 cp d jr c, $+4 sub d inc l djnz _loop ret
32/8 division
The following routine divides dehl by c and places the quotient in dehl and the remainder in a
div_dehl_c: xor a ld b, 32 _loop: add hl, hl rl e rl d rla jr c, $+5 cp c jr c, $+4 sub c inc l djnz _loop ret
32/16 division
The following routine divides acix by de and places the quotient in acix and the remainder in hl
Div32By16: ; IN: ACIX=dividend, DE=divisor ; OUT: ACIX=quotient, DE=divisor, HL=remainder, B=0 ld hl,0 ld b,32 Div32By16_Loop: add ix,ix rl c rla adc hl,hl jr c,Div32By16_Overflow sbc hl,de jr nc,Div32By16_SetBit add hl,de djnz Div32By16_Loop ret Div32By16_Overflow: or a sbc hl,de Div32By16_SetBit: .db $DD,$2C ; inc ixl, change to inc ix to avoid undocumented djnz Div32By16_Loop ret
32/16 division, Speed Optimized
At the cost of 30 bytes (exactly double the size), we can save 600cc on average, a 19.5% speed up. However, the inputs are slightly different here!
div32_16:
;DEIX/BC -> HLIX remainder DE
;170+4*div32_16_sub8
;min: 2182cc
;max: 2790cc
;avg: 2462cc
;60 bytes
; Negate BC to allow add instead of sbc
  xor a      ; 4
; Need to set HL to 0 anyways, so save 2cc and a byte
  ld h,a     ; 4
  ld l,a     ; 4
  sub c      ; 4
  ld c,a     ; 4
  sbc a,a    ; 4
  sub b      ; 4
  ld b,a     ; 4
  ld a,d              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld d,a              ; 4
  ld a,e              ; 4
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld e,a              ; 4
  ld a,ixh            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixh,a            ; 8
  ld a,ixl            ; 8
  call div32_16_sub8  ; 17
  rla                 ; 4
  ld ixl,a            ; 8
  ex de,hl   ; 4
  ret        ; 10
div32_16_sub8:
;119+8*div32_16_sub
;min: 503cc
;max: 655cc
;avg: 573cc
  call +_
_:
;17+2(17+2(div32_16_sub)))
  call +_
_:
;17+2(div32_16_sub)
  call div32_16_sub
div32_16_sub:
;48+{8,0+{0,19}}
;min: 48cc
;max: 67cc
;avg: 56.75cc
  rla        ; 4
  adc hl,hl  ; 15
  jr c,+_    ;12/7
  add hl,bc  ; 11
  ret c      ;11/5
  sbc hl,bc  ; 15
  ret        ; 10
_:
  add hl,bc  ; 11
  scf        ; 4
  ret        ; 10
 
Rounded 16/8 division
The following routine divides hl by c and places the rounded quotient in hl and twice the prerounded remainder in a.
RoundHL_Div_C: xor a ld b, 16 _loop: add hl, hl rla jr c, $+5 cp c jr c, $+4 sub c inc l djnz _loop ;This part is the rounding add a,a cp c ret c inc hl ret