Z80 Routines:Math:Square root

From WikiTI
Revision as of 13:13, 20 July 2006 by Jim e (Talk | contribs)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search


Size Optimization

This version is size optimized, it compares every perfect square against HL until a square that is larger is found. Obviously slower, but does get the job done in only 12 bytes.

;-------------------------------
;Square Root
;Inputs:
;HL = number to be square rooted
;Outputs:
;A  = square root

sqrt:
   ld a,$ff
   ld de,1
sqrtloop:
   inc a
   dec e
   dec de
   add hl,de
   jr c,sqrtloop
   ret 


Speed Optimization

This version uses the high school method of finding a square root and so it is much faster, running at about ~850 tstates. Unfortunately it requires 180 bytes and is quite obfuscated.

;-------------------------------
;Square Root
;Inputs:
;DE = number to be square rooted
;Outputs:
;A  = square root

sqrt:
    xor a
    ld h,a
    ld l,a
    ld b,a
    rl d
    rl l
    rl d
    rl l
    ld c,1
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl d
    rl l
    rl d
    rl l
    ld c,a
    scf
    rl c
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl d
    rl l
    rl d
    rl l
    ld c,a
    scf
    rl c
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl d
    rl l
    rl d
    rl l
    ld c,a
    scf
    rl c
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl e
    adc hl,hl
    rl e
    adc hl,hl
    ld c,a
    scf
    rl c
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl e
    adc hl,hl
    rl e
    adc hl,hl
    ld c,a
    scf
    rl c
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl e
    adc hl,hl
    rl e
    adc hl,hl
    ld c,a
    scf
    rl c
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    add a,a
    rl e
    adc hl,hl
    rl e
    adc hl,hl
    ld c,a
    scf
    rl c
    rl b
    sbc hl,bc
    jp c,$+3+2+1
    sbc hl,bc
    inc a
    add hl,bc
    ret


Balanced Optimization

This version is a balance between speed and size. It also uses the high school method and runs under 1200 tstates. It only costs 41 bytes.

;-------------------------------
;Square Root
;Inputs:
;DE = number to be square rooted
;Outputs:
;A  = square root

Sqrt:
    ld hl,0
    ld c,l
    ld b,h
    ld a,8
Sqrtloop:
    sla e
    rl d
    adc hl,hl
    sla e
    rl d
    adc hl,hl
    scf               ;Can be optimised
    rl c              ;with SL1 instruction
    rl b
    sbc hl,bc
    jr nc,Sqrtaddbit
    add hl,bc
    dec c
Sqrtaddbit:
    inc c
    res 0,c
    dec a
    jr nz,Sqrtloop
    ld a,c
    rr b
    rra
    ret


Other Options

A binary search of a square table would yield much better best case scenarios and the worst case scenarios would be similar to the high school method. However this would also require 512 byte table making it significantly larger than the other routines. Of course the table could also serve as a rapid squaring method.

Credits and Contributions

  • James Montelongo