Difference between revisions of "Z80 Routines:Math:Advance Math"

From WikiTI
Jump to: navigation, search
m ((using real name for credits))
 
(2 intermediate revisions by the same user not shown)
Line 8: Line 8:
 
=== nCr Algorithm ===
 
=== nCr Algorithm ===
  
This computes "n choose r" using an algorithm that makes use of both shadow registers and other calls. This can very likely be optimised, so feel free to edit with a new version.
+
This computes "n choose r" in such a way as to avoid overflow unless the final result would overflow 16 bits.
  
 
  <nowiki>
 
  <nowiki>
;===============================================================
+
;Written by Zeda
nCrHL_DE:
+
 
;===============================================================
+
; Requires
;Inputs:
+
;    mul16          ;BC*DE ==> DEHL
;     hl is "n"
+
;    DEHL_Div_BC    ;DEHL/BC ==> DEHL
;     de is "r"
+
 
 +
ncr_HL_DE:
 +
;"n choose r", defined as n!/(r!(n-r)!)
 +
;Computes "HL choose DE"
 +
;Inputs: HL,DE
 
;Outputs:
 
;Outputs:
;     interrupts off
+
;   HL is the result
;    a is 0
+
;       "HL choose DE"
;    bc is an intermediate result
+
;   carry flag reset means overflow
;     de is "n"
+
;Destroys:
;     hl is the result
+
;   A,BC,DE,IX
;     a' is not changed
+
;Notes:
;     bc' is "r"+1
+
;  Overflow is returned as 0
;     de' is the same as bc
+
;   Overflow happens if HL choose DE exceeds 65535
;     hl' is "r" or the compliment, whichever is smaller
+
;  This algorithm is constructed in such a way that intermediate
;===============================================================
+
;   operations won't erroneously trigger overflow.
    or a                     ;reset carry flag
+
;66 bytes
    sbc hl,de
+
  ld bc,1
    ret c                   ;r should not be bigger than n
+
  or a
    sbc hl,de \ add hl,de
+
  sbc hl,de
    jr nc,$+3
+
  jr c,ncr_oob
      ex de,hl
+
  jr z,ncr_exit
                            ;hl is R
+
  sbc hl,de
    push de
+
  add hl,de
    ld bc,1                ;A
+
  jr c,$+3
    exx
+
  ex de,hl
    pop de                  ;N
+
  ld a,h
    ld bc,1                ;C
+
  or l
    ld h,b \ ld l,c         ;D
+
  push hl
nCrLoop:
+
  pop ix
    push de
+
ncr_exit:
    push hl
+
  ld h,b
    call DE_Times_BC        ;Returns BC unchanged, DEHL is the product
+
  ld l,c
    push hl \ exx \ pop de
+
  scf
    push hl
+
  ret z
    call DE_Div_BC          ;Returns HL is the quotient, BC is not changed
+
ncr_loop:
    pop de
+
  push bc \ push de
    push hl \ ex de,hl \ exx \ pop hl
+
  push hl \ push bc
    ld b,h \ ld c,l
+
  ld b,h
    pop de \ add hl,de
+
  ld c,l
    pop de \ inc de
+
  call mul16          ;BC*DE ==> DEHL
    exx
+
  pop bc
    inc bc
+
  call DEHL_Div_BC    ;result in DEHL
    or a \ sbc hl,bc \ add hl,bc
+
  ld a,d
    exx
+
  or e
    jr nc,nCrLoop
+
  pop bc
    ret
+
  pop de
 +
  jr nz,ncr_overflow
 +
  add hl,bc
 +
  jr c,ncr_overflow
 +
  pop bc
 +
  inc bc
 +
  ld a,b
 +
  cp ixh
 +
  jr c,ncr_loop
 +
  ld a,ixl
 +
  cp c
 +
  jr nc,ncr_loop
 +
  ret
 +
ncr_overflow:
 +
  pop bc
 +
  xor a
 +
  ld b,a
 +
ncr_oob:
 +
  ld h,b
 +
  ld l,b
 +
  ret
 +
 
 
  </nowiki>
 
  </nowiki>
  
=== GCDHL_BC ===
+
=== GCDHL_DE ===
  
This finds the greatest common divisor (GCD) of HL and BC.
+
This finds the greatest common divisor (GCD) of HL and DE using the binary GCD algorithm to improve performance.
 
  <nowiki>
 
  <nowiki>
GCDHL_BC:
+
gcdHL_DE:
;Inputs:
+
;gcd(HL,DE)->HL
;    HL is a number
+
;Output:
;     BC is a number
+
;   B=0
;Outputs:
+
;   HL is the GCD of the inputs
;     A is 0
+
;     BC is the GCD
+
;    DE is 0
+
 
;Destroys:
 
;Destroys:
;     HL
+
;   A,DE
;Size:  25 bytes
+
;     DE is guaranteed 0 unless the output is 0 (which only happens if one of the inputs is 0).
;Speed: 30 to 49708 cycles
+
;Uses the binary GCD algorithm
;       -As slow as about 126 times per second at 6MHz
+
;65 bytes
;      -As fast as about 209715 times per second at 6MHz
+
 
;Speed break down:
+
;B is our cofactor-of-2 counter
;     If HL=BC, 30 cycles
+
     ld b,0
;    24+1552x
+
 
;     If BC>HL, add 20 cycles
+
;If HL=0, return 0
;     *x is from 1 to at most 32 (because we use 2 16-bit numbers)
+
     ld a,h \ or l \ ret z
;
+
 
    or a \ sbc hl,bc    ;B7ED42    19
+
;If DE=0, return 0
    ret z               ;C8        5|11
+
    ex de,hl
    add hl,bc            ;09        11
+
    ld a,h \ or l \ jr nz,gcd_test_cofactor_of_2
    jr nc,$+8            ;3006      11|31
+
    ret
      ld a,h             ;7C        --
+
 
      ld h,b             ;60        --
+
gcd_cofactor_2_loop:
      ld b,a            ;47        --
+
    inc b
      ld a,l             ;7D        --
+
    srl h \ rr l
      ld l,c            ;69        --
+
    srl d \ rr e
      ld c,a            ;4F        --
+
gcd_test_cofactor_of_2:
Loop:
+
    inc b
    call HL_Div_BC      ;CD****    1511    returns BC unchanged, DE is the remainder
+
    ld a,e
    ld a,d \ or e        ;7AB2      8
+
    or l
    ret z                ;C8        5|11
+
    rra
    ld h,b \ ld l,c      ;6069      8
+
    jr nc,gcd_cofactor_2_loop
    ld b,d \ ld c,e      ;424B      8
+
 
    jr $-10              ;18F8      12
+
gcd_remove_factors_of_2_op2:
 +
    srl h \ rr l \ jr nc,gcd_remove_factors_of_2_op2
 +
    adc hl,hl
 +
    jr gcd_swap_ops
 +
 
 +
gcd_swap_ops_negate:
 +
;At this point, HL needs to be negated and swapped with DE
 +
    xor a \ sub l \ ld l,a \ sbc a,a \ sub h \ ld h,a
 +
gcd_swap_ops:
 +
    ex de,hl
 +
gcd_remove_factors_of_2_op1:
 +
    srl h \ rr l \ jr nc,gcd_remove_factors_of_2_op1
 +
    adc hl,hl
 +
    sbc hl,de
 +
    jr c,gcd_swap_ops_negate
 +
    jp nz,gcd_remove_factors_of_2_op1
 +
 
 +
;DE is the GCD, need to shift it left B-1 times.
 +
    ex de,hl
 +
    dec b
 +
    ret z
 +
    add hl,hl \ djnz $-1
 +
    ret
 
  </nowiki>
 
  </nowiki>
  
Line 109: Line 153:
  
 
== Credits and Contributions ==
 
== Credits and Contributions ==
* '''Zeda (Xeda) Elnara''' for the nCr and GCD algorithm
+
* '''Zeda Thomas''' for the nCr and GCD algorithm

Latest revision as of 16:08, 30 September 2019


Introduction

These are routines designed for math of a slightly higher level. These don't necessarily contribute to everyday coding, but might be useful for an OS that handles such math (or programming language).

nCr Algorithm

This computes "n choose r" in such a way as to avoid overflow unless the final result would overflow 16 bits.

;Written by Zeda

; Requires
;    mul16          ;BC*DE ==> DEHL
;    DEHL_Div_BC    ;DEHL/BC ==> DEHL

ncr_HL_DE:
;"n choose r", defined as n!/(r!(n-r)!)
;Computes "HL choose DE"
;Inputs: HL,DE
;Outputs:
;   HL is the result
;       "HL choose DE"
;   carry flag reset means overflow
;Destroys:
;   A,BC,DE,IX
;Notes:
;   Overflow is returned as 0
;   Overflow happens if HL choose DE exceeds 65535
;   This algorithm is constructed in such a way that intermediate
;   operations won't erroneously trigger overflow.
;66 bytes
  ld bc,1
  or a
  sbc hl,de
  jr c,ncr_oob
  jr z,ncr_exit
  sbc hl,de
  add hl,de
  jr c,$+3
  ex de,hl
  ld a,h
  or l
  push hl
  pop ix
ncr_exit:
  ld h,b
  ld l,c
  scf
  ret z
ncr_loop:
  push bc \ push de
  push hl \ push bc
  ld b,h
  ld c,l
  call mul16          ;BC*DE ==> DEHL
  pop bc
  call DEHL_Div_BC    ;result in DEHL
  ld a,d
  or e
  pop bc
  pop de
  jr nz,ncr_overflow
  add hl,bc
  jr c,ncr_overflow
  pop bc
  inc bc
  ld a,b
  cp ixh
  jr c,ncr_loop
  ld a,ixl
  cp c
  jr nc,ncr_loop
  ret
ncr_overflow:
  pop bc
  xor a
  ld b,a
ncr_oob:
  ld h,b
  ld l,b
  ret

 

GCDHL_DE

This finds the greatest common divisor (GCD) of HL and DE using the binary GCD algorithm to improve performance.

gcdHL_DE:
;gcd(HL,DE)->HL
;Output:
;   B=0
;   HL is the GCD of the inputs
;Destroys:
;   A,DE
;     DE is guaranteed 0 unless the output is 0 (which only happens if one of the inputs is 0).
;Uses the binary GCD algorithm
;65 bytes

;B is our cofactor-of-2 counter
    ld b,0

;If HL=0, return 0
    ld a,h \ or l \ ret z

;If DE=0, return 0
    ex de,hl
    ld a,h \ or l \ jr nz,gcd_test_cofactor_of_2
    ret

gcd_cofactor_2_loop:
    inc b
    srl h \ rr l
    srl d \ rr e
gcd_test_cofactor_of_2:
    inc b
    ld a,e
    or l
    rra
    jr nc,gcd_cofactor_2_loop

gcd_remove_factors_of_2_op2:
    srl h \ rr l \ jr nc,gcd_remove_factors_of_2_op2
    adc hl,hl
    jr gcd_swap_ops

gcd_swap_ops_negate:
;At this point, HL needs to be negated and swapped with DE
    xor a \ sub l \ ld l,a \ sbc a,a \ sub h \ ld h,a
gcd_swap_ops:
    ex de,hl
gcd_remove_factors_of_2_op1:
    srl h \ rr l \ jr nc,gcd_remove_factors_of_2_op1
    adc hl,hl
    sbc hl,de
    jr c,gcd_swap_ops_negate
    jp nz,gcd_remove_factors_of_2_op1

;DE is the GCD, need to shift it left B-1 times.
    ex de,hl
    dec b
    ret z
    add hl,hl \ djnz $-1
    ret
 

LCM

This is as simple as multiplying the two numbers and dividing by the GCD.


Credits and Contributions

  • Zeda Thomas for the nCr and GCD algorithm