Figure 3-40. 32-Bit Implementation of 64-Bit Unsigned Division Code Sequence
|
|
# (R3:R4) = (R3:R4) / (R5:R6)    (64b) = (64b / 64b)
#   quo       dvd       dvs
#
# 64-bit unsigned division built from 32-bit instructions, using a
# restoring shift-and-subtract loop.
# Remainder is returned in R5:R6.
#
# Code comment notation:
#   msw = most-significant (high-order) word, i.e. bits 0..31
#   lsw = least-significant (low-order) word, i.e. bits 32..63
#   LZ  = Leading Zeroes
#   SD  = Significant Digits
#
# Register usage:
#   R3:R4 = dvd (input dividend); quo (output quotient)
#   R5:R6 = dvs (input divisor);  rem (output remainder)
#   R7:R8 = tmp (partial remainder while the loop runs)
#   R0, R9, R10 = scratch (overwritten)
# Also modified: CTR (loop counter), CR0, and the carry bit (CA).
#
# No check is made for a zero divisor.

udiv64:
# count the number of leading 0s in the dividend
        cmpwi   cr0,R3,0        # dvd.msw == 0?
        cntlzw  R0,R3           # R0 = dvd.msw.LZ
        cntlzw  R9,R4           # R9 = dvd.lsw.LZ
        bne     cr0,lab1        # if(dvd.msw != 0) dvd.LZ = dvd.msw.LZ
        addi    R0,R9,32        # else dvd.LZ = dvd.lsw.LZ + 32
lab1:

# count the number of leading 0s in the divisor
        cmpwi   cr0,R5,0        # dvs.msw == 0?
        cntlzw  R9,R5           # R9 = dvs.msw.LZ
        cntlzw  R10,R6          # R10 = dvs.lsw.LZ
        bne     cr0,lab2        # if(dvs.msw != 0) dvs.LZ = dvs.msw.LZ
        addi    R9,R10,32       # else dvs.LZ = dvs.lsw.LZ + 32
lab2:

# determine shift amounts to minimize the number of iterations
        cmpw    cr0,R0,R9       # compare dvd.LZ to dvs.LZ
        subfic  R10,R0,64       # R10 = dvd.SD
        bgt     cr0,lab9        # if(dvd.LZ > dvs.LZ) then dvs > dvd,
                                # so quotient = 0
        addi    R9,R9,1         # ++dvs.LZ (or --dvs.SD)
        subfic  R9,R9,64        # R9 = dvs.SD
        add     R0,R0,R9        # (dvd.LZ + dvs.SD) = left shift of dvd for
                                # initial dvd
        subf    R9,R9,R10       # (dvd.SD - dvs.SD) = right shift of dvd for
                                # initial tmp
        mtctr   R9              # number of iterations = dvd.SD - dvs.SD

# R7:R8 = R3:R4 >> R9
        cmpwi   cr0,R9,32       # compare R9 to 32
        addi    R7,R9,-32       # R7 = R9 - 32 (only used when R9 >= 32)
        blt     cr0,lab3        # if(R9 < 32) jump to lab3
        srw     R8,R3,R7        # tmp.lsw = dvd.msw >> (R9 - 32)
        li      R7,0            # tmp.msw = 0
        b       lab4
lab3:
        srw     R8,R4,R9        # R8 = dvd.lsw >> R9
        subfic  R7,R9,32        # R7 = 32 - R9
        slw     R7,R3,R7        # R7 = dvd.msw << (32 - R9)
        or      R8,R8,R7        # tmp.lsw = R8 | R7
        srw     R7,R3,R9        # tmp.msw = dvd.msw >> R9
lab4:

# R3:R4 = R3:R4 << R0
        cmpwi   cr0,R0,32       # compare R0 to 32
        addic   R9,R0,-32       # R9 = R0 - 32 (only used when R0 >= 32);
                                # addic, not addi: addi would read an RA
                                # operand of R0 as the constant 0
        blt     cr0,lab5        # if(R0 < 32) jump to lab5
        slw     R3,R4,R9        # dvd.msw = dvd.lsw << (R0 - 32)
        li      R4,0            # dvd.lsw = 0
        b       lab6
lab5:
        slw     R3,R3,R0        # R3 = dvd.msw << R0
        subfic  R9,R0,32        # R9 = 32 - R0
        srw     R9,R4,R9        # R9 = dvd.lsw >> (32 - R0)
        or      R3,R3,R9        # dvd.msw = R3 | R9
        slw     R4,R4,R0        # dvd.lsw = dvd.lsw << R0
lab6:

# restoring division shift and subtract loop
        li      R10,-1          # R10 = -1 (used below to force a carry)
        addic   R7,R7,0         # clear carry bit before loop starts
lab7:
# tmp:dvd is considered one large register
# each portion is shifted left 1 bit by adding it to itself
# adde sums the carry from the previous and creates a new carry
        adde    R4,R4,R4        # shift dvd.lsw left 1 bit (carry in =
                                # quotient bit from previous iteration)
        adde    R3,R3,R3        # shift dvd.msw to left 1 bit
        adde    R8,R8,R8        # shift tmp.lsw to left 1 bit
        adde    R7,R7,R7        # shift tmp.msw to left 1 bit
        subfc   R0,R6,R8        # R0 = tmp.lsw - dvs.lsw
        subfe.  R9,R5,R7        # R9 = tmp.msw - dvs.msw (with borrow)
        blt     cr0,lab8        # if(result < 0) keep tmp; carry bit is
                                # clear, so the quotient bit is 0
        mr      R8,R0           # move lsw (tmp = tmp - dvs)
        mr      R7,R9           # move msw
        addic   R0,R10,1        # set carry bit (-1 + 1); quotient bit is 1
lab8:
        bdnz    lab7

# write quotient and remainder
        adde    R4,R4,R4        # quo.lsw (lsb = CA)
        adde    R3,R3,R3        # quo.msw (lsb from lsw)
        mr      R6,R8           # rem.lsw
        mr      R5,R7           # rem.msw
        blr                     # return

lab9:
# Quotient is 0 (dvs > dvd)
        mr      R6,R4           # rem.lsw = dvd.lsw
        mr      R5,R3           # rem.msw = dvd.msw
        li      R4,0            # quo.lsw = 0
        li      R3,0            # quo.msw = 0
        blr                     # return