#include "asm.h" /* ** At the start of each iteration we have a partial square root, a, ** and a remainder, r = v-a**2. At the end of the iteration we have ** a' = a+b, and r' = v-a'**2 = v-(a+b)**2 = r-(2ab+b**2). ** ** d0 contains a/2, and d1,d2 contains r, with the radix point ** moving one place left each iteration. d3 contains the next bit, ** b/4. When viewed with the same radix point position as the ** remainder, d3 is b**2 and d0 is 2ab. ** ** Double precision subtractions are needed only in the final two ** iterations. Also, at the end of the first iteration the next ** bit (d3) is right-shifted two places instead of left-shifting ** the remainder one place. */ .text // unsigned long sqrt64(unsigned long long v); .global sqrt64 sqrt64: movel d2,-(sp) movel d3,-(sp) movel 12(sp),d1 // Upper longword movel 16(sp),d2 clrl d3 // Initialize next bit bset #30,d3 // to b/2 // First iteration: subl d3,d1 // Trial subtraction jcs rstore0 movel d3,d0 // Set d0 = 2a jra step0 rstore0: addl d3,d1 // Restore remainder clrl d0 // Set d0 = 2a step0: lsrl #2,d3 // Advance to next lower bit position // Here are the central 29 iterations: loop: addl d3,d0 // d0 = 2a + b subl d0,d1 // Trial subtraction jcs rstore // Failed, add back addl d3,d0 // d0 = 2a' ' jra step rstore: addl d0,d1 // Restore remainder subl d3,d0 // and 2a' ' step: asll #1,d2 // Pick up another bit addxl d1,d1 // of the remainder lsrl #1,d3 // Advance to next lower bit position jne loop // And the final two iterations: bset #31,d3 // 2a + b in d0,d3 subl d3,d2 // Trial subtraction subxl d0,d1 jcs rstore1 addl #1,d0 // d0 = 2a' ' jra step1 rstore1: addl d3,d2 // Restore remainder addxl d0,d1 // and 2a' ' step1: asll #1,d2 // Pick up another bit addxl d1,d1 // of the remainder lsrl #1,d3 // Advance to next lower bit position subl d3,d2 // Last trial sutraction subxl d0,d1 jbcs final addl d0,d0 addl #1,d0 jra exit final: addl d0,d0 exit: movel (sp)+,d3 movel (sp)+,d2 rts