float Q_rsqrt( float number ) { long i; float x2, y; const float threehalfs = 1.5F; x2 = number * 0.5F; y = number; i = * ( long * ) &y; // evil floating point bit level hacking i = 0x5f3759df - ( i >> 1 ); // what the fuck? y = * ( float * ) &i; y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration // y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed return y; }
从代码可以看出算法的关键为牛顿法的初值,也就是引起很多人关注和研究的魔法数 0x5f3759df. 下面是该算法的Fortran实现:
module invsqrt_mod use iso_fortran_env, only : sp=>real32 implicit none contains function invsqrt(x) result(y) real(sp), intent(in) :: x real(sp) :: y, xhalf integer :: i xhalf=0.5_sp*x i=transfer(x,i) i=z'5f3759df'-shiftr(i,1) y=transfer(i,y) y=y*(1.5_sp-xhalf*y*y) ! y=y*(1.5_sp-xhalf*y*y) ! y=y*(1.5_sp-xhalf*y*y) end function invsqrt end module invsqrt_mod program test use invsqrt_mod use iso_fortran_env, only : sp=>real32 implicit none real(sp) :: x real :: t1, t2 integer :: i, num print*,'num=' read*,num call cpu_time(t1) do i=1,num x=1._sp/sqrt(real(i,sp)) end do call cpu_time(t2) print*,'cpu_time by sqrt=', t2-t1 call cpu_time(t1) do i=1,num x=invsqrt(real(i,sp)) end do call cpu_time(t2) print*,'cpu_time by invsqrt=', t2-t1 print*,'x=' read*,x print*,'sqrt(x)=',sqrt(x) print*,'by invsqrt=',x*invsqrt(x) end program test
在我的计算机上上述代码采用 -O3 优化时与采用内置函数 sqrt 的计算结果在计算时间上几乎相同,如果编译时不进行优化 则比内置函数要慢很多。 注意代码实现上也可以用 equivalence 语句代替 transfer 内置函数,但因为前者已被声明为过时特性,这里没有采用。