Is there any way to make sure floating-point arithmetic gives the same result on both Linux and Windows?
My program runs on both Linux and Windows, and I have to make sure the floating-point arithmetic gives the same result on both operating systems.
Here is the code:
for (int i = 0; i < 100000; ++i) { float d_value = 10.0f / float(i); float p_value = 0.01f * float(i) + 100.0f; }
I use "g++ -m32 -c -static -g -O0 -ffloat-store" to build the code on Linux, and "/fp:precise /O2" to build the code on Windows with VS2005.
When I printf "d_value" and "p_value", "d_value" is the same on both Linux and Windows, but "p_value" is sometimes different. For example, printing "p_value" in hexadecimal format gives:
Windows: 42d5d1eb Linux: 42d5d1ec
Why does this happen?
My g++ version is:
configured with: ../src/configure -v --with-pkgversion='debian 4.4.5-8' --with-bugurl=file:///usr/share/doc/gcc-4.4/readme.bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.4 --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --enable-targets=all --with-arch-32=i586 --with-tune=generic --enable-checking=release --build=i486-linux-gnu --host=i486-linux-gnu --target=i486-linux-gnu thread model: posix gcc version 4.4.5 (debian 4.4.5-8)
I use the flag -ffloat-store
because of someone's suggestion here: "Different math rounding behaviour between Linux, Mac OS X and Windows".
Use /fp:strict
on Windows to tell the compiler to produce code that strictly follows IEEE 754, and use gcc -msse2 -mfpmath=sse
on Linux to obtain the same behavior there.
The reasons for the differences you are seeing have been discussed in several places on Stack Overflow, but the best survey is David Monniaux's article.
The assembly instructions I obtain when compiling with gcc -msse2 -mfpmath=sse
follow. The instructions cvtsi2ssq
, divss
, mulss
, and addss
are the correct instructions to use, and they result in a program in which p_value
contains, at one point, 42d5d1ec
.
.globl _main .align 4, 0x90 _main: ## @main .cfi_startproc ## bb#0: pushq %rbp ltmp2: .cfi_def_cfa_offset 16 ltmp3: .cfi_offset %rbp, -16 movq %rsp, %rbp ltmp4: .cfi_def_cfa_register %rbp subq $32, %rsp movl $0, -4(%rbp) movl $0, -8(%rbp) lbb0_1: ## =>this inner loop header: depth=1 cmpl $100000, -8(%rbp) ## imm = 0x186a0 jge lbb0_4 ## bb#2: ## in loop: header=bb0_1 depth=1 movq _p_value@gotpcrel(%rip), %rax movabsq $100, %rcx cvtsi2ssq %rcx, %xmm0 movss lcpi0_0(%rip), %xmm1 movabsq $10, %rcx cvtsi2ssq %rcx, %xmm2 cvtsi2ss -8(%rbp), %xmm3 divss %xmm3, %xmm2 movss %xmm2, -12(%rbp) cvtsi2ss -8(%rbp), %xmm2 mulss %xmm2, %xmm1 addss %xmm0, %xmm1 movss %xmm1, (%rax) movl (%rax), %edx movl %edx, -16(%rbp) leaq l_.str(%rip), %rdi movl -16(%rbp), %esi movb $0, %al callq _printf movl %eax, -20(%rbp) ## 4-byte spill ## bb#3: ## in loop: header=bb0_1 depth=1 movl -8(%rbp), %eax addl $1, %eax movl %eax, -8(%rbp) jmp lbb0_1 lbb0_4: movl -4(%rbp), %eax addq $32, %rsp popq %rbp ret
Comments
Post a Comment