;-----------------------------------------------------------------------
; Syntax : NASM (Intel operand order), 32-bit x86, SSE + x87.
; ABI    : printf is called cdecl-style (arguments pushed right to
;          left, caller cleans up with `add esp, N`); the program
;          terminates through `int 0x80`.
;
; Register roles as far as the visible comments show (TODO confirm
; against the parts of the file that are missing from this copy):
;   xmm0 = direction vector n      xmm1 = position vector r
;   ebx  = detection result (0 = nothing detected)
;
; NOTE(review): this copy of the file had lost its line breaks and has
; two gaps where text is missing.  Both gaps are marked below.  Labels
; referenced but not visible here: prp1, jl1, and most data symbols.
;-----------------------------------------------------------------------

extern printf

section .data                           ; ALL DATA STRUCTURES

sse:    times 512 db 0
xrd:    dd      1.0                     ; 1+[0   (comment cut off in this copy)

; NOTE(review): ---- GAP 1 -------------------------------------------
; Source text is missing between the line above and the instruction
; below: the remaining data definitions, any `section .text` directive
; and the beginning of the routine that follows are not visible.
; Restore them from the original file before assembling.
; ---------------------------------------------------------------------

        ; -- sphere test (routine start not visible) --
        subps   xmm2, xmm1              ; dr=dom-r
        movaps  xmm3, xmm2
        mulps   xmm2, xmm0              ; dr*n
        mulps   xmm3, xmm3              ; dr*dr

        shufps  xmm2, xmm2, 0x39        ; calculate b
        movss   xmm4, xmm2              ; x
        shufps  xmm2, xmm2, 0x39
        addss   xmm4, xmm2              ; x+y
        shufps  xmm2, xmm2, 0x39
        addss   xmm4, xmm2              ; b=x+y+z
        shufps  xmm4, xmm0, 0x00        ; 00bb

        shufps  xmm3, xmm3, 0x39        ; calculate c
        movss   xmm4, xmm3              ; x
        shufps  xmm3, xmm3, 0x39
        addss   xmm4, xmm3              ; x+y
        shufps  xmm3, xmm3, 0x39
        addss   xmm4, xmm3              ; x+y+z
        movlps  [cc], xmm4              ; 00bc  (stores two floats at cc)

        fld     dword [cc]
        fld     dword [omr]
        fsubp   st1                     ; C=c-omr^2
        fld     dword [bc]
        fmul    st0
        fsubrp  st1                     ; D=b^2-C
        fldz
        fcomip  st1
        jnc     sph1                    ; 0 >= D: skip
        fsqrt
        fld     dword [bc]
        fsub    st1                     ; b-sqrt(D)
        fldz
        fcomip  st1
        jc      sph2                    ; 0 < b-sqrt(D): keep this root
        fstp    st0
        fld     dword [bc]
        fadd    st1                     ; b+sqrt(D)
sph2:   fxch    st1
        fstp    st0                     ; drop sqrt(D), keep the root
        fldz
        fcomip  st1
        jnc     sph1                    ; 0 >= root: skip
        fcomi   st1                     ; root vs. value below it on the stack
        jnc     sph1                    ; root is not smaller: skip
        fxch    st1                     ; make the root the value that survives
        mov     ebx, [dom]              ; detected!
sph1:   fstp    st0                     ; <-- sphere --

        fstp    dword [sca]
        movaps  xmm4, xmm0              ; -- advance --
        addss   xmm4, xmm5              ; 1/cm-n vector
        movss   xmm3, [sca]
        shufps  xmm3, xmm3, 0x00        ; broadcast sca to all lanes
        mulps   xmm4, xmm3
        addps   xmm1, xmm4              ; r += sca * xmm4
        cmp     ebx, 0
        jz      prp4                    ; nothing detected: continue
        ret                             ; done

prp4:   fld     dword [xx]
        fcomip  st1
        jnc     prp3                    ; xx >= st0: stop
        call    rotxrd
        jmp     prp1                    ; prp1 is in the part of the file not visible here
prp3:   fstp    st0
        ret                             ; <-- prop --

;-----------------------------------------------------------------------
; rotxrd -- MAIN ROTATION
; Draws a deflection cosine from g and one random number, clamps it to
; [-1, 1], stores it in [zcos] and sqrt(1-cos^2) in [zsin], then FALLS
; THROUGH into `rotate`.
; In:    dword [g]
; Out:   [zcos], [zsin]; x87 stack depth unchanged on exit
; Calls: rand
;-----------------------------------------------------------------------
rotxrd:                                 ; MAIN ROTATION
        fld     dword [g]               ; g
        call    rand
        fadd    st0                     ; 2*xi
        fld1
        fsubp   st1                     ; xi=2*xi-1
        fldz
        fcomip  st2                     ; g == 0 ?
        jnz     .aniso
        ; Isotropic case: cos(theta) = xi.  g is still on the x87 stack
        ; here; drop it so both paths reach rot1 with exactly one value.
        ; Without this, every call with g == 0 leaked one stack slot.
        fstp    st1
        jmp     rot1

.aniso: fmul    st1                     ; g*xi
        fld1
        faddp   st1                     ; 1+g*xi
        fld     st1
        fmul    st0                     ; g^2
        fld1
        fsub    st1                     ; 1-g^2
        fdiv    st2                     ; ga=(1-g^2)/(1+g*xi)
        fmul    st0                     ; ga^2
        fld1
        faddp   st2                     ; 1+g^2
        fsubp   st1                     ; 1+g^2-ga^2
        fld     st2
        faddp   st3                     ; 2*g
        fdivrp  st2                     ; (1+g^2-ga^2)/(2*g)
        fstp    st0                     ; drop 1+g*xi, leaving the cosine

rot1:   fld1
        fcomip  st1                     ; compare with +1
        jnc     rot2
        fstp    st0
        fld1                            ; clamp to +1
        jmp     rot3
rot2:   fld1
        fchs
        fcomip  st1                     ; compare with -1
        jc      rot3
        fstp    st0
        fld1
        fchs                            ; clamp to -1
rot3:   fld1
        fld     st1
        fmul    st0
        fsubp   st1
        fsqrt                           ; sqrt(1-cos^2)
        fstp    dword [zsin]
        fstp    dword [zcos]            ; <-- rotxrd --

;-----------------------------------------------------------------------
; rotate -- AUXILIARY ROTATION
; Builds two vectors from xmm0 (p1, p2), normalises their difference
; and sum (a1, a2), mixes them with the sine/cosine of a random angle
; 2*pi*rand into u, then sets xmm0 = xmm0*cos + u*sin using the values
; loaded from [zcos] and renormalises xmm0.
; In:    xmm0, [zcos] and the dword that follows it
; Out:   xmm0 (new n)
; Clobb: xmm2-xmm4, xmm6, xmm7, eax, edx, flags, [xcos], [xsin]
; Calls: rand
;-----------------------------------------------------------------------
rotate: call    rand                    ; AUXILIARY ROTATION
        movaps  xmm7, xmm0
        mulps   xmm7, xmm7              ; n^2
        movaps  xmm2, xmm7
        shufps  xmm2, xmm2, 0xe5        ; zyxx x
        movaps  xmm3, xmm7
        shufps  xmm3, xmm3, 0xe6        ; zyxy y
        movaps  xmm4, xmm7
        shufps  xmm4, xmm4, 0xe7        ; zyxz z
        comiss  xmm2, xmm3
        jc      jl1                     ; if x   (comment cut off in this copy)

; NOTE(review): ---- GAP 2 -------------------------------------------
; Source text is missing between the branch above and the instruction
; below.  The label jl1, the jumps that target cm1/cm2 and the start of
; the first case (including the instruction that loads xmm3) are not
; visible.  Restore them from the original file before assembling.
; ---------------------------------------------------------------------

        shufps  xmm3, xmm3, 0x10        ; 0x00
        movaps  xmm4, xmm0
        shufps  xmm4, xmm4, 0x08        ; 00y0
        subps   xmm3, xmm4              ; 0xY0
        movaps  xmm6, xmm7
        shufps  xmm6, xmm6, 0x00        ; x+y
        mulps   xmm6, xmm3              ; p1
        movaps  xmm3, xmm0              ; <-->
        shufps  xmm3, xmm3, 0x40        ; x000
        movaps  xmm4, xmm0
        shufps  xmm4, xmm4, 0x0c        ; 00z0
        subps   xmm3, xmm4              ; x0Z0
        shufps  xmm7, xmm7, 0x55        ; x+z
        mulps   xmm7, xmm3              ; p2
        jmp     jl2

cm1:    movaps  xmm7, xmm3
        shufps  xmm7, xmm7, 0x1b        ; yxyz
        addps   xmm7, xmm3
        rsqrtps xmm7, xmm7
        movaps  xmm4, xmm0              ; <-->
        shufps  xmm4, xmm4, 0x80        ; y000
        movaps  xmm2, xmm0
        shufps  xmm2, xmm2, 0x30        ; 0z00
        subps   xmm4, xmm2              ; yZ00
        movaps  xmm6, xmm7
        shufps  xmm6, xmm6, 0x00        ; x+z
        mulps   xmm6, xmm4              ; p1
        movaps  xmm4, xmm0              ; <-->
        shufps  xmm4, xmm4, 0x08        ; 00y0
        movaps  xmm2, xmm0
        shufps  xmm2, xmm2, 0x10        ; 0x00
        subps   xmm4, xmm2              ; 0Xy0
        shufps  xmm7, xmm7, 0x55        ; x+y
        mulps   xmm7, xmm4              ; p2
        jmp     jl2

cm2:    movaps  xmm7, xmm4
        shufps  xmm7, xmm7, 0x4e        ; xzzy
        addps   xmm7, xmm4
        rsqrtps xmm7, xmm7
        movaps  xmm2, xmm0              ; <-->
        shufps  xmm2, xmm2, 0x0c        ; 00z0
        movaps  xmm3, xmm0
        shufps  xmm3, xmm3, 0x40        ; x000
        subps   xmm2, xmm3              ; X0z0
        movaps  xmm6, xmm7
        shufps  xmm6, xmm6, 0x55        ; x+z
        mulps   xmm6, xmm2              ; p1
        movaps  xmm2, xmm0              ; <-->
        shufps  xmm2, xmm2, 0x30        ; 0z00
        movaps  xmm3, xmm0
        shufps  xmm3, xmm3, 0x80        ; y000
        subps   xmm2, xmm3              ; Yz00
        shufps  xmm7, xmm7, 0x00        ; y+z
        mulps   xmm7, xmm2              ; p2

jl2:    movaps  xmm3, xmm6
        subps   xmm3, xmm7              ; p1-p2
        addps   xmm7, xmm6              ; p1+p2
        movaps  xmm4, xmm3
        mulps   xmm4, xmm4              ; (p1-p2)^2
        movaps  xmm6, xmm7
        mulps   xmm6, xmm6              ; (p1+p2)^2
        movaps  xmm2, xmm4
        shufps  xmm2, xmm6, 0x11        ; 0101
        shufps  xmm4, xmm6, 0xee        ; 3232
        addps   xmm4, xmm2              ; z x+y z x+y
        movaps  xmm2, xmm4
        shufps  xmm2, xmm2, 0xb1
        addps   xmm4, xmm2              ; x+y+z
        rsqrtps xmm4, xmm4              ; approximate 1/length of both vectors
        movaps  xmm6, xmm4
        shufps  xmm4, xmm4, 0x00        ; 0000
        shufps  xmm6, xmm6, 0xaa        ; 2222
        mulps   xmm3, xmm4              ; a1
        mulps   xmm7, xmm6              ; a2

        fldpi
        fadd    st0                     ; 2*pi
        fmulp   st1                     ; 2*pi * random number left by rand
        fsincos                         ; st0 = cos, st1 = sin
        fstp    dword [xcos]
        fstp    dword [xsin]
        movlps  xmm4, [xcos]            ; loads [xcos] and the dword after it
        movaps  xmm6, xmm4
        shufps  xmm6, xmm6, 0x00        ; cos
        shufps  xmm4, xmm4, 0x55        ; sin
        mulps   xmm6, xmm3
        mulps   xmm4, xmm7
        addps   xmm4, xmm6              ; u

        movlps  xmm2, [zcos]            ; loads [zcos] and the dword after it
        movaps  xmm3, xmm2
        shufps  xmm3, xmm3, 0x00        ; cos
        shufps  xmm2, xmm2, 0x55        ; sin
        mulps   xmm4, xmm2
        mulps   xmm0, xmm3
        addps   xmm0, xmm4              ; new n

        movaps  xmm7, xmm0              ; renormalise n
        mulps   xmm7, xmm7
        shufps  xmm7, xmm7, 0x39        ; rotate
        movss   xmm2, xmm7
        shufps  xmm7, xmm7, 0x39        ; rotate
        movss   xmm3, xmm7
        shufps  xmm7, xmm7, 0x39        ; rotate
        movss   xmm4, xmm7
        addss   xmm2, xmm3
        addss   xmm2, xmm4
        rsqrtss xmm2, xmm2
        shufps  xmm2, xmm2, 0x00
        mulps   xmm0, xmm2              ; new n
jl3:    ret                             ; <-- rotate --

;-----------------------------------------------------------------------
; rand -- RANDOM NUMBER GENERATOR
; state: [rnd] = low 32 bits of rnd*mtp + cry, [cry] = high 32 bits.
; The top 23 bits of the new [rnd] become the mantissa of a float in
; [1,2); 1.0 is subtracted.  A zero mantissa is rejected by retrying,
; so the result is never exactly 0.
; Out:   st0 = random number (one value pushed on the x87 stack)
; Clobb: eax, edx, flags, [xrd]
;-----------------------------------------------------------------------
rand:   mov     eax, [rnd]              ; RANDOM NUMBER GENERATOR
        mul     dword [mtp]             ; edx:eax = rnd * mtp
        add     eax, [cry]
        adc     edx, 0
        mov     [cry], edx
        mov     [rnd], eax
        shr     eax, 9                  ; keep 23 bits for the mantissa
        cmp     eax, 0
        jz      rand                    ; retry on zero mantissa
        or      eax, 0x3f800000         ; or 1.0
        mov     [xrd], eax
        fld     dword [xrd]
        fld1
        fsubp   st1                     ; [1,2) -> (0,1)
        ret                             ; <-- rand --

;-----------------------------------------------------------------------
; printh -- PRINT HIT INFO
; Calls printf(outh, (ebx>>8)&0xff, ebx&0xff, (double)xmm1[0],
;              (double)xmm0[3]).
; Clobb: ebx (shifted and masked), eax, xmm7, [tmp] (8 bytes), plus
;        whatever printf clobbers
;-----------------------------------------------------------------------
printh: movaps  xmm7, xmm0              ; PRINT HIT INFO
        shufps  xmm7, xmm7, 0x93        ; lane 3 -> lane 0
        movss   [tmp], xmm7
        fld     dword [tmp]
        fstp    qword [tmp]             ; widen to double for printf
        push    dword [tmp+4]
        push    dword [tmp]
        movss   [tmp], xmm1
        fld     dword [tmp]
        fstp    qword [tmp]
        push    dword [tmp+4]
        push    dword [tmp]
        mov     eax, ebx
        and     eax, 0xff
        push    eax                     ; low byte of ebx
        shr     ebx, 8
        and     ebx, 0xff
        push    ebx                     ; second byte of ebx
        push    dword outh
        call    printf
        add     esp, 28                 ; 2 doubles + 2 ints + format pointer
        ret                             ; <-- printh --

;-----------------------------------------------------------------------
; print4 -- PRINT 4 XMM7 NUMBERS
; Calls printf(fmt4, xmm7[0], xmm7[1], xmm7[2], xmm7[3]) with each lane
; widened to double.  xmm7 has been rotated a full turn by the time
; printf is called.
; Clobb: ecx, [tmp] (8 bytes), plus whatever printf clobbers
;-----------------------------------------------------------------------
print4: mov     ecx, 4                  ; PRINT 4 XMM7 NUMBERS
prn1:   shufps  xmm7, xmm7, 0x93        ; rotate
        movss   [tmp], xmm7
        fld     dword [tmp]
        fstp    qword [tmp]
        push    dword [tmp+4]
        push    dword [tmp]
        loop    prn1
        push    dword fmt4
        call    printf
        add     esp, 36                 ; 4 doubles + format pointer
        ret                             ; <-- print4 --

;-----------------------------------------------------------------------
; print -- PRINT FLOAT NUMBER
; Pops st0 and calls printf(fmt, (double)st0).
; Clobb: [tmp] (8 bytes), plus whatever printf clobbers
;-----------------------------------------------------------------------
print:  fstp    qword [tmp]             ; PRINT FLOAT NUMBER
        push    dword [tmp+4]
        push    dword [tmp]
        push    dword fmt
        call    printf
        add     esp, 12                 ; 1 double + format pointer
        ret                             ; <-- print --

;-----------------------------------------------------------------------
; exit -- EXIT FUNCTION
; int 0x80 with eax = 1, ebx = 0.  Does not return.
;-----------------------------------------------------------------------
exit:   mov     eax, 1                  ; EXIT FUNCTION
        mov     ebx, 0
        int     0x80                    ; <-- exit --