extern printf section .data ; ALL DATA STRUCTURES align 16 sse: times 512 db 0 omr: dd 0.178 omx: dd 125.0 omy: dd 0.0 omz: dd 0.0 xrd: dd 362436069 ; next random number zer: dd 0 ; forms qword with above cry: dd 1234567 ; carry mtp: dd 2083801278 ; multiplier xcos: dd 0.8 ; azimuthal rotation xsin: dd 0.6 zcos: dd 0.8 ; zenith inclination zsin: dd 0.6 cc: dd 0.0 bc: dd 0.0 sca: dd 0.0 ocm: dd 1.0 ; 1/cm ls: dd 21.8 la: dd 105. g: dd 0.8 c: dd 0.299792458 np: dd 1.31950 ng: dd 1.35634 xx: dd 1.e-5 c2: dd 2.0 pi2: dt 0.0 ; 2pi bse: dt -32.0 ; 2^-32 tmp: dq 5.0 ovr: dd 5.0 prc: dw 127 fmt: db "%g",10,0 fmt4: db "%g %g %g %g",10,0 section .text global main main: call init ; MAIN ENTRY POINT fldcw [prc] mov ecx,1000000 nxt1: push ecx fld1 fstp dword [sca] xorps xmm0,xmm0 movss xmm0,[sca] shufps xmm0,xmm0,0x51 ; direction n=0010 xorps xmm1,xmm1 ; position r=0000 xorps xmm5,xmm5 movss xmm5,[ocm] ; 1/cm-0 vector fld dword [c2] fld tword [pi2] call prop ffree st0 fincstp ffree st0 fincstp cmp ebx,0 jz nxt2 movaps xmm7,xmm1 call print4 nxt2: pop ecx loop nxt1 ret ; <-- main -- xmpl: fxsave [sse] ; EXAMPLE ROUTINE fldcw [prc] ; change precision fstcw [prc] fild dword [prc] call print call init mov ecx,1 xmp1: push ecx call rand call print pop ecx loop xmp1 fld dword [omr] call print movaps xmm7,[omr] call print4 fxrstor [sse] ret ; <-- xmpl -- init: fld1 ; MAIN INITIALIZATION fldpi fscale ffree st1 fstp tword [pi2] fld tword [bse] fld1 fscale ffree st1 fstp tword [bse] fld1 fld dword [g] fsubp st1 fmul dword [ls] fstp dword [ls] fld dword [c] fdivr dword [ng] fstp dword [ocm] fld dword [omr] fmul dword [ovr] fstp dword [omr] ret ; <-- init -- prop: xor ebx,ebx ; PROPAGATE ROUTINE fldln2 call rand fyl2x fchs ; TOT prp1: fldln2 call rand fyl2x fchs ; SCA fld dword [ls] fmulp st1 ; sca fld dword [la] fmul st2 ; tot fcomi st1 jnc prp2 fxch st1 prp2: ffree st0 fincstp fld dword [la] fdivr st1 fsubp st2 ; TOT-=sca/la movaps xmm2,[omr] ; -- sphere --> subss xmm2,xmm2 ; unnecessary subps xmm2,xmm1 ; dr=omr-r movaps xmm3,xmm2 mulps xmm2,xmm0 ; dr*n mulps xmm3,xmm3 ; dr*dr shufps xmm2,xmm2,0x39 ; calculate b movss xmm4,xmm2 ; x shufps xmm2,xmm2,0x39 addss xmm4,xmm2 ; x+y shufps xmm2,xmm2,0x39 addss xmm4,xmm2 ; b=x+y+z shufps xmm4,xmm0,0x00 ; 00bb shufps xmm3,xmm3,0x39 ; calculate c movss xmm4,xmm3 ; x shufps xmm3,xmm3,0x39 addss xmm4,xmm3 ; x+y shufps xmm3,xmm3,0x39 addss xmm4,xmm3 ; x+y+z movlps [cc],xmm4 ; 00bc fld dword [cc] fld dword [omr] fmul st0 fsubp st1 ; C=c-omr^2 fld dword [bc] fmul st0 fsubrp st1 ; D=b^2-C fldz fcomip st1 jnc sph1 fsqrt fld dword [bc] fsub st1 ; b-sqrt(D) fldz fcomip st1 jc sph2 ffree st0 fincstp fld dword [bc] fadd st1 ; b+sqrt(D) sph2: fxch st1 ffree st0 fincstp fldz fcomip st1 jnc sph1 fcomi st1 jnc sph1 fxch st1 inc ebx ; detected! sph1: ffree st0 fincstp ; <-- sphere -- fstp dword [sca] movaps xmm4,xmm0 ; -- advance -- addss xmm4,xmm5 ; 1/cm-n vector movss xmm3,[sca] shufps xmm3,xmm3,0x00 mulps xmm4,xmm3 addps xmm1,xmm4 cmp ebx,0 jz prp4 ret ; done prp4: fld dword [xx] fcomip st1 jnc prp3 call rotxrd jmp prp1 prp3: ffree st0 fincstp ret ; <-- prop -- rotxrd: ; MAIN ROTATION fld dword [g] ; g call rand fmul st4 ; dword [c2] fld1 fsubp st1 ; xi=1-2*g fldz fcomip st2 jz rot1 fmul st1 ; g*xi fld1 faddp st1 ; 1+g*xi fld st1 fmul st0 ; g^2 fld1 fsub st1 ; 1-g^2 fdiv st2 ; ga=(1-g^2)/(1+g*xi) fmul st0 ; ga^2 fld1 faddp st2 ; 1+g^2 fsubp st1 ; 1+g^2-ga^2 fld st5 ; dword [c2] fmulp st3 ; 2*g fdivrp st2 ; (1+g^2-ga^2)/(2*g) ffree st0 fincstp rot1: fld1 fcomip st1 ; compare with +1 jnc rot2 ffree st0 fincstp fld1 jmp rot3 rot2: fld1 fchs fcomip st1 ; compare with -1 jc rot3 ffree st0 fincstp fld1 fchs rot3: fld1 fld st1 fmul st0 fsubp st1 fsqrt ; sqrt(1-cos^2) fstp dword [zsin] fstp dword [zcos] call rotate ret ; <-- rotxrd -- rotate: call rand ; AUXILIARY ROTATION movaps xmm7,xmm0 mulps xmm7,xmm7 ; n^2 movaps xmm2,xmm7 shufps xmm2,xmm2,0xe5 ; zyxx x movaps xmm3,xmm7 shufps xmm3,xmm3,0xe6 ; zyxy y movaps xmm4,xmm7 shufps xmm4,xmm4,0xe7 ; zyxz z comiss xmm2,xmm3 jc jl1 ; if x shufps xmm3,xmm3,0x10 ; 0x00 movaps xmm4,xmm0 shufps xmm4,xmm4,0x08 ; 00y0 subps xmm3,xmm4 ; 0xY0 movaps xmm6,xmm7 shufps xmm6,xmm6,0x00 ; x+y mulps xmm6,xmm3 ; p1 movaps xmm3,xmm0 ; <--> shufps xmm3,xmm3,0x40 ; x000 movaps xmm4,xmm0 shufps xmm4,xmm4,0x0c ; 00z0 subps xmm3,xmm4 ; x0Z0 shufps xmm7,xmm7,0x55 ; x+z mulps xmm7,xmm3 ; p2 jmp jl2 cm1: movaps xmm7,xmm3 shufps xmm7,xmm7,0x1b ; yxyz addps xmm7,xmm3 rsqrtps xmm7,xmm7 movaps xmm4,xmm0 ; <--> shufps xmm4,xmm4,0x80 ; y000 movaps xmm2,xmm0 shufps xmm2,xmm2,0x30 ; 0z00 subps xmm4,xmm2 ; yZ00 movaps xmm6,xmm7 shufps xmm6,xmm6,0x00 ; x+z mulps xmm6,xmm4 ; p1 movaps xmm4,xmm0 ; <--> shufps xmm4,xmm4,0x08 ; 00y0 movaps xmm2,xmm0 shufps xmm2,xmm2,0x10 ; 0x00 subps xmm4,xmm2 ; 0Xy0 shufps xmm7,xmm7,0x55 ; x+y mulps xmm7,xmm4 ; p2 jmp jl2 cm2: movaps xmm7,xmm4 shufps xmm7,xmm7,0x4e ; xzzy addps xmm7,xmm4 rsqrtps xmm7,xmm7 movaps xmm2,xmm0 ; <--> shufps xmm2,xmm2,0x0c ; 00z0 movaps xmm3,xmm0 shufps xmm3,xmm3,0x40 ; x000 subps xmm2,xmm3 ; X0z0 movaps xmm6,xmm7 shufps xmm6,xmm6,0x55 ; x+z mulps xmm6,xmm2 ; p1 movaps xmm2,xmm0 ; <--> shufps xmm2,xmm2,0x30 ; 0z00 movaps xmm3,xmm0 shufps xmm3,xmm3,0x80 ; y000 subps xmm2,xmm3 ; Yz00 shufps xmm7,xmm7,0x00 ; y+z mulps xmm7,xmm2 ; p2 jl2: movaps xmm3,xmm6 subps xmm3,xmm7 ; p1-p2 addps xmm7,xmm6 ; p1+p2 movaps xmm4,xmm3 mulps xmm4,xmm4 ; (p1-p2)^2 movaps xmm6,xmm7 mulps xmm6,xmm6 ; (p1+p2)^2 movaps xmm2,xmm4 shufps xmm2,xmm6,0x11 ; 0101 shufps xmm4,xmm6,0xee ; 3232 addps xmm4,xmm2 ; z x+y z x+y movaps xmm2,xmm4 shufps xmm2,xmm2,0xb1 addps xmm4,xmm2 ; x+y+z rsqrtps xmm4,xmm4 movaps xmm6,xmm4 shufps xmm4,xmm4,0x00 ; 0000 shufps xmm6,xmm6,0xaa ; 2222 mulps xmm3,xmm4 ; a1 mulps xmm7,xmm6 ; a2 fld st2 ; tword [pi2] fmulp st1 fsincos fstp dword [xcos] fstp dword [xsin] movlps xmm4,[xcos] movaps xmm6,xmm4 shufps xmm6,xmm6,0x00 ; cos shufps xmm4,xmm4,0x55 ; sin mulps xmm6,xmm3 mulps xmm4,xmm7 addps xmm4,xmm6 ; u movlps xmm2,[zcos] movaps xmm3,xmm2 shufps xmm3,xmm3,0x00 ; cos shufps xmm2,xmm2,0x55 ; sin mulps xmm4,xmm2 mulps xmm0,xmm3 addps xmm0,xmm4 ; new n movaps xmm7,xmm0 mulps xmm7,xmm7 shufps xmm7,xmm7,0x39 ; rotate movss xmm2,xmm7 shufps xmm7,xmm7,0x39 ; rotate movss xmm3,xmm7 shufps xmm7,xmm7,0x39 ; rotate movss xmm4,xmm7 addss xmm2,xmm3 addss xmm2,xmm4 rsqrtss xmm2,xmm2 shufps xmm2,xmm2,0x00 mulps xmm0,xmm2 ; new n jl3: ret ; <-- rotate -- rand: mov eax,[xrd] ; RANDOM NUMBER GENERATOR mov edx,[mtp] mul edx mov ecx,[cry] add eax,ecx adc edx,0 mov [cry],edx mov [xrd],eax cmp eax,0 jz rand fild qword [xrd] fld tword [bse] fmulp st1 ret ; <-- rand -- print: fstp qword [tmp] ; PRINT FLOAT NUMBER push dword [tmp+4] push dword [tmp] push dword fmt call printf add esp, 12 ret ; <-- print -- print4: mov ecx,4 ; PRINT 4 XMM7 NUMBERS prn1: shufps xmm7,xmm7,0x93 ; rotate movss [tmp],xmm7 fld dword [tmp] fstp qword [tmp] push dword [tmp+4] push dword [tmp] loop prn1 push dword fmt4 call printf add esp, 36 ret ; <-- print4 -- exit: mov eax,1 ; EXIT FUNCTION mov ebx,0 int 0x80 ; <-- exit --