;-----------------------------------------------------------------------
; rot -- perturb the 4-float vector stored at asm_nq and renormalise it.
;
; Syntax: NASM (Intel operand order).  The code after `ret` uses esp and
; `push dword`, i.e. 32-bit x86.
;
; Reads : asm_z2, asm_zi (qword floats), asm_nq (4 packed floats),
;         asm_zsn (2 packed floats).
; Writes: asm_cos (2 floats: cos at +0, sin at +4), asm_nq.
; Uses  : xmm0-xmm7, flags, two x87 stack slots (both popped again before
;         the routine returns).  No stack memory is touched before `ret`.
; Align : asm_nq is accessed with movaps, which requires a 16-byte
;         aligned address.
;
; Lane notation: the 4-character lane comments list lanes high -> low
; (lane3 lane2 lane1 lane0).  By the original comments lane1 = x,
; lane2 = y, lane3 = z, and lane0 is written as "0" -- presumably the
; caller keeps lane 0 of asm_nq at zero; TODO confirm.  An upper-case
; letter in a lane comment marks a negated component.
;
; NOTE(review): asm_zcs is declared but not referenced in the visible
; code; asm_sse appears only in the commented-out fxsave/fxrstor; asm_dbl,
; asm_fmt and printf are used only by the code after `ret`.
;-----------------------------------------------------------------------
extern asm_sse
extern asm_nq
extern asm_zi
extern asm_z2
extern asm_cos
extern asm_zsn
extern asm_zcs
extern asm_dbl
extern asm_fmt
extern printf

section .text
global rot

rot:
        ;fxsave [asm_sse]

        ; x87: angle = pi * [asm_z2] * [asm_zi]; fsincos then leaves
        ; st0 = cos(angle), st1 = sin(angle).  Both stay on the x87 stack
        ; until the fstp pair further down.
        fldpi
        fld qword [asm_z2]
        fmulp st1
        fld qword [asm_zi]
        fmulp st1
        fsincos

        movaps xmm0,[asm_nq]            ; old n
        movaps xmm1,xmm0
        mulps xmm1,xmm1                 ; n^2 (per lane)
        movaps xmm2,xmm1
        shufps xmm2,xmm2,0xe5           ; zyxx: lane0 = x^2
        movaps xmm3,xmm1
        shufps xmm3,xmm3,0xe6           ; zyxy: lane0 = y^2
        movaps xmm4,xmm1
        shufps xmm4,xmm4,0xe7           ; zyxz: lane0 = z^2

        ; comiss compares lane 0 only; CF is set when x^2 is below y^2
        ; (or the compare is unordered).
        comiss xmm2,xmm3
        jc jl1                          ; if x ... (comment truncated in SOURCE)

        ; NOTE(review): text is missing from SOURCE at this point.  The
        ; label jl1, whatever branches reach cm1 / cm2, and the code that
        ; sets up xmm5 and xmm6 for the case below are not visible.  By
        ; analogy with cm1 / cm2, xmm5 presumably holds rsqrt of pairwise
        ; sums of squares and xmm6 a copy of n -- confirm against the
        ; original file before assembling.

        ; ---- first visible case: p1 from (x, -y), p2 from (x, -z) ----
        shufps xmm6,xmm6,0x10           ; 0x00
        movaps xmm7,xmm0
        shufps xmm7,xmm7,0x08           ; 00y0
        subps xmm6,xmm7                 ; 0xY0
        movaps xmm1,xmm5
        shufps xmm1,xmm1,0x00           ; broadcast xmm5 lane0 (original comment: x+y)
        mulps xmm1,xmm6                 ; p1
        movaps xmm6,xmm0                ; <--> restart from n
        shufps xmm6,xmm6,0x40           ; x000
        movaps xmm7,xmm0
        shufps xmm7,xmm7,0x0c           ; 00z0
        subps xmm6,xmm7                 ; x0Z0
        movaps xmm2,xmm5
        shufps xmm2,xmm2,0x55           ; broadcast xmm5 lane1 (original comment: x+z)
        mulps xmm2,xmm6                 ; p2
        jmp jl2

        ; ---- case cm1: p1 from (y, -z), p2 from (y, -x) ----
        ; xmm3 (from the top of rot) is z^2 y^2 x^2 y^2; adding its
        ; lane-reversed copy gives lane0 = y^2+z^2 and lane1 = x^2+y^2.
cm1:
        movaps xmm5,xmm3
        shufps xmm5,xmm5,0x1b           ; yxyz (lanes reversed)
        addps xmm5,xmm3
        rsqrtps xmm5,xmm5               ; approximate 1/sqrt per lane
        movaps xmm6,xmm0                ; <--> restart from n
        shufps xmm6,xmm6,0x80           ; y000
        movaps xmm7,xmm0
        shufps xmm7,xmm7,0x30           ; 0z00
        subps xmm6,xmm7                 ; yZ00
        movaps xmm1,xmm5
        shufps xmm1,xmm1,0x00           ; broadcast lane0 = rsqrt(y^2+z^2) (original comment said x+z)
        mulps xmm1,xmm6                 ; p1
        movaps xmm6,xmm0                ; <--> restart from n
        shufps xmm6,xmm6,0x08           ; 00y0
        movaps xmm7,xmm0
        shufps xmm7,xmm7,0x10           ; 0x00
        subps xmm6,xmm7                 ; 0Xy0
        movaps xmm2,xmm5
        shufps xmm2,xmm2,0x55           ; broadcast lane1 = rsqrt(x^2+y^2)
        mulps xmm2,xmm6                 ; p2
        jmp jl2

        ; ---- case cm2: p1 from (z, -x), p2 from (z, -y) ----
        ; xmm4 (from the top of rot) is z^2 y^2 x^2 z^2; adding the copy
        ; with its halves swapped gives lane0 = y^2+z^2, lane1 = x^2+z^2.
cm2:
        movaps xmm5,xmm4
        shufps xmm5,xmm5,0x4e           ; xzzy (64-bit halves swapped)
        addps xmm5,xmm4
        rsqrtps xmm5,xmm5               ; approximate 1/sqrt per lane
        movaps xmm6,xmm0                ; <--> restart from n
        shufps xmm6,xmm6,0x0c           ; 00z0
        movaps xmm7,xmm0
        shufps xmm7,xmm7,0x40           ; x000
        subps xmm6,xmm7                 ; X0z0
        movaps xmm1,xmm5
        shufps xmm1,xmm1,0x55           ; broadcast lane1 = rsqrt(x^2+z^2)
        mulps xmm1,xmm6                 ; p1
        movaps xmm6,xmm0                ; <--> restart from n
        shufps xmm6,xmm6,0x30           ; 0z00
        movaps xmm7,xmm0
        shufps xmm7,xmm7,0x80           ; y000
        subps xmm6,xmm7                 ; Yz00
        movaps xmm2,xmm5
        shufps xmm2,xmm2,0x00           ; broadcast lane0 = rsqrt(y^2+z^2)
        mulps xmm2,xmm6                 ; p2

        ; ---- common tail: xmm1 = p1, xmm2 = p2 ----
jl2:
        movaps xmm3,xmm1
        subps xmm3,xmm2                 ; p1-p2
        addps xmm2,xmm1                 ; p1+p2
        movaps xmm4,xmm3
        mulps xmm4,xmm4                 ; (p1-p2)^2
        movaps xmm5,xmm2
        mulps xmm5,xmm5                 ; (p1+p2)^2

        ; Sum lanes 1..3 of both squared vectors in one register: the
        ; low two result lanes come from xmm4 (difference), the high two
        ; from xmm5 (sum).  After the adds, lane0 = |p1-p2|^2 and
        ; lane2 = |p1+p2|^2 (lanes 1 and 3 hold 3x the lane-0 squares
        ; and are not used).
        movaps xmm6,xmm4
        shufps xmm6,xmm5,0x11           ; 0101
        movaps xmm7,xmm4
        shufps xmm7,xmm5,0x22           ; 0202
        shufps xmm4,xmm5,0x33           ; 0303
        addps xmm4,xmm6
        addps xmm4,xmm7
        rsqrtps xmm4,xmm4
        movaps xmm5,xmm4
        shufps xmm4,xmm4,0x00           ; 0000: broadcast 1/|p1-p2|
        shufps xmm5,xmm5,0xaa           ; 2222: broadcast 1/|p1+p2|
        mulps xmm3,xmm4                 ; a1 = (p1-p2) scaled to unit length
        mulps xmm2,xmm5                 ; a2 = (p1+p2) scaled to unit length

        ; Pop the fsincos results (st0 = cos, st1 = sin) into asm_cos as
        ; two single-precision floats; this empties what rot pushed onto
        ; the x87 stack.
        fwait
        fstp dword [asm_cos]
        fstp dword [asm_cos+4]
        movlps xmm4,[asm_cos]           ; low 64 bits: lane0 = cos, lane1 = sin
        movaps xmm5,xmm4
        shufps xmm5,xmm5,0x00           ; cos
        shufps xmm4,xmm4,0x55           ; sin
        mulps xmm5,xmm3                 ; cos * a1
        mulps xmm4,xmm2                 ; sin * a2
        addps xmm4,xmm5                 ; u

        ; Blend: n = [asm_zsn+0] * u + [asm_zsn+4] * n.  The original
        ; comments label the first float "sin" and the second "cos".
        movlps xmm2,[asm_zsn]
        movaps xmm3,xmm2
        shufps xmm2,xmm2,0x00           ; sin
        shufps xmm3,xmm3,0x55           ; cos
        mulps xmm4,xmm2
        mulps xmm0,xmm3
        addps xmm0,xmm4                 ; new n

        ; Renormalise using lanes 1..3 only: each 0x39 shuffle moves the
        ; next lane down into lane 0, where movss picks it up.
        movaps xmm1,xmm0
        mulps xmm1,xmm1
        shufps xmm1,xmm1,0x39           ; rotate: lane0 = x^2
        movss xmm2,xmm1
        shufps xmm1,xmm1,0x39           ; rotate: lane0 = y^2
        movss xmm3,xmm1
        shufps xmm1,xmm1,0x39           ; rotate: lane0 = z^2
        movss xmm4,xmm1
        addss xmm2,xmm3
        addss xmm2,xmm4                 ; x^2 + y^2 + z^2
        rsqrtss xmm2,xmm2               ; approximate 1/length
        shufps xmm2,xmm2,0x00           ; broadcast to all lanes
        mulps xmm0,xmm2                 ; new n

jl3:                                    ; not referenced in the visible code
        movaps [asm_nq],xmm0            ; store the result back
        ;fxrstor [asm_sse]
        ret

        ; NOTE(review): nothing in the visible code can reach the
        ; instructions below (they follow `ret` with no label), and they
        ; run off the end without a `ret` of their own.  They convert
        ; [asm_cos] to a double in asm_dbl and pass it to printf.
        ; `push dword [asm_fmt]` pushes the dword stored AT asm_fmt, so
        ; asm_fmt would have to hold a pointer to the format string --
        ; confirm before re-enabling.
        fld dword [asm_cos]
        fstp qword [asm_dbl]
        push dword [asm_dbl+4]          ; high half of the double
        push dword [asm_dbl]            ; low half of the double
        push dword [asm_fmt]
        call printf
        add esp, 12                     ; drop the 3 pushed dwords