#ifdef SHRT #define STRINGIFY(A) #A #define XTRINGIFY(A) STRINGIFY(A) string kernel_source = XTRINGIFY(( ; #endif struct DOM{ cl_float r[3]; }; struct hit{ cl_uint i; cl_float t; cl_uint n; cl_float z; }; struct photon{ cl_float r[4]; // location, time cl_float n[4]; // direction, track length cl_uint q; // track segment #ifdef ANGW cl_float f; // fraction of light from muon alone (without cascades) #endif #ifdef LONG cl_float a, b; // longitudinal development parametrization coefficients #endif }; struct ices{ cl_float wvl; // wavelength of this block cl_float ocm; // 1 / speed of light in medium cl_float coschr, sinchr; // cos and sin of the cherenkov angle struct{ cl_float abs; // absorption cl_float sca; // scattering } z [MAXLYS]; }; struct line{ cl_short n, max; cl_float x, y, r; cl_float h, d; cl_float dl, dh; }; struct dats{ cl_uint hidx; cl_uint ab; // if TOT was abnormal cl_int type; // 0=cascade/1=flasher/2=flasher 45/3=laser up/4=laser down cl_float r[3]; cl_uint hnum; // size of hits buffer cl_int size; // size of kurt table cl_int rsize; // count of multipliers cl_int gsize; // count of initialized OMs cl_float dh, hdh, rdh, hmin; // step, step/2, 1/step, and min depth cl_float ocv; // 1 / speed of light in vacuum cl_float sf; // scattering function: 0=HG; 1=SAM cl_float g, g2, gr; // g=, g2=g*g and gr=(1-g)/(1+g) cl_float R, R2, zR; // DOM radius, radius^2, and inverse "oversize" scaling factor cl_int cn[2]; cl_float cl[2], crst[2]; cl_uchar is[CX][CY]; cl_uchar ls[NSTR]; struct line sc[NSTR]; cl_float rx; cl_float fldr; // horizontal direction of the flasher led #1 cl_float eff; // OM efficiency correction #ifdef ASENS cl_float mas; // maximum angular sensitivity cl_float s[ANUM]; // ang. sens. coefficients #endif #ifdef ROMB cl_float cb[2][2]; #endif #ifdef OFLA cl_short fla; #endif #ifdef TILT cl_int lnum, lpts, l0; cl_float lmin, lrdz, r0; cl_float lnx, lny; cl_float lr[LMAX]; cl_float lp[LMAX][LYRS]; #endif }; struct datz{ struct ices w[WNUM]; cl_uint rm[MAXRND]; cl_ulong rs[MAXRND]; }; #ifdef SHRT #define sin native_sin #define cos native_cos #define pow native_powr #define exp native_exp #define log native_log #define sqrt native_sqrt #define rsqrt native_rsqrt float xrnd(uint s[4]){ uint tmp; do{ ulong sda = s[2] * (ulong) s[0]; sda += s[1]; s[0] = sda; s[1] = sda >> 32; tmp = s[0] >> 9; } while(tmp==0); return as_float(tmp|0x3f800000)-1.0f; } #ifdef LONG float mrnd(float k, uint s[4]){ // gamma distribution float x; if(k<1){ // Weibull algorithm float c=1/k; float d=(1-k)*pow(k, k/(1-k)); float z, e; do{ z=-log(xrnd(s)); e=-log(xrnd(s)); x=pow(z, c); } while(z+efabs(n0)){ i0=i; n0=n[i]; } int i1=(i0+1)%3, i2=(i0+2)%3; float n1=n[i1], n2=n[i2], nq=n0*n0; float r1=rsqrt(nq+n1*n1), r2=rsqrt(nq+n2*n2); p1[i0]=-n1*r1; p1[i1]=n0*r1; p1[i2]=0; p2[i0]=-n2*r2; p2[i1]=0; p2[i2]=n0*r2; } { float q1[3], q2[3], r1=0, r2=0; for(int i=0; i<3; i++){ float a1=p1[i]-p2[i]; q1[i]=a1; r1+=a1*a1; float a2=p1[i]+p2[i]; q2[i]=a2; r2+=a2*a2; } r1=rsqrt(r1); r2=rsqrt(r2); for(int i=0; i<3; i++){ p1[i]=q1[i]*r1; p2[i]=q2[i]*r2; } } float zi=xrnd(s); zi*=2*M_PI; float px=cos(zi), py=sin(zi); for(int i=0; i<3; i++) u[i]=px*p1[i]+py*p2[i]; } { float r=0; for(int i=0; i<3; i++){ float a=n[i]*cs+u[i]*si; n[i]=a; r+=a*a; } r=rsqrt(r); for(int i=0; i<3; i++) n[i]*=r; } } #ifdef TILT float zshift(__local struct dats * d, float r[4]){ if(d->lnum==0) return 0; float z=(r[2]-d->lmin)*d->lrdz; int k=min(max(convert_int_sat_rtn(z), 0), d->lpts-2); int l=k+1; float nr=d->lnx*r[0]+d->lny*r[1]-d->r0; for(int j=1; jlr[j] || j==d->lnum-1){ int i=j-1; return ( (d->lp[j][l]*(z-k)+d->lp[j][k]*(l-z))*(nr-d->lr[i]) + (d->lp[i][l]*(z-k)+d->lp[i][k]*(l-z))*(d->lr[j]-nr) )/(d->lr[j]-d->lr[i]); } return 0; } #endif void ctr(__local struct dats * d, float r[2], float p[2]){ #ifdef ROMB p[0]=d->cb[0][0]*r[0]+d->cb[1][0]*r[1]; p[1]=d->cb[0][1]*r[0]+d->cb[1][1]*r[1]; #else p[0]=r[0]; p[1]=r[1]; #endif } __kernel void propagate(__private uint num, __global struct dats * ed, __global struct datz * ez, __global struct hit * eh, __global struct photon * ep, __constant struct DOM * oms){ if(num==0){ ed->hidx=0; ed->ab=0; write_mem_fence(CLK_GLOBAL_MEM_FENCE); return; } uint s[4]; float r[4]={0}; float n[3]={0}; __global struct ices * w; __local struct dats e; const uint lidx=get_local_id(0), lsiz=get_local_size(0); const uint idx=get_global_id(0), siz=get_global_size(0); for(uint i=lidx; 4*irs[s[3]]; s[1]=ez->rs[s[3]] >> 32; s[2]=ez->rm[s[3]]; } int niw=0, old; float TOT=0, sca=0; for(uint i=idx; iw[min(convert_int_sat_rtn(WNUM*xrnd(s)), WNUM-1)]; if(e.type>0){ r[0]=e.r[0]; r[1]=e.r[1]; r[2]=e.r[2]; r[3]=0; float ka=0, up=0; const float fcv=FPI/180.f; switch(e.type){ case 1: ka=square(fcv*9.7f); up=fcv*0.0f; break; case 2: ka=square(fcv*9.7f); up=fcv*48.f; break; case 3: ka=0.0f; up=fcv*(90.0f-41.13f); break; case 4: ka=0.0f; up=fcv*(41.13f-90.0f); break; } float xi=xrnd(s); if(e.fldr<0) xi*=2*FPI; else{ int r=convert_int_sat_rtn(e.fldr/360)+1; int s=convert_int_sat_rtn(xi*r); xi=(e.fldr+s*360/r)*fcv; } n[0]=cos(xi), n[1]=sin(xi); float np=cos(up); n[2]=sin(up); n[0]*=np; n[1]*=np; if(ka>0){ do{ xi=1+ka*log(xrnd(s)); } while (xi<-1); float si=sqrt(1-xi*xi); turn(xi, si, n, s); } } else{ struct photon p=ep[i/OVER]; r[0]=p.r[0]; r[1]=p.r[1]; r[2]=p.r[2]; r[3]=p.r[3]; n[0]=p.n[0]; n[1]=p.n[1]; n[2]=p.n[2]; float l=p.n[3]; niw=p.q; if(l>0) l*=xrnd(s); #ifdef LONG else if(p.b>0) l=p.b*mrnd(p.a, s); #endif if(l>0){ r[3]+=e.ocv*l; r[0]+=n[0]*l; r[1]+=n[1]*l; r[2]+=n[2]*l; } #ifdef ANGW if(p.fcoschr, w->sinchr, n, s); } TOT=-log(xrnd(s)); } if(sca==0){ // get distance for overburden float z = r[2]; #ifdef TILT z-= zshift(&e, r); #endif int i=convert_int_sat_rte((z-e.hmin)*e.rdh); if(i<0) i=0; else if(i>=e.size) i=e.size-1; float h=e.hmin+i*e.dh; // middle of the layer float ahx=n[2]<0?h-e.hdh:h+e.hdh; float SCA=-log(xrnd(s)); float ais=(n[2]*SCA-(ahx-z)*w->z[i].sca)*e.rdh; float aia=(n[2]*TOT-(ahx-z)*w->z[i].abs)*e.rdh; int j=i; if(n[2]<0) for(; j>0 && ais<0 && aia<0; ahx-=e.dh, ais+=w->z[j].sca, aia+=w->z[j].abs) --j; else for(; j0 && aia>0; ahx+=e.dh, ais-=w->z[j].sca, aia-=w->z[j].abs) ++j; float tot; if(i==j || fabs(n[2])z[j].sca, tot=TOT/w->z[j].abs; else sca=(ais*e.dh/w->z[j].sca+ahx-z)/n[2], tot=(aia*e.dh/w->z[j].abs+ahx-z)/n[2]; // get overburden for distance if(totz[j].abs; old=-1; } int om=-1; float del=sca; { // sphere float ri[2], rf[2], pi[2], pf[2]; ri[0]=r[0]; rf[0]=r[0]+del*n[0]; ri[1]=r[1]; rf[1]=r[1]+del*n[1]; ctr(&e, ri, pi); ctr(&e, rf, pf); ri[0]=min(pi[0], pf[0])-e.rx; rf[0]=max(pi[0], pf[0])+e.rx; ri[1]=min(pi[1], pf[1])-e.rx; rf[1]=max(pi[1], pf[1])+e.rx; int xl[2], xh[2]; xl[0]=min(max(convert_int_sat_rte((ri[0]-e.cl[0])*e.crst[0]), 0), e.cn[0]); xh[0]=max(min(convert_int_sat_rte((rf[0]-e.cl[0])*e.crst[0]), e.cn[0]-1), -1); xl[1]=min(max(convert_int_sat_rte((ri[1]-e.cl[1])*e.crst[1]), 0), e.cn[1]); xh[1]=max(min(convert_int_sat_rte((rf[1]-e.cl[1])*e.crst[1]), e.cn[1]-1), -1); for(int i=xl[0], j=xl[1]; i<=xh[0] && j<=xh[1]; ++j<=xh[1]?:(j=xl[1],i++)) for(uchar k=e.is[i][j]; k!=0x80; ){ uchar m=e.ls[k]; __local struct line * s = & e.sc[m&0x7f]; k=m&0x80?0x80:k+1; float b=0, c=0, dr; dr=s->x-r[0]; b+=n[0]*dr; c+=dr*dr; dr=s->y-r[1]; b+=n[1]*dr; c+=dr*dr; float np=1-n[2]*n[2]; float D=b*b-(c-s->r*s->r)*np; if(D>=0){ D=sqrt(D); float h1=b-D, h2=b+D; if(h2>=0 && h1<=del*np){ if(np>XXX){ h1/=np, h2/=np; if(h1<0) h1=0; if(h2>del) h2=del; } else h1=0, h2=del; h1=r[2]+n[2]*h1, h2=r[2]+n[2]*h2; float zl, zh; if(n[2]>0) zl=h1, zh=h2; else zl=h2, zh=h1; int omin=0, omax=s->max; int n1=s->n-omin+min(omax+1, max(omin, convert_int_sat_rtp(omin-(zh-s->dl-s->h)*s->d))); int n2=s->n-omin+max(omin-1, min(omax, convert_int_sat_rtn(omin-(zl-s->dh-s->h)*s->d))); for(int l=n1; l<=n2; l++) if(l!=old){ #ifdef OFLA if(l==e.fla) continue; #endif struct DOM dom = oms[l]; float b=0, c=0, dr; dr=dom.r[0]-r[0]; b+=n[0]*dr; c+=dr*dr; dr=dom.r[1]-r[1]; b+=n[1]*dr; c+=dr*dr; dr=dom.r[2]-r[2]; b+=n[2]*dr; c+=dr*dr; float D=b*b-c+e.R2; if(D>=0){ D=sqrt(D); float h=b-D*e.zR; if(h>0 && h<=del){ om=l; del=h; } } } } } } } { // advance r[0]+=del*n[0]; r[1]+=del*n[1]; r[2]+=del*n[2]; r[3]+=del*w->ocm; sca-=del; } if(!isfinite(TOT) || !isfinite(sca)) atom_add(&ed->ab, 1), TOT=0, sca=0, om=-1; float xi=xrnd(s); if(om!=-1){ bool flag=true; struct hit h; h.i=om; h.t=r[3]; h.n=niw; h.z=n[2]; #ifdef ASENS float sum; { float x = n[2]; float y=1; sum=e.s[0]; for(int i=1; i0){ float dt=0, dr; const struct DOM dom = oms[om]; for(int i=0; i<3; i++, dt+=dr*dr) dr=dom.r[i]-e.r[i]; if(h.t<(sqrt(dt)-OMR)*w->ocm) flag=false; } if(flag){ uint j = atom_add(&ed->hidx, 1); if(je.sf){ xi=(1-xi)/(1-e.sf); xi=2*xi-1; if(e.g!=0){ float ga=(1-e.g2)/(1+e.g*xi); xi=(1+e.g2-ga*ga)/(2*e.g); } } else{ xi/=e.sf; xi=2*pow(xi, e.gr)-1; } if(xi>1) xi=1; else if(xi<-1) xi=-1; float si=sqrt(1-xi*xi); turn(xi, si, n, s); } } } { ez->rs[s[3]]=s[0] | (ulong) s[1] << 32; barrier(CLK_LOCAL_MEM_FENCE); write_mem_fence(CLK_GLOBAL_MEM_FENCE); } } #endif #ifdef SHRT )); #endif