#include #include #include #include #include #include #include #include #include #include #ifdef __APPLE_CC__ #include #else #include #endif using namespace std; namespace xppc{ #define OFLA // omit the flasher DOM #define ROMB // use rhomb cells aligned with the array #define ASENS // enable angular sensitivity #define RAND // disable for deterministic results #define TILT // enable tilted ice layers #define ANIZ // enable anisotropic ice #define MKOW // photon yield parametrizations by M. Kowalski #define ANGW // smear cherenkov cone due to shower development #define LONG // simulate longitudinal cascade development #define CWLR // new parameterizations by C. Wiebusch and L. Raedel // requires that MKOW, ANGW, and LONG are all defined #ifdef ASENS #define ANUM 11 // number of coefficients in the angular sensitivity curve #endif #ifdef TILT #define LMAX 6 // number of dust loggers #define LYRS 170 // number of depth points #endif #ifdef ROMB #define DIR1 9.3 #define DIR2 129.3 #define CX 21 #define CY 19 #define NSTR 94 #else #define CX 13 #define CY 13 #define NSTR 94 #endif #define USMA // enable use of local shared memory #define XAMD // enable more OpenCL-specific optimizations #define OVER 10 // size of photon bunches along the muon track #define HQUO 16 // save at most photons/HQUO hits #define NPHO 1024 // maximum number of photons propagated by one thread #define WNUM 32 // number of wavelength slices #define MAXLYS 172 // maximum number of ice layers #define MAXGEO 5200 // maximum number of OMs #define MAXRND 131072 // max. number of random number multipliers #define XXX 1.e-5f #define FPI 3.141592653589793f #define OMR 0.16510f // DOM radius [m] #include "pro.cxx" #include "ini.cxx" #define SHRT #include "pro.cxx" #undef SHRT void initialize(float enh = 1.f){ m.set(); d.eff*=enh; } unsigned int pmax, pmxo, pn; bool xgpu=false; void checkError(cl_int result){ if(result!=CL_SUCCESS){ cerr<<"OpenCL Error: "< > all; struct gpu{ dats d; int device, mult; cl_uint nblk; size_t nthr, ntot; unsigned int npho, pmax, pmxo; unsigned int old, num; float deviceTime; cl_event event; cl_platform_id pfID; cl_device_id devID; cl_context ctx; cl_command_queue cq; cl_program program; cl_kernel clkernel; cl_mem ed, ez, eh, ep, eo; // pointers to structures on device string& replace(string& in, string old, string str){ string clx; int k=0, m=0; while((m=in.find(old, k))!=-1){ clx.append(in, k, m-k); k=m+old.length(); clx.append(str); } clx.append(in, k, in.length()-k); return in=clx; } gpu(int device) : deviceTime(0), old(0), npho(NPHO), mult(4){ this->device=device; { ostringstream o; o<<"NPHO_"<0){ mult=aux; cerr<<"Setting XMLT="<0) cerr<<"Error: TOT was a nan or an inf "<=d.hnum){ d.hidx=d.hnum; cerr<<"Error: data buffer overflow occurred!"<0){ unsigned int size=d.hidx*sizeof(hit); checkError(clEnqueueReadBuffer(cq, eh, CL_FALSE, 0, size, &q.hits[xppc::d.hidx], 0, NULL, NULL)); xppc::d.hidx+=d.hidx; } } void kernel_c(unsigned int & idx){ if(old>0) checkError(clFinish(cq)); unsigned int pn=num/OVER; if(pn>0){ unsigned int size=pn*sizeof(photon); checkError(clEnqueueWriteBuffer(cq, ep, CL_FALSE, 0, size, &q.pz[idx], 0, NULL, NULL)); idx+=pn; } } void kernel_f(){ checkError(clFinish(cq)); if(num>0){ unsigned int zero=0; checkError(clSetKernelArg(clkernel, 0, sizeof(unsigned int), &zero)); checkError(clEnqueueTask(cq, clkernel, 0, NULL, NULL)); checkError(clSetKernelArg(clkernel, 0, sizeof(unsigned int), &num)); checkError(clEnqueueNDRangeKernel(cq, clkernel, 1, NULL, &ntot, &nthr, 0, NULL, &event)); } } void stop(){ fprintf(stderr, "Device time: %2.1f [ms]\n", deviceTime); if(clkernel) checkError(clReleaseKernel(clkernel)); if(program) checkError(clReleaseProgram(program)); if(cq) checkError(clReleaseCommandQueue(cq)); if(ctx) checkError(clReleaseContext(ctx)); } }; vector gpus; void ini(int type){ d.hnum=0; pmax=0, pmxo=0, pn=0; for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++){ i->set(); i->ini(type); if(xgpu) sv++; d.hnum+=i->d.hnum; pmax+=i->pmax, pmxo+=i->pmxo; } { q.hits = new hit[d.hnum]; } { if(d.type==0) q.pz = new photon[pmxo]; } } void fin(){ for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->fin(); if(d.type==0) delete q.pz; delete q.hits; } void listDevices(){ cl_uint num; checkError(clGetPlatformIDs(0, NULL, &num)); fprintf(stderr, "Found %d platforms:\n", num); cl_platform_id ids[num]; checkError(clGetPlatformIDs(num, ids, NULL)); for(int i=0, n=0; i0){ d.hidx=0; for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->kernel_i(); cerr<<"photons: "<::iterator i=gpus.begin(); i!=gpus.end(); i++){ i->num=over*((num*(unsigned long long) i->pmax)/(over*(unsigned long long) pmax)); sum+=i->num; } while(num>sum){ static int res=0; gpu& g=gpus[res++%gpus.size()]; if(g.num::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->kernel_c(idx); } for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->kernel_f(); if(old>0) print(); old=num; for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->old=i->num; } float square(float x){ return x*x; } int flset(int str, int dom){ int type=1; float r[3]={0, 0, 0}; if(str<0){ type=2; str=-str; } if(str==0) switch(dom){ case 1: type=3; r[0]=544.07; r[1]=55.89; r[2]=136.86; break; case 2: type=4; r[0]=11.87; r[1]=179.19; r[2]=-205.64; break; } else for(int n=0; n0){ float FLZ, FLR; sincosf(fcv*30.f, &FLZ, &FLR); FLZ*=OMR, FLR*=OMR; r[0]+=FLR*n[0]; r[1]+=FLR*n[1]; r[2]+=FLZ; r[3]+=OMR*d.ocv; } float xi; sincosf(d.up, &n[2], &xi); n[0]*=xi; n[1]*=xi; } #endif void flone(unsigned long long num){ for(long long i=llroundf(num*(long double)d.eff); i>0; i-=pmax) kernel(min(i, (long long) pmax)); kernel(0); } void flasher(int str, int dom, unsigned long long num, int itr){ flini(str, dom); for(int j=0; j0) printf("\n"); } fin(); } void start(){ } void stop(){ fprintf(stderr, "\n"); for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->stop(); } void choose(int device){ listDevices(); int deviceCount=all.size(); if(device<0){ if(deviceCount==0){ cerr<<"Could not find compatible devices!"<=deviceCount){ cerr<<"Device #"<1; } #include "f2k.cxx" } #ifndef XLIB using namespace xppc; int main(int arg_c, char *arg_a[]){ start(); if(arg_c<=1){ listDevices(); fprintf(stderr, "Use: %s [device] (f2k muons)\n" " %s [str] [om] [num] [device] (flasher)\n", arg_a[0], arg_a[0]); } else if(arg_c<=2){ int device=0; if(arg_c>1) device=atoi(arg_a[1]); initialize(); choose(device); fprintf(stderr, "Processing f2k muons from stdin on device %d\n", device); f2k(); } else{ int str=0, dom=0, device=0, itr=0; unsigned long long num=1000000ULL; if(arg_c>1) str=atoi(arg_a[1]); if(arg_c>2) dom=atoi(arg_a[2]); if(arg_c>3){ num=(unsigned long long) atof(arg_a[3]); char * sub = strchr(arg_a[3], '*'); if(sub!=NULL) itr=(int) atof(++sub); } if(arg_c>4) device=atoi(arg_a[4]); initialize(); choose(device); fprintf(stderr, "Running flasher simulation on device %d\n", device); flasher(str, dom, num, itr); } stop(); } #endif