// Host-side driver for the "xppc" namespace: pulls in "ini.cxx" and "pro.cu"
// and wraps buffer sizing, kernel launches, flasher setup and device selection.
//
// NOTE(review): this copy of the file looks damaged. All code sits on two
// physical lines, and text that would normally sit between '<' and '>' appears
// to be missing (the #include directives have no header names; fragments such
// as `if(size0){`, `for(int n=0; n0; i-=pmax)` and `for(int j=0; j0) printf`
// are cut mid-expression). As stored, the `//` comment after `dats *e;` also
// runs to the end of the first line. Restore from a clean copy before building.
//
// What is still readable:
//  - initialize() calls m.set(); igeo(enh) (XLIB only) calls m.geo() and
//    multiplies d.eff by enh.
//  - ini() sets ntot=nblk*nthr, pmax=ntot*NPHO, d.hnum=pmax/HQUO,
//    pmxo=pmax/OVER and resets pn to 0.
//  - The non-XCPU launch path records evt1, launches propagate<<<1,1>>>(e, 0)
//    and then propagate<<<nblk,nthr>>>(e, num) on `stream`, checks
//    cudaGetLastError() after each launch, and records evt2.
//  - flini(str, dom): type starts at 1; a negative str selects type 2 and is
//    negated; str==0 with dom 1 or 2 selects type 3 or 4 with hard-coded r[].
//  - flasher(str, dom, num, itr) calls flini(), runs a loop whose body is
//    truncated here, then calls fin().
//  - output(): in the non-XCPU build it waits on `stream` when old>0, then
//    asynchronously copies pn*sizeof(photon) bytes from q.pz to d.pz
//    (cudaMemcpyHostToDevice); in both builds it calls kernel(pn*OVER) and
//    resets pn; the non-XCPU build finally sets flnd=flne.
//  - XCPU build: start(), stop(), listDevices() are empty; choose(device) adds
//    device to sv, sets seed=device and uses NBLK/NTHR for nblk/nthr.
//  - CUDA build: start() sets cudaDeviceBlockingSync (return value unchecked);
//    stop() prints deviceTime/threadMin/threadMax and calls cudaThreadExit();
//    choose(device) selects the device, reads its properties, and sets nblk to
//    prop.multiProcessorCount and nthr to propagate's maxThreadsPerBlock.
//  - Trailing driver code (its function header is not visible): one branch runs
//    f2k() on input from stdin; the other reads str, dom, num (atof, with an
//    optional "*<itr>" suffix) and device from arg_a[1..4] and calls flasher()
//    with num scaled by d.eff.
//
// NOTE(review): cudaThreadExit() is, as far as I know, deprecated in favour of
// cudaDeviceReset() -- confirm against the CUDA toolkit version in use.
// NOTE(review): llroundf() takes a float, so the long double product
// num*d.eff is presumably narrowed to float before rounding; llroundl() may
// have been intended -- verify for large photon counts.
#include #include #include #include #include #include #include #ifndef __CUDACC__ #define XCPU #elif __CUDA_ARCH__ >= 120 #define USMA #endif #ifdef XCPU #include #include #endif using namespace std; namespace xppc{ #include "ini.cxx" #include "pro.cu" void initialize(){ m.set(); } #ifdef XLIB void igeo(float enh){ m.geo(); d.eff*=enh; } #endif dats *e; // pointer to a copy of "d" on device unsigned int pmax, pmxo, pn; int nblk, nthr, ntot; void ini(){ ntot=nblk*nthr; pmax=ntot*NPHO; d.hnum=pmax/HQUO; pmxo=pmax/OVER; pn=0; } #ifdef XCPU void ini(int type){ ini(); { d.hits = q.hits = new hit[d.hnum]; if(type==0) d.pz = q.pz = new photon[pmxo]; } { d.z=&z; e=&d; oms=q.oms; } { unsigned int size=d.rsize, need=seed+1; if(size0){ #ifdef XCPU d.hidx=0; for(d.blockIdx=0, d.gridDim=nblk, blockDim.x=nthr; d.blockIdx0){ cerr<<"Error: TOT was a nan or an inf "<=d.hnum){ d.hidx=d.hnum; cerr<<"Error: data buffer overflow occurred!"<0){ checkError(cudaEventRecord(evt1, stream)); propagate<<< 1, 1, 0, stream >>>(e, 0); checkError(cudaGetLastError()); propagate<<< nblk, nthr, 0, stream >>>(e, num); checkError(cudaGetLastError()); checkError(cudaEventRecord(evt2, stream)); } #endif if(old>0) print(); #ifndef XCPU old=num; #endif } void flini(int str, int dom){ int type=1; float r[3]={0, 0, 0}; if(str<0){ type=2; str=-str; } if(str==0) switch(dom){ case 1: type=3; r[0]=544.07; r[1]=55.89; r[2]=136.86; break; case 2: type=4; r[0]=11.87; r[1]=179.19; r[2]=-205.64; break; } else for(int n=0; n0; i-=pmax) kernel(min(i, (long long) pmax)); #ifndef XCPU kernel(0); #endif } void flasher(int str, int dom, unsigned long long num, int itr){ flini(str, dom); for(int j=0; j0) printf("\n"); } fin(); } void output(){ #ifndef XCPU { if(old>0) checkError(cudaStreamSynchronize(stream)); unsigned int size=pn*sizeof(photon); checkError(cudaMemcpyAsync(d.pz, q.pz, size, cudaMemcpyHostToDevice, stream)); } #endif kernel(pn*OVER); pn=0; #ifndef XCPU flnd=flne; #endif } #ifdef XCPU void start(){} void 
stop(){} void choose(int device){ sv+=device; seed=device; nblk=NBLK, nthr=NTHR; } void listDevices(){} #else void start(){ cudaSetDeviceFlags(cudaDeviceBlockingSync); } void stop(){ fprintf(stderr, "\nDevice time: %2.1f (in-kernel: %2.1f...%2.1f) [ms]\n", deviceTime, threadMin, threadMax); checkError(cudaThreadExit()); } void choose(int device){ sv+=device; checkError(cudaSetDevice(device)); checkError(cudaGetDeviceProperties(&prop, device)); #if CUDART_VERSION >= 3000 checkError(cudaFuncSetCacheConfig(propagate, cudaFuncCachePreferL1)); #endif cudaFuncAttributes attr; checkError(cudaFuncGetAttributes (&attr, propagate)); nblk=prop.multiProcessorCount, nthr=attr.maxThreadsPerBlock; cerr<<"Running on "<1) device=atoi(arg_a[1]); initialize(); choose(device); fprintf(stderr, "Processing f2k muons from stdin on device %d\n", device); f2k(); } else{ int str=0, dom=0, device=0, itr=0; unsigned long long num=1000000ULL; if(arg_c>1) str=atoi(arg_a[1]); if(arg_c>2) dom=atoi(arg_a[2]); if(arg_c>3){ num=(unsigned long long) atof(arg_a[3]); char * sub = strchr(arg_a[3], '*'); if(sub!=NULL) itr=(int) atof(++sub); } if(arg_c>4) device=atoi(arg_a[4]); initialize(); choose(device); fprintf(stderr, "Running flasher simulation on device %d\n", device); flasher(str, dom, (unsigned long long)llroundf(num*(long double)d.eff), itr); } stop(); } #endif