#include #include #include #include #include #include #ifdef XCPU #include #include #endif using namespace std; namespace xppc{ #ifdef XLIB struct begin{ begin(){ cerr<type=type; eh->hidx=0; for(int m=0; m<3; m++) eh->r[m]=0; } } void fin(){ if(eh->type==0) delete d.pz; delete d.hits; delete eh; } #else float deviceTime=0; cudaDeviceProp prop; cudaStream_t stream; void checkError(cudaError result){ if(result!=cudaSuccess){ cerr<<"CUDA Error: "<0?prop.multiProcessorCount:WNUM; pmax=nblk*NTHR*NPHO; pmxo=pmax/OVER; } { checkError(cudaStreamCreate(&stream)); checkError(cudaMallocHost((void**) &eh, sizeof(dats))); *eh=d; checkError(cudaMallocHost((void**) &d.hits, d.hnum*sizeof(hit))); } { pn=0; eh->type=type; eh->hidx=0; for(int m=0; m<3; m++) eh->r[m]=0; } { unsigned int size=d.rsize, need=nblk*NTHR; if(sizersize=need; } { unsigned int size=eh->rsize*sizeof(unsigned int); tot+=size; checkError(cudaMalloc((void**) &eh->rm, size)); checkError(cudaMemcpy(eh->rm, d.rm, size, cudaMemcpyHostToDevice)); } { unsigned int size=eh->rsize*sizeof(unsigned long long); tot+=size; checkError(cudaMalloc((void**) &eh->rs, size)); checkError(cudaMemcpy(eh->rs, d.rs, size, cudaMemcpyHostToDevice)); } for(int i=0; iw[i], size)); checkError(cudaMemcpy(eh->w[i], d.w[i], size, cudaMemcpyHostToDevice)); } { unsigned int size=d.hnum*sizeof(hit); tot+=size; checkError(cudaMalloc((void**) &eh->hits, size)); } if(eh->type==0){ unsigned int size=pmxo*sizeof(photon); tot+=size; checkError(cudaMallocHost((void**) &d.pz, size)); checkError(cudaMalloc((void**) &eh->pz, size)); } { unsigned int size=d.gsize*sizeof(DOM); checkError(cudaMemcpyToSymbol(oms, d.oms, size)); } { unsigned int size=sizeof(dats); tot+=size; checkError(cudaMalloc((void**) &ed, size)); checkError(cudaMemcpy(ed, eh, size, cudaMemcpyHostToDevice)); } cerr<<"Total GPU memory usage: "<rm)); checkError(cudaFree(eh->rs)); for(int i=0; iw[i])); checkError(cudaFree(eh->hits)); checkError(cudaFree(ed)); if(eh->type==0){ checkError(cudaFree(eh->pz)); checkError(cudaFreeHost(d.pz)); } checkError(cudaFreeHost(d.hits)); checkError(cudaFreeHost(eh)); checkError(cudaStreamDestroy(stream)); } void listDevices(){ int deviceCount, driver, runtime; cudaGetDeviceCount(&deviceCount); cudaDriverGetVersion(&driver); cudaRuntimeGetVersion(&runtime); fprintf(stderr, "\nFound %d devices, driver %d, runtime %d\n", deviceCount, driver, runtime); for(int device=0; devicehidx=0; #ifdef XCPU for(blockIdx.x=0, gridDim.x=nblk, blockDim.x=NTHR; blockIdx.x>>(ed, num); checkError(cudaGetLastError()); checkError(cudaEventRecord(evt2, stream)); checkError(cudaMemcpyAsync(eh, ed, sizeof(int), cudaMemcpyDeviceToHost, stream)); checkError(cudaStreamSynchronize(stream)); checkError(cudaEventElapsedTime(&dt, evt1, evt2)); deviceTime+=dt; checkError(cudaEventDestroy(evt1)); checkError(cudaEventDestroy(evt2)); } #endif cerr<<"photons: "<hidx<hidx>=eh->hnum){ eh->hidx=eh->hnum; cerr<<"Error: data buffer overflow occurred!"<hidx*sizeof(hit); checkError(cudaMemcpyAsync(d.hits, eh->hits, size, cudaMemcpyDeviceToHost, stream)); checkError(cudaStreamSynchronize(stream)); } #endif print(); } void flasher(int str, int dom, unsigned long long num, int itr){ int type=1; float r[3]={0, 0, 0}; if(str<0){ type=2; str=-str; } if(str==0) switch(dom){ case 1: type=3; r[0]=544.07; r[1]=55.89; r[2]=136.86; break; case 2: type=4; r[0]=11.87; r[1]=179.19; r[2]=-205.64; break; } else for(int n=0; nfla=d.fla; #endif eh->hidx=0; for(int m=0; m<3; m++) eh->r[m]=r[m]; #ifndef XCPU checkError(cudaMemcpyAsync(ed, eh, 4*sizeof(int), cudaMemcpyHostToDevice, stream)); #endif for(int j=0; j0; i-=pmax) kernel(min(i, (long long) pmax)); if(itr>0) printf("\n"); } fin(); } void output(){ #ifndef XCPU { unsigned int size=pn*sizeof(photon); checkError(cudaMemcpyAsync(eh->pz, d.pz, size, cudaMemcpyHostToDevice, stream)); } #endif kernel(pn*OVER); pn=0; } #ifdef XCPU void start(){} void stop(){} void choose(int device){ #ifdef MKOW srand(device); #endif seed=device; } void listDevices(){} #else void start(){ cudaSetDeviceFlags(cudaDeviceBlockingSync); } void stop(){ fprintf(stderr, "\nDevice time: %2.1f [ms]\n", deviceTime); checkError(cudaThreadExit()); } void choose(int device){ #ifdef MKOW srand(device); #endif checkError(cudaSetDevice(device)); checkError(cudaGetDeviceProperties(&prop, device)); } #endif } #ifndef XLIB using namespace xppc; int main(int arg_c, char *arg_a[]){ start(); if(arg_c<=1){ listDevices(); fprintf(stderr, "\nUse: %s [device] (f2k muons)\n" " %s [str] [om] [num] [device] (flasher)\n", arg_a[0], arg_a[0]); } else if(0==strcmp(arg_a[1], "-")){ ices * w = d.w[d.nfla]; cerr<<"For wv slice "<<(d.nfla+1)<<"/"<z[i].abs<<" "<z[i].sca<1) device=atoi(arg_a[1]); choose(device); fprintf(stderr, "Processing f2k muons from stdin on device %d\n", device); f2k(); } else{ int str=0, dom=0, device=0, itr=0; unsigned long long num=1000000ULL; if(arg_c>1) str=atoi(arg_a[1]); if(arg_c>2) dom=atoi(arg_a[2]); if(arg_c>3){ num=(unsigned long long) atof(arg_a[3]); char * sub = strchr(arg_a[3], '*'); if(sub!=NULL) itr=(int) atof(++sub); } if(arg_c>4) device=atoi(arg_a[4]); choose(device); fprintf(stderr, "Running flasher simulation on device %d\n", device); flasher(str, dom, (unsigned long long)(num*(long double)(EFF)), itr); } stop(); } #endif