plugins/mandelcuda/mandelcuda.cu

 1 #include "mandelcuda.h"
 2 #include <cuda_runtime.h>
 3 #include <cuda_gl_interop.h>
 4 #include "helper_cuda.h"
 5 #include "helper_gl.h"
 6
 7 // The dimensions of the thread block
 8 #define BLOCKDIM_X 16
 9 #define BLOCKDIM_Y 16
 10 #define ABS(n) ((n) < 0 ? -(n) : (n))
 11
 12 void MandelCuda::init_dev()
 13 {
 14         if( numSMs ) return;
 15 //      int dev_id = findCudaDevice(argc, (const char **)argv);
 16         int dev_id = gpuGetMaxGflopsDeviceId();
 17         checkCudaErrors(cudaSetDevice(dev_id));
 18         cudaDeviceProp deviceProp;
 19         checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev_id));
 20 printf("GPU Device %d: \"%s\" with compute capability %d.%d\n",
 21   dev_id, deviceProp.name, deviceProp.major, deviceProp.minor);
 22         version = deviceProp.major * 10 + deviceProp.minor;
 23         numSMs = deviceProp.multiProcessorCount;
 24         if( !numSMs ) numSMs = -1;
 25 }
 26
 27 void MandelCuda::init(int pbo, int pw, int ph)
 28 {
 29         if( pbo_id >= 0 ) return;
 30         pbo_id = pbo;  pbo_w = pw;  pbo_h = ph;
 31         checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_pbo, pbo_id, cudaGraphicsMapFlagsNone));
 32         checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo, 0));
 33         size_t pbo_bytes = 0;
 34         checkCudaErrors(cudaGraphicsResourceGetMappedPointer(&pbo_mem, &pbo_bytes, cuda_pbo));
 35 }
 36
 37 void MandelCuda::finish()
 38 {
 39         pbo_id = -1;
 40         pbo_w = pbo_h = 0;
 41         checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo));
 42         pbo_mem = 0;
 43         cudaGraphicsUnregisterResource(cuda_pbo);  cuda_pbo = 0;
 44 }
 45
 46
 47 MandelCuda::MandelCuda()
 48 {
 49         version = 0;
 50         numSMs = 0;
 51         pbo_id = -1;
 52         pbo_w = pbo_h = 0;
 53         cuda_pbo = 0;
 54         pbo_mem = 0;
 55 }
 56 MandelCuda::~MandelCuda()
 57 {
 58 }
 59
 60 static inline int iDivUp(int a, int b)
 61 {
 62     int v = a / b;
 63     return a % b ? v+1 : v;
 64 }
 65
 66 // Determine if two pixel colors are within tolerance
 67 __device__ inline int CheckColors(const uchar4 &color0, const uchar4 &color1)
 68 {
 69         int x = color1.x - color0.x;
 70         if( ABS(x) > 10 ) return 1;
 71         int y = color1.y - color0.y;
 72         if( ABS(y) > 10 ) return 1;
 73         int z = color1.z - color0.z;
 74         if( ABS(z) > 10 ) return 1;
 75         return 0;
 76 }
 77
 78
 79 // The core MandelCuda calculation function template
 80 template<class T> __device__
 81 inline int CalcCore(const int n, T ix, T iy, T xC, T yC)
 82 {
 83     T x = ix, y = iy;
 84     T xx = x * x, yy = y * y;
 85     int i = n;
 86     while( --i && (xx + yy < 4.0f) ) {
 87         y = x * y +  x * y + yC ;  // 2*x*y + yC
 88         x = xx - yy + xC ;
 89         yy = y * y;
 90         xx = x * x;
 91     }
 92
 93     return i;
 94 }
 95
 96 template<class T> __global__
 97 void Calc(uchar4 *dst, const int img_w, const int img_h, const int is_julia,
 98                 const int crunch, const int gridWidth, const int numBlocks,
 99                 const T x_off, const T y_off, const T x_julia, const T y_julia, const T scale,
 100                 const uchar4 colors, const int frame, const int animationFrame)
 101 {
 102         // loop until all blocks completed
 103         for( unsigned int bidx=blockIdx.x; bidx<numBlocks; bidx+=gridDim.x ) {
 104                 unsigned int blockX = bidx % gridWidth;
 105                 unsigned int blockY = bidx / gridWidth;
 106                 const int x = blockDim.x * blockX + threadIdx.x;
 107                 const int y = blockDim.y * blockY + threadIdx.y;
 108                 if( x >= img_w || y >= img_h ) continue;
 109                 int pi = img_w*y + x, n = !frame ? 1 : 0;
 110                 uchar4 pixel = dst[pi];
 111                 if( !n && x > 0 )
 112                         n += CheckColors(pixel, dst[pi-1]);
 113                 if( !n && x+1 < img_w )
 114                         n += CheckColors(pixel, dst[pi+1]);
 115                 if( !n && y > 0 )
 116                         n += CheckColors(pixel, dst[pi-img_w]);
 117                 if( !n && y+1 < img_h )
 118                         n += CheckColors(pixel, dst[pi+img_w]);
 119                 if( !n ) continue;
 120
 121                 const T tx = T(x) * scale + x_off;
 122                 const T ty = T(y) * scale + y_off;
 123                 const T ix = is_julia ? tx : 0;
 124                 const T iy = is_julia ? ty : 0;
 125                 const T xC = is_julia ? x_julia : tx;
 126                 const T yC = is_julia ? y_julia : ty;
 127                 int m = CalcCore(crunch, ix,iy, xC,yC);
 128                 m = m > 0 ? crunch - m : 0;
 129                 if( m ) m += animationFrame;
 130
 131                 uchar4 color;
 132                 color.x = m * colors.x;
 133                 color.y = m * colors.y;
 134                 color.z = m * colors.z;
 135                 color.w = 0;
 136
 137                 int frame1 = frame+1, frame2 = frame1/2;
 138                 color.x = (pixel.x * frame + color.x + frame2) / frame1;
 139                 color.y = (pixel.y * frame + color.y + frame2) / frame1;
 140                 color.z = (pixel.z * frame + color.z + frame2) / frame1;
 141                 dst[pi] = color; // Output the pixel
 142         }
 143 }
 144
 145
 146 void MandelCuda::Run(unsigned char *data, unsigned int size, int is_julia, int crunch,
 147                 double x_off, double y_off, double x_julia, double y_julia, double scale,
 148                 uchar4 colors, int pass, int animationFrame)
 149 {
 150         if( numSMs < 0 ) return;
 151         checkCudaErrors(cudaMemcpy(pbo_mem, data, size, cudaMemcpyHostToDevice));
 152         dim3 threads(BLOCKDIM_X, BLOCKDIM_Y);
 153         dim3 grid(iDivUp(pbo_w, BLOCKDIM_X), iDivUp(pbo_h, BLOCKDIM_Y));
 154         Calc<float><<<numSMs, threads>>>((uchar4 *)pbo_mem, pbo_w, pbo_h,
 155                         is_julia, crunch, grid.x, grid.x*grid.y,
 156                         float(x_off), float(y_off), float(x_julia), float(y_julia), float(scale),
 157                         colors, pass, animationFrame);
 158         checkCudaErrors(cudaMemcpy(data, pbo_mem, size, cudaMemcpyDeviceToHost));
 159 }
 160