1 #include "mandelcuda.h"
2 #include <cuda_runtime.h>
3 #include <cuda_gl_interop.h>
4 #include "helper_cuda.h"
7 // The dimensions of the thread block
// Absolute value of n, written as a macro so it works for any arithmetic type.
// The argument appears twice in the expansion (once in the test, once in the
// selected branch), so do not pass an expression with side effects.
10 #define ABS(n) ((n) < 0 ? -(n) : (n))
// Select the CUDA device and cache the properties the renderer needs.
// Makes the device returned by gpuGetMaxGflopsDeviceId() current, prints its
// name and compute capability, and stores:
//   version - compute capability encoded as major*10 + minor
//   numSMs  - multiprocessor count, or -1 when the device reports zero
12 void MandelCuda::init_dev()
15 // int dev_id = findCudaDevice(argc, (const char **)argv);
16 int dev_id = gpuGetMaxGflopsDeviceId();
17 checkCudaErrors(cudaSetDevice(dev_id));
18 cudaDeviceProp deviceProp;
19 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev_id));
20 printf("GPU Device %d: \"%s\" with compute capability %d.%d\n",
21 dev_id, deviceProp.name, deviceProp.major, deviceProp.minor);
22 version = deviceProp.major * 10 + deviceProp.minor;
23 numSMs = deviceProp.multiProcessorCount;
// Run() returns early when numSMs is negative, so -1 marks "nothing to launch on".
24 if( !numSMs ) numSMs = -1;
// Register an OpenGL buffer object with CUDA and map it for device access.
//   pbo    - OpenGL buffer id handed to cudaGraphicsGLRegisterBuffer
//   pw, ph - stored as pbo_w / pbo_h; Run() passes them to the kernel as the
//            image width and height
// On return pbo_mem / pbo_bytes hold the mapped device pointer and its size.
// The resource stays mapped after this call; finish() unmaps it.
27 void MandelCuda::init(int pbo, int pw, int ph)
// A non-negative pbo_id means a buffer is already recorded: do nothing.
29 if( pbo_id >= 0 ) return;
30 pbo_id = pbo; pbo_w = pw; pbo_h = ph;
// NOTE(review): cudaGraphicsMapFlagsNone belongs to the map-flags enum; the
// register call documents cudaGraphicsRegisterFlags* values. Presumably both
// "None" constants are 0 so this works as intended - confirm.
31 checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_pbo, pbo_id, cudaGraphicsMapFlagsNone));
32 checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo, 0));
34 checkCudaErrors(cudaGraphicsResourceGetMappedPointer(&pbo_mem, &pbo_bytes, cuda_pbo));
// Release the CUDA view of the pixel buffer: unmap the graphics resource,
// unregister it, and clear the cuda_pbo handle.
37 void MandelCuda::finish()
41 checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo));
// NOTE(review): unlike the other CUDA calls in this file, the return code of
// the unregister call is not checked - confirm that this is deliberate.
43 cudaGraphicsUnregisterResource(cuda_pbo); cuda_pbo = 0;
// Default constructor.
// NOTE(review): init() treats pbo_id >= 0 as "already registered", so pbo_id
// presumably has to start out negative - confirm against the initialisation.
47 MandelCuda::MandelCuda()
// Destructor.
// NOTE(review): init() leaves the graphics resource registered and mapped;
// confirm that it is released (see finish()) before the object goes away.
56 MandelCuda::~MandelCuda()
// Host helper used by Run() to turn an image extent and a block extent into a
// block count. Returns v+1 when a is not an exact multiple of b, otherwise v.
// NOTE(review): presumably v is the integer quotient a / b, which makes this a
// round-up division - confirm against its declaration.
60 static inline int iDivUp(int a, int b)
63 return a % b ? v+1 : v;
66 // Determine if two pixel colors are within tolerance
// Returns 1 as soon as the x, y or z channel of the two colors differs by
// more than 10; the w channel is never compared. The differences are taken in
// int, so a smaller-minus-larger channel yields a negative value that ABS
// folds back rather than an unsigned wrap-around.
// NOTE(review): callers add the result to a counter, so the within-tolerance
// result is presumably 0 - confirm.
67 __device__ inline int CheckColors(const uchar4 &color0, const uchar4 &color1)
69 int x = color1.x - color0.x;
70 if( ABS(x) > 10 ) return 1;
71 int y = color1.y - color0.y;
72 if( ABS(y) > 10 ) return 1;
73 int z = color1.z - color0.z;
74 if( ABS(z) > 10 ) return 1;
79 // The core MandelCuda calculation function template
// Escape-time iteration in arithmetic type T.
//   n      - iteration budget (Calc passes its `crunch` argument)
//   ix, iy - starting point of the iteration
//   xC, yC - additive constant applied on every step
// NOTE(review): presumably x, y start at ix, iy, the counter i starts at n,
// and the remaining count is returned (Calc computes crunch - result) -
// confirm.
80 template<class T> __device__
81 inline int CalcCore(const int n, T ix, T iy, T xC, T yC)
// Squares are kept in xx / yy so the escape test below can reuse them.
84 T xx = x * x, yy = y * y;
// Stop when the counter reaches zero after the pre-decrement, or when the
// squared magnitude xx + yy is no longer below 4.
86 while( --i && (xx + yy < 4.0f) ) {
87 y = x * y + x * y + yC ; // 2*x*y + yC
// Kernel: computes or refines one fractal pixel per thread, in place, in dst.
//
// Layout: dst is indexed as img_w*y + x. The thread block is 2-D
// (threadIdx.x / threadIdx.y pick the pixel inside a tile); only blockIdx.x
// and gridDim.x are used, so the grid is treated as 1-D. Each CUDA block walks
// the logical tiles bidx = blockIdx.x, blockIdx.x + gridDim.x, ... while
// bidx < numBlocks; tile bidx sits at column bidx % gridWidth and row
// bidx / gridWidth of the tile grid.
//
//   dst                - pixel buffer, read and written
//   img_w, img_h       - image size in pixels
//   is_julia           - nonzero: the pixel position seeds the iteration and
//                        (x_julia, y_julia) is the constant; zero: the
//                        iteration starts at 0 with the pixel position as the
//                        constant
//   crunch             - iteration budget handed to CalcCore
//   gridWidth          - number of tiles per row
//   numBlocks          - total number of tiles
//   x_off, y_off, scale- pixel-to-plane mapping: coord = pixel * scale + offset
//   colors             - per-channel multipliers applied to the iteration count
//   frame              - pass index; 0 marks every pixel for computation
//   animationFrame     - added to nonzero iteration counts before coloring
96 template<class T> __global__
97 void Calc(uchar4 *dst, const int img_w, const int img_h, const int is_julia,
98 const int crunch, const int gridWidth, const int numBlocks,
99 const T x_off, const T y_off, const T x_julia, const T y_julia, const T scale,
100 const uchar4 colors, const int frame, const int animationFrame)
102 // loop until all blocks completed
103 for( unsigned int bidx=blockIdx.x; bidx<numBlocks; bidx+=gridDim.x ) {
104 unsigned int blockX = bidx % gridWidth;
105 unsigned int blockY = bidx / gridWidth;
106 const int x = blockDim.x * blockX + threadIdx.x;
107 const int y = blockDim.y * blockY + threadIdx.y;
// Tiles on the right / bottom edge may overhang the image.
108 if( x >= img_w || y >= img_h ) continue;
// n counts reasons to compute this pixel; it starts at 1 on frame 0.
109 int pi = img_w*y + x, n = !frame ? 1 : 0;
110 uchar4 pixel = dst[pi];
// Compare against the four neighbours; any difference beyond the
// CheckColors tolerance bumps n.
// NOTE(review): the guards for the pi-1 and pi-img_w reads are not visible
// here - confirm that x > 0 / y > 0 are checked before them.
// NOTE(review): neighbours are read from dst while other threads store into
// dst at the end of this loop body with no synchronization visible - confirm
// that the resulting nondeterminism is acceptable.
112 n += CheckColors(pixel, dst[pi-1]);
113 if( !n && x+1 < img_w )
114 n += CheckColors(pixel, dst[pi+1]);
116 n += CheckColors(pixel, dst[pi-img_w]);
117 if( !n && y+1 < img_h )
118 n += CheckColors(pixel, dst[pi+img_w]);
// Map the pixel to plane coordinates, then pick the start point and constant
// according to is_julia.
121 const T tx = T(x) * scale + x_off;
122 const T ty = T(y) * scale + y_off;
123 const T ix = is_julia ? tx : 0;
124 const T iy = is_julia ? ty : 0;
125 const T xC = is_julia ? x_julia : tx;
126 const T yC = is_julia ? y_julia : ty;
127 int m = CalcCore(crunch, ix,iy, xC,yC);
// A positive result becomes crunch - m; anything else maps to 0.
128 m = m > 0 ? crunch - m : 0;
// A count of 0 is left alone so it is not shifted by the animation offset.
129 if( m ) m += animationFrame;
// NOTE(review): color is stored to dst below, so it is presumably a uchar4
// and these products are truncated to 8 bits - confirm.
132 color.x = m * colors.x;
133 color.y = m * colors.y;
134 color.z = m * colors.z;
// Blend with the stored pixel weighted by frame; adding frame2 = frame1/2
// before the division rounds to nearest.
137 int frame1 = frame+1, frame2 = frame1/2;
138 color.x = (pixel.x * frame + color.x + frame2) / frame1;
139 color.y = (pixel.y * frame + color.y + frame2) / frame1;
140 color.z = (pixel.z * frame + color.z + frame2) / frame1;
141 dst[pi] = color; // Output the pixel
// Run one rendering pass on the GPU.
//
// The host image is uploaded into the mapped pixel buffer (pbo_mem), the
// Calc<float> kernel updates it in place, and the buffer is copied back into
// the same host array.
//
//   data            - host pixel buffer; uploaded before and overwritten after
//                     the kernel
//   size            - number of bytes copied in each direction
//   is_julia        - forwarded to the kernel (Julia vs. Mandelbrot selection)
//   crunch          - iteration budget forwarded to the kernel
//   x_off, y_off, x_julia, y_julia, scale
//                   - plane mapping and Julia constant; narrowed to float
//                     because the kernel is instantiated for float
//   colors          - per-channel color multipliers
//   pass            - forwarded as the kernel's `frame` argument
//   animationFrame  - forwarded to the kernel
//
// NOTE(review): size is not validated against pbo_bytes - confirm that callers
// never pass more bytes than the mapped buffer holds.
void MandelCuda::Run(unsigned char *data, unsigned int size, int is_julia, int crunch,
        double x_off, double y_off, double x_julia, double y_julia, double scale,
        uchar4 colors, int pass, int animationFrame)
{
    // init_dev() stores -1 when the device reports no multiprocessors.
    if( numSMs < 0 ) return;

    checkCudaErrors(cudaMemcpy(pbo_mem, data, size, cudaMemcpyHostToDevice));

    // The kernel is launched with numSMs blocks; each block loops over the
    // logical tiles of the image, so only the tile counts are passed down.
    dim3 threads(BLOCKDIM_X, BLOCKDIM_Y);
    dim3 grid(iDivUp(pbo_w, BLOCKDIM_X), iDivUp(pbo_h, BLOCKDIM_Y));
    Calc<float><<<numSMs, threads>>>((uchar4 *)pbo_mem, pbo_w, pbo_h,
        is_julia, crunch, grid.x, grid.x*grid.y,
        float(x_off), float(y_off), float(x_julia), float(y_julia), float(scale),
        colors, pass, animationFrame);
    // A kernel launch returns no status; an invalid launch configuration is
    // only reported through cudaGetLastError().
    checkCudaErrors(cudaGetLastError());

    // Blocking copy: waits for the kernel and reports any execution error.
    checkCudaErrors(cudaMemcpy(data, pbo_mem, size, cudaMemcpyDeviceToHost));
}