Emulator vs real CUDA mode - different values

Mar 15, 2012 at 7:21 PM

Hi,

I'm trying to write a small application that draws the Mandelbrot set on a PictureBox component (Windows Forms).
The problem is that in Emulator mode everything is fine (that is, I see a beautiful fractal), but in CUDA mode I'm getting wrong results, and I have no idea what is going on - please help.
Here's a fragment of code:

        /// <summary>
        /// Renders the Mandelbrot set for a rectangle of the complex plane into a new
        /// 32bpp ARGB bitmap by delegating the per-pixel work to createMandelbrotSetWithCUDA.
        /// </summary>
        /// <param name="maxIterations">Iteration limit after which a point is treated as inside the set.</param>
        /// <param name="xmin">Real-axis value passed through as the left edge of the region.</param>
        /// <param name="xmax">Real-axis value passed through as the right edge of the region.</param>
        /// <param name="ymin">Imaginary-axis value passed through as the lower bound of the region.</param>
        /// <param name="ymax">Imaginary-axis value passed through as the upper bound of the region.</param>
        /// <param name="imageWidth">Width of the produced bitmap in pixels.</param>
        /// <param name="imageHeight">Height of the produced bitmap in pixels.</param>
        /// <returns>The rendered bitmap; the caller owns it and is responsible for disposing it.</returns>
        private Bitmap getMandelbrotSetPictureCUDA(int maxIterations, double xmin, double xmax, double ymin, double ymax, int imageWidth, int imageHeight)
        {
            // The lock / copy / unlock pattern is from the Cudafy examples (CudaByExample).
            Bitmap bmp = new Bitmap(imageWidth, imageHeight, PixelFormat.Format32bppArgb);
            try
            {
                Rectangle rect = new Rectangle(0, 0, bmp.Width, bmp.Height);
                BitmapData bmpData = bmp.LockBits(rect, ImageLockMode.ReadWrite, bmp.PixelFormat);
                try
                {
                    IntPtr ptr = bmpData.Scan0;

                    // Managed staging buffer covering every scan line of the locked bitmap.
                    int bytes = bmpData.Stride * bmp.Height;
                    byte[] rgbValues = new byte[bytes];

                    System.Runtime.InteropServices.Marshal.Copy(ptr, rgbValues, 0, bytes);

                    createMandelbrotSetWithCUDA(rgbValues, maxIterations, xmin, xmax, ymin, ymax, 0, 0, imageWidth, imageHeight);

                    System.Runtime.InteropServices.Marshal.Copy(rgbValues, 0, ptr, bytes);
                }
                finally
                {
                    // Always release the lock, even when the render call throws;
                    // otherwise the bitmap stays locked for its whole lifetime.
                    bmp.UnlockBits(bmpData);
                }

                return bmp;
            }
            catch
            {
                // Nobody receives the bitmap on failure, so release its GDI+ resources here.
                bmp.Dispose();
                throw;
            }
        }
        /// <summary>
        /// Fills <paramref name="ptr"/> with a Mandelbrot rendering, 4 bytes per pixel,
        /// by launching <c>kernel</c> with one block per pixel.
        /// </summary>
        /// <param name="ptr">Destination buffer; must hold at least swidth * sheight * 4 bytes.</param>
        /// <param name="maxIterations">Iteration limit forwarded to the kernel.</param>
        /// <param name="xrmin">Real value mapped to screen column <paramref name="xs0"/>.</param>
        /// <param name="xrmax">Real value mapped to screen column xs0 + swidth.</param>
        /// <param name="yrmin">Imaginary value mapped to screen row ys0 + sheight.</param>
        /// <param name="yrmax">Imaginary value mapped to screen row <paramref name="ys0"/>.</param>
        /// <param name="xs0">Screen column of the region's left edge.</param>
        /// <param name="ys0">Screen row of the region's top edge.</param>
        /// <param name="swidth">Width of the rendered area in pixels (grid X dimension).</param>
        /// <param name="sheight">Height of the rendered area in pixels (grid Y dimension).</param>
        private static void createMandelbrotSetWithCUDA(byte[] ptr,int maxIterations, double xrmin, double xrmax, double yrmin, double yrmax,int xs0,int ys0,int swidth, int sheight)
        {
            if (ptr == null)
            {
                throw new System.ArgumentNullException("ptr");
            }
            if (swidth <= 0 || sheight <= 0)
            {
                throw new System.ArgumentOutOfRangeException("swidth", "Image width and height must be positive.");
            }
            // The kernel writes 4 bytes at (x + y * gridWidth) * 4, so the buffer must cover the whole grid.
            if ((long)swidth * sheight * 4 > ptr.Length)
            {
                throw new System.ArgumentException("Buffer is too small for swidth * sheight pixels at 4 bytes each.", "ptr");
            }

            CudafyModule km = CudafyTranslator.Cudafy();

            // NOTE(review): this selects the emulator target; presumably eGPUType.Cuda is
            // what is used when running on real hardware - confirm against the caller's intent.
            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Emulator);

            gpu.LoadModule(km);

            try
            {
                byte[] dev_bitmap = gpu.Allocate<byte>(ptr.Length);

                // Scaling coefficients transforming screen coordinates to real coordinates:
                //   re = EE * xs + FF,   im = GG * ys + HH
                double EE = -((-xrmax + xrmin) / (double)swidth);
                double FF = -((-swidth * xrmin + xrmax * xs0 - xrmin * xs0) / (double)swidth);
                double GG = -((yrmax - yrmin) / sheight);
                double HH = -((-sheight * yrmax - yrmax * ys0 + yrmin * ys0) / (double)sheight);

                // One block per pixel. The grid must be width x height; the previous
                // (swidth, swidth) grid was only correct for square images.
                gpu.Launch(new dim3(swidth, sheight), 1).kernel(dev_bitmap, maxIterations, EE,FF,GG,HH);

                gpu.CopyFromDevice(dev_bitmap, ptr);
            }
            finally
            {
                // Release device allocations even if the launch or the copy back fails.
                gpu.FreeAll();
            }
        }
        [Cudafy]
        public static void kernel(GThread thread, byte[] ptr,int maxIterations,double EE,double FF,double GG,double HH)
        {
            // Each block handles one pixel: the block index is the screen position and
            // the grid width is the row length used to flatten it into a pixel index.
            int px = thread.blockIdx.x;
            int py = thread.blockIdx.y;
            int pixelBase = (px + py * thread.gridDim.x) * 4;

            // Screen position -> point c of the complex plane.
            double cRe = EE * (px) + FF;
            double cIm = GG * (py) + HH;

            // Iterate z = z^2 + c from z = 0 until |z|^2 reaches 4 or the limit is hit.
            double zRe = 0;
            double zIm = 0;
            int steps = 0;
            while (steps < maxIterations && (zRe * zRe + zIm * zIm) < (2 * 2))
            {
                double nextRe = zRe * zRe - zIm * zIm + cRe;
                zIm = 2 * zRe * zIm + cIm;
                zRe = nextRe;
                steps++;
            }

            // Points that never escaped stay black; the rest are coloured by escape time.
            int red = 0;
            int green = 0;
            int blue = 0;
            if (steps != maxIterations)
            {
                Colors shade = HsvToRgb((steps * 2) % 360, 1, 2);
                red = shade.r;
                green = shade.g;
                blue = shade.b;
            }

            // Four bytes per pixel; the last one is always fully opaque.
            ptr[pixelBase] = (byte)red;
            ptr[pixelBase + 1] = (byte)green;
            ptr[pixelBase + 2] = (byte)blue;
            ptr[pixelBase + 3] = 255;
        }
 
[Cudafy]
        public struct Colors
        {
            // Red, green and blue channels, stored as plain ints.
            public int r;
            public int g;
            public int b;

            // Builds a colour from its three channel values.
            public Colors(int rr, int gg, int bb)
            {
                this.r = rr;
                this.g = gg;
                this.b = bb;
            }
        }

        [Cudafy]
        public static Colors HsvToRgb(double h, double S, double V)
        {
            // HSV to RGB conversion, adapted from the C/C++ macro by
            // T. Nathan Mundhenk (mundhenk@usc.edu).
            // h is the hue in degrees; S and V scale the channels, which are
            // multiplied by 255 and clamped to 0..255 at the end.

            // Wrap the hue into [0, 360).
            double hue = h;
            while (hue < 0) { hue += 360; }
            while (hue >= 360) { hue -= 360; }

            double red, green, blue;
            if (V <= 0)
            {
                // No brightness at all.
                red = 0;
                green = 0;
                blue = 0;
            }
            else if (S <= 0)
            {
                // No saturation: all three channels equal V.
                red = V;
                green = V;
                blue = V;
            }
            else
            {
                // Split the hue circle into six 60-degree sectors.
                double sector = hue / 60.0;
                int sectorIndex = (int)Math.Floor(sector);
                double frac = sector - sectorIndex;
                double low = V * (1 - S);
                double falling = V * (1 - S * frac);
                double rising = V * (1 - S * (1 - frac));

                switch (sectorIndex)
                {
                    // Red dominant (6 repeats sector 0 in case the math overshoots).
                    case 0:
                    case 6:
                        red = V;
                        green = rising;
                        blue = low;
                        break;

                    // Green dominant.
                    case 1:
                        red = falling;
                        green = V;
                        blue = low;
                        break;
                    case 2:
                        red = low;
                        green = V;
                        blue = rising;
                        break;

                    // Blue dominant.
                    case 3:
                        red = low;
                        green = falling;
                        blue = V;
                        break;
                    case 4:
                        red = rising;
                        green = low;
                        blue = V;
                        break;

                    // Red dominant (-1 repeats sector 5 in case the math undershoots).
                    case 5:
                    case -1:
                        red = V;
                        green = low;
                        blue = falling;
                        break;

                    // Undefined sector: fall back to a grey of brightness V.
                    default:
                        red = V;
                        green = V;
                        blue = V;
                        break;
                }
            }

            Colors result;
            result.r = Clamp((int)(red * 255.0));
            result.g = Clamp((int)(green * 255.0));
            result.b = Clamp((int)(blue * 255.0));
            return result;
        }


        [Cudafy]
        public static int Clamp(int i)
        {
            // Saturate the value to the 0..255 range.
            return i < 0 ? 0 : (i > 255 ? 255 : i);
        }

getMandelbrotSetPictureCUDA is called as: 
getMandelbrotSetPictureCUDA(1000,-2.1, 1, -1.1, 1.1, 600, 600);

Jacob
Coordinator
Mar 16, 2012 at 1:33 PM

Can you post the generated CUDA file?  See your application directory.  

Mar 16, 2012 at 2:38 PM

 

// RGB triple returned by HsvToRgb; each channel is a plain int.
struct Form1Colors
{
	int r;
	int g;
	int b;

	// Default construction leaves the channels unset.
	__device__ Form1Colors()
	{
	}

	// Builds a colour from its three channel values.
	__device__ Form1Colors(int rr, int gg, int bb)
		: r(rr), g(gg), b(bb)
	{
	}
};


// ParallelMandelbrot.Form1
// Entry-point kernel: writes four bytes per pixel into ptr (defined below).
extern "C" __global__ void kernel(unsigned char* ptr, int ptrLen0, int maxIterations, double EE, double FF, double GG, double HH);
// ParallelMandelbrot.Form1
// Device helper: converts a hue/saturation/value triple to a clamped RGB colour.
__device__ Form1Colors HsvToRgb(double h, double S, double V);
// ParallelMandelbrot.Form1
// Device helper: limits an int to the range 0..255.
__device__ int Clamp(int i);

// ParallelMandelbrot.Form1
// Mandelbrot escape-time kernel.
// Launch layout: a 2D grid with one block per pixel (blockIdx.x = column,
// blockIdx.y = row, gridDim.x = row length); only the block index is used,
// so a single thread per block is expected. No shared memory is used.
//   ptr           output buffer, 4 bytes written per pixel
//   ptrLen0       length of ptr in bytes (assumed: Cudafy passes the array
//                 length of the managed byte[] here - TODO confirm)
//   maxIterations iteration limit; points reaching it are written as black
//   EE, FF        column -> real part:      re = EE * x + FF
//   GG, HH        row    -> imaginary part: im = GG * y + HH
extern "C" __global__ void kernel(unsigned char* ptr, int ptrLen0, int maxIterations, double EE, double FF, double GG, double HH)
{
	int x = blockIdx.x;
	int y = blockIdx.y;
	int pixel = x + y * gridDim.x;

	// Skip pixels whose four bytes would not fit in the buffer; a grid larger
	// than the image would otherwise write out of bounds.
	if ((long long)pixel * 4 + 3 >= (long long)ptrLen0)
	{
		return;
	}

	int red = 0;
	int green = 0;
	int blue = 0;
	int iteration = 0;
	double zRe = 0.0;
	double zIm = 0.0;
	double cRe = EE * (double)x + FF;
	double cIm = GG * (double)y + HH;

	// Iterate z = z^2 + c until |z|^2 reaches 4 or the iteration limit is hit.
	while (zRe * zRe + zIm * zIm < 4.0 && iteration < maxIterations)
	{
		double nextRe = zRe * zRe - zIm * zIm + cRe;
		zIm = 2.0 * zRe * zIm + cIm;
		zRe = nextRe;
		iteration++;
	}

	// Points that never escaped keep the black default; others are coloured
	// by their escape time.
	if (iteration != maxIterations)
	{
		Form1Colors colors = HsvToRgb((double)(iteration * 2 % 360), 1.0, 2.0);
		red = colors.r;
		green = colors.g;
		blue = colors.b;
	}

	ptr[(pixel * 4)] = (unsigned char)red;
	ptr[(pixel * 4 + 1)] = (unsigned char)green;
	ptr[(pixel * 4 + 2)] = (unsigned char)blue;
	ptr[(pixel * 4 + 3)] = 255;
}
// ParallelMandelbrot.Form1
// Converts hue h (degrees), saturation S and value V to an RGB colour whose
// channels are scaled by 255 and clamped to 0..255.
__device__ Form1Colors HsvToRgb(double h, double S, double V)
{
	// Wrap the hue into [0, 360).
	double hue = h;
	while (hue < 0.0)
	{
		hue += 360.0;
	}
	while (hue >= 360.0)
	{
		hue -= 360.0;
	}

	double red;
	double green;
	double blue;
	if (V <= 0.0)
	{
		// No brightness at all.
		red = green = blue = 0.0;
	}
	else if (S <= 0.0)
	{
		// No saturation: all three channels equal V.
		red = green = blue = V;
	}
	else
	{
		// Split the hue circle into six 60-degree sectors.
		double sector = hue / 60.0;
		int sectorIndex = (int)floor(sector);
		double frac = sector - (double)sectorIndex;
		double low = V * (1.0 - S);
		double falling = V * (1.0 - S * frac);
		double rising = V * (1.0 - S * (1.0 - frac));

		switch (sectorIndex)
		{
			// Red dominant; -1 repeats sector 5 as a guard value.
			case -1:
			case 5:
				red = V;
				green = low;
				blue = falling;
				break;
			// Red dominant; 6 repeats sector 0 as a guard value.
			case 0:
			case 6:
				red = V;
				green = rising;
				blue = low;
				break;
			// Green dominant.
			case 1:
				red = falling;
				green = V;
				blue = low;
				break;
			case 2:
				red = low;
				green = V;
				blue = rising;
				break;
			// Blue dominant.
			case 3:
				red = low;
				green = falling;
				blue = V;
				break;
			case 4:
				red = rising;
				green = low;
				blue = V;
				break;
			// Undefined sector: fall back to a grey of brightness V.
			default:
				red = V;
				green = V;
				blue = V;
				break;
		}
	}

	Form1Colors result;
	result.r = Clamp((int)(red * 255.0));
	result.g = Clamp((int)(green * 255.0));
	result.b = Clamp((int)(blue * 255.0));
	return result;
}
// ParallelMandelbrot.Form1
// Limits i to the range 0..255.
__device__ int Clamp(int i)
{
	if (i < 0)
	{
		return 0;
	}
	return (i > 255) ? 255 : i;
}

Mar 19, 2012 at 6:54 PM

I resolved the problem by changing all doubles to floats.

 

4jacob4

Coordinator
Mar 19, 2012 at 7:19 PM

What CUDA architecture does your GPU support?