This project is read-only.

Long FOR loops take a lot of time when using allocated variables

Dec 16, 2014 at 10:05 PM
Hi.
I've been testing CUDAfy and I'm running into some trouble with allocated variables.
// Size of the first dimension of the g matrix and length of the r result array;
// also the upper bound of both row loops (i and j) in runTest.
public const int N1 = 1000000;
// Size of the second dimension of the g matrix; also the upper bound of both
// column loops (k1 and k2) in runTest.
public const int N2 = 6;

{
    // Translate the [Cudafy]-marked code into a module and load it on the selected device.
    CudafyModule km = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    // Host-side buffers: g is the N1 x N2 input matrix (all zeros as created here),
    // r receives one counter per row of g.
    byte[,] g = new byte[N1, N2];
    uint[] r = new uint[N1];

    try
    {
        // Device-side buffers with the same dimensions as the host arrays.
        byte[,] dev_g = gpu.Allocate<byte>(g);
        uint[] dev_r = gpu.Allocate<uint>(r);

        gpu.CopyToDevice(g, dev_g);

        // Dynamic launch: Launch() returns a dynamic object on which the kernel is
        // invoked by name. (The original text read "gpu.LaunchrunTest", which is not
        // a GPGPU member.)
        // NOTE(review): Launch() without arguments presumably uses a single block with
        // a single thread - confirm against the CUDAfy documentation.
        gpu.Launch().runTest(dev_g, dev_r, 4);

        gpu.CopyFromDevice(dev_r, r);
    }
    finally
    {
        // Release the device allocations even if allocation, copy or launch throws.
        gpu.FreeAll();
    }
}
/// <summary>
/// For every row of <paramref name="g"/>, counts how many rows (the row itself
/// included) share at least <paramref name="elements"/> values with it, and
/// stores that count in <paramref name="r"/>.
/// </summary>
/// <param name="g">Input matrix; indexed as [0..N1), [0..N2).</param>
/// <param name="r">Output array; entry i receives the count for row i of g.</param>
/// <param name="elements">
/// Minimum number of values of a row that must also occur in another row for
/// that other row to be counted.
/// </param>
/// <remarks>
/// The body does not use any thread index: it walks all N1 x N1 row pairs in a
/// plain nested loop.
/// </remarks>
[Cudafy]
public static void runTest(byte[,] g, uint[] r, int elements)
{
    for (int row = 0; row < N1; row++)
    {
        uint similarRows = 0;

        for (int other = 0; other < N1; other++)
        {
            // Number of values in 'row' that occur at least once in 'other'.
            byte matched = 0;

            for (int a = 0; a < N2; a++)
            {
                // Scan 'other' for the first occurrence of g[row, a];
                // the scan stops at the first hit, so each value counts once.
                int b = 0;
                while (b < N2 && g[row, a] != g[other, b])
                {
                    b++;
                }

                if (b < N2)
                {
                    matched++;
                }
            }

            if (matched >= elements)
            {
                similarRows++;
            }
        }

        r[row] = similarRows;
    }
}
If I comment out this line:
//r[i] = counter;
the application runs much faster, but with the line left in (not commented out) it takes a very long time to run.
Jan 1, 2015 at 4:15 PM
Hi
Given the nature of your kernel, I wouldn't be surprised if removing that last line of code allowed the compiler to optimize it to such an extent that it ends up being an empty method, hence very fast: without the write to r, nothing the loops compute is ever used, so the compiler is free to discard them entirely.