Using global structs in device code

Nov 11, 2012 at 1:06 AM
Edited Nov 11, 2012 at 1:07 AM

Here is a simple example that was giving me problems:

using System;
using System.Diagnostics;
using System.Linq;
using Cudafy;
using Cudafy.Host;
using Cudafy.Translator;

namespace CudafyTuningTsp {
   /// <summary>
   /// Minimal example in which a [Cudafy] struct declared inside a class is used
   /// both as a kernel parameter element type and as the element type of a
   /// thread.AllocateShared call.
   /// </summary>
   public class TypeTest  {
      protected const int _threadsPerBlock   = 256;
      protected const int _blocksPerGrid      =  256;

      /// <summary>
      /// Result record: one element per block is written by the kernel and read
      /// back by <see cref="GetAnswer"/>.
      /// </summary>
      [Cudafy]
      public struct AnswerStruct { public float distance; public long pathNo; }

      /// <summary>
      /// Loads the translated module, launches <see cref="GpuFindPathDistance"/>
      /// with _blocksPerGrid blocks of _threadsPerBlock threads, copies the
      /// per-block results back and returns the entry with the smallest distance.
      /// </summary>
      /// <returns>
      /// The smallest distance found and its path number; both timing fields are
      /// set to 0.
      /// </returns>
      // NOTE(review): the declared return type is Answer but an AnswerBetter is
      // constructed; both types are declared outside this snippet, so this relies
      // on AnswerBetter being convertible to Answer - confirm.
      internal Answer GetAnswer() {
         using (var gpu = CudafyHost.GetDevice()) {
            gpu.LoadModule(CudafyTranslator.Cudafy());

            var answer = new AnswerStruct[_blocksPerGrid];

            // Release the device allocations even when the launch, the
            // synchronisation or the copy back throws.
            try {
               var gpuAnswer = gpu.Allocate(answer);

               gpu.Launch(_blocksPerGrid, _threadsPerBlock,
                  GpuFindPathDistance,   gpuAnswer);

               gpu.Synchronize();
               gpu.CopyFromDevice(gpuAnswer, answer);
            }
            finally {
               gpu.FreeAll();
            }

            // Host-side scan over the per-block results for the smallest distance.
            var bestDistance      = float.MaxValue;
            var bestPermutation   = 0L;
            for (var i = 0; i < _blocksPerGrid; i++) {
               if (answer[i].distance < bestDistance) {
                  bestDistance      = answer[i].distance;
                  bestPermutation   = answer[i].pathNo;
               }
            }

            return new AnswerBetter {
               Distance      = bestDistance,
               Permutation   = bestPermutation,
               msLoadTime   = 0,
               msRunTime   = 0
            };
         }
      }

      /// <summary>
      /// Kernel: every thread stores float.MaxValue / 0 in its own slot of the
      /// array obtained from thread.AllocateShared; after thread.SyncThreads(),
      /// the thread with threadIdx.x == 0 copies slot 0 into
      /// answer[blockIdx.x].
      /// </summary>
      /// <param name="thread">Thread context supplied by the launch.</param>
      /// <param name="answer">Output array, indexed by blockIdx.x.</param>
      [Cudafy]
      public static void GpuFindPathDistance(GThread thread, AnswerStruct[] answer) {
         // This AllocateShared<AnswerStruct> call is the use of the nested struct
         // type that the example is built around.
         var answerLocal      = thread.AllocateShared<AnswerStruct> ("ansL",   _threadsPerBlock);

         var bestDistance      = float.MaxValue;
         var bestPermutation   = 0L;

         answerLocal[thread.threadIdx.x].distance   = bestDistance;
         answerLocal[thread.threadIdx.x].pathNo      = bestPermutation;
         thread.SyncThreads();

         // Only one thread per block writes that block's result.
         if (thread.threadIdx.x == 0) {
            answer[thread.blockIdx.x]               = answerLocal[0];
         }
      }
   }
}

The AnswerStruct in the AllocateShared call was retaining the period member operator, though the declaration and the use in the method prototype weren't. Assuming the latter behaviour is correct, I believe the fix is the following (one-line) change in CUDALanguage.cs:

  /// <summary>
  /// Maps a declaring type name to the name to emit for it.
  /// </summary>
  /// <param name="declaringType">
  /// Type name; it is passed through NormalizeDeclaringType before the lookup.
  /// </param>
  /// <returns>
  /// The Name of the matching SpecialTypes entry when one exists; otherwise the
  /// normalized name with every '.' removed.
  /// </returns>
  public static string TranslateSpecialType(string declaringType)
  {
      declaringType = NormalizeDeclaringType(declaringType);

      if (!SpecialTypes.ContainsKey(declaringType))
      {
          // 2012-11-10 PG: this branch previously returned declaringType
          // unchanged; the periods are now stripped from the name.
          return declaringType.Replace(".","");
      }

      var special = SpecialTypes[declaringType];

      // Register the entry's optional header only when it actually names one.
      var header = special.OptionalHeader;
      if (!string.IsNullOrEmpty(header))
      {
          UseOptionalHeader(header);
      }

      return special.Name;
  }

I have tested this with CudaByExample, CudaExamples, and my CudaTuning tutorials without issue.

Pieter

Coordinator
Nov 13, 2012 at 9:02 AM
Edited Nov 13, 2012 at 9:19 AM

Thanks Pieter.  Nested structs were not recommended as they were not tested in all circumstances.  If this fix does the job then I'll include it.

- Looks good!  Thanks.