Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Passing structs to CUDA kernels

I'm new to CUDA C, and am trying to pass a typedef'd struct into a kernel. My method worked fine when I tried it with a struct containing only ints, but when I switch to floats I get meaningless numbers back as results. I assume this has to do with alignment, and I tried including __align__ along with my type declaration, but to no avail. Can someone give me an example of how this is done, or provide an alternative approach? I'm trying to set it up so that I can easily add or remove fields without changing anything other than the struct and the kernel. My code:

typedef struct __align__(8)
{
    float a, b;
} point;

__global__ void testKernel(point *p)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    p[i].a = 1.1;
    p[i].b = 2.2;
}

int main(void)
{
        // set number of points 
    int numPoints    = 16,
        gpuBlockSize = 4,
        pointSize    = sizeof(point),
        numBytes     = numPoints * pointSize,
        gpuGridSize  = numPoints / gpuBlockSize;

        // allocate memory
    point *cpuPointArray = new point[numPoints],
          *gpuPointArray = new point[numPoints];
    cpuPointArray = (point*)malloc(numBytes);
    cudaMalloc((void**)&gpuPointArray, numBytes);

        // launch kernel
    testKernel<<<gpuGridSize,gpuBlockSize>>>(gpuPointArray);

        // retrieve the results
    cudaMemcpy(cpuPointArray, gpuPointArray, numBytes, cudaMemcpyDeviceToHost);
    printf("testKernel results:\n");
    for(int i = 0; i < numPoints; ++i)
    {
        printf("point.a: %d, point.b: %d\n",cpuPointArray[i].a,cpuPointArray[i].b);
    }

        // deallocate memory
    free(cpuPointArray);
    cudaFree(gpuPointArray);

    return 0;
}
like image 264
Paul Avatar asked Nov 14 '10 08:11

Paul


1 Answers

Since there doesn't appear to be any decent documentation on how to do this, I thought I'd post the final, revised code here. It turns out that the __align__ part was unnecessary as well, the actual problem was the use of %d in the printf when trying to print floats.

#include <stdlib.h>
#include <stdio.h>

typedef struct
{
    float a, b;
} point;

__global__ void testKernel(point *p)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    p[i].a = 1.1;
    p[i].b = 2.2;
}

int main(void)
{
        // set number of points 
    int numPoints    = 16,
        gpuBlockSize = 4,
        pointSize    = sizeof(point),
        numBytes     = numPoints * pointSize,
        gpuGridSize  = numPoints / gpuBlockSize;

        // allocate memory
    point *cpuPointArray,
          *gpuPointArray;
    cpuPointArray = (point*)malloc(numBytes);
    cudaMalloc((void**)&gpuPointArray, numBytes);

        // launch kernel
    testKernel<<<gpuGridSize,gpuBlockSize>>>(gpuPointArray);

        // retrieve the results
    cudaMemcpy(cpuPointArray, gpuPointArray, numBytes, cudaMemcpyDeviceToHost);
    printf("testKernel results:\n");
    for(int i = 0; i < numPoints; ++i)
    {
        printf("point.a: %f, point.b: %f\n",cpuPointArray[i].a,cpuPointArray[i].b);
    }

        // deallocate memory
    free(cpuPointArray);
    cudaFree(gpuPointArray);

    return 0;
}
like image 115
Paul Avatar answered Sep 18 '22 12:09

Paul