Passing structs to CUDA kernels

Question

I'm new to CUDA C, and am trying to pass a typedef'd struct into a kernel. My method worked fine when I tried it with a struct containing only ints, but when I switch to floats I get meaningless numbers back as results. I assume this has to do with alignment, and I tried including __align__ along with my type declaration, but to no avail. Can someone give me an example of how this is done, or provide an alternative approach? I'm trying to set it up so that I can easily add or remove fields without changing anything other than the struct and the kernel. My code:

// Pair of single-precision values; 8-byte alignment is requested
// explicitly through __align__(8).
typedef struct __align__(8)
{
    float a;    // first component
    float b;    // second component
} point;

// Stores the constant pair (1.1, 2.2) into one element of p per thread.
//
// p: array of points in device-accessible memory, indexed by the flat 1D
//    global thread index.
//
// There is no bounds check: the launch must not create more threads
// (gridDim.x * blockDim.x) than p has elements.
__global__ void testKernel(point *p)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // The f suffix keeps the constants single precision, matching the
    // float fields instead of going through double.
    p[i].a = 1.1f;
    p[i].b = 2.2f;
}

// Asker's original driver: allocates host and device arrays of point, runs
// testKernel over them, copies the result back and prints it.
// The problems the question is about are intentionally left in place and
// are marked with NOTE(review) comments.
int main(void)
{
        // set number of points
    int numPoints    = 16,
        gpuBlockSize = 4,
        pointSize    = sizeof(point),
        numBytes     = numPoints * pointSize,
        gpuGridSize  = numPoints / gpuBlockSize;

        // allocate memory
        // NOTE(review): both new[] buffers are overwritten by the malloc /
        // cudaMalloc calls right below and are never released.
    point *cpuPointArray = new point[numPoints],
          *gpuPointArray = new point[numPoints];
    cpuPointArray = (point*)malloc(numBytes);
    cudaMalloc((void**)&gpuPointArray, numBytes);

        // launch kernel: one thread per point (16 / 4 = 4 blocks of 4 threads)
    testKernel<<<gpuGridSize,gpuBlockSize>>>(gpuPointArray);

        // retrieve the results
    cudaMemcpy(cpuPointArray, gpuPointArray, numBytes, cudaMemcpyDeviceToHost);
    printf("testKernel results:\n");
    for(int i = 0; i < numPoints; ++i)
    {
        // NOTE(review): %d is an integer conversion but the arguments are
        // floats, so the printed numbers are meaningless; %f is the
        // matching conversion.
        printf("point.a: %d, point.b: %d\n",cpuPointArray[i].a,cpuPointArray[i].b);
    }

        // deallocate memory
    free(cpuPointArray);
    cudaFree(gpuPointArray);

    return 0;
}

Paul · Accepted Answer

Since there doesn't appear to be any decent documentation on how to do this, I thought I'd post the final, revised code here. It turns out that the __align__ part was unnecessary after all; the actual problem was the use of %d in the printf format string when printing floats, which need %f.

#include <stdlib.h>
#include <stdio.h>

// Pair of single-precision values, with default alignment.
typedef struct
{
    float a;    // first component
    float b;    // second component
} point;

// Each thread writes the constant pair (1.1, 2.2) into its own element of p.
//
// p: array of points in device-accessible memory, indexed by the flat 1D
//    global thread index (blockIdx.x * blockDim.x + threadIdx.x).
//
// The kernel performs no bounds check, so the caller must launch no more
// threads than p has elements.
__global__ void testKernel(point *p)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Single-precision literals: the fields are float, so avoid double
    // constants in device code.
    p[i].a = 1.1f;
    p[i].b = 2.2f;
}

// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Fills an array of point on the GPU with testKernel, copies it back to the
// host and prints every element.
// Returns 0 on success; terminates with EXIT_FAILURE if the host allocation
// or any CUDA call fails.
int main(void)
{
        // set number of points
        // testKernel has no bounds check, so the grid has to cover the array
        // exactly: numPoints must stay a multiple of gpuBlockSize.
    const int numPoints    = 16;
    const int gpuBlockSize = 4;
    const int gpuGridSize  = numPoints / gpuBlockSize;
        // sizeof(point) keeps this correct when fields are added or removed
    const size_t numBytes  = numPoints * sizeof(point);

        // allocate memory
    point *cpuPointArray = (point*)malloc(numBytes);
    if (cpuPointArray == NULL)
    {
        fprintf(stderr, "malloc failed\n");
        return EXIT_FAILURE;
    }
    point *gpuPointArray = NULL;
    checkCuda(cudaMalloc((void**)&gpuPointArray, numBytes), "cudaMalloc");

        // launch kernel; a launch does not return a status itself, so
        // configuration errors are picked up with cudaGetLastError()
    testKernel<<<gpuGridSize,gpuBlockSize>>>(gpuPointArray);
    checkCuda(cudaGetLastError(), "testKernel launch");

        // retrieve the results; this blocking copy also reports errors
        // raised while the kernel was executing
    checkCuda(cudaMemcpy(cpuPointArray, gpuPointArray, numBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy");
    printf("testKernel results:\n");
    for(int i = 0; i < numPoints; ++i)
    {
        printf("point.a: %f, point.b: %f\n",cpuPointArray[i].a,cpuPointArray[i].b);
    }

        // deallocate memory
    free(cpuPointArray);
    checkCuda(cudaFree(gpuPointArray), "cudaFree");

    return 0;
}

Passing structs to CUDA kernels

Tags:

alignment

struct

cuda

Paul

1 Answers

Paul

Recent Activity

Donate For Us

Passing structs to CUDA kernels

Tags:

alignment

struct

cuda

Paul

1 Answers

Paul

Related questions

Recent Activity

Donate For Us