Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Copying struct data from host to device on CUDA using cudaMemcpy

Tags:

struct

cuda

I am facing a problem in copying struct data from host to device in the CUDA architecture.
Following is the code snippet.

// Plain aggregate holding a pair of double-precision coordinates.
struct point
{
     double x;
     double y;
};

// Question's original (non-working) attempt: copy one point to the device,
// let MyFunc overwrite it, copy it back and print it.
// NOTE(review): MyFunc is only defined after main in this snippet and no
// earlier declaration is visible here.
int main()  
{  
   // Host copy of the point, initialised to (10, 10).
   point * a = (point*)malloc(sizeof(point));  
   a->x=10.0;   
   a->y=10.0;    
   // Device pointer; declared without an initialiser.
   point * d_a;  
   // BUG: this passes the (uninitialised) value of d_a cast to void**, not the
   // address of d_a, so d_a itself is never assigned by this call. The working
   // listing further down the page passes (void**)&d_a instead.
   // The return value is not checked, so a failure here goes unnoticed.
   cudaMalloc((void**)d_a,sizeof(point));  
   // The (void**) cast on the destination is unnecessary; the answers pass d_a directly.
   cudaMemcpy((void**)d_a,a,sizeof(point),cudaMemcpyHostToDevice);  
   // One block (1x1 grid) of 16x16 threads.
   dim3 dimblock(16,16);  
   dim3 dimgrid(1,1);  

   MyFunc<<<dimgrid,dimblock>>>(d_a);  
   // Copy the result back into *a; again the cast is unnecessary and the
   // return value is ignored.
   cudaMemcpy((void**)a,d_a,sizeof(point),cudaMemcpyDeviceToHost);    
   printf("%lf %lf\n",a->x,a->y);
}  

// Kernel: the thread whose threadIdx.x and threadIdx.y are both 0 sets both
// coordinates of *d_a to 100.0; every other thread does nothing.
__global__ void MyFunc(point* d_a)
{
     // Guard clause: all threads except (0,0) leave immediately.
     if(threadIdx.x != 0 || threadIdx.y != 0)
     {
        return;
     }

     d_a->x = 100.0;
     d_a->y = 100.0;
}

The x and y fields of point a should have been changed to 100. Instead, they are still 10, as initialized. What is happening here? Please help.

like image 538
Vikesh Avatar asked May 12 '11 21:05

Vikesh


3 Answers

The syntax of both cudaMemcpy() calls is incorrect; they should be

cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice);

and

cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost);    

EDIT:

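This complete program (note that cudaMalloc is passed the address of the device pointer, (void**)&d_a, so that it can store the allocation in d_a):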

#include <cstdio>
#include <cstdlib>

// Two-component point; both coordinates are doubles.
struct point
{
     double x;
     double y;
};

// Kernel that overwrites the point behind d_a with (100.0, 100.0).
// Only the thread with threadIdx.x == 0 and threadIdx.y == 0 performs the
// write; all remaining threads return without touching memory.
__global__ void MyFunc(point* d_a)
{
     const bool isFirstThread = (threadIdx.x == 0) && (threadIdx.y == 0);
     if(!isFirstThread)
     {
        return;
     }

     point& target = *d_a;
     target.x = 100.0;
     target.y = 100.0;
}

// Prints a readable message and terminates the program if a CUDA runtime
// call did not return cudaSuccess. `what` names the failing operation.
static void checkCuda(cudaError_t status, const char* what)
{
   if(status != cudaSuccess)
   {
      fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
      exit(EXIT_FAILURE);
   }
}

// Round-trips a single point through device memory: the host value (10, 10)
// is copied to the device, MyFunc overwrites it, and the result is copied
// back and printed. Returns EXIT_SUCCESS when every step succeeded.
int main(void)
{
   point * a = (point*)malloc(sizeof(point));
   if(a == NULL)
   {
      fprintf(stderr, "malloc failed\n");
      return EXIT_FAILURE;
   }
   a->x=10.0;
   a->y=10.0;

   point * d_a = NULL;
   // cudaMalloc stores the device address through its first argument, so it
   // must receive the address of d_a (&d_a), not the value of d_a.
   checkCuda(cudaMalloc((void**)&d_a,sizeof(point)), "cudaMalloc");
   checkCuda(cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice),
             "cudaMemcpy (host to device)");

   // One block of 16x16 threads.
   dim3 dimblock(16,16);
   dim3 dimgrid(1,1);

   MyFunc<<<dimgrid,dimblock>>>(d_a);
   // A kernel launch returns no status; launch failures are reported through
   // cudaGetLastError.
   checkCuda(cudaGetLastError(), "MyFunc launch");

   // Blocking copy back to the host; errors raised while the kernel ran are
   // reported by this call.
   checkCuda(cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost),
             "cudaMemcpy (device to host)");
   printf("%lf %lf\n",a->x,a->y);

   // Release the device and host allocations before tearing down the context.
   checkCuda(cudaFree(d_a), "cudaFree");
   free(a);

   // cudaThreadExit is what CUDA 3.2 provides; later toolkits deprecate it in
   // favour of cudaDeviceReset. Map the status to a conventional exit code
   // instead of returning the raw cudaError_t.
   return cudaThreadExit() == cudaSuccess ? EXIT_SUCCESS : EXIT_FAILURE;
}

works precisely as expected with CUDA 3.2 running on 64-bit Linux:

cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu 
cuda:~$ ./bungle 
100.000000 100.000000

So if you cannot replicate this, then something is probably wrong with your CUDA installation.

like image 96
talonmies Avatar answered Nov 11 '22 19:11

talonmies


To conclude and extend the answers of Anycorn and talonmies:

  1. Use an additional ampersand, as in (void**)&d_a, in the cudaMalloc call
  2. Don't use the (void**) cast in the cudaMemcpy calls
  3. Make sure to check for errors with cudaGetLastError and return values.
  4. Make sure to free allocated resources at the end with cudaFree
  5. Also cudaSetDevice and cudaThreadExit won't hurt.

See the reference manual and the programming guide for more details.

like image 43
Jonas Bötel Avatar answered Nov 11 '22 19:11

Jonas Bötel


Check the status codes returned by your CUDA calls:

cudaMalloc((void**)&d_a,sizeof(point));  
like image 1
Anycorn Avatar answered Nov 11 '22 19:11

Anycorn