I am having trouble copying struct data from the host to the device in CUDA.
The following is the code snippet:
// Pair of double-precision coordinates passed between host and device.
struct point
{
    double x;  // x component
    double y;  // y component
};
// Host driver as posted in the question: allocates one point on the host,
// sets both fields to 10.0, attempts to allocate and fill a device copy,
// launches MyFunc on it, copies the result back and prints it.
// NOTE(review): none of the CUDA calls below have their return values checked.
int main()  
{  
   point * a = (point*)malloc(sizeof(point));  
   a->x=10.0;   
   a->y=10.0;    
   point * d_a;  
   // NOTE(review): d_a is uninitialized here and its VALUE (not its address)
   // is cast to void**. The corrected listing further down this page passes
   // (void**)&d_a instead.
   cudaMalloc((void**)d_a,sizeof(point));  
   // NOTE(review): the (void**) cast on the destination pointer is not needed;
   // the corrected listing passes d_a directly.
   cudaMemcpy((void**)d_a,a,sizeof(point),cudaMemcpyHostToDevice);  
   // Launch configuration: a single block of 16x16 threads.
   dim3 dimblock(16,16);  
   dim3 dimgrid(1,1);  
   // NOTE(review): in this snippet MyFunc is defined after main and no earlier
   // declaration is visible.
   MyFunc<<<dimgrid,dimblock>>>(d_a);  
   // NOTE(review): same unnecessary (void**) cast, this time on the host
   // destination pointer.
   cudaMemcpy((void**)a,d_a,sizeof(point),cudaMemcpyDeviceToHost);    
   printf("%lf %lf\n",a->x,a->y);
}  
// Kernel: threads whose threadIdx.x and threadIdx.y are both 0 overwrite the
// two fields of *d_a with 100.0; all remaining threads return immediately.
__global__ void MyFunc(point* d_a)
{
    const bool isWriter = (threadIdx.x == 0) && (threadIdx.y == 0);
    if (!isWriter)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}
The x and y fields of point a should have been changed to 100. Instead, it is still 10 as initialized. What is happening here? Please help.
The syntax of both cudaMemcpy() calls is incorrect; they should be
cudaMemcpy(d_a,a,sizeof(point),cudaMemcpyHostToDevice);
and
cudaMemcpy(a,d_a,sizeof(point),cudaMemcpyDeviceToHost);    
EDIT:
This:
#include <cstdio>
#include <cstdlib>
// Plain data record holding two doubles; copied bytewise between host and
// device by the driver code.
struct point
{
    double x;  // x component
    double y;  // y component
};
// Kernel: sets d_a->x and d_a->y to 100.0.
// Only threads with threadIdx.x == 0 and threadIdx.y == 0 perform the write;
// every other thread exits without touching memory.
__global__ void MyFunc(point* d_a)
{
    // Guard clause: bail out unless this thread sits at (0, 0) in x/y.
    if (threadIdx.x != 0 || threadIdx.y != 0)
    {
        return;
    }

    d_a->x = 100.0;
    d_a->y = 100.0;
}
// Host driver: round-trips a single point through device memory.
//
// Steps: allocate a host point initialised to (10.0, 10.0), allocate a device
// copy, upload it, launch MyFunc on one 16x16 block, download the result and
// print it.
//
// Unlike a bare sequence of calls, every CUDA return code is checked and the
// first failure is reported on stderr; host and device allocations are
// released on every path.
//
// Returns: EXIT_FAILURE if the host allocation fails; otherwise the first
// CUDA error encountered, or the result of cudaThreadExit() (0 on success).
int main(void)
{
    point* a = (point*)malloc(sizeof(point));
    if (a == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    a->x = 10.0;
    a->y = 10.0;

    // cudaMalloc writes the device address THROUGH its first argument, so it
    // must receive the address of d_a, not d_a itself.
    point* d_a = NULL;
    cudaError_t err = cudaMalloc((void**)&d_a, sizeof(point));

    if (err == cudaSuccess)
    {
        err = cudaMemcpy(d_a, a, sizeof(point), cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess)
    {
        dim3 dimblock(16,16);
        dim3 dimgrid(1,1);
        MyFunc<<<dimgrid,dimblock>>>(d_a);
        // A kernel launch returns nothing; launch-configuration errors are
        // only visible through cudaGetLastError().
        err = cudaGetLastError();
    }

    if (err == cudaSuccess)
    {
        // Blocking copy: its return value also surfaces errors raised while
        // the kernel was executing.
        err = cudaMemcpy(a, d_a, sizeof(point), cudaMemcpyDeviceToHost);
    }

    if (err == cudaSuccess)
    {
        printf("%lf %lf\n",a->x,a->y);
    }
    else
    {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    }

    // Release resources regardless of how far the sequence above got.
    if (d_a != NULL)
    {
        cudaFree(d_a);
    }
    free(a);

    // cudaThreadExit() is kept to match the CUDA 3.2 toolkit this listing was
    // tested with. NOTE(review): newer toolkits deprecate it in favour of
    // cudaDeviceReset() - confirm against the toolkit in use.
    cudaError_t exitErr = cudaThreadExit();
    return (int)((err != cudaSuccess) ? err : exitErr);
}
works precisely as expected with CUDA 3.2 running on 64-bit Linux:
cuda:~$ nvcc -arch=sm_20 -o bungle bungle.cu 
cuda:~$ ./bungle 
100.000000 100.000000
So if you cannot replicate this, then something is probably wrong with your CUDA installation.
To conclude and extend the answers of Anycorn and talonmies:
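- Use (void**)&d_a in cudaMalloc.
- Do not use (void**) in cudaMemcpy.
- Check for errors with cudaGetLastError and the API return values.
- Free the allocated device memory with cudaFree.
- Calling cudaSetDevice and cudaThreadExit won't hurt.
See the reference manual and the programming guide for more details.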
Check your CUDA status codes:
cudaMalloc((void**)&d_a,sizeof(point));  
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With