Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Numerical Error in simple CUDA code

I just started experimenting cuda with the following cude

#include "macro.hpp"
#include <algorithm>
#include <iostream>
#include <cstdlib>

//#define double float
//#define double int

int RandomNumber(){return static_cast<double>(rand() % 1000);}

__global__ void sum3(double const* a,
             double const* b,
             double const* c,
             double * result, 
             unsigned const* n)
{    
   unsigned i = blockIdx.x;
   while(i < (*n))
{
  result[i] = (a[i] + b[i] + c[i]);
}
};


int main()
{

  static unsigned size = 1e2;
  srand(0);
  double* a = new double[size];
  double* b = new double[size];
  double* c = new double[size];
  double* result = new double[size];

  std::generate(a, a+size, RandomNumber);
  std::generate(b, b+size, RandomNumber);
  std::generate(c, c+size, RandomNumber);

  double* ad, *bd,* cd;
  double* resultd;

  unsigned * sized;
  std::cout << cudaMalloc((void**) &ad, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &bd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &cd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &resultd, size*sizeof(double)) << std::endl;
  std::cout << cudaMalloc((void**) &sized, sizeof(unsigned)) << std::endl;

  cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

  //  print_array(a, size);
  for(int i = 0; i < 1000; ++i)
    {
      cudaMemcpy(ad, a, size*sizeof(double), cudaMemcpyHostToDevice);
      cudaMemcpy(bd, b, size*sizeof(double), cudaMemcpyHostToDevice);
      cudaMemcpy(cd, c, size*sizeof(double), cudaMemcpyHostToDevice);      
      sum3<<<size, 1>>>(ad, bd, cd, resultd, sized);
      cudaMemcpy(result, resultd, size*sizeof(double), cudaMemcpyDeviceToHost);
    }

#ifdef PRINT
  for( int i = 0; i < size; ++i)
    {
      std::cout << a[i] << ", "<< b[i] <<"," << c[i] << "," << result[i]<< std::endl;
    }
#endif

  cudaFree(ad);
  cudaFree(bd);
  cudaFree(cd);
  cudaFree(resultd);

  delete[] a;
  delete[] b;
  delete[] c;
  delete[] result;

  return 0;
}

Compile this on mac book pro without any problem. However when I try to run this I get

930, 22,538,899
691, 832,205,23
415, 655,148,120
872, 876,481,985
761, 909,583,619
841, 104,466,917
610, 635,911,52
//more useless numbers

I have compared my samples with the one in Cuda By Example and I dont see any major difference except type. Any pointer on this problem is appreciated.

like image 283
leon Avatar asked Dec 19 '25 06:12

leon


1 Answers

while(i < (*n))
{
  result[i] = (a[i] + b[i] + c[i]);
}

is wrong (infinite)

this is wrong

cudaMemcpy((void**) &sized, &size, sizeof(unsigned), cudaMemcpyHostToDevice);

&sized is address of pointer variable, not pointer value

Single number can be passed to device on the stack, so use

unsigned size

check return status of your cuda functions, http://www.drdobbs.com/high-performance-computing/207603131

like image 171
Anycorn Avatar answered Dec 20 '25 20:12

Anycorn



Donate For Us

If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!