#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <vector>
using namespace std;
__global__ void kernel(vector <int> *a,vector <int> *b, vector <int> *c)
{
int index=threadIdx.x;
c[index]=a[index]+b[index];
};
int main ()
{
//Paar constanten initializeren
int N=3;
vector <int> a (3,1);
vector <int> b (3,1);
vector <int> c (3,0);
int size=N*sizeof(vector <int>);
//Device variabelen
vector <int> d_a;
vector <int> d_b;
vector <int> d_c;
//<size> bytes device-geheugen vrijmaken op locatie &a
cudaMalloc( (void **) &a, size);
cudaMalloc ( (void **) &b, size);
cudaMalloc ( (void **) &c, size);
//Kopieer inputs van &a naar &d_a
cudaMemcpy(&d_a,&a,size,cudaMemcpyHostToDevice);
cudaMemcpy(&d_b,&b,size,cudaMemcpyHostToDevice);
printf( (char *) a[1]);
printf( (char *) c[1]);
//Voer kernel uit
kernel<<<N,1>>>(&d_a,&d_b,&d_c);
//Kopieer resultaat terug naar host
cudaMemcpy(&c,&d_c,size,cudaMemcpyDeviceToHost);
printf( (char *) c[1]);
return 0;
};
So this is my basic C++ vector addition program, though it won't compile because it doesn't know what to do with the "+" when adding indexes ("no operator "+" matches these operands"). Anyone have an idea why this doesn't work inside the kernel? I think it may have to do with the fact that it's a host function or something like that, though I wouldn't know how to get around that problem.
Greetings and thanks in advance.
You can't use the STL in device code ... you're going to have to use an array or some other CUDA construct like the Thrust Library.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With