So I have the following code:
File: Cuda.cu
// Element-wise vector addition kernel: r[i] = x[i] + y[i] for i in [0, n).
// Expects a 1D grid of 1D blocks with at least n total threads; threads past
// the tail of the arrays exit via the bounds guard below.
template <typename T>
__global__ void xpy( int n, T *x, T *y, T *r )
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;  // grid tail: nothing for out-of-range threads to do
    r[idx] = x[idx] + y[idx];
}
// Host wrapper: launches the xpy kernel to add two float arrays element-wise
// into r.
// NOTE(review): the return type is missing here (the calling code declares it
// as bool — confirm), and numBlocks/blockSize are defined in the snipped-out
// part of the body — presumably numBlocks = ceil(numElements / blockSize).
// Assumes a1, a2, r are device pointers — TODO confirm against the caller.
mtx_mtx_add( float *a1, float *a2, float *r, const int &numElements )
{
// snip
xpy<<<numBlocks, blockSize>>>(numElements, a1, a2, r);
}
// Overloads for int and long long; "{:::}" marks bodies elided in the
// question (presumably the same launch pattern as the float overload above).
mtx_mtx_add( int *a1, int *a2, int *r, const int &numElements ) {:::}
mtx_mtx_add( long long *a1, long long *a2, long long *r, const int &numElements ) {:::}
File: Calling Code
extern "C" bool mtx_mtx_add( float *a1, float *a2, float *r, int &numElements );
extern "C" bool mtx_mtx_add( float *a1, float *a2, float *r, int &numElements );
extern "C" bool mtx_mtx_add( float *a1, float *a2, float *r, int &numElements );
int main()
{
... ...
mtx_mtx_add(...);
}
Now what I want is for the mtx_mtx_add function to be templated. Is this possible and if so how?
CUDA is based on C++, and host code can use standard C++ freely; device code supports most of the language too (templates included), though with some restrictions — for example, exceptions and RTTI are not available in device code. Templates work fine for both host wrappers and kernels.
You can create the function template as follows:
// Host wrapper that launches the xpy kernel for any element type T.
// a1, a2: device pointers to the input arrays; r: device pointer to the
// output array; numElements: element count of each array.
// Returns true when the launch was accepted by the CUDA runtime.
template<typename T>
bool mtx_mtx_add(T *a1, T *a2, T *r, const int &numElements)
{
    // Launch configuration: block size a multiple of the warp size, and
    // ceil-division so the grid covers the tail when numElements is not a
    // multiple of blockSize (the kernel's bounds guard handles the overshoot).
    constexpr int blockSize = 256;
    const int numBlocks = (numElements + blockSize - 1) / blockSize;

    xpy<T><<<numBlocks, blockSize>>>(numElements, a1, a2, r);

    // A kernel launch does not return a status directly; launch-configuration
    // errors must be picked up via cudaGetLastError(). (Asynchronous execution
    // errors only surface at the next synchronizing call.) The original body
    // declared bool but never returned a value, which is undefined behavior.
    return cudaGetLastError() == cudaSuccess;
}
Then you can explicitly instantiate the function template for the data types you need (note these are explicit instantiations, not specializations — they make the compiled code available to other translation units):
// Explicit instantiations: force the compiler to emit these concrete versions
// in this translation unit, so the template definition can stay in the .cu
// file while other translation units link against them. (These are explicit
// instantiations, not specializations.)
template bool mtx_mtx_add<float>(float* a1, float* a2, float* r, const int& numElements);
template bool mtx_mtx_add<int>(int* a1, int* a2, int* r, const int& numElements);
// Also cover the long long overload the question listed:
template bool mtx_mtx_add<long long>(long long* a1, long long* a2, long long* r, const int& numElements);
If you found this helpful, you can donate via PayPal or buy us a coffee so we can keep maintaining and growing the site. Thank you!