I wrote a simple CUDA program to test whether I can copy a vector of pointers to class objects to the GPU.
This is what I have:
test.hpp:
// Abstract base type of this example. Every concrete subclass supplies a
// numeric tag through getClass().
class Class {
public:
    Class() {}
    virtual ~Class() {}

    // Pure virtual: returns the tag chosen by the concrete subclass.
    __host__ __device__ virtual int getClass() const = 0;

    // Always returns 42; callable from host and device code.
    __host__ __device__ int print() { return 42; }
};
// Concrete Class whose getClass() tag is 0.
// The constructor sets the name "Alpha", an average grade of 90, a size of
// 100, and appends one score per student computed as (90 + i) % 100.
class AlphaClass : public Class
{
public:
    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    AlphaClass()
        : className("Alpha"), classAvgGrade(90), classSize(100)
    {
        int student = 0;
        while (student < classSize) {
            classScores.push_back((90 + student) % 100);
            ++student;
        }
    }

    ~AlphaClass() {}

    // Increases classSize by one; classScores is left untouched.
    __host__ __device__ void incClassSize()
    {
        classSize += 1;
    }

    // Tag identifying this subclass.
    __host__ __device__ virtual int getClass() const
    {
        return 0;
    }
};
// Concrete Class whose getClass() tag is 1.
// The constructor sets the name "Beta", an average grade of 80, a size of
// 120, and appends one score per student computed as (80 + i) % 100.
class BetaClass : public Class
{
public:
    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    BetaClass()
        : className("Beta"), classAvgGrade(80), classSize(120)
    {
        for (int student = 0; student != classSize; ++student) {
            const int score = (80 + student) % 100;
            classScores.push_back(score);
        }
    }

    ~BetaClass() {}

    // Increases classSize by one; classScores is left untouched.
    __host__ __device__ void incClassSize()
    {
        ++classSize;
    }

    // Tag identifying this subclass.
    __host__ __device__ virtual int getClass() const
    {
        return 1;
    }
};
// Concrete Class whose getClass() tag is 2.
// The constructor sets the name "Cuda", an average grade of 70, a size of
// 110, and appends the score 70 once per student.
class CudaClass : public Class
{
public:
    CudaClass() {
        className = "Cuda";
        classAvgGrade = 70;
        classSize = 110;
        for (int i = 0; i < classSize; i++) {
            classScores.push_back(70);
        }
    }

    ~CudaClass() {
        //do nothing
    }

    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    // Increases classSize by one; classScores is left untouched.
    __host__ __device__ void incClassSize() {
        classSize++;
    }

    // Override of the pure virtual in Class. Without it CudaClass stays
    // abstract and `new CudaClass()` does not compile. The value 2 matches
    // the tag the host-side copy loop tests for this type.
    __host__ __device__ virtual int getClass() const {
        return 2;
    }
};
test.cpp:
// Parameter bundle that the host publishes to the device in one copy.
struct GlobalConstants {
    // Array of Class pointers; the host stores a device address here.
    Class **classList;
};

// Constant-memory instance read by useClass(); the host fills it with
// cudaMemcpyToSymbol before launching the kernel.
__constant__ GlobalConstants cuConstRaytracerParams;
// Debug kernel: reads the pointer array published in cuConstRaytracerParams,
// treats entries 0, 1 and 2 as AlphaClass, BetaClass and CudaClass, and
// prints each object's name and average grade.
//
// Launch layout: there is no thread indexing or guard, so every launched
// thread prints; the host launches it as <<<1,1>>>.
// Precondition: classList must hold at least three valid device pointers.
__global__ void useClass()
{
    Class** deviceClassList = cuConstRaytracerParams.classList;
    AlphaClass* alpha = (AlphaClass*) deviceClassList[0];
    BetaClass* beta = (BetaClass*) deviceClassList[1];
    CudaClass* cuda = (CudaClass*) deviceClassList[2];

    // NOTE(review): className is a std::string, not a char*. Passing it to
    // "%s" is not valid, and std::string is a host-side type, so no string
    // output should be expected from these three lines. They are kept because
    // they are the behaviour the question asks about.
    printf("%s\n", alpha->className);
    printf("%s\n", beta->className);
    // Member is spelled className; the previous `ClassName` does not exist.
    printf("%s\n", cuda->className);

    // Plain int members read straight out of the copied object bytes.
    printf("alpha avg = %d\n", alpha->classAvgGrade);
    printf("beta avg = %d\n", beta->classAvgGrade);
    printf("cuda avg = %d\n", cuda->classAvgGrade);
}
...
// Host-side setup: build one object of each concrete type on the host heap,
// collect them as base-class pointers, then allocate a device buffer per
// object and record the device addresses in a thrust::device_vector.
AlphaClass *alpha;
alpha = new AlphaClass();
BetaClass *beta;
beta = new BetaClass();
CudaClass *cuda;
cuda = new CudaClass();
std::vector<Class*> classes;
classes.push_back(alpha);
classes.push_back(beta);
classes.push_back(cuda);
// Typed device pointers, each assigned by cudaMalloc inside the loop below.
AlphaClass* alpha_ptr;
BetaClass* beta_ptr;
CudaClass* cuda_ptr;
// copy classes to cuda
thrust::device_vector<Class*> deviceClassList;
// Dispatch on the tag returned by the virtual getClass() to pick the concrete
// size for the allocation and the copy.
for(int i = 0; i < classes.size(); i++){
if(classes[i]->getClass() == 0){
cudaMalloc(&alpha_ptr, sizeof(AlphaClass));
// NOTE(review): &classes[i] is the address of the vector's Class* element,
// not of the AlphaClass object. This copies sizeof(AlphaClass) bytes starting
// at that pointer slot, so the device buffer does not receive the object's
// fields. The source was presumably meant to be classes[i] - confirm.
cudaMemcpy(alpha_ptr, &classes[i],sizeof(AlphaClass), cudaMemcpyHostToDevice);
deviceClassList.push_back(alpha_ptr);
}else if(classes[i]->getClass() == 1){
cudaMalloc(&beta_ptr, sizeof(BetaClass));
// NOTE(review): same source-address concern as the AlphaClass copy above.
cudaMemcpy(beta_ptr, &classes[i],sizeof(BetaClass), cudaMemcpyHostToDevice);
deviceClassList.push_back(beta_ptr);
}else if(classes[i]->getClass() == 2){
cudaMalloc(&cuda_ptr, sizeof(CudaClass));
// NOTE(review): same source-address concern as the AlphaClass copy above.
cudaMemcpy(cuda_ptr, &classes[i],sizeof(CudaClass), cudaMemcpyHostToDevice);
deviceClassList.push_back(cuda_ptr);
}else{
//do nothing
}
}
// NOTE(review): none of the cudaMalloc / cudaMemcpy return codes above are
// checked, so a failed allocation or copy goes unnoticed here.
// Raw device address of the pointer array held by deviceClassList.
Class** class_ptr = thrust::raw_pointer_cast(&deviceClassList[0]);
//ADD IT TO CUDA PARAM STRUCT
GlobalConstants params;
params.classList = class_ptr;
// Publish the struct to the __constant__ symbol the kernel reads. The source
// argument is the address of the host-side struct (&params).
cudaMemcpyToSymbol(cuConstRaytracerParams, &params, sizeof(GlobalConstants));
// Single-thread launch, then block until the kernel (and its printf output)
// has completed.
// NOTE(review): the return codes of the copy, the launch and the sync are not
// checked here; a kernel fault would go unreported.
useClass<<<1,1>>>();
cudaDeviceSynchronize();
...cleanup code
When I run this, I don't get the correct values; instead I get the following results:
alpha avg = 39696816
beta avg = 70
cuda avg = 0
And I don't get any output for the strings.
The OP raises several questions. The main question is in the title "How to use virtual class in cuda?". An unrelated question is how to use strings in cuda code. I will focus mainly on the question in the title.
According to the cuda c programming guide you can use virtual functions but with limitations. The limitation you run into is
It is not allowed to pass as an argument to a __global__ function an object of a class derived from virtual base classes.
In your example code you try to avoid this by passing the object (array of device pointers) via constant memory. However I think the programming guide is just not precise here. I think it is not possible to copy an object of a class derived from virtual base classes to device. The problem is (as far as I understand) that you will copy the host virtual function table to device.
The example code is much too complicated (and has other problems) to demonstrate the behaviour. The following more simplified code shows what you can do with virtual functions in cuda:
#include <stdio.h>
// Abstract interface of the simplified example: a single virtual accessor,
// callable from host and device code, plus a virtual destructor so objects
// can be deleted through a base-class pointer.
class Class
{
public:
    __host__ __device__ virtual ~Class() {}

    // Returns the number stored by the concrete subclass.
    __host__ __device__ virtual int getNumber() = 0;
};
// Concrete Class holding one integer, initialised to 0.
class ClassA : public Class
{
public:
    __host__ __device__ ClassA()
    {
        aNumber = 0;
    }

    // Implements Class::getNumber(): returns the current aNumber.
    __host__ __device__ int getNumber()
    {
        return this->aNumber;
    }

    int aNumber;
};
// Concrete Class holding two integers: aNumber starts at 1 and anotherNumber
// at 2. Only aNumber is exposed through getNumber().
class ClassB : public Class
{
public:
    __host__ __device__ ClassB()
    {
        aNumber = 1;
        anotherNumber = 2;
    }

    // Implements Class::getNumber(): returns the current aNumber.
    __host__ __device__ int getNumber()
    {
        return this->aNumber;
    }

    int aNumber;
    int anotherNumber;
};
// Calls the virtual getNumber() through the pointer it is given and prints
// the result. No thread indexing: every launched thread does the same work
// (main() launches it as <<<1,1>>>).
//
// main() passes a device buffer holding a byte copy of an object that was
// constructed on the host; the accompanying text reports that this kernel
// then fails with an illegal memory access. It is the negative half of the
// demonstration.
__global__ void invalidClassKernel( Class* superClass )
{
    const int number = superClass->getNumber();
    printf( "superClass->getNumber(): %d\n", number );
}
// Creates one ClassA and one ClassB with device-side new, stores them as
// base-class pointers, calls the virtual getNumber() on each, prints the
// results, and deletes both objects. No thread indexing: every launched
// thread does the same work (main() launches it as <<<1,1>>>).
__global__ void validClassKernel()
{
    Class* objectA = new ClassA();
    Class* objectB = new ClassB();
    Class* classVector[2] = { objectA, objectB };

    const int numberA = classVector[0]->getNumber();
    printf( "classVector[0]->getNumber(): %d\n", numberA );

    const int numberB = classVector[1]->getNumber();
    printf( "classVector[1]->getNumber(): %d\n", numberB );

    // Objects were constructed in device code, so they are destroyed there too.
    delete classVector[0];
    delete classVector[1];
}
// Reports a failed CUDA call on stderr as "ERROR: <label>: <description>".
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk( const char* label, cudaError_t status )
{
    if( status == cudaSuccess )
    {
        return true;
    }
    fprintf( stderr, "ERROR: %s: %s\n", label, cudaGetErrorString( status ) );
    return false;
}

// Driver for the demonstration:
//   1. byte-copies two host-constructed objects to device memory,
//   2. runs validClassKernel (objects constructed on the device),
//   3. runs invalidClassKernel on the device copy of the host-constructed
//      ClassA, which is expected to report an error.
// Returns 0 once both kernels have been run (kernel errors are only printed),
// or 1 if the allocations/copies needed for the demonstration fail.
int main()
{
    ClassA hostClassA;
    ClassB hostClassB;

    // Start as NULL so the cleanup below is safe on every path.
    ClassA* devClassA = NULL;
    ClassB* devClassB = NULL;   // holds a ClassB, so it is typed as ClassB*

    const bool ready =
        cudaOk( "cudaMalloc devClassA", cudaMalloc( &devClassA, sizeof( ClassA ) ) ) &&
        cudaOk( "cudaMalloc devClassB", cudaMalloc( &devClassB, sizeof( ClassB ) ) ) &&
        cudaOk( "cudaMemcpy devClassA",
                cudaMemcpy( devClassA, &hostClassA, sizeof( ClassA ), cudaMemcpyHostToDevice ) ) &&
        cudaOk( "cudaMemcpy devClassB",
                cudaMemcpy( devClassB, &hostClassB, sizeof( ClassB ), cudaMemcpyHostToDevice ) );

    if( ready )
    {
        // Synchronize first so that cudaGetLastError() also sees errors raised
        // while the kernel was executing, not only launch errors.
        validClassKernel<<<1,1>>>();
        cudaDeviceSynchronize();
        cudaOk( "validClassKernel", cudaGetLastError() );

        invalidClassKernel<<<1,1>>>( devClassA );
        cudaDeviceSynchronize();
        cudaOk( "invalidClassKernel", cudaGetLastError() );
    }

    // Best-effort cleanup: after the fault that invalidClassKernel is expected
    // to cause, these calls may themselves return that error, so their results
    // are deliberately ignored. cudaFree(NULL) is a no-op.
    (void)cudaFree( devClassA );
    (void)cudaFree( devClassB );
    (void)cudaDeviceReset();

    return ready ? 0 : 1;
}
The `validClassKernel()` shows how you can store pointers to derived objects in an array of base-class pointers and call the virtual function `getNumber()` through them. In this example the objects are created in device code.
The `invalidClassKernel()` shows that device code cannot use a copy of an object derived from a virtual base class when that object was created on the host. The code compiles, but the kernel fails with "an illegal memory access was encountered". Very likely this is the main problem in the original example code.
Other problems:
- `std::string` (`className`) cannot be used in device code; a possible replacement is a `char` array of constant size. The same applies to `std::vector` (`classScores`).

If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow. Thank you!
Donate Us With