Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to use virtual class in cuda?

Tags:

c++

cuda

I wrote a simple CUDA program to test whether I can copy a vector of pointers to class objects to the GPU.

This is what I have:

test.hpp:

// Abstract base of AlphaClass, BetaClass and CudaClass.
class Class
{
public:
    // Constructor and destructor carry no execution-space qualifiers.
    Class() {}
    virtual ~Class() {}

    // Numeric tag that each concrete subclass must supply.
    __host__ __device__ virtual int getClass() const = 0;

    // Always yields the constant 42.
    __host__ __device__ int print()
    {
        return 42;
    }
};

// Concrete Class whose getClass() tag is 0. The constructor fills the object
// with fixed sample data: name "Alpha", average 90 and 100 generated scores.
class AlphaClass : public Class
{
public:
    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    AlphaClass()
        : className("Alpha"), classAvgGrade(90), classSize(100)
    {
        // Scores start at 90 and wrap around at 100.
        int idx = 0;
        while (idx < classSize) {
            classScores.push_back((90 + idx) % 100);
            ++idx;
        }
    }

    ~AlphaClass() {}

    // Adds one to classSize; classScores is left untouched.
    __host__ __device__ void incClassSize()
    {
        ++classSize;
    }

    // Tag identifying this subclass.
    __host__ __device__ virtual int getClass() const
    {
        return 0;
    }
};


// Concrete Class whose getClass() tag is 1. The constructor fills the object
// with fixed sample data: name "Beta", average 80 and 120 generated scores.
class BetaClass : public Class
{
public:
    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    BetaClass()
        : className("Beta"), classAvgGrade(80), classSize(120)
    {
        // Scores start at 80 and wrap around at 100.
        int idx = 0;
        while (idx < classSize) {
            classScores.push_back((80 + idx) % 100);
            ++idx;
        }
    }

    ~BetaClass() {}

    // Adds one to classSize; classScores is left untouched.
    __host__ __device__ void incClassSize()
    {
        ++classSize;
    }

    // Tag identifying this subclass.
    __host__ __device__ virtual int getClass() const
    {
        return 1;
    }
};
    
    
// Concrete Class whose getClass() tag is 2. The constructor fills the object
// with fixed sample data: name "Cuda", average 70 and 110 scores of 70.
class CudaClass : public Class
{
public:
    CudaClass() {
      className = "Cuda";
      classAvgGrade = 70;
      classSize = 110;
      // Every score is the same value, so fill the vector in one call.
      classScores.assign(classSize, 70);
    }
    ~CudaClass() {
        //do nothing
    }

    std::string className;
    int classAvgGrade;
    int classSize;
    std::vector<int> classScores;

    // Adds one to classSize; classScores is left untouched.
    __host__ __device__ void incClassSize(){
        classSize++;
    }

    // Tag identifying this subclass. Without this override the class inherits
    // the pure virtual Class::getClass(), stays abstract and cannot be
    // instantiated with `new CudaClass()`.
    __host__ __device__ virtual int getClass() const{
        return 2;
    }
};

test.cpp:

// Bundle of kernel parameters that the host copies into constant memory.
struct GlobalConstants {
    // Array of Class pointers; the host code stores a device-side pointer array here.
    Class** classList;
};  

// Constant-memory instance of the parameters; the host fills it with cudaMemcpyToSymbol.
__constant__ GlobalConstants cuConstRaytracerParams;


   // Debug kernel: prints fields of the three objects referenced by
   // cuConstRaytracerParams.classList. Entries 0, 1 and 2 are reinterpreted as
   // AlphaClass, BetaClass and CudaClass respectively, so the host must store
   // them in that order. No thread index is used; every launched thread prints.
   __global__ void useClass()
   {
       Class** deviceClassList = cuConstRaytracerParams.classList;
       AlphaClass* alpha = (AlphaClass*) deviceClassList[0];
       BetaClass* beta = (BetaClass*) deviceClassList[1];
       CudaClass* cuda = (CudaClass*) deviceClassList[2];

       // NOTE(review): className is a std::string, which is not usable in
       // device code and is not a valid "%s" argument; a fixed-size char array
       // member would be needed for these three lines to print text.
       printf("%s\n", alpha->className);
       printf("%s\n", beta->className);
       printf("%s\n", cuda->className);  // was cuda->ClassName, which is not a member

       printf("alpha avg = %d\n", alpha->classAvgGrade);
       printf("beta avg = %d\n", beta->classAvgGrade);
       printf("cuda avg = %d\n", cuda->classAvgGrade);
   }


...

  

    // Build one host object of each concrete type and collect them behind
    // base-class pointers.
    AlphaClass *alpha;
    alpha = new AlphaClass();
    BetaClass *beta;
    beta = new BetaClass();
    CudaClass *cuda;
    cuda = new CudaClass();
    std::vector<Class*> classes;
    classes.push_back(alpha);
    classes.push_back(beta);
    classes.push_back(cuda);

    // Device-side destinations, one per concrete type.
    AlphaClass* alpha_ptr;
    BetaClass* beta_ptr;
    CudaClass* cuda_ptr;

    // copy classes to cuda
    // NOTE(review): the CUDA return codes below are not checked.
    // NOTE(review): the copied bytes still hold host-side state (the virtual
    // table pointer and the internals of std::string / std::vector), so device
    // code can only rely on the plain int members of these copies.
    thrust::device_vector<Class*> deviceClassList;
    for(size_t i = 0; i < classes.size(); i++){
        // Ask for the tag once per element.
        const int classId = classes[i]->getClass();
        if(classId == 0){
            cudaMalloc(&alpha_ptr, sizeof(AlphaClass));
            // The source must be the object itself (classes[i]), not the
            // address of the vector slot that holds the pointer (&classes[i]).
            cudaMemcpy(alpha_ptr, static_cast<AlphaClass*>(classes[i]), sizeof(AlphaClass), cudaMemcpyHostToDevice);
            deviceClassList.push_back(alpha_ptr);

        }else if(classId == 1){
            cudaMalloc(&beta_ptr, sizeof(BetaClass));
            cudaMemcpy(beta_ptr, static_cast<BetaClass*>(classes[i]), sizeof(BetaClass), cudaMemcpyHostToDevice);
            deviceClassList.push_back(beta_ptr);

        }else if(classId == 2){
            cudaMalloc(&cuda_ptr, sizeof(CudaClass));
            cudaMemcpy(cuda_ptr, static_cast<CudaClass*>(classes[i]), sizeof(CudaClass), cudaMemcpyHostToDevice);
            deviceClassList.push_back(cuda_ptr);

        }else{
            //do nothing
        }
    }
    // Raw device address of the pointer array owned by deviceClassList.
    Class** class_ptr = thrust::raw_pointer_cast(&deviceClassList[0]);

    //ADD IT TO CUDA PARAM STRUCT
    GlobalConstants params;
    params.classList = class_ptr;
    cudaMemcpyToSymbol(cuConstRaytracerParams, &params, sizeof(GlobalConstants));

    useClass<<<1,1>>>();
    // Wait for the kernel so its printf output is flushed before cleanup.
    cudaDeviceSynchronize();

    ...cleanup code

When I run this, I don't get the correct values; instead I get the following results:

alpha avg = 39696816

beta avg = 70

cuda avg = 0

And I don't get any output for the strings.

like image 791
MoneyBall Avatar asked Sep 03 '25 04:09

MoneyBall


1 Answers

The OP raises several questions. The main question is in the title "How to use virtual class in cuda?". An unrelated question is how to use strings in cuda code. I will focus mainly on the question in the title.

According to the cuda c programming guide you can use virtual functions but with limitations. The limitation you run into is

It is not allowed to pass as an argument to a __global__ function an object of a class derived from virtual base classes.

In your example code you try to avoid this by passing the object (array of device pointers) via constant memory. However I think the programming guide is just not precise here. I think it is not possible to copy an object of a class derived from virtual base classes to device. The problem is (as far as I understand) that you will copy the host virtual function table to device.

The example code is much too complicated (and has other problems) to demonstrate the behaviour. The following more simplified code shows what you can do with virtual functions in cuda:

#include <stdio.h>

// Abstract interface whose virtual members are callable from host and device code.
class Class
{
public:
    // Virtual, so deleting through a Class* runs the derived destructor.
    __host__ __device__ virtual ~Class() {}

    // Each subclass reports its number through this member.
    __host__ __device__ virtual int getNumber() = 0;
};

// Class implementation that stores a single int, initialised to 0.
class ClassA : public Class
{
public:
    int aNumber;

    __host__ __device__ ClassA()
    {
        aNumber = 0;
    }

    // Reports the stored aNumber.
    __host__ __device__ virtual int getNumber()
    {
        return aNumber;
    }
};

// Class implementation that stores two ints (1 and 2 after construction);
// only aNumber is reported by getNumber().
class ClassB : public Class
{
public:
    int aNumber;
    int anotherNumber;

    __host__ __device__ ClassB()
    {
        aNumber = 1;
        anotherNumber = 2;
    }

    // Reports the stored aNumber.
    __host__ __device__ virtual int getNumber()
    {
        return aNumber;
    }
};

// Makes a virtual getNumber() call through the base pointer it is given and
// prints the result. No thread index is used; every launched thread prints.
__global__ void invalidClassKernel( Class* superClass )
{
    const int number = superClass->getNumber();
    printf( "superClass->getNumber(): %d\n", number );
}

// Creates a ClassA and a ClassB with device-side new, calls getNumber() on
// each through a Class* and prints the results, then deletes both objects.
__global__ void validClassKernel()
{
    Class* classVector[2] = { new ClassA(), new ClassB() };

    const int first = classVector[0]->getNumber();
    printf( "classVector[0]->getNumber(): %d\n", first );

    const int second = classVector[1]->getNumber();
    printf( "classVector[1]->getNumber(): %d\n", second );

    // Release in creation order.
    for( int slot = 0; slot < 2; ++slot )
    {
        delete classVector[slot];
    }
}

int main()
{
    ClassA hostClassA;
    ClassB hostClassB;

    ClassA* devClassA;
    ClassA* devClassB;
    cudaMalloc( &devClassA, sizeof(ClassA) );
    cudaMalloc( &devClassB, sizeof(ClassB) );
    cudaMemcpy( devClassA, &hostClassA, sizeof( ClassA ), cudaMemcpyHostToDevice );
    cudaMemcpy( devClassB, &hostClassB, sizeof( ClassB ), cudaMemcpyHostToDevice );

    validClassKernel<<<1,1>>>();
    cudaDeviceSynchronize();
    cudaError_t error = cudaGetLastError();
    if(error!=cudaSuccess)
    {
        fprintf(stderr,"ERROR: validClassKernel: %s\n", cudaGetErrorString(error) );
    }

    invalidClassKernel<<<1,1>>>( devClassA );
    cudaDeviceSynchronize();
    error = cudaGetLastError();
    if(error!=cudaSuccess)
    {
        fprintf(stderr,"ERROR: invalidClassKernel: %s\n", cudaGetErrorString(error) );
    }
}

The validClassKernel() shows how you can store pointers of derived objects in an array of base class pointers and access the virtual function getNumber(). In this example the objects are created in device code.

The invalidClassKernel() shows that device code cannot use a copy of an object derived from a virtual base class when that object was created on the host. The code compiles, but the kernel fails with the error "an illegal memory access was encountered". Very likely this is the main problem in the original example code.


Other problems:

  • You cannot use std::string in device code; see the question "Can we use the string data type in C++ within kernels". As a solution you could use a char array of constant size. The same applies to std::vector (classScores).
like image 175
havogt Avatar answered Sep 04 '25 20:09

havogt