I have an existing application that uses a C++ class, a C++ wrapper, and FORTRAN code for the computationally intensive parts of the application. I would like to implement parts of the FORTRAN in CUDA to take advantage of parallelization, but I would still like to access some of the subroutines, so I need to link CUDA, C++, and FORTRAN code.
I have three questions: 1. How do I correctly link all of the object files with the Linux Terminal and with the Makefile (included below)? 2. What is the proper way to reference the CUDA function in the class header without confusing the compiler's recognition of device and host code? 3. Is passing a class to CUDA just like passing a class to any other external C code?
Note: I have not included full code (some of it is quite long) except for the Makefile. If I need to include more, please let me know.
.h file
#ifndef _DGCPM_H_
#define _DGCPM_H_
/* System headers are included at global scope, not inside an extern "C"
   block.  These headers already guard their own declarations for C++, and
   wrapping <math.h> fails to compile ("template with C linkage") with
   standard libraries whose <math.h> pulls in C++ templates. */
#include <string.h>
#include <zlib.h>
#include <math.h>
/* Prototypes of Fortran subroutines */
extern "C" {
/* C-linkage declarations of the Fortran entry points; each one receives a
   pointer to float data.
   NOTE(review): "2Darray" begins with a digit, so it is not a valid C/C++
   identifier; it reads as a placeholder for the real parameter name. */
void initialize_(float *2Darray);
void advance_(float *2Darray);
//Want "advance" to be implemented in CUDA
}
/* Proper prototype of CUDA call? */
//extern "C" void cudaadvance(float *2Darray);
/* C++ class that fronts the Fortran model routines declared above.
   NOTE(review): the class holds a raw float** and declares a destructor
   documented as freeing memory, but no copy constructor or copy assignment
   is declared here; confirm that instances are never copied. */
class DGCPM{
public:
DGCPM(); /* Initialized with defaults setup */
~DGCPM(); /* Free memory */
void advance(float dT); /* Advance model dT seconds */
private:
/* Model data, stored as a pointer to float pointers.
   NOTE(review): "2Darray" begins with a digit and is not a valid C++
   identifier; treat it as a placeholder name. */
float **2Darray;
/* NOTE(review): declared with one float** parameter, whereas the constructor
   in the wrapper file calls initialize() with no arguments; confirm the
   intended signature. */
void initialize(float **2Darray);
};
#endif
.C wrapper
#include "../include/DGCPM.h"
/* Default constructor: performs all setup by calling initialize().
   NOTE(review): the header declares initialize(float **), so this
   zero-argument call does not match that declaration; confirm which form is
   intended. */
DGCPM::DGCPM(){
initialize();
}
/* Advances the model by handing 2Darray[0] (the first float* held by the
   member) to the Fortran routine advance_.
   NOTE(review): dT is not used in this body; confirm how the time step is
   meant to reach the Fortran code. */
void DGCPM::advance(float dT){
advance_(2Darray[0]);
}
main.C file
#include <stdlib.h>
#include <stdio.h>
#include <zlib.h>
#include "../include/DGCPM.h"
/* Test driver: builds one DGCPM model on the heap, advances it 200 times by
   3600 seconds, printing the step index before each advance, and then
   destroys the model.  Returns 0. */
int main(){
    class DGCPM *model;
    model=new class DGCPM();
    //Write data to class from a file, then
    for(int i=0;i<200;i++){
        printf("%d\n",i);
        model->advance(3600);
        //write model state to file;
    }
    //Close file
    /* Release the model so that ~DGCPM() runs; previously the object was
       never deleted and its destructor was never invoked. */
    delete model;
    return 0;
}
Makefile (Note: "pbo" is the FORTRAN code)
# Directory that receives the static library on "make install".
INSTALLDIR=../../lib/

# One flag set shared by the C, C++ and Fortran compilations.
FLAGS=-Wall -g -I ../../amj/include
CFLAGS=$(FLAGS)
CPPFLAGS=$(FLAGS)
FFLAGS=$(FLAGS)

# $(CPP) is used explicitly on the link line below.
CPP=g++
CC=gcc
FC=g77

# The Fortran sources (pbo) live outside this directory; VPATH lets make find them.
PBO_PATH=../ober/for/
VPATH=$(PBO_PATH)

DGCPM_OBJ=DGCPM.o pbo.o
TESTDGCPM_OBJ=testDGCPM.o DGCPM.o pbo.o
ALL_OBJ=$(TESTDGCPM_OBJ)

# These targets name actions, not files.
.PHONY: install all clean

install: all
	mkdir -p $(INSTALLDIR)
	cp libDGCPM.a $(INSTALLDIR)

all: libDGCPM.a testDGCPM

libDGCPM.a: $(DGCPM_OBJ)
	ar rc $@ $^

testDGCPM: $(TESTDGCPM_OBJ)
	$(CPP) -o $@ $^ -L ../../amj/lib -lamjMemory -lg2c -lz

# The leading "-" tells make to carry on even if rm fails (e.g. file absent).
clean:
	- rm $(ALL_OBJ)
	- rm $(INSTALLDIR)/libDGCPM.a
You currently don't have any CUDA code, so I may not be able to give enough detail.
For your questions:
1. Compile each source file with its own compiler: gcc for .c, g++ for .cpp, g77 for .f and nvcc for .cu. Then you can use nvcc to link all the object files (.o).
2. Host and device code are distinguished in the .cu file by the __host__ and __device__ qualifiers. It's your responsibility not to invoke device code from other host code.
Here is an example from my project. The executable is built from one .cu file, one .cpp file, a few external .a archives, and some .so libraries. For the .cpp file I use Intel's compiler icpc rather than the default g++. Please note that my main() is in the .cu file.
# Compile : bin.cu/b-rbm-gpu.cu
nvcc -ftz true -ccbin icpc -Xcompiler "-Wall -Wno-long-long -ansi -pedantic -ansi-alias -parallel -fopenmp -openmp-link=static -static-intel -wd10237" -O3 -Xcompiler "-O3" -gencode arch=compute_20,code=sm_20 -Ilib -c -o bin.cu/b-rbm-gpu.o bin.cu/b-rbm-gpu.cu
# Compile : lib/KTiming.cpp
icpc -Wall -Wno-long-long -ansi -pedantic -ansi-alias -parallel -fopenmp -openmp-link=static -static-intel -wd10237 -O3 -MMD -Ilib -c -o lib/KTiming.o lib/KTiming.cpp
# Link : bin.cu/b-rbm-gpu
nvcc -ftz true -ccbin icpc -Xcompiler "-Wall -Wno-long-long -ansi -pedantic -ansi-alias -parallel -fopenmp -openmp-link=static -static-intel -wd10237" -O3 -Xcompiler "-O3" -Ilib -Llib bin.cu/b-rbm-gpu.o lib/KTiming.o -lpthread -lm /opt/intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_lp64.a /opt/intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_intel_thread.a /opt/intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_core.a /opt/intel/composer_xe_2013.1.117/mkl/lib/intel64/libmkl_core.a -lcublas -lcurand -lcusparse -o bin.cu/b-rbm-gpu
Here is the solution. In the .cu file, I define the CUDA entry point with C linkage so that the C++ code can call it, for example,
extern "C" void myfunction_(void)
In the header file, I add
void myfunction_(void);
in the extern "C" prototypes. In the public functions of the class I added
void mycudafunction(void);
In the C++ wrapper, I add
void DGCPM::mycudafunction(){
myfunction_();
}
I can now call "myfunction" from the main program with this type of syntax
model = new class DGCPM();
model->mycudafunction();
I modified my Makefile by adding myfunction.o to all of my objects and adding
-L /usr/local/cuda/lib -lcuda -lcudart
to all of my linking instructions.
To compile, create the CUDA object file (myfunction.o), and link, I type this in the terminal:
nvcc -c myfunction.cu
make
Here is the modified code:
.h file
#ifndef _DGCPM_H_
#define _DGCPM_H_
/* System headers are included at global scope, not inside an extern "C"
   block.  These headers already guard their own declarations for C++, and
   wrapping <math.h> fails to compile ("template with C linkage") with
   standard libraries whose <math.h> pulls in C++ templates. */
#include <string.h>
#include <zlib.h>
#include <math.h>
/* Prototypes of Fortran subroutines */
extern "C" {
/* C-linkage declarations of the Fortran entry points; each one receives a
   pointer to float data.
   NOTE(review): "2Darray" begins with a digit, so it is not a valid C/C++
   identifier; it reads as a placeholder for the real parameter name. */
void initialize_(float *2Darray);
void advance_(float *2Darray);
/*CUDA prototype, can be changed to "cudaadvance" or the like*/
/* Takes no arguments and returns nothing; declared with C linkage alongside
   the Fortran routines. */
void myfunction_(void);
}
/* C++ class that fronts the Fortran and CUDA routines declared above.
   NOTE(review): the class holds a raw float** and declares a destructor
   documented as freeing memory, but no copy constructor or copy assignment
   is declared here; confirm that instances are never copied. */
class DGCPM{
public:
DGCPM(); /* Initialized with defaults setup */
~DGCPM(); /* Free memory */
void advance(float dT); /* Advance model dT seconds */
void mycudafunction(void); /* Calls the CUDA entry point myfunction_ (see wrapper) */
private:
/* Model data, stored as a pointer to float pointers.
   NOTE(review): "2Darray" begins with a digit and is not a valid C++
   identifier; treat it as a placeholder name. */
float **2Darray;
/* NOTE(review): declared with one float** parameter, whereas the constructor
   in the wrapper file calls initialize() with no arguments; confirm the
   intended signature. */
void initialize(float **2Darray);
};
#endif
.C Wrapper
#include "../include/DGCPM.h"
/* Default constructor: performs all setup by calling initialize().
   NOTE(review): the header declares initialize(float **), so this
   zero-argument call does not match that declaration; confirm which form is
   intended. */
DGCPM::DGCPM(){
initialize();
}
/* Advances the model by handing 2Darray[0] (the first float* held by the
   member) to the Fortran routine advance_.
   NOTE(review): dT is not used in this body; confirm how the time step is
   meant to reach the Fortran code. */
void DGCPM::advance(float dT){
advance_(2Darray[0]);
}
/* Member-function front for the CUDA code: forwards to the C-linkage
   function myfunction_, passing no arguments and returning nothing. */
void DGCPM::mycudafunction(){
myfunction_();
}
main.C file
#include <stdlib.h>
#include <stdio.h>
#include <zlib.h>
#include "../include/DGCPM.h"
/* Test driver: builds one DGCPM model on the heap and runs 200 steps.  Each
   step prints the step index, calls the CUDA-backed mycudafunction(), and
   advances the model by 3600 seconds.  The model is destroyed before
   returning 0. */
int main(){
    class DGCPM *model;
    model=new class DGCPM();
    //Write data to class from a file, then
    for(int i=0;i<200;i++){
        printf("%d\n",i);
        model->mycudafunction();
        model->advance(3600);
        //write model state to file;
    }
    //Close file
    /* Release the model so that ~DGCPM() runs; previously the object was
       never deleted and its destructor was never invoked. */
    delete model;
    return 0;
}
Makefile
# Directory that receives the static library on "make install".
INSTALLDIR=../../lib/

# One flag set shared by the C, C++ and Fortran compilations.
FLAGS=-Wall -g -I ../../amj/include
CFLAGS=$(FLAGS)
CPPFLAGS=$(FLAGS)
FFLAGS=$(FLAGS)

# $(CPP) is used explicitly on the link line below.
CPP=g++
CC=gcc
FC=g77
# CUDA compiler used by the %.cu pattern rule.
NVCC=nvcc

# The Fortran sources (pbo) live outside this directory; VPATH lets make find them.
PBO_PATH=../ober/for/
VPATH=$(PBO_PATH)

DGCPM_OBJ=DGCPM.o pbo.o myfunction.o
TESTDGCPM_OBJ=testDGCPM.o DGCPM.o pbo.o myfunction.o
ALL_OBJ=$(TESTDGCPM_OBJ)

# These targets name actions, not files.
.PHONY: install all clean

install: all
	mkdir -p $(INSTALLDIR)
	cp libDGCPM.a $(INSTALLDIR)

all: libDGCPM.a testDGCPM

libDGCPM.a: $(DGCPM_OBJ)
	ar rc $@ $^

testDGCPM: $(TESTDGCPM_OBJ)
	$(CPP) -o $@ $^ -L ../../amj/lib -lamjMemory -lg2c -lz -L /usr/local/cuda/lib -lcuda -lcudart

# make has no built-in rule for .cu files.  This rule builds CUDA objects such
# as myfunction.o, so a plain "make" works even when the object has not been
# produced by hand (for example right after "make clean", which deletes it).
%.o: %.cu
	$(NVCC) -c -o $@ $<

# The leading "-" tells make to carry on even if rm fails (e.g. file absent).
clean:
	- rm $(ALL_OBJ)
	- rm $(INSTALLDIR)/libDGCPM.a
Here's the simple CUDA program I used to test.
#include <stdio.h>
// Device kernel with an empty body; it takes no arguments and does no work.
__global__ void kernel() {}
// Host entry point with C linkage.  It launches `kernel` with the <<<1,1>>>
// configuration and then prints a greeting line to stdout.
// NOTE(review): nothing in this function queries the result of the launch.
extern "C" void myfunction_()
{
    kernel<<<1, 1>>>();
    printf("Hello, World!\n");
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With