What is the correct way of using a Vulkan VkImage as a CUDA cuArray?
I've been trying to follow some examples; however, I get a CUDA_ERROR_INVALID_VALUE on a call to cuExternalMemoryGetMappedMipmappedArray().
To provide the information in an ordered way:
I'm using CUDA 10.1
The base code comes from https://github.com/SaschaWillems/Vulkan; in particular, I'm using the "01 - Vulkan Gears" demo, enriched with the saveScreenshot method from "09 - Capturing screenshots".
Instead of saving the snapshot image to a file, I'll be sending the snapshot image into CUDA as a CUarray.
I've enabled the following instance and device extensions:
std::vector<const char*> instanceExtensions = {
VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME };
std::vector<const char*> deviceExtensions = { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME };
I have a VkImage, created as follows:
// Creates the destination image and its backing device memory. Both the image
// and the allocation are flagged for export as an opaque fd handle type.
// Create the linear tiled destination image to copy to and to read the memory from
VkImageCreateInfo imageCreateCI(vks::initializers::imageCreateInfo());
imageCreateCI.imageType = VK_IMAGE_TYPE_2D;
// Note that vkCmdBlitImage (if supported) will also do format conversions if the swapchain color format would differ
imageCreateCI.format = VK_FORMAT_R8G8B8A8_UNORM;
// width x height image with depth 1, one array layer, one mip level, one sample
imageCreateCI.extent.width = width;
imageCreateCI.extent.height = height;
imageCreateCI.extent.depth = 1;
imageCreateCI.arrayLayers = 1;
imageCreateCI.mipLevels = 1;
imageCreateCI.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageCreateCI.samples = VK_SAMPLE_COUNT_1_BIT;
imageCreateCI.tiling = VK_IMAGE_TILING_LINEAR;
imageCreateCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
// The image is used as both a transfer source and a transfer destination
imageCreateCI.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
VkExternalMemoryImageCreateInfoKHR extImageCreateInfo = {};
/*
* Indicate that the memory backing this image will be exported in an
* fd. In some implementations, this may affect the call to
* GetImageMemoryRequirements() with this image.
*/
extImageCreateInfo.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR;
// The struct was zero-initialized above, so |= leaves only the opaque-fd bit set
extImageCreateInfo.handleTypes |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
// Chain the external-memory info onto the image create info
imageCreateCI.pNext = &extImageCreateInfo;
// Create the image
VkImage dstImage;
VK_CHECK_RESULT(vkCreateImage(device, &imageCreateCI, nullptr, &dstImage));
// Create memory to back up the image
VkMemoryRequirements memRequirements;
VkMemoryAllocateInfo memAllocInfo(vks::initializers::memoryAllocateInfo());
VkDeviceMemory dstImageMemory;
vkGetImageMemoryRequirements(device, dstImage, &memRequirements);
// Allocate exactly the size reported by the image's memory requirements
memAllocInfo.allocationSize = memRequirements.size;
// Memory must be host visible to copy from
memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
// Request the same opaque-fd handle type on the allocation as on the image
VkExportMemoryAllocateInfoKHR exportInfo = {};
exportInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
exportInfo.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
memAllocInfo.pNext = &exportInfo;
VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &dstImageMemory));
// Bind the allocation to the image at offset 0
VK_CHECK_RESULT(vkBindImageMemory(device, dstImage, dstImageMemory, 0));
From there I'll:
Get the Vulkan memory handle:
// Obtains an opaque-fd handle for a Vulkan device memory allocation.
//
// Resolves vkGetMemoryFdKHR through vkGetDeviceProcAddr and calls it for
// `memory` with the opaque-fd handle type.
//
// device: device used both to look up and to call vkGetMemoryFdKHR.
// memory: device memory whose fd is requested.
// Returns the fd written by vkGetMemoryFdKHR, or -1 when the entry point
// cannot be located or the call does not return VK_SUCCESS.
int CuEncoderImpl::getVulkanMemoryHandle(VkDevice device,
        VkDeviceMemory memory) {
    // The extension entry point is looked up at runtime.
    const auto getMemoryFd = reinterpret_cast<PFN_vkGetMemoryFdKHR>(
            vkGetDeviceProcAddr(device, "vkGetMemoryFdKHR"));
    if (getMemoryFd == nullptr) {
        printf("Failed to locate function vkGetMemoryFdKHR\n");
        return -1;
    }

    // Describe which allocation and which handle type to export.
    VkMemoryGetFdInfoKHR request = { };
    request.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
    request.memory = memory;
    request.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;

    int handle = -1;
    const VkResult status = getMemoryFd(device, &request, &handle);
    if (status != VK_SUCCESS) {
        printf("Failed executing vkGetMemoryFdKHR [%d]\n", status);
        return -1;
    }
    return handle;
}
Import the memory:
// Import the Vulkan allocation into CUDA as external memory via its opaque fd.
CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc = { };
memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD;
// getVulkanMemoryHandle returns -1 on failure; that value is not checked here
memDesc.handle.fd = getVulkanMemoryHandle(device, memory);
// NOTE(review): size is computed as width * height * 4 here, whereas the
// Vulkan allocation above was made with memRequirements.size
memDesc.size = extent.width*extent.height*4;
CUDA_DRVAPI_CALL(cuImportExternalMemory(&externalMem, &memDesc));
And map the memory. This is the step that is failing:
// Maps imported external memory as a one-level CUDA mipmapped array and
// returns level 0 of it as a CUarray.
//
// extent:   image dimensions; only width and height are read.
// m_extMem: external memory object passed to
//           cuExternalMemoryGetMappedMipmappedArray.
// Returns the CUarray obtained from cuMipmappedArrayGetLevel for level 0.
CUarray CuEncoderImpl::getCUDAArrayFromExternalMemory(const VkExtent3D &extent,const CUexternalMemory &m_extMem) {
CUmipmappedArray m_mipmapArray;
// NOTE(review): result is never read in this function
CUresult result = CUDA_SUCCESS;
CUarray array;
// Array descriptor: width x height, four channels of UNSIGNED_INT32,
// created with the SURFACE_LDST flag
CUDA_ARRAY3D_DESCRIPTOR arrayDesc = { };
arrayDesc.Width = extent.width;
arrayDesc.Height = extent.height;
arrayDesc.Depth = 0;
arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT32;
arrayDesc.NumChannels = 4;
arrayDesc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
// Mapping descriptor: a single mip level starting at offset 0 of the memory
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = { };
mipmapArrayDesc.arrayDesc = arrayDesc;
mipmapArrayDesc.numLevels = 1;
mipmapArrayDesc.offset = 0;
// NOTE(review): this is the call the surrounding text reports as failing
// with CUDA_ERROR_INVALID_VALUE
CUDA_DRVAPI_CALL(cuExternalMemoryGetMappedMipmappedArray(&m_mipmapArray, m_extMem, &mipmapArrayDesc));
CUDA_DRVAPI_CALL(cuMipmappedArrayGetLevel(&array, m_mipmapArray, 0));
return array;
}
I've been trying multiple combinations of the parameters, but have failed so far. The error points to an invalid parameter, but I'm not sure how to find what's wrong.
The only thing that has worked is to map the Vulkan image memory to a host buffer and then copy it into the CUDA array... but I guess that's expensive and I'd like to avoid it if possible.
For the record, I finally got this to work.
Some notes and the modifications I had to do to the code listed in the question:
// Changed from VK_IMAGE_TILING_LINEAR in the question's image-creation code
imageCreateCI.tiling = VK_IMAGE_TILING_OPTIMAL;
Allocate the memory with VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT:
// Replaces the HOST_VISIBLE | HOST_COHERENT request used in the question's code
memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
In the code below, size is memRequirements.size (from the code creating the image):
CUDA_EXTERNAL_MEMORY_HANDLE_DESC memDesc = { };
memDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD;
memDesc.handle.fd = getVulkanMemoryHandle(device, memory);
// size is memRequirements.size from the image-creation code; it replaces the
// extent.width*extent.height*4 value used in the question
memDesc.size = size;
CUDA_DRVAPI_CALL(cuImportExternalMemory(&externalMem, &memDesc));
Describe the mapped array as CU_AD_FORMAT_UNSIGNED_INT8 with four channels and with the CUDA_ARRAY3D_COLOR_ATTACHMENT flag:
// Corrected array descriptor; differs from the question's version in Format
// and Flags only.
CUDA_ARRAY3D_DESCRIPTOR arrayDesc = { };
arrayDesc.Width = extent.width;
arrayDesc.Height = extent.height;
arrayDesc.Depth = 0;
// Was CU_AD_FORMAT_UNSIGNED_INT32 in the question
arrayDesc.Format = CU_AD_FORMAT_UNSIGNED_INT8;
arrayDesc.NumChannels = 4;
// Was CUDA_ARRAY3D_SURFACE_LDST in the question
arrayDesc.Flags = CUDA_ARRAY3D_COLOR_ATTACHMENT;
// Mapping descriptor is unchanged: one mip level at offset 0
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC mipmapArrayDesc = { };
mipmapArrayDesc.arrayDesc = arrayDesc;
mipmapArrayDesc.numLevels = 1;
mipmapArrayDesc.offset = 0;
CUDA_DRVAPI_CALL(cuExternalMemoryGetMappedMipmappedArray(&m_mipmapArray, m_extMem, &mipmapArrayDesc));
After those changes, I was able to get it to work. A few of the changes were glaring mistakes on my side (like the size), a few were things I found by carefully re-reading the documentation for the 100th time, others were guesses at hints in the documentation and, finally, a lot was trial and error.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With