From 52f1c4fa00f46b1bff2e4432a85c579ea2af33e9 Mon Sep 17 00:00:00 2001
From: Philip Rebohle
Date: Sun, 10 Dec 2017 15:57:51 +0100
Subject: [PATCH] [dxvk] Implemented staging buffers for large data transfers

---
 src/d3d11/d3d11_device.cpp  |  17 +++++-
 src/dxgi/dxgi_presenter.cpp |   4 +-
 src/dxvk/dxvk_buffer.h      |  13 +++--
 src/dxvk/dxvk_cmdlist.cpp   |  37 +++++++++++-
 src/dxvk/dxvk_cmdlist.h     |  20 ++++++-
 src/dxvk/dxvk_context.cpp   | 108 +++++++++++++++++++++++++++++++++++-
 src/dxvk/dxvk_context.h     |  21 +++++++
 src/dxvk/dxvk_device.cpp    |  28 +++++++++-
 src/dxvk/dxvk_device.h      |  23 ++++++++
 src/dxvk/dxvk_format.h      |   2 +-
 src/dxvk/dxvk_image.h       |   4 +-
 src/dxvk/dxvk_memory.h      |   6 +-
 src/dxvk/dxvk_staging.cpp   |  85 ++++++++++++++++++++++++++++
 src/dxvk/dxvk_staging.h     |  63 +++++++++++++++++++++
 src/dxvk/dxvk_util.cpp      |  15 ++++-
 src/dxvk/dxvk_util.h        |   6 +-
 src/dxvk/meson.build        |   1 +
 17 files changed, 428 insertions(+), 25 deletions(-)
 create mode 100644 src/dxvk/dxvk_staging.cpp
 create mode 100644 src/dxvk/dxvk_staging.h

diff --git a/src/d3d11/d3d11_device.cpp b/src/d3d11/d3d11_device.cpp
index abc4a5a8..d53a934e 100644
--- a/src/d3d11/d3d11_device.cpp
+++ b/src/d3d11/d3d11_device.cpp
@@ -1169,8 +1169,21 @@ namespace dxvk {
     subresources.layerCount = image->info().numLayers;
     m_resourceInitContext->initImage(image, subresources);
 
-    if (pInitialData != nullptr)
-      Logger::err("D3D11: InitTexture cannot upload image data yet");
+    if (pInitialData != nullptr) {
+      VkImageSubresourceLayers subresourceLayers;
+      subresourceLayers.aspectMask = subresources.aspectMask;
+      subresourceLayers.mipLevel = 0;
+      subresourceLayers.baseArrayLayer = 0;
+      subresourceLayers.layerCount = subresources.layerCount;
+
+      m_resourceInitContext->updateImage(
+        image, subresourceLayers,
+        VkOffset3D { 0, 0, 0 },
+        image->info().extent,
+        pInitialData->pSysMem,
+        pInitialData->SysMemPitch,
+        pInitialData->SysMemSlicePitch);
+    }
 
     m_dxvkDevice->submitCommandList(
       m_resourceInitContext->endRecording(),
diff --git a/src/dxgi/dxgi_presenter.cpp b/src/dxgi/dxgi_presenter.cpp
index 17f438cb..ba7295b4 100644
--- a/src/dxgi/dxgi_presenter.cpp
+++ b/src/dxgi/dxgi_presenter.cpp
@@ -22,7 +22,7 @@ namespace dxvk {
     // Create swap chain for the surface
     DxvkSwapchainProperties swapchainProperties;
     swapchainProperties.preferredSurfaceFormat = this->pickFormat(bufferFormat);
-    swapchainProperties.preferredPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+    swapchainProperties.preferredPresentMode = VK_PRESENT_MODE_FIFO_KHR;
     swapchainProperties.preferredBufferSize.width = bufferWidth;
     swapchainProperties.preferredBufferSize.height = bufferHeight;
 
@@ -206,7 +206,7 @@ namespace dxvk {
           DXGI_FORMAT bufferFormat) {
     DxvkSwapchainProperties swapchainProperties;
     swapchainProperties.preferredSurfaceFormat = this->pickFormat(bufferFormat);
-    swapchainProperties.preferredPresentMode = VK_PRESENT_MODE_IMMEDIATE_KHR;
+    swapchainProperties.preferredPresentMode = VK_PRESENT_MODE_FIFO_KHR;
     swapchainProperties.preferredBufferSize.width = bufferWidth;
     swapchainProperties.preferredBufferSize.height = bufferHeight;
 
diff --git a/src/dxvk/dxvk_buffer.h b/src/dxvk/dxvk_buffer.h
index b215bba9..8f27647d 100644
--- a/src/dxvk/dxvk_buffer.h
+++ b/src/dxvk/dxvk_buffer.h
@@ -47,11 +47,11 @@ namespace dxvk {
 
 
   /**
-   * \brief DXVK buffer
+   * \brief Buffer resource
    *
-   * A simple buffer resource that stores
-   * linear data. Can be mapped to host
-   * memory.
+   * A simple buffer resource that stores linear,
+   * unformatted data. Can be accessed by the host
+   * if allocated on an appropriate memory type.
    */
   class DxvkBuffer : public DxvkResource {
 
@@ -86,10 +86,11 @@ namespace dxvk {
      * If the buffer has been created on a host-visible
      * memory type, the buffer memory is mapped and can
      * be accessed by the host.
+     * \param [in] offset Byte offset into mapped region
      * \returns Pointer to mapped memory region
      */
-    void* mapPtr() const {
-      return m_memory.mapPtr();
+    void* mapPtr(VkDeviceSize offset) const {
+      return m_memory.mapPtr(offset);
     }
 
   private:
diff --git a/src/dxvk/dxvk_cmdlist.cpp b/src/dxvk/dxvk_cmdlist.cpp
index dc99b857..956d8d53 100644
--- a/src/dxvk/dxvk_cmdlist.cpp
+++ b/src/dxvk/dxvk_cmdlist.cpp
@@ -4,8 +4,9 @@ namespace dxvk {
 
   DxvkCommandList::DxvkCommandList(
     const Rc<vk::DeviceFn>& vkd,
+          DxvkDevice* device,
           uint32_t queueFamily)
-  : m_vkd(vkd), m_descAlloc(vkd) {
+  : m_vkd(vkd), m_descAlloc(vkd), m_stagingAlloc(device) {
     VkCommandPoolCreateInfo poolInfo;
     poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
     poolInfo.pNext = nullptr;
@@ -28,7 +29,7 @@ namespace dxvk {
 
 
   DxvkCommandList::~DxvkCommandList() {
-    m_resources.reset();
+    this->reset();
 
     m_vkd->vkDestroyCommandPool(
       m_vkd->device(), m_pool, nullptr);
@@ -86,6 +87,7 @@ namespace dxvk {
 
 
   void DxvkCommandList::reset() {
+    m_stagingAlloc.reset();
     m_descAlloc.reset();
     m_resources.reset();
   }
@@ -299,4 +301,35 @@ namespace dxvk {
       firstViewport, viewportCount, viewports);
   }
 
+
+  DxvkStagingBufferSlice DxvkCommandList::stagedAlloc(VkDeviceSize size) {
+    return m_stagingAlloc.alloc(size);
+  }
+
+
+  void DxvkCommandList::stagedBufferCopy(
+          VkBuffer dstBuffer,
+          VkDeviceSize dstOffset,
+          VkDeviceSize dataSize,
+    const DxvkStagingBufferSlice& dataSlice) {
+    VkBufferCopy region;
+    region.srcOffset = dataSlice.offset;
+    region.dstOffset = dstOffset;
+    region.size = dataSize;
+
+    m_vkd->vkCmdCopyBuffer(m_buffer,
+      dataSlice.buffer, dstBuffer, 1, &region);
+  }
+
+
+  void DxvkCommandList::stagedBufferImageCopy(
+          VkImage dstImage,
+          VkImageLayout dstImageLayout,
+    const VkBufferImageCopy& dstImageRegion,
+    const DxvkStagingBufferSlice& dataSlice) {
+    m_vkd->vkCmdCopyBufferToImage(m_buffer,
+      dataSlice.buffer, dstImage, dstImageLayout,
+      1, &dstImageRegion);
+  }
+
 }
\ No newline at end of file
diff --git a/src/dxvk/dxvk_cmdlist.h b/src/dxvk/dxvk_cmdlist.h
index 0039a206..c451d0f6 100644
--- a/src/dxvk/dxvk_cmdlist.h
+++ b/src/dxvk/dxvk_cmdlist.h
@@ -5,6 +5,7 @@
 #include "dxvk_descriptor.h"
 #include "dxvk_lifetime.h"
 #include "dxvk_pipelayout.h"
+#include "dxvk_staging.h"
 
 namespace dxvk {
 
@@ -23,6 +24,7 @@ namespace dxvk {
 
     DxvkCommandList(
       const Rc<vk::DeviceFn>& vkd,
+            DxvkDevice* device,
             uint32_t queueFamily);
     ~DxvkCommandList();
 
@@ -177,6 +179,21 @@ namespace dxvk {
             uint32_t viewportCount,
       const VkViewport* viewports);
 
+    DxvkStagingBufferSlice stagedAlloc(
+            VkDeviceSize size);
+
+    void stagedBufferCopy(
+            VkBuffer dstBuffer,
+            VkDeviceSize dstOffset,
+            VkDeviceSize dataSize,
+      const DxvkStagingBufferSlice& dataSlice);
+
+    void stagedBufferImageCopy(
+            VkImage dstImage,
+            VkImageLayout dstImageLayout,
+      const VkBufferImageCopy& dstImageRegion,
+      const DxvkStagingBufferSlice& dataSlice);
+
   private:
 
     Rc<vk::DeviceFn> m_vkd;
@@ -186,8 +203,7 @@ namespace dxvk {
 
     DxvkLifetimeTracker m_resources;
     DxvkDescriptorAlloc m_descAlloc;
-
-    std::vector<VkWriteDescriptorSet> m_descriptorSetWrites;
+    DxvkStagingAlloc m_stagingAlloc;
 
   };
 
diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp
index 6f2697a9..7306d9f5 100644
--- a/src/dxvk/dxvk_context.cpp
+++ b/src/dxvk/dxvk_context.cpp
@@ -1,3 +1,5 @@
+#include <cstring>
+
 #include "dxvk_device.h"
 #include "dxvk_context.h"
 #include "dxvk_main.h"
@@ -374,16 +376,23 @@ namespace dxvk {
     this->renderPassEnd();
 
     if (size == VK_WHOLE_SIZE)
-      size = buffer->info().size;
+      size = buffer->info().size - offset;
 
     if (size != 0) {
+      // Vulkan specifies that small amounts of data (up to 64kB)
+      // can be copied to a buffer directly. Anything larger than
+      // that must be copied through a staging buffer.
       if (size <= 65536) {
         m_cmd->cmdUpdateBuffer(
           buffer->handle(),
           offset, size, data);
       } else {
-        // TODO implement
-        Logger::err("DxvkContext::updateBuffer: Large updates not yet supported");
+        auto slice = m_cmd->stagedAlloc(size);
+        std::memcpy(slice.mapPtr, data, size);
+
+        m_cmd->stagedBufferCopy(
+          buffer->handle(),
+          offset, size, slice);
       }
 
       m_barriers.accessBuffer(
@@ -393,10 +402,103 @@ namespace dxvk {
         buffer->info().stages,
         buffer->info().access);
       m_barriers.recordCommands(m_cmd);
+
+      m_cmd->trackResource(buffer);
     }
   }
 
+
+  void DxvkContext::updateImage(
+    const Rc<DxvkImage>& image,
+    const VkImageSubresourceLayers& subresources,
+          VkOffset3D imageOffset,
+          VkExtent3D imageExtent,
+    const void* data,
+          VkDeviceSize pitchPerRow,
+          VkDeviceSize pitchPerLayer) {
+    if (subresources.layerCount == 0) {
+      Logger::warn("DxvkContext::updateImage: Layer count is zero");
+      return;
+    }
+
+    VkImageSubresourceRange subresourceRange;
+    subresourceRange.aspectMask = subresources.aspectMask;
+    subresourceRange.baseMipLevel = subresources.mipLevel;
+    subresourceRange.levelCount = 1;
+    subresourceRange.baseArrayLayer = subresources.baseArrayLayer;
+    subresourceRange.layerCount = subresources.layerCount;
+
+    m_barriers.accessImage(
+      image, subresourceRange,
+      image->info().extent == imageExtent
+        ? VK_IMAGE_LAYOUT_UNDEFINED
+        : image->info().layout,
+      image->info().stages,
+      image->info().access,
+      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+      VK_PIPELINE_STAGE_TRANSFER_BIT,
+      VK_ACCESS_TRANSFER_WRITE_BIT);
+    m_barriers.recordCommands(m_cmd);
+
+    // TODO support block formats properly
+    const DxvkFormatInfo* formatInfo
+      = imageFormatInfo(image->info().format);
+
+    const VkDeviceSize layerCount = imageExtent.depth * subresources.layerCount;
+    VkDeviceSize bytesPerRow = imageExtent.width * formatInfo->elementSize;
+    VkDeviceSize bytesPerLayer = imageExtent.height * bytesPerRow;
+    VkDeviceSize bytesTotal = layerCount * bytesPerLayer;
+
+    auto slice = m_cmd->stagedAlloc(bytesTotal);
+    auto dstData = reinterpret_cast<char*>(slice.mapPtr);
+    auto srcData = reinterpret_cast<const char*>(data);
+
+    bool useDirectCopy = true;
+    useDirectCopy &= (pitchPerLayer == bytesPerLayer) || layerCount == 1;
+    useDirectCopy &= (pitchPerRow == bytesPerRow) || imageExtent.height == 1;
+
+    if (useDirectCopy) {
+      std::memcpy(dstData, srcData, bytesTotal);
+    } else {
+      for (uint32_t i = 0; i < layerCount; i++) {
+        // Address rows relative to the layer base so that the
+        // per-layer advance below is the only pointer update.
+        for (uint32_t j = 0; j < imageExtent.height; j++) {
+          std::memcpy(dstData + j * bytesPerRow,
+                      srcData + j * pitchPerRow, bytesPerRow);
+        }
+
+        dstData += bytesPerLayer;
+        srcData += pitchPerLayer;
+      }
+    }
+
+    VkBufferImageCopy region;
+    region.bufferOffset = slice.offset;
+    region.bufferRowLength = 0;
+    region.bufferImageHeight = 0;
+    region.imageSubresource = subresources;
+    region.imageOffset = imageOffset;
+    region.imageExtent = imageExtent;
+
+    m_cmd->stagedBufferImageCopy(image->handle(),
+      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+      region, slice);
+
+    m_barriers.accessImage(
+      image, subresourceRange,
+      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+      VK_PIPELINE_STAGE_TRANSFER_BIT,
+      VK_ACCESS_TRANSFER_WRITE_BIT,
+      image->info().layout,
+      image->info().stages,
+      image->info().access);
+    m_barriers.recordCommands(m_cmd);
+
+    m_cmd->trackResource(image);
+  }
+
 
   void DxvkContext::setViewports(
           uint32_t viewportCount,
     const VkViewport* viewports,
diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h
index 0bbb4d7f..68e4678d 100644
--- a/src/dxvk/dxvk_context.h
+++ b/src/dxvk/dxvk_context.h
@@ -262,6 +262,27 @@ namespace dxvk {
             VkDeviceSize size,
       const void* data);
 
+    /**
+     * \brief Updates an image
+     *
+     * Copies data from the host into an image.
+     * \param [in] image Destination image
+     * \param [in] subresources Image subresources to update
+     * \param [in] imageOffset Offset of the image area to update
+     * \param [in] imageExtent Size of the image area to update
+     * \param [in] data Source data
+     * \param [in] pitchPerRow Row pitch of the source data
+     * \param [in] pitchPerLayer Layer pitch of the source data
+     */
+    void updateImage(
+      const Rc<DxvkImage>& image,
+      const VkImageSubresourceLayers& subresources,
+            VkOffset3D imageOffset,
+            VkExtent3D imageExtent,
+      const void* data,
+            VkDeviceSize pitchPerRow,
+            VkDeviceSize pitchPerLayer);
+
     /**
      * \brief Sets viewports
      *
diff --git a/src/dxvk/dxvk_device.cpp b/src/dxvk/dxvk_device.cpp
index 2872245d..079ad9aa 100644
--- a/src/dxvk/dxvk_device.cpp
+++ b/src/dxvk/dxvk_device.cpp
@@ -32,8 +32,34 @@ namespace dxvk {
   }
 
 
+  Rc<DxvkStagingBuffer> DxvkDevice::allocStagingBuffer(VkDeviceSize size) {
+    // TODO actually recycle old buffers
+    const VkDeviceSize baseSize = 64 * 1024 * 1024;
+    const VkDeviceSize bufferSize = std::max(baseSize, size);
+
+    DxvkBufferCreateInfo info;
+    info.size = bufferSize;
+    info.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+    info.stages = VK_PIPELINE_STAGE_TRANSFER_BIT
+                | VK_PIPELINE_STAGE_HOST_BIT;
+    info.access = VK_ACCESS_TRANSFER_READ_BIT
+                | VK_ACCESS_HOST_WRITE_BIT;
+
+    VkMemoryPropertyFlags memFlags
+      = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+      | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    return new DxvkStagingBuffer(this->createBuffer(info, memFlags));
+  }
+
+
+  void DxvkDevice::recycleStagingBuffer(const Rc<DxvkStagingBuffer>& buffer) {
+    // TODO implement
+  }
+
+
   Rc<DxvkCommandList> DxvkDevice::createCommandList() {
-    return new DxvkCommandList(m_vkd,
+    return new DxvkCommandList(m_vkd, this,
       m_adapter->graphicsQueueFamily());
   }
 
diff --git a/src/dxvk/dxvk_device.h b/src/dxvk/dxvk_device.h
index f65a323c..d20d2bb8 100644
--- a/src/dxvk/dxvk_device.h
+++ b/src/dxvk/dxvk_device.h
@@ -73,6 +73,29 @@ namespace dxvk {
       return m_features;
     }
 
+    /**
+     * \brief Allocates a staging buffer
+     *
+     * Returns a staging buffer that is at least as large
+     * as the requested size. It is usually bigger so that
+     * a single staging buffer may serve multiple allocations.
+     * \param [in] size Minimum buffer size
+     * \returns The staging buffer
+     */
+    Rc<DxvkStagingBuffer> allocStagingBuffer(
+            VkDeviceSize size);
+
+    /**
+     * \brief Recycles a staging buffer
+     *
+     * When a staging buffer is no longer needed, it should
+     * be returned to the device so that it can be reused
+     * for subsequent allocations.
+     * \param [in] buffer The buffer
+     */
+    void recycleStagingBuffer(
+      const Rc<DxvkStagingBuffer>& buffer);
+
     /**
      * \brief Creates a command list
      * \returns The command list
diff --git a/src/dxvk/dxvk_format.h b/src/dxvk/dxvk_format.h
index a193f024..e2c2b01f 100644
--- a/src/dxvk/dxvk_format.h
+++ b/src/dxvk/dxvk_format.h
@@ -12,7 +12,7 @@ namespace dxvk {
    */
   struct DxvkFormatInfo {
     /// Size of an element in this format
-    uint32_t byteSize;
+    VkDeviceSize elementSize;
 
     /// Available image aspect flags
     VkImageAspectFlags aspectMask;
diff --git a/src/dxvk/dxvk_image.h b/src/dxvk/dxvk_image.h
index 38972e28..b21347ba 100644
--- a/src/dxvk/dxvk_image.h
+++ b/src/dxvk/dxvk_image.h
@@ -78,8 +78,8 @@ namespace dxvk {
    * \brief DXVK image
    *
    * An image resource consisting of various subresources.
-   * Cannot be mapped to host memory, the only way to access
-   * image data is through buffer transfer operations.
+   * Can be accessed by the host if allocated on a suitable
+   * memory type and if created with the linear tiling option.
    */
   class DxvkImage : public DxvkResource {
 
diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h
index d7ffb72a..cd4afce6 100644
--- a/src/dxvk/dxvk_memory.h
+++ b/src/dxvk/dxvk_memory.h
@@ -47,10 +47,12 @@ namespace dxvk {
 
     /**
      * \brief Pointer to mapped data
+     *
+     * \param [in] offset Byte offset
      * \returns Pointer to mapped data
      */
-    void* mapPtr() const {
-      return m_mapPtr;
+    void* mapPtr(VkDeviceSize offset) const {
+      return reinterpret_cast<char*>(m_mapPtr) + offset;
     }
 
   private:
diff --git a/src/dxvk/dxvk_staging.cpp b/src/dxvk/dxvk_staging.cpp
new file mode 100644
index 00000000..6308c382
--- /dev/null
+++ b/src/dxvk/dxvk_staging.cpp
@@ -0,0 +1,95 @@
+#include "dxvk_device.h"
+#include "dxvk_staging.h"
+
+namespace dxvk {
+
+  DxvkStagingBuffer::DxvkStagingBuffer(
+    const Rc<DxvkBuffer>& buffer)
+  : m_buffer(buffer), m_bufferSize(buffer->info().size){
+
+  }
+
+
+  DxvkStagingBuffer::~DxvkStagingBuffer() {
+
+  }
+
+
+  VkDeviceSize DxvkStagingBuffer::freeBytes() const {
+    return m_bufferSize - m_bufferOffset;
+  }
+
+
+  bool DxvkStagingBuffer::alloc(
+          VkDeviceSize size,
+          DxvkStagingBufferSlice& slice) {
+    // Compare against the remaining space instead of computing
+    // m_bufferOffset + size, which could wrap around.
+    if (size > m_bufferSize - m_bufferOffset)
+      return false;
+
+    slice.buffer = m_buffer->handle();
+    slice.offset = m_bufferOffset;
+    slice.mapPtr = m_buffer->mapPtr(m_bufferOffset);
+
+    // Round the end of the slice up to 64 bytes so that the next
+    // slice starts at an offset that is valid for buffer-to-image
+    // copies of formats with a power-of-two texel size. Clamp to
+    // the buffer size so that freeBytes() cannot underflow.
+    const VkDeviceSize sliceAlign = 64;
+    const VkDeviceSize sliceEnd = (m_bufferOffset + size + sliceAlign - 1)
+                                & ~(sliceAlign - 1);
+
+    m_bufferOffset = sliceEnd < m_bufferSize ? sliceEnd : m_bufferSize;
+    return true;
+  }
+
+
+  void DxvkStagingBuffer::reset() {
+    m_bufferOffset = 0;
+  }
+
+
+  DxvkStagingAlloc::DxvkStagingAlloc(DxvkDevice* device)
+  : m_device(device) { }
+
+
+  DxvkStagingAlloc::~DxvkStagingAlloc() {
+    this->reset();
+  }
+
+
+  DxvkStagingBufferSlice DxvkStagingAlloc::alloc(VkDeviceSize size) {
+    Rc<DxvkStagingBuffer> selectedBuffer;
+
+    // Try a worst-fit allocation strategy on the existing staging
+    // buffers first. This should keep the amount of wasted space
+    // small, especially if there are large allocations.
+    for (const auto& buf : m_stagingBuffers) {
+      if (selectedBuffer == nullptr || (buf->freeBytes() > selectedBuffer->freeBytes()))
+        selectedBuffer = buf;
+    }
+
+    // If we have no suitable buffer, allocate one from the device
+    // that is *at least* as large as the amount of data we need
+    // to upload. Usually it will be bigger.
+    DxvkStagingBufferSlice slice;
+
+    if ((selectedBuffer == nullptr) || (!selectedBuffer->alloc(size, slice))) {
+      selectedBuffer = m_device->allocStagingBuffer(size);
+      selectedBuffer->alloc(size, slice);
+      m_stagingBuffers.push_back(selectedBuffer);
+    }
+
+    return slice;
+  }
+
+
+  void DxvkStagingAlloc::reset() {
+    for (const auto& buf : m_stagingBuffers)
+      m_device->recycleStagingBuffer(buf);
+
+    m_stagingBuffers.resize(0);
+  }
+
+}
diff --git a/src/dxvk/dxvk_staging.h b/src/dxvk/dxvk_staging.h
new file mode 100644
index 00000000..5396ee0f
--- /dev/null
+++ b/src/dxvk/dxvk_staging.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "dxvk_buffer.h"
+
+namespace dxvk {
+
+  class DxvkDevice;
+
+  struct DxvkStagingBufferSlice {
+    VkBuffer buffer = VK_NULL_HANDLE;
+    VkDeviceSize offset = 0;
+    void* mapPtr = nullptr;
+  };
+
+
+  class DxvkStagingBuffer : public RcObject {
+
+  public:
+
+    DxvkStagingBuffer(
+      const Rc<DxvkBuffer>& buffer);
+
+    ~DxvkStagingBuffer();
+
+    VkDeviceSize freeBytes() const;
+
+    bool alloc(
+            VkDeviceSize size,
+            DxvkStagingBufferSlice& slice);
+
+    void reset();
+
+  private:
+
+    Rc<DxvkBuffer> m_buffer;
+
+    VkDeviceSize m_bufferSize = 0;
+    VkDeviceSize m_bufferOffset = 0;
+
+  };
+
+
+  class DxvkStagingAlloc {
+
+  public:
+
+    DxvkStagingAlloc(DxvkDevice* device);
+    ~DxvkStagingAlloc();
+
+    DxvkStagingBufferSlice alloc(
+            VkDeviceSize size);
+
+    void reset();
+
+  private:
+
+    DxvkDevice* const m_device;
+
+    std::vector<Rc<DxvkStagingBuffer>> m_stagingBuffers;
+
+  };
+
+}
diff --git a/src/dxvk/dxvk_util.cpp b/src/dxvk/dxvk_util.cpp
index 88f4b22d..2b4cb3b8 100644
--- a/src/dxvk/dxvk_util.cpp
+++ b/src/dxvk/dxvk_util.cpp
@@ -20,4 +20,17 @@ namespace dxvk::util {
     return result;
   }
 
-}
\ No newline at end of file
+}
+
+bool operator == (VkExtent3D a, VkExtent3D b) {
+  return a.width == b.width
+      && a.height == b.height
+      && a.depth == b.depth;
+}
+
+
+bool operator != (VkExtent3D a, VkExtent3D b) {
+  return a.width != b.width
+      || a.height != b.height
+      || a.depth != b.depth;
+}
diff --git a/src/dxvk/dxvk_util.h b/src/dxvk/dxvk_util.h
index 50195d7f..c52f3240 100644
--- a/src/dxvk/dxvk_util.h
+++ b/src/dxvk/dxvk_util.h
@@ -13,4 +13,8 @@ namespace dxvk::util {
 
   VkPipelineStageFlags pipelineStages(
           VkShaderStageFlags shaderStages);
-}
\ No newline at end of file
+
+}
+
+bool operator == (VkExtent3D a, VkExtent3D b);
+bool operator != (VkExtent3D a, VkExtent3D b);
diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build
index d68e3432..814b0590 100644
--- a/src/dxvk/meson.build
+++ b/src/dxvk/meson.build
@@ -22,6 +22,7 @@ dxvk_src = files([
   'dxvk_resource.cpp',
   'dxvk_sampler.cpp',
   'dxvk_shader.cpp',
+  'dxvk_staging.cpp',
   'dxvk_surface.cpp',
   'dxvk_swapchain.cpp',
   'dxvk_sync.cpp',