From d3e89b20dda226dc142aae1b83cc9c90e09f1a5e Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 10 Mar 2018 23:32:15 +0100 Subject: [PATCH] [d3d11] Allocate host-readable images on host-visible memory Not having to wait for an image->buffer copy to finish on the GPU allows for more efficient synchronization. Significantly improves performance in The Witcher 3. --- src/d3d11/d3d11_context_imm.cpp | 224 ++++++++++++++++++-------------- src/d3d11/d3d11_context_imm.h | 4 + src/d3d11/d3d11_texture.cpp | 36 ++++- src/d3d11/d3d11_texture.h | 1 - 4 files changed, 162 insertions(+), 103 deletions(-) diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index 528c7307..46badedf 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -120,17 +120,8 @@ namespace dxvk { cPhysicalSlice.resource()->release(); }); } else if (MapType != D3D11_MAP_WRITE_NO_OVERWRITE) { - // Synchronize with CS thread so that we know whether - // the buffer is currently in use by the GPU or not - Flush(); - SynchronizeCsThread(); - - while (buffer->isInUse()) { - if (MapFlags & D3D11_MAP_FLAG_DO_NOT_WAIT) - return DXGI_ERROR_WAS_STILL_DRAWING; - - SynchronizeDevice(); - } + if (!WaitForResource(buffer->resource(), MapFlags)) + return DXGI_ERROR_WAS_STILL_DRAWING; } // Use map pointer from previous map operation. This @@ -144,79 +135,103 @@ namespace dxvk { pMappedResource->DepthPitch = physicalSlice.length(); return S_OK; } else { - // Mapping an image is sadly not as simple as mapping a buffer - // because applications tend to ignore row and layer strides. - // We use a buffer instead and then perform a copy. + // Depending on whether the image has been allocated on a + // host-visible memory type, we can either use the mapped + // memory region directly, or we map a linear buffer. 
D3D11TextureInfo* textureInfo = GetCommonTextureInfo(pResource); - - if (textureInfo->imageBuffer == nullptr) { - Logger::err("D3D11: Cannot map a device-local image"); - return E_INVALIDARG; - } - - if (pMappedResource == nullptr) - return S_FALSE; - - // Query format and subresource in order to compute - // the row pitch and layer pitch properly. const DxvkImageCreateInfo& imageInfo = textureInfo->image->info(); - const DxvkFormatInfo* formatInfo = imageFormatInfo(imageInfo.format); - textureInfo->mappedSubresource = - GetSubresourceFromIndex(VK_IMAGE_ASPECT_COLOR_BIT, - imageInfo.mipLevels, Subresource); + textureInfo->mappedSubresource = GetSubresourceFromIndex( + VK_IMAGE_ASPECT_COLOR_BIT, imageInfo.mipLevels, Subresource); - const VkExtent3D levelExtent = textureInfo->image - ->mipLevelExtent(textureInfo->mappedSubresource.mipLevel); - - const VkExtent3D blockCount = util::computeBlockCount( - levelExtent, formatInfo->blockSize); - - DxvkPhysicalBufferSlice physicalSlice; - - // When using any map mode which requires the image contents - // to be preserved, copy the image's contents into the buffer. 
- if (MapType == D3D11_MAP_WRITE_DISCARD) { - physicalSlice = textureInfo->imageBuffer->allocPhysicalSlice(); - physicalSlice.resource()->acquire(); + if (textureInfo->image->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + const VkSubresourceLayout subresourceLayout = + textureInfo->image->querySubresourceLayout( + textureInfo->mappedSubresource); - EmitCs([ - cImageBuffer = textureInfo->imageBuffer, - cPhysicalSlice = physicalSlice - ] (DxvkContext* ctx) { - ctx->invalidateBuffer(cImageBuffer, cPhysicalSlice); - cPhysicalSlice.resource()->release(); - }); + if (!WaitForResource(textureInfo->image, MapFlags)) + return DXGI_ERROR_WAS_STILL_DRAWING; + + pMappedResource->pData = textureInfo->image->mapPtr(subresourceLayout.offset); + pMappedResource->RowPitch = subresourceLayout.rowPitch; + pMappedResource->DepthPitch = subresourceLayout.rowPitch * imageInfo.extent.height; + + if (imageInfo.type == VK_IMAGE_TYPE_3D) + pMappedResource->DepthPitch = subresourceLayout.depthPitch; + else if (imageInfo.numLayers > 1) + pMappedResource->DepthPitch = subresourceLayout.arrayPitch; + + return S_OK; } else { - const VkImageSubresourceLayers subresourceLayers = { - textureInfo->mappedSubresource.aspectMask, - textureInfo->mappedSubresource.mipLevel, - textureInfo->mappedSubresource.arrayLayer, 1 }; + if (textureInfo->imageBuffer == nullptr) { + Logger::err("D3D11: Cannot map a device-local image"); + return E_INVALIDARG; + } - EmitCs([ - cImageBuffer = textureInfo->imageBuffer, - cImage = textureInfo->image, - cSubresources = subresourceLayers, - cLevelExtent = levelExtent - ] (DxvkContext* ctx) { - ctx->copyImageToBuffer( - cImageBuffer, 0, VkExtent2D { 0u, 0u }, - cImage, cSubresources, VkOffset3D { 0, 0, 0 }, - cLevelExtent); - }); + if (pMappedResource == nullptr) + return S_FALSE; - Flush(); - SynchronizeCsThread(); - SynchronizeDevice(); + // Query format info in order to compute + // the row pitch and layer pitch properly. 
+ const DxvkFormatInfo* formatInfo = imageFormatInfo(imageInfo.format); - physicalSlice = textureInfo->imageBuffer->slice(); + const VkExtent3D levelExtent = textureInfo->image + ->mipLevelExtent(textureInfo->mappedSubresource.mipLevel); + + const VkExtent3D blockCount = util::computeBlockCount( + levelExtent, formatInfo->blockSize); + + DxvkPhysicalBufferSlice physicalSlice; + + // When using any map mode which requires the image contents + // to be preserved, copy the image's contents into the buffer. + if (MapType == D3D11_MAP_WRITE_DISCARD) { + physicalSlice = textureInfo->imageBuffer->allocPhysicalSlice(); + physicalSlice.resource()->acquire(); + + EmitCs([ + cImageBuffer = textureInfo->imageBuffer, + cPhysicalSlice = physicalSlice + ] (DxvkContext* ctx) { + ctx->invalidateBuffer(cImageBuffer, cPhysicalSlice); + cPhysicalSlice.resource()->release(); + }); + } else { + // We may have to copy the current image contents into the + // mapped buffer if the GPU has write access to the image. + const bool copyExistingData = textureInfo->usage == D3D11_USAGE_STAGING; + + if (copyExistingData) { + const VkImageSubresourceLayers subresourceLayers = { + textureInfo->mappedSubresource.aspectMask, + textureInfo->mappedSubresource.mipLevel, + textureInfo->mappedSubresource.arrayLayer, 1 }; + + EmitCs([ + cImageBuffer = textureInfo->imageBuffer, + cImage = textureInfo->image, + cSubresources = subresourceLayers, + cLevelExtent = levelExtent + ] (DxvkContext* ctx) { + ctx->copyImageToBuffer( + cImageBuffer, 0, VkExtent2D { 0u, 0u }, + cImage, cSubresources, VkOffset3D { 0, 0, 0 }, + cLevelExtent); + }); + } + + if (!WaitForResource(textureInfo->imageBuffer->resource(), MapFlags)) + return DXGI_ERROR_WAS_STILL_DRAWING; + + physicalSlice = textureInfo->imageBuffer->slice(); + } + + // Set up map pointer. Data is tightly packed within the mapped buffer. 
+ pMappedResource->pData = physicalSlice.mapPtr(0); + pMappedResource->RowPitch = formatInfo->elementSize * blockCount.width; + pMappedResource->DepthPitch = formatInfo->elementSize * blockCount.width * blockCount.height; + return S_OK; } - - // Set up map pointer. Data is tightly packed within the mapped buffer. - pMappedResource->pData = physicalSlice.mapPtr(0); - pMappedResource->RowPitch = formatInfo->elementSize * blockCount.width; - pMappedResource->DepthPitch = formatInfo->elementSize * blockCount.width * blockCount.height; - return S_OK; } } @@ -233,24 +248,26 @@ namespace dxvk { const D3D11TextureInfo* textureInfo = GetCommonTextureInfo(pResource); - const VkExtent3D levelExtent = textureInfo->image - ->mipLevelExtent(textureInfo->mappedSubresource.mipLevel); - - const VkImageSubresourceLayers subresourceLayers = { - textureInfo->mappedSubresource.aspectMask, - textureInfo->mappedSubresource.mipLevel, - textureInfo->mappedSubresource.arrayLayer, 1 }; - - EmitCs([ - cSrcBuffer = textureInfo->imageBuffer, - cDstImage = textureInfo->image, - cDstLayers = subresourceLayers, - cDstLevelExtent = levelExtent - ] (DxvkContext* ctx) { - ctx->copyBufferToImage(cDstImage, cDstLayers, - VkOffset3D { 0, 0, 0 }, cDstLevelExtent, - cSrcBuffer, 0, { 0u, 0u }); - }); + if (!(textureInfo->image->memFlags() & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + const VkExtent3D levelExtent = textureInfo->image + ->mipLevelExtent(textureInfo->mappedSubresource.mipLevel); + + const VkImageSubresourceLayers subresourceLayers = { + textureInfo->mappedSubresource.aspectMask, + textureInfo->mappedSubresource.mipLevel, + textureInfo->mappedSubresource.arrayLayer, 1 }; + + EmitCs([ + cSrcBuffer = textureInfo->imageBuffer, + cDstImage = textureInfo->image, + cDstLayers = subresourceLayers, + cDstLevelExtent = levelExtent + ] (DxvkContext* ctx) { + ctx->copyBufferToImage(cDstImage, cDstLayers, + VkOffset3D { 0, 0, 0 }, cDstLevelExtent, + cSrcBuffer, 0, { 0u, 0u }); + }); + } } } @@ -265,14 
+282,29 @@ namespace dxvk { void D3D11ImmediateContext::SynchronizeDevice() { - // FIXME waiting until the device finished executing *all* - // pending commands is too pessimistic. Instead we should - // wait for individual command submissions to complete. - // This will require changes in the DxvkDevice class. m_device->waitForIdle(); } + bool D3D11ImmediateContext::WaitForResource( + const Rc& Resource, + UINT MapFlags) { + // Wait for any pending D3D11 commands to be executed + // on the CS thread so that we can determine whether the + // resource is currently in use or not. + Flush(); + SynchronizeCsThread(); + + if (Resource->isInUse()) { + // TODO implement properly in DxvkDevice; MapFlags is currently ignored + while (Resource->isInUse()) + std::this_thread::yield(); + } + + return true; + } + + void D3D11ImmediateContext::EmitCsChunk(Rc&& chunk) { m_csThread.dispatchChunk(std::move(chunk)); } diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h index 4e661d4c..f727ff97 100644 --- a/src/d3d11/d3d11_context_imm.h +++ b/src/d3d11/d3d11_context_imm.h @@ -50,6 +50,10 @@ namespace dxvk { void SynchronizeDevice(); + bool WaitForResource( + const Rc& Resource, + UINT MapFlags); + void EmitCsChunk(Rc&& chunk) final; }; diff --git a/src/d3d11/d3d11_texture.cpp b/src/d3d11/d3d11_texture.cpp index 83e1efa7..2e53bc2c 100644 --- a/src/d3d11/d3d11_texture.cpp +++ b/src/d3d11/d3d11_texture.cpp @@ -22,7 +22,6 @@ namespace dxvk { return DxgiFormatMode::Any; } - /** * \brief Optimizes image layout based on usage flags * @@ -144,8 +143,10 @@ namespace dxvk { if (CPUAccessFlags != 0) { pImageInfo->stages |= VK_PIPELINE_STAGE_HOST_BIT; - if (CPUAccessFlags & D3D11_CPU_ACCESS_WRITE) + if (CPUAccessFlags & D3D11_CPU_ACCESS_WRITE) { pImageInfo->access |= VK_ACCESS_HOST_WRITE_BIT; + pImageInfo->tiling = VK_IMAGE_TILING_LINEAR; + } if (CPUAccessFlags & D3D11_CPU_ACCESS_READ) pImageInfo->access |= VK_ACCESS_HOST_READ_BIT; @@ -159,6 +160,29 @@ namespace dxvk { } + /** + * \brief Retrieves 
memory flags for image usage + * + * If the host requires read access to the image, we + * should create it on a host-visible memory type. + * \param [in] CPUAccessFlags CPU access flags + * \returns Image memory properties + */ + static VkMemoryPropertyFlags GetImageMemoryFlags(UINT CPUAccessFlags) { + if (CPUAccessFlags & D3D11_CPU_ACCESS_READ) { + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } else { + // If only write access is required, we will emulate + // image mapping through a buffer. Some games ignore + // the row pitch when mapping images, which leads to + // incorrect rendering. + return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + } + + D3D11Texture1D::D3D11Texture1D( D3D11Device* pDevice, const D3D11_TEXTURE1D_DESC* pDesc) @@ -201,7 +225,7 @@ namespace dxvk { // Create the image and, if necessary, the image buffer m_texInfo.formatMode = formatMode; m_texInfo.image = pDevice->GetDXVKDevice()->createImage( - info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + info, GetImageMemoryFlags(m_desc.CPUAccessFlags)); m_texInfo.imageBuffer = m_desc.CPUAccessFlags != 0 ? CreateImageBuffer(pDevice->GetDXVKDevice(), info.format, info.extent) : nullptr; @@ -301,7 +325,7 @@ namespace dxvk { // Create the image and, if necessary, the image buffer m_texInfo.formatMode = formatMode; m_texInfo.image = pDevice->GetDXVKDevice()->createImage( - info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + info, GetImageMemoryFlags(m_desc.CPUAccessFlags)); m_texInfo.imageBuffer = m_desc.CPUAccessFlags != 0 ? 
CreateImageBuffer(pDevice->GetDXVKDevice(), info.format, info.extent) : nullptr; @@ -354,7 +378,7 @@ namespace dxvk { /////////////////////////////////////////// - // D 3 D 1 1 T E X T U R E 2 D + // D 3 D 1 1 T E X T U R E 3 D D3D11Texture3D::D3D11Texture3D( D3D11Device* pDevice, const D3D11_TEXTURE3D_DESC* pDesc) @@ -398,7 +422,7 @@ namespace dxvk { // Create the image and, if necessary, the image buffer m_texInfo.formatMode = formatMode; m_texInfo.image = pDevice->GetDXVKDevice()->createImage( - info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + info, GetImageMemoryFlags(m_desc.CPUAccessFlags)); m_texInfo.imageBuffer = m_desc.CPUAccessFlags != 0 ? CreateImageBuffer(pDevice->GetDXVKDevice(), info.format, info.extent) : nullptr; diff --git a/src/d3d11/d3d11_texture.h b/src/d3d11/d3d11_texture.h index 350bdf89..6adf35ff 100644 --- a/src/d3d11/d3d11_texture.h +++ b/src/d3d11/d3d11_texture.h @@ -28,7 +28,6 @@ namespace dxvk { }; - /////////////////////////////////////////// // D 3 D 1 1 T E X T U R E 1 D class D3D11Texture1D : public D3D11DeviceChild {