From 781ee00f5cfea4bbbf43adc3c508ea74af468d07 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Mon, 8 Oct 2018 09:23:11 +0200 Subject: [PATCH] [dxvk] Refactor indirect draw/dispatch commands Introduces an OpenGL-style bind point for the argument buffer, which means we can avoid a lot of unnecessary reference tracking in games that do a lot of indirect draw calls. Reduces CPU overhead in Assassin's Creed Odyssey. --- src/d3d11/d3d11_context.cpp | 48 +++++++++++++++++++++------ src/d3d11/d3d11_context.h | 6 ++++ src/d3d11/d3d11_context_state.h | 6 ++++ src/dxvk/dxvk_context.cpp | 59 +++++++++++++++++++-------------- src/dxvk/dxvk_context.h | 42 +++++++++++++---------- src/dxvk/dxvk_context_state.h | 8 +++++ 6 files changed, 117 insertions(+), 52 deletions(-) diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp index 50b4615a..c2f95db0 100644 --- a/src/d3d11/d3d11_context.cpp +++ b/src/d3d11/d3d11_context.cpp @@ -165,6 +165,9 @@ namespace dxvk { m_state.ps.unorderedAccessViews[i] = nullptr; m_state.cs.unorderedAccessViews[i] = nullptr; } + + // Default ID state + m_state.id.argBuffer = nullptr; // Default IA state m_state.ia.inputLayout = nullptr; @@ -1325,12 +1328,11 @@ namespace dxvk { void STDMETHODCALLTYPE D3D11DeviceContext::DrawIndexedInstancedIndirect( ID3D11Buffer* pBufferForArgs, UINT AlignedByteOffsetForArgs) { - D3D11Buffer* buffer = static_cast(pBufferForArgs); + SetDrawBuffer(pBufferForArgs); - EmitCs([bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs)] + EmitCs([cOffset = AlignedByteOffsetForArgs] (DxvkContext* ctx) { - ctx->drawIndexedIndirect( - bufferSlice, 1, 0); + ctx->drawIndexedIndirect(cOffset, 1, 0); }); } @@ -1338,11 +1340,11 @@ namespace dxvk { void STDMETHODCALLTYPE D3D11DeviceContext::DrawInstancedIndirect( ID3D11Buffer* pBufferForArgs, UINT AlignedByteOffsetForArgs) { - D3D11Buffer* buffer = static_cast(pBufferForArgs); + SetDrawBuffer(pBufferForArgs); - EmitCs([bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs)] + EmitCs([cOffset = AlignedByteOffsetForArgs] (DxvkContext* ctx) { - ctx->drawIndirect(bufferSlice, 1, 0); + ctx->drawIndirect(cOffset, 1, 0); }); } @@ -1363,11 +1365,11 @@ namespace dxvk { void STDMETHODCALLTYPE D3D11DeviceContext::DispatchIndirect( ID3D11Buffer* pBufferForArgs, UINT AlignedByteOffsetForArgs) { - D3D11Buffer* buffer = static_cast(pBufferForArgs); + SetDrawBuffer(pBufferForArgs); - EmitCs([bufferSlice = buffer->GetBufferSlice(AlignedByteOffsetForArgs)] + EmitCs([cOffset = AlignedByteOffsetForArgs] (DxvkContext* ctx) { - ctx->dispatchIndirect(bufferSlice); + ctx->dispatchIndirect(cOffset); }); } @@ -2809,6 +2811,18 @@ namespace dxvk { } + void D3D11DeviceContext::BindDrawBuffer( + D3D11Buffer* pBuffer) { + EmitCs([ + cBufferSlice = pBuffer != nullptr + ? pBuffer->GetBufferSlice() + : DxvkBufferSlice() + ] (DxvkContext* ctx) { + ctx->bindDrawBuffer(cBufferSlice); + }); + } + + void D3D11DeviceContext::BindVertexBuffer( UINT Slot, D3D11Buffer* pBuffer, @@ -2937,6 +2951,17 @@ namespace dxvk { } + void D3D11DeviceContext::SetDrawBuffer( + ID3D11Buffer* pBuffer) { + auto buffer = static_cast(pBuffer); + + if (m_state.id.argBuffer != buffer) { + m_state.id.argBuffer = buffer; + BindDrawBuffer(buffer); + } + } + + void D3D11DeviceContext::SetConstantBuffers( DxbcProgramType ShaderStage, D3D11ConstantBufferBindings& Bindings, @@ -3101,6 +3126,9 @@ namespace dxvk { ApplyStencilRef(); ApplyRasterizerState(); ApplyViewportState(); + + BindDrawBuffer( + m_state.id.argBuffer.ptr()); BindIndexBuffer( m_state.ia.indexBuffer.buffer.ptr(), diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h index 2d2f11a5..e2e9166c 100644 --- a/src/d3d11/d3d11_context.h +++ b/src/d3d11/d3d11_context.h @@ -676,6 +676,9 @@ namespace dxvk { void BindFramebuffer( BOOL Spill); + void BindDrawBuffer( + D3D11Buffer* pBuffer); + void BindVertexBuffer( UINT Slot, D3D11Buffer* pBuffer, @@ -711,6 +714,9 @@ namespace dxvk { void DiscardTexture( D3D11CommonTexture* pTexture); + void SetDrawBuffer( + ID3D11Buffer* pBuffer); + void SetConstantBuffers( DxbcProgramType ShaderStage, D3D11ConstantBufferBindings& Bindings, diff --git a/src/d3d11/d3d11_context_state.h b/src/d3d11/d3d11_context_state.h index 01098805..18b086cc 100644 --- a/src/d3d11/d3d11_context_state.h +++ b/src/d3d11/d3d11_context_state.h @@ -99,6 +99,11 @@ namespace dxvk { UINT offset = 0; DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; }; + + + struct D3D11ContextStateID { + Com argBuffer = nullptr; + }; struct D3D11ContextStateIA { @@ -158,6 +163,7 @@ namespace dxvk { D3D11ContextStatePS ps; D3D11ContextStateVS vs; + D3D11ContextStateID id; D3D11ContextStateIA ia; D3D11ContextStateOM om; D3D11ContextStateRS rs; diff --git a/src/dxvk/dxvk_context.cpp b/src/dxvk/dxvk_context.cpp index d3424509..583dc91d 100644 --- a/src/dxvk/dxvk_context.cpp +++ b/src/dxvk/dxvk_context.cpp @@ -46,13 +46,13 @@ namespace dxvk { DxvkContextFlag::GpDirtyIndexBuffer, DxvkContextFlag::CpDirtyPipeline, DxvkContextFlag::CpDirtyPipelineState, - DxvkContextFlag::CpDirtyResources); + DxvkContextFlag::CpDirtyResources, + DxvkContextFlag::DirtyDrawBuffer); } Rc DxvkContext::endRecording() { this->spillRenderPass(); - this->trackDrawBuffer(DxvkBufferSlice(), VK_NULL_HANDLE); m_queries.trackQueryPools(m_cmd); @@ -104,6 +104,16 @@ namespace dxvk { } + void DxvkContext::bindDrawBuffer( + const DxvkBufferSlice& buffer) { + if (!m_state.id.argBuffer.matches(buffer)) { + m_state.id.argBuffer = buffer; + + m_flags.set(DxvkContextFlag::DirtyDrawBuffer); + } + } + + void DxvkContext::bindIndexBuffer( const DxvkBufferSlice& buffer, VkIndexType indexType) { @@ -880,10 +890,11 @@ namespace dxvk { void DxvkContext::dispatchIndirect( - const DxvkBufferSlice& buffer) { + VkDeviceSize offset) { this->commitComputeState(); - auto physicalSlice = buffer.physicalSlice(); + auto physicalSlice = m_state.id.argBuffer.physicalSlice() + .subSlice(offset, sizeof(VkDispatchIndirectCommand)); if (m_barriers.isBufferDirty(physicalSlice, DxvkAccess::Read)) m_barriers.recordCommands(m_cmd); @@ -898,9 +909,6 @@ namespace dxvk { physicalSlice.handle(), physicalSlice.offset()); - m_cmd->trackResource( - physicalSlice.resource()); - m_queries.endQueries(m_cmd, VK_QUERY_TYPE_PIPELINE_STATISTICS); @@ -909,8 +917,10 @@ namespace dxvk { m_barriers.accessBuffer(physicalSlice, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, - buffer.bufferInfo().stages, - buffer.bufferInfo().access); + m_state.id.argBuffer.bufferInfo().stages, + m_state.id.argBuffer.bufferInfo().access); + + this->trackDrawBuffer(); } m_cmd->addStatCtr(DxvkStatCounter::CmdDispatchCalls, 1); @@ -935,20 +945,20 @@ namespace dxvk { void DxvkContext::drawIndirect( - const DxvkBufferSlice& buffer, + VkDeviceSize offset, uint32_t count, uint32_t stride) { this->commitGraphicsState(); if (this->validateGraphicsState()) { - auto descriptor = buffer.getDescriptor(); + auto descriptor = m_state.id.argBuffer.getDescriptor(); m_cmd->cmdDrawIndirect( descriptor.buffer.buffer, - descriptor.buffer.offset, + descriptor.buffer.offset + offset, count, stride); - this->trackDrawBuffer(buffer, descriptor.buffer.buffer); + this->trackDrawBuffer(); } m_cmd->addStatCtr(DxvkStatCounter::CmdDrawCalls, 1); @@ -975,20 +985,20 @@ namespace dxvk { void DxvkContext::drawIndexedIndirect( - const DxvkBufferSlice& buffer, + VkDeviceSize offset, uint32_t count, uint32_t stride) { this->commitGraphicsState(); if (this->validateGraphicsState()) { - auto descriptor = buffer.getDescriptor(); + auto descriptor = m_state.id.argBuffer.getDescriptor(); m_cmd->cmdDrawIndexedIndirect( descriptor.buffer.buffer, - descriptor.buffer.offset, + descriptor.buffer.offset + offset, count, stride); - this->trackDrawBuffer(buffer, descriptor.buffer.buffer); + this->trackDrawBuffer(); } m_cmd->addStatCtr(DxvkStatCounter::CmdDrawCalls, 1); @@ -1121,6 +1131,9 @@ namespace dxvk { // may be bound to either directly or through views. const VkBufferUsageFlags usage = buffer->info().usage; + if (usage & VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT) + m_flags.set(DxvkContextFlag::DirtyDrawBuffer); + if (usage & VK_BUFFER_USAGE_INDEX_BUFFER_BIT) m_flags.set(DxvkContextFlag::GpDirtyIndexBuffer); @@ -2840,14 +2853,12 @@ namespace dxvk { } - void DxvkContext::trackDrawBuffer( - const DxvkBufferSlice& buffer, - VkBuffer handle) { - if (m_lastIndirectDrawBuffer != handle) { - m_lastIndirectDrawBuffer = handle; + void DxvkContext::trackDrawBuffer() { + if (m_flags.test(DxvkContextFlag::DirtyDrawBuffer)) { + m_flags.clr(DxvkContextFlag::DirtyDrawBuffer); - if (handle != VK_NULL_HANDLE) - m_cmd->trackResource(buffer.resource()); + if (m_state.id.argBuffer.defined()) + m_cmd->trackResource(m_state.id.argBuffer.resource()); } } diff --git a/src/dxvk/dxvk_context.h b/src/dxvk/dxvk_context.h index 8c7b4672..d0a3ae3e 100644 --- a/src/dxvk/dxvk_context.h +++ b/src/dxvk/dxvk_context.h @@ -91,6 +91,16 @@ namespace dxvk { const DxvkRenderTargets& targets, bool spill); + /** + * \brief Binds indirect argument buffer + * + * Sets the buffer that is going to be used + * for indirect draw and dispatch operations. + * \param [in] buffer New argument buffer + */ + void bindDrawBuffer( + const DxvkBufferSlice& buffer); + /** * \brief Binds index buffer * @@ -392,19 +402,19 @@ namespace dxvk { * \param [in] z Number of threads in Z direction */ void dispatch( - uint32_t x, - uint32_t y, - uint32_t z); + uint32_t x, + uint32_t y, + uint32_t z); /** * \brief Indirect dispatch call * * Takes arguments from a buffer. The buffer must contain * a structure of the type \c VkDispatchIndirectCommand. - * \param [in] buffer The buffer slice + * \param [in] offset Draw buffer offset */ void dispatchIndirect( - const DxvkBufferSlice& buffer); + VkDeviceSize offset); /** * \brief Draws primitive without using an index buffer @@ -415,22 +425,22 @@ namespace dxvk { * \param [in] firstInstance First instance ID */ void draw( - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance); + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); /** * \brief Indirect indexed draw call * * Takes arguments from a buffer. The structure stored * in the buffer must be of type \c VkDrawIndirectCommand. - * \param [in] buffer The buffer slice + * \param [in] offset Draw buffer offset * \param [in] count Number of dispatch calls * \param [in] stride Stride between dispatch calls */ void drawIndirect( - const DxvkBufferSlice& buffer, + VkDeviceSize offset, uint32_t count, uint32_t stride); @@ -455,12 +465,12 @@ namespace dxvk { * * Takes arguments from a buffer. The structure type for * the draw buffer is \c VkDrawIndexedIndirectCommand. - * \param [in] buffer The buffer slice + * \param [in] offset Draw buffer offset * \param [in] count Number of dispatch calls * \param [in] stride Stride between dispatch calls */ void drawIndexedIndirect( - const DxvkBufferSlice& buffer, + VkDeviceSize offset, uint32_t count, uint32_t stride); @@ -704,8 +714,6 @@ namespace dxvk { VkDescriptorSet m_gpSet = VK_NULL_HANDLE; VkDescriptorSet m_cpSet = VK_NULL_HANDLE; - VkBuffer m_lastIndirectDrawBuffer = VK_NULL_HANDLE; - std::array m_rc; std::array m_descInfos; std::array m_descOffsets; @@ -812,9 +820,7 @@ namespace dxvk { void commitComputeInitBarriers(); void commitComputePostBarriers(); - void trackDrawBuffer( - const DxvkBufferSlice& buffer, - VkBuffer handle); + void trackDrawBuffer(); }; diff --git a/src/dxvk/dxvk_context_state.h b/src/dxvk/dxvk_context_state.h index f979e318..5784ec35 100644 --- a/src/dxvk/dxvk_context_state.h +++ b/src/dxvk/dxvk_context_state.h @@ -41,9 +41,16 @@ namespace dxvk { CpDirtyResources, ///< Compute pipeline resource bindings are out of date CpDirtyDescriptorOffsets, ///< Compute descriptor set needs to be rebound CpDirtyDescriptorSet, ///< Compute descriptor set needs to be updated + + DirtyDrawBuffer, ///< Indirect argument buffer is dirty }; using DxvkContextFlags = Flags; + + + struct DxvkIndirectDrawState { + DxvkBufferSlice argBuffer; + }; struct DxvkVertexInputState { @@ -113,6 +120,7 @@ namespace dxvk { * and constant pipeline state objects. */ struct DxvkContextState { + DxvkIndirectDrawState id; DxvkVertexInputState vi; DxvkViewportState vp; DxvkDynamicDepthState ds;