diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp index c29e6862..b3a07758 100644 --- a/src/d3d11/d3d11_context_imm.cpp +++ b/src/d3d11/d3d11_context_imm.cpp @@ -266,10 +266,8 @@ namespace dxvk { void D3D11ImmediateContext::EmitCsChunk() { - if (m_csChunk->commandCount() > 0) { - m_csThread.dispatchChunk(std::move(m_csChunk)); - m_csChunk = new DxvkCsChunk(); - } + if (m_csChunk->commandCount() > 0) + m_csChunk = m_csThread.dispatchChunk(std::move(m_csChunk)); } } \ No newline at end of file diff --git a/src/dxvk/dxvk_cs.cpp b/src/dxvk/dxvk_cs.cpp index b3308d95..2250cf68 100644 --- a/src/dxvk/dxvk_cs.cpp +++ b/src/dxvk/dxvk_cs.cpp @@ -40,17 +40,37 @@ namespace dxvk { } - void DxvkCsThread::dispatchChunk(Rc&& chunk) { + Rc DxvkCsThread::dispatchChunk(Rc&& chunk) { + Rc nextChunk = nullptr; + { std::unique_lock lock(m_mutex); - m_chunks.push(std::move(chunk)); + m_chunksQueued.push(std::move(chunk)); m_chunksPending += 1; - m_condOnSync.wait(lock, [this] { - return m_stopped.load() || (m_chunksPending < MaxChunksInFlight); - }); + // If the number of pending chunks has reached the limit, block + // until it drops below half the limit (or the thread stops), so + // queued memory, stuttering and input lag stay bounded. 
+ if (m_chunksPending >= MaxChunksInFlight) { + m_condOnSync.wait(lock, [this] { + return (m_chunksPending < MaxChunksInFlight / 2) + || (m_stopped.load()); + }); + } + + if (m_chunksUnused.size() != 0) { + nextChunk = std::move(m_chunksUnused.front()); + m_chunksUnused.pop(); + } } + // Wake the CS thread so it picks up the queued chunk m_condOnAdd.notify_one(); + + // Allocate a fresh chunk if none could be recycled + if (nextChunk == nullptr) + nextChunk = new DxvkCsChunk(); + + return nextChunk; } @@ -64,30 +84,31 @@ namespace dxvk { void DxvkCsThread::threadFunc() { + Rc chunk; + while (!m_stopped.load()) { - Rc chunk; - { std::unique_lock lock(m_mutex); + if (chunk != nullptr) { + m_chunksPending -= 1; + m_chunksUnused.push(std::move(chunk)); + + m_condOnSync.notify_one(); + } m_condOnAdd.wait(lock, [this] { - return m_stopped.load() || (m_chunks.size() != 0); + return m_stopped.load() || (m_chunksQueued.size() != 0); }); - if (m_chunks.size() != 0) { - chunk = std::move(m_chunks.front()); - m_chunks.pop(); + if (m_chunksQueued.size() != 0) { + chunk = std::move(m_chunksQueued.front()); + m_chunksQueued.pop(); + } else { + chunk = nullptr; } } - if (chunk != nullptr) { + if (chunk != nullptr) chunk->executeAll(m_context.ptr()); - - { std::unique_lock lock(m_mutex); - m_chunksPending -= 1; - } - - m_condOnSync.notify_one(); - } } } diff --git a/src/dxvk/dxvk_cs.h b/src/dxvk/dxvk_cs.h index f7e1b641..7f4fd16b 100644 --- a/src/dxvk/dxvk_cs.h +++ b/src/dxvk/dxvk_cs.h @@ -65,7 +65,7 @@ namespace dxvk { * Stores a list of commands. */ class DxvkCsChunk : public RcObject { - constexpr static size_t MaxCommands = 64; + constexpr static size_t MaxCommands = 1024; constexpr static size_t MaxBlockSize = 64 * MaxCommands; public: @@ -140,7 +140,7 @@ namespace dxvk { class DxvkCsThread { // Limit the number of chunks in the queue // to prevent memory leaks, stuttering etc. 
- constexpr static uint32_t MaxChunksInFlight = 128; + constexpr static uint32_t MaxChunksInFlight = 16; public: DxvkCsThread(const Rc& context); @@ -152,8 +152,9 @@ namespace dxvk { * Can be used to efficiently play back large * command lists recorded on another thread. * \param [in] chunk The chunk to dispatch + * \returns New chunk for the next submissions */ - void dispatchChunk(Rc&& chunk); + Rc dispatchChunk(Rc&& chunk); /** * \brief Synchronizes with the thread @@ -173,7 +174,8 @@ namespace dxvk { std::mutex m_mutex; std::condition_variable m_condOnAdd; std::condition_variable m_condOnSync; - std::queue> m_chunks; + std::queue> m_chunksQueued; + std::queue> m_chunksUnused; std::thread m_thread; uint32_t m_chunksPending = 0;