diff --git a/README.md b/README.md index 6d3cb1a3..ceeb3a8c 100644 --- a/README.md +++ b/README.md @@ -82,6 +82,7 @@ The following environment variables can be used for **debugging** purposes. - `DXVK_CUSTOM_DEVICE_ID=` Specifies a custom PCI device ID - `DXVK_LOG_LEVEL=none|error|warn|info|debug` Controls message logging - `DXVK_FAKE_DX10_SUPPORT=1` Advertizes support for D3D10 interfaces +- `DXVK_USE_PIPECOMPILER=1` Enable asynchronous pipeline compilation. This currently only has an effect on RADV in mesa-git. ## Troubleshooting DXVK requires threading support from your mingw-w64 build environment. If you diff --git a/src/dxvk/dxvk_graphics.cpp b/src/dxvk/dxvk_graphics.cpp index 0962d2f9..4d2e9f6e 100644 --- a/src/dxvk/dxvk_graphics.cpp +++ b/src/dxvk/dxvk_graphics.cpp @@ -34,16 +34,37 @@ namespace dxvk { } + DxvkGraphicsPipelineInstance::DxvkGraphicsPipelineInstance( + const Rc& vkd, + const DxvkGraphicsPipelineStateInfo& stateVector, + VkRenderPass renderPass, + VkPipeline basePipeline) + : m_vkd (vkd), + m_stateVector (stateVector), + m_renderPass (renderPass), + m_basePipeline(basePipeline), + m_fastPipeline(VK_NULL_HANDLE) { + + } + + + DxvkGraphicsPipelineInstance::~DxvkGraphicsPipelineInstance() { + m_vkd->vkDestroyPipeline(m_vkd->device(), m_basePipeline, nullptr); + m_vkd->vkDestroyPipeline(m_vkd->device(), m_fastPipeline, nullptr); + } + + DxvkGraphicsPipeline::DxvkGraphicsPipeline( - const DxvkDevice* device, - const Rc& cache, - const Rc& vs, - const Rc& tcs, - const Rc& tes, - const Rc& gs, - const Rc& fs) + const DxvkDevice* device, + const Rc& cache, + const Rc& compiler, + const Rc& vs, + const Rc& tcs, + const Rc& tes, + const Rc& gs, + const Rc& fs) : m_device(device), m_vkd(device->vkd()), - m_cache(cache) { + m_cache(cache), m_compiler(compiler) { DxvkDescriptorSlotMapping slotMapping; if (vs != nullptr) vs ->defineResourceSlots(slotMapping); if (tcs != nullptr) tcs->defineResourceSlots(slotMapping); @@ -71,7 +92,7 @@ namespace dxvk { 
DxvkGraphicsPipeline::~DxvkGraphicsPipeline() { - this->destroyPipelines(); + } @@ -79,61 +100,96 @@ namespace dxvk { const DxvkGraphicsPipelineStateInfo& state, const DxvkRenderPass& renderPass, DxvkStatCounters& stats) { - VkPipeline pipeline = VK_NULL_HANDLE; VkRenderPass renderPassHandle = renderPass.getDefaultHandle(); { std::lock_guard lock(m_mutex); - if (this->findPipeline(state, renderPassHandle, pipeline)) - return pipeline; + DxvkGraphicsPipelineInstance* pipeline = + this->findInstance(state, renderPassHandle); + + if (pipeline != nullptr) + return pipeline->getPipeline(); } - // If no pipeline exists with the given state vector, - // create a new one and add it to the pipeline set. - VkPipeline newPipeline = this->validatePipelineState(state) - ? this->compilePipeline(state, renderPassHandle, m_basePipeline) - : VK_NULL_HANDLE; + // If the pipeline state vector is invalid, don't try + // to create a new pipeline, it won't work anyway. + if (!this->validatePipelineState(state)) + return VK_NULL_HANDLE; + + // If no pipeline instance exists with the given state + // vector, create a new one and add it to the list. + VkPipeline newPipelineBase = m_basePipelineBase.load(); + VkPipeline newPipelineHandle = this->compilePipeline(state, renderPassHandle, + m_compiler != nullptr ? 
VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT : 0, + newPipelineBase); + + Rc newPipeline = + new DxvkGraphicsPipelineInstance(m_device->vkd(), state, + renderPassHandle, newPipelineHandle); { std::lock_guard lock(m_mutex); // Discard the pipeline if another thread // was faster compiling the same pipeline - if (this->findPipeline(state, renderPassHandle, pipeline)) { - m_vkd->vkDestroyPipeline(m_vkd->device(), newPipeline, nullptr); - return pipeline; - } + DxvkGraphicsPipelineInstance* pipeline = + this->findInstance(state, renderPassHandle); + + if (pipeline != nullptr) + return pipeline->getPipeline(); // Add new pipeline to the set - m_pipelines.push_back({ state, renderPassHandle, newPipeline }); - - if (m_basePipeline == VK_NULL_HANDLE) - m_basePipeline = newPipeline; + m_pipelines.push_back(newPipeline); stats.addCtr(DxvkStatCounter::PipeCountGraphics, 1); - return newPipeline; } + + // Use the new pipeline as the base pipeline for derivative pipelines + if (newPipelineBase == VK_NULL_HANDLE && newPipelineHandle != VK_NULL_HANDLE) + m_basePipelineBase.compare_exchange_strong(newPipelineBase, newPipelineHandle); + + // Compile optimized pipeline asynchronously + if (m_compiler != nullptr) + m_compiler->queueCompilation(this, newPipeline); + + return newPipelineHandle; } - bool DxvkGraphicsPipeline::findPipeline( + void DxvkGraphicsPipeline::compileInstance( + const Rc& instance) { + // Compile an optimized version of the pipeline + VkPipeline newPipelineBase = m_fastPipelineBase.load(); + VkPipeline newPipelineHandle = this->compilePipeline( + instance->m_stateVector, instance->m_renderPass, + 0, m_fastPipelineBase); + + // Use the new pipeline as the base pipeline for derivative pipelines + if (newPipelineBase == VK_NULL_HANDLE && newPipelineHandle != VK_NULL_HANDLE) + m_fastPipelineBase.compare_exchange_strong(newPipelineBase, newPipelineHandle); + + // If an optimized version has been compiled + // in the meantime, discard the new pipeline + if 
(!instance->setFastPipeline(newPipelineHandle)) + m_vkd->vkDestroyPipeline(m_vkd->device(), newPipelineHandle, nullptr); + } + + + DxvkGraphicsPipelineInstance* DxvkGraphicsPipeline::findInstance( const DxvkGraphicsPipelineStateInfo& state, - VkRenderPass renderPass, - VkPipeline& pipeline) const { - for (const PipelineStruct& pair : m_pipelines) { - if (pair.stateVector == state - && pair.renderPass == renderPass) { - pipeline = pair.pipeline; - return true; - } + VkRenderPass renderPass) const { + for (const auto& pipeline : m_pipelines) { + if (pipeline->isCompatible(state, renderPass)) + return pipeline.ptr(); } - return false; + return nullptr; } VkPipeline DxvkGraphicsPipeline::compilePipeline( const DxvkGraphicsPipelineStateInfo& state, VkRenderPass renderPass, + VkPipelineCreateFlags createFlags, VkPipeline baseHandle) const { if (Logger::logLevel() <= LogLevel::Debug) { Logger::debug("Compiling graphics pipeline..."); @@ -287,9 +343,7 @@ namespace dxvk { VkGraphicsPipelineCreateInfo info; info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; info.pNext = nullptr; - info.flags = baseHandle == VK_NULL_HANDLE - ? VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT - : VK_PIPELINE_CREATE_DERIVATIVE_BIT; + info.flags = createFlags; info.stageCount = stages.size(); info.pStages = stages.data(); info.pVertexInputState = &viInfo; @@ -307,6 +361,10 @@ namespace dxvk { info.basePipelineHandle = baseHandle; info.basePipelineIndex = -1; + info.flags |= baseHandle == VK_NULL_HANDLE + ? 
VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT + : VK_PIPELINE_CREATE_DERIVATIVE_BIT; + if (tsInfo.patchControlPoints == 0) info.pTessellationState = nullptr; @@ -328,12 +386,6 @@ namespace dxvk { } - void DxvkGraphicsPipeline::destroyPipelines() { - for (const PipelineStruct& pair : m_pipelines) - m_vkd->vkDestroyPipeline(m_vkd->device(), pair.pipeline, nullptr); - } - - bool DxvkGraphicsPipeline::validatePipelineState( const DxvkGraphicsPipelineStateInfo& state) const { // Validate vertex input - each input slot consumed by the @@ -343,17 +395,13 @@ namespace dxvk { for (uint32_t i = 0; i < state.ilAttributeCount; i++) providedVertexInputs |= 1u << state.ilAttributes[i].location; - if ((providedVertexInputs & m_vsIn) != m_vsIn) { - Logger::err("DxvkGraphicsPipeline: Input layout mismatches vertex shader input"); + if ((providedVertexInputs & m_vsIn) != m_vsIn) return false; - } // If there are no tessellation shaders, we // obviously cannot use tessellation patches. - if ((state.iaPatchVertexCount != 0) && (m_tcs == nullptr || m_tes == nullptr)) { - Logger::err("DxvkGraphicsPipeline: Cannot use tessellation patches without tessellation shaders"); + if ((state.iaPatchVertexCount != 0) && (m_tcs == nullptr || m_tes == nullptr)) return false; - } // No errors return true; diff --git a/src/dxvk/dxvk_graphics.h b/src/dxvk/dxvk_graphics.h index cee384d9..ad1de14b 100644 --- a/src/dxvk/dxvk_graphics.h +++ b/src/dxvk/dxvk_graphics.h @@ -5,6 +5,7 @@ #include "dxvk_binding.h" #include "dxvk_constant_state.h" #include "dxvk_pipecache.h" +#include "dxvk_pipecompiler.h" #include "dxvk_pipelayout.h" #include "dxvk_renderpass.h" #include "dxvk_resource.h" @@ -90,6 +91,79 @@ namespace dxvk { }; + /** + * \brief Graphics pipeline instance + * + * Stores a state vector and the corresponding + * unoptimized and optimized pipeline handles. 
+ */ + class DxvkGraphicsPipelineInstance : public RcObject { + friend class DxvkGraphicsPipeline; + public: + + DxvkGraphicsPipelineInstance( + const Rc& vkd, + const DxvkGraphicsPipelineStateInfo& stateVector, + VkRenderPass renderPass, + VkPipeline basePipeline); + + ~DxvkGraphicsPipelineInstance(); + + /** + * \brief Checks for matching pipeline state + * + * \param [in] stateVector Graphics pipeline state + * \param [in] renderPass Render pass handle + * \returns \c true if the specialization is compatible + */ + bool isCompatible( + const DxvkGraphicsPipelineStateInfo& stateVector, + VkRenderPass renderPass) const { + return m_renderPass == renderPass + && m_stateVector == stateVector; + } + + /** + * \brief Sets the optimized pipeline handle + * + * If an optimized pipeline handle has already been + * set up, this method will fail and the new pipeline + * handle should be destroyed. + * \param [in] pipeline The optimized pipeline + */ + bool setFastPipeline(VkPipeline pipeline) { + VkPipeline expected = VK_NULL_HANDLE; + return m_fastPipeline.compare_exchange_strong(expected, pipeline); + } + + /** + * \brief Retrieves pipeline + * + * Returns the optimized version of the pipeline if + * it has been set, or the base pipeline if not. + * \returns The pipeline handle + */ + VkPipeline getPipeline() const { + VkPipeline basePipeline = m_basePipeline.load(); + VkPipeline fastPipeline = m_fastPipeline.load(); + + return fastPipeline != VK_NULL_HANDLE + ? 
fastPipeline : basePipeline; + } + + private: + + const Rc m_vkd; + + DxvkGraphicsPipelineStateInfo m_stateVector; + VkRenderPass m_renderPass; + + std::atomic m_basePipeline; + std::atomic m_fastPipeline; + + }; + + /** * \brief Graphics pipeline * @@ -102,13 +176,14 @@ namespace dxvk { public: DxvkGraphicsPipeline( - const DxvkDevice* device, - const Rc& cache, - const Rc& vs, - const Rc& tcs, - const Rc& tes, - const Rc& gs, - const Rc& fs); + const DxvkDevice* device, + const Rc& cache, + const Rc& compiler, + const Rc& vs, + const Rc& tcs, + const Rc& tes, + const Rc& gs, + const Rc& fs); ~DxvkGraphicsPipeline(); /** @@ -134,9 +209,19 @@ namespace dxvk { * \returns Pipeline handle */ VkPipeline getPipelineHandle( - const DxvkGraphicsPipelineStateInfo& state, - const DxvkRenderPass& renderPass, - DxvkStatCounters& stats); + const DxvkGraphicsPipelineStateInfo& state, + const DxvkRenderPass& renderPass, + DxvkStatCounters& stats); + + /** + * \brief Compiles optimized pipeline + * + * Compiles an optimized version of a pipeline + * and makes it available to the system. 
+ * \param [in] instance The pipeline instance + */ + void compileInstance( + const Rc& instance); private: @@ -149,8 +234,9 @@ namespace dxvk { const DxvkDevice* const m_device; const Rc m_vkd; - Rc m_cache; - Rc m_layout; + Rc m_cache; + Rc m_compiler; + Rc m_layout; Rc m_vs; Rc m_tcs; @@ -163,23 +249,24 @@ namespace dxvk { DxvkGraphicsCommonPipelineStateInfo m_common; - sync::Spinlock m_mutex; - std::vector m_pipelines; + // List of pipeline instances, shared between threads + alignas(CACHE_LINE_SIZE) sync::Spinlock m_mutex; + std::vector> m_pipelines; - VkPipeline m_basePipeline = VK_NULL_HANDLE; + // Pipeline handles used for derivative pipelines + std::atomic m_basePipelineBase = { VK_NULL_HANDLE }; + std::atomic m_fastPipelineBase = { VK_NULL_HANDLE }; - bool findPipeline( + DxvkGraphicsPipelineInstance* findInstance( const DxvkGraphicsPipelineStateInfo& state, - VkRenderPass renderPass, - VkPipeline& pipeline) const; + VkRenderPass renderPass) const; VkPipeline compilePipeline( const DxvkGraphicsPipelineStateInfo& state, VkRenderPass renderPass, + VkPipelineCreateFlags createFlags, VkPipeline baseHandle) const; - void destroyPipelines(); - bool validatePipelineState( const DxvkGraphicsPipelineStateInfo& state) const; diff --git a/src/dxvk/dxvk_pipecompiler.cpp b/src/dxvk/dxvk_pipecompiler.cpp new file mode 100644 index 00000000..425fe723 --- /dev/null +++ b/src/dxvk/dxvk_pipecompiler.cpp @@ -0,0 +1,72 @@ +#include "dxvk_graphics.h" +#include "dxvk_pipecompiler.h" + +namespace dxvk { + + DxvkPipelineCompiler::DxvkPipelineCompiler() { + // Use ~half the CPU cores for pipeline compilation + const uint32_t threadCount = std::max( + 1u, std::thread::hardware_concurrency() / 2); + + Logger::debug(str::format( + "DxvkPipelineCompiler: Using ", threadCount, " workers")); + + // Start the compiler threads + m_compilerThreads.resize(threadCount); + + for (uint32_t i = 0; i < threadCount; i++) { + m_compilerThreads.at(i) = std::thread( + [this, i] { 
this->runCompilerThread(i); }); + } + } + + + DxvkPipelineCompiler::~DxvkPipelineCompiler() { + { std::unique_lock lock(m_compilerLock); + m_compilerStop.store(true); + } + + m_compilerCond.notify_all(); + for (auto& thread : m_compilerThreads) + thread.join(); + } + + + void DxvkPipelineCompiler::queueCompilation( + const Rc& pipeline, + const Rc& instance) { + std::unique_lock lock(m_compilerLock); + m_compilerQueue.push({ pipeline, instance }); + m_compilerCond.notify_one(); + } + + + void DxvkPipelineCompiler::runCompilerThread(uint32_t workerId) { + Logger::debug(str::format( + "DxvkPipelineCompiler: Worker #", workerId, " started")); + + while (!m_compilerStop.load()) { + PipelineEntry entry; + + { std::unique_lock lock(m_compilerLock); + + m_compilerCond.wait(lock, [this] { + return m_compilerStop.load() + || m_compilerQueue.size() != 0; + }); + + if (m_compilerQueue.size() != 0) { + entry = std::move(m_compilerQueue.front()); + m_compilerQueue.pop(); + } + } + + if (entry.pipeline != nullptr && entry.instance != nullptr) + entry.pipeline->compileInstance(entry.instance); + } + + Logger::debug(str::format( + "DxvkPipelineCompiler: Worker #", workerId, " stopped")); + } + +} \ No newline at end of file diff --git a/src/dxvk/dxvk_pipecompiler.h b/src/dxvk/dxvk_pipecompiler.h new file mode 100644 index 00000000..8312494d --- /dev/null +++ b/src/dxvk/dxvk_pipecompiler.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "dxvk_include.h" + +namespace dxvk { + + class DxvkGraphicsPipeline; + class DxvkGraphicsPipelineInstance; + + /** + * \brief Pipeline compiler + * + * Asynchronous pipeline compiler, which is used + * to compile optimized versions of pipelines. 
+ */ + class DxvkPipelineCompiler : public RcObject { + + public: + + DxvkPipelineCompiler(); + ~DxvkPipelineCompiler(); + + /** + * \brief Compiles a pipeline asynchronously + * + * This should be used to compile optimized + * graphics pipeline instances asynchronously. + * \param [in] pipeline The pipeline object + * \param [in] instance The pipeline instance + */ + void queueCompilation( + const Rc& pipeline, + const Rc& instance); + + private: + + struct PipelineEntry { + Rc pipeline; + Rc instance; + }; + + std::atomic m_compilerStop = { false }; + std::mutex m_compilerLock; + std::condition_variable m_compilerCond; + std::queue m_compilerQueue; + std::vector m_compilerThreads; + + void runCompilerThread(uint32_t workerId); + + }; + +} \ No newline at end of file diff --git a/src/dxvk/dxvk_pipemanager.cpp b/src/dxvk/dxvk_pipemanager.cpp index c3b617fd..6e8144b8 100644 --- a/src/dxvk/dxvk_pipemanager.cpp +++ b/src/dxvk/dxvk_pipemanager.cpp @@ -39,8 +39,12 @@ namespace dxvk { DxvkPipelineManager::DxvkPipelineManager(const DxvkDevice* device) - : m_device(device), m_cache(new DxvkPipelineCache(device->vkd())) { - + : m_device (device), + m_cache (new DxvkPipelineCache(device->vkd())), + m_compiler(nullptr) { + // Async shader compilation is opt-in for now + if (env::getEnvVar(L"DXVK_USE_PIPECOMPILER") == "1") + m_compiler = new DxvkPipelineCompiler(); } @@ -93,8 +97,8 @@ namespace dxvk { if (pair != m_graphicsPipelines.end()) return pair->second; - const Rc pipeline - = new DxvkGraphicsPipeline(m_device, m_cache, vs, tcs, tes, gs, fs); + Rc pipeline = new DxvkGraphicsPipeline( + m_device, m_cache, m_compiler, vs, tcs, tes, gs, fs); m_graphicsPipelines.insert(std::make_pair(key, pipeline)); return pipeline; diff --git a/src/dxvk/dxvk_pipemanager.h b/src/dxvk/dxvk_pipemanager.h index 5dc62152..232c1373 100644 --- a/src/dxvk/dxvk_pipemanager.h +++ b/src/dxvk/dxvk_pipemanager.h @@ -5,6 +5,7 @@ #include "dxvk_compute.h" #include "dxvk_graphics.h" +#include 
"dxvk_pipecompiler.h" namespace dxvk { @@ -96,8 +97,9 @@ namespace dxvk { private: - const DxvkDevice* m_device; - const Rc m_cache; + const DxvkDevice* m_device; + Rc m_cache; + Rc m_compiler; std::mutex m_mutex; diff --git a/src/dxvk/meson.build b/src/dxvk/meson.build index 234d1f61..1c563df9 100644 --- a/src/dxvk/meson.build +++ b/src/dxvk/meson.build @@ -43,6 +43,7 @@ dxvk_src = files([ 'dxvk_meta_clear.cpp', 'dxvk_meta_resolve.cpp', 'dxvk_pipecache.cpp', + 'dxvk_pipecompiler.cpp', 'dxvk_pipelayout.cpp', 'dxvk_pipemanager.cpp', 'dxvk_query.cpp',