diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp index 63696628..57825d77 100644 --- a/src/d3d9/d3d9_device.cpp +++ b/src/d3d9/d3d9_device.cpp @@ -4900,9 +4900,8 @@ namespace dxvk { } - template - inline void D3D9DeviceEx::UploadSoftwareConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout) { - /* + inline void D3D9DeviceEx::UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout) { + /* * SWVP raises the amount of constants by a lot. * To avoid copying huge amounts of data for every draw call, * we track the highest set constant and only use a buffer big enough @@ -4914,9 +4913,16 @@ namespace dxvk { if (!constSet.dirty) return; - constSet.dirty = false; + constSet.dirty = false; - const uint32_t floatDataSize = std::min(constSet.meta.maxConstIndexF, m_vsFloatConstsCount) * sizeof(Vector4); + uint32_t floatCount = m_vsFloatConstsCount; + if (constSet.meta.needsConstantCopies) { + auto shader = GetCommonShader(m_state.vertexShader); + floatCount = std::max(floatCount, shader->GetMaxDefinedConstant()); + } + floatCount = std::min(floatCount, constSet.meta.maxConstIndexF); + + const uint32_t floatDataSize = floatCount * sizeof(Vector4); const uint32_t intDataSize = std::min(constSet.meta.maxConstIndexI, m_vsIntConstsCount) * sizeof(Vector4i); const uint32_t boolDataSize = divCeil(std::min(constSet.meta.maxConstIndexB, m_vsBoolConstsCount), 32u) * uint32_t(sizeof(uint32_t)); @@ -4924,7 +4930,18 @@ namespace dxvk { // Max copy source size is 8192 * 16 => always aligned to any plausible value // => we won't copy out of bounds if (likely(constSet.meta.maxConstIndexF != 0 || floatBuffer == nullptr)) { - CopySoftwareConstants(DxsoConstantBuffers::VSFloatConstantBuffer, floatBuffer, Src.fConsts, floatDataSize, m_dxsoOptions.vertexFloatConstantBufferAsSSBO); + DxvkBufferSliceHandle floatBufferSlice = CopySoftwareConstants(DxsoConstantBuffers::VSFloatConstantBuffer, floatBuffer, Src.fConsts, floatDataSize, m_dxsoOptions.vertexFloatConstantBufferAsSSBO); + + if (constSet.meta.needsConstantCopies) { + Vector4* data = reinterpret_cast(floatBufferSlice.mapPtr); + + auto& shaderConsts = GetCommonShader(m_state.vertexShader)->GetConstants(); + + for (const auto& constant : shaderConsts) { + if (constant.uboIdx < constSet.meta.maxConstIndexF) + data[constant.uboIdx] = *reinterpret_cast(constant.float32); + } + } } Rc& intBuffer = constSet.swvpBuffers.intBuffer; @@ -4941,13 +4958,14 @@ namespace dxvk { } - inline void D3D9DeviceEx::CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t size, bool useSSBO) { - uint32_t minSize = useSSBO ? m_robustSSBOAlignment : m_robustUBOAlignment; - minSize = std::max(minSize, 64u); - size = std::max(size, minSize); - + inline DxvkBufferSliceHandle D3D9DeviceEx::CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t size, bool useSSBO) { + uint32_t alignment = useSSBO ? m_robustSSBOAlignment : m_robustUBOAlignment; + alignment = std::max(alignment, 64u); + size = std::max(size, alignment); + size = align(size, alignment); + DxvkBufferSliceHandle slice; - if (unlikely(dstBuffer == nullptr || dstBuffer->info().size != size)) { + if (unlikely(dstBuffer == nullptr || dstBuffer->info().size < size)) { dstBuffer = CreateConstantBuffer(useSSBO, size, DxsoProgramType::VertexShader, cBufferTarget); slice = dstBuffer->getSliceHandle(); } else { @@ -4961,6 +4979,7 @@ namespace dxvk { } std::memcpy(slice.mapPtr, src, size); + return slice; } @@ -4982,12 +5001,12 @@ namespace dxvk { const uint32_t intRange = caps::MaxOtherConstants * sizeof(Vector4i); const uint32_t intDataSize = constSet.meta.maxConstIndexI * sizeof(Vector4i); uint32_t floatDataSize = std::min(constSet.meta.maxConstIndexF, floatCount) * sizeof(Vector4); - const uint32_t minSize = std::max(m_robustUBOAlignment, 64u); // Make sure we do not recreate the buffer because the new one has to be a tiny bit larger - const uint32_t bufferSize = std::max(floatDataSize + intRange, minSize); + const uint32_t alignment = std::max(m_robustUBOAlignment, 64u); // Make sure we do not recreate the buffer because the new one has to be a tiny bit larger + const uint32_t bufferSize = align(std::max(floatDataSize + intRange, alignment), alignment); floatDataSize = bufferSize - intRange; // Read additional floats for padding so we don't end up with garbage data VkDeviceSize& boundConstantBufferSize = ShaderStage == DxsoProgramType::VertexShader ? m_boundVSConstantsBufferSize : m_boundPSConstantsBufferSize; - if (boundConstantBufferSize != bufferSize) { + if (boundConstantBufferSize < bufferSize) { constexpr uint32_t slotId = computeResourceSlotId(ShaderStage, DxsoBindingType::ConstantBuffer, 0); EmitCs([ cBuffer = constSet.buffer, @@ -5032,7 +5051,7 @@ namespace dxvk { template void D3D9DeviceEx::UploadConstants() { if constexpr (ShaderStage == DxsoProgramTypes::VertexShader) { - if (CanSWVP()) + if (CanSWVP()) return UploadSoftwareConstantSet(m_state.vsConsts, m_vsLayout); else return UploadConstantSet(m_state.vsConsts, m_vsLayout, m_state.vertexShader); diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h index 11bf1121..2455315a 100644 --- a/src/d3d9/d3d9_device.h +++ b/src/d3d9/d3d9_device.h @@ -811,10 +811,9 @@ namespace dxvk { void BindAlphaTestState(); - template - inline void UploadSoftwareConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout); + inline void UploadSoftwareConstantSet(const D3D9ShaderConstantsVSSoftware& Src, const D3D9ConstantLayout& Layout); - inline void CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t copySize, bool useSSBO); + inline DxvkBufferSliceHandle CopySoftwareConstants(DxsoConstantBuffers cBufferTarget, Rc& dstBuffer, const void* src, uint32_t copySize, bool useSSBO); template inline void UploadConstantSet(const SoftwareLayoutType& Src, const D3D9ConstantLayout& Layout, const ShaderType& Shader); diff --git a/src/d3d9/d3d9_shader.cpp b/src/d3d9/d3d9_shader.cpp index 85bd033b..8b5bb8bd 100644 --- a/src/d3d9/d3d9_shader.cpp +++ b/src/d3d9/d3d9_shader.cpp @@ -71,6 +71,7 @@ namespace dxvk { m_info = pModule->info(); m_meta = pModule->meta(); m_constants = pModule->constants(); + m_maxDefinedConst = pModule->maxDefinedConstant(); m_shaders[0]->setShaderKey(Key); diff --git a/src/d3d9/d3d9_shader.h b/src/d3d9/d3d9_shader.h index 9bfc3100..d896f019 100644 --- a/src/d3d9/d3d9_shader.h +++ b/src/d3d9/d3d9_shader.h @@ -56,6 +56,8 @@ namespace dxvk { const DxsoProgramInfo& GetInfo() const { return m_info; } + uint32_t GetMaxDefinedConstant() const { return m_maxDefinedConst; } + private: DxsoIsgn m_isgn; @@ -65,6 +67,7 @@ namespace dxvk { DxsoProgramInfo m_info; DxsoShaderMetaInfo m_meta; DxsoDefinedConstants m_constants; + uint32_t m_maxDefinedConst; DxsoPermutations m_shaders; diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp index 36d640b2..5b8d16b6 100644 --- a/src/dxso/dxso_compiler.cpp +++ b/src/dxso/dxso_compiler.cpp @@ -1760,6 +1760,7 @@ namespace dxvk { for (uint32_t i = 0; i < 4; i++) constant.float32[i] = data[i]; m_constants.push_back(constant); + m_maxDefinedConstant = std::max(constant.uboIdx, m_maxDefinedConstant); } void DxsoCompiler::emitDefI(const DxsoInstructionContext& ctx) { diff --git a/src/dxso/dxso_compiler.h b/src/dxso/dxso_compiler.h index a96614d0..9cd90485 100644 --- a/src/dxso/dxso_compiler.h +++ b/src/dxso/dxso_compiler.h @@ -256,6 +256,7 @@ namespace dxvk { const DxsoDefinedConstants& constants() { return m_constants; } uint32_t usedSamplers() const { return m_usedSamplers; } uint32_t usedRTs() const { return m_usedRTs; } + uint32_t maxDefinedConstant() const { return m_maxDefinedConstant; } private: @@ -266,6 +267,7 @@ namespace dxvk { DxsoShaderMetaInfo m_meta; DxsoDefinedConstants m_constants; + uint32_t m_maxDefinedConstant; SpirvModule m_module; diff --git a/src/dxso/dxso_module.cpp b/src/dxso/dxso_module.cpp index 43f9fda9..cf32cdaa 100644 --- a/src/dxso/dxso_module.cpp +++ b/src/dxso/dxso_module.cpp @@ -34,10 +34,11 @@ namespace dxvk { this->runCompiler(*compiler, m_code.iter()); m_isgn = compiler->isgn(); - m_meta = compiler->meta(); - m_constants = compiler->constants(); - m_usedSamplers = compiler->usedSamplers(); - m_usedRTs = compiler->usedRTs(); + m_meta = compiler->meta(); + m_constants = compiler->constants(); + m_maxDefinedConst = compiler->maxDefinedConstant(); + m_usedSamplers = compiler->usedSamplers(); + m_usedRTs = compiler->usedRTs(); compiler->finalize(); diff --git a/src/dxso/dxso_module.h b/src/dxso/dxso_module.h index 1f5cda85..4b8238df 100644 --- a/src/dxso/dxso_module.h +++ b/src/dxso/dxso_module.h @@ -60,6 +60,8 @@ namespace dxvk { uint32_t usedRTs() { return m_usedRTs; } + uint32_t maxDefinedConstant() { return m_maxDefinedConst; } + private: void runCompiler( @@ -78,6 +80,7 @@ namespace dxvk { uint32_t m_usedRTs; DxsoShaderMetaInfo m_meta; + uint32_t m_maxDefinedConst; DxsoDefinedConstants m_constants; };