diff --git a/DDrawCompat/D3dDdi/DeviceState.cpp b/DDrawCompat/D3dDdi/DeviceState.cpp index c9ee880..62778b5 100644 --- a/DDrawCompat/D3dDdi/DeviceState.cpp +++ b/DDrawCompat/D3dDdi/DeviceState.cpp @@ -90,8 +90,8 @@ namespace D3dDdi , m_vertexDecl(nullptr) , m_changedStates(0) , m_maxChangedTextureStage(0) + , m_texCoordIndexes(0) , m_changedTextureStageStates{} - , m_vsVertexFixup(createVertexShader(g_vsVertexFixup)) , m_textureResource{} , m_pixelShader(nullptr) , m_isLocked(false) @@ -187,6 +187,7 @@ namespace D3dDdi { m_current.textureStageState[i].fill(UNINITIALIZED_STATE); m_current.textureStageState[i][D3DDDITSS_TEXCOORDINDEX] = i; + m_texCoordIndexes |= i << (i * 3); // When ADDRESSU or ADDRESSV is set to CLAMP, their value is overridden by D3DTSS_ADDRESS. // Setting this to CLAMP makes them behave as expected, instead of as WRAP, @@ -224,11 +225,11 @@ namespace D3dDdi updateConfig(); } - std::unique_ptr DeviceState::createVertexShader(const BYTE* code, UINT size) + std::unique_ptr DeviceState::createVertexShader(const UINT* code, UINT size) { D3DDDIARG_CREATEVERTEXSHADERFUNC data = {}; data.Size = size; - if (FAILED(m_device.getOrigVtable().pfnCreateVertexShaderFunc(m_device, &data, reinterpret_cast(code)))) + if (FAILED(m_device.getOrigVtable().pfnCreateVertexShaderFunc(m_device, &data, code))) { return nullptr; } @@ -326,6 +327,26 @@ namespace D3dDdi return m_vertexDecl ? 
*m_vertexDecl : emptyDecl; } + HANDLE DeviceState::getVsVertexFixup() + { + auto it = m_vsVertexFixups.find(m_texCoordIndexes); + if (it != m_vsVertexFixups.end()) + { + return it->second.get(); + } + + std::array texCoordIndexes = {}; + for (UINT i = 0; i < 8; ++i) + { + texCoordIndexes[i] = m_app.textureStageState[i][D3DDDITSS_TEXCOORDINDEX]; + } + + D3dDdi::ShaderAssembler shaderAssembler(reinterpret_cast(g_vsVertexFixup), sizeof(g_vsVertexFixup)); + shaderAssembler.applyTexCoordIndexes(texCoordIndexes); + return m_vsVertexFixups.emplace(m_texCoordIndexes, + createVertexShader(shaderAssembler.getTokens().data(), shaderAssembler.getTokens().size() * 4)).first->second.get(); + } + bool DeviceState::isColorKeyUsed() { if (!m_app.renderState[D3DDDIRS_COLORKEYENABLE]) @@ -669,6 +690,12 @@ namespace D3dDdi m_changedRenderStates.set(D3DDDIRS_COLORKEYENABLE); m_changedStates |= CS_RENDER_STATE; } + else if (D3DDDITSS_TEXCOORDINDEX == data->State) + { + m_texCoordIndexes &= ~(7 << (data->Stage * 3)); + m_texCoordIndexes |= (data->Value & 7) << (data->Stage * 3); + m_changedStates |= CS_SHADER; + } m_app.textureStageState[data->Stage][data->State] = data->Value; m_changedTextureStageStates[data->Stage].set(data->State); @@ -1203,14 +1230,7 @@ namespace D3dDdi { setPixelShader(mapPixelShader(m_app.pixelShader)); setVertexShaderDecl(m_app.vertexShaderDecl); - if (getVertexDecl().isTransformed) - { - setVertexShaderFunc(m_vsVertexFixup.get()); - } - else - { - setVertexShaderFunc(m_app.vertexShaderFunc); - } + setVertexShaderFunc(getVertexDecl().isTransformed ? 
getVsVertexFixup() : m_app.vertexShaderFunc); } void DeviceState::updateStreamSource() diff --git a/DDrawCompat/D3dDdi/DeviceState.h b/DDrawCompat/D3dDdi/DeviceState.h index e93c5a6..51576ad 100644 --- a/DDrawCompat/D3dDdi/DeviceState.h +++ b/DDrawCompat/D3dDdi/DeviceState.h @@ -156,7 +156,6 @@ namespace D3dDdi Resource* getTextureResource(UINT stage); UINT getTextureStageCount() const; const VertexDecl& getVertexDecl() const; - HANDLE getVertexFixupDecl() const { return m_vsVertexFixup.get(); } bool isLocked() const { return m_isLocked; } void onDestroyResource(Resource* resource, HANDLE resourceHandle); void updateConfig(); @@ -183,13 +182,14 @@ namespace D3dDdi template std::unique_ptr createVertexShader(const BYTE(&code)[N]) { - return createVertexShader(code, N); + return createVertexShader(reinterpret_cast(code), N); } - std::unique_ptr DeviceState::createVertexShader(const BYTE* code, UINT size); + std::unique_ptr DeviceState::createVertexShader(const UINT* code, UINT size); HRESULT deleteShader(HANDLE shader, HANDLE State::* shaderMember, HRESULT(APIENTRY* origDeleteShaderFunc)(HANDLE, HANDLE)); + HANDLE getVsVertexFixup(); bool isColorKeyUsed(); HANDLE mapPixelShader(HANDLE shader); UINT mapRsValue(D3DDDIRENDERSTATETYPE state, UINT value); @@ -245,10 +245,10 @@ namespace D3dDdi VertexDecl* m_vertexDecl; UINT m_changedStates; UINT m_maxChangedTextureStage; - UINT m_usedTextureStages; + UINT m_texCoordIndexes; BitSet m_changedRenderStates; std::array, 8> m_changedTextureStageStates; - std::unique_ptr m_vsVertexFixup; + std::map> m_vsVertexFixups; std::array m_textureResource; std::map m_pixelShaders; PixelShader* m_pixelShader; diff --git a/DDrawCompat/D3dDdi/ShaderAssembler.cpp b/DDrawCompat/D3dDdi/ShaderAssembler.cpp index f2098d3..efec0be 100644 --- a/DDrawCompat/D3dDdi/ShaderAssembler.cpp +++ b/DDrawCompat/D3dDdi/ShaderAssembler.cpp @@ -359,6 +359,68 @@ namespace D3dDdi return true; } + void ShaderAssembler::applyTexCoordIndexes(const std::array& 
texCoordIndexes) + { + LOG_FUNC("ShaderAssembler::applyTexCoordIndexes", Compat::array(texCoordIndexes.data(), texCoordIndexes.size())); + LOG_DEBUG << "Original bytecode: " << Compat::hexDump(m_tokens.data(), m_tokens.size() * 4); + LOG_DEBUG << disassemble(); + + RestorePos restorePos(m_pos); + m_pos = 0; + std::array tcIndexToRegNum = {}; + std::array regNumToTcIndex = {}; + regNumToTcIndex.fill(UINT_MAX); + + while (nextInstruction()) + { + const auto instruction = getToken(); + if (D3DSIO_DCL == instruction.opcode) + { + const auto usage = getToken(1); + if (D3DDECLUSAGE_TEXCOORD == (usage & D3DSP_DCL_USAGE_MASK)) + { + const auto tcIndex = (usage & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT; + const auto dst = getToken(2); + const auto regNum = dst & D3DSP_REGNUM_MASK; + tcIndexToRegNum[tcIndex] = regNum; + regNumToTcIndex[regNum] = tcIndex; + } + continue; + } + + const auto it = g_instructionMap.find(instruction.opcode); + if (it == g_instructionMap.end()) + { + continue; + } + + for (UINT i = 0; i < it->second.srcCount; ++i) + { + UINT& src = m_tokens[m_pos + 1 + it->second.dstCount + i]; + const auto regType = getRegisterType(src); + if (D3DSPR_INPUT != regType) + { + continue; + } + + const auto origRegNum = src & D3DSP_REGNUM_MASK; + const auto origTcIndex = regNumToTcIndex[origRegNum]; + if (origTcIndex >= texCoordIndexes.size()) + { + continue; + } + + const auto mappedTcIndex = texCoordIndexes[origTcIndex] & 7; + const auto mappedRegNum = tcIndexToRegNum[mappedTcIndex]; + src &= ~D3DSP_REGNUM_MASK; + src |= mappedRegNum; + } + } + + LOG_DEBUG << "Modified bytecode: " << Compat::hexDump(m_tokens.data(), m_tokens.size() * 4); + LOG_DEBUG << disassemble(); + } + std::string ShaderAssembler::disassemble() { if (Config::Settings::LogLevel::DEBUG != Compat::Log::getLogLevel()) diff --git a/DDrawCompat/D3dDdi/ShaderAssembler.h b/DDrawCompat/D3dDdi/ShaderAssembler.h index 2e56d7d..5121b6f 100644 --- a/DDrawCompat/D3dDdi/ShaderAssembler.h +++ 
b/DDrawCompat/D3dDdi/ShaderAssembler.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -15,6 +16,7 @@ namespace D3dDdi ShaderAssembler(const UINT* code, DWORD size); bool addAlphaTest(UINT alphaRef); + void applyTexCoordIndexes(const std::array& texCoordIndexes); std::string disassemble(); UINT getTextureStageCount(); const std::vector& getTokens() const { return m_tokens; }