diff --git a/DDrawCompat/Config/Config.cpp b/DDrawCompat/Config/Config.cpp index 2373a0b..6df8fdd 100644 --- a/DDrawCompat/Config/Config.cpp +++ b/DDrawCompat/Config/Config.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,7 @@ namespace Config Settings::AltTabFix altTabFix; Settings::Antialiasing antialiasing; Settings::BltFilter bltFilter; + Settings::ColorKeyMethod colorKeyMethod; Settings::ConfigHotKey configHotKey; Settings::CpuAffinity cpuAffinity; Settings::CpuAffinityRotation cpuAffinityRotation; diff --git a/DDrawCompat/Config/Settings/ColorKeyMethod.cpp b/DDrawCompat/Config/Settings/ColorKeyMethod.cpp new file mode 100644 index 0000000..b49d2a5 --- /dev/null +++ b/DDrawCompat/Config/Settings/ColorKeyMethod.cpp @@ -0,0 +1,25 @@ +#include + +namespace Config +{ + namespace Settings + { + ColorKeyMethod::ColorKeyMethod() + : MappedSetting("ColorKeyMethod", "native", { + {"none", NONE}, + {"native", NATIVE}, + {"alphatest", ALPHATEST} + }) + { + } + + Setting::ParamInfo ColorKeyMethod::getParamInfo() const + { + if (ALPHATEST == m_value) + { + return { "AlphaRef", 1, 255, 1, m_param }; + } + return {}; + } + } +} diff --git a/DDrawCompat/Config/Settings/ColorKeyMethod.h b/DDrawCompat/Config/Settings/ColorKeyMethod.h new file mode 100644 index 0000000..35d069d --- /dev/null +++ b/DDrawCompat/Config/Settings/ColorKeyMethod.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace Config +{ + namespace Settings + { + class ColorKeyMethod : public MappedSetting + { + public: + static const UINT NONE = 0; + static const UINT NATIVE = 1; + static const UINT ALPHATEST = 2; + + ColorKeyMethod(); + + virtual ParamInfo getParamInfo() const override; + }; + } + + extern Settings::ColorKeyMethod colorKeyMethod; +} diff --git a/DDrawCompat/D3dDdi/Device.cpp b/DDrawCompat/D3dDdi/Device.cpp index 7e25c71..27e0762 100644 --- a/DDrawCompat/D3dDdi/Device.cpp +++ b/DDrawCompat/D3dDdi/Device.cpp @@ -248,13 +248,6 @@ namespace D3dDdi return m_origVtable.pfnColorFill(m_device, data); } - HRESULT Device::pfnCreatePixelShader(D3DDDIARG_CREATEPIXELSHADER* data, const UINT* code) - { - LOG_DEBUG << "Pixel shader bytecode: " << Compat::hexDump(code, data->CodeSize); - LOG_DEBUG << ShaderAssembler(code, data->CodeSize).disassemble(); - return m_origVtable.pfnCreatePixelShader(m_device, data, code); - } - HRESULT Device::pfnCreateResource(D3DDDIARG_CREATERESOURCE* data) { D3DDDIARG_CREATERESOURCE2 data2 = {}; diff --git a/DDrawCompat/D3dDdi/Device.h b/DDrawCompat/D3dDdi/Device.h index ecd6379..cbd060a 100644 --- a/DDrawCompat/D3dDdi/Device.h +++ b/DDrawCompat/D3dDdi/Device.h @@ -33,7 +33,6 @@ namespace D3dDdi HRESULT pfnBlt(const D3DDDIARG_BLT* data); HRESULT pfnClear(const D3DDDIARG_CLEAR* data, UINT numRect, const RECT* rect); HRESULT pfnColorFill(const D3DDDIARG_COLORFILL* data); - HRESULT pfnCreatePixelShader(D3DDDIARG_CREATEPIXELSHADER* data, const UINT* code); HRESULT pfnCreateResource(D3DDDIARG_CREATERESOURCE* data); HRESULT pfnCreateResource2(D3DDDIARG_CREATERESOURCE2* data); HRESULT pfnCreateVertexShaderFunc(D3DDDIARG_CREATEVERTEXSHADERFUNC* data, const UINT* code); diff --git a/DDrawCompat/D3dDdi/DeviceFuncs.cpp b/DDrawCompat/D3dDdi/DeviceFuncs.cpp index d9d8807..e15030f 100644 --- a/DDrawCompat/D3dDdi/DeviceFuncs.cpp +++ b/DDrawCompat/D3dDdi/DeviceFuncs.cpp @@ -54,7 +54,6 @@ namespace SET_DEVICE_FUNC(pfnColorFill); SET_DEVICE_FUNC(pfnCreateResource); SET_DEVICE_FUNC(pfnCreateResource2); - SET_DEVICE_FUNC(pfnCreatePixelShader); SET_DEVICE_FUNC(pfnCreateVertexShaderFunc); SET_DEVICE_FUNC(pfnDepthFill); SET_DEVICE_FUNC(pfnDestroyDevice); @@ -71,6 +70,7 @@ namespace SET_DEVICE_FUNC(pfnUnlock); SET_DEVICE_FUNC(pfnUpdatePalette); + SET_DEVICE_STATE_FUNC(pfnCreatePixelShader); SET_DEVICE_STATE_FUNC(pfnCreateVertexShaderDecl); SET_DEVICE_STATE_FUNC(pfnDeletePixelShader); SET_DEVICE_STATE_FUNC(pfnDeleteVertexShaderDecl); diff --git a/DDrawCompat/D3dDdi/DeviceState.cpp b/DDrawCompat/D3dDdi/DeviceState.cpp index d756f6f..309d062 100644 --- a/DDrawCompat/D3dDdi/DeviceState.cpp +++ b/DDrawCompat/D3dDdi/DeviceState.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -10,6 +11,7 @@ #include #include #include +#include #include #define LOG_DS LOG_DEBUG << "DeviceState::" << __func__ << ": " @@ -280,6 +282,58 @@ namespace D3dDdi return it != m_vertexShaderDecls.end() ? it->second : emptyDecl; } + bool DeviceState::isColorKeyUsed() + { + if (!m_app.renderState[D3DDDIRS_COLORKEYENABLE]) + { + return false; + } + + bool used = false; + for (UINT i = 0; i < getVertexDecl().textureStageCount && !used; ++i) + { + used = !m_app.textureStageState[i][D3DDDITSS_DISABLETEXTURECOLORKEY]; + } + return used; + } + + HANDLE DeviceState::mapPixelShader(HANDLE shader) + { + if (Config::Settings::ColorKeyMethod::ALPHATEST != Config::colorKeyMethod.get()) + { + return m_app.pixelShader; + } + + auto it = m_pixelShaders.find(shader); + if (it == m_pixelShaders.end()) + { + return m_app.pixelShader; + } + + if (!it->second.isModified) + { + ShaderAssembler shaderAssembler(it->second.tokens.data(), it->second.tokens.size()); + if (shaderAssembler.addAlphaTest(Config::colorKeyMethod.getParam())) + { + const auto& tokens = shaderAssembler.getTokens(); + D3DDDIARG_CREATEPIXELSHADER data = {}; + data.CodeSize = tokens.size() * 4; + HRESULT result = m_device.getOrigVtable().pfnCreatePixelShader(m_device, &data, tokens.data()); + if (SUCCEEDED(result)) + { + it->second.modifiedPixelShader.reset(data.ShaderHandle); + } + else + { + LOG_ONCE("ERROR: failed to create modified pixel shader: " << Compat::hex(result)); + } + } + it->second.isModified = true; + } + + return it->second.modifiedPixelShader ? it->second.modifiedPixelShader.get() : m_app.pixelShader; + } + UINT DeviceState::mapRsValue(D3DDDIRENDERSTATETYPE state, UINT value) { if (state >= D3DDDIRS_WRAP0 && state <= D3DDDIRS_WRAP7) @@ -287,14 +341,13 @@ namespace D3dDdi return value & (D3DWRAPCOORD_0 | D3DWRAPCOORD_1 | D3DWRAPCOORD_2 | D3DWRAPCOORD_3); } - if (D3DDDIRS_COLORKEYENABLE == state && value) + if (D3DDDIRS_COLORKEYENABLE == state) { - UINT enable = FALSE; - for (UINT i = 0; i < getVertexDecl().textureStageCount && !enable; ++i) + if (Config::Settings::ColorKeyMethod::NATIVE != Config::colorKeyMethod.get()) { - enable = !m_app.textureStageState[i][D3DDDITSS_DISABLETEXTURECOLORKEY]; + return FALSE; } - return enable; + return isColorKeyUsed(); } if (D3DDDIRS_MULTISAMPLEANTIALIAS == state) @@ -371,6 +424,21 @@ namespace D3dDdi &DeviceState::pfnSetStreamSource); } + HRESULT DeviceState::pfnCreatePixelShader(D3DDDIARG_CREATEPIXELSHADER* data, const UINT* code) + { + LOG_DEBUG << "Pixel shader bytecode: " << Compat::hexDump(code, data->CodeSize); + LOG_DEBUG << ShaderAssembler(code, data->CodeSize).disassemble(); + HRESULT result = m_device.getOrigVtable().pfnCreatePixelShader(m_device, data, code); + if (SUCCEEDED(result)) + { + m_pixelShaders.emplace(data->ShaderHandle, + PixelShader{ std::vector(code, code + data->CodeSize / 4), + std::unique_ptr( + nullptr, ResourceDeleter(m_device, m_device.getOrigVtable().pfnDeleteVertexShaderFunc)) }); + } + return result; + } + HRESULT DeviceState::pfnCreateVertexShaderDecl( D3DDDIARG_CREATEVERTEXSHADERDECL* data, const D3DDDIVERTEXELEMENT* vertexElements) @@ -413,7 +481,21 @@ namespace D3dDdi HRESULT DeviceState::pfnDeletePixelShader(HANDLE shader) { - return deleteShader(shader, &State::pixelShader, m_device.getOrigVtable().pfnDeletePixelShader); + HRESULT result = deleteShader(shader, &State::pixelShader, m_device.getOrigVtable().pfnDeletePixelShader); + if (SUCCEEDED(result)) + { + auto it = m_pixelShaders.find(shader); + if (it != m_pixelShaders.end()) + { + if (it->second.modifiedPixelShader) + { + deleteShader(it->second.modifiedPixelShader.release(), &State::pixelShader, + m_device.getOrigVtable().pfnDeletePixelShader); + } + m_pixelShaders.erase(it); + } + } + return result; } HRESULT DeviceState::pfnDeleteVertexShaderDecl(HANDLE shader) @@ -593,6 +675,7 @@ namespace D3dDdi if (resource) { resource->updatePalettizedTexture(stage); + resource->prepareForTextureRead(stage); } } } @@ -918,14 +1001,27 @@ namespace D3dDdi void DeviceState::updateConfig() { - m_changedStates |= CS_RENDER_STATE | CS_RENDER_TARGET | CS_TEXTURE_STAGE; + m_changedStates |= CS_RENDER_STATE | CS_RENDER_TARGET | CS_SHADER | CS_TEXTURE_STAGE; + m_changedRenderStates.set(D3DDDIRS_COLORKEYENABLE); m_changedRenderStates.set(D3DDDIRS_MULTISAMPLEANTIALIAS); + + for (auto& ps : m_pixelShaders) + { + if (ps.second.modifiedPixelShader.get()) + { + deleteShader(ps.second.modifiedPixelShader.release(), &State::pixelShader, + m_device.getOrigVtable().pfnDeletePixelShader); + } + ps.second.isModified = false; + } + for (UINT i = 0; i < m_changedTextureStageStates.size(); ++i) { m_changedTextureStageStates[i].set(D3DDDITSS_MINFILTER); m_changedTextureStageStates[i].set(D3DDDITSS_MAGFILTER); m_changedTextureStageStates[i].set(D3DDDITSS_MIPFILTER); m_changedTextureStageStates[i].set(D3DDDITSS_MAXANISOTROPY); + m_changedTextureStageStates[i].set(D3DDDITSS_TEXTURECOLORKEYVAL); } m_changedTextureStageStates[0].set(D3DDDITSS_ADDRESSU); m_changedTextureStageStates[0].set(D3DDDITSS_ADDRESSV); @@ -937,7 +1033,7 @@ namespace D3dDdi m_changedRenderStates.forEach([&](UINT stateIndex) { const auto state = static_cast(stateIndex); - setRenderState({ state, mapRsValue(state, m_app.renderState[state]) }); + setRenderState({ state, mapRsValue(state, m_app.renderState[state]) }); }); m_changedRenderStates.reset(); } @@ -979,7 +1075,7 @@ namespace D3dDdi void DeviceState::updateShaders() { - setPixelShader(m_app.pixelShader); + setPixelShader(mapPixelShader(m_app.pixelShader)); setVertexShaderDecl(m_app.vertexShaderDecl); auto it = m_vertexShaderDecls.find(m_app.vertexShaderDecl); if (it != m_vertexShaderDecls.end() && it->second.isTransformed) @@ -1008,11 +1104,24 @@ namespace D3dDdi { m_changedTextureStageStates[stage].reset(D3DDDITSS_DISABLETEXTURECOLORKEY); m_changedTextureStageStates[stage].reset(D3DDDITSS_TEXTURECOLORKEYVAL); - if (!m_app.textures[stage]) + if (!m_app.textures[stage] || Config::Settings::ColorKeyMethod::NONE == Config::colorKeyMethod.get()) { return; } + if (Config::Settings::ColorKeyMethod::ALPHATEST == Config::colorKeyMethod.get()) + { + const BOOL colorKeyEnabled = !m_app.textureStageState[stage][D3DDDITSS_DISABLETEXTURECOLORKEY]; + if (colorKeyEnabled != m_pixelShaderConstB[stage][0]) + { + D3DDDIARG_SETPIXELSHADERCONSTB data = {}; + data.Register = stage; + data.Count = 1; + setShaderConst(&data, &colorKeyEnabled, m_pixelShaderConstB, m_device.getOrigVtable().pfnSetPixelShaderConstB); + } + return; + } + D3DDDIARG_TEXTURESTAGESTATE tss = {}; tss.Stage = stage; @@ -1030,7 +1139,7 @@ namespace D3dDdi if (resource && resource->getPalettizedTexture()) { tss.Value = reinterpret_cast( - m_device.getPalette(resource->getPalettizedTexture()->getPaletteHandle())[tss.Value]); + m_device.getPalette(resource->getPalettizedTexture()->getPaletteHandle())[tss.Value]) & 0xFFFFFF; } m_current.textureStageState[stage][D3DDDITSS_TEXTURECOLORKEYVAL] = tss.Value; m_current.textureStageState[stage][D3DDDITSS_DISABLETEXTURECOLORKEY] = FALSE; @@ -1048,7 +1157,7 @@ namespace D3dDdi auto resource = getTextureResource(stage); if (resource) { - resource = &resource->prepareForGpuRead(0); + resource = &resource->prepareForTextureRead(stage); } if (setTexture(stage, resource ? *resource : m_app.textures[stage]) || diff --git a/DDrawCompat/D3dDdi/DeviceState.h b/DDrawCompat/D3dDdi/DeviceState.h index 004855d..603f479 100644 --- a/DDrawCompat/D3dDdi/DeviceState.h +++ b/DDrawCompat/D3dDdi/DeviceState.h @@ -86,6 +86,7 @@ namespace D3dDdi DeviceState(Device& device); + HRESULT pfnCreatePixelShader(D3DDDIARG_CREATEPIXELSHADER* data, const UINT* code); HRESULT pfnCreateVertexShaderDecl(D3DDDIARG_CREATEVERTEXSHADERDECL* data, const D3DDDIVERTEXELEMENT* vertexElements); HRESULT pfnDeletePixelShader(HANDLE shader); HRESULT pfnDeleteVertexShaderDecl(HANDLE shader); @@ -148,6 +149,13 @@ namespace D3dDdi CS_TEXTURE_STAGE = 1 << 4 }; + struct PixelShader + { + std::vector tokens; + std::unique_ptr modifiedPixelShader; + bool isModified; + }; + template std::unique_ptr createVertexShader(const BYTE(&code)[N]) { @@ -158,6 +166,8 @@ namespace D3dDdi HRESULT deleteShader(HANDLE shader, HANDLE State::* shaderMember, HRESULT(APIENTRY* origDeleteShaderFunc)(HANDLE, HANDLE)); + bool isColorKeyUsed(); + HANDLE mapPixelShader(HANDLE shader); UINT mapRsValue(D3DDDIRENDERSTATETYPE state, UINT value); UINT mapTssValue(UINT stage, D3DDDITEXTURESTAGESTATETYPE state, UINT value); void prepareTextures(); @@ -214,6 +224,7 @@ namespace D3dDdi std::array, 8> m_changedTextureStageStates; std::unique_ptr m_vsVertexFixup; std::array m_textureResource; + std::map m_pixelShaders; bool m_isLocked; bool m_spriteMode; }; diff --git a/DDrawCompat/D3dDdi/Resource.cpp b/DDrawCompat/D3dDdi/Resource.cpp index 90fe184..6eb2612 100644 --- a/DDrawCompat/D3dDdi/Resource.cpp +++ b/DDrawCompat/D3dDdi/Resource.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -122,6 +123,8 @@ namespace D3dDdi , m_msaaSurface{} , m_msaaResolvedSurface{} , m_nullSurface{} + , m_colorKeyedSurface{} + , m_colorKey(0) , m_formatConfig(D3DDDIFMT_UNKNOWN) , m_multiSampleConfig{ D3DDDIMULTISAMPLE_NONE, 0 } , m_scaledSize{} @@ -133,6 +136,7 @@ namespace D3dDdi , m_isClampable(true) , m_isPrimary(false) , m_isPalettizedTextureUpToDate(false) + , m_isColorKeyedSurfaceUpToDate(false) { if (m_origData.Flags.VertexBuffer && m_origData.Flags.MightDrawFromLocked && @@ -824,7 +828,7 @@ namespace D3dDdi return *nextRt; } - RECT Resource::getRect(UINT subResourceIndex) + RECT Resource::getRect(UINT subResourceIndex) const { const auto& si = m_fixedData.pSurfList[subResourceIndex]; return { 0, 0, static_cast(si.Width), static_cast(si.Height) }; @@ -1071,6 +1075,7 @@ namespace D3dDdi if (!data.Flags.ReadOnly) { m_isPalettizedTextureUpToDate = false; + m_isColorKeyedSurfaceUpToDate = false; } if (m_fixedData.Flags.ZBuffer && m_msaaResolvedSurface.resource) @@ -1127,6 +1132,7 @@ namespace D3dDdi Resource& Resource::prepareForBltDst(HANDLE& resource, UINT subResourceIndex, RECT& rect) { m_isPalettizedTextureUpToDate = false; + m_isColorKeyedSurfaceUpToDate = false; if (m_lockResource || m_msaaResolvedSurface.resource) { loadFromLockRefResource(subResourceIndex); @@ -1166,6 +1172,8 @@ namespace D3dDdi void Resource::prepareForCpuWrite(UINT subResourceIndex) { + m_isPalettizedTextureUpToDate = false; + m_isColorKeyedSurfaceUpToDate = false; if (m_lockResource) { if (m_lockRefSurface.resource && @@ -1202,6 +1210,7 @@ namespace D3dDdi void Resource::prepareForGpuWrite(UINT subResourceIndex) { + m_isColorKeyedSurfaceUpToDate = false; if (m_lockResource || m_msaaResolvedSurface.resource) { if (m_msaaSurface.resource) @@ -1225,6 +1234,55 @@ namespace D3dDdi } } + Resource& Resource::prepareForTextureRead(UINT stage) + { + if (m_lockResource) + { + for (UINT i = 0; i < m_lockData.size(); ++i) + { + prepareForGpuRead(i); + } + } + + auto& defaultResource = m_msaaResolvedSurface.resource ? *m_msaaResolvedSurface.resource : *this; + const auto& appState = m_device.getState().getAppState(); + if (Config::Settings::ColorKeyMethod::ALPHATEST != Config::colorKeyMethod.get() || + !appState.renderState[D3DDDIRS_COLORKEYENABLE] || + appState.textureStageState[stage][D3DDDITSS_DISABLETEXTURECOLORKEY]) + { + return defaultResource; + } + + if (!m_colorKeyedSurface.surface) + { + auto& repo = SurfaceRepository::get(m_device.getAdapter()); + repo.getSurface(m_colorKeyedSurface, m_fixedData.pSurfList[0].Width, m_fixedData.pSurfList[0].Height, + D3DDDIFMT_A8R8G8B8, DDSCAPS_TEXTURE | DDSCAPS_3DDEVICE | DDSCAPS_VIDEOMEMORY | + (m_fixedData.MipLevels > 1 ? DDSCAPS_MIPMAP : 0), + m_fixedData.SurfCount, + m_fixedData.Flags.CubeMap ? DDSCAPS2_CUBEMAP : 0); + if (!m_colorKeyedSurface.surface) + { + return defaultResource; + } + m_colorKey = appState.textureStageState[stage][D3DDDITSS_TEXTURECOLORKEYVAL] + 1; + } + + if (!m_isColorKeyedSurfaceUpToDate || + m_colorKey != appState.textureStageState[stage][D3DDDITSS_TEXTURECOLORKEYVAL]) + { + m_isColorKeyedSurfaceUpToDate = true; + m_colorKey = appState.textureStageState[stage][D3DDDITSS_TEXTURECOLORKEYVAL]; + auto ck = convertToShaderConst(m_formatInfo, m_colorKey); + for (UINT i = 0; i < m_fixedData.SurfCount; ++i) + { + m_device.getShaderBlitter().colorKeyBlt(*m_colorKeyedSurface.resource, i, defaultResource, i, ck); + } + } + + return *m_colorKeyedSurface.resource; + } + HRESULT Resource::presentationBlt(D3DDDIARG_BLT data, Resource* srcResource) { LOG_FUNC("Resource::presentationBlt", data, *srcResource); @@ -1767,7 +1825,11 @@ namespace D3dDdi memcpy(palette, m_device.getPalette(m_palettizedTexture->m_paletteHandle), sizeof(palette)); for (int i = 0; i < 256; ++i) { - if (i != paletteColorKeyIndex && palette[i] == palette[paletteColorKeyIndex]) + if (i == paletteColorKeyIndex) + { + palette[i].rgbReserved = 0; + } + else if (palette[i] == palette[paletteColorKeyIndex]) { palette[i].rgbBlue += 0xFF == palette[i].rgbBlue ? -1 : 1; } diff --git a/DDrawCompat/D3dDdi/Resource.h b/DDrawCompat/D3dDdi/Resource.h index 5b8fb79..9e271c2 100644 --- a/DDrawCompat/D3dDdi/Resource.h +++ b/DDrawCompat/D3dDdi/Resource.h @@ -40,6 +40,7 @@ namespace D3dDdi HRESULT colorFill(D3DDDIARG_COLORFILL data); void disableClamp(); void* getLockPtr(UINT subResourceIndex); + RECT getRect(UINT subResourceIndex) const; HRESULT lock(D3DDDIARG_LOCK& data); void onDestroyResource(HANDLE resource); Resource& prepareForBltSrc(const D3DDDIARG_BLT& data); @@ -49,6 +50,7 @@ namespace D3dDdi void prepareForCpuWrite(UINT subResourceIndex); Resource& prepareForGpuRead(UINT subResourceIndex); void prepareForGpuWrite(UINT subResourceIndex); + Resource& prepareForTextureRead(UINT stage); HRESULT presentationBlt(D3DDDIARG_BLT data, Resource* srcResource); void scaleRect(RECT& rect); void setAsGdiResource(bool isGdiResource); @@ -111,7 +113,6 @@ namespace D3dDdi D3DDDIFORMAT getFormatConfig(); std::pair getMultisampleConfig(); const SurfaceRepository::Surface& getNextRenderTarget(Resource* currentRt, DWORD width, DWORD height); - RECT getRect(UINT subResourceIndex); SIZE getScaledSize(); bool isValidRect(UINT subResourceIndex, const RECT& rect); void loadFromLockRefResource(UINT subResourceIndex); @@ -138,6 +139,8 @@ namespace D3dDdi SurfaceRepository::Surface m_msaaSurface; SurfaceRepository::Surface m_msaaResolvedSurface; SurfaceRepository::Surface m_nullSurface; + SurfaceRepository::Surface m_colorKeyedSurface; + UINT m_colorKey; D3DDDIFORMAT m_formatConfig; std::pair m_multiSampleConfig; SIZE m_scaledSize; @@ -149,5 +152,6 @@ namespace D3dDdi bool m_isClampable; bool m_isPrimary; bool m_isPalettizedTextureUpToDate; + bool m_isColorKeyedSurfaceUpToDate; }; } diff --git a/DDrawCompat/D3dDdi/ShaderAssembler.cpp b/DDrawCompat/D3dDdi/ShaderAssembler.cpp index 371834d..505773a 100644 --- a/DDrawCompat/D3dDdi/ShaderAssembler.cpp +++ b/DDrawCompat/D3dDdi/ShaderAssembler.cpp @@ -5,12 +5,14 @@ #include +#include #include namespace { const UINT BEGIN_BLOCK = 1; const UINT END_BLOCK = 2; + const UINT32 PARAMETER_TOKEN_RESERVED_BIT = 0x80000000; typedef std::array Controls; @@ -50,6 +52,19 @@ namespace UINT32 type : 16; }; + class RestorePos + { + public: + RestorePos(UINT& pos) : m_pos(pos), m_origPos(pos) { } + ~RestorePos() { m_pos = m_origPos; } + + private: + UINT& m_pos; + UINT m_origPos; + }; + + void setRegisterType(UINT32& token, D3DSHADER_PARAM_REGISTER_TYPE registerType); + std::map g_instructionMap = { { D3DSIO_NOP, { "nop" } }, { D3DSIO_MOV, { "mov", 1, 1 } }, @@ -192,6 +207,68 @@ namespace { D3DDECLUSAGE_DEPTH, "depth" }, { D3DDECLUSAGE_SAMPLE, "sample" } }; + + UINT getFreeRegisterNumber(const std::set& usedRegisterNumbers) + { + UINT prev = UINT_MAX; + for (UINT num : usedRegisterNumbers) + { + if (num > prev + 1) + { + return prev + 1; + } + } + return usedRegisterNumbers.empty() ? 0 : (*usedRegisterNumbers.rbegin() + 1); + } + + D3DSHADER_PARAM_REGISTER_TYPE getRegisterType(UINT32 token) + { + return static_cast( + ((token & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | + ((token & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2)); + } + + UINT32 makeConstToken(FLOAT value) + { + return *reinterpret_cast(&value); + } + + UINT32 makeDestinationParameterToken(D3DSHADER_PARAM_REGISTER_TYPE registerType, UINT32 registerNumber, + UINT32 writeMask, UINT32 modifiers) + { + UINT32 token = PARAMETER_TOKEN_RESERVED_BIT | registerNumber | writeMask | modifiers; + setRegisterType(token, registerType); + return token; + } + + UINT32 makeInstructionToken(D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) + { + auto& inst = g_instructionMap.at(static_cast(opcode)); + auto tokenCount = inst.dstCount + inst.srcCount + inst.extraCount; + return opcode | (tokenCount << D3DSI_INSTLENGTH_SHIFT); + } + + UINT32 makeSourceParameterToken(D3DSHADER_PARAM_REGISTER_TYPE registerType, UINT32 registerNumber, + UINT32 swizzle, D3DSHADER_PARAM_SRCMOD_TYPE modifier) + { + UINT32 token = PARAMETER_TOKEN_RESERVED_BIT | registerNumber | swizzle | modifier; + setRegisterType(token, registerType); + return token; + } + + UINT reserveRegisterNumber(std::set& usedRegisterNumbers) + { + auto num = getFreeRegisterNumber(usedRegisterNumbers); + usedRegisterNumbers.insert(num); + return num; + } + + void setRegisterType(UINT32& token, D3DSHADER_PARAM_REGISTER_TYPE registerType) + { + token &= ~(D3DSP_REGTYPE_MASK | D3DSP_REGTYPE_MASK2); + token |= ((registerType << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | + ((registerType << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); + } } namespace D3dDdi @@ -202,11 +279,95 @@ namespace D3dDdi { } + bool ShaderAssembler::addAlphaTest(UINT alphaRef) + { + LOG_FUNC("ShaderAssembler::addAlphaTest", alphaRef); + LOG_DEBUG << "Original bytecode: " << Compat::hexDump(m_tokens.data(), m_tokens.size() * 4); + LOG_DEBUG << disassemble(); + + RestorePos restorePos(m_pos); + m_pos = 0; + UINT constRegNum = UINT_MAX; + UINT tempRegNum = UINT_MAX; + + while (nextInstruction()) + { + auto instruction = getToken(); + if (D3DSIO_TEX != instruction.opcode) + { + continue; + } + + const auto dst = getToken(1); + const auto src = getToken(3); + const auto samplerRegNum = src & D3DSP_REGNUM_MASK; + + if (UINT_MAX == constRegNum) + { + auto usedConstRegNums = getUsedRegisterNumbers(D3DSPR_CONST); + constRegNum = reserveRegisterNumber(usedConstRegNums); + if (constRegNum >= 32) + { + LOG_ONCE("ERROR: no free PS const register found"); + return false; + } + + auto usedTempRegNums = getUsedRegisterNumbers(D3DSPR_TEMP); + tempRegNum = reserveRegisterNumber(usedTempRegNums); + if (tempRegNum >= 32) + { + LOG_ONCE("ERROR: no free PS temp register found"); + return false; + } + } + + nextInstruction(); + + insertToken(makeInstructionToken(D3DSIO_IF)); + insertToken(makeSourceParameterToken(D3DSPR_CONSTBOOL, samplerRegNum, D3DSP_NOSWIZZLE, D3DSPSM_NONE)); + + insertToken(makeInstructionToken(D3DSIO_SUB)); + insertToken(makeDestinationParameterToken(D3DSPR_TEMP, tempRegNum, D3DSP_WRITEMASK_ALL, D3DSPDM_NONE)); + insertToken(makeSourceParameterToken(D3DSPR_TEMP, dst & D3DSP_REGNUM_MASK, D3DSP_REPLICATEALPHA, D3DSPSM_NONE)); + insertToken(makeSourceParameterToken(D3DSPR_CONST, constRegNum, D3DSP_REPLICATEALPHA, D3DSPSM_NONE)); + + insertToken(makeInstructionToken(D3DSIO_TEXKILL)); + insertToken(makeDestinationParameterToken(D3DSPR_TEMP, tempRegNum, D3DSP_WRITEMASK_ALL, D3DSPDM_NONE)); + + insertToken(makeInstructionToken(D3DSIO_ENDIF)); + --m_pos; + } + + if (UINT_MAX == constRegNum) + { + LOG_DEBUG << "No modifications needed"; + return false; + } + + m_pos = 0; + nextInstruction(); + insertToken(makeInstructionToken(D3DSIO_DEF)); + insertToken(makeDestinationParameterToken(D3DSPR_CONST, constRegNum, D3DSP_WRITEMASK_ALL, D3DSPSM_NONE)); + for (UINT i = 0; i < 3; ++i) + { + insertToken(makeConstToken(0)); + } + insertToken(makeConstToken(alphaRef / 255.0f)); + + LOG_DEBUG << "Modified bytecode: " << Compat::hexDump(m_tokens.data(), m_tokens.size() * 4); + LOG_DEBUG << disassemble(); + return true; + } + std::string ShaderAssembler::disassemble() { - auto origPos = m_pos; - m_pos = 0; + if (Config::Settings::LogLevel::DEBUG != Compat::Log::getLogLevel()) + { + return {}; + } + RestorePos restorePos(m_pos); + m_pos = 0; std::ostringstream os; os << "Disassembled shader code:" << std::endl; @@ -228,7 +389,6 @@ namespace D3dDdi os << e.what(); } - m_pos = origPos; return os.str(); } @@ -419,9 +579,7 @@ namespace D3dDdi void ShaderAssembler::disassembleRegister(std::ostream& os, UINT token) { - auto registerType = static_cast( - ((token & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | - ((token & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2)); + auto registerType = getRegisterType(token); auto registerNumber = token & D3DSP_REGNUM_MASK; auto it = g_registerMap.find(registerType); @@ -538,6 +696,66 @@ namespace D3dDdi return static_cast(m_tokens.front() >> 16); } + template + Token ShaderAssembler::getToken(UINT offset) const + { + auto pos = m_pos + offset; + return pos < m_tokens.size() ? *reinterpret_cast(&m_tokens[pos]) : Token{}; + } + + std::set ShaderAssembler::getUsedRegisterNumbers(int registerType) + { + RestorePos restorePos(m_pos); + m_pos = 0; + std::set usedRegisterNumbers; + while (nextInstruction()) + { + auto it = g_instructionMap.find(getToken().opcode); + if (it == g_instructionMap.end()) + { + continue; + } + + const UINT offset = D3DSIO_DCL == it->first ? 2 : 1; + const auto tokenCount = it->second.dstCount + it->second.srcCount; + for (UINT i = 0; i < tokenCount; ++i) + { + auto token = getToken(offset + i); + if (registerType == getRegisterType(token)) + { + usedRegisterNumbers.insert(token & D3DSP_REGNUM_MASK); + } + } + } + return usedRegisterNumbers; + } + + void ShaderAssembler::insertToken(UINT32 token) + { + m_tokens.insert(m_tokens.begin() + m_pos, token); + ++m_pos; + } + + bool ShaderAssembler::nextInstruction() + { + if (0 == m_pos) + { + m_pos = 1; + } + else + { + auto token = readToken(); + readTokens(token.tokenCount); + } + + while (D3DSIO_COMMENT == getToken().opcode) + { + auto token = readToken(); + readTokens(token.tokenCount); + } + return m_pos < m_tokens.size() && D3DSIO_END != getToken().opcode; + } + UINT ShaderAssembler::readToken() { return *readTokens(1); diff --git a/DDrawCompat/D3dDdi/ShaderAssembler.h b/DDrawCompat/D3dDdi/ShaderAssembler.h index 683b3c5..49fe06e 100644 --- a/DDrawCompat/D3dDdi/ShaderAssembler.h +++ b/DDrawCompat/D3dDdi/ShaderAssembler.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -13,7 +14,9 @@ namespace D3dDdi public: ShaderAssembler(const UINT* code, DWORD size); + bool addAlphaTest(UINT alphaRef); std::string disassemble(); + const std::vector& getTokens() const { return m_tokens; } private: enum ShaderType @@ -33,10 +36,15 @@ namespace D3dDdi void disassembleSourceSwizzle(std::ostream& os, UINT token); void disassembleVersion(std::ostream& os); UINT getRemainingTokenCount() const; + std::set getUsedRegisterNumbers(int registerType); ShaderType getShaderType() const; + void insertToken(UINT32 token); + bool nextInstruction(); UINT readToken(); const UINT* readTokens(UINT count); + template + Token getToken(UINT offset = 0) const; template Token readToken(); diff --git a/DDrawCompat/D3dDdi/ShaderBlitter.cpp b/DDrawCompat/D3dDdi/ShaderBlitter.cpp index 1245103..d76b358 100644 --- a/DDrawCompat/D3dDdi/ShaderBlitter.cpp +++ b/DDrawCompat/D3dDdi/ShaderBlitter.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ namespace D3dDdi ShaderBlitter::ShaderBlitter(Device& device) : m_device(device) , m_psColorKey(createPixelShader(g_psColorKey)) + , m_psColorKeyBlend(createPixelShader(g_psColorKeyBlend)) , m_psDepthBlt(createPixelShader(g_psDepthBlt)) , m_psDrawCursor(createPixelShader(g_psDrawCursor)) , m_psGamma(createPixelShader(g_psGamma)) @@ -164,6 +166,15 @@ namespace D3dDdi m_device.flushPrimitives(); } + void ShaderBlitter::colorKeyBlt(const Resource& dstResource, UINT dstSubResourceIndex, + const Resource& srcResource, UINT srcSubResourceIndex, DeviceState::ShaderConstF srcColorKey) + { + DeviceState::TempPixelShaderConst psConst(m_device.getState(), { 31, 1 }, &srcColorKey); + blt(dstResource, dstSubResourceIndex, dstResource.getRect(dstSubResourceIndex), + srcResource, srcSubResourceIndex, srcResource.getRect(srcSubResourceIndex), + m_psColorKeyBlend.get(), D3DTEXF_POINT); + } + std::unique_ptr ShaderBlitter::createPixelShader(const BYTE* code, UINT size) { D3DDDIARG_CREATEPIXELSHADER data = {}; diff --git a/DDrawCompat/D3dDdi/ShaderBlitter.h b/DDrawCompat/D3dDdi/ShaderBlitter.h index 00c62b2..77925a6 100644 --- a/DDrawCompat/D3dDdi/ShaderBlitter.h +++ b/DDrawCompat/D3dDdi/ShaderBlitter.h @@ -24,6 +24,8 @@ namespace D3dDdi ShaderBlitter& operator=(const ShaderBlitter&) = delete; ShaderBlitter& operator=(ShaderBlitter&&) = delete; + void colorKeyBlt(const Resource& dstResource, UINT dstSubResourceIndex, + const Resource& srcResource, UINT srcSubResourceIndex, DeviceState::ShaderConstF srcColorKey); void cursorBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, HCURSOR cursor, POINT pt); void depthBlt(const Resource& dstResource, const RECT& dstRect, @@ -75,6 +77,7 @@ namespace D3dDdi Device& m_device; std::unique_ptr m_psColorKey; + std::unique_ptr m_psColorKeyBlend; std::unique_ptr m_psDepthBlt; std::unique_ptr m_psDrawCursor; std::unique_ptr m_psGamma; diff --git a/DDrawCompat/D3dDdi/SurfaceRepository.cpp b/DDrawCompat/D3dDdi/SurfaceRepository.cpp index 071fa87..6141fbd 100644 --- a/DDrawCompat/D3dDdi/SurfaceRepository.cpp +++ b/DDrawCompat/D3dDdi/SurfaceRepository.cpp @@ -30,7 +30,7 @@ namespace D3dDdi } CompatPtr SurfaceRepository::createSurface( - DWORD width, DWORD height, D3DDDIFORMAT format, DWORD caps, UINT surfaceCount) + DWORD width, DWORD height, D3DDDIFORMAT format, DWORD caps, DWORD caps2, UINT surfaceCount) { if (!m_dd) { @@ -53,7 +53,21 @@ namespace D3dDdi desc.dwHeight = height; desc.ddpfPixelFormat = getPixelFormat(format); desc.ddsCaps.dwCaps = caps; - if (surfaceCount > 1) + desc.ddsCaps.dwCaps2 = caps2; + + if (caps2 & DDSCAPS2_CUBEMAP) + { + desc.ddsCaps.dwCaps |= DDSCAPS_COMPLEX; + surfaceCount /= 6; + } + + if (caps & DDSCAPS_MIPMAP) + { + desc.dwFlags |= DDSD_MIPMAPCOUNT; + desc.ddsCaps.dwCaps |= DDSCAPS_COMPLEX; + desc.dwMipMapCount = surfaceCount; + } + else if (surfaceCount > 1) { desc.dwFlags |= DDSD_BACKBUFFERCOUNT; desc.ddsCaps.dwCaps |= DDSCAPS_COMPLEX | DDSCAPS_FLIP; @@ -221,7 +235,7 @@ namespace D3dDdi } SurfaceRepository::Surface& SurfaceRepository::getSurface(Surface& surface, DWORD width, DWORD height, - D3DDDIFORMAT format, DWORD caps, UINT surfaceCount) + D3DDDIFORMAT format, DWORD caps, UINT surfaceCount, DWORD caps2) { if (!g_enableSurfaceCheck) { @@ -235,7 +249,7 @@ namespace D3dDdi if (!surface.surface) { - surface.surface = createSurface(width, height, format, caps, surfaceCount); + surface.surface = createSurface(width, height, format, caps, caps2, surfaceCount); if (surface.surface) { surface.resource = D3dDdi::Device::findResource( diff --git a/DDrawCompat/D3dDdi/SurfaceRepository.h b/DDrawCompat/D3dDdi/SurfaceRepository.h index c874786..b0b651c 100644 --- a/DDrawCompat/D3dDdi/SurfaceRepository.h +++ b/DDrawCompat/D3dDdi/SurfaceRepository.h @@ -42,7 +42,7 @@ namespace D3dDdi Resource* getPaletteTexture(); Resource* getGammaRampTexture(); Surface& getSurface(Surface& surface, DWORD width, DWORD height, - D3DDDIFORMAT format, DWORD caps, UINT surfaceCount = 1); + D3DDDIFORMAT format, DWORD caps, UINT surfaceCount = 1, DWORD caps2 = 0); const Surface& getTempRenderTarget(DWORD width, DWORD height, UINT index = 0); Surface& getTempSysMemSurface(DWORD width, DWORD height); Surface& getTempSurface(Surface& surface, DWORD width, DWORD height, @@ -57,7 +57,7 @@ namespace D3dDdi private: CompatPtr createSurface(DWORD width, DWORD height, - D3DDDIFORMAT format, DWORD caps, UINT surfaceCount); + D3DDDIFORMAT format, DWORD caps, DWORD caps2, UINT surfaceCount); bool getCursorImage(Surface& surface, HCURSOR cursor, DWORD width, DWORD height, UINT flags); Resource* getInitializedResource(Surface& surface, DWORD width, DWORD height, D3DDDIFORMAT format, DWORD caps, std::function initFunc); diff --git a/DDrawCompat/DDrawCompat.vcxproj b/DDrawCompat/DDrawCompat.vcxproj index 1c176b6..fe9ec88 100644 --- a/DDrawCompat/DDrawCompat.vcxproj +++ b/DDrawCompat/DDrawCompat.vcxproj @@ -165,6 +165,7 @@ + @@ -333,6 +334,7 @@ + @@ -457,12 +459,9 @@ - - Pixel - - - Pixel - + + + diff --git a/DDrawCompat/DDrawCompat.vcxproj.filters b/DDrawCompat/DDrawCompat.vcxproj.filters index f20c25e..46b82da 100644 --- a/DDrawCompat/DDrawCompat.vcxproj.filters +++ b/DDrawCompat/DDrawCompat.vcxproj.filters @@ -663,6 +663,9 @@ Header Files\D3dDdi + + Header Files\Config\Settings + @@ -1049,7 +1052,10 @@ Source Files\D3dDdi - + + Source Files\Config\Settings + + Resource Files @@ -1088,10 +1094,13 @@ Shaders + + Shaders + Resource Files - \ No newline at end of file + diff --git a/DDrawCompat/Overlay/ConfigWindow.cpp b/DDrawCompat/Overlay/ConfigWindow.cpp index d6bf5b7..30b7364 100644 --- a/DDrawCompat/Overlay/ConfigWindow.cpp +++ b/DDrawCompat/Overlay/ConfigWindow.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,7 @@ namespace namespace Overlay { ConfigWindow::ConfigWindow() - : Window(nullptr, { 0, 0, SettingControl::TOTAL_WIDTH, 455 }, WS_BORDER, Config::configHotKey.get()) + : Window(nullptr, { 0, 0, SettingControl::TOTAL_WIDTH, 480 }, WS_BORDER, Config::configHotKey.get()) , m_buttonCount(0) , m_focus(nullptr) { @@ -44,6 +45,7 @@ namespace Overlay addControl(Config::alternatePixelCenter); addControl(Config::antialiasing); addControl(Config::bltFilter); + addControl(Config::colorKeyMethod); addControl(Config::depthFormat); addControl(Config::displayFilter); addControl(Config::fontAntialiasing); diff --git a/DDrawCompat/Overlay/SettingControl.cpp b/DDrawCompat/Overlay/SettingControl.cpp index ccac3e8..9975aff 100644 --- a/DDrawCompat/Overlay/SettingControl.cpp +++ b/DDrawCompat/Overlay/SettingControl.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -83,6 +84,7 @@ namespace Overlay } if (&Config::antialiasing == &m_setting || + &Config::colorKeyMethod == &m_setting || &Config::depthFormat == &m_setting || &Config::renderColorDepth == &m_setting || &Config::resolutionScale == &m_setting || diff --git a/DDrawCompat/Shaders/ColorKeyBlend.hlsl b/DDrawCompat/Shaders/ColorKeyBlend.hlsl new file mode 100644 index 0000000..d12717c --- /dev/null +++ b/DDrawCompat/Shaders/ColorKeyBlend.hlsl @@ -0,0 +1,13 @@ +sampler2D s_texture : register(s0); +float4 g_colorKey : register(c31); + +float4 main(float2 texCoord : TEXCOORD0) : COLOR0 +{ + float4 color = tex2D(s_texture, texCoord); + float4 diff = abs(color - g_colorKey); + if (all(diff.rgb < 0.5f / 255)) + { + color.a = 0; + } + return color; +}