diff --git a/DDrawCompat/Common/Vector.h b/DDrawCompat/Common/Vector.h index 5d00c8b..8102091 100644 --- a/DDrawCompat/Common/Vector.h +++ b/DDrawCompat/Common/Vector.h @@ -6,6 +6,8 @@ #include +#include + template class VectorRepresentation { @@ -91,6 +93,18 @@ Vector binaryOperation(Operator op, const Vector& lhs, c return result; } +template +Vector binaryOperation(Operator op, Elem lhs, const Vector& rhs) +{ + return binaryOperation(op, Vector(lhs), rhs); +} + +template +Vector binaryOperation(Operator op, const Vector& lhs, Elem rhs) +{ + return binaryOperation(op, lhs, Vector(rhs)); +} + template Vector unaryOperation(Operator op, const Vector& vec) { @@ -104,14 +118,26 @@ Vector unaryOperation(Operator op, const Vector& vec) #define DEFINE_VECTOR_BINARY_OPERATOR(name, ...) \ template \ - inline Vector name(const Vector& lhs, const Vector& rhs) \ + Vector name(const Vector& lhs, const Vector& rhs) \ + { \ + return binaryOperation([](Elem x, Elem y) { return __VA_ARGS__; }, lhs, rhs); \ + } \ + \ + template \ + Vector name(Elem lhs, const Vector& rhs) \ + { \ + return binaryOperation([](Elem x, Elem y) { return __VA_ARGS__; }, lhs, rhs); \ + } \ + \ + template \ + Vector name(const Vector& lhs, Elem rhs) \ { \ return binaryOperation([](Elem x, Elem y) { return __VA_ARGS__; }, lhs, rhs); \ } #define DEFINE_VECTOR_UNARY_OPERATOR(name, ...) \ template \ - inline Vector name(const Vector& vec) \ + Vector name(const Vector& vec) \ { \ return unaryOperation([](Elem x) { return __VA_ARGS__; }, vec); \ } @@ -137,5 +163,27 @@ DEFINE_VECTOR_STD_UNARY_OPERATOR(floor); #undef DEFINE_VECTOR_STD_BINARY_OPERATOR #undef DEFINE_VECTOR_STD_UNARY_OPERATOR +template +Elem dot(const Vector& lhs, const Vector& rhs) +{ + Elem result = 0; + for (std::size_t i = 0; i < size; ++i) + { + result += lhs[i] * rhs[i]; + } + return result; +} + +template +std::ostream& operator<<(std::ostream& os, const Vector& vec) +{ + Compat::LogStruct log(os); + for (std::size_t i = 0; i < size; ++i) + { + log << vec[i]; + } + return os; +} + typedef Vector Float2; typedef Vector Int2; diff --git a/DDrawCompat/D3dDdi/Resource.cpp b/DDrawCompat/D3dDdi/Resource.cpp index d817600..d6b74b1 100644 --- a/DDrawCompat/D3dDdi/Resource.cpp +++ b/DDrawCompat/D3dDdi/Resource.cpp @@ -1257,6 +1257,7 @@ namespace D3dDdi pal[i].rgbRed = entries[i].peRed; pal[i].rgbGreen = entries[i].peGreen; pal[i].rgbBlue = entries[i].peBlue; + pal[i].rgbReserved = 0xFF; } m_device.getShaderBlitter().palettizedBlt( *rt, rtIndex, rtRect, *srcResource, data.SrcSubResourceIndex, data.SrcRect, pal); diff --git a/DDrawCompat/D3dDdi/ShaderBlitter.cpp b/DDrawCompat/D3dDdi/ShaderBlitter.cpp index e0c77e2..1ddf49f 100644 --- a/DDrawCompat/D3dDdi/ShaderBlitter.cpp +++ b/DDrawCompat/D3dDdi/ShaderBlitter.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -15,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -70,6 +70,7 @@ namespace D3dDdi { ShaderBlitter::ShaderBlitter(Device& device) : m_device(device) + , m_psBilinear(createPixelShader(g_psBilinear)) , m_psColorKey(createPixelShader(g_psColorKey)) , m_psColorKeyBlend(createPixelShader(g_psColorKeyBlend)) , m_psCubicConvolution{ @@ -80,14 +81,12 @@ namespace D3dDdi , m_psDepthBlt(createPixelShader(g_psDepthBlt)) , m_psDrawCursor(createPixelShader(g_psDrawCursor)) , m_psGamma(createPixelShader(g_psGamma)) - , m_psGenBilinear(createPixelShader(g_psGenBilinear)) , m_psLanczos(createPixelShader(g_psLanczos)) , m_psLockRef(createPixelShader(g_psLockRef)) , m_psPaletteLookup(createPixelShader(g_psPaletteLookup)) , m_psTextureSampler(createPixelShader(g_psTextureSampler)) , m_vertexShaderDecl(createVertexShaderDecl()) - , m_convolutionBaseParams{} - , m_convolutionExtraParams{} + , m_convolutionParams{} , m_vertices{} { for (std::size_t i = 0; i < m_vertices.size(); ++i) @@ -105,13 +104,42 @@ namespace D3dDdi const float B = blurPercent / 100.0f; const float C = (1 - B) / 2; - m_convolutionExtraParams[0] = { (12 - 9 * B - 6 * C) / 6, (-18 + 12 * B + 6 * C) / 6, 0, (6 - 2 * B) / 6 }; - m_convolutionExtraParams[1] = { (-B - 6 * C) / 6, (6 * B + 30 * C) / 6, (-12 * B - 48 * C) / 6, (8 * B + 24 * C) / 6 }; + m_convolutionParams.extra[0] = { (12 - 9 * B - 6 * C) / 6, (-18 + 12 * B + 6 * C) / 6, 0, (6 - 2 * B) / 6 }; + m_convolutionParams.extra[1] = { (-B - 6 * C) / 6, (6 * B + 30 * C) / 6, (-12 * B - 48 * C) / 6, (8 * B + 24 * C) / 6 }; convolutionBlt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, 2, m_psCubicConvolution[0].get()); } + void ShaderBlitter::bilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, + const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent) + { + LOG_FUNC("ShaderBlitter::bilinearBlt", static_cast(dstResource), dstSubResourceIndex, dstRect, + static_cast(srcResource), srcSubResourceIndex, srcRect, blurPercent); + + const Float2 dstSize(dstRect.right - dstRect.left, dstRect.bottom - dstRect.top); + const Float2 srcSize(srcRect.right - srcRect.left, srcRect.bottom - srcRect.top); + const Float2 scale = dstSize / srcSize; + const Float2 higherScale = max(scale, 1.0f / scale); + const Float2 blur = Config::displayFilter.getParam() / 100.0f; + const Float2 adjustedScale = 1.0f / (blur + (1.0f - blur) / higherScale); + const Float2 multiplier = -1.0f * adjustedScale; + const Float2 offset = 0.5f * adjustedScale + 0.5f; + const Float2 support = 0.5f + 0.5f / adjustedScale; + + convolutionBlt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, + support, m_psBilinear.get(), [&](bool isHorizontal) { + if (isHorizontal) + { + m_convolutionParams.extra[0] = { multiplier.x, multiplier.y, offset.x, offset.y }; + } + else + { + m_convolutionParams.extra[0] = { multiplier.y, multiplier.x, offset.y, offset.x }; + } + }); + } + void ShaderBlitter::blt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, HANDLE pixelShader, UINT filter, UINT flags, const BYTE* alpha, const Gdi::Region& srcRgn) @@ -213,64 +241,84 @@ namespace D3dDdi void ShaderBlitter::convolution(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, - bool isHorizontal, float kernelStep, int sampleCount, float support, HANDLE pixelShader) + bool isHorizontal, Float2 support, HANDLE pixelShader, + const std::function setExtraParams) { LOG_FUNC("ShaderBlitter::convolution", static_cast(dstResource), dstSubResourceIndex, dstRect, static_cast(srcResource), srcSubResourceIndex, srcRect, - isHorizontal, kernelStep, sampleCount, support, pixelShader); + isHorizontal, support, pixelShader, static_cast(setExtraParams)); const auto& srcDesc = srcResource.getFixedDesc().pSurfList[0]; - const Float2 textureSize(srcDesc.Width, srcDesc.Height); - const Float2 textureSizeRcp = Float2(1) / textureSize; - const float textureStep = isHorizontal ? textureSizeRcp.x : textureSizeRcp.y; + const Float2 dstSize(dstRect.right - dstRect.left, dstRect.bottom - dstRect.top); + const Float2 srcSize(srcRect.right - srcRect.left, srcRect.bottom - srcRect.top); + const Float2 scale = dstSize / srcSize; - const int firstSampleOffset = -sampleCount / 2 + 1; - const float firstKernelOffset = firstSampleOffset * kernelStep; - const float firstTextureOffset = firstSampleOffset * textureStep; + const bool isDual = srcSize.x != dstSize.x && srcSize.y != dstSize.y; + const Float2 compMaskPri = isHorizontal ? Float2(1, 0) : Float2(0, 1); + const Float2 compMaskSec = isHorizontal ? Float2(0, 1) : Float2(1, 0); + const Float2 compMask = isDual ? Float2(1, 1) : compMaskPri; - m_convolutionBaseParams[0] = { textureSize.x, textureSize.y, textureSizeRcp.x, textureSizeRcp.y }; - if (isHorizontal) + auto& p = m_convolutionParams; + p.textureSize = { srcDesc.Width, srcDesc.Height }; + p.sampleCoordOffset = -0.5f * compMask; + p.textureCoordStep = 1.0f / p.textureSize; + p.kernelCoordStep = min(scale, 1.0f); + p.textureCoordStepPri = 2.0f * p.textureCoordStep * compMaskPri; + p.textureCoordStepSec = p.textureCoordStep * compMaskSec; + p.kernelCoordStepPri = 2.0f * p.kernelCoordStep * compMaskPri; + p.support = dot(support, compMaskPri); + p.supportRcp = 1.0f / p.support; + + const Int2 sampleCountHalf = min(ceil(support / p.kernelCoordStep), 255.0f); + const Float2 firstSampleOffset = 1 - sampleCountHalf; + p.kernelCoordOffset[0] = firstSampleOffset * p.kernelCoordStep; + p.kernelCoordOffset[1] = p.kernelCoordOffset[0] + p.kernelCoordStep; + p.textureCoordOffset[0] = (firstSampleOffset * compMaskPri + 0.5f) * p.textureCoordStep; + p.textureCoordOffset[1] = p.textureCoordOffset[0] + p.textureCoordStep * compMaskPri; + + if (!isHorizontal) { - m_convolutionBaseParams[1] = { firstTextureOffset, 0, firstKernelOffset, 0 }; - m_convolutionBaseParams[2] = { textureStep, 0, kernelStep, 0 }; - m_convolutionBaseParams[3] = { -0.5f, 0, 0.5f * textureSizeRcp.x, 0.5f * textureSizeRcp.y }; + std::swap(p.kernelCoordStepPri.x, p.kernelCoordStepPri.y); } - else + + if (setExtraParams) { - m_convolutionBaseParams[1] = { 0, firstTextureOffset, 0, firstKernelOffset }; - m_convolutionBaseParams[2] = { 0, textureStep, 0, kernelStep }; - m_convolutionBaseParams[3] = { 0, -0.5f, 0.5f * textureSizeRcp.x, 0.5f * textureSizeRcp.y }; + setExtraParams(isHorizontal); } - m_convolutionBaseParams[4] = { support, 1.0f / support, 0, 0 }; + + const DeviceState::ShaderConstI reg = { dot(sampleCountHalf - 1, Int2(compMaskPri)) }; + DeviceState::TempPixelShaderConstI tempPsConstI(m_device.getState(), { 0, 1 }, ®); DeviceState::TempPixelShaderConst tempPsConst(m_device.getState(), - { 0, m_convolutionBaseParams.size() + m_convolutionExtraParams.size()}, m_convolutionBaseParams.data()); + { 0, sizeof(m_convolutionParams) / sizeof(DeviceState::ShaderConstF) }, + reinterpret_cast(&m_convolutionParams)); blt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, - pixelShader, D3DTEXF_LINEAR | D3DTEXF_SRGB); + pixelShader, (p.support <= 1 ? D3DTEXF_LINEAR : D3DTEXF_POINT) | D3DTEXF_SRGB); } void ShaderBlitter::convolutionBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, - Float2 support, HANDLE pixelShader) + Float2 support, HANDLE pixelShader, const std::function setExtraParams) { LOG_FUNC("ShaderBlitter::convolutionBlt", static_cast(dstResource), dstSubResourceIndex, dstRect, - static_cast(srcResource), srcSubResourceIndex, srcRect, support, pixelShader); + static_cast(srcResource), srcSubResourceIndex, srcRect, support, pixelShader, + static_cast(setExtraParams)); const Int2 dstSize(dstRect.right - dstRect.left, dstRect.bottom - dstRect.top); const Int2 srcSize(srcRect.right - srcRect.left, srcRect.bottom - srcRect.top); const Float2 scale = Float2(dstSize) / Float2(srcSize); - const Float2 kernelStep = min(scale, Float2(1)); - const Int2 sampleCount = min(Float2(2) * ceil(support / kernelStep), Float2(255)); + const Float2 kernelCoordStep = min(scale, 1.0f); + const Float2 sampleCountHalf = support / kernelCoordStep; - if (srcSize.y == dstSize.y) + if (srcSize.y == dstSize.y || sampleCountHalf.y <= 1) { return convolution(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, - true, kernelStep.x, sampleCount.x, support.x, pixelShader); + true, support, pixelShader, setExtraParams); } - else if (srcSize.x == dstSize.x) + else if (srcSize.x == dstSize.x || sampleCountHalf.x <= 1) { return convolution(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, - false, kernelStep.y, sampleCount.y, support.y, pixelShader); + false, support, pixelShader, setExtraParams); } const bool isHorizontalFirst = dstSize.x * srcSize.y <= srcSize.x * dstSize.y; @@ -290,27 +338,10 @@ namespace D3dDdi return; } - const std::array reg = { { - { sampleCount.x }, - { sampleCount.y } - } }; - - DeviceState::TempPixelShaderConstI tempPsConstI(m_device.getState(), { 0, reg.size()}, reg.data()); - - if (isHorizontalFirst) - { - convolution(*rt, 0, rect, srcResource, srcSubResourceIndex, srcRect, - true, kernelStep.x, sampleCount.x, support.x, pixelShader); - convolution(dstResource, dstSubResourceIndex, dstRect, *rt, 0, rect, - false, kernelStep.y, sampleCount.y, support.y, pixelShader); - } - else - { - convolution(*rt, 0, rect, srcResource, srcSubResourceIndex, srcRect, - false, kernelStep.y, sampleCount.y, support.y, pixelShader); - convolution(dstResource, dstSubResourceIndex, dstRect, *rt, 0, rect, - true, kernelStep.x, sampleCount.x, support.x, pixelShader); - } + convolution(*rt, 0, rect, srcResource, srcSubResourceIndex, srcRect, + isHorizontalFirst, support, pixelShader, setExtraParams); + convolution(dstResource, dstSubResourceIndex, dstRect, *rt, 0, rect, + !isHorizontalFirst, support, pixelShader, setExtraParams); } std::unique_ptr ShaderBlitter::createPixelShader(const BYTE* code, UINT size) @@ -502,7 +533,7 @@ namespace D3dDdi break; case Config::Settings::DisplayFilter::BILINEAR: - m_device.getShaderBlitter().genBilinearBlt(rt, rtIndex, rtRect, + m_device.getShaderBlitter().bilinearBlt(rt, rtIndex, rtRect, srcResource, srcSubResourceIndex, srcRect, Config::displayFilter.getParam()); break; @@ -582,36 +613,6 @@ namespace D3dDdi blt(dstResource, dstSubResourceIndex, dstRect, srcResource, 0, srcRect, m_psGamma.get(), D3DTEXF_POINT); } - void ShaderBlitter::genBilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, - const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent) - { - LOG_FUNC("ShaderBlitter::genBilinearBlt", static_cast(dstResource), dstSubResourceIndex, dstRect, - static_cast(srcResource), srcSubResourceIndex, srcRect, blurPercent); - if (100 == blurPercent) - { - blt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, - m_psTextureSampler.get(), D3DTEXF_LINEAR | D3DTEXF_SRGB); - return; - } - - const auto& srcDesc = srcResource.getFixedDesc().pSurfList[0]; - float scaleX = static_cast(dstRect.right - dstRect.left) / (srcRect.right - srcRect.left); - float scaleY = static_cast(dstRect.bottom - dstRect.top) / (srcRect.bottom - srcRect.top); - - const float blur = blurPercent / 100.0f; - scaleX = 1 / ((1 - blur) / scaleX + blur); - scaleY = 1 / ((1 - blur) / scaleY + blur); - - const std::array registers{ { - { static_cast(srcDesc.Width), static_cast(srcDesc.Height), 0.0f, 0.0f }, - { scaleX, scaleY, 0.0f, 0.0f } - } }; - - DeviceState::TempPixelShaderConst tempPsConst(m_device.getState(), { 0, registers.size() }, registers.data()); - blt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, m_psGenBilinear.get(), - D3DTEXF_LINEAR | D3DTEXF_SRGB); - } - void ShaderBlitter::lanczosBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT lobes) { @@ -721,21 +722,21 @@ namespace D3dDdi switch (lobes) { case 2: - m_convolutionExtraParams[0] = getSplineWeights(0, 1.0f, -9.0f / 5.0f, -1.0f / 5.0f, 1.0f); - m_convolutionExtraParams[1] = getSplineWeights(1, -1.0f / 3.0f, 4.0f / 5.0f, -7.0f / 15.0f, 0.0f); + m_convolutionParams.extra[0] = getSplineWeights(0, 1.0f, -9.0f / 5.0f, -1.0f / 5.0f, 1.0f); + m_convolutionParams.extra[1] = getSplineWeights(1, -1.0f / 3.0f, 4.0f / 5.0f, -7.0f / 15.0f, 0.0f); break; case 3: - m_convolutionExtraParams[0] = getSplineWeights(0, 13.0f / 11.0f, -453.0f / 209.0f, -3.0f / 209.0f, 1.0f); - m_convolutionExtraParams[1] = getSplineWeights(1, -6.0f / 11.0f, 270.0f / 209.0f, -156.0f / 209.0f, 0.0f); - m_convolutionExtraParams[2] = getSplineWeights(2, 1.0f / 11.0f, -45.0f / 209.0f, 26.0f / 209.0f, 0.0f); + m_convolutionParams.extra[0] = getSplineWeights(0, 13.0f / 11.0f, -453.0f / 209.0f, -3.0f / 209.0f, 1.0f); + m_convolutionParams.extra[1] = getSplineWeights(1, -6.0f / 11.0f, 270.0f / 209.0f, -156.0f / 209.0f, 0.0f); + m_convolutionParams.extra[2] = getSplineWeights(2, 1.0f / 11.0f, -45.0f / 209.0f, 26.0f / 209.0f, 0.0f); break; case 4: - m_convolutionExtraParams[0] = getSplineWeights(0, 49.0f / 41.0f, -6387.0f / 2911.0f, -3.0f / 2911.0f, 1.0f); - m_convolutionExtraParams[1] = getSplineWeights(1, -24.0f / 41.0f, 4032.0f / 2911.0f, -2328.0f / 2911.0f, 0.0f); - m_convolutionExtraParams[2] = getSplineWeights(2, 6.0f / 41.0f, -1008.0f / 2911.0f, 582.0f / 2911.0f, 0.0f); - m_convolutionExtraParams[3] = getSplineWeights(3, -1.0f / 41.0f, 168.0f / 2911.0f, -97.0f / 2911.0f, 0.0f); + m_convolutionParams.extra[0] = getSplineWeights(0, 49.0f / 41.0f, -6387.0f / 2911.0f, -3.0f / 2911.0f, 1.0f); + m_convolutionParams.extra[1] = getSplineWeights(1, -24.0f / 41.0f, 4032.0f / 2911.0f, -2328.0f / 2911.0f, 0.0f); + m_convolutionParams.extra[2] = getSplineWeights(2, 6.0f / 41.0f, -1008.0f / 2911.0f, 582.0f / 2911.0f, 0.0f); + m_convolutionParams.extra[3] = getSplineWeights(3, -1.0f / 41.0f, 168.0f / 2911.0f, -97.0f / 2911.0f, 0.0f); break; } diff --git a/DDrawCompat/D3dDdi/ShaderBlitter.h b/DDrawCompat/D3dDdi/ShaderBlitter.h index fb78a03..9248e25 100644 --- a/DDrawCompat/D3dDdi/ShaderBlitter.h +++ b/DDrawCompat/D3dDdi/ShaderBlitter.h @@ -25,6 +25,8 @@ namespace D3dDdi ShaderBlitter& operator=(const ShaderBlitter&) = delete; ShaderBlitter& operator=(ShaderBlitter&&) = delete; + void bilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, + const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent); void bicubicBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent); void colorKeyBlt(const Resource& dstResource, UINT dstSubResourceIndex, @@ -37,8 +39,6 @@ namespace D3dDdi const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect); void gammaBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect); - void genBilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, - const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent); void lanczosBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT lobes); void lockRefBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, @@ -59,6 +59,22 @@ namespace D3dDdi private: const UINT BLT_SRCALPHA = 1; + struct ConvolutionParams + { + Float2 textureSize; + Float2 sampleCoordOffset; + Float2 textureCoordOffset[2]; + Float2 kernelCoordOffset[2]; + Float2 textureCoordStep; + Float2 kernelCoordStep; + Float2 textureCoordStepPri; + Float2 textureCoordStepSec; + Float2 kernelCoordStepPri; + float support; + float supportRcp; + alignas(sizeof(DeviceState::ShaderConstF)) std::array extra; + }; + struct Vertex { std::array xy; @@ -72,10 +88,11 @@ namespace D3dDdi UINT filter, UINT flags = 0, const BYTE* alpha = nullptr, const Gdi::Region& srcRgn = nullptr); void convolution(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, - bool isHorizontal, float kernelStep, int sampleCount, float support, HANDLE pixelShader); + bool isHorizontal, Float2 support, HANDLE pixelShader, + const std::function setExtraParams); void convolutionBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect, const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, - Float2 support, HANDLE pixelShader); + Float2 support, HANDLE pixelShader, const std::function setExtraParams = {}); template std::unique_ptr createPixelShader(const BYTE(&code)[N]) @@ -90,20 +107,19 @@ namespace D3dDdi void setTextureCoords(UINT stage, const RECT& rect, UINT width, UINT height); Device& m_device; + std::unique_ptr m_psBilinear; std::unique_ptr m_psColorKey; std::unique_ptr m_psColorKeyBlend; std::unique_ptr m_psCubicConvolution[3]; std::unique_ptr m_psDepthBlt; std::unique_ptr m_psDrawCursor; std::unique_ptr m_psGamma; - std::unique_ptr m_psGenBilinear; std::unique_ptr m_psLanczos; std::unique_ptr m_psLockRef; std::unique_ptr m_psPaletteLookup; std::unique_ptr m_psTextureSampler; std::unique_ptr m_vertexShaderDecl; - std::array m_convolutionBaseParams; - std::array m_convolutionExtraParams; + ConvolutionParams m_convolutionParams; std::array m_vertices; }; } diff --git a/DDrawCompat/D3dDdi/SurfaceRepository.cpp b/DDrawCompat/D3dDdi/SurfaceRepository.cpp index 9dafed4..5c9fb03 100644 --- a/DDrawCompat/D3dDdi/SurfaceRepository.cpp +++ b/DDrawCompat/D3dDdi/SurfaceRepository.cpp @@ -242,7 +242,7 @@ namespace D3dDdi } ++index; } - return getTempSurface(m_renderTargets[index], width, height, D3DDDIFMT_A8R8G8B8, + return getTempSurface(m_renderTargets[index], width, height, D3DDDIFMT_X8R8G8B8, DDSCAPS_3DDEVICE | DDSCAPS_TEXTURE | DDSCAPS_VIDEOMEMORY); } diff --git a/DDrawCompat/DDrawCompat.vcxproj b/DDrawCompat/DDrawCompat.vcxproj index bd9f10d..227eed3 100644 --- a/DDrawCompat/DDrawCompat.vcxproj +++ b/DDrawCompat/DDrawCompat.vcxproj @@ -461,6 +461,7 @@ + @@ -469,7 +470,6 @@ - diff --git a/DDrawCompat/DDrawCompat.vcxproj.filters b/DDrawCompat/DDrawCompat.vcxproj.filters index 1855105..9c68e41 100644 --- a/DDrawCompat/DDrawCompat.vcxproj.filters +++ b/DDrawCompat/DDrawCompat.vcxproj.filters @@ -1079,9 +1079,6 @@ Shaders - - Shaders - Shaders @@ -1115,6 +1112,9 @@ Shaders + + Shaders + diff --git a/DDrawCompat/Shaders/Bilinear.hlsl b/DDrawCompat/Shaders/Bilinear.hlsl new file mode 100644 index 0000000..631d3a1 --- /dev/null +++ b/DDrawCompat/Shaders/Bilinear.hlsl @@ -0,0 +1,7 @@ +#define NONNEGATIVE +#include "Convolution.hlsli" + +float4 kernel(float4 x) +{ + return saturate(mad(abs(x), g_extraParams[0].xyxy, g_extraParams[0].zwzw)); +} diff --git a/DDrawCompat/Shaders/Convolution.hlsli b/DDrawCompat/Shaders/Convolution.hlsli index 7c8c477..186e77f 100644 --- a/DDrawCompat/Shaders/Convolution.hlsli +++ b/DDrawCompat/Shaders/Convolution.hlsli @@ -1,52 +1,75 @@ sampler2D s_texture : register(s0); -int g_sampleCountX : register(i0); -int g_sampleCountY : register(i1); +int g_sampleCountHalfMinusOne : register(i0); float4 c[32] : register(c0); -float4 g_extraParams[4] : register(c5); +float4 g_extraParams[4] : register(c6); -static const float2 g_textureSize = c[0].xy; -static const float2 g_textureSizeRcp = c[0].zw; -static const float4 g_firstCoordOffset = c[1]; -static const float4 g_coordStep = c[2]; -static const float2 g_sampleCoordOffset = c[3].xy; -static const float2 g_halfTexelOffset = c[3].zw; -static const float g_support = c[4].x; -static const float g_supportRcp = c[4].y; +static const float2 g_textureSize = c[0].xy; +static const float2 g_sampleCoordOffset = c[0].zw; +static const float4 g_textureCoordOffset = c[1]; +static const float4 g_kernelCoordOffset = c[2]; +static const float2 g_textureCoordStep = c[3].xy; +static const float2 g_kernelCoordStep = c[3].zw; +static const float2 g_textureCoordStepPri = c[4].xy; +static const float2 g_textureCoordStepSec = c[4].zw; +static const float2 g_kernelCoordStepPri = c[5].xy; +static const float g_support = c[5].z; +static const float g_supportRcp = c[5].w; +#ifdef NONNEGATIVE +float4 kernel(float4 x); +#else float kernel(float x); +#endif + +void addSamples(inout float4 colorSum, float4 textureCoord, float4 weights) +{ +#ifdef NONNEGATIVE + const float weightSum = weights.x + weights.z; + colorSum += weightSum * tex2Dlod(s_texture, lerp(textureCoord.xyxy, textureCoord.zwzw, weights.z / weightSum)); +#else + colorSum += weights.x * tex2Dlod(s_texture, textureCoord.xyxy); + colorSum += weights.z * tex2Dlod(s_texture, textureCoord.zwzw); +#endif +} + +float4 getWeights(float4 kernelCoord) +{ +#ifdef NONNEGATIVE + return kernel(kernelCoord); +#else + return float4(kernel(kernelCoord.x), kernel(kernelCoord.y), kernel(kernelCoord.z), kernel(kernelCoord.w)); +#endif +} float4 main(float2 texCoord : TEXCOORD0) : COLOR0 { - const float2 sampleCoord = texCoord * g_textureSize + g_sampleCoordOffset; + const float2 sampleCoord = mad(texCoord, g_textureSize, g_sampleCoordOffset); const float2 sampleCoordFrac = frac(sampleCoord); const float2 sampleCoordInt = sampleCoord - sampleCoordFrac; - const float2 centeredTexCoord = sampleCoordInt * g_textureSizeRcp + g_halfTexelOffset; - float4 coord = float4(centeredTexCoord, -sampleCoordFrac * g_coordStep.zw) + g_firstCoordOffset; - float4 coordStep = g_coordStep; + float4 textureCoord = mad(sampleCoordInt.xyxy, g_textureCoordStep.xyxy, g_textureCoordOffset); + float4 kernelCoord = mad(-sampleCoordFrac.xyxy, g_kernelCoordStep.xyxy, g_kernelCoordOffset); + kernelCoord = g_textureCoordStepPri.x > 0 ? kernelCoord : kernelCoord.yxwz; + + const float4 weights = getWeights(kernelCoord); + +#ifdef NONNEGATIVE + [branch] if (g_sampleCoordOffset.x == g_sampleCoordOffset.y) + { + textureCoord = lerp(textureCoord, textureCoord + g_textureCoordStepSec.xyxy, weights.w / (weights.y + weights.w)); + } +#endif + float4 colorSum = 0; + addSamples(colorSum, textureCoord, weights); - if (0 != coordStep.x) + for (int i = 0; i < g_sampleCountHalfMinusOne; ++i) { - for (int i = 0; i < g_sampleCountX; ++i) - { - coordStep.w = kernel(coord.z); - float4 color = tex2Dlod(s_texture, coord); - colorSum += coordStep.w * color; - coord += coordStep; - } - return colorSum / coord.w; - } - else - { - for (int i = 0; i < g_sampleCountY; ++i) - { - coordStep.z = kernel(coord.w); - float4 color = tex2Dlod(s_texture, coord); - colorSum += coordStep.z * color; - coord += coordStep; - } - return colorSum / coord.z; + textureCoord += g_textureCoordStepPri.xyxy; + kernelCoord += g_kernelCoordStepPri.xyxy; + addSamples(colorSum, textureCoord, getWeights(kernelCoord)); } + + return colorSum / colorSum.a; } diff --git a/DDrawCompat/Shaders/GenBilinear.hlsl b/DDrawCompat/Shaders/GenBilinear.hlsl deleted file mode 100644 index 2b99d39..0000000 --- a/DDrawCompat/Shaders/GenBilinear.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -sampler2D s_texture : register(s0); -float2 g_textureRes : register(c0); -float2 g_scaleFactor : register(c1); - -float4 main(float2 texCoord : TEXCOORD0) : COLOR0 -{ - float2 coord = texCoord * g_textureRes - 0.5f; - float2 fracPart = frac(coord); - float2 intPart = coord - fracPart; - coord = (intPart + saturate(g_scaleFactor * (fracPart - 0.5f) + 0.5f) + 0.5f) / g_textureRes; - return tex2D(s_texture, coord); -}