1
0
mirror of https://github.com/narzoul/DDrawCompat synced 2024-12-30 08:55:36 +01:00

Use convolution for bilinear display filter

This commit is contained in:
narzoul 2023-04-14 12:54:01 +02:00
parent a6bd01e49b
commit 3a570414b7
10 changed files with 240 additions and 156 deletions

View File

@ -6,6 +6,8 @@
#include <Windows.h>
#include <Common/Log.h>
template <typename Elem, std::size_t size>
class VectorRepresentation
{
@ -91,6 +93,18 @@ Vector<Elem, size> binaryOperation(Operator op, const Vector<Elem, size>& lhs, c
return result;
}
// Scalar-on-the-left overload: wraps the scalar in a Vector and defers to the
// Vector/Vector overload of binaryOperation.
// NOTE(review): presumably Vector(Elem) replicates the value into every
// component - confirm against the Vector constructor.
template <typename Elem, std::size_t size, typename Operator>
Vector<Elem, size> binaryOperation(Operator op, Elem lhs, const Vector<Elem, size>& rhs)
{
    const Vector<Elem, size> broadcastLhs(lhs);
    return binaryOperation(op, broadcastLhs, rhs);
}
// Scalar-on-the-right overload: wraps the scalar in a Vector and defers to the
// Vector/Vector overload of binaryOperation.
// NOTE(review): presumably Vector(Elem) replicates the value into every
// component - confirm against the Vector constructor.
template <typename Elem, std::size_t size, typename Operator>
Vector<Elem, size> binaryOperation(Operator op, const Vector<Elem, size>& lhs, Elem rhs)
{
    const Vector<Elem, size> broadcastRhs(rhs);
    return binaryOperation(op, lhs, broadcastRhs);
}
template <typename Elem, std::size_t size, typename Operator>
Vector<Elem, size> unaryOperation(Operator op, const Vector<Elem, size>& vec)
{
@ -104,14 +118,26 @@ Vector<Elem, size> unaryOperation(Operator op, const Vector<Elem, size>& vec)
#define DEFINE_VECTOR_BINARY_OPERATOR(name, ...) \
template <typename Elem, std::size_t size> \
inline Vector<Elem, size> name(const Vector<Elem, size>& lhs, const Vector<Elem, size>& rhs) \
Vector<Elem, size> name(const Vector<Elem, size>& lhs, const Vector<Elem, size>& rhs) \
{ \
return binaryOperation([](Elem x, Elem y) { return __VA_ARGS__; }, lhs, rhs); \
} \
\
template <typename Elem, std::size_t size> \
Vector<Elem, size> name(Elem lhs, const Vector<Elem, size>& rhs) \
{ \
return binaryOperation([](Elem x, Elem y) { return __VA_ARGS__; }, lhs, rhs); \
} \
\
template <typename Elem, std::size_t size> \
Vector<Elem, size> name(const Vector<Elem, size>& lhs, Elem rhs) \
{ \
return binaryOperation([](Elem x, Elem y) { return __VA_ARGS__; }, lhs, rhs); \
}
#define DEFINE_VECTOR_UNARY_OPERATOR(name, ...) \
template <typename Elem, std::size_t size> \
inline Vector<Elem, size> name(const Vector<Elem, size>& vec) \
Vector<Elem, size> name(const Vector<Elem, size>& vec) \
{ \
return unaryOperation([](Elem x) { return __VA_ARGS__; }, vec); \
}
@ -137,5 +163,27 @@ DEFINE_VECTOR_STD_UNARY_OPERATOR(floor);
#undef DEFINE_VECTOR_STD_BINARY_OPERATOR
#undef DEFINE_VECTOR_STD_UNARY_OPERATOR
// Dot product: sum of the component-wise products of lhs and rhs,
// accumulated from component 0 upwards.
template <typename Elem, std::size_t size>
Elem dot(const Vector<Elem, size>& lhs, const Vector<Elem, size>& rhs)
{
    Elem sum = 0;
    std::size_t component = 0;
    while (component != size)
    {
        sum += lhs[component] * rhs[component];
        ++component;
    }
    return sum;
}
// Streams every component of vec, in index order, through a Compat::LogStruct
// bound to os, then returns os.
template <typename Elem, std::size_t size>
std::ostream& operator<<(std::ostream& os, const Vector<Elem, size>& vec)
{
    Compat::LogStruct log(os);
    std::size_t component = 0;
    while (component != size)
    {
        log << vec[component];
        ++component;
    }
    return os;
}
// Convenience aliases for two-component vectors.
using Float2 = Vector<float, 2>;
using Int2 = Vector<int, 2>;

View File

@ -1257,6 +1257,7 @@ namespace D3dDdi
pal[i].rgbRed = entries[i].peRed;
pal[i].rgbGreen = entries[i].peGreen;
pal[i].rgbBlue = entries[i].peBlue;
pal[i].rgbReserved = 0xFF;
}
m_device.getShaderBlitter().palettizedBlt(
*rt, rtIndex, rtRect, *srcResource, data.SrcSubResourceIndex, data.SrcRect, pal);

View File

@ -7,6 +7,7 @@
#include <D3dDdi/ShaderBlitter.h>
#include <D3dDdi/SurfaceRepository.h>
#include <DDraw/Surfaces/PrimarySurface.h>
#include <Shaders/Bilinear.h>
#include <Shaders/ColorKey.h>
#include <Shaders/ColorKeyBlend.h>
#include <Shaders/CubicConvolution2.h>
@ -15,7 +16,6 @@
#include <Shaders/DepthBlt.h>
#include <Shaders/DrawCursor.h>
#include <Shaders/Gamma.h>
#include <Shaders/GenBilinear.h>
#include <Shaders/Lanczos.h>
#include <Shaders/LockRef.h>
#include <Shaders/PaletteLookup.h>
@ -70,6 +70,7 @@ namespace D3dDdi
{
ShaderBlitter::ShaderBlitter(Device& device)
: m_device(device)
, m_psBilinear(createPixelShader(g_psBilinear))
, m_psColorKey(createPixelShader(g_psColorKey))
, m_psColorKeyBlend(createPixelShader(g_psColorKeyBlend))
, m_psCubicConvolution{
@ -80,14 +81,12 @@ namespace D3dDdi
, m_psDepthBlt(createPixelShader(g_psDepthBlt))
, m_psDrawCursor(createPixelShader(g_psDrawCursor))
, m_psGamma(createPixelShader(g_psGamma))
, m_psGenBilinear(createPixelShader(g_psGenBilinear))
, m_psLanczos(createPixelShader(g_psLanczos))
, m_psLockRef(createPixelShader(g_psLockRef))
, m_psPaletteLookup(createPixelShader(g_psPaletteLookup))
, m_psTextureSampler(createPixelShader(g_psTextureSampler))
, m_vertexShaderDecl(createVertexShaderDecl())
, m_convolutionBaseParams{}
, m_convolutionExtraParams{}
, m_convolutionParams{}
, m_vertices{}
{
for (std::size_t i = 0; i < m_vertices.size(); ++i)
@ -105,13 +104,42 @@ namespace D3dDdi
const float B = blurPercent / 100.0f;
const float C = (1 - B) / 2;
m_convolutionExtraParams[0] = { (12 - 9 * B - 6 * C) / 6, (-18 + 12 * B + 6 * C) / 6, 0, (6 - 2 * B) / 6 };
m_convolutionExtraParams[1] = { (-B - 6 * C) / 6, (6 * B + 30 * C) / 6, (-12 * B - 48 * C) / 6, (8 * B + 24 * C) / 6 };
m_convolutionParams.extra[0] = { (12 - 9 * B - 6 * C) / 6, (-18 + 12 * B + 6 * C) / 6, 0, (6 - 2 * B) / 6 };
m_convolutionParams.extra[1] = { (-B - 6 * C) / 6, (6 * B + 30 * C) / 6, (-12 * B - 48 * C) / 6, (8 * B + 24 * C) / 6 };
convolutionBlt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect,
2, m_psCubicConvolution[0].get());
}
// Blits srcRect of srcResource onto dstRect of dstResource through the
// convolution path, using the bilinear pixel shader (m_psBilinear).
//
// blurPercent is the blur amount as a percentage. It is converted to a factor
// (blurPercent / 100) and blended with the scale ratio of each axis:
//   adjustedScale = 1 / (blur + (1 - blur) / higherScale)
// so a factor of 1 gives adjustedScale == 1 and a factor of 0 gives
// adjustedScale == higherScale.
//
// The shader kernel evaluates saturate(multiplier * |x| + offset); with the
// values computed here it reaches zero at |x| == support.
void ShaderBlitter::bilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
    const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent)
{
    LOG_FUNC("ShaderBlitter::bilinearBlt", static_cast<HANDLE>(dstResource), dstSubResourceIndex, dstRect,
        static_cast<HANDLE>(srcResource), srcSubResourceIndex, srcRect, blurPercent);

    const Float2 dstSize(dstRect.right - dstRect.left, dstRect.bottom - dstRect.top);
    const Float2 srcSize(srcRect.right - srcRect.left, srcRect.bottom - srcRect.top);
    const Float2 scale = dstSize / srcSize;

    // Treat upscaling and downscaling symmetrically: always work with the ratio >= 1.
    const Float2 higherScale = max(scale, 1.0f / scale);

    // Honor the caller-supplied blur amount instead of re-reading the global
    // configuration, so the parameter is not silently ignored.
    const Float2 blur = blurPercent / 100.0f;
    const Float2 adjustedScale = 1.0f / (blur + (1.0f - blur) / higherScale);

    const Float2 multiplier = -1.0f * adjustedScale;
    const Float2 offset = 0.5f * adjustedScale + 0.5f;
    const Float2 support = 0.5f + 0.5f / adjustedScale;

    convolutionBlt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect,
        support, m_psBilinear.get(), [&](bool isHorizontal) {
            // The x/y components are swapped for the vertical pass.
            // NOTE(review): presumably this matches the kernel-coordinate
            // swizzle done by the shader for vertical passes - confirm.
            if (isHorizontal)
            {
                m_convolutionParams.extra[0] = { multiplier.x, multiplier.y, offset.x, offset.y };
            }
            else
            {
                m_convolutionParams.extra[0] = { multiplier.y, multiplier.x, offset.y, offset.x };
            }
        });
}
void ShaderBlitter::blt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect,
HANDLE pixelShader, UINT filter, UINT flags, const BYTE* alpha, const Gdi::Region& srcRgn)
@ -213,64 +241,84 @@ namespace D3dDdi
void ShaderBlitter::convolution(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect,
bool isHorizontal, float kernelStep, int sampleCount, float support, HANDLE pixelShader)
bool isHorizontal, Float2 support, HANDLE pixelShader,
const std::function<void(bool)> setExtraParams)
{
LOG_FUNC("ShaderBlitter::convolution", static_cast<HANDLE>(dstResource), dstSubResourceIndex, dstRect,
static_cast<HANDLE>(srcResource), srcSubResourceIndex, srcRect,
isHorizontal, kernelStep, sampleCount, support, pixelShader);
isHorizontal, support, pixelShader, static_cast<bool>(setExtraParams));
const auto& srcDesc = srcResource.getFixedDesc().pSurfList[0];
const Float2 textureSize(srcDesc.Width, srcDesc.Height);
const Float2 textureSizeRcp = Float2(1) / textureSize;
const float textureStep = isHorizontal ? textureSizeRcp.x : textureSizeRcp.y;
const Float2 dstSize(dstRect.right - dstRect.left, dstRect.bottom - dstRect.top);
const Float2 srcSize(srcRect.right - srcRect.left, srcRect.bottom - srcRect.top);
const Float2 scale = dstSize / srcSize;
const int firstSampleOffset = -sampleCount / 2 + 1;
const float firstKernelOffset = firstSampleOffset * kernelStep;
const float firstTextureOffset = firstSampleOffset * textureStep;
const bool isDual = srcSize.x != dstSize.x && srcSize.y != dstSize.y;
const Float2 compMaskPri = isHorizontal ? Float2(1, 0) : Float2(0, 1);
const Float2 compMaskSec = isHorizontal ? Float2(0, 1) : Float2(1, 0);
const Float2 compMask = isDual ? Float2(1, 1) : compMaskPri;
m_convolutionBaseParams[0] = { textureSize.x, textureSize.y, textureSizeRcp.x, textureSizeRcp.y };
if (isHorizontal)
auto& p = m_convolutionParams;
p.textureSize = { srcDesc.Width, srcDesc.Height };
p.sampleCoordOffset = -0.5f * compMask;
p.textureCoordStep = 1.0f / p.textureSize;
p.kernelCoordStep = min(scale, 1.0f);
p.textureCoordStepPri = 2.0f * p.textureCoordStep * compMaskPri;
p.textureCoordStepSec = p.textureCoordStep * compMaskSec;
p.kernelCoordStepPri = 2.0f * p.kernelCoordStep * compMaskPri;
p.support = dot(support, compMaskPri);
p.supportRcp = 1.0f / p.support;
const Int2 sampleCountHalf = min(ceil(support / p.kernelCoordStep), 255.0f);
const Float2 firstSampleOffset = 1 - sampleCountHalf;
p.kernelCoordOffset[0] = firstSampleOffset * p.kernelCoordStep;
p.kernelCoordOffset[1] = p.kernelCoordOffset[0] + p.kernelCoordStep;
p.textureCoordOffset[0] = (firstSampleOffset * compMaskPri + 0.5f) * p.textureCoordStep;
p.textureCoordOffset[1] = p.textureCoordOffset[0] + p.textureCoordStep * compMaskPri;
if (!isHorizontal)
{
m_convolutionBaseParams[1] = { firstTextureOffset, 0, firstKernelOffset, 0 };
m_convolutionBaseParams[2] = { textureStep, 0, kernelStep, 0 };
m_convolutionBaseParams[3] = { -0.5f, 0, 0.5f * textureSizeRcp.x, 0.5f * textureSizeRcp.y };
std::swap(p.kernelCoordStepPri.x, p.kernelCoordStepPri.y);
}
else
if (setExtraParams)
{
m_convolutionBaseParams[1] = { 0, firstTextureOffset, 0, firstKernelOffset };
m_convolutionBaseParams[2] = { 0, textureStep, 0, kernelStep };
m_convolutionBaseParams[3] = { 0, -0.5f, 0.5f * textureSizeRcp.x, 0.5f * textureSizeRcp.y };
setExtraParams(isHorizontal);
}
m_convolutionBaseParams[4] = { support, 1.0f / support, 0, 0 };
const DeviceState::ShaderConstI reg = { dot(sampleCountHalf - 1, Int2(compMaskPri)) };
DeviceState::TempPixelShaderConstI tempPsConstI(m_device.getState(), { 0, 1 }, &reg);
DeviceState::TempPixelShaderConst tempPsConst(m_device.getState(),
{ 0, m_convolutionBaseParams.size() + m_convolutionExtraParams.size()}, m_convolutionBaseParams.data());
{ 0, sizeof(m_convolutionParams) / sizeof(DeviceState::ShaderConstF) },
reinterpret_cast<DeviceState::ShaderConstF*>(&m_convolutionParams));
blt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect,
pixelShader, D3DTEXF_LINEAR | D3DTEXF_SRGB);
pixelShader, (p.support <= 1 ? D3DTEXF_LINEAR : D3DTEXF_POINT) | D3DTEXF_SRGB);
}
void ShaderBlitter::convolutionBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect,
Float2 support, HANDLE pixelShader)
Float2 support, HANDLE pixelShader, const std::function<void(bool)> setExtraParams)
{
LOG_FUNC("ShaderBlitter::convolutionBlt", static_cast<HANDLE>(dstResource), dstSubResourceIndex, dstRect,
static_cast<HANDLE>(srcResource), srcSubResourceIndex, srcRect, support, pixelShader);
static_cast<HANDLE>(srcResource), srcSubResourceIndex, srcRect, support, pixelShader,
static_cast<bool>(setExtraParams));
const Int2 dstSize(dstRect.right - dstRect.left, dstRect.bottom - dstRect.top);
const Int2 srcSize(srcRect.right - srcRect.left, srcRect.bottom - srcRect.top);
const Float2 scale = Float2(dstSize) / Float2(srcSize);
const Float2 kernelStep = min(scale, Float2(1));
const Int2 sampleCount = min(Float2(2) * ceil(support / kernelStep), Float2(255));
const Float2 kernelCoordStep = min(scale, 1.0f);
const Float2 sampleCountHalf = support / kernelCoordStep;
if (srcSize.y == dstSize.y)
if (srcSize.y == dstSize.y || sampleCountHalf.y <= 1)
{
return convolution(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect,
true, kernelStep.x, sampleCount.x, support.x, pixelShader);
true, support, pixelShader, setExtraParams);
}
else if (srcSize.x == dstSize.x)
else if (srcSize.x == dstSize.x || sampleCountHalf.x <= 1)
{
return convolution(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect,
false, kernelStep.y, sampleCount.y, support.y, pixelShader);
false, support, pixelShader, setExtraParams);
}
const bool isHorizontalFirst = dstSize.x * srcSize.y <= srcSize.x * dstSize.y;
@ -290,27 +338,10 @@ namespace D3dDdi
return;
}
const std::array<DeviceState::ShaderConstI, 2> reg = { {
{ sampleCount.x },
{ sampleCount.y }
} };
DeviceState::TempPixelShaderConstI tempPsConstI(m_device.getState(), { 0, reg.size()}, reg.data());
if (isHorizontalFirst)
{
convolution(*rt, 0, rect, srcResource, srcSubResourceIndex, srcRect,
true, kernelStep.x, sampleCount.x, support.x, pixelShader);
convolution(dstResource, dstSubResourceIndex, dstRect, *rt, 0, rect,
false, kernelStep.y, sampleCount.y, support.y, pixelShader);
}
else
{
convolution(*rt, 0, rect, srcResource, srcSubResourceIndex, srcRect,
false, kernelStep.y, sampleCount.y, support.y, pixelShader);
convolution(dstResource, dstSubResourceIndex, dstRect, *rt, 0, rect,
true, kernelStep.x, sampleCount.x, support.x, pixelShader);
}
convolution(*rt, 0, rect, srcResource, srcSubResourceIndex, srcRect,
isHorizontalFirst, support, pixelShader, setExtraParams);
convolution(dstResource, dstSubResourceIndex, dstRect, *rt, 0, rect,
!isHorizontalFirst, support, pixelShader, setExtraParams);
}
std::unique_ptr<void, ResourceDeleter> ShaderBlitter::createPixelShader(const BYTE* code, UINT size)
@ -502,7 +533,7 @@ namespace D3dDdi
break;
case Config::Settings::DisplayFilter::BILINEAR:
m_device.getShaderBlitter().genBilinearBlt(rt, rtIndex, rtRect,
m_device.getShaderBlitter().bilinearBlt(rt, rtIndex, rtRect,
srcResource, srcSubResourceIndex, srcRect, Config::displayFilter.getParam());
break;
@ -582,36 +613,6 @@ namespace D3dDdi
blt(dstResource, dstSubResourceIndex, dstRect, srcResource, 0, srcRect, m_psGamma.get(), D3DTEXF_POINT);
}
// Blits srcRect onto dstRect with a bilinear filter whose sharpness depends on
// blurPercent. At exactly 100 the plain texture sampler shader is used with
// linear filtering; otherwise the per-axis scale factors are blended with the
// blur factor and handed to the GenBilinear shader in constant registers.
void ShaderBlitter::genBilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
    const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent)
{
    LOG_FUNC("ShaderBlitter::genBilinearBlt", static_cast<HANDLE>(dstResource), dstSubResourceIndex, dstRect,
        static_cast<HANDLE>(srcResource), srcSubResourceIndex, srcRect, blurPercent);

    if (100 != blurPercent)
    {
        const auto& surface = srcResource.getFixedDesc().pSurfList[0];
        const float blur = blurPercent / 100.0f;

        // Blend the raw scale factor towards 1 according to the blur factor.
        const auto blend = [blur](float rawScale) { return 1 / ((1 - blur) / rawScale + blur); };
        const float scaleX = blend(static_cast<float>(dstRect.right - dstRect.left) / (srcRect.right - srcRect.left));
        const float scaleY = blend(static_cast<float>(dstRect.bottom - dstRect.top) / (srcRect.bottom - srcRect.top));

        // First register: source texture size, second register: adjusted scale factors.
        const std::array<DeviceState::ShaderConstF, 2> registers{ {
            { static_cast<float>(surface.Width), static_cast<float>(surface.Height), 0.0f, 0.0f },
            { scaleX, scaleY, 0.0f, 0.0f }
        } };

        DeviceState::TempPixelShaderConst tempPsConst(m_device.getState(), { 0, registers.size() }, registers.data());
        blt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect, m_psGenBilinear.get(),
            D3DTEXF_LINEAR | D3DTEXF_SRGB);
        return;
    }

    blt(dstResource, dstSubResourceIndex, dstRect, srcResource, srcSubResourceIndex, srcRect,
        m_psTextureSampler.get(), D3DTEXF_LINEAR | D3DTEXF_SRGB);
}
void ShaderBlitter::lanczosBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT lobes)
{
@ -721,21 +722,21 @@ namespace D3dDdi
switch (lobes)
{
case 2:
m_convolutionExtraParams[0] = getSplineWeights(0, 1.0f, -9.0f / 5.0f, -1.0f / 5.0f, 1.0f);
m_convolutionExtraParams[1] = getSplineWeights(1, -1.0f / 3.0f, 4.0f / 5.0f, -7.0f / 15.0f, 0.0f);
m_convolutionParams.extra[0] = getSplineWeights(0, 1.0f, -9.0f / 5.0f, -1.0f / 5.0f, 1.0f);
m_convolutionParams.extra[1] = getSplineWeights(1, -1.0f / 3.0f, 4.0f / 5.0f, -7.0f / 15.0f, 0.0f);
break;
case 3:
m_convolutionExtraParams[0] = getSplineWeights(0, 13.0f / 11.0f, -453.0f / 209.0f, -3.0f / 209.0f, 1.0f);
m_convolutionExtraParams[1] = getSplineWeights(1, -6.0f / 11.0f, 270.0f / 209.0f, -156.0f / 209.0f, 0.0f);
m_convolutionExtraParams[2] = getSplineWeights(2, 1.0f / 11.0f, -45.0f / 209.0f, 26.0f / 209.0f, 0.0f);
m_convolutionParams.extra[0] = getSplineWeights(0, 13.0f / 11.0f, -453.0f / 209.0f, -3.0f / 209.0f, 1.0f);
m_convolutionParams.extra[1] = getSplineWeights(1, -6.0f / 11.0f, 270.0f / 209.0f, -156.0f / 209.0f, 0.0f);
m_convolutionParams.extra[2] = getSplineWeights(2, 1.0f / 11.0f, -45.0f / 209.0f, 26.0f / 209.0f, 0.0f);
break;
case 4:
m_convolutionExtraParams[0] = getSplineWeights(0, 49.0f / 41.0f, -6387.0f / 2911.0f, -3.0f / 2911.0f, 1.0f);
m_convolutionExtraParams[1] = getSplineWeights(1, -24.0f / 41.0f, 4032.0f / 2911.0f, -2328.0f / 2911.0f, 0.0f);
m_convolutionExtraParams[2] = getSplineWeights(2, 6.0f / 41.0f, -1008.0f / 2911.0f, 582.0f / 2911.0f, 0.0f);
m_convolutionExtraParams[3] = getSplineWeights(3, -1.0f / 41.0f, 168.0f / 2911.0f, -97.0f / 2911.0f, 0.0f);
m_convolutionParams.extra[0] = getSplineWeights(0, 49.0f / 41.0f, -6387.0f / 2911.0f, -3.0f / 2911.0f, 1.0f);
m_convolutionParams.extra[1] = getSplineWeights(1, -24.0f / 41.0f, 4032.0f / 2911.0f, -2328.0f / 2911.0f, 0.0f);
m_convolutionParams.extra[2] = getSplineWeights(2, 6.0f / 41.0f, -1008.0f / 2911.0f, 582.0f / 2911.0f, 0.0f);
m_convolutionParams.extra[3] = getSplineWeights(3, -1.0f / 41.0f, 168.0f / 2911.0f, -97.0f / 2911.0f, 0.0f);
break;
}

View File

@ -25,6 +25,8 @@ namespace D3dDdi
ShaderBlitter& operator=(const ShaderBlitter&) = delete;
ShaderBlitter& operator=(ShaderBlitter&&) = delete;
void bilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent);
void bicubicBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent);
void colorKeyBlt(const Resource& dstResource, UINT dstSubResourceIndex,
@ -37,8 +39,6 @@ namespace D3dDdi
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect);
void gammaBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect);
void genBilinearBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT blurPercent);
void lanczosBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect, UINT lobes);
void lockRefBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
@ -59,6 +59,22 @@ namespace D3dDdi
private:
const UINT BLT_SRCALPHA = 1;
// Parameter block for the convolution pixel shaders. ShaderBlitter::convolution
// reinterprets this struct as an array of DeviceState::ShaderConstF when setting
// the pixel shader constants, so the member order must stay in sync with the
// register assignments in Convolution.hlsli.
// NOTE(review): the register mapping below assumes Float2 is two tightly packed
// floats and ShaderConstF is four floats - confirm before reordering members.
struct ConvolutionParams
{
// c0.xy
Float2 textureSize;
// c0.zw
Float2 sampleCoordOffset;
// c1 (two offsets: xy and zw)
Float2 textureCoordOffset[2];
// c2 (two offsets: xy and zw)
Float2 kernelCoordOffset[2];
// c3.xy
Float2 textureCoordStep;
// c3.zw
Float2 kernelCoordStep;
// c4.xy
Float2 textureCoordStepPri;
// c4.zw
Float2 textureCoordStepSec;
// c5.xy
Float2 kernelCoordStepPri;
// c5.z
float support;
// c5.w
float supportRcp;
// c6 onwards: kernel-specific parameters (g_extraParams in the shaders).
alignas(sizeof(DeviceState::ShaderConstF)) std::array<DeviceState::ShaderConstF, 4> extra;
};
struct Vertex
{
std::array<float, 2> xy;
@ -72,10 +88,11 @@ namespace D3dDdi
UINT filter, UINT flags = 0, const BYTE* alpha = nullptr, const Gdi::Region& srcRgn = nullptr);
void convolution(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect,
bool isHorizontal, float kernelStep, int sampleCount, float support, HANDLE pixelShader);
bool isHorizontal, Float2 support, HANDLE pixelShader,
const std::function<void(bool)> setExtraParams);
void convolutionBlt(const Resource& dstResource, UINT dstSubResourceIndex, const RECT& dstRect,
const Resource& srcResource, UINT srcSubResourceIndex, const RECT& srcRect,
Float2 support, HANDLE pixelShader);
Float2 support, HANDLE pixelShader, const std::function<void(bool)> setExtraParams = {});
template <int N>
std::unique_ptr<void, ResourceDeleter> createPixelShader(const BYTE(&code)[N])
@ -90,20 +107,19 @@ namespace D3dDdi
void setTextureCoords(UINT stage, const RECT& rect, UINT width, UINT height);
Device& m_device;
std::unique_ptr<void, ResourceDeleter> m_psBilinear;
std::unique_ptr<void, ResourceDeleter> m_psColorKey;
std::unique_ptr<void, ResourceDeleter> m_psColorKeyBlend;
std::unique_ptr<void, ResourceDeleter> m_psCubicConvolution[3];
std::unique_ptr<void, ResourceDeleter> m_psDepthBlt;
std::unique_ptr<void, ResourceDeleter> m_psDrawCursor;
std::unique_ptr<void, ResourceDeleter> m_psGamma;
std::unique_ptr<void, ResourceDeleter> m_psGenBilinear;
std::unique_ptr<void, ResourceDeleter> m_psLanczos;
std::unique_ptr<void, ResourceDeleter> m_psLockRef;
std::unique_ptr<void, ResourceDeleter> m_psPaletteLookup;
std::unique_ptr<void, ResourceDeleter> m_psTextureSampler;
std::unique_ptr<void, ResourceDeleter> m_vertexShaderDecl;
std::array<DeviceState::ShaderConstF, 5> m_convolutionBaseParams;
std::array<DeviceState::ShaderConstF, 4> m_convolutionExtraParams;
ConvolutionParams m_convolutionParams;
std::array<Vertex, 4> m_vertices;
};
}

View File

@ -242,7 +242,7 @@ namespace D3dDdi
}
++index;
}
return getTempSurface(m_renderTargets[index], width, height, D3DDDIFMT_A8R8G8B8,
return getTempSurface(m_renderTargets[index], width, height, D3DDDIFMT_X8R8G8B8,
DDSCAPS_3DDEVICE | DDSCAPS_TEXTURE | DDSCAPS_VIDEOMEMORY);
}

View File

@ -461,6 +461,7 @@
<None Include="Shaders\CubicConvolution.hlsli" />
</ItemGroup>
<ItemGroup>
<FxCompile Include="Shaders\Bilinear.hlsl" />
<FxCompile Include="Shaders\ColorKey.hlsl" />
<FxCompile Include="Shaders\ColorKeyBlend.hlsl" />
<FxCompile Include="Shaders\CubicConvolution2.hlsl" />
@ -469,7 +470,6 @@
<FxCompile Include="Shaders\DepthBlt.hlsl" />
<FxCompile Include="Shaders\DrawCursor.hlsl" />
<FxCompile Include="Shaders\Gamma.hlsl" />
<FxCompile Include="Shaders\GenBilinear.hlsl" />
<FxCompile Include="Shaders\Lanczos.hlsl" />
<FxCompile Include="Shaders\LockRef.hlsl" />
<FxCompile Include="Shaders\PaletteLookup.hlsl" />

View File

@ -1079,9 +1079,6 @@
<FxCompile Include="Shaders\DrawCursor.hlsl">
<Filter>Shaders</Filter>
</FxCompile>
<FxCompile Include="Shaders\GenBilinear.hlsl">
<Filter>Shaders</Filter>
</FxCompile>
<FxCompile Include="Shaders\TextureSampler.hlsl">
<Filter>Shaders</Filter>
</FxCompile>
@ -1115,6 +1112,9 @@
<FxCompile Include="Shaders\CubicConvolution4.hlsl">
<Filter>Shaders</Filter>
</FxCompile>
<FxCompile Include="Shaders\Bilinear.hlsl">
<Filter>Shaders</Filter>
</FxCompile>
</ItemGroup>
<ItemGroup>
<Image Include="arrow.bmp">

View File

@ -0,0 +1,7 @@
#define NONNEGATIVE
#include "Convolution.hlsli"
// Bilinear kernel weights for four kernel coordinates at once:
// saturate(|x| * slope + bias), with slope taken from g_extraParams[0].xy and
// bias from g_extraParams[0].zw (each replicated for the two coordinate pairs).
float4 kernel(float4 x)
{
    const float4 slope = g_extraParams[0].xyxy;
    const float4 bias = g_extraParams[0].zwzw;
    return saturate(mad(abs(x), slope, bias));
}

View File

@ -1,52 +1,75 @@
sampler2D s_texture : register(s0);
int g_sampleCountX : register(i0);
int g_sampleCountY : register(i1);
int g_sampleCountHalfMinusOne : register(i0);
float4 c[32] : register(c0);
float4 g_extraParams[4] : register(c5);
float4 g_extraParams[4] : register(c6);
static const float2 g_textureSize = c[0].xy;
static const float2 g_textureSizeRcp = c[0].zw;
static const float4 g_firstCoordOffset = c[1];
static const float4 g_coordStep = c[2];
static const float2 g_sampleCoordOffset = c[3].xy;
static const float2 g_halfTexelOffset = c[3].zw;
static const float g_support = c[4].x;
static const float g_supportRcp = c[4].y;
static const float2 g_textureSize = c[0].xy;
static const float2 g_sampleCoordOffset = c[0].zw;
static const float4 g_textureCoordOffset = c[1];
static const float4 g_kernelCoordOffset = c[2];
static const float2 g_textureCoordStep = c[3].xy;
static const float2 g_kernelCoordStep = c[3].zw;
static const float2 g_textureCoordStepPri = c[4].xy;
static const float2 g_textureCoordStepSec = c[4].zw;
static const float2 g_kernelCoordStepPri = c[5].xy;
static const float g_support = c[5].z;
static const float g_supportRcp = c[5].w;
#ifdef NONNEGATIVE
float4 kernel(float4 x);
#else
float kernel(float x);
#endif
// Accumulates the two taps addressed by textureCoord.xy and textureCoord.zw
// into colorSum, weighted by weights.x and weights.z respectively.
// With NONNEGATIVE defined, both taps are merged into a single fetch at a
// coordinate interpolated between them by the relative weight of the second tap.
// NOTE(review): the merged fetch presumably relies on the sampler doing linear
// filtering - confirm against the filter selected by the caller.
void addSamples(inout float4 colorSum, float4 textureCoord, float4 weights)
{
#ifdef NONNEGATIVE
    const float weightSum = weights.x + weights.z;
    const float secondTapRatio = weights.z / weightSum;
    const float4 mergedCoord = lerp(textureCoord.xyxy, textureCoord.zwzw, secondTapRatio);
    colorSum += weightSum * tex2Dlod(s_texture, mergedCoord);
#else
    const float4 firstTap = tex2Dlod(s_texture, textureCoord.xyxy);
    colorSum += weights.x * firstTap;
    const float4 secondTap = tex2Dlod(s_texture, textureCoord.zwzw);
    colorSum += weights.z * secondTap;
#endif
}
// Evaluates the kernel for all four components of kernelCoord.
// With NONNEGATIVE defined the kernel is vectorized and called once;
// otherwise the scalar kernel is called once per component.
float4 getWeights(float4 kernelCoord)
{
#ifdef NONNEGATIVE
    const float4 result = kernel(kernelCoord);
#else
    float4 result;
    result.x = kernel(kernelCoord.x);
    result.y = kernel(kernelCoord.y);
    result.z = kernel(kernelCoord.z);
    result.w = kernel(kernelCoord.w);
#endif
    return result;
}
float4 main(float2 texCoord : TEXCOORD0) : COLOR0
{
const float2 sampleCoord = texCoord * g_textureSize + g_sampleCoordOffset;
const float2 sampleCoord = mad(texCoord, g_textureSize, g_sampleCoordOffset);
const float2 sampleCoordFrac = frac(sampleCoord);
const float2 sampleCoordInt = sampleCoord - sampleCoordFrac;
const float2 centeredTexCoord = sampleCoordInt * g_textureSizeRcp + g_halfTexelOffset;
float4 coord = float4(centeredTexCoord, -sampleCoordFrac * g_coordStep.zw) + g_firstCoordOffset;
float4 coordStep = g_coordStep;
float4 textureCoord = mad(sampleCoordInt.xyxy, g_textureCoordStep.xyxy, g_textureCoordOffset);
float4 kernelCoord = mad(-sampleCoordFrac.xyxy, g_kernelCoordStep.xyxy, g_kernelCoordOffset);
kernelCoord = g_textureCoordStepPri.x > 0 ? kernelCoord : kernelCoord.yxwz;
const float4 weights = getWeights(kernelCoord);
#ifdef NONNEGATIVE
[branch] if (g_sampleCoordOffset.x == g_sampleCoordOffset.y)
{
textureCoord = lerp(textureCoord, textureCoord + g_textureCoordStepSec.xyxy, weights.w / (weights.y + weights.w));
}
#endif
float4 colorSum = 0;
addSamples(colorSum, textureCoord, weights);
if (0 != coordStep.x)
for (int i = 0; i < g_sampleCountHalfMinusOne; ++i)
{
for (int i = 0; i < g_sampleCountX; ++i)
{
coordStep.w = kernel(coord.z);
float4 color = tex2Dlod(s_texture, coord);
colorSum += coordStep.w * color;
coord += coordStep;
}
return colorSum / coord.w;
}
else
{
for (int i = 0; i < g_sampleCountY; ++i)
{
coordStep.z = kernel(coord.w);
float4 color = tex2Dlod(s_texture, coord);
colorSum += coordStep.z * color;
coord += coordStep;
}
return colorSum / coord.z;
textureCoord += g_textureCoordStepPri.xyxy;
kernelCoord += g_kernelCoordStepPri.xyxy;
addSamples(colorSum, textureCoord, getWeights(kernelCoord));
}
return colorSum / colorSum.a;
}

View File

@ -1,12 +0,0 @@
sampler2D s_texture : register(s0);
float2 g_textureRes : register(c0);
float2 g_scaleFactor : register(c1);
// Samples s_texture at a coordinate whose fractional texel position is
// scaled around 0.5 by g_scaleFactor and clamped to [0, 1] before sampling.
float4 main(float2 texCoord : TEXCOORD0) : COLOR0
{
    const float2 texelPos = texCoord * g_textureRes - 0.5f;
    const float2 texelFrac = frac(texelPos);
    const float2 texelBase = texelPos - texelFrac;
    const float2 adjustedFrac = saturate(g_scaleFactor * (texelFrac - 0.5f) + 0.5f);
    return tex2D(s_texture, (texelBase + adjustedFrac + 0.5f) / g_textureRes);
}