1
0
mirror of https://github.com/FunkyFr3sh/cnc-ddraw.git synced 2025-03-25 10:07:47 +01:00

optimize shader

This commit is contained in:
FunkyFr3sh 2023-07-06 18:49:13 +02:00
parent 993ecf92c7
commit ed642b8c7e

View File

@ -407,23 +407,23 @@ const BYTE D3D9_PALETTE_SHADER_BILINEAR[] =
mul r1.xy, r1, r1.wzyx
mad r1.zw, r1, c1.y, c1.x
mul r1.zw, r1, r4
mul r0, r0, r1.x
mul r0, r2.z, r0
mul r3, r2.w, r3
mul r4, r2.w, r6
mad r0, r4, r1.y, r0
mad r0, r3, r2.z, r0
mul r3, r2.w, r5
mul r4, r1.w, r7
mad r0, r4, r2.z, r0
mad r0, r3, r1.z, r0
mul r1.x, r2.z, r1.x
mad r1.x, r2.w, r1.y, r1.x
mad r1.x, r2.w, r2.z, r1.x
mad r1.x, r1.w, r2.z, r1.x
mad r1.x, r2.w, r1.z, r1.x
rcp r1.x, r1.x
mul r0, r0, r1.x
mul r0.w, r2.z, r1.x
mul r0.xyz, r0.w, r0
mad r0.w, r2.w, r1.y, r0.w
mul r3.w, r1.y, r2.w
mad r0.w, r2.w, r2.z, r0.w
mad r0.w, r1.w, r2.z, r0.w
mad r0.w, r2.w, r1.z, r0.w
rcp r0.w, r0.w
mad r0.xyz, r6, r3.w, r0
mul r3.w, r2.z, r2.w
mad r0.xyz, r3, r3.w, r0
mul r5.w, r2.z, r1.w
mul r7.w, r1.z, r2.w
mad r0.xyz, r7, r5.w, r0
mad r0.xyz, r5, r7.w, r0
mul r0.xyz, r0.w, r0
mov r0.w, c1.z
mov oC0, r0
// approximately 58 instruction slots used (5 texture, 53 arithmetic)
@ -464,22 +464,21 @@ float4 catmull_rom(float2 coord)
texPos3 /= TextureSize.xy;
texPos12 /= TextureSize.xy;
float4 result = 0.0f;
//result += tex2D(SurfaceTex, float2(texPos0.x, texPos0.y)) * w0.x * w0.y;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos0.y)) * w12.x * w0.y;
//result += tex2D(SurfaceTex, float2(texPos3.x, texPos0.y)) * w3.x * w0.y;
float wtm = w12.x * w0.y;
float wml = w0.x * w12.y;
float wmm = w12.x * w12.y;
float wmr = w3.x * w12.y;
float wbm = w12.x * w3.y;
result += tex2D(SurfaceTex, float2(texPos0.x, texPos12.y)) * w0.x * w12.y;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos12.y)) * w12.x * w12.y;
result += tex2D(SurfaceTex, float2(texPos3.x, texPos12.y)) * w3.x * w12.y;
float3 result = 0.0f;
//result += tex2D(SurfaceTex, float2(texPos0.x, texPos3.y)) * w0.x * w3.y;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos3.y)) * w12.x * w3.y;
//result += tex2D(SurfaceTex, float2(texPos3.x, texPos3.y)) * w3.x * w3.y;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos0.y)).rgb * wtm;
result += tex2D(SurfaceTex, float2(texPos0.x, texPos12.y)).rgb * wml;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos12.y)).rgb * wmm;
result += tex2D(SurfaceTex, float2(texPos3.x, texPos12.y)).rgb * wmr;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos3.y)).rgb * wbm;
float wm = 1./((w12.x * w0.y)+(w0.x * w12.y)+(w12.x * w12.y)+(w3.x * w12.y)+(w12.x * w3.y));
return result * wm;
return float4(result * (1./(wtm+wml+wmm+wmr+wbm)), 1.0);
}
float4 main(float2 texCoords : TEXCOORD) : COLOR
@ -641,58 +640,59 @@ const BYTE D3D9_CATMULL_ROM_SHADER[] =
1, 0, 12, 128, 1, 0,
228, 128, 4, 0, 228, 128,
5, 0, 0, 3, 0, 0,
15, 128, 0, 0, 228, 128,
8, 128, 2, 0, 170, 128,
1, 0, 0, 128, 5, 0,
0, 3, 0, 0, 15, 128,
2, 0, 170, 128, 0, 0,
228, 128, 5, 0, 0, 3,
3, 0, 15, 128, 2, 0,
255, 128, 3, 0, 228, 128,
5, 0, 0, 3, 4, 0,
15, 128, 2, 0, 255, 128,
6, 0, 228, 128, 4, 0,
0, 4, 0, 0, 15, 128,
4, 0, 228, 128, 1, 0,
85, 128, 0, 0, 228, 128,
4, 0, 0, 4, 0, 0,
15, 128, 3, 0, 228, 128,
2, 0, 170, 128, 0, 0,
228, 128, 5, 0, 0, 3,
3, 0, 15, 128, 2, 0,
255, 128, 5, 0, 228, 128,
5, 0, 0, 3, 4, 0,
15, 128, 1, 0, 255, 128,
7, 0, 228, 128, 4, 0,
0, 4, 0, 0, 15, 128,
4, 0, 228, 128, 2, 0,
170, 128, 0, 0, 228, 128,
4, 0, 0, 4, 0, 0,
15, 128, 3, 0, 228, 128,
1, 0, 170, 128, 0, 0,
228, 128, 5, 0, 0, 3,
1, 0, 1, 128, 2, 0,
170, 128, 1, 0, 0, 128,
4, 0, 0, 4, 1, 0,
1, 128, 2, 0, 255, 128,
1, 0, 85, 128, 1, 0,
0, 128, 4, 0, 0, 4,
1, 0, 1, 128, 2, 0,
0, 3, 0, 0, 7, 128,
0, 0, 255, 128, 0, 0,
228, 128, 4, 0, 0, 4,
0, 0, 8, 128, 2, 0,
255, 128, 1, 0, 85, 128,
0, 0, 255, 128, 5, 0,
0, 3, 3, 0, 8, 128,
1, 0, 85, 128, 2, 0,
255, 128, 4, 0, 0, 4,
0, 0, 8, 128, 2, 0,
255, 128, 2, 0, 170, 128,
1, 0, 0, 128, 4, 0,
0, 4, 1, 0, 1, 128,
0, 0, 255, 128, 4, 0,
0, 4, 0, 0, 8, 128,
1, 0, 255, 128, 2, 0,
170, 128, 1, 0, 0, 128,
4, 0, 0, 4, 1, 0,
1, 128, 2, 0, 255, 128,
1, 0, 170, 128, 1, 0,
0, 128, 6, 0, 0, 2,
1, 0, 1, 128, 1, 0,
0, 128, 5, 0, 0, 3,
0, 0, 15, 128, 0, 0,
228, 128, 1, 0, 0, 128,
1, 0, 0, 2, 0, 8,
15, 128, 0, 0, 228, 128,
255, 255, 0, 0
170, 128, 0, 0, 255, 128,
4, 0, 0, 4, 0, 0,
8, 128, 2, 0, 255, 128,
1, 0, 170, 128, 0, 0,
255, 128, 6, 0, 0, 2,
0, 0, 8, 128, 0, 0,
255, 128, 4, 0, 0, 4,
0, 0, 7, 128, 6, 0,
228, 128, 3, 0, 255, 128,
0, 0, 228, 128, 5, 0,
0, 3, 3, 0, 8, 128,
2, 0, 170, 128, 2, 0,
255, 128, 4, 0, 0, 4,
0, 0, 7, 128, 3, 0,
228, 128, 3, 0, 255, 128,
0, 0, 228, 128, 5, 0,
0, 3, 5, 0, 8, 128,
2, 0, 170, 128, 1, 0,
255, 128, 5, 0, 0, 3,
7, 0, 8, 128, 1, 0,
170, 128, 2, 0, 255, 128,
4, 0, 0, 4, 0, 0,
7, 128, 7, 0, 228, 128,
5, 0, 255, 128, 0, 0,
228, 128, 4, 0, 0, 4,
0, 0, 7, 128, 5, 0,
228, 128, 7, 0, 255, 128,
0, 0, 228, 128, 5, 0,
0, 3, 0, 0, 7, 128,
0, 0, 255, 128, 0, 0,
228, 128, 1, 0, 0, 2,
0, 0, 8, 128, 1, 0,
170, 160, 1, 0, 0, 2,
0, 8, 15, 128, 0, 0,
228, 128, 255, 255, 0, 0
};
#endif