1
0
mirror of https://github.com/FunkyFr3sh/cnc-ddraw.git synced 2025-03-26 02:19:24 +01:00

optimize shader

This commit is contained in:
FunkyFr3sh 2023-07-06 18:49:13 +02:00
parent 993ecf92c7
commit ed642b8c7e

View File

@ -407,23 +407,23 @@ const BYTE D3D9_PALETTE_SHADER_BILINEAR[] =
mul r1.xy, r1, r1.wzyx mul r1.xy, r1, r1.wzyx
mad r1.zw, r1, c1.y, c1.x mad r1.zw, r1, c1.y, c1.x
mul r1.zw, r1, r4 mul r1.zw, r1, r4
mul r0, r0, r1.x mul r0.w, r2.z, r1.x
mul r0, r2.z, r0 mul r0.xyz, r0.w, r0
mul r3, r2.w, r3 mad r0.w, r2.w, r1.y, r0.w
mul r4, r2.w, r6 mul r3.w, r1.y, r2.w
mad r0, r4, r1.y, r0 mad r0.w, r2.w, r2.z, r0.w
mad r0, r3, r2.z, r0 mad r0.w, r1.w, r2.z, r0.w
mul r3, r2.w, r5 mad r0.w, r2.w, r1.z, r0.w
mul r4, r1.w, r7 rcp r0.w, r0.w
mad r0, r4, r2.z, r0 mad r0.xyz, r6, r3.w, r0
mad r0, r3, r1.z, r0 mul r3.w, r2.z, r2.w
mul r1.x, r2.z, r1.x mad r0.xyz, r3, r3.w, r0
mad r1.x, r2.w, r1.y, r1.x mul r5.w, r2.z, r1.w
mad r1.x, r2.w, r2.z, r1.x mul r7.w, r1.z, r2.w
mad r1.x, r1.w, r2.z, r1.x mad r0.xyz, r7, r5.w, r0
mad r1.x, r2.w, r1.z, r1.x mad r0.xyz, r5, r7.w, r0
rcp r1.x, r1.x mul r0.xyz, r0.w, r0
mul r0, r0, r1.x mov r0.w, c1.z
mov oC0, r0 mov oC0, r0
// approximately 58 instruction slots used (5 texture, 53 arithmetic) // approximately 58 instruction slots used (5 texture, 53 arithmetic)
@ -464,22 +464,21 @@ float4 catmull_rom(float2 coord)
texPos3 /= TextureSize.xy; texPos3 /= TextureSize.xy;
texPos12 /= TextureSize.xy; texPos12 /= TextureSize.xy;
float4 result = 0.0f; float wtm = w12.x * w0.y;
//result += tex2D(SurfaceTex, float2(texPos0.x, texPos0.y)) * w0.x * w0.y; float wml = w0.x * w12.y;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos0.y)) * w12.x * w0.y; float wmm = w12.x * w12.y;
//result += tex2D(SurfaceTex, float2(texPos3.x, texPos0.y)) * w3.x * w0.y; float wmr = w3.x * w12.y;
float wbm = w12.x * w3.y;
result += tex2D(SurfaceTex, float2(texPos0.x, texPos12.y)) * w0.x * w12.y; float3 result = 0.0f;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos12.y)) * w12.x * w12.y;
result += tex2D(SurfaceTex, float2(texPos3.x, texPos12.y)) * w3.x * w12.y;
//result += tex2D(SurfaceTex, float2(texPos0.x, texPos3.y)) * w0.x * w3.y; result += tex2D(SurfaceTex, float2(texPos12.x, texPos0.y)).rgb * wtm;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos3.y)) * w12.x * w3.y; result += tex2D(SurfaceTex, float2(texPos0.x, texPos12.y)).rgb * wml;
//result += tex2D(SurfaceTex, float2(texPos3.x, texPos3.y)) * w3.x * w3.y; result += tex2D(SurfaceTex, float2(texPos12.x, texPos12.y)).rgb * wmm;
result += tex2D(SurfaceTex, float2(texPos3.x, texPos12.y)).rgb * wmr;
result += tex2D(SurfaceTex, float2(texPos12.x, texPos3.y)).rgb * wbm;
float wm = 1./((w12.x * w0.y)+(w0.x * w12.y)+(w12.x * w12.y)+(w3.x * w12.y)+(w12.x * w3.y)); return float4(result * (1./(wtm+wml+wmm+wmr+wbm)), 1.0);
return result * wm;
} }
float4 main(float2 texCoords : TEXCOORD) : COLOR float4 main(float2 texCoords : TEXCOORD) : COLOR
@ -641,58 +640,59 @@ const BYTE D3D9_CATMULL_ROM_SHADER[] =
1, 0, 12, 128, 1, 0, 1, 0, 12, 128, 1, 0,
228, 128, 4, 0, 228, 128, 228, 128, 4, 0, 228, 128,
5, 0, 0, 3, 0, 0, 5, 0, 0, 3, 0, 0,
15, 128, 0, 0, 228, 128, 8, 128, 2, 0, 170, 128,
1, 0, 0, 128, 5, 0, 1, 0, 0, 128, 5, 0,
0, 3, 0, 0, 15, 128, 0, 3, 0, 0, 7, 128,
2, 0, 170, 128, 0, 0, 0, 0, 255, 128, 0, 0,
228, 128, 5, 0, 0, 3, 228, 128, 4, 0, 0, 4,
3, 0, 15, 128, 2, 0, 0, 0, 8, 128, 2, 0,
255, 128, 3, 0, 228, 128, 255, 128, 1, 0, 85, 128,
5, 0, 0, 3, 4, 0, 0, 0, 255, 128, 5, 0,
15, 128, 2, 0, 255, 128, 0, 3, 3, 0, 8, 128,
6, 0, 228, 128, 4, 0, 1, 0, 85, 128, 2, 0,
0, 4, 0, 0, 15, 128, 255, 128, 4, 0, 0, 4,
4, 0, 228, 128, 1, 0, 0, 0, 8, 128, 2, 0,
85, 128, 0, 0, 228, 128,
4, 0, 0, 4, 0, 0,
15, 128, 3, 0, 228, 128,
2, 0, 170, 128, 0, 0,
228, 128, 5, 0, 0, 3,
3, 0, 15, 128, 2, 0,
255, 128, 5, 0, 228, 128,
5, 0, 0, 3, 4, 0,
15, 128, 1, 0, 255, 128,
7, 0, 228, 128, 4, 0,
0, 4, 0, 0, 15, 128,
4, 0, 228, 128, 2, 0,
170, 128, 0, 0, 228, 128,
4, 0, 0, 4, 0, 0,
15, 128, 3, 0, 228, 128,
1, 0, 170, 128, 0, 0,
228, 128, 5, 0, 0, 3,
1, 0, 1, 128, 2, 0,
170, 128, 1, 0, 0, 128,
4, 0, 0, 4, 1, 0,
1, 128, 2, 0, 255, 128,
1, 0, 85, 128, 1, 0,
0, 128, 4, 0, 0, 4,
1, 0, 1, 128, 2, 0,
255, 128, 2, 0, 170, 128, 255, 128, 2, 0, 170, 128,
1, 0, 0, 128, 4, 0, 0, 0, 255, 128, 4, 0,
0, 4, 1, 0, 1, 128, 0, 4, 0, 0, 8, 128,
1, 0, 255, 128, 2, 0, 1, 0, 255, 128, 2, 0,
170, 128, 1, 0, 0, 128, 170, 128, 0, 0, 255, 128,
4, 0, 0, 4, 1, 0, 4, 0, 0, 4, 0, 0,
1, 128, 2, 0, 255, 128, 8, 128, 2, 0, 255, 128,
1, 0, 170, 128, 1, 0, 1, 0, 170, 128, 0, 0,
0, 128, 6, 0, 0, 2, 255, 128, 6, 0, 0, 2,
1, 0, 1, 128, 1, 0, 0, 0, 8, 128, 0, 0,
0, 128, 5, 0, 0, 3, 255, 128, 4, 0, 0, 4,
0, 0, 15, 128, 0, 0, 0, 0, 7, 128, 6, 0,
228, 128, 1, 0, 0, 128, 228, 128, 3, 0, 255, 128,
1, 0, 0, 2, 0, 8, 0, 0, 228, 128, 5, 0,
15, 128, 0, 0, 228, 128, 0, 3, 3, 0, 8, 128,
255, 255, 0, 0 2, 0, 170, 128, 2, 0,
255, 128, 4, 0, 0, 4,
0, 0, 7, 128, 3, 0,
228, 128, 3, 0, 255, 128,
0, 0, 228, 128, 5, 0,
0, 3, 5, 0, 8, 128,
2, 0, 170, 128, 1, 0,
255, 128, 5, 0, 0, 3,
7, 0, 8, 128, 1, 0,
170, 128, 2, 0, 255, 128,
4, 0, 0, 4, 0, 0,
7, 128, 7, 0, 228, 128,
5, 0, 255, 128, 0, 0,
228, 128, 4, 0, 0, 4,
0, 0, 7, 128, 5, 0,
228, 128, 7, 0, 255, 128,
0, 0, 228, 128, 5, 0,
0, 3, 0, 0, 7, 128,
0, 0, 255, 128, 0, 0,
228, 128, 1, 0, 0, 2,
0, 0, 8, 128, 1, 0,
170, 160, 1, 0, 0, 2,
0, 8, 15, 128, 0, 0,
228, 128, 255, 255, 0, 0
}; };
#endif #endif