d3d9: convert NV12 two pixels per invocation through 16-bit texel fetches

Summary of what this patch changes:
  * d3d9_format_helpers.cpp: the NV12 case now calls ConvertGenericFormat
    with VK_FORMAT_R16_UINT instead of VK_FORMAT_R8_UINT, and with a
    trailing { 2u, 1u } argument instead of { 1u, 1u } (presumably the
    macropixel size, matching the "[2, 1]" comment in the shader).
  * d3d9_convert_common.h: adds unpackUnorm2x8(), which splits the low
    16 bits of a uint into two bytes and normalizes each with
    unpackUnorm().
  * d3d9_convert_nv12.comp: each invocation fetches one 16-bit texel from
    the luma plane (two Y samples) and one from the chroma plane (one U/V
    pair), converts both pixels, and stores them at x*2 and x*2+1.

Review notes:
  * The line structure and the leading whitespace of this patch were
    reconstructed from a whitespace-collapsed copy. Hunk line counts are
    consistent with the hunk headers, but exact indentation and in-line
    alignment of context/removed lines could not be verified; use
    `git apply --ignore-whitespace` if the context does not match.
  * Compared with that copy, two added lines were adjusted:
    fetchUnorm2x8() samples its `source` parameter instead of the global
    `src`, and unpackUnorm2x8() masks the high byte with 0xFF. The blob
    hashes on the `index` lines are those of the original patch and do
    not reflect these two adjustments.

diff --git a/src/d3d9/d3d9_format_helpers.cpp b/src/d3d9/d3d9_format_helpers.cpp
index 047e058f..4b40ba8f 100644
--- a/src/d3d9/d3d9_format_helpers.cpp
+++ b/src/d3d9/d3d9_format_helpers.cpp
@@ -37,7 +37,7 @@ namespace dxvk {
       }
 
       case D3D9ConversionFormat_NV12:
-        ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R8_UINT, 0, { 1u, 1u });
+        ConvertGenericFormat(conversionFormat, dstImage, dstSubresource, srcBuffer, VK_FORMAT_R16_UINT, 0, { 2u, 1u });
         break;
 
       case D3D9ConversionFormat_L6V5U5:
diff --git a/src/d3d9/shaders/d3d9_convert_common.h b/src/d3d9/shaders/d3d9_convert_common.h
index 6ee55e44..48f2409e 100644
--- a/src/d3d9/shaders/d3d9_convert_common.h
+++ b/src/d3d9/shaders/d3d9_convert_common.h
@@ -16,6 +16,11 @@ float unpackUnorm(uint p) {
   return float(p) / 255.0;
 }
 
+vec2 unpackUnorm2x8(uint p) {
+  uvec2 value = uvec2(p & 0xFF, (p >> 8) & 0xFF);
+  return vec2(unpackUnorm(value.x), unpackUnorm(value.y));
+}
+
 mat3x4 g_yuv_to_rgb = {
   { 298 / 256, 0, 409 / 256, 0.5 },
   { 298 / 256, -100 / 256, -208 / 256, 0.5 },
diff --git a/src/d3d9/shaders/d3d9_convert_nv12.comp b/src/d3d9/shaders/d3d9_convert_nv12.comp
index 2cd85266..68fd6846 100644
--- a/src/d3d9/shaders/d3d9_convert_nv12.comp
+++ b/src/d3d9/shaders/d3d9_convert_nv12.comp
@@ -18,39 +18,46 @@ uniform u_info_t {
   uvec2 extent;
 } u_info;
 
-float fetchUnorm(usamplerBuffer source, uint offset) {
-  return unpackUnorm(texelFetch(src, int(offset)).r);
+vec2 fetchUnorm2x8(usamplerBuffer source, uint offset) {
+  return unpackUnorm2x8(texelFetch(source, int(offset)).r);
 }
 
+// Format is:
+// YYYYYYYYYYYYYYY...
+// YYYYYYYYYYYYYYY...
+// UVUVUVUVUVUVUVU...
+
 void main() {
   ivec3 thread_id = ivec3(gl_GlobalInvocationID);
 
   if (all(lessThan(thread_id.xy, u_info.extent))) {
     uvec2 pitch = uvec2(u_info.extent.x, u_info.extent.y);
 
-    // Format is:
-    // YYYYYYYYYYYYYYY...
-    // UVUVUVUVUVUVUVU...
     uint offset = thread_id.x
                 + thread_id.y * pitch.x;
 
-    float c0 = fetchUnorm(src, offset) - (16 / 255.0);
+    // Fetch 2 luminance samples.
+    vec2 y = fetchUnorm2x8(src, offset) - (16 / 255.0);
 
-    // Floor .x to the nearest 2, because
-    // UV data is in WORDs, and we want to get the color
-    // for this pixel.
-    // Then divide thread_id.y by 2 because the macropixel
-    // layout for chroma data is [2, 2].
-    offset = (thread_id.x / 2) * 2
+    // Go into the second plane to get the chroma data.
+    // UV data is subsampled as [2, 2]
+    // So we need to divide thread_id.y by 2.
+    // thread_id.x is already accounted for as we read uint16
+    offset = thread_id.x
            + thread_id.y / 2 * pitch.x
            + pitch.x * pitch.y;
 
-    float u = fetchUnorm(src, offset) - (128 / 255.0);
-    float v = fetchUnorm(src, offset + 1) - (128 / 255.0);
+    vec2 uv = fetchUnorm2x8(src, offset) - (128 / 255.0);
 
     // The NV12 format seems to use the BT.703 color space.
-    vec4 color0 = convertBT_703(vec3(c0, u, v));
+    vec4 color0 = convertBT_703(vec3(y.x, uv.x, uv.y));
+    vec4 color1 = convertBT_703(vec3(y.y, uv.x, uv.y));
+
+    // We write as a macropixel of [2, 1]
+    // So write out 2 pixels in this run.
+    ivec2 writePos = thread_id.xy * ivec2(2, 1);
 
-    imageStore(dst, thread_id.xy, color0);
+    imageStore(dst, ivec2(writePos.x, writePos.y), color0);
+    imageStore(dst, ivec2(writePos.x + 1, writePos.y), color1);
   }
 }
\ No newline at end of file