#version 450

#extension GL_GOOGLE_include_directive : enable

#include "d3d9_convert_common.h"

// Compute shader that converts NV12 data held in a texel buffer into
// color values stored to a 2D image. Each invocation fetches one texel
// carrying two luminance samples plus one texel carrying the matching
// chroma pair, and stores two horizontally adjacent pixels.

layout(
  local_size_x = 8,
  local_size_y = 8,
  local_size_z = 1) in;

// Destination image; written with imageStore only.
layout(binding = 0)
writeonly uniform image2D dst;

// Source buffer holding the packed planes, read with texelFetch.
layout(binding = 1)
uniform usamplerBuffer src;

// extent: bounds for the invocation grid. extent.x is also used as the
// row pitch (in fetched texels) and extent.y as the row count of the
// luminance plane when computing buffer offsets.
layout(push_constant)
uniform u_info_t {
  uvec2 extent;
} u_info;

// Fetches the texel at `offset` from `source` and unpacks its red
// channel into two normalized values.
// NOTE(review): unpackUnorm2x8 is not declared in this file; presumably
// it is provided by d3d9_convert_common.h - confirm.
vec2 fetchUnorm2x8(usamplerBuffer source, uint offset) {
  // Read from the buffer that was passed in, not from the global `src`,
  // so the helper honours its parameter.
  return unpackUnorm2x8(texelFetch(source, int(offset)).r);
}

// Format is:
// YYYYYYYYYYYYYYY...
// YYYYYYYYYYYYYYY...
// UVUVUVUVUVUVUVU...

void main() {
  ivec3 thread_id = ivec3(gl_GlobalInvocationID);

  // Invocations outside the requested extent do nothing.
  if (all(lessThan(thread_id.xy, u_info.extent))) {
    uvec2 pitch = uvec2(u_info.extent.x, u_info.extent.y);

    // Row-major offset of this invocation's texel in the first plane.
    uint offset = thread_id.x
                + thread_id.y * pitch.x;

    // Fetch 2 luminance samples and remove the 16/255 offset.
    vec2 y = fetchUnorm2x8(src, offset) - (16 / 255.0);

    // Go into the second plane to get the chroma data.
    // UV data is subsampled as [2, 2]
    // So we need to divide thread_id.y by 2.
    // thread_id.x is already accounted for as we read uint16
    // The second plane starts right after the pitch.x * pitch.y texels
    // of the first plane.
    offset = thread_id.x
           + (thread_id.y / 2) * pitch.x
           + pitch.x * pitch.y;

    // Fetch the chroma pair and remove the 128/255 offset.
    vec2 uv = fetchUnorm2x8(src, offset) - (128 / 255.0);

    // The NV12 format seems to use the BT.709 color space.
    // Both output pixels share the same chroma pair.
    vec4 color0 = convertBT_709(vec3(y.x, uv.x, uv.y));
    vec4 color1 = convertBT_709(vec3(y.y, uv.x, uv.y));

    // We write as a macropixel of [2, 1]
    // So write out 2 pixels in this run.
    ivec2 writePos = thread_id.xy * ivec2(2, 1);

    imageStore(dst, ivec2(writePos.x,     writePos.y), color0);
    imageStore(dst, ivec2(writePos.x + 1, writePos.y), color1);
  }
}