From e740adfcb76a307bd53ff0e67395928805f6660a Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Sat, 30 Dec 2017 13:18:31 +0100 Subject: [PATCH] [dxbc] Implemented f16 pack/unpack instructions --- src/dxbc/dxbc_compiler.cpp | 101 ++++++++++++++++++++++++++++++++----- src/dxbc/dxbc_compiler.h | 3 ++ src/dxbc/dxbc_defs.cpp | 10 +++- src/dxbc/dxbc_defs.h | 1 + src/spirv/spirv_module.cpp | 30 +++++++++++ src/spirv/spirv_module.h | 8 +++ 6 files changed, 138 insertions(+), 15 deletions(-) diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index 04baa109..f29adfc8 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -60,12 +60,6 @@ namespace dxvk { case DxbcInstClass::CustomData: return this->emitCustomData(ins); - case DxbcInstClass::ControlFlow: - return this->emitControlFlow(ins); - - case DxbcInstClass::GeometryEmit: - return this->emitGeometryEmit(ins); - case DxbcInstClass::Atomic: return this->emitAtomic(ins); @@ -87,6 +81,15 @@ namespace dxvk { case DxbcInstClass::BufferStore: return this->emitBufferStore(ins); + case DxbcInstClass::ConvertFloat16: + return this->emitConvertFloat16(ins); + + case DxbcInstClass::ControlFlow: + return this->emitControlFlow(ins); + + case DxbcInstClass::GeometryEmit: + return this->emitGeometryEmit(ins); + case DxbcInstClass::TextureQuery: return this->emitTextureQuery(ins); @@ -1368,7 +1371,7 @@ namespace dxvk { if (ins.dst[0].type != DxbcOperandType::Null && ins.dst[1].type != DxbcOperandType::Null && ins.dst[0].mask != ins.dst[1].mask) { - Logger::warn("DxbcCompiler: Umul with different destination masks not supported"); + Logger::warn("DxbcCompiler: Idiv with different destination masks not supported"); return; } @@ -1558,7 +1561,12 @@ namespace dxvk { void DxbcCompiler::emitAtomic(const DxbcShaderInstruction& ins) { - Logger::err("DxbcCompiler: emitAtomic not implemented"); + switch (ins.op) { + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); 
+ } } @@ -1753,6 +1761,75 @@ namespace dxvk { } + void DxbcCompiler::emitConvertFloat16(const DxbcShaderInstruction& ins) { + // f32tof16 takes two operands: + // (dst0) Destination register as a uint32 vector + // (src0) Source register as a float32 vector + // f16tof32 takes two operands: + // (dst0) Destination register as a float32 vector + // (src0) Source register as a uint32 vector + const DxbcRegisterValue src = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + + // We handle both packing and unpacking here + const bool isPack = ins.op == DxbcOpcode::F32toF16; + + // The conversion instructions do not map very well to the + // SPIR-V pack instructions, which operate on 2D vectors. + std::array scalarIds = {{ 0, 0, 0, 0 }}; + std::array swizzleIds = {{ 0, 0, 0, 0 }}; + + uint32_t componentIndex = 0; + + // These types are used in both pack and unpack operations + const uint32_t t_u32 = getVectorTypeId({ DxbcScalarType::Uint32, 1 }); + const uint32_t t_f32 = getVectorTypeId({ DxbcScalarType::Float32, 1 }); + const uint32_t t_f32v2 = getVectorTypeId({ DxbcScalarType::Float32, 2 }); + + // Constant zero-bit pattern, used for packing + const uint32_t zerof32 = isPack ? m_module.constf32(0.0f) : 0; + + for (uint32_t i = 0; i < 4; i++) { + if (ins.dst[0].mask[i]) { + const uint32_t swizzleIndex = ins.src[0].swizzle[i]; + + // When extracting components from the source register, we must + // take into account that it is already swizzled and masked. 
+ if (scalarIds[swizzleIndex] == 0) { + if (isPack) { // f32tof16 + const std::array packIds = + {{ m_module.opCompositeExtract(t_f32, src.id, 1, &componentIndex), zerof32 }}; + + scalarIds[swizzleIndex] = m_module.opPackHalf2x16(t_u32, + m_module.opCompositeConstruct(t_f32v2, packIds.size(), packIds.data())); + } else { // f16tof32 + const uint32_t zeroIndex = 0; + + scalarIds[swizzleIndex] = m_module.opCompositeExtract(t_f32, + m_module.opUnpackHalf2x16(t_f32v2, + m_module.opCompositeExtract(t_u32, src.id, 1, &componentIndex)), + 1, &zeroIndex); + } + } + + // Apply write mask and source swizzle at the same time + swizzleIds[componentIndex++] = scalarIds[swizzleIndex]; + } + } + + // Store result in the destination register + DxbcRegisterValue result; + result.type.ctype = ins.dst[0].dataType; + result.type.ccount = componentIndex; + result.id = componentIndex > 1 + ? m_module.opCompositeConstruct( + getVectorTypeId(result.type), + componentIndex, swizzleIds.data()) + : swizzleIds[0]; + + emitRegisterStore(ins.dst[0], result); + } + + void DxbcCompiler::emitTextureQuery(const DxbcShaderInstruction& ins) { // resinfo has three operands: // (dst0) The destination register @@ -2833,10 +2910,9 @@ namespace dxvk { if (componentIds[swizzleIndex] == 0) { // Add the component offset to the element index - const uint32_t elementIndexAdjusted = swizzleIndex != 0 - ? m_module.opIAdd(getVectorTypeId(elementIndex.type), - elementIndex.id, m_module.consti32(swizzleIndex)) - : elementIndex.id; + const uint32_t elementIndexAdjusted = m_module.opIAdd( + getVectorTypeId(elementIndex.type), elementIndex.id, + m_module.consti32(swizzleIndex)); // Load requested component from the buffer componentIds[swizzleIndex] = [&] { @@ -2876,7 +2952,6 @@ namespace dxvk { DxbcRegisterValue result; result.type.ctype = DxbcScalarType::Uint32; result.type.ccount = writeMask.setCount(); - result.id = result.type.ccount > 1 ? 
m_module.opCompositeConstruct(getVectorTypeId(result.type), result.type.ccount, swizzleIds.data()) diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index f956e053..8989eaac 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -410,6 +410,9 @@ namespace dxvk { void emitBufferStore( const DxbcShaderInstruction& ins); + void emitConvertFloat16( + const DxbcShaderInstruction& ins); + void emitTextureQuery( const DxbcShaderInstruction& ins); diff --git a/src/dxbc/dxbc_defs.cpp b/src/dxbc/dxbc_defs.cpp index f5460836..e0474039 100644 --- a/src/dxbc/dxbc_defs.cpp +++ b/src/dxbc/dxbc_defs.cpp @@ -586,9 +586,15 @@ namespace dxvk { { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, } }, /* F32toF16 */ - { }, + { 2, DxbcInstClass::ConvertFloat16, { + { DxbcOperandKind::DstReg, DxbcScalarType::Uint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, /* F16toF32 */ - { }, + { 2, DxbcInstClass::ConvertFloat16, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Uint32 }, + } }, /* UAddc */ { }, /* USubb */ diff --git a/src/dxbc/dxbc_defs.h b/src/dxbc/dxbc_defs.h index 35a91f9f..983f72ef 100644 --- a/src/dxbc/dxbc_defs.h +++ b/src/dxbc/dxbc_defs.h @@ -39,6 +39,7 @@ namespace dxvk { BufferQuery, ///< Buffer query instruction BufferLoad, ///< Structured or raw buffer load BufferStore, ///< Structured or raw buffer store + ConvertFloat16, ///< 16-bit float packing/unpacking TextureQuery, ///< Texture query instruction TextureFetch, ///< Texture fetch instruction TextureSample, ///< Texture sampling instruction diff --git a/src/spirv/spirv_module.cpp b/src/spirv/spirv_module.cpp index 6a3fc8d2..8b01a22a 100644 --- a/src/spirv/spirv_module.cpp +++ b/src/spirv/spirv_module.cpp @@ -1760,6 +1760,36 @@ namespace dxvk { } + uint32_t SpirvModule::opPackHalf2x16( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + 
m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450PackHalf2x16); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opUnpackHalf2x16( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450UnpackHalf2x16); + m_code.putWord(operand); + return resultId; + } + + uint32_t SpirvModule::opSelect( uint32_t resultType, uint32_t condition, diff --git a/src/spirv/spirv_module.h b/src/spirv/spirv_module.h index 6cdae38f..835ad844 100644 --- a/src/spirv/spirv_module.h +++ b/src/spirv/spirv_module.h @@ -601,6 +601,14 @@ namespace dxvk { uint32_t resultType, uint32_t operand); + uint32_t opPackHalf2x16( + uint32_t resultType, + uint32_t operand); + + uint32_t opUnpackHalf2x16( + uint32_t resultType, + uint32_t operand); + uint32_t opSelect( uint32_t resultType, uint32_t condition,