diff --git a/src/dxbc/dxbc_chunk_shex.h b/src/dxbc/dxbc_chunk_shex.h index 2ec577b9..53822fa1 100644 --- a/src/dxbc/dxbc_chunk_shex.h +++ b/src/dxbc/dxbc_chunk_shex.h @@ -2,6 +2,7 @@ #include "dxbc_common.h" #include "dxbc_decoder.h" +#include "dxbc_decoder_2.h" #include "dxbc_reader.h" namespace dxvk { @@ -24,6 +25,11 @@ namespace dxvk { return m_version; } + DxbcCodeSlice slice() const { + return DxbcCodeSlice(m_code.data(), + m_code.data() + m_code.size()); + } + DxbcDecoder begin() const { return DxbcDecoder(m_code.data(), m_code.size()); } diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp index b99b7c00..3582528a 100644 --- a/src/dxbc/dxbc_compiler.cpp +++ b/src/dxbc/dxbc_compiler.cpp @@ -1263,7 +1263,7 @@ namespace dxvk { // We only write to part of the destination // register, so we need to load and modify it DxbcValue tmp = this->loadPtr(ptr); - tmp = this->insertReg(tmp, srcValue, mask); + tmp = this->insertReg(tmp, srcValue, mask); m_module.opStore(ptr.pointerId, tmp.valueId); } diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h index 57870b80..c5dafd84 100644 --- a/src/dxbc/dxbc_compiler.h +++ b/src/dxbc/dxbc_compiler.h @@ -33,53 +33,6 @@ namespace dxvk { uint32_t pointerId = 0; }; - /** - * \brief Constant buffer binding - * - * Stores information required to - * access a constant buffer. - */ - struct DxbcConstantBuffer { - uint32_t varId = 0; - uint32_t size = 0; - }; - - /** - * \brief Sampler binding - * - * Stores a sampler variable that can be - * used together with a texture resource. - */ - struct DxbcSampler { - uint32_t varId = 0; - uint32_t typeId = 0; - }; - - - /** - * \brief Shader resource binding - * - * Stores a resource variable - * and associated type IDs. 
- */ - struct DxbcShaderResource { - uint32_t varId = 0; - uint32_t sampledTypeId = 0; - uint32_t textureTypeId = 0; - }; - - /** - * \brief System value mapping - * - * Maps a system value to a given set of - * components of an input or output register. - */ - struct DxbcSvMapping { - uint32_t regId; - DxbcRegMask regMask; - DxbcSystemValue sv; - }; - /** * \brief Compiler error code * diff --git a/src/dxbc/dxbc_compiler_2.cpp b/src/dxbc/dxbc_compiler_2.cpp new file mode 100644 index 00000000..e6dfd22f --- /dev/null +++ b/src/dxbc/dxbc_compiler_2.cpp @@ -0,0 +1,1600 @@ +#include "dxbc_compiler_2.h" + +namespace dxvk { + + constexpr uint32_t PerVertex_Position = 0; + constexpr uint32_t PerVertex_PointSize = 1; + constexpr uint32_t PerVertex_CullDist = 2; + constexpr uint32_t PerVertex_ClipDist = 3; + + DxbcCompiler2::DxbcCompiler2( + const DxbcProgramVersion& version, + const Rc& isgn, + const Rc& osgn) + : m_version (version), + m_isgn (isgn), + m_osgn (osgn) { + // Declare an entry point ID. We'll need it during the + // initialization phase where the execution mode is set. + m_entryPointId = m_module.allocateId(); + + // Set the memory model. This is the same for all shaders. + m_module.setMemoryModel( + spv::AddressingModelLogical, + spv::MemoryModelGLSL450); + + // Make sure our interface registers are clear + for (uint32_t i = 0; i < DxbcMaxInterfaceRegs; i++) { + m_ps.oTypes.at(i).ctype = DxbcScalarType::Float32; + m_ps.oTypes.at(i).ccount = 0; + + m_vRegs.at(i) = 0; + m_oRegs.at(i) = 0; + } + + // Initialize the shader module with capabilities + // etc. Each shader type has its own peculiarities. 
+ switch (m_version.type()) { + case DxbcProgramType::VertexShader: this->emitVsInit(); break; + case DxbcProgramType::PixelShader: this->emitPsInit(); break; + default: throw DxvkError("DxbcCompiler: Unsupported program type"); + } + } + + + DxbcCompiler2::~DxbcCompiler2() { + + } + + + void DxbcCompiler2::processInstruction(const DxbcShaderInstruction& ins) { + switch (ins.op) { + case DxbcOpcode::DclGlobalFlags: + return this->emitDclGlobalFlags(ins); + + case DxbcOpcode::DclTemps: + return this->emitDclTemps(ins); + + case DxbcOpcode::DclInput: + case DxbcOpcode::DclInputSgv: + case DxbcOpcode::DclInputSiv: + case DxbcOpcode::DclInputPs: + case DxbcOpcode::DclInputPsSgv: + case DxbcOpcode::DclInputPsSiv: + case DxbcOpcode::DclOutput: + case DxbcOpcode::DclOutputSgv: + case DxbcOpcode::DclOutputSiv: + return this->emitDclInterfaceReg(ins); + + case DxbcOpcode::DclConstantBuffer: + return this->emitDclConstantBuffer(ins); + + case DxbcOpcode::DclSampler: + return this->emitDclSampler(ins); + + case DxbcOpcode::DclResource: + return this->emitDclResource(ins); + + case DxbcOpcode::Add: + case DxbcOpcode::Div: + case DxbcOpcode::Exp: + case DxbcOpcode::Log: + case DxbcOpcode::Mad: + case DxbcOpcode::Max: + case DxbcOpcode::Min: + case DxbcOpcode::Mul: + case DxbcOpcode::Mov: + case DxbcOpcode::Rsq: + case DxbcOpcode::Sqrt: + case DxbcOpcode::IAdd: + case DxbcOpcode::IMad: + case DxbcOpcode::IMax: + case DxbcOpcode::IMin: + case DxbcOpcode::INeg: + return this->emitVectorAlu(ins); + + case DxbcOpcode::Movc: + return this->emitVectorCmov(ins); + + case DxbcOpcode::Eq: + case DxbcOpcode::Ge: + case DxbcOpcode::Lt: + case DxbcOpcode::Ne: + case DxbcOpcode::IEq: + case DxbcOpcode::IGe: + case DxbcOpcode::ILt: + case DxbcOpcode::INe: + return this->emitVectorCmp(ins); + + case DxbcOpcode::Dp2: + case DxbcOpcode::Dp3: + case DxbcOpcode::Dp4: + return this->emitVectorDot(ins); + + case DxbcOpcode::IMul: + return this->emitVectorImul(ins); + + case DxbcOpcode::SinCos: + 
return this->emitVectorSinCos(ins); + + case DxbcOpcode::Sample: + return this->emitSample(ins); + + case DxbcOpcode::Ret: + return this->emitRet(ins); + + default: + Logger::warn( + str::format("DxbcCompiler: Unhandled opcode: ", + ins.op)); + } + } + + + Rc DxbcCompiler2::finalize() { + // Define the actual 'main' function of the shader + m_module.functionBegin( + m_module.defVoidType(), + m_entryPointId, + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), + spv::FunctionControlMaskNone); + m_module.opLabel(m_module.allocateId()); + + // Depending on the shader type, this will prepare + // input registers, call various shader functions + // and write back the output registers. + switch (m_version.type()) { + case DxbcProgramType::VertexShader: this->emitVsFinalize(); break; + case DxbcProgramType::PixelShader: this->emitPsFinalize(); break; + default: throw DxvkError("DxbcCompiler: Unsupported program type"); + } + + // End main function + m_module.opReturn(); + m_module.functionEnd(); + + // Declare the entry point, we now have all the + // information we need, including the interfaces + m_module.addEntryPoint(m_entryPointId, + m_version.executionModel(), "main", + m_entryPointInterfaces.size(), + m_entryPointInterfaces.data()); + m_module.setDebugName(m_entryPointId, "main"); + + // Create the shader module object + return new DxvkShader( + m_version.shaderStage(), + m_resourceSlots.size(), + m_resourceSlots.data(), + m_module.compile()); + } + + + void DxbcCompiler2::emitDclGlobalFlags(const DxbcShaderInstruction& ins) { + // TODO implement properly + } + + + void DxbcCompiler2::emitDclTemps(const DxbcShaderInstruction& ins) { + // dcl_temps has one operand: + // (imm0) Number of temp registers + const uint32_t oldCount = m_rRegs.size(); + const uint32_t newCount = ins.imm[0].u32; + + if (newCount > oldCount) { + m_rRegs.resize(newCount); + + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + 
info.sclass = spv::StorageClassPrivate; + + for (uint32_t i = oldCount; i < newCount; i++) { + const uint32_t varId = this->emitNewVariable(info); + m_module.setDebugName(varId, str::format("r", i).c_str()); + m_rRegs.at(i) = varId; + } + } + } + + + void DxbcCompiler2::emitDclInterfaceReg(const DxbcShaderInstruction& ins) { + // dcl_input and dcl_output instructions + // have the following operands: + // (dst0) The register to declare + // (imm0) The system value (optional) + uint32_t regDim = 0; + uint32_t regIdx = 0; + + // In the vertex and fragment shader stage, the + // operand indices will have the following format: + // (0) Register index + // + // In other stages, the input and output registers + // may be declared as arrays of a fixed size: + // (0) Array length + // (1) Register index + if (ins.dst[0].idxDim == 2) { + regDim = ins.dst[0].idx[0].offset; + regIdx = ins.dst[0].idx[1].offset; + } else if (ins.dst[0].idxDim == 1) { + regIdx = ins.dst[0].idx[0].offset; + } else { + Logger::err(str::format( + "DxbcCompiler: ", ins.op, + ": Invalid index dimension")); + return; + } + + // This declaration may map an input or output register + // to a system value. If that is the case, the system + // value type is stored in the first immediate (imm0). + const bool hasSv = + ins.op == DxbcOpcode::DclInputSgv + || ins.op == DxbcOpcode::DclInputSiv + || ins.op == DxbcOpcode::DclInputPsSgv + || ins.op == DxbcOpcode::DclInputPsSiv + || ins.op == DxbcOpcode::DclOutputSgv + || ins.op == DxbcOpcode::DclOutputSiv; + + DxbcSystemValue sv = DxbcSystemValue::None; + + if (hasSv) + sv = static_cast<DxbcSystemValue>(ins.imm[0].u32); + + // In the pixel shader, inputs are declared with an + // interpolation mode that is part of the op token.
+ const bool hasInterpolationMode = + ins.op == DxbcOpcode::DclInputPs + || ins.op == DxbcOpcode::DclInputPsSiv; + + DxbcInterpolationMode im = DxbcInterpolationMode::Undefined; + + if (hasInterpolationMode) + im = ins.controls.interpolation; + + // Declare the actual input/output variable + switch (ins.op) { + case DxbcOpcode::DclInput: + case DxbcOpcode::DclInputSgv: + case DxbcOpcode::DclInputSiv: + case DxbcOpcode::DclInputPs: + case DxbcOpcode::DclInputPsSgv: + case DxbcOpcode::DclInputPsSiv: + this->emitDclInput(regIdx, regDim, ins.dst[0].mask, sv, im); + break; + + case DxbcOpcode::DclOutput: + case DxbcOpcode::DclOutputSgv: + case DxbcOpcode::DclOutputSiv: + this->emitDclOutput(regIdx, regDim, ins.dst[0].mask, sv, im); + break; + + default: + Logger::err(str::format( + "DxbcCompiler: Unexpected opcode: ", + ins.op)); + } + } + + + void DxbcCompiler2::emitDclInput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im) { + if (regDim != 0) { + Logger::err("DxbcCompiler: Input arrays not yet supported"); + return; + } + + // Avoid declaring the same variable multiple times. + // This may happen when multiple system values are + // mapped to different parts of the same register. 
+ if (m_vRegs.at(regIdx) == 0) { + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + info.sclass = spv::StorageClassInput; + + const uint32_t varId = this->emitNewVariable(info); + + m_module.decorateLocation(varId, regIdx); + m_module.setDebugName(varId, str::format("v", regIdx).c_str()); + m_entryPointInterfaces.push_back(varId); + + m_vRegs.at(regIdx) = varId; + + // Interpolation mode, used in pixel shaders + if (im == DxbcInterpolationMode::Constant) + m_module.decorate(varId, spv::DecorationFlat); + + if (im == DxbcInterpolationMode::LinearCentroid + || im == DxbcInterpolationMode::LinearNoPerspectiveCentroid) + m_module.decorate(varId, spv::DecorationCentroid); + + if (im == DxbcInterpolationMode::LinearNoPerspective + || im == DxbcInterpolationMode::LinearNoPerspectiveCentroid + || im == DxbcInterpolationMode::LinearNoPerspectiveSample) + m_module.decorate(varId, spv::DecorationNoPerspective); + + if (im == DxbcInterpolationMode::LinearSample + || im == DxbcInterpolationMode::LinearNoPerspectiveSample) + m_module.decorate(varId, spv::DecorationSample); + } + + // Add a new system value mapping if needed + // TODO declare SV if necessary + if (sv != DxbcSystemValue::None) + m_vMappings.push_back({ regIdx, regMask, sv }); + } + + + void DxbcCompiler2::emitDclOutput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im) { + if (regDim != 0) { + Logger::err("DxbcCompiler: Output arrays not yet supported"); + return; + } + + // Avoid declaring the same variable multiple times. + // This may happen when multiple system values are + // mapped to different parts of the same register. 
+ if (m_oRegs.at(regIdx) == 0) { + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + info.sclass = spv::StorageClassOutput; + + const uint32_t varId = this->emitNewVariable(info); + + m_module.decorateLocation(varId, regIdx); + m_module.setDebugName(varId, str::format("o", regIdx).c_str()); + m_entryPointInterfaces.push_back(varId); + + m_oRegs.at(regIdx) = varId; + } + + + // Add a new system value mapping if needed + // TODO declare SV if necessary + if (sv != DxbcSystemValue::None) + m_oMappings.push_back({ regIdx, regMask, sv }); + } + + + void DxbcCompiler2::emitDclConstantBuffer(const DxbcShaderInstruction& ins) { + // dcl_constant_buffer has one operand with two indices: + // (0) Constant buffer register ID (cb#) + // (1) Number of constants in the buffer + const uint32_t bufferId = ins.dst[0].idx[0].offset; + const uint32_t elementCount = ins.dst[0].idx[1].offset; + + // Uniform buffer data is stored as a fixed-size array + // of 4x32-bit vectors. SPIR-V requires explicit strides. + const uint32_t arrayType = m_module.defArrayTypeUnique( + getVectorTypeId({ DxbcScalarType::Float32, 4 }), + m_module.constu32(elementCount)); + m_module.decorateArrayStride(arrayType, 16); + + // SPIR-V requires us to put that array into a + // struct and decorate that struct as a block. + const uint32_t structType = m_module.defStructTypeUnique(1, &arrayType); + m_module.memberDecorateOffset(structType, 0, 0); + m_module.decorateBlock(structType); + + // Variable that we'll use to access the buffer + const uint32_t varId = m_module.newVar( + m_module.defPointerType(structType, spv::StorageClassUniform), + spv::StorageClassUniform); + + m_module.setDebugName(varId, + str::format("cb", bufferId).c_str()); + + m_constantBuffers.at(bufferId).varId = varId; + m_constantBuffers.at(bufferId).size = elementCount; + + // Compute the DXVK binding slot index for the buffer. + // D3D11 needs to bind the actual buffers to this slot. 
+ const uint32_t bindingId = computeResourceSlotId( + m_version.type(), DxbcBindingType::ConstantBuffer, + bufferId); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + // Store descriptor info for the shader interface + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + m_resourceSlots.push_back(resource); + } + + + void DxbcCompiler2::emitDclSampler(const DxbcShaderInstruction& ins) { + // dclSampler takes one operand: + // (dst0) The sampler register to declare + // TODO implement sampler mode (default / comparison / mono) + const uint32_t samplerId = ins.dst[0].idx[0].offset; + + // The sampler type is opaque, but we still have to + // define a pointer and a variable in order to use it + const uint32_t samplerType = m_module.defSamplerType(); + const uint32_t samplerPtrType = m_module.defPointerType( + samplerType, spv::StorageClassUniformConstant); + + // Define the sampler variable + const uint32_t varId = m_module.newVar(samplerPtrType, + spv::StorageClassUniformConstant); + m_module.setDebugName(varId, + str::format("s", samplerId).c_str()); + + m_samplers.at(samplerId).varId = varId; + m_samplers.at(samplerId).typeId = samplerType; + + // Compute binding slot index for the sampler + const uint32_t bindingId = computeResourceSlotId( + m_version.type(), DxbcBindingType::ImageSampler, samplerId); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + // Store descriptor info for the shader interface + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_SAMPLER; + m_resourceSlots.push_back(resource); + } + + + void DxbcCompiler2::emitDclResource(const DxbcShaderInstruction& ins) { + // dclResource takes two operands: + // (dst0) The resource register ID + // (imm0) The resource return type + const uint32_t registerId = ins.dst[0].idx[0].offset; + + // Defines the type of the
resource (texture2D, ...) + const DxbcResourceDim resourceType = ins.controls.resourceDim; + + // Defines the type of a read operation. DXBC has the ability + // to define four different types whereas SPIR-V only allows + // one, but in practice this should not be much of a problem. + // The x/y/z/w return types are extracted from imm0 below; if + // they differ, a warning is logged and only xType is used. + auto xType = static_cast<DxbcResourceReturnType>( + bit::extract(ins.imm[0].u32, 0, 3)); + auto yType = static_cast<DxbcResourceReturnType>( + bit::extract(ins.imm[0].u32, 4, 7)); + auto zType = static_cast<DxbcResourceReturnType>( + bit::extract(ins.imm[0].u32, 8, 11)); + auto wType = static_cast<DxbcResourceReturnType>( + bit::extract(ins.imm[0].u32, 12, 15)); + + if ((xType != yType) || (xType != zType) || (xType != wType)) + Logger::warn("DxbcCompiler: dcl_resource: Ignoring resource return types"); + + // Declare the actual sampled type + uint32_t sampledTypeId = 0; + + switch (xType) { + case DxbcResourceReturnType::Float: sampledTypeId = m_module.defFloatType(32); break; + case DxbcResourceReturnType::Sint: sampledTypeId = m_module.defIntType (32, 1); break; + case DxbcResourceReturnType::Uint: sampledTypeId = m_module.defIntType (32, 0); break; + default: throw DxvkError(str::format("DxbcCompiler: Invalid sampled type: ", xType)); + } + + // Declare the resource type + uint32_t textureTypeId = 0; + + switch (resourceType) { + case DxbcResourceDim::Texture1D: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::Dim1D, 0, 0, 0, 1, + spv::ImageFormatUnknown); + break; + + case DxbcResourceDim::Texture1DArr: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::Dim1D, 0, 1, 0, 1, + spv::ImageFormatUnknown); + break; + + case DxbcResourceDim::Texture2D: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::Dim2D, 0, 0, 0, 1, + spv::ImageFormatUnknown); + break; + + case DxbcResourceDim::Texture2DArr: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::Dim2D, 0, 1, 0, 1, + spv::ImageFormatUnknown); + break; + + case DxbcResourceDim::Texture3D: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::Dim3D, 0, 0, 0, 1, +
spv::ImageFormatUnknown); + break; + + case DxbcResourceDim::TextureCube: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::DimCube, 0, 0, 0, 1, + spv::ImageFormatUnknown); + break; + + case DxbcResourceDim::TextureCubeArr: + textureTypeId = m_module.defImageType( + sampledTypeId, spv::DimCube, 0, 1, 0, 1, + spv::ImageFormatUnknown); + break; + + default: + throw DxvkError(str::format("DxbcCompiler: Unsupported resource type: ", resourceType)); + } + + const uint32_t resourcePtrType = m_module.defPointerType( + textureTypeId, spv::StorageClassUniformConstant); + + const uint32_t varId = m_module.newVar(resourcePtrType, + spv::StorageClassUniformConstant); + + m_module.setDebugName(varId, + str::format("t", registerId).c_str()); + + m_textures.at(registerId).varId = varId; + m_textures.at(registerId).sampledTypeId = sampledTypeId; + m_textures.at(registerId).textureTypeId = textureTypeId; + + // Compute the DXVK binding slot index for the resource. + // D3D11 needs to bind the actual resource to this slot. 
+ const uint32_t bindingId = computeResourceSlotId( + m_version.type(), DxbcBindingType::ShaderResource, registerId); + + m_module.decorateDescriptorSet(varId, 0); + m_module.decorateBinding(varId, bindingId); + + // Store descriptor info for the shader interface + DxvkResourceSlot resource; + resource.slot = bindingId; + resource.type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + m_resourceSlots.push_back(resource); + } + + + void DxbcCompiler2::emitVectorAlu(const DxbcShaderInstruction& ins) { + std::array src; + + for (uint32_t i = 0; i < ins.srcCount; i++) + src.at(i) = emitRegisterLoad(ins.src[i], ins.dst[0].mask); + + DxbcRegisterValue dst; + dst.type.ctype = ins.dst[0].dataType; + dst.type.ccount = ins.dst[0].mask.setCount(); + + const uint32_t typeId = getVectorTypeId(dst.type); + + switch (ins.op) { + case DxbcOpcode::Add: + dst.id = m_module.opFAdd(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Div: + dst.id = m_module.opFDiv(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Exp: + dst.id = m_module.opExp2( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Log: + dst.id = m_module.opLog2( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Mad: + dst.id = m_module.opFFma(typeId, + src.at(0).id, src.at(1).id, src.at(2).id); + break; + + case DxbcOpcode::Max: + dst.id = m_module.opFMax(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Min: + dst.id = m_module.opFMin(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Mul: + dst.id = m_module.opFMul(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Mov: + dst.id = src.at(0).id; + break; + + case DxbcOpcode::Sqrt: + dst.id = m_module.opSqrt( + typeId, src.at(0).id); + break; + + case DxbcOpcode::Rsq: + dst.id = m_module.opInverseSqrt( + typeId, src.at(0).id); + break; + + case DxbcOpcode::IAdd: + dst.id = m_module.opIAdd(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IMad: + dst.id = 
m_module.opIAdd(typeId, + m_module.opIMul(typeId, + src.at(0).id, src.at(1).id), + src.at(2).id); + break; + + case DxbcOpcode::IMax: + dst.id = m_module.opSMax(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IMin: + dst.id = m_module.opSMin(typeId, + src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::INeg: + dst.id = m_module.opSNegate( + typeId, src.at(0).id); + break; + + default: + Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + // Store computed value + dst = emitDstOperandModifiers(dst, ins.modifiers); + emitRegisterStore(ins.dst[0], dst); + } + + + void DxbcCompiler2::emitVectorCmov(const DxbcShaderInstruction& ins) { + // movc has four operands: + // (dst0) The destination register + // (src0) The condition vector + // (src1) Vector to select from if the condition is not 0 + // (src2) Vector to select from if the condition is 0 + const DxbcRegisterValue condition = emitRegisterLoad(ins.src[0], ins.dst[0].mask); + const DxbcRegisterValue selectTrue = emitRegisterLoad(ins.src[1], ins.dst[0].mask); + const DxbcRegisterValue selectFalse = emitRegisterLoad(ins.src[2], ins.dst[0].mask); + + const uint32_t componentCount = ins.dst[0].mask.setCount(); + + // We'll compare against a vector of zeroes to generate a + // boolean vector, which in turn will be used by OpSelect + uint32_t zeroType = m_module.defIntType(32, 0); + uint32_t boolType = m_module.defBoolType(); + + uint32_t zero = m_module.constu32(0); + + if (componentCount > 1) { + zeroType = m_module.defVectorType(zeroType, componentCount); + boolType = m_module.defVectorType(boolType, componentCount); + + const std::array zeroVec = { zero, zero, zero, zero }; + zero = m_module.constComposite(zeroType, componentCount, zeroVec.data()); + } + + + // Use the component mask to select the vector components + DxbcRegisterValue result; + result.type.ctype = ins.dst[0].dataType; + result.type.ccount = componentCount; + result.id =
m_module.opSelect( + getVectorTypeId(result.type), + m_module.opINotEqual(boolType, condition.id, zero), + selectTrue.id, selectFalse.id); + + // Apply result modifiers to floating-point results + result = emitDstOperandModifiers(result, ins.modifiers); + emitRegisterStore(ins.dst[0], result); + } + + void DxbcCompiler2::emitVectorCmp(const DxbcShaderInstruction& ins) { + // Compare instructions have three operands: + // (dst0) The destination register + // (src0) The first vector to compare + // (src1) The second vector to compare + const std::array src = { + emitRegisterLoad(ins.src[0], ins.dst[0].mask), + emitRegisterLoad(ins.src[1], ins.dst[0].mask), + }; + + const uint32_t componentCount = ins.dst[0].mask.setCount(); + + // Condition, which is a boolean vector used + // to select between the ~0u and 0u vectors. + uint32_t condition = 0; + uint32_t conditionType = m_module.defBoolType(); + + if (componentCount > 1) + conditionType = m_module.defVectorType(conditionType, componentCount); + + switch (ins.op) { + case DxbcOpcode::Eq: + condition = m_module.opFOrdEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Ge: + condition = m_module.opFOrdGreaterThanEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Lt: + condition = m_module.opFOrdLessThan( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::Ne: + condition = m_module.opFOrdNotEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IEq: + condition = m_module.opIEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::IGe: + condition = m_module.opSGreaterThanEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::ILt: + condition = m_module.opSLessThan( + conditionType, src.at(0).id, src.at(1).id); + break; + + case DxbcOpcode::INe: + condition = m_module.opINotEqual( + conditionType, src.at(0).id, src.at(1).id); + break; + + default: + 
Logger::warn(str::format( + "DxbcCompiler: Unhandled instruction: ", + ins.op)); + return; + } + + // Generate constant vectors for selection + uint32_t sFalse = m_module.constu32( 0u); + uint32_t sTrue = m_module.constu32(~0u); + + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = componentCount; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (componentCount > 1) { + const std::array vFalse = { sFalse, sFalse, sFalse, sFalse }; + const std::array vTrue = { sTrue, sTrue, sTrue, sTrue }; + + sFalse = m_module.constComposite(typeId, componentCount, vFalse.data()); + sTrue = m_module.constComposite(typeId, componentCount, vTrue .data()); + } + + // Perform component-wise mask selection + // based on the condition evaluated above. + result.id = m_module.opSelect( + typeId, condition, sTrue, sFalse); + + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler2::emitVectorDot(const DxbcShaderInstruction& ins) { + const DxbcRegMask srcMask(true, + ins.op >= DxbcOpcode::Dp2, + ins.op >= DxbcOpcode::Dp3, + ins.op >= DxbcOpcode::Dp4); + + const std::array src = { + emitRegisterLoad(ins.src[0], srcMask), + emitRegisterLoad(ins.src[1], srcMask), + }; + + DxbcRegisterValue dst; + dst.type.ctype = ins.dst[0].dataType; + dst.type.ccount = 1; + + dst.id = m_module.opDot( + getVectorTypeId(dst.type), + src.at(0).id, + src.at(1).id); + + dst = emitDstOperandModifiers(dst, ins.modifiers); + emitRegisterStore(ins.dst[0], dst); + } + + + void DxbcCompiler2::emitVectorImul(const DxbcShaderInstruction& ins) { + // imul and umul have four operands: + // (dst0) High destination register + // (dst1) Low destination register + // (src0) The first vector to multiply + // (src1) The second vector to multiply + if (ins.dst[0].type == DxbcOperandType::Null) { + if (ins.dst[1].type == DxbcOperandType::Null) + return; + + // If dst0 is NULL, this instruction behaves just + // like any other three-operand ALU instruction +
const std::array src = { + emitRegisterLoad(ins.src[0], ins.dst[1].mask), + emitRegisterLoad(ins.src[1], ins.dst[1].mask), + }; + + DxbcRegisterValue result; + result.type.ctype = ins.dst[1].dataType; + result.type.ccount = ins.dst[1].mask.setCount(); + result.id = m_module.opIMul( + getVectorTypeId(result.type), + src.at(0).id, src.at(1).id); + + result = emitDstOperandModifiers(result, ins.modifiers); + emitRegisterStore(ins.dst[1], result); + } else { + // TODO implement this + Logger::warn("DxbcCompiler: Extended Imul not yet supported"); + } + } + + + void DxbcCompiler2::emitVectorSinCos(const DxbcShaderInstruction& ins) { + // sincos has three operands: + // (dst0) Destination register for sin(x) + // (dst1) Destination register for cos(x) + // (src0) Source operand x + + // Load source operand as 32-bit float vector. + const DxbcRegisterValue srcValue = emitRegisterLoad( + ins.src[0], DxbcRegMask(true, true, true, true)); + + // Either output may be DxbcOperandType::Null, in + // which case we don't have to generate any code. 
+ if (ins.dst[0].type != DxbcOperandType::Null) { + const DxbcRegisterValue sinInput = + emitRegisterExtract(srcValue, ins.dst[0].mask); + + DxbcRegisterValue sin; + sin.type = sinInput.type; + sin.id = m_module.opSin( + getVectorTypeId(sin.type), + sinInput.id); + + emitRegisterStore(ins.dst[0], sin); + } + + if (ins.dst[1].type != DxbcOperandType::Null) { + const DxbcRegisterValue cosInput = + emitRegisterExtract(srcValue, ins.dst[1].mask); + + DxbcRegisterValue cos; + cos.type = cosInput.type; + cos.id = m_module.opCos( + getVectorTypeId(cos.type), + cosInput.id); + + // dst1 is the cosine output, so it must be fed + // by the cosine op rather than a second sine. + emitRegisterStore(ins.dst[1], cos); + } + } + + + void DxbcCompiler2::emitSample( + const DxbcShaderInstruction& ins) { + // TODO support address offset + // TODO support more sample ops + + // sample has four operands: + // (dst0) The destination register + // (src0) Texture coordinates + // (src1) The texture itself + // (src2) The sampler object + const DxbcRegister& texCoordReg = ins.src[0]; + const DxbcRegister& textureReg = ins.src[1]; + const DxbcRegister& samplerReg = ins.src[2]; + + // Texture and sampler register IDs + const uint32_t textureId = textureReg.idx[0].offset; + const uint32_t samplerId = samplerReg.idx[0].offset; + + // Load the texture coordinates. SPIR-V allows these + // to be float4 even if not all components are used.
+ const DxbcRegisterValue coord = emitRegisterLoad( + texCoordReg, DxbcRegMask(true, true, true, true)); + + // Combine the texture and the sampler into a sampled image + const uint32_t sampledImageType = m_module.defSampledImageType( + m_textures.at(textureId).textureTypeId); + + const uint32_t sampledImageId = m_module.opSampledImage( + sampledImageType, + m_module.opLoad( + m_textures.at(textureId).textureTypeId, + m_textures.at(textureId).varId), + m_module.opLoad( + m_samplers.at(samplerId).typeId, + m_samplers.at(samplerId).varId)); + + // Sampling an image in SPIR-V always returns a four-component + // vector, so we need to declare the corresponding type here + // TODO infer sampled type properly + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + result.id = m_module.opImageSampleImplicitLod( + getVectorTypeId(result.type), + sampledImageId, coord.id); + + // Swizzle components using the texture swizzle + // and the destination operand's write mask + result = emitRegisterSwizzle(result, + textureReg.swizzle, ins.dst[0].mask); + + emitRegisterStore(ins.dst[0], result); + } + + + void DxbcCompiler2::emitRet(const DxbcShaderInstruction& ins) { + // TODO implement properly + m_module.opReturn(); + m_module.functionEnd(); + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterBitcast( + DxbcRegisterValue srcValue, + DxbcScalarType dstType) { + if (srcValue.type.ctype == dstType) + return srcValue; + + // TODO support 64-bit values + DxbcRegisterValue result; + result.type.ctype = dstType; + result.type.ccount = srcValue.type.ccount; + result.id = m_module.opBitcast( + getVectorTypeId(result.type), + srcValue.id); + return result; + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterSwizzle( + DxbcRegisterValue value, + DxbcRegSwizzle swizzle, + DxbcRegMask writeMask) { + // Source component index for each written destination + // component; assumes at most four components per value. + std::array<uint32_t, 4> indices; + + uint32_t dstIndex = 0; + + for (uint32_t i = 0; i < value.type.ccount; i++) { + if (writeMask[i]) +
indices[dstIndex++] = swizzle[i]; + } + + // If the swizzle combined with the mask can be reduced + // to a no-op, we don't need to insert any instructions. + bool isIdentitySwizzle = dstIndex == value.type.ccount; + + for (uint32_t i = 0; i < dstIndex && isIdentitySwizzle; i++) + isIdentitySwizzle &= indices[i] == i; + + if (isIdentitySwizzle) + return value; + + // Use OpCompositeExtract if the resulting vector contains + // only one component, and OpVectorShuffle if it is a vector. + DxbcRegisterValue result; + result.type.ctype = value.type.ctype; + result.type.ccount = dstIndex; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (dstIndex == 1) { + result.id = m_module.opCompositeExtract( + typeId, value.id, 1, indices.data()); + } else { + result.id = m_module.opVectorShuffle( + typeId, value.id, value.id, + dstIndex, indices.data()); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterExtract( + DxbcRegisterValue value, + DxbcRegMask mask) { + return emitRegisterSwizzle(value, + DxbcRegSwizzle(0, 1, 2, 3), mask); + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterInsert( + DxbcRegisterValue dstValue, + DxbcRegisterValue srcValue, + DxbcRegMask srcMask) { + DxbcRegisterValue result; + result.type = dstValue.type; + + const uint32_t typeId = getVectorTypeId(result.type); + + if (srcMask.setCount() == 0) { + // Nothing to do if the insertion mask is empty + result.id = dstValue.id; + } else if (dstValue.type.ccount == 1) { + // Both values are scalar, so the first component + // of the write mask decides which one to take. + result.id = srcMask[0] ? srcValue.id : dstValue.id; + } else if (srcValue.type.ccount == 1) { + // The source value is scalar. Since OpVectorShuffle + // requires both arguments to be vectors, we have to + // use OpCompositeInsert to modify the vector instead. 
+ const uint32_t componentId = srcMask.firstSet(); + + result.id = m_module.opCompositeInsert(typeId, + srcValue.id, dstValue.id, 1, &componentId); + } else { + // Both arguments are vectors. We can determine which + // components to take from which vector and use the + // OpVectorShuffle instruction. + std::array components; + uint32_t srcComponentId = dstValue.type.ccount; + + for (uint32_t i = 0; i < dstValue.type.ccount; i++) + components.at(i) = srcMask[i] ? srcComponentId++ : i; + + result.id = m_module.opVectorShuffle( + typeId, dstValue.id, srcValue.id, + dstValue.type.ccount, components.data()); + } + + return result; + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterExtend( + DxbcRegisterValue value, + uint32_t size) { + if (size == 1) + return value; + + std::array ids = { + value.id, value.id, + value.id, value.id, + }; + + DxbcRegisterValue result; + result.type.ctype = value.type.ctype; + result.type.ccount = size; + result.id = m_module.opCompositeConstruct( + getVectorTypeId(result.type), + size, ids.data()); + return result; + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterAbsolute( + DxbcRegisterValue value) { + const uint32_t typeId = getVectorTypeId(value.type); + + switch (value.type.ctype) { + case DxbcScalarType::Float32: value.id = m_module.opFAbs(typeId, value.id); break; + case DxbcScalarType::Sint32: value.id = m_module.opSAbs(typeId, value.id); break; + default: Logger::warn("DxbcCompiler: Cannot get absolute value for given type"); + } + + return value; + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterNegate( + DxbcRegisterValue value) { + const uint32_t typeId = getVectorTypeId(value.type); + + switch (value.type.ctype) { + case DxbcScalarType::Float32: value.id = m_module.opFNegate(typeId, value.id); break; + case DxbcScalarType::Sint32: value.id = m_module.opSNegate(typeId, value.id); break; + default: Logger::warn("DxbcCompiler: Cannot negate given type"); + } + + return value; + } + + + DxbcRegisterValue 
DxbcCompiler2::emitSrcOperandModifiers( + DxbcRegisterValue value, + DxbcRegModifiers modifiers) { + if (modifiers.test(DxbcRegModifier::Abs)) + value = emitRegisterAbsolute(value); + + if (modifiers.test(DxbcRegModifier::Neg)) + value = emitRegisterNegate(value); + return value; + } + + + DxbcRegisterValue DxbcCompiler2::emitDstOperandModifiers( + DxbcRegisterValue value, + DxbcOpModifiers modifiers) { + const uint32_t typeId = getVectorTypeId(value.type); + + if (value.type.ctype == DxbcScalarType::Float32) { + // Saturating only makes sense on floats + if (modifiers.saturate) { + value.id = m_module.opFClamp( + typeId, value.id, + m_module.constf32(0.0f), + m_module.constf32(1.0f)); + } + } + + return value; + } + + + DxbcRegisterPointer DxbcCompiler2::emitGetTempPtr( + const DxbcRegister& operand) { + // r# regs are indexed as follows: + // (0) register index (immediate) + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + result.id = m_rRegs.at(operand.idx[0].offset); + return result; + } + + + DxbcRegisterPointer DxbcCompiler2::emitGetInputPtr( + const DxbcRegister& operand) { + // In the vertex and pixel stages, + // v# regs are indexed as follows: + // (0) register index (relative) + // + // In the tessellation and geometry + // stages, the index has two dimensions: + // (0) vertex index (relative) + // (1) register index (relative) + if (operand.idxDim != 1) + throw DxvkError("DxbcCompiler: 2D index for v# not yet supported"); + + // We don't support two-dimensional indices yet + const uint32_t registerId = operand.idx[0].offset; + + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + result.id = m_vRegs.at(registerId); + return result; + } + + + DxbcRegisterPointer DxbcCompiler2::emitGetOutputPtr( + const DxbcRegister& operand) { + // Same index format as input registers, except that + // outputs cannot be accessed with a relative index. 
+ if (operand.idxDim != 1) + throw DxvkError("DxbcCompiler: 2D index for o# not yet supported"); + + // We don't support two-dimensional indices yet + const uint32_t registerId = operand.idx[0].offset; + + // In the pixel shader, output registers are typed, + // whereas they are float4 in all other stages. + if (m_version.type() == DxbcProgramType::PixelShader) { + DxbcRegisterPointer result; + result.type = m_ps.oTypes.at(registerId); + result.id = m_oRegs.at(registerId); + return result; + } else { + DxbcRegisterPointer result; + result.type.ctype = DxbcScalarType::Float32; + result.type.ccount = 4; + result.id = m_oRegs.at(registerId); + return result; + } + } + + + DxbcRegisterPointer DxbcCompiler2::emitGetConstBufPtr( + const DxbcRegister& operand) { + // Constant buffers take a two-dimensional index: + // (0) register index (immediate) + // (1) constant offset (relative) + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + info.sclass = spv::StorageClassUniform; + + const uint32_t regId = operand.idx[0].offset; + const DxbcRegisterValue constId = emitIndexLoad(operand.idx[1]); + + const uint32_t ptrTypeId = getPointerTypeId(info); + + const std::array indices = { + m_module.consti32(0), constId.id + }; + + DxbcRegisterPointer result; + result.type = info.type; + result.id = m_module.opAccessChain(ptrTypeId, + m_constantBuffers.at(regId).varId, + indices.size(), indices.data()); + return result; + } + + + DxbcRegisterPointer DxbcCompiler2::emitGetOperandPtr( + const DxbcRegister& operand) { + switch (operand.type) { + case DxbcOperandType::Temp: + return emitGetTempPtr(operand); + + case DxbcOperandType::Input: + return emitGetInputPtr(operand); + + case DxbcOperandType::Output: + return emitGetOutputPtr(operand); + + case DxbcOperandType::ConstantBuffer: + return emitGetConstBufPtr(operand); + + default: + throw DxvkError(str::format( + "DxbcCompiler: Unhandled operand type: ", + operand.type)); + } + } + + + 
DxbcRegisterValue DxbcCompiler2::emitIndexLoad( + DxbcRegIndex index) { + if (index.relReg != nullptr) { + DxbcRegisterValue result = emitRegisterLoad( + *index.relReg, DxbcRegMask(true, false, false, false)); + + if (index.offset != 0) { + result.id = m_module.opIAdd( + getVectorTypeId(result.type), result.id, + m_module.consti32(index.offset)); + } + + return result; + } else { + DxbcRegisterValue result; + result.type.ctype = DxbcScalarType::Sint32; + result.type.ccount = 1; + result.id = m_module.consti32(index.offset); + return result; + } + } + + + DxbcRegisterValue DxbcCompiler2::emitValueLoad( + DxbcRegisterPointer ptr) { + DxbcRegisterValue result; + result.type = ptr.type; + result.id = m_module.opLoad( + getVectorTypeId(result.type), + ptr.id); + return result; + } + + + void DxbcCompiler2::emitValueStore( + DxbcRegisterPointer ptr, + DxbcRegisterValue value, + DxbcRegMask writeMask) { + // If the component types are not compatible, + // we need to bit-cast the source variable. + if (value.type.ctype != ptr.type.ctype) + value = emitRegisterBitcast(value, ptr.type.ctype); + + // If the source value consists of only one component, + // it is stored in all components of the destination. 
+ if (value.type.ccount == 1) + value = emitRegisterExtend(value, writeMask.setCount()); + + if (ptr.type.ccount == writeMask.setCount()) { + // Simple case: We write to the entire register + m_module.opStore(ptr.id, value.id); + } else { + // We only write to part of the destination + // register, so we need to load and modify it + DxbcRegisterValue tmp = emitValueLoad(ptr); + tmp = emitRegisterInsert(tmp, value, writeMask); + + m_module.opStore(ptr.id, tmp.id); + } + } + + + DxbcRegisterValue DxbcCompiler2::emitRegisterLoad( + const DxbcRegister& reg, + DxbcRegMask writeMask) { + if (reg.type == DxbcOperandType::Imm32) { + DxbcRegisterValue result; + + if (reg.componentCount == DxbcRegComponentCount::c1) { + // Create one single u32 constant + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 1; + result.id = m_module.constu32(reg.imm.u32_1); + } else if (reg.componentCount == DxbcRegComponentCount::c4) { + // Create a four-component u32 vector + std::array indices = { + m_module.constu32(reg.imm.u32_4[0]), + m_module.constu32(reg.imm.u32_4[1]), + m_module.constu32(reg.imm.u32_4[2]), + m_module.constu32(reg.imm.u32_4[3]), + }; + + result.type.ctype = DxbcScalarType::Uint32; + result.type.ccount = 4; + result.id = m_module.constComposite( + getVectorTypeId(result.type), + indices.size(), indices.data()); + } else { + // Something went horribly wrong in the decoder or the shader is broken + throw DxvkError("DxbcCompiler: Invalid component count for immediate operand"); + } + + // Cast constants to the requested type + return emitRegisterBitcast(result, reg.dataType); + } else { + // Load operand from the operand pointer + DxbcRegisterPointer ptr = emitGetOperandPtr(reg); + DxbcRegisterValue result = emitValueLoad(ptr); + + // Apply operand swizzle to the operand value + result = emitRegisterSwizzle(result, reg.swizzle, writeMask); + + // Cast it to the requested type. We need to do + // this after the swizzling for 64-bit types. 
+ result = emitRegisterBitcast(result, reg.dataType); + + // Apply operand modifiers + result = emitSrcOperandModifiers(result, reg.modifiers); + return result; + } + } + + + void DxbcCompiler2::emitRegisterStore( + const DxbcRegister& reg, + DxbcRegisterValue value) { + emitValueStore(emitGetOperandPtr(reg), value, reg.mask); + } + + + void DxbcCompiler2::emitVsInputSetup() { + + } + + + void DxbcCompiler2::emitPsInputSetup() { + + } + + + void DxbcCompiler2::emitVsOutputSetup() { + for (const DxbcSvMapping& svMapping : m_oMappings) { + switch (svMapping.sv) { + case DxbcSystemValue::Position: { + DxbcRegisterInfo info; + info.type.ctype = DxbcScalarType::Float32; + info.type.ccount = 4; + info.sclass = spv::StorageClassOutput; + + const uint32_t ptrTypeId = getPointerTypeId(info); + const uint32_t memberId = m_module.constu32(PerVertex_Position); + + DxbcRegisterPointer dstPtr; + dstPtr.type = info.type; + dstPtr.id = m_module.opAccessChain( + ptrTypeId, m_perVertexOut, 1, &memberId); + + DxbcRegisterPointer srcPtr; + srcPtr.type = info.type; + srcPtr.id = m_oRegs.at(svMapping.regId); + + emitValueStore(dstPtr, emitValueLoad(srcPtr), + DxbcRegMask(true, true, true, true)); + } break; + + default: + Logger::warn(str::format( + "dxbc: Unhandled vertex sv output: ", + svMapping.sv)); + } + } + } + + + void DxbcCompiler2::emitPsOutputSetup() { + + } + + + void DxbcCompiler2::emitVsInit() { + m_module.enableCapability(spv::CapabilityShader); + m_module.enableCapability(spv::CapabilityClipDistance); + m_module.enableCapability(spv::CapabilityCullDistance); + + // Declare the per-vertex output block. This is where + // the vertex shader will write the vertex position. 
+ const uint32_t perVertexStruct = this->getPerVertexBlockId(); + const uint32_t perVertexPointer = m_module.defPointerType( + perVertexStruct, spv::StorageClassOutput); + + m_perVertexOut = m_module.newVar( + perVertexPointer, spv::StorageClassOutput); + m_entryPointInterfaces.push_back(m_perVertexOut); + m_module.setDebugName(m_perVertexOut, "vs_vertex_out"); + + // Main function of the vertex shader + m_vs.functionId = m_module.allocateId(); + m_module.setDebugName(m_vs.functionId, "vs_main"); + + m_module.functionBegin( + m_module.defVoidType(), + m_vs.functionId, + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), + spv::FunctionControlMaskNone); + m_module.opLabel(m_module.allocateId()); + } + + + void DxbcCompiler2::emitPsInit() { + m_module.enableCapability(spv::CapabilityShader); + m_module.setOriginUpperLeft(m_entryPointId); + + // Declare pixel shader outputs. According to the Vulkan + // documentation, they are required to match the type of + // the render target. 
+ for (auto e = m_osgn->begin(); e != m_osgn->end(); e++) { + if (e->systemValue == DxbcSystemValue::None) { + DxbcRegisterInfo info; + info.type.ctype = e->componentType; + info.type.ccount = e->componentMask.setCount(); + info.sclass = spv::StorageClassOutput; + + const uint32_t varId = emitNewVariable(info); + + m_module.decorateLocation(varId, e->registerId); + m_module.setDebugName(varId, str::format("o", e->registerId).c_str()); + m_entryPointInterfaces.push_back(varId); + + m_oRegs.at(e->registerId) = varId; + m_ps.oTypes.at(e->registerId) = info.type; + } + } + + // Main function of the pixel shader + m_ps.functionId = m_module.allocateId(); + m_module.setDebugName(m_ps.functionId, "ps_main"); + + m_module.functionBegin( + m_module.defVoidType(), + m_ps.functionId, + m_module.defFunctionType( + m_module.defVoidType(), 0, nullptr), + spv::FunctionControlMaskNone); + m_module.opLabel(m_module.allocateId()); + } + + + void DxbcCompiler2::emitVsFinalize() { + this->emitVsInputSetup(); + m_module.opFunctionCall( + m_module.defVoidType(), + m_vs.functionId, 0, nullptr); + this->emitVsOutputSetup(); + } + + + void DxbcCompiler2::emitPsFinalize() { + this->emitPsInputSetup(); + m_module.opFunctionCall( + m_module.defVoidType(), + m_ps.functionId, 0, nullptr); + this->emitPsOutputSetup(); + } + + + uint32_t DxbcCompiler2::emitNewVariable(const DxbcRegisterInfo& info) { + const uint32_t ptrTypeId = this->getPointerTypeId(info); + return m_module.newVar(ptrTypeId, info.sclass); + } + + + uint32_t DxbcCompiler2::getScalarTypeId(DxbcScalarType type) { + switch (type) { + case DxbcScalarType::Uint32: return m_module.defIntType(32, 0); + case DxbcScalarType::Uint64: return m_module.defIntType(64, 0); + case DxbcScalarType::Sint32: return m_module.defIntType(32, 1); + case DxbcScalarType::Sint64: return m_module.defIntType(64, 1); + case DxbcScalarType::Float32: return m_module.defFloatType(32); + case DxbcScalarType::Float64: return m_module.defFloatType(64); + } + + 
throw DxvkError("DxbcCompiler: Invalid scalar type"); + } + + + uint32_t DxbcCompiler2::getVectorTypeId(const DxbcVectorType& type) { + uint32_t typeId = this->getScalarTypeId(type.ctype); + + if (type.ccount > 1) + typeId = m_module.defVectorType(typeId, type.ccount); + + return typeId; + } + + + uint32_t DxbcCompiler2::getPointerTypeId(const DxbcRegisterInfo& type) { + return m_module.defPointerType( + this->getVectorTypeId(type.type), + type.sclass); + } + + + uint32_t DxbcCompiler2::getPerVertexBlockId() { + uint32_t t_f32 = m_module.defFloatType(32); + uint32_t t_f32_v4 = m_module.defVectorType(t_f32, 4); + uint32_t t_f32_a2 = m_module.defArrayType(t_f32, m_module.constu32(2)); + + std::array members; + members[PerVertex_Position] = t_f32_v4; + members[PerVertex_PointSize] = t_f32; + members[PerVertex_CullDist] = t_f32_a2; + members[PerVertex_ClipDist] = t_f32_a2; + + uint32_t typeId = m_module.defStructTypeUnique( + members.size(), members.data()); + + m_module.memberDecorateBuiltIn(typeId, PerVertex_Position, spv::BuiltInPosition); + m_module.memberDecorateBuiltIn(typeId, PerVertex_PointSize, spv::BuiltInPointSize); + m_module.memberDecorateBuiltIn(typeId, PerVertex_CullDist, spv::BuiltInCullDistance); + m_module.memberDecorateBuiltIn(typeId, PerVertex_ClipDist, spv::BuiltInClipDistance); + m_module.decorateBlock(typeId); + + m_module.setDebugName(typeId, "per_vertex"); + m_module.setDebugMemberName(typeId, PerVertex_Position, "position"); + m_module.setDebugMemberName(typeId, PerVertex_PointSize, "point_size"); + m_module.setDebugMemberName(typeId, PerVertex_CullDist, "cull_dist"); + m_module.setDebugMemberName(typeId, PerVertex_ClipDist, "clip_dist"); + return typeId; + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_compiler_2.h b/src/dxbc/dxbc_compiler_2.h new file mode 100644 index 00000000..5dfb3e04 --- /dev/null +++ b/src/dxbc/dxbc_compiler_2.h @@ -0,0 +1,311 @@ +#pragma once + +#include +#include + +#include "../spirv/spirv_module.h" + 
+#include "dxbc_chunk_isgn.h" +#include "dxbc_decoder_2.h" +#include "dxbc_defs.h" +#include "dxbc_names.h" +#include "dxbc_util.h" + +namespace dxvk { + + struct DxbcVectorType { + DxbcScalarType ctype; + uint32_t ccount; + }; + + struct DxbcRegisterInfo { + DxbcVectorType type; + spv::StorageClass sclass; + }; + + struct DxbcRegisterValue { + DxbcVectorType type; + uint32_t id; + }; + + struct DxbcRegisterPointer { + DxbcVectorType type; + uint32_t id; + }; + + struct DxbcCompilerVsPart { + uint32_t functionId; + }; + + struct DxbcCompilerPsPart { + uint32_t functionId; + std::array oTypes; + }; + + /** + * \brief DXBC to SPIR-V shader compiler + * + * Processes instructions from a DXBC shader and creates + * a DXVK shader object, which contains the SPIR-V module + * and information about the shader resource bindings. + */ + class DxbcCompiler2 { + + public: + + DxbcCompiler2( + const DxbcProgramVersion& version, + const Rc& isgn, + const Rc& osgn); + ~DxbcCompiler2(); + + /** + * \brief Processes a single instruction + * \param [in] ins The instruction + */ + void processInstruction( + const DxbcShaderInstruction& ins); + + /** + * \brief Finalizes the shader + * \returns The final shader object + */ + Rc finalize(); + + private: + + DxbcProgramVersion m_version; + SpirvModule m_module; + + Rc m_isgn; + Rc m_osgn; + + /////////////////////////////////////////////////////// + // Resource slot description for the shader. This will + // be used to map D3D11 bindings to DXVK bindings. + std::vector m_resourceSlots; + + /////////////////////////////// + // r# registers of type float4 + std::vector m_rRegs; + + /////////////////////////////////////////////////////////// + // v# registers as defined by the shader. The type of each + // of these inputs is either float4 or an array of float4. + std::array m_vRegs; + std::vector m_vMappings; + + ////////////////////////////////////////////////////////// + // o# registers as defined by the shader. 
In the fragment + // shader stage, these registers are typed by the signature, + // in all other stages, they are float4 registers or arrays. + std::array m_oRegs; + std::vector m_oMappings; + + ////////////////////////////////////////////////////// + // Shader resource variables. These provide access to + // constant buffers, samplers, textures, and UAVs. + std::array m_constantBuffers; + std::array m_samplers; + std::array m_textures; + + /////////////////////////////////////////////////////////// + // Array of input values. Since v# registers are indexable + // in DXBC, we need to copy them into an array first. + uint32_t m_vArray = 0; + + //////////////////////////////////////////////////// + // Per-vertex input and output blocks. Depending on + // the shader stage, these may be declared as arrays. + uint32_t m_perVertexIn = 0; + uint32_t m_perVertexOut = 0; + + /////////////////////////////////////////////////// + // Entry point description - we'll need to declare + // the function ID and all input/output variables. 
+ std::vector m_entryPointInterfaces; + uint32_t m_entryPointId = 0; + + /////////////////////////////////// + // Shader-specific data structures + DxbcCompilerVsPart m_vs; + DxbcCompilerPsPart m_ps; + + ///////////////////////////////////////////////////// + // Shader interface and metadata declaration methods + void emitDclGlobalFlags( + const DxbcShaderInstruction& ins); + + void emitDclTemps( + const DxbcShaderInstruction& ins); + + void emitDclInterfaceReg( + const DxbcShaderInstruction& ins); + + void emitDclInput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im); + + void emitDclOutput( + uint32_t regIdx, + uint32_t regDim, + DxbcRegMask regMask, + DxbcSystemValue sv, + DxbcInterpolationMode im); + + void emitDclConstantBuffer( + const DxbcShaderInstruction& ins); + + void emitDclSampler( + const DxbcShaderInstruction& ins); + + void emitDclResource( + const DxbcShaderInstruction& ins); + + ////////////////////////////// + // Instruction class handlers + void emitVectorAlu( + const DxbcShaderInstruction& ins); + + void emitVectorCmov( + const DxbcShaderInstruction& ins); + + void emitVectorCmp( + const DxbcShaderInstruction& ins); + + void emitVectorDot( + const DxbcShaderInstruction& ins); + + void emitVectorImul( + const DxbcShaderInstruction& ins); + + void emitVectorSinCos( + const DxbcShaderInstruction& ins); + + void emitSample( + const DxbcShaderInstruction& ins); + + void emitRet( + const DxbcShaderInstruction& ins); + + + ///////////////////////////////////////// + // Generic register manipulation methods + DxbcRegisterValue emitRegisterBitcast( + DxbcRegisterValue srcValue, + DxbcScalarType dstType); + + DxbcRegisterValue emitRegisterSwizzle( + DxbcRegisterValue value, + DxbcRegSwizzle swizzle, + DxbcRegMask writeMask); + + DxbcRegisterValue emitRegisterExtract( + DxbcRegisterValue value, + DxbcRegMask mask); + + DxbcRegisterValue emitRegisterInsert( + DxbcRegisterValue dstValue, + 
DxbcRegisterValue srcValue, + DxbcRegMask srcMask); + + DxbcRegisterValue emitRegisterExtend( + DxbcRegisterValue value, + uint32_t size); + + DxbcRegisterValue emitRegisterAbsolute( + DxbcRegisterValue value); + + DxbcRegisterValue emitRegisterNegate( + DxbcRegisterValue value); + + DxbcRegisterValue emitSrcOperandModifiers( + DxbcRegisterValue value, + DxbcRegModifiers modifiers); + + DxbcRegisterValue emitDstOperandModifiers( + DxbcRegisterValue value, + DxbcOpModifiers modifiers); + + //////////////////////// + // Address load methods + DxbcRegisterPointer emitGetTempPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetInputPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetOutputPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetConstBufPtr( + const DxbcRegister& operand); + + DxbcRegisterPointer emitGetOperandPtr( + const DxbcRegister& operand); + + ////////////////////////////// + // Operand load/store methods + DxbcRegisterValue emitIndexLoad( + DxbcRegIndex index); + + DxbcRegisterValue emitValueLoad( + DxbcRegisterPointer ptr); + + void emitValueStore( + DxbcRegisterPointer ptr, + DxbcRegisterValue value, + DxbcRegMask writeMask); + + DxbcRegisterValue emitRegisterLoad( + const DxbcRegister& reg, + DxbcRegMask writeMask); + + void emitRegisterStore( + const DxbcRegister& reg, + DxbcRegisterValue value); + + ///////////////////////////// + // Input preparation methods + void emitVsInputSetup(); + void emitPsInputSetup(); + + ////////////////////////////// + // Output preparation methods + void emitVsOutputSetup(); + void emitPsOutputSetup(); + + ///////////////////////////////// + // Shader initialization methods + void emitVsInit(); + void emitPsInit(); + + /////////////////////////////// + // Shader finalization methods + void emitVsFinalize(); + void emitPsFinalize(); + + /////////////////////////////// + // Variable definition methods + uint32_t emitNewVariable( + const DxbcRegisterInfo& info); + + 
/////////////////////////// + // Type definition methods + uint32_t getScalarTypeId( + DxbcScalarType type); + + uint32_t getVectorTypeId( + const DxbcVectorType& type); + + uint32_t getPointerTypeId( + const DxbcRegisterInfo& type); + + uint32_t getPerVertexBlockId(); + + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_decoder.h b/src/dxbc/dxbc_decoder.h index cce5140f..c907bab5 100644 --- a/src/dxbc/dxbc_decoder.h +++ b/src/dxbc/dxbc_decoder.h @@ -9,6 +9,41 @@ namespace dxvk { class DxbcOperand; + /** + * \brief Constant buffer binding + * + * Stores information required to + * access a constant buffer. + */ + struct DxbcConstantBuffer { + uint32_t varId = 0; + uint32_t size = 0; + }; + + /** + * \brief Sampler binding + * + * Stores a sampler variable that can be + * used together with a texture resource. + */ + struct DxbcSampler { + uint32_t varId = 0; + uint32_t typeId = 0; + }; + + + /** + * \brief Shader resource binding + * + * Stores a resource variable + * and associated type IDs. + */ + struct DxbcShaderResource { + uint32_t varId = 0; + uint32_t sampledTypeId = 0; + uint32_t textureTypeId = 0; + }; + /** * \brief Component swizzle * @@ -73,6 +108,19 @@ namespace dxvk { }; + /** + * \brief System value mapping + * + * Maps a system value to a given set of + * components of an input or output register. 
+ */ + struct DxbcSvMapping { + uint32_t regId; + DxbcRegMask regMask; + DxbcSystemValue sv; + }; + + /** * \brief Basic control info * diff --git a/src/dxbc/dxbc_decoder_2.cpp b/src/dxbc/dxbc_decoder_2.cpp new file mode 100644 index 00000000..dcb066e3 --- /dev/null +++ b/src/dxbc/dxbc_decoder_2.cpp @@ -0,0 +1,333 @@ +#include "dxbc_decoder_2.h" + +namespace dxvk { + + uint32_t DxbcCodeSlice::at(uint32_t id) const { + if (m_ptr + id >= m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return m_ptr[id]; + } + + + uint32_t DxbcCodeSlice::read() { + if (m_ptr >= m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return *(m_ptr++); + } + + + DxbcCodeSlice DxbcCodeSlice::take(uint32_t n) const { + if (m_ptr + n > m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return DxbcCodeSlice(m_ptr, m_ptr + n); + } + + + DxbcCodeSlice DxbcCodeSlice::skip(uint32_t n) const { + if (m_ptr + n > m_end) + throw DxvkError("DxbcCodeSlice: End of stream"); + return DxbcCodeSlice(m_ptr + n, m_end); + } + + + + void DxbcDecodeContext::decodeInstruction(DxbcCodeSlice& code) { + const uint32_t token0 = code.at(0); + + // Initialize the instruction structure. Some of these values + // may not get written otherwise while decoding the instruction. + m_instruction.op = static_cast(bit::extract(token0, 0, 10)); + m_instruction.sampleControls = { 0, 0, 0 }; + m_instruction.dstCount = 0; + m_instruction.srcCount = 0; + m_instruction.immCount = 0; + m_instruction.dst = m_dstOperands.data(); + m_instruction.src = m_srcOperands.data(); + m_instruction.imm = m_immOperands.data(); + + // Reset the index pointer, which may still contain + // a non-zero value from the previous iteration + m_indexId = 0; + + // Instruction length, in DWORDs. This includes the token + // itself and any other prefix that an instruction may have. 
+ uint32_t length = 0; + + if (m_instruction.op == DxbcOpcode::CustomData) { + length = code.at(1); + this->decodeCustomData(code.take(length)); + } else { + length = bit::extract(token0, 24, 30); + this->decodeOperation(code.take(length)); + } + + // Advance the caller's slice to the next token so that + // they can make consecutive calls to decodeInstruction() + code = code.skip(length); + } + + + void DxbcDecodeContext::decodeCustomData(DxbcCodeSlice code) { + Logger::warn("DxbcDecodeContext::decodeCustomData: Not implemented"); + } + + + void DxbcDecodeContext::decodeOperation(DxbcCodeSlice code) { + uint32_t token = code.read(); + + // Result modifiers, which are applied to common ALU ops + m_instruction.modifiers.saturate = !!bit::extract(token, 13, 13); + m_instruction.modifiers.precise = !!bit::extract(token, 19, 22); + + // Opcode controls. It will depend on the opcode itself which ones are valid. + m_instruction.controls.zeroTest = static_cast (bit::extract(token, 18, 18)); + m_instruction.controls.syncFlags = static_cast (bit::extract(token, 11, 14)); + m_instruction.controls.resourceDim = static_cast (bit::extract(token, 11, 15)); + m_instruction.controls.resinfoType = static_cast (bit::extract(token, 11, 12)); + m_instruction.controls.interpolation = static_cast(bit::extract(token, 11, 14)); + + // Process extended opcode tokens + while (bit::extract(token, 31, 31)) { + token = code.read(); + + const DxbcExtOpcode extOpcode + = static_cast(bit::extract(token, 0, 5)); + + switch (extOpcode) { + case DxbcExtOpcode::SampleControls: { + struct { + int u : 4; + int v : 4; + int w : 4; + } aoffimmi; + + aoffimmi.u = bit::extract(token, 9, 12); + aoffimmi.v = bit::extract(token, 13, 16); + aoffimmi.w = bit::extract(token, 17, 20); + + // Four-bit signed numbers, sign-extend them + m_instruction.sampleControls.u = aoffimmi.u; + m_instruction.sampleControls.v = aoffimmi.v; + m_instruction.sampleControls.w = aoffimmi.w; + } break; + + default: + 
Logger::warn(str::format( + "DxbcDecodeContext: Unhandled extended opcode: ", + extOpcode)); + } + } + + // Retrieve the instruction format in order to parse the + // operands. Doing this mostly automatically means that + // the compiler can rely on the operands being valid. + const DxbcInstFormat format = dxbcInstructionFormat(m_instruction.op); + + for (uint32_t i = 0; i < format.operandCount; i++) + this->decodeOperand(code, format.operands[i]); + } + + + void DxbcDecodeContext::decodeComponentSelection(DxbcRegister& reg, uint32_t token) { + // Pick the correct component selection mode based on the + // component count. We'll simplify this here so that the + // compiler can assume that everything is a 4D vector. + reg.componentCount = static_cast(bit::extract(token, 0, 1)); + + switch (reg.componentCount) { + // No components - used for samplers etc. + case DxbcRegComponentCount::c0: + reg.mask = DxbcRegMask(false, false, false, false); + reg.swizzle = DxbcRegSwizzle(0, 0, 0, 0); + break; + + // One component - used for immediates + // and a few built-in registers. + case DxbcRegComponentCount::c1: + reg.mask = DxbcRegMask(true, false, false, false); + reg.swizzle = DxbcRegSwizzle(0, 0, 0, 0); + break; + + // Four components - everything else. This requires us + // to actually parse the component selection mode. + case DxbcRegComponentCount::c4: { + const DxbcRegMode componentMode = + static_cast(bit::extract(token, 2, 3)); + + switch (componentMode) { + // Write mask for destination operands + case DxbcRegMode::Mask: + reg.mask = bit::extract(token, 4, 7); + reg.swizzle = DxbcRegSwizzle(0, 1, 2, 3); + break; + + // Swizzle for source operands (including resources) + case DxbcRegMode::Swizzle: + reg.mask = DxbcRegMask(true, true, true, true); + reg.swizzle = DxbcRegSwizzle( + bit::extract(token, 4, 5), + bit::extract(token, 6, 7), + bit::extract(token, 8, 9), + bit::extract(token, 10, 11)); + break; + + // Selection of one component. 
We can generate both a + // mask and a swizzle for this so that the compiler + // won't have to deal with this case specifically. + case DxbcRegMode::Select1: { + const uint32_t n = bit::extract(token, 4, 5); + reg.mask = DxbcRegMask(n == 0, n == 1, n == 2, n == 3); + reg.swizzle = DxbcRegSwizzle(n, n, n, n); + } break; + + default: + Logger::warn("DxbcDecodeContext: Invalid component selection mode"); + } + } break; + + default: + Logger::warn("DxbcDecodeContext: Invalid component count"); + } + } + + + void DxbcDecodeContext::decodeOperandExtensions(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token) { + while (bit::extract(token, 31, 31)) { + token = code.read(); + + // Type of the extended operand token + const DxbcOperandExt extTokenType = + static_cast(bit::extract(token, 0, 5)); + + switch (extTokenType) { + // Operand modifiers, which are used to manipulate the + // value of a source operand during the load operation + case DxbcOperandExt::OperandModifier: + reg.modifiers = bit::extract(token, 6, 13); + break; + + default: + Logger::warn(str::format( + "DxbcDecodeContext: Unhandled extended operand token: ", + extTokenType)); + } + } + } + + + void DxbcDecodeContext::decodeOperandImmediates(DxbcCodeSlice& code, DxbcRegister& reg) { + if (reg.type == DxbcOperandType::Imm32) { + switch (reg.componentCount) { + // This is commonly used if only one vector + // component is involved in an operation + case DxbcRegComponentCount::c1: { + reg.imm.u32_1 = code.read(); + } break; + + // Typical four-component vector + case DxbcRegComponentCount::c4: { + reg.imm.u32_4[0] = code.read(); + reg.imm.u32_4[1] = code.read(); + reg.imm.u32_4[2] = code.read(); + reg.imm.u32_4[3] = code.read(); + } break; + + default: + Logger::warn("DxbcDecodeContext: Invalid component count for immediate operand"); + } + } else if (reg.type == DxbcOperandType::Imm64) { + Logger::warn("DxbcDecodeContext: 64-bit immediates not supported"); + } + } + + + void 
DxbcDecodeContext::decodeOperandIndex(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token) { + reg.idxDim = bit::extract(token, 20, 21); + + for (uint32_t i = 0; i < reg.idxDim; i++) { + // An index can be encoded in various different ways + const DxbcOperandIndexRepresentation repr = + static_cast( + bit::extract(token, 22 + 3 * i, 24 + 3 * i)); + + switch (repr) { + case DxbcOperandIndexRepresentation::Imm32: + reg.idx[i].offset = static_cast(code.read()); + reg.idx[i].relReg = nullptr; + break; + + case DxbcOperandIndexRepresentation::Relative: + reg.idx[i].offset = 0; + reg.idx[i].relReg = &m_indices.at(m_indexId); + + this->decodeRegister(code, + m_indices.at(m_indexId++), + DxbcScalarType::Sint32); + break; + + case DxbcOperandIndexRepresentation::Imm32Relative: + reg.idx[i].offset = static_cast(code.read()); + reg.idx[i].relReg = &m_indices.at(m_indexId); + + this->decodeRegister(code, + m_indices.at(m_indexId++), + DxbcScalarType::Sint32); + break; + + default: + Logger::warn(str::format( + "DxbcDecodeContext: Unhandled index representation: ", + repr)); + } + } + } + + + void DxbcDecodeContext::decodeRegister(DxbcCodeSlice& code, DxbcRegister& reg, DxbcScalarType type) { + const uint32_t token = code.read(); + + reg.type = static_cast(bit::extract(token, 12, 19)); + reg.dataType = type; + reg.modifiers = 0; + reg.idxDim = 0; + + for (uint32_t i = 0; i < DxbcMaxRegIndexDim; i++) { + reg.idx[i].relReg = nullptr; + reg.idx[i].offset = 0; + } + + this->decodeComponentSelection(reg, token); + this->decodeOperandExtensions(code, reg, token); + this->decodeOperandImmediates(code, reg); + this->decodeOperandIndex(code, reg, token); + } + + + void DxbcDecodeContext::decodeImm32(DxbcCodeSlice& code, DxbcImmediate& imm, DxbcScalarType type) { + imm.u32 = code.read(); + } + + + void DxbcDecodeContext::decodeOperand(DxbcCodeSlice& code, const DxbcInstOperandFormat& format) { + switch (format.kind) { + case DxbcOperandKind::DstReg: { + const uint32_t operandId = 
m_instruction.dstCount++; + this->decodeRegister(code, m_dstOperands.at(operandId), format.type); + } break; + + case DxbcOperandKind::SrcReg: { + const uint32_t operandId = m_instruction.srcCount++; + this->decodeRegister(code, m_srcOperands.at(operandId), format.type); + } break; + + case DxbcOperandKind::Imm32: { + const uint32_t operandId = m_instruction.immCount++; + this->decodeImm32(code, m_immOperands.at(operandId), format.type); + } break; + + default: + throw DxvkError("DxbcDecodeContext: Invalid operand format"); + } + } + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_decoder_2.h b/src/dxbc/dxbc_decoder_2.h new file mode 100644 index 00000000..e9f74e51 --- /dev/null +++ b/src/dxbc/dxbc_decoder_2.h @@ -0,0 +1,192 @@ +#pragma once + +#include + +#include "dxbc_common.h" +#include "dxbc_decoder.h" +#include "dxbc_defs.h" +#include "dxbc_enums.h" +#include "dxbc_names.h" + +namespace dxvk { + + constexpr size_t DxbcMaxRegIndexDim = 3; + + struct DxbcRegister; + + enum class DxbcRegComponentCount : uint32_t { + c0 = 0, + c1 = 1, + c4 = 2, + }; + + enum class DxbcRegModifier : uint32_t { + Neg = 0, + Abs = 1, + }; + + using DxbcRegModifiers = Flags; + + + struct DxbcRegIndex { + DxbcRegister* relReg; + int32_t offset; + }; + + + struct DxbcRegister { + DxbcOperandType type; + DxbcScalarType dataType; + DxbcRegComponentCount componentCount; + + uint32_t idxDim; + DxbcRegIndex idx[DxbcMaxRegIndexDim]; + + DxbcRegMask mask; + DxbcRegSwizzle swizzle; + DxbcRegModifiers modifiers; + + union { + uint32_t u32_4[4]; + uint32_t u32_1; + } imm; + }; + + + struct DxbcOpModifiers { + bool saturate; + bool precise; + }; + + + struct DxbcShaderOpcodeControls { + DxbcZeroTest zeroTest; + DxbcSyncFlags syncFlags; + DxbcResourceDim resourceDim; + DxbcResinfoType resinfoType; + DxbcInterpolationMode interpolation; + }; + + + struct DxbcShaderSampleControls { + int u, v, w; + }; + + + union DxbcImmediate { + uint32_t u32; + uint64_t u64; + }; + + + /** + * \brief 
Shader instruction + */ + struct DxbcShaderInstruction { + DxbcOpcode op; + DxbcOpModifiers modifiers; + DxbcShaderOpcodeControls controls; + DxbcShaderSampleControls sampleControls; + + uint32_t dstCount; + uint32_t srcCount; + uint32_t immCount; + + const DxbcRegister* dst; + const DxbcRegister* src; + const DxbcImmediate* imm; + }; + + + /** + * \brief DXBC code slice + * + * Convenient pointer pair that allows + * reading the code word stream safely. + */ + class DxbcCodeSlice { + + public: + + DxbcCodeSlice( + const uint32_t* ptr, + const uint32_t* end) + : m_ptr(ptr), m_end(end) { } + + uint32_t at(uint32_t id) const; + uint32_t read(); + + DxbcCodeSlice take(uint32_t n) const; + DxbcCodeSlice skip(uint32_t n) const; + + bool atEnd() const { + return m_ptr == m_end; + } + + private: + + const uint32_t* m_ptr = nullptr; + const uint32_t* m_end = nullptr; + + }; + + + /** + * \brief Decode context + * + * Stores data that is required to decode a single + * instruction. This data is not persistent, so it + * should be forwarded to the compiler right away. + */ + class DxbcDecodeContext { + + public: + + /** + * \brief Retrieves current instruction + * + * This is only valid after a call to \ref decode. + * \returns Reference to last decoded instruction + */ + const DxbcShaderInstruction& getInstruction() const { + return m_instruction; + } + + /** + * \brief Decodes an instruction + * + * This also advances the given code slice by the + * number of dwords consumed by the instruction. + * \param [in] code Code slice + */ + void decodeInstruction(DxbcCodeSlice& code); + + private: + + DxbcShaderInstruction m_instruction; + + std::array m_dstOperands; + std::array m_srcOperands; + std::array m_immOperands; + std::array m_indices; + + // Index into the indices array. Used when decoding + // instruction operands with relative indexing. 
+ uint32_t m_indexId = 0; + + void decodeCustomData(DxbcCodeSlice code); + void decodeOperation(DxbcCodeSlice code); + + void decodeComponentSelection(DxbcRegister& reg, uint32_t token); + void decodeOperandExtensions(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token); + void decodeOperandImmediates(DxbcCodeSlice& code, DxbcRegister& reg); + void decodeOperandIndex(DxbcCodeSlice& code, DxbcRegister& reg, uint32_t token); + + void decodeRegister(DxbcCodeSlice& code, DxbcRegister& reg, DxbcScalarType type); + void decodeImm32(DxbcCodeSlice& code, DxbcImmediate& imm, DxbcScalarType type); + + void decodeOperand(DxbcCodeSlice& code, const DxbcInstOperandFormat& format); + + }; + +} \ No newline at end of file diff --git a/src/dxbc/dxbc_defs.cpp b/src/dxbc/dxbc_defs.cpp index b49391a0..854924f2 100644 --- a/src/dxbc/dxbc_defs.cpp +++ b/src/dxbc/dxbc_defs.cpp @@ -78,7 +78,10 @@ namespace dxvk { { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, } }, /* Exp */ - { }, + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, /* Frc */ { }, /* FtoI */ @@ -92,27 +95,68 @@ namespace dxvk { { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, } }, /* IAdd */ - { }, + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* If */ { }, /* IEq */ - { }, + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* IGe */ - { }, + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* ILt */ - { }, + { 3, DxbcInstClass::VectorCmp, { + 
{ DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* IMad */ - { }, + { 4, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* IMax */ - { }, + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* IMin */ - { }, + { 3, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* IMul */ - { }, + { 4, DxbcInstClass::VectorImul, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, /* INe */ - { }, + { 3, DxbcInstClass::VectorCmp, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* INeg */ - { }, + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Sint32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Sint32 }, + } }, /* IShl */ { }, /* IShr */ @@ -126,7 +170,10 @@ namespace dxvk { /* LdMs */ { }, /* Log */ - { }, + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, /* Loop */ { }, /* Lt */ @@ -223,7 +270,10 @@ namespace dxvk { /* SampleB */ { }, /* Sqrt */ - { }, + { 2, DxbcInstClass::VectorAlu, { + { DxbcOperandKind::DstReg, 
DxbcScalarType::Float32 }, + { DxbcOperandKind::SrcReg, DxbcScalarType::Float32 }, + } }, /* Switch */ { }, /* SinCos */ diff --git a/src/dxbc/dxbc_defs.h b/src/dxbc/dxbc_defs.h index 44367e7c..b39043f3 100644 --- a/src/dxbc/dxbc_defs.h +++ b/src/dxbc/dxbc_defs.h @@ -34,6 +34,7 @@ namespace dxvk { VectorCmov, ///< Component-wise conditional move VectorCmp, ///< Component-wise vector comparison VectorDot, ///< Dot product instruction + VectorImul, ///< Component-wise integer multiplication VectorSinCos, ///< Sine and Cosine instruction ControlFlow, ///< Control flow instructions Undefined, ///< Instruction code not defined diff --git a/src/dxbc/dxbc_enums.h b/src/dxbc/dxbc_enums.h index c8f3393e..d8e2eb21 100644 --- a/src/dxbc/dxbc_enums.h +++ b/src/dxbc/dxbc_enums.h @@ -451,7 +451,7 @@ namespace dxvk { enum class DxbcResinfoType : uint32_t { Float = 0, - RcpFloat = 1, // ? + RcpFloat = 1, Uint = 2, }; diff --git a/src/dxbc/dxbc_module.cpp b/src/dxbc/dxbc_module.cpp index 6bd9fe0d..807993c5 100644 --- a/src/dxbc/dxbc_module.cpp +++ b/src/dxbc/dxbc_module.cpp @@ -1,4 +1,5 @@ #include "dxbc_compiler.h" +#include "dxbc_compiler_2.h" #include "dxbc_module.h" namespace dxvk { @@ -44,19 +45,19 @@ namespace dxvk { if (m_shexChunk == nullptr) throw DxvkError("DxbcModule::compile: No SHDR/SHEX chunk"); - DxbcCompiler compiler( + DxbcCodeSlice slice = m_shexChunk->slice(); + + DxbcCompiler2 compiler( m_shexChunk->version(), m_isgnChunk, m_osgnChunk); - for (auto ins : *m_shexChunk) { - const DxbcError error = compiler.processInstruction(ins); + DxbcDecodeContext decoder; + + while (!slice.atEnd()) { + decoder.decodeInstruction(slice); - if (error != DxbcError::sOk) { - Logger::err(str::format( - "dxbc: Error while processing ", - ins.token().opcode(), ": Error ", - static_cast(error))); - } + compiler.processInstruction( + decoder.getInstruction()); } return compiler.finalize(); diff --git a/src/dxbc/meson.build b/src/dxbc/meson.build index c9948f7b..b23fd09d 100644 --- 
a/src/dxbc/meson.build +++ b/src/dxbc/meson.build @@ -3,8 +3,10 @@ dxbc_src = files([ 'dxbc_chunk_shex.cpp', 'dxbc_common.cpp', 'dxbc_compiler.cpp', + 'dxbc_compiler_2.cpp', 'dxbc_defs.cpp', 'dxbc_decoder.cpp', + 'dxbc_decoder_2.cpp', 'dxbc_header.cpp', 'dxbc_module.cpp', 'dxbc_names.cpp', diff --git a/src/spirv/spirv_module.cpp b/src/spirv/spirv_module.cpp index 2b9cdc37..a362aa48 100644 --- a/src/spirv/spirv_module.cpp +++ b/src/spirv/spirv_module.cpp @@ -836,6 +836,40 @@ namespace dxvk { } + uint32_t SpirvModule::opSMax( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450SMax); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + + uint32_t SpirvModule::opSMin( + uint32_t resultType, + uint32_t a, + uint32_t b) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 7); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450SMin); + m_code.putWord(a); + m_code.putWord(b); + return resultId; + } + + uint32_t SpirvModule::opFClamp( uint32_t resultType, uint32_t x, @@ -885,6 +919,66 @@ namespace dxvk { } + uint32_t SpirvModule::opSLessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSLessThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSLessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSLessThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + 
uint32_t SpirvModule::opSGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSGreaterThan, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + + uint32_t SpirvModule::opSGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpSGreaterThanEqual, 5); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(vector1); + m_code.putWord(vector2); + return resultId; + } + + uint32_t SpirvModule::opFOrdEqual( uint32_t resultType, uint32_t vector1, @@ -1020,9 +1114,24 @@ namespace dxvk { } + uint32_t SpirvModule::opSqrt( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450Sqrt); + m_code.putWord(operand); + return resultId; + } + + uint32_t SpirvModule::opInverseSqrt( uint32_t resultType, - uint32_t x) { + uint32_t operand) { uint32_t resultId = this->allocateId(); m_code.putIns (spv::OpExtInst, 6); @@ -1030,7 +1139,37 @@ namespace dxvk { m_code.putWord(resultId); m_code.putWord(m_instExtGlsl450); m_code.putWord(spv::GLSLstd450InverseSqrt); - m_code.putWord(x); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opExp2( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450Exp2); + m_code.putWord(operand); + return resultId; + } + + + uint32_t SpirvModule::opLog2( + uint32_t resultType, + uint32_t operand) { + uint32_t resultId = this->allocateId(); + + 
m_code.putIns (spv::OpExtInst, 6); + m_code.putWord(resultType); + m_code.putWord(resultId); + m_code.putWord(m_instExtGlsl450); + m_code.putWord(spv::GLSLstd450Log2); + m_code.putWord(operand); return resultId; } diff --git a/src/spirv/spirv_module.h b/src/spirv/spirv_module.h index c3e6537c..6a9cc621 100644 --- a/src/spirv/spirv_module.h +++ b/src/spirv/spirv_module.h @@ -296,6 +296,16 @@ namespace dxvk { uint32_t a, uint32_t b); + uint32_t opSMax( + uint32_t resultType, + uint32_t a, + uint32_t b); + + uint32_t opSMin( + uint32_t resultType, + uint32_t a, + uint32_t b); + uint32_t opFClamp( uint32_t resultType, uint32_t x, @@ -312,6 +322,26 @@ namespace dxvk { uint32_t vector1, uint32_t vector2); + uint32_t opSLessThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSLessThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSGreaterThan( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + + uint32_t opSGreaterThanEqual( + uint32_t resultType, + uint32_t vector1, + uint32_t vector2); + uint32_t opFOrdEqual( uint32_t resultType, uint32_t vector1, @@ -355,9 +385,21 @@ namespace dxvk { uint32_t resultType, uint32_t vector); + uint32_t opSqrt( + uint32_t resultType, + uint32_t operand); + uint32_t opInverseSqrt( uint32_t resultType, - uint32_t x); + uint32_t operand); + + uint32_t opExp2( + uint32_t resultType, + uint32_t operand); + + uint32_t opLog2( + uint32_t resultType, + uint32_t operand); uint32_t opSelect( uint32_t resultType,