diff --git a/src/dxvk/dxvk_buffer.cpp b/src/dxvk/dxvk_buffer.cpp index b7e3c447..22295352 100644 --- a/src/dxvk/dxvk_buffer.cpp +++ b/src/dxvk/dxvk_buffer.cpp @@ -102,11 +102,15 @@ namespace dxvk { bool isGpuWritable = (m_info.access & ( VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT)) != 0; - float priority = isGpuWritable ? 1.0f : 0.5f; - + + DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable); + + if (isGpuWritable) + hints.set(DxvkMemoryFlag::GpuWritable); + // Ask driver whether we should be using a dedicated allocation handle.memory = m_memAlloc->alloc(&memReq.memoryRequirements, - dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, priority); + dedicatedRequirements, dedMemoryAllocInfo, m_memFlags, hints); if (vkd->vkBindBufferMemory(vkd->device(), handle.buffer, handle.memory.memory(), handle.memory.offset()) != VK_SUCCESS) diff --git a/src/dxvk/dxvk_image.cpp b/src/dxvk/dxvk_image.cpp index 89509456..cafb3baf 100644 --- a/src/dxvk/dxvk_image.cpp +++ b/src/dxvk/dxvk_image.cpp @@ -86,7 +86,7 @@ namespace dxvk { m_vkd->vkGetImageMemoryRequirements2( m_vkd->device(), &memReqInfo, &memReq); - + if (info.tiling != VK_IMAGE_TILING_LINEAR && !dedicatedRequirements.prefersDedicatedAllocation) { memReq.memoryRequirements.size = align(memReq.memoryRequirements.size, memAlloc.bufferImageGranularity()); memReq.memoryRequirements.alignment = align(memReq.memoryRequirements.alignment , memAlloc.bufferImageGranularity()); @@ -100,11 +100,14 @@ namespace dxvk { VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) != 0; - float priority = isGpuWritable ? 
1.0f : 0.5f; + DxvkMemoryFlags hints(DxvkMemoryFlag::GpuReadable); + + if (isGpuWritable) + hints.set(DxvkMemoryFlag::GpuWritable); // Ask driver whether we should be using a dedicated allocation m_image.memory = memAlloc.alloc(&memReq.memoryRequirements, - dedicatedRequirements, dedMemoryAllocInfo, memFlags, priority); + dedicatedRequirements, dedMemoryAllocInfo, memFlags, hints); // Try to bind the allocated memory slice to the image if (m_vkd->vkBindImageMemory(m_vkd->device(), m_image.image, diff --git a/src/dxvk/dxvk_memory.cpp b/src/dxvk/dxvk_memory.cpp index b415ff5e..b52b7cc2 100644 --- a/src/dxvk/dxvk_memory.cpp +++ b/src/dxvk/dxvk_memory.cpp @@ -60,8 +60,9 @@ namespace dxvk { DxvkMemoryChunk::DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, - DxvkDeviceMemory memory) - : m_alloc(alloc), m_type(type), m_memory(memory) { + DxvkDeviceMemory memory, + DxvkMemoryFlags hints) + : m_alloc(alloc), m_type(type), m_memory(memory), m_hints(hints) { // Mark the entire chunk as free m_freeList.push_back(FreeSlice { 0, memory.memSize }); } @@ -78,11 +79,10 @@ namespace dxvk { VkMemoryPropertyFlags flags, VkDeviceSize size, VkDeviceSize align, - float priority) { + DxvkMemoryFlags hints) { // Property flags must be compatible. This could // be refined a bit in the future if necessary. 
- if (m_memory.memFlags != flags - || m_memory.priority != priority) + if (m_memory.memFlags != flags || !checkHints(hints)) return DxvkMemory(); // If the chunk is full, return @@ -160,6 +160,19 @@ namespace dxvk { } + bool DxvkMemoryChunk::checkHints(DxvkMemoryFlags hints) const { + DxvkMemoryFlags mask( + DxvkMemoryFlag::Small, + DxvkMemoryFlag::GpuReadable, + DxvkMemoryFlag::GpuWritable); + + if (hints.test(DxvkMemoryFlag::IgnoreConstraints)) + mask = DxvkMemoryFlags(); + + return (m_hints & mask) == (hints & mask); + } + + DxvkMemoryAllocator::DxvkMemoryAllocator(const DxvkDevice* device) : m_vkd (device->vkd()), m_device (device), @@ -182,7 +195,6 @@ namespace dxvk { m_memTypes[i].heapId = m_memProps.memoryTypes[i].heapIndex; m_memTypes[i].memType = m_memProps.memoryTypes[i]; m_memTypes[i].memTypeId = i; - m_memTypes[i].chunkSize = pickChunkSize(i); } /* Work around an issue on Nvidia drivers where using the entire @@ -197,10 +209,8 @@ namespace dxvk { for (uint32_t i = 0; i < m_memProps.memoryTypeCount; i++) { VkMemoryPropertyFlags flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - if ((m_memTypes[i].memType.propertyFlags & flags) == flags) { + if ((m_memTypes[i].memType.propertyFlags & flags) == flags) m_memTypes[i].heap->budget = 32 << 20; - m_memTypes[i].chunkSize = 1 << 20; - } } } } @@ -217,19 +227,38 @@ namespace dxvk { const VkMemoryDedicatedRequirements& dedAllocReq, const VkMemoryDedicatedAllocateInfo& dedAllocInfo, VkMemoryPropertyFlags flags, - float priority) { + DxvkMemoryFlags hints) { std::lock_guard lock(m_mutex); + // Keep small allocations together to avoid fragmenting + // chunks for larger resources with lots of small gaps, + // as well as resources with potentially weird lifetimes + if (req->size <= SmallAllocationThreshold) { + hints.set(DxvkMemoryFlag::Small); + hints.clr(DxvkMemoryFlag::GpuWritable, DxvkMemoryFlag::GpuReadable); + } + + // Ignore all hints for host-visible allocations since they + // 
usually don't make much sense for those resources + if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + hints = DxvkMemoryFlags(); + // Try to allocate from a memory type which supports the given flags exactly auto dedAllocPtr = dedAllocReq.prefersDedicatedAllocation ? &dedAllocInfo : nullptr; - DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, priority); + DxvkMemory result = this->tryAlloc(req, dedAllocPtr, flags, hints); // If the first attempt failed, try ignoring the dedicated allocation if (!result && dedAllocPtr && !dedAllocReq.requiresDedicatedAllocation) { - result = this->tryAlloc(req, nullptr, flags, priority); + result = this->tryAlloc(req, nullptr, flags, hints); dedAllocPtr = nullptr; } + // Retry without the hint constraints + if (!result) { + hints.set(DxvkMemoryFlag::IgnoreConstraints); + result = this->tryAlloc(req, nullptr, flags, hints); + } + // If that still didn't work, probe slower memory types as well VkMemoryPropertyFlags optFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; @@ -239,7 +268,7 @@ namespace dxvk { remFlags |= optFlags & -optFlags; optFlags &= ~remFlags; - result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, priority); + result = this->tryAlloc(req, dedAllocPtr, flags & ~remFlags, hints); } if (!result) { @@ -276,7 +305,7 @@ namespace dxvk { const VkMemoryRequirements* req, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, VkMemoryPropertyFlags flags, - float priority) { + DxvkMemoryFlags hints) { DxvkMemory result; for (uint32_t i = 0; i < m_memProps.memoryTypeCount && !result; i++) { @@ -285,7 +314,7 @@ namespace dxvk { if (supported && adequate) { result = this->tryAllocFromType(&m_memTypes[i], - flags, req->size, req->alignment, priority, dedAllocInfo); + flags, req->size, req->alignment, hints, dedAllocInfo); } } @@ -298,35 +327,31 @@ namespace dxvk { VkMemoryPropertyFlags flags, VkDeviceSize size, VkDeviceSize align, - float priority, + DxvkMemoryFlags hints, const 
VkMemoryDedicatedAllocateInfo* dedAllocInfo) { - // Prevent unnecessary external host memory fragmentation - bool isDeviceLocal = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0; - - if (!isDeviceLocal) - priority = 0.0f; + VkDeviceSize chunkSize = pickChunkSize(type->memTypeId, hints); DxvkMemory memory; - if (size >= type->chunkSize || dedAllocInfo) { + if (size >= chunkSize || dedAllocInfo) { DxvkDeviceMemory devMem = this->tryAllocDeviceMemory( - type, flags, size, priority, dedAllocInfo); + type, flags, size, hints, dedAllocInfo); if (devMem.memHandle != VK_NULL_HANDLE) memory = DxvkMemory(this, nullptr, type, devMem.memHandle, 0, size, devMem.memPointer); } else { for (uint32_t i = 0; i < type->chunks.size() && !memory; i++) - memory = type->chunks[i]->alloc(flags, size, align, priority); + memory = type->chunks[i]->alloc(flags, size, align, hints); if (!memory) { DxvkDeviceMemory devMem; - for (uint32_t i = 0; i < 6 && (type->chunkSize >> i) >= size && !devMem.memHandle; i++) - devMem = tryAllocDeviceMemory(type, flags, type->chunkSize >> i, priority, nullptr); + for (uint32_t i = 0; i < 6 && (chunkSize >> i) >= size && !devMem.memHandle; i++) + devMem = tryAllocDeviceMemory(type, flags, chunkSize >> i, hints, nullptr); if (devMem.memHandle) { - Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem); - memory = chunk->alloc(flags, size, align, priority); + Rc<DxvkMemoryChunk> chunk = new DxvkMemoryChunk(this, type, devMem, hints); + memory = chunk->alloc(flags, size, align, hints); type->chunks.push_back(std::move(chunk)); } @@ -344,7 +369,7 @@ namespace dxvk { DxvkMemoryType* type, VkMemoryPropertyFlags flags, VkDeviceSize size, - float priority, + DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo) { bool useMemoryPriority = (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && (m_device->features().extMemoryPriority.memoryPriority); @@ -352,6 +377,13 @@ namespace dxvk { if (type->heap->budget && type->heap->stats.memoryAllocated + size > 
type->heap->budget) return DxvkDeviceMemory(); + float priority = 0.0f; + + if (hints.test(DxvkMemoryFlag::GpuReadable)) + priority = 0.5f; + if (hints.test(DxvkMemoryFlag::GpuWritable)) + priority = 1.0f; + DxvkDeviceMemory result; result.memSize = size; result.memFlags = flags; @@ -433,19 +465,20 @@ namespace dxvk { } - VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId) const { + VkDeviceSize DxvkMemoryAllocator::pickChunkSize(uint32_t memTypeId, DxvkMemoryFlags hints) const { VkMemoryType type = m_memProps.memoryTypes[memTypeId]; VkMemoryHeap heap = m_memProps.memoryHeaps[type.heapIndex]; // Default to a chunk size of 128 MiB VkDeviceSize chunkSize = 128 << 20; - // Try to waste a bit less system memory in 32-bit - // applications due to address space constraints - if (env::is32BitHostPlatform()) { - if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) - chunkSize = 32 << 20; - } + if (hints.test(DxvkMemoryFlag::Small)) + chunkSize = 16 << 20; + + // Try to waste a bit less system memory especially in + // 32-bit applications due to address space constraints + if (type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + chunkSize = 16 << 20; // Reduce the chunk size on small heaps so // we can at least fit in 15 allocations diff --git a/src/dxvk/dxvk_memory.h b/src/dxvk/dxvk_memory.h index 4230d447..7f32cb4c 100644 --- a/src/dxvk/dxvk_memory.h +++ b/src/dxvk/dxvk_memory.h @@ -62,8 +62,6 @@ namespace dxvk { VkMemoryType memType; uint32_t memTypeId; - VkDeviceSize chunkSize; - std::vector<Rc<DxvkMemoryChunk>> chunks; }; @@ -155,6 +153,22 @@ namespace dxvk { void free(); }; + + + /** + * \brief Memory allocation flags + * + * Used to batch similar allocations into the same + * set of chunks, which may help with fragmentation. 
+ */ + enum class DxvkMemoryFlag : uint32_t { + Small = 0, ///< Small allocation + GpuReadable = 1, ///< Medium-priority resource + GpuWritable = 2, ///< High-priority resource + IgnoreConstraints = 3, ///< Ignore most allocation flags + }; + + using DxvkMemoryFlags = Flags<DxvkMemoryFlag>; /** @@ -170,7 +184,8 @@ namespace dxvk { DxvkMemoryChunk( DxvkMemoryAllocator* alloc, DxvkMemoryType* type, - DxvkDeviceMemory memory); + DxvkDeviceMemory memory, + DxvkMemoryFlags m_hints); ~DxvkMemoryChunk(); @@ -179,17 +194,17 @@ namespace dxvk { * * On failure, this returns a slice with * \c VK_NULL_HANDLE as the memory handle. - * \param [in] flags Requested memory flags + * \param [in] flags Requested memory type flags * \param [in] size Number of bytes to allocate * \param [in] align Required alignment - * \param [in] priority Requested priority + * \param [in] hints Memory category * \returns The allocated memory slice */ DxvkMemory alloc( VkMemoryPropertyFlags flags, VkDeviceSize size, VkDeviceSize align, - float priority); + DxvkMemoryFlags hints); /** * \brief Frees memory @@ -220,8 +235,11 @@ namespace dxvk { DxvkMemoryAllocator* m_alloc; DxvkMemoryType* m_type; DxvkDeviceMemory m_memory; + DxvkMemoryFlags m_hints; std::vector<FreeSlice> m_freeList; + + bool checkHints(DxvkMemoryFlags hints) const; }; @@ -235,6 +253,8 @@ namespace dxvk { class DxvkMemoryAllocator { friend class DxvkMemory; friend class DxvkMemoryChunk; + + constexpr static VkDeviceSize SmallAllocationThreshold = 256 << 10; public: DxvkMemoryAllocator(const DxvkDevice* device); @@ -259,7 +279,7 @@ namespace dxvk { * \param [in] dedAllocReq Dedicated allocation requirements * \param [in] dedAllocInfo Dedicated allocation info * \param [in] flags Memory type flags - * \param [in] priority Device-local memory priority + * \param [in] hints Memory hints * \returns Allocated memory slice */ DxvkMemory alloc( @@ -267,7 +287,7 @@ namespace dxvk { const VkMemoryDedicatedRequirements& dedAllocReq, const VkMemoryDedicatedAllocateInfo& 
dedAllocInfo, VkMemoryPropertyFlags flags, - float priority); + DxvkMemoryFlags hints); /** * \brief Queries memory stats @@ -296,21 +316,21 @@ namespace dxvk { const VkMemoryRequirements* req, const VkMemoryDedicatedAllocateInfo* dedAllocInfo, VkMemoryPropertyFlags flags, - float priority); + DxvkMemoryFlags hints); DxvkMemory tryAllocFromType( DxvkMemoryType* type, VkMemoryPropertyFlags flags, VkDeviceSize size, VkDeviceSize align, - float priority, + DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo); DxvkDeviceMemory tryAllocDeviceMemory( DxvkMemoryType* type, VkMemoryPropertyFlags flags, VkDeviceSize size, - float priority, + DxvkMemoryFlags hints, const VkMemoryDedicatedAllocateInfo* dedAllocInfo); void free( @@ -327,7 +347,8 @@ namespace dxvk { DxvkDeviceMemory memory); VkDeviceSize pickChunkSize( - uint32_t memTypeId) const; + uint32_t memTypeId, + DxvkMemoryFlags hints) const; };