From 4a0c81276fd59a9b3dddbaed1e554ee35a7104a3 Mon Sep 17 00:00:00 2001
From: Philip Rebohle <philip.rebohle@tu-dortmund.de>
Date: Mon, 4 Jun 2018 23:31:49 +0200
Subject: [PATCH] [d3d11] Implement new auto-flush heuristic

---
 src/d3d11/d3d11_cmdlist.cpp     |  6 +-----
 src/d3d11/d3d11_cmdlist.h       |  8 +-------
 src/d3d11/d3d11_context.cpp     | 16 ----------------
 src/d3d11/d3d11_context.h       |  1 -
 src/d3d11/d3d11_context_def.cpp |  3 +--
 src/d3d11/d3d11_context_imm.cpp | 22 ++++++++++++++++------
 src/d3d11/d3d11_context_imm.h   | 10 +++++++++-
 7 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/src/d3d11/d3d11_cmdlist.cpp b/src/d3d11/d3d11_cmdlist.cpp
index 9a866581..b46c9419 100644
--- a/src/d3d11/d3d11_cmdlist.cpp
+++ b/src/d3d11/d3d11_cmdlist.cpp
@@ -42,10 +42,8 @@ namespace dxvk {
   
   
   void D3D11CommandList::AddChunk(
-            Rc<DxvkCsChunk>&&   Chunk,
-            UINT                DrawCount) {
+            Rc<DxvkCsChunk>&&   Chunk) {
     m_chunks.push_back(std::move(Chunk));
-    m_drawCount += DrawCount;
   }
   
   
@@ -54,8 +52,6 @@ namespace dxvk {
     
     for (const auto& chunk : m_chunks)
       cmdList->m_chunks.push_back(chunk);
-    
-    cmdList->m_drawCount += m_drawCount;
   }
   
   
diff --git a/src/d3d11/d3d11_cmdlist.h b/src/d3d11/d3d11_cmdlist.h
index dc6d45e8..d15419cc 100644
--- a/src/d3d11/d3d11_cmdlist.h
+++ b/src/d3d11/d3d11_cmdlist.h
@@ -24,8 +24,7 @@ namespace dxvk {
     UINT STDMETHODCALLTYPE GetContextFlags() final;
     
     void AddChunk(
-            Rc<DxvkCsChunk>&&   Chunk,
-            UINT                DrawCount);
+            Rc<DxvkCsChunk>&&   Chunk);
     
     void EmitToCommandList(
             ID3D11CommandList*  pCommandList);
@@ -33,15 +32,10 @@ namespace dxvk {
     void EmitToCsThread(
             DxvkCsThread*       CsThread);
     
-    UINT GetDrawCount() const {
-      return m_drawCount;
-    }
-    
   private:
     
     D3D11Device* const m_device;
     UINT         const m_contextFlags;
-    UINT               m_drawCount = 0;
     
     std::vector<Rc<DxvkCsChunk>> m_chunks;
     
diff --git a/src/d3d11/d3d11_context.cpp b/src/d3d11/d3d11_context.cpp
index 62bb9844..15a3ce69 100644
--- a/src/d3d11/d3d11_context.cpp
+++ b/src/d3d11/d3d11_context.cpp
@@ -1117,8 +1117,6 @@ namespace dxvk {
         VertexCount, 1,
         StartVertexLocation, 0);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1132,8 +1130,6 @@ namespace dxvk {
         StartIndexLocation,
         BaseVertexLocation, 0);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1149,8 +1145,6 @@ namespace dxvk {
         StartVertexLocation,
         StartInstanceLocation);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1168,8 +1162,6 @@ namespace dxvk {
         BaseVertexLocation,
         StartInstanceLocation);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1183,8 +1175,6 @@ namespace dxvk {
       ctx->drawIndexedIndirect(
         bufferSlice, 1, 0);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1197,8 +1187,6 @@ namespace dxvk {
     (DxvkContext* ctx) {
       ctx->drawIndirect(bufferSlice, 1, 0);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1212,8 +1200,6 @@ namespace dxvk {
         ThreadGroupCountY,
         ThreadGroupCountZ);
     });
-    
-    m_drawCount += 1;
   }
   
   
@@ -1226,8 +1212,6 @@ namespace dxvk {
     (DxvkContext* ctx) {
       ctx->dispatchIndirect(bufferSlice);
     });
-    
-    m_drawCount += 1;
   }
   
   
diff --git a/src/d3d11/d3d11_context.h b/src/d3d11/d3d11_context.h
index 292084ad..be00f9a3 100644
--- a/src/d3d11/d3d11_context.h
+++ b/src/d3d11/d3d11_context.h
@@ -654,7 +654,6 @@ namespace dxvk {
     Com<D3D11RasterizerState>   m_defaultRasterizerState;
     
     D3D11ContextState           m_state;
-    UINT                        m_drawCount = 0;
     
     void ApplyInputLayout();
     
diff --git a/src/d3d11/d3d11_context_def.cpp b/src/d3d11/d3d11_context_def.cpp
index 5acb5db2..817afbcb 100644
--- a/src/d3d11/d3d11_context_def.cpp
+++ b/src/d3d11/d3d11_context_def.cpp
@@ -290,8 +290,7 @@ namespace dxvk {
   
   
   void D3D11DeferredContext::EmitCsChunk(Rc<DxvkCsChunk>&& chunk) {
-    m_commandList->AddChunk(std::move(chunk), m_drawCount);
-    m_drawCount = 0;
+    m_commandList->AddChunk(std::move(chunk));
   }
 
 }
\ No newline at end of file
diff --git a/src/d3d11/d3d11_context_imm.cpp b/src/d3d11/d3d11_context_imm.cpp
index 7deeb081..8e0b49a2 100644
--- a/src/d3d11/d3d11_context_imm.cpp
+++ b/src/d3d11/d3d11_context_imm.cpp
@@ -63,8 +63,8 @@ namespace dxvk {
       FlushCsChunk();
       
       // Reset optimization info
-      m_drawCount = 0;
       m_csIsBusy  = false;
+      m_lastFlush = std::chrono::steady_clock::now();
     }
   }
   
@@ -80,8 +80,7 @@ namespace dxvk {
     
     // As an optimization, flush everything if the
     // number of pending draw calls is high enough.
-    if (m_drawCount >= MaxPendingDraws)
-      Flush();
+    FlushImplicit();
     
     // Dispatch command list to the CS thread and
     // restore the immediate context's state
@@ -95,7 +94,6 @@ namespace dxvk {
     // Mark CS thread as busy so that subsequent
     // flush operations get executed correctly.
     m_csIsBusy = true;
-    m_drawCount += commandList->GetDrawCount();
   }
   
   
@@ -202,8 +200,7 @@ namespace dxvk {
     // prior to the previous context flush is above a certain threshold,
     // submit the current command buffer in order to keep the GPU busy.
     // This also helps keep the command buffers at a reasonable size.
-    if (m_drawCount >= MaxPendingDraws)
-      Flush();
+    FlushImplicit();
     
     D3D11DeviceContext::OMSetRenderTargets(
       NumViews, ppRenderTargetViews, pDepthStencilView);
@@ -386,5 +383,18 @@ namespace dxvk {
     m_csThread.dispatchChunk(std::move(chunk));
     m_csIsBusy = true;
   }
+
+
+  void D3D11ImmediateContext::FlushImplicit() {
+    // Flush only if the GPU is about to go idle, in
+    // order to keep the number of submissions low.
+    if (m_device->pendingSubmissions() <= MaxPendingSubmits) {
+      auto now = std::chrono::steady_clock::now();
+
+      // Rate-limit flushes; steady_clock keeps the interval monotonic.
+      if (now - m_lastFlush >= std::chrono::microseconds(MinFlushIntervalUs))
+        Flush();
+    }
+  }
   
 }
\ No newline at end of file
diff --git a/src/d3d11/d3d11_context_imm.h b/src/d3d11/d3d11_context_imm.h
index 6de73f22..5c72796c 100644
--- a/src/d3d11/d3d11_context_imm.h
+++ b/src/d3d11/d3d11_context_imm.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#include <chrono>
+
 #include "d3d11_context.h"
 
 namespace dxvk {
@@ -8,7 +10,8 @@ namespace dxvk {
   class D3D11CommonTexture;
   
   class D3D11ImmediateContext : public D3D11DeviceContext {
-    constexpr static UINT MaxPendingDraws = 500;
+    constexpr static uint32_t MinFlushIntervalUs = 2500;
+    constexpr static uint32_t MaxPendingSubmits  = 2;
   public:
     
     D3D11ImmediateContext(
@@ -56,6 +59,9 @@ namespace dxvk {
     
     DxvkCsThread m_csThread;
     bool         m_csIsBusy = false;
+
+    std::chrono::steady_clock::time_point m_lastFlush
+      = std::chrono::steady_clock::now();
     
     HRESULT MapBuffer(
             D3D11Buffer*                pResource,
@@ -81,6 +87,8 @@ namespace dxvk {
             UINT                              MapFlags);
     
     void EmitCsChunk(Rc<DxvkCsChunk>&& chunk) final;
+
+    void FlushImplicit();
     
   };