#pragma once

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <queue>

#include "../util/thread.h"
#include "dxvk_context.h"

namespace dxvk {
  /**
   * \brief Command stream operation
   *
   * An abstract representation of an operation
   * that can be recorded into a command list.
   */
  class DxvkCsCmd {
    virtual ~DxvkCsCmd() { }
     * \brief Retrieves next command in a command chain
     * This can be used to quickly iterate
     * over commands within a chunk.
     * \returns Pointer the next command
    DxvkCsCmd* next() const {
      return m_next;
     * \brief Sets next command in a command chain
     * \param [in] next Next command
    void setNext(DxvkCsCmd* next) {
      m_next = next;
     * \brief Executes embedded commands
     * \param [in] ctx The target context
    virtual void exec(DxvkContext* ctx) const = 0;
    DxvkCsCmd* m_next = nullptr;
  /**
   * \brief Typed command
   *
   * Stores a function object which is
   * used to execute an embedded command.
   */
  template<typename T>
  class alignas(16) DxvkCsTypedCmd : public DxvkCsCmd {

  public:

    /**
     * \brief Takes ownership of a function object
     * \param [in] cmd Function object to move into the command
     */
    DxvkCsTypedCmd(T&& cmd)
    : m_command(std::move(cmd)) { }

    // Moving is disabled; since a move constructor is user-declared,
    // the copy operations are implicitly deleted as well.
    DxvkCsTypedCmd             (DxvkCsTypedCmd&&) = delete;
    DxvkCsTypedCmd& operator = (DxvkCsTypedCmd&&) = delete;

    /**
     * \brief Invokes the stored function object
     *
     * The call happens through a const object, so
     * \c T must be callable as <tt>const T</tt>.
     * \param [in] ctx The target context
     */
    void exec(DxvkContext* ctx) const override {
      m_command(ctx);
    }

  private:

    T m_command;

  };
  /**
   * \brief Submission flags
   */
  enum class DxvkCsChunkFlag : uint32_t {
    // NOTE(review): the enumerator belonging to the description below
    // was not visible in this part of the file. SingleUse is inferred
    // from that description; confirm the name against existing users.

    /// Indicates that the submitted chunk will
    /// no longer be needed after one submission.
    SingleUse,
  };

  /// Flags type instantiated for DxvkCsChunkFlag. This is the type taken
  /// by DxvkCsChunk::init and DxvkCsChunkPool::allocChunk. The Flags
  /// template itself is declared outside of this file.
  using DxvkCsChunkFlags = Flags<DxvkCsChunkFlag>;
  /**
   * \brief Command chunk
   *
   * Stores a list of commands.
   */
  class DxvkCsChunk : public RcObject {
    constexpr static size_t MaxBlockSize = 16384;
     * \brief Number of commands recorded to the chunk
     * Can be used to check whether the chunk needs to
     * be dispatched or just to keep track of statistics.
    size_t commandCount() const {
      return m_commandCount;

     * \brief Tries to add a command to the chunk
     * If the given command can be added to the chunk, it
     * will be consumed. Otherwise, a new chunk must be
     * created which is large enough to hold the command.
     * \param [in] command The command to add
     * \returns \c true on success, \c false if
     *          a new chunk needs to be allocated
    template<typename T>
    bool push(T& command) {
      using FuncType = DxvkCsTypedCmd<T>;
      if (m_commandOffset + sizeof(FuncType) > MaxBlockSize)
        return false;
      DxvkCsCmd* tail = m_tail;
      m_tail = new (m_data + m_commandOffset)
      if (tail != nullptr)
        m_head = m_tail;
      m_commandCount  += 1;
      m_commandOffset += sizeof(FuncType);
      return true;
     * \brief Initializes chunk for recording
     * \param [in] flags Chunk flags
    void init(DxvkCsChunkFlags flags);
     * \brief Executes all commands
     * This will also reset the chunk
     * so that it can be reused.
     * \param [in] ctx The context
    void executeAll(DxvkContext* ctx);
     * \brief Resets chunk
     * Destroys all recorded commands and
     * marks the chunk itself as empty, so
     * that it can be reused later.
    void reset();
    size_t m_commandCount  = 0;
    size_t m_commandOffset = 0;
    DxvkCsCmd* m_head = nullptr;
    DxvkCsCmd* m_tail = nullptr;

    DxvkCsChunkFlags m_flags;
    char m_data[MaxBlockSize];
  /**
   * \brief Chunk pool
   *
   * Implements a pool of CS chunks which can be
   * recycled. The goal is to reduce the number
   * of dynamic memory allocations.
   */
  class DxvkCsChunkPool {
    DxvkCsChunkPool             (const DxvkCsChunkPool&) = delete;
    DxvkCsChunkPool& operator = (const DxvkCsChunkPool&) = delete;
     * \brief Allocates a chunk
     * Takes an existing chunk from the pool,
     * or creates a new one if necessary.
     * \param [in] flags Chunk flags
     * \returns Allocated chunk object
    DxvkCsChunk* allocChunk(DxvkCsChunkFlags flags);
     * \brief Releases a chunk
     * Resets the chunk and adds it to the pool.
     * \param [in] chunk Chunk to release
    void freeChunk(DxvkCsChunk* chunk);
    sync::Spinlock            m_mutex;
    std::vector<DxvkCsChunk*> m_chunks;
  /**
   * \brief Chunk reference
   *
   * Implements basic reference counting for
   * CS chunks and returns them to the pool
   * as soon as they are no longer needed.
   */
  class DxvkCsChunkRef {
    DxvkCsChunkRef() { }
      DxvkCsChunk*      chunk,
      DxvkCsChunkPool*  pool)
    : m_chunk (chunk),
      m_pool  (pool) {
    DxvkCsChunkRef(const DxvkCsChunkRef& other)
    : m_chunk (other.m_chunk),
      m_pool  (other.m_pool) {
    DxvkCsChunkRef(DxvkCsChunkRef&& other)
    : m_chunk (other.m_chunk),
      m_pool  (other.m_pool) {
      other.m_chunk = nullptr;
      other.m_pool  = nullptr;
    DxvkCsChunkRef& operator = (const DxvkCsChunkRef& other) {
      this->m_chunk = other.m_chunk;
      this->m_pool  = other.m_pool;
      return *this;
    DxvkCsChunkRef& operator = (DxvkCsChunkRef&& other) {
      this->m_chunk = other.m_chunk;
      this->m_pool  = other.m_pool;
      other.m_chunk = nullptr;
      other.m_pool  = nullptr;
      return *this;
    ~DxvkCsChunkRef() {
    DxvkCsChunk* operator -> () const {
      return m_chunk;
    operator bool () const {
      return m_chunk != nullptr;
    DxvkCsChunk*      m_chunk = nullptr;
    DxvkCsChunkPool*  m_pool  = nullptr;
    void incRef() const {
      if (m_chunk != nullptr)
    void decRef() const {
      if (m_chunk != nullptr && m_chunk->decRef() == 0)

  /**
   * \brief Command stream thread
   *
   * Spawns a thread that will execute
   * commands on a DXVK context.
   */
  class DxvkCsThread {
    DxvkCsThread(const Rc<DxvkContext>& context);
     * \brief Dispatches an entire chunk
     * Can be used to efficiently play back large
     * command lists recorded on another thread.
     * \param [in] chunk The chunk to dispatch
    void dispatchChunk(DxvkCsChunkRef&& chunk);
     * \brief Synchronizes with the thread
     * This waits for all chunks in the dispatch
     * queue to be processed by the thread. Note
     * that this does \e not implicitly call
     * \ref flush.
    void synchronize();
    const Rc<DxvkContext>       m_context;
    std::atomic<bool>           m_stopped = { false };
    std::mutex                  m_mutex;
    std::condition_variable     m_condOnAdd;
    std::condition_variable     m_condOnSync;
    std::queue<DxvkCsChunkRef>  m_chunksQueued;
    dxvk::thread                m_thread;
    uint32_t                    m_chunksPending = 0;
    void threadFunc();