﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

/**
 * @examplesource{RingBufferManagers.h,PageSampleNvnTutorialLibrary}
 *
 * @brief
 *  This file defines classes used to manage
 *  the memory of command buffers such that as
 *  few syncs are needed as possible.
 */

#pragma once

#include <nvn/nvn.h>
#include <nvn/nvn_Cpp.h>
#include <nvn/nvn_FuncPtrInline.h>
#include <list>
#include <nn/nn_Assert.h>

/*
 * RingBufferManager
 * -----------------
 * This class manages a ring buffer of <size> entries.  If <T_ElementType> is
 * an integer type, the entries are numbered <start> through <start>+<size>-1.
 * If <T_ElementType> is a pointer to type X, the entries are maintained as
 * pointers, with the first entry at <start> and the last at
 * <start>+(<size>-1)*sizeof(X).
 *
 * We maintain read and write pointers in m_read and m_write.  When m_read ==
 * m_write, the ring buffer is empty.  Both pointers wrap at the end of the
 * ring buffer back to the beginning.
 *
 * To write to the ring buffer, applications request one or multiple entries
 * of contiguous memory via getWriteSpace() and update the write pointer via
 * syncWrite().  If a request for more than one entry would cause us to run
 * off the end of the buffer, we leave blank space at the end and wrap back to
 * the beginning.
 *
 * Reads from the ring buffer are assumed to happen externally, and the read
 * pointer is updated via the setRead() method.
 *
 * This class doesn't provide the storage of the ring buffer, it only manages
 * storage provided from other sources.
 */
template <typename T_ElementType, typename T_SizeType> class RingBufferManager
{
    private:
        /* Selects how getWriteSpace() sizes a reservation. */
        enum WriteSpaceRequest
        {
            WriteSpaceRequest_GetExactSize,   /* Allocate only the requested size. */
            WriteSpaceRequest_GetMaximumSize  /* Allocate the maximum available contiguous size. */
        };

    public:
            /*
             * Set up bookkeeping for a ring of <size> entries whose first entry
             * is <start>.  The ring starts out empty (read == write == start)
             * with no reservation in progress.  No storage is allocated here.
             */
        RingBufferManager(T_SizeType size, T_ElementType start) :
            m_Size(size),
            m_Start(start),
            m_Read(start),
            m_Write(start),
            m_End(start + size),
            m_Writing(false),
            m_WriteLimit(start)
        {}

            /* Number of entries managed by the ring. */
        T_SizeType size() const
        {
            return m_Size;
        }

            /* First entry of the ring. */
        T_ElementType getStart() const
        {
            return m_Start;
        }

            /* Current read pointer (as last stored by setRead()). */
        T_ElementType getRead() const
        {
            return m_Read;
        }

            /* Current write pointer (as last stored by syncWrite() or a wrap in getWriteSpace()). */
        T_ElementType getWrite() const
        {
            return m_Write;
        }

            /* One past the last entry of the ring. */
        T_ElementType getEnd() const
        {
            return m_End;
        }

            /* The ring is empty when the read and write pointers coincide. */
        bool isEmpty() const
        {
            return m_Write == m_Read;
        }

            /* The ring is full when no further entry can be reserved. */
        bool isFull() const
        {
            return getAvailableSpace() == 0;
        }

            /*
             * Store a new read pointer.  Reads happen outside this class; the
             * value is stored as given, without wrapping or validation.
             */
        void setRead(T_ElementType read)
        {
            m_Read = read;
        }

            /*
             * Check for available space in the ring buffer.  Note that we don't allow
             * the ring buffer to get completely full -- m_Read == m_Write implies an
             * empty buffer, not a full one.  If m_Read is ahead of m_Write, the free
             * space in the ring buffer is between the two pointers:
             *
             *     m_Write    m_Read
             *    -----+#########+-----
             *
             * Leaving one entry free, that gives us:
             *
             *     (m_Read - m_Write) - 1
             *
             * entries available.  If m_Read is behind (or equal to) m_Write, the free
             * space in the ring buffer is everything not between the two pointers:
             *
             *      m_Read    m_Write
             *    #####+---------+######
             *
             * That has (m_Write - m_Read) entries occupied, which leaves:
             *
             *     (m_Size - 1) - (m_Write - m_Read)
             *
             * entries available.
             *
             * The two cases are computed separately so that no intermediate value
             * is ever negative.  A single "(m_Read - m_Write) - 1, then add m_Size
             * if negative" computation silently breaks when T_SizeType is unsigned,
             * because the "negative" test can never succeed.
             */
        inline T_SizeType getAvailableSpace() const
        {
            if (m_Read > m_Write)
            {
                return static_cast<T_SizeType>(m_Read - m_Write) - 1;
            }

            return (m_Size - 1) - static_cast<T_SizeType>(m_Write - m_Read);
        }

            /*
             * Reserve <space> contiguous entries in the ring buffer.  If
             * <requestType> is GetExactSize, that exact number of entries is
             * reserved.  If <requestType> is GetMaximumSize, the maximum size
             * allocation starting from the write pointer is reserved.
             *
             * The number of entries allocated is returned, or 0 if no space is
             * available.  On success the write pointer is returned in <current>
             * and a reservation is open until syncWrite() is called.
             *
             * Note that when the request has to wrap, the write pointer is moved
             * to the start of the ring even if the request itself then fails for
             * lack of space.
             */
        inline T_SizeType getWriteSpace(T_ElementType &current, T_SizeType space = 1, WriteSpaceRequest requestType = WriteSpaceRequest_GetExactSize)
        {
            NN_ASSERT(space < m_Size);
            NN_ASSERT(!m_Writing);

                /*
                 * If we're near the end of the buffer and don't have enough
                 * contiguous space, try to reserve padding to the end of the buffer
                 * and wrap around to the start.
                 */
            if (m_Write + static_cast<T_ElementType>(space) > m_End)
            {
                if (!getWriteSpace(current, m_End - m_Write))
                {
                    return 0;
                }
                    /* The padding reservation is consumed immediately. */
                m_Writing = false;
                m_Write = m_Start;
            }

            NN_ASSERT(m_Write + static_cast<T_ElementType>(space) <= m_End);
            if (space > getAvailableSpace())
            {
                return 0;
            }

                /*
                 * If this is a request for the maximum size, compute the amount of
                 * available contiguous space and update the size request accordingly.
                 */
            if (requestType == WriteSpaceRequest_GetMaximumSize)
            {
                T_SizeType maxSpace;
                if (m_Read > m_Write)
                {
                        /*
                         * Compute the space between the write and read pointers,
                         * leaving the last one blank.
                         */
                    maxSpace = (m_Read - m_Write) - 1;
                }
                else
                {
                        /*
                         * Compute the space between the write pointer and the end of
                         * the buffer, leaving the last entry unused if the read
                         * pointer is at the start.
                         */
                    maxSpace = m_End - m_Write;
                    if (m_Read == m_Start)
                    {
                        maxSpace--;
                    }
                }
                NN_ASSERT(maxSpace >= space);
                space = maxSpace;
            }

            m_Writing = true;
            m_WriteLimit = m_Write + space;
            current = m_Write;
            return space;
        }

            /*
             * Request the maximum number of contiguous entries available, as long as
             * it's at least <minSpace>.
             */
        inline T_SizeType getWriteSpaceMax(T_ElementType &current, T_SizeType minSpace)
        {
            return getWriteSpace(current, minSpace, WriteSpaceRequest_GetMaximumSize);
        }

            /*
             * Update the write pointer to <current> after writing in the reserved
             * write space.  <current> must not exceed the limit of the open
             * reservation; it is wrapped back to the start if it reached the end.
             */
        inline void syncWrite(T_ElementType current)
        {
            NN_ASSERT(m_Writing);
            NN_ASSERT(current <= m_WriteLimit);
            m_Write = wrapEntry(current);
            m_Writing = false;
            m_WriteLimit = m_Write;
        }

            /*
             * Apply wrapping (if needed) to a ring buffer entry pointer, wrapping
             * back to the beginning when it has run off the end.  Only a single
             * wrap is applied; the result is asserted to lie inside the ring.
             */
        inline T_ElementType wrapEntry(T_ElementType entry)
        {
            if (entry >= m_End)
            {
                entry -= m_Size;
            }
            NN_ASSERT(entry >= m_Start);
            NN_ASSERT(entry < m_End);
            return entry;
        }

    private:
        T_SizeType      m_Size;             /* Number of entries in the ring buffer. */

        T_ElementType   m_Start;            /* First entry in the ring buffer. */
        T_ElementType   m_Read;             /* Read pointer (next entry to be consumed). */
        T_ElementType   m_Write;            /* Write pointer (next entry to be added). */
        T_ElementType   m_End;              /* Limit of the ring buffer (one past the last entry). */

        bool            m_Writing;          /* Have we reserved space via getWriteSpace? */
        T_ElementType   m_WriteLimit;       /* Limit of the reserved write space. */
};

/*
 * CompletionTracker
 * -----------------
 * Uses a ring buffer of <size> NVNsync objects to track the completion of
 * commands sent to queues.
 *
 * The completion tracker manages a list of tracked allocators and sends
 * notifications to each allocator when a new fence is inserted or when an old
 * sync object has been waited on successfully.  Both notifications include a
 * <fenceid> value indicating the location of the sync object in the ring
 * buffer.
 */
class CompletionTracker
{
    private:
        typedef class TrackedAllocator *Allocator;
        typedef std::list<Allocator> AllocatorList;

    public:
            /*
             * Create a tracker with a ring of <size> sync objects, each
             * initialized against <pDevice>.  The ring indices run from 0 to
             * <size>-1 and double as fence ids.
             */
        CompletionTracker(NVNdevice *pDevice, int size) : m_Ring(size, 0),
            m_Allocators(),
            m_pObjects(NULL)
        {
            m_pObjects = new NVNsync[size];

            for (int i = 0; i < size; i++)
            {
                nvnSyncInitialize(&m_pObjects[i], pDevice);
            }
        }

            /*
             * Finalize every sync object and release the array.  Registered
             * allocators are only forgotten here, never deleted.
             */
        ~CompletionTracker()
        {
            m_Allocators.clear();

            for (int i = 0; i < m_Ring.size(); i++)
            {
                nvnSyncFinalize(&m_pObjects[i]);
            }

            delete[] m_pObjects;
        }

            /* Number of sync objects (ring entries) managed by this tracker. */
        int size() const
        {
            return m_Ring.size();
        }

            /* True when the ring of sync objects holds no outstanding fence. */
        bool isEmpty() const
        {
            return m_Ring.isEmpty();
        }

            /*
             * Register and unregister tracked allocators.  Both return true if
             * the list of allocators was changed.
             */
        bool addAllocator(Allocator allocator);
        bool removeAllocator(Allocator allocator);

            /*
             * Send notifications to tracked allocators when a new sync object is
             * inserted (FenceSync) or removed (SyncWait) from the queue.
             */
        void notifyFenceInserted(int fenceid);
        void notifySyncCompleted(int fenceid);

            /*
             * Insert a sync object into the ring buffer (at the write pointer) and
             * notify tracked allocators.
             */
        void insertFence(NVNqueue *queue);

            /*
             * Check the completion of one or more sync objects (starting at the read
             * pointer).  If <wait> is true, wait for at least one sync object to
             * complete.  Returns true if and only if any sync object was detected to
             * be completed.
             */
        bool updateGet(bool wait = false);

    private:
            /*
             * Not copyable: the tracker owns m_pObjects and finalizes the sync
             * objects in its destructor, so a copy would finalize and delete the
             * same array twice.  Declared but intentionally left undefined.
             */
        CompletionTracker(const CompletionTracker &);
        CompletionTracker &operator=(const CompletionTracker &);

        RingBufferManager<int, int>     m_Ring;         /* Ring of indices into m_pObjects. */
        AllocatorList                   m_Allocators;   /* Allocators to notify (not owned). */
        NVNsync*                        m_pObjects;     /* Array of size() sync objects (owned). */
};


/*
 * TrackedAllocator
 * ----------------
 * Abstract base class used to track and free allocations once dependent NVN
 * commands have completed execution.
 */
class TrackedAllocator
{
    public:
            /*
             * Remember the completion tracker this allocator is associated
             * with; it may be left unset (NULL) and supplied later through
             * setTracker().  Nothing is registered with the tracker here.
             */
        explicit TrackedAllocator(CompletionTracker *pTracker = NULL)
            : m_pTracker(pTracker)
        {}

        virtual ~TrackedAllocator() {}

            /* Associated completion tracker, or NULL if none has been set. */
        CompletionTracker *getTracker() const { return m_pTracker; }

            /* Replace the associated completion tracker. */
        void setTracker(CompletionTracker* pTracker) { m_pTracker = pTracker; }

            /*
             * Notification hooks to be implemented by concrete allocators:
             * one for a fence that has just been inserted and one for a sync
             * object that has completed.  <fenceId> identifies the sync object.
             */
        virtual void notifyFenceInserted(int fenceId) = 0;
        virtual void notifySyncCompleted(int fenceId) = 0;

    private:
        CompletionTracker   *m_pTracker;    /* Tracker this allocator reports to (not owned). */
};


/*
 * TrackedRingBuffer
 * -----------------
 * Utility class that manages a ring buffer of transient memory allocations
 * where all allocations performed before a fence notification are assumed to
 * be completed when the corresponding sync object has landed.
 *
 * This class maintains an array of fences (m_pFences) that records the
 * current write pointer each time a fence is inserted.  The read pointer is
 * updated to the fence when the corresponding sync object has landed.
 */
template <typename T_ElementType, typename T_SizeType>
class TrackedRingBuffer : public RingBufferManager<T_ElementType, T_SizeType>, public TrackedAllocator
{
    public:
            /*
             * Manage a ring of <size> entries starting at <start> and register
             * this allocator with <pTracker>, which must not be NULL.
             *
             * The fence table is indexed by the fence ids handed out by the
             * tracker, so it holds one entry per tracker sync object rather
             * than one entry per ring entry.  Every fence initially points at
             * <start>.
             */
        TrackedRingBuffer(CompletionTracker* pTracker, T_ElementType start, T_SizeType size, size_t alignment) :
            RingBufferManager<T_ElementType, T_SizeType>(size, start),
            TrackedAllocator(pTracker),
            m_pFences(NULL),
            m_FenceCount(0),
            m_AlignmentMask(~(alignment - 1))
        {
            NN_ASSERT(pTracker != NULL);

            m_FenceCount = pTracker->size();
            m_pFences = new T_ElementType[m_FenceCount];

            for (int i = 0; i < m_FenceCount; i++)
            {
                m_pFences[i] = start;
            }

            pTracker->addAllocator(this);
        }

            /*
             * Unregister from the tracker (if one is still set) and release the
             * fence table.
             */
        virtual ~TrackedRingBuffer()
        {
            CompletionTracker *tracker = getTracker();
            if (tracker != NULL)
            {
                tracker->removeAllocator(this);
            }
            delete[] m_pFences;
        }

            /*
             * Change the allocation alignment.  The mask is built as
             * ~(alignment - 1), which only rounds correctly for power-of-two
             * alignments.
             */
        void setAlignment(T_SizeType alignment) { m_AlignmentMask = ~(alignment - 1); }

            /*
             * Record the current write pointer as a fence when a sync object is
             * inserted.  <fenceId> must be a fence id of the tracker this ring
             * was created with.
             */
        void setFence(int fenceId, T_ElementType fence)
        {
            NN_ASSERT(fenceId >= 0 && fenceId < m_FenceCount);
            m_pFences[fenceId] = fence;
        }

            /*
             * Update fences when a sync object is inserted.  This function is virtual
             * so that derived command buffer memory classes can override; we don't
             * continuously track the write pointer on the client side.
             */
        virtual void notifyFenceInserted(int fenceId)
        {
            setFence(fenceId, RingBufferManager<T_ElementType, T_SizeType>::getWrite());
        }

            /*
             * Update the read pointer from a previously stored fence when a sync
             * object has completed.
             */
        void notifySyncCompleted(int fenceId)
        {
            NN_ASSERT(fenceId >= 0 && fenceId < m_FenceCount);
            RingBufferManager<T_ElementType, T_SizeType>::setRead(m_pFences[fenceId]);
        }

            /*
             * Request <minSpace> bytes of write space in the ring buffer.  The
             * resulting amount of space allocated is clamped to <maxSpace> and
             * then masked with the alignment mask.  Returns the number of bytes
             * allocated and stores the write pointer in <current>.
             *
             * If the ring has no room, the tracker is polled once without
             * blocking and after that with blocking waits until the request
             * succeeds.
             */
        T_SizeType getWriteSpace(T_ElementType &current, T_SizeType minSpace, T_SizeType maxSpace)
        {
            CompletionTracker* pTracker = getTracker();
            T_SizeType reservedSize = RingBufferManager<T_ElementType, T_SizeType>::getWriteSpaceMax(current, minSpace);
            bool forceWait = false;

            while (reservedSize == 0)
            {
                    /* With no fence outstanding, nothing could ever free space. */
                NN_ASSERT(!pTracker->isEmpty());
                pTracker->updateGet(forceWait);
                reservedSize = RingBufferManager<T_ElementType, T_SizeType>::getWriteSpaceMax(current, minSpace);
                forceWait = true;
            }

            if (reservedSize > maxSpace)
            {
                reservedSize = maxSpace;
            }

            reservedSize &= m_AlignmentMask;
            return reservedSize;
        }

    private:
            /*
             * Not copyable: the object owns m_pFences and is registered with the
             * tracker by address.  Declared but intentionally left undefined.
             */
        TrackedRingBuffer(const TrackedRingBuffer &);
        TrackedRingBuffer &operator=(const TrackedRingBuffer &);

        T_ElementType   *m_pFences;        /* Write pointer recorded for each fence id (owned). */
        int             m_FenceCount;      /* Number of entries in m_pFences. */
        T_SizeType      m_AlignmentMask;   /* Alignment required for each allocation. */
};

/*
 * TrackedChunkRingBuffer
 * ----------------------
 * Utility class derived from TrackedRingBuffer that doles out memory in
 * chunks with fixed minimum and maximum sizes.  The amount of size returned
 * is variable if the minimum and maximum chunk sizes don't match.
 */
template <typename T_ElementType, typename T_SizeType>
class TrackedChunkRingBuffer : public TrackedRingBuffer <T_ElementType, T_SizeType>
{
    private:
        typedef TrackedRingBuffer<T_ElementType, T_SizeType> BaseRing;

    public:
            /*
             * Forward the ring description to the tracked ring buffer and
             * remember the chunk size limits applied by getWriteSpace().
             */
        TrackedChunkRingBuffer(
            CompletionTracker *tracker, T_ElementType start, int size,
            T_SizeType minChunkSize, T_SizeType maxChunkSize, T_SizeType alignment)
            : BaseRing(tracker, start, size, alignment)
            , m_minChunkSize(minChunkSize)
            , m_maxChunkSize(maxChunkSize)
        {}

            /* Change the upper bound applied to subsequent chunk requests. */
        void setMaxChunkSize(size_t size) { m_maxChunkSize = size; }

            /* Change the lower bound applied to subsequent chunk requests. */
        void setMinChunkSize(size_t size) { m_minChunkSize = size; }

            /*
             * Reserve the next chunk using the stored minimum and maximum
             * chunk sizes.  Returns the size reserved and stores the chunk
             * position in <current>.
             */
        T_SizeType getWriteSpace(T_ElementType &current)
        {
            return BaseRing::getWriteSpace(current, m_minChunkSize, m_maxChunkSize);
        }

    private:
        T_SizeType m_minChunkSize;         /* Smallest chunk size requested from the ring. */
        T_SizeType m_maxChunkSize;         /* Largest chunk size handed out per request. */
};


/*
 * TrackedCommandMemRingBuffer
 * ---------------------------
 * Utility class derived from TrackedRingBuffer that plugs ring buffer memory
 * into the command memory of the specified command buffer.
 */
class TrackedCommandMemRingBuffer : public TrackedChunkRingBuffer <ptrdiff_t, size_t>
{
    public:
            /*
             * Manage <size> bytes of <pool>, beginning at offset <start>, as
             * command memory for <cmdBuf>.  No memory is handed to the command
             * buffer until setupNewChunk() is called.
             */
        TrackedCommandMemRingBuffer(
            NVNcommandBuffer *cmdBuf, NVNmemoryPool *pool, CompletionTracker *tracker, int size,
            ptrdiff_t start, size_t minChunkSize, size_t maxChunkSize, size_t alignment)
            :
            TrackedChunkRingBuffer<ptrdiff_t, size_t>(tracker, start, size, minChunkSize, maxChunkSize, alignment),
            m_cmdBuf(cmdBuf),
            m_pool(pool),
            m_lastChunk(start)
        {
        }

            /*
             * Allocate a new chunk of memory from the ring buffer and plug it into
             * the command buffer.  Always returns true; a failed reservation is
             * only caught by the assertion.
             */
        bool setupNewChunk()
        {
            size_t reservedSize = getWriteSpace(m_lastChunk);
            NN_ASSERT(reservedSize);
            nvnCommandBufferAddCommandMemory(m_cmdBuf, m_pool, m_lastChunk, reservedSize);
            return true;
        }

            /*
             * Update fences when a sync object is inserted.  We need to query the
             * write pointer from the command buffer since we're not tracking
             * continuously.
             *
             * The fence is wrapped just like the write pointer in syncWrite():
             * when the current chunk runs to the end of the ring and has been
             * used completely, the fence belongs at the start of the ring, not
             * one past its end.
             */
        void notifyFenceInserted(int fenceid)
        {
            size_t used = nvnCommandBufferGetCommandMemoryUsed(m_cmdBuf);
            setFence(fenceid, wrapEntry(m_lastChunk + used));
        }

            /*
             * Handle an out-of-memory notification by grabbing and inserting a new
             * chunk of memory.  The portion of the current chunk that was used
             * is committed to the ring first.
             */
        void notifyOutOfMemory()
        {
            size_t used = nvnCommandBufferGetCommandMemoryUsed(m_cmdBuf);
            syncWrite(m_lastChunk + used);
            setupNewChunk();
        }

    private:
        NVNcommandBuffer *m_cmdBuf;         /* Command buffer owning the ring buffer. */
        NVNmemoryPool *m_pool;              /* Memory pool providing storage. */
        ptrdiff_t m_lastChunk;              /* Offset of last chunk given to m_cmdBuf. */
};

/*
 * TrackedControlMemRingBuffer
 * ---------------------------
 * Utility class derived from TrackedChunkRingBuffer that plugs ring buffer
 * memory into the control memory of the specified command buffer.
 */
class TrackedControlMemRingBuffer : public TrackedChunkRingBuffer <ptrdiff_t, size_t>
{
    public:
            /*
             * Manage <size> bytes of backing memory beginning at <start> as
             * control memory for <cmdBuf>.  The ring itself works with offsets
             * from 0; <start> is added back when memory is handed to the
             * command buffer.
             */
        TrackedControlMemRingBuffer(
            NVNcommandBuffer *cmdBuf, CompletionTracker *tracker, int size, char *start,
            size_t minChunkSize, size_t maxChunkSize, size_t alignment)
            :
            TrackedChunkRingBuffer<ptrdiff_t, size_t>(tracker, 0, size, minChunkSize, maxChunkSize, alignment),
            m_cmdBuf(cmdBuf),
            m_lastChunk(0),
            m_dataStart(start)
        {
        }

            /*
             * Allocate a new chunk of memory from the ring buffer and plug it into
             * the command buffer.  Always returns true; a failed reservation is
             * only caught by the assertion.
             */
        bool setupNewChunk()
        {
            size_t reservedSize = getWriteSpace(m_lastChunk);
            NN_ASSERT(reservedSize);
            nvnCommandBufferAddControlMemory(m_cmdBuf, m_lastChunk + m_dataStart, reservedSize);
            return true;
        }

            /*
             * Update fences when a sync object is inserted.  We need to query the
             * write pointer from the command buffer since we're not tracking
             * continuously.
             *
             * The fence is wrapped just like the write pointer in syncWrite():
             * when the current chunk runs to the end of the ring and has been
             * used completely, the fence belongs at the start of the ring, not
             * one past its end.
             */
        void notifyFenceInserted(int fenceid)
        {
            size_t used = nvnCommandBufferGetControlMemoryUsed(m_cmdBuf);
            setFence(fenceid, wrapEntry(m_lastChunk + used));
        }

            /*
             * Handle an out-of-memory notification by grabbing and inserting a new
             * chunk of memory.  The portion of the current chunk that was used
             * is committed to the ring first.
             */
        void notifyOutOfMemory()
        {
            size_t used = nvnCommandBufferGetControlMemoryUsed(m_cmdBuf);
            syncWrite(m_lastChunk + used);
            setupNewChunk();
        }

    private:
        NVNcommandBuffer *m_cmdBuf;         /* Command buffer owning the ring buffer. */
        ptrdiff_t m_lastChunk;              /* Offset of last chunk given to m_cmdBuf. */
        char* m_dataStart;                  /* Pointer to start of backing memory */
};

/*
 * Register <allocator> for fence notifications.  Returns false, leaving the
 * list untouched, if it is already registered; otherwise appends it and
 * returns true.
 */
inline bool CompletionTracker::addAllocator(Allocator allocator)
{
    AllocatorList::iterator cursor = m_Allocators.begin();
    while (cursor != m_Allocators.end())
    {
        if (*cursor == allocator)
        {
            return false;
        }
        cursor++;
    }

    m_Allocators.push_back(allocator);
    return true;
}

/*
 * Unregister <allocator>.  Only the first matching entry is erased.  Returns
 * true if an entry was removed and false if the allocator was not registered.
 */
inline bool CompletionTracker::removeAllocator(Allocator allocator)
{
    AllocatorList::iterator cursor = m_Allocators.begin();
    while (cursor != m_Allocators.end() && *cursor != allocator)
    {
        cursor++;
    }

    if (cursor == m_Allocators.end())
    {
        return false;
    }

    m_Allocators.erase(cursor);
    return true;
}

inline void CompletionTracker::notifyFenceInserted(int fenceid)
{
    AllocatorList::iterator it;
    for (it = m_Allocators.begin(); it != m_Allocators.end(); it++)
    {
        Allocator allocator = *it;
        allocator->notifyFenceInserted(fenceid);
    }
}

inline void CompletionTracker::notifySyncCompleted(int fenceid)
{
    AllocatorList::iterator it;

    for (it = m_Allocators.begin(); it != m_Allocators.end(); it++)
    {
        Allocator allocator = *it;
        allocator->notifySyncCompleted(fenceid);
    }
}

/*
 * Queue a fence on <queue> using the sync object at the ring's write pointer,
 * flush the queue, notify the tracked allocators and advance the write
 * pointer by one entry.
 */
inline void CompletionTracker::insertFence(NVNqueue *queue)
{
        /*
         * Before writing a new fence, wait on a previous fence if the ring buffer
         * is full.
         */
    if (m_Ring.isFull())
    {
        updateGet(true);
    }

    int put = 0;
    const int reserved = m_Ring.getWriteSpace(put);
    NN_ASSERT(reserved);
    if (reserved == 0)
    {
            /*
             * No sync object could be freed.  Skip the fence instead of
             * indexing m_pObjects with a slot that was never reserved; the
             * work submitted so far is then covered by the next fence.
             */
        return;
    }

    nvnQueueFenceSync(queue, &m_pObjects[put], NVN_SYNC_CONDITION_ALL_GPU_COMMANDS_COMPLETE,
        NVN_SYNC_FLAG_FLUSH_FOR_CPU_BIT);
    nvnQueueFlush(queue);
    notifyFenceInserted(put);

        /* Commit the slot; syncWrite() wraps the pointer at the end of the ring. */
    put++;
    m_Ring.syncWrite(put);
}

inline bool CompletionTracker::updateGet(bool wait /*= false*/)
{
    bool updated = false;
    uint64_t timeout = wait ? NVN_WAIT_TIMEOUT_MAXIMUM : NVN_WAIT_TIMEOUT_NONE;

    while (!m_Ring.isEmpty())
    {
        int get = m_Ring.getRead();
        NVNsyncWaitResult condition = nvnSyncWait(&m_pObjects[get], timeout);

        if (condition == NVN_SYNC_WAIT_RESULT_TIMEOUT_EXPIRED)
        {
            break;
        }

        notifySyncCompleted(get);
        get = m_Ring.wrapEntry(get + 1);
        m_Ring.setRead(get);
        updated = true;
        timeout = NVN_WAIT_TIMEOUT_NONE;
    }
    return updated;
}

/*
 * Allocate a CompletionTracker with <size> sync objects on <pDevice>.  The
 * tracker is created with new; nothing in this function releases it.
 */
inline CompletionTracker* initCompletionTracker(NVNdevice *pDevice, int size)
{
    return new CompletionTracker(pDevice, size);
}

/*
 * Free-function wrapper that inserts a fence for <queue> through <tracker>.
 */
inline void insertCompletionTrackerFence(CompletionTracker *tracker, NVNqueue *queue)
{
    CompletionTracker &target = *tracker;
    target.insertFence(queue);
}

/*
 * Pairs the command-memory and control-memory ring buffers of a command
 * buffer.  outOfMemory() receives a pointer to this struct as its callback
 * data and forwards the event to the matching member.  The pointers are not
 * initialized or released by this struct.
 */
struct CommandBufferMemoryManager
{
    TrackedCommandMemRingBuffer* m_commandMemoryManager;    /* Notified on out-of-command-memory events. */
    TrackedControlMemRingBuffer* m_controlMemoryManager;    /* Notified on out-of-control-memory events. */
};

/*
 * Command buffer memory callback.  <callbackData> must point to a
 * CommandBufferMemoryManager; the ring buffer matching <event> is asked to
 * supply a new chunk of memory.  <minSize> is only checked to be non-zero.
 */
inline void NVNAPIENTRY outOfMemory(NVNcommandBuffer *cmdBuf, NVNcommandBufferMemoryEvent event, size_t minSize, void *callbackData)
{
    NN_ASSERT(cmdBuf);
    NN_ASSERT(minSize);

        /*
         * A note on the memory usage queries used by the ring buffers
         * (nvnCommandBufferGetCommandMemoryUsed, etc.): the value returned
         * is the amount of memory used out of what the command buffer is
         * currently managing, not the cumulative amount it has been given.
         *
         * For example:
         * If the command buffer is initialized with a 256 kb sized buffer
         * and 200 kb has been used so far:
         *  nvnCommandBufferGetCommandMemoryUsed:   200 kb
         *  Actual Memory Used:                     200 kb
         *  Total Memory Given:                     256 kb
         *
         * If another 60 kb is used before the next query for memory used,
         * this out of memory callback will be hit and provide more memory
         * to the command buffer; in this example an extra 256 kb will be
         * provided.  Calling nvnCommandBufferGetCommandMemoryUsed after
         * the out of memory callback is hit would instead give:
         *  nvnCommandBufferGetCommandMemoryUsed:   4 kb
         *  Actual Memory Used:                     260 kb
         *  Total Memory Given:                     512 kb
         *
         * The function returns 4 kb because, of the new 256 kb chunk that
         * it is currently using, only 4 kb of memory has been used.
         */
    CommandBufferMemoryManager* const managers = static_cast<CommandBufferMemoryManager *>(callbackData);

    switch (event)
    {
    case NVN_COMMAND_BUFFER_MEMORY_EVENT_OUT_OF_COMMAND_MEMORY:
        managers->m_commandMemoryManager->notifyOutOfMemory();
        break;

    case NVN_COMMAND_BUFFER_MEMORY_EVENT_OUT_OF_CONTROL_MEMORY:
        managers->m_controlMemoryManager->notifyOutOfMemory();
        break;

    default:
        NN_ASSERT(!"Unknown command buffer event.");
        break;
    }
}
