﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

/**
 * @examplesource{RenderThreadPool.cpp,PageSampleNvnTutorialLibrary}
 *
 * @brief
 *  This file defines a class that handles running
 *  multiple threads in parallel that create command
 *  buffers to render objects from a work queue.
 */

#include <nn/nn_Assert.h>
#include <nvngdSupport/RenderThreadPool.h>
#include <nvngdSupport/GraphicsObject.h>
#include <nvngdSupport/TutorialUtil.h>

namespace
{
        /* Chunk count handed to every ManagedCommandBuffer and to the FrameBufferedSyncManager. */
    const int g_NumChunks = 2;

        /* Per chunk sizes requested for command memory and control memory. */
    const size_t g_CommandMemoryChunkSize = 1024 * 1024;
    const size_t g_ControlMemoryChunkSize = 16 * 1024;

        /* Size and alignment, in bytes, of the stack carved out for each worker thread. */
    const size_t g_ThreadStackSize = 1024 * 1024;
    const size_t g_ThreadStackAlignment = 4096;
}

    /* Entry point executed by every worker thread. */
void RenderThread(void* pParam);

/*
 * RenderThreadPool Constructor
 * ----------------------------
 * Puts every pointer member into a null state and
 * clears the work index and the running flag. No
 * mutexes, threads or memory are created here.
 */
RenderThreadPool::RenderThreadPool()
    : m_pWorkQueue(NULL)
    , m_pOutput(NULL)
    , m_pFrameBufferSyncManager(NULL)
    , m_WorkIndex(0)
    , m_pThreadStackMemory(NULL)
    , m_Running(false)
{
}

/*
 * RenderThreadPool Destructor
 * ---------------------------
 * Stops the worker threads, finalizes the mutexes
 * and then releases all data created for the threads.
 *
 * NOTE(review): the work and write mutexes are only
 * initialized by Init, so a pool is expected to have
 * been initialized before it is destroyed.
 */
RenderThreadPool::~RenderThreadPool()
{
        /*
         * Worker threads lock the work, write and state mutexes
         * while they run, so they have to be stopped before any
         * of those mutexes is finalized.
         */
    Stop();

    nn::os::FinalizeMutex(&m_WorkMutex);
    nn::os::FinalizeMutex(&m_WriteMutex);

    for (size_t i = 0; i < m_ThreadStateMutex.size(); ++i)
    {
        nn::os::FinalizeMutex(&m_ThreadStateMutex[i]);
    }

    Clean();
}

/*
 * RenderThreadPool::Clean
 * -----------------------
 * Cleans up the per thread command buffer data
 * and thread stack memory.
 */
void RenderThreadPool::Clean()
{
    if(m_CommandBuffers.size() > 0)
    {
        for(size_t i = 0; i < m_CommandBuffers.size(); ++i)
        {
            delete m_CommandBuffers[i];
        }

        m_CommandBuffers.clear();
    }

    if(m_pThreadStackMemory != NULL)
    {
        AlignedDeallocate(m_pThreadStackMemory);
        m_pThreadStackMemory = NULL;
    }
}

/*
 * RenderThreadPool::Init
 * ----------------------
 * Sets up the per thread command buffer data and
 * allocates memory for the per thread stacks.
 */
void RenderThreadPool::Init(int numThreads, NVNdevice* pDevice, NVNqueue* pQueue)
{
    Clean();

    m_CommandBuffers.reserve(numThreads);

        /* Grabs the command and control memory alignment. */
    int commandBufferCommandAlignment = 0;
    int commandBufferControlAlignment = 0;
    nvnDeviceGetInteger(pDevice, NVN_DEVICE_INFO_COMMAND_BUFFER_COMMAND_ALIGNMENT, &commandBufferCommandAlignment);
    nvnDeviceGetInteger(pDevice, NVN_DEVICE_INFO_COMMAND_BUFFER_CONTROL_ALIGNMENT, &commandBufferControlAlignment);

        /* Create sync manager to protect command memory */
    m_pFrameBufferSyncManager = new FrameBufferedSyncManager(pDevice, pQueue, g_NumChunks);

        /*
         * Creates a command buffer per worker thread. Each
         * command buffer will be used to generate multiple
         * sets of commands to draw individual objects from
         * the work queue.
         */
    for(int i = 0; i < numThreads; ++i)
    {
        ManagedCommandBuffer* managedCommandBuffer = new ManagedCommandBuffer(pDevice,
                                                                              g_CommandMemoryChunkSize,
                                                                              g_ControlMemoryChunkSize,
                                                                              g_NumChunks);
        m_CommandBuffers.push_back(managedCommandBuffer);

        m_pFrameBufferSyncManager->RegisterMemoryManager(managedCommandBuffer);
    }

        /* Allocate memory for the per thread stacks. */
    m_ThreadHandles.resize(numThreads);
    if(m_pThreadStackMemory == NULL)
    {
        m_pThreadStackMemory = reinterpret_cast<char*>(AlignedAllocate(g_ThreadStackSize * numThreads, g_ThreadStackAlignment));
    }

        /*
         * Initialize mutexes for grabbing new work items and writing
         * command handle outputs
         */
    nn::os::InitializeMutex(&m_WorkMutex, false, 0);
    nn::os::InitializeMutex(&m_WriteMutex, false, 0);

        /* Initialize thread data and mutexes for checking thread state */
    m_ThreadData.resize(numThreads);
    m_ThreadStateMutex.resize(numThreads);
    for (int i = 0; i < numThreads; ++i)
    {
        ThreadData& threadData = m_ThreadData[i];
        threadData.m_pParentPool = this;
        threadData.m_pManagedCommandBuffer = m_CommandBuffers[i];
        threadData.m_ThreadState = TUTORIAL_THREAD_STATE_NUM_STATES;
        nn::os::InitializeMutex(&m_ThreadStateMutex[i], false, 0);
        threadData.m_ThreadStateCheckMutex = &m_ThreadStateMutex[i];
    }
}

/*
 * RenderThreadPool::Run
 * ---------------------
 * Runs the worker threads with the given work to be done
 * and saves the command handles in the given output buffer.
 *
 * pOutput - Receives one command handle per work item, at the
 *           same index as the work item. It is cleared and
 *           resized to the size of the work queue.
 * pWork   - Objects to record draw commands for.
 *
 * Both pointers are stored in the pool and used by the worker
 * threads after this function returns. The threads are created
 * and started the first time this is called after Init or Stop.
 */
void RenderThreadPool::Run(std::vector<NVNcommandHandle>* pOutput, std::vector<GraphicsObject*>* pWork)
{
        /* Both pointers are dereferenced below, so both are required. */
    NN_ASSERT(pWork != NULL && pOutput != NULL, "Work and/or output not provided to thread pool.");

    m_WorkIndex = 0;
    m_pWorkQueue = pWork;
    m_pOutput = pOutput;

    m_pOutput->clear();
    m_pOutput->resize(m_pWorkQueue->size());

        /* Set thread data to indicate the threads should run */
    for (size_t i = 0; i < m_ThreadStateMutex.size(); ++i)
    {
        nn::os::LockMutex(&m_ThreadStateMutex[i]);
        m_ThreadData[i].m_ThreadState = TUTORIAL_THREAD_STATE_RUN;
        nn::os::UnlockMutex(&m_ThreadStateMutex[i]);
    }

        /* Set threads to actually start if this is the first run */
    if (m_Running == false)
    {
            /* Determine available cores that a thread can be run on */
        nn::Bit64 availableCoresMask = nn::os::GetThreadAvailableCoreMask();
        nn::Bit64 checkCoreMask = 1;
        std::vector<int> availableCores;
        for (int i = 0; i < 16; ++i)
        {
            if (checkCoreMask & availableCoresMask)
            {
                availableCores.push_back(i);
            }

            checkCoreMask <<= 1;
        }

            /* The core list is used as a modulus below, so it must not be empty. */
        NN_ASSERT(!availableCores.empty(), "No available core found for the render threads.");

            /*
             * Create and run threads with the command buffer data from Init.
             * Threads are spread over the available cores in round-robin order.
             */
        for (size_t i = 0; i < m_ThreadHandles.size(); ++i)
        {
            nn::Result result = nn::os::CreateThread(&m_ThreadHandles[i],                          /* Address of ThreadType */
                                                     RenderThread,                                 /* Function pointer to call */
                                                     &m_ThreadData[i],                             /* Data to pass */
                                                     m_pThreadStackMemory + i * g_ThreadStackSize, /* Stack memory for thread to use */
                                                     g_ThreadStackSize,                            /* Size of stack */
                                                     nn::os::DefaultThreadPriority,                /* Priority */
                                                     availableCores[i % availableCores.size()]);   /* Core to place thread (has no effect on Windows) */
            NN_ASSERT(result.IsSuccess());
            nn::os::StartThread(&m_ThreadHandles[i]);
        }

        m_Running = true;
    }
}

/*
* RenderThreadPool::Wait
* ----------------------
* Wait for current frame of rendering to finish, must be called
* after Run
*/
void RenderThreadPool::Wait()
{
    bool running = true;
    while (running)
    {
        running = false;
        for (size_t i = 0; i < m_ThreadStateMutex.size(); ++i)
        {
            nn::os::LockMutex(&m_ThreadStateMutex[i]);
            TutorialThreadState state = m_ThreadData[i].m_ThreadState;

                /*
                 * Check if thread is still running, continue waiting
                 * if a thread is still running
                 */
            if (state == TUTORIAL_THREAD_STATE_RUN)
            {
                running = true;
                nn::os::UnlockMutex(&m_ThreadStateMutex[i]);

                /*
                 *	NOTE: Since threads are run to completion on NX, a call to YieldThread
                 *        is necessary in order to allow other threads on the same core to run
                 */
                nn::os::YieldThread();
                break;
            }
            nn::os::UnlockMutex(&m_ThreadStateMutex[i]);
        }
    }

    m_pFrameBufferSyncManager->InsertFence();
}
/*
 * RenderThreadPool::SwapCommandMemory
 * ----------------------
 * Swap the command memory of the command buffers being used after a render pass
 */
void RenderThreadPool::SwapCommandMemory()
{
    m_pFrameBufferSyncManager->SwapPools();
}

/*
 * RenderThreadPool::Stop
 * ----------------------
 * Stop render thread pool from running and destroy threads.
 * Calling this when the threads are not running (before the
 * first Run, or a second time in a row) only updates the
 * thread states and leaves the thread handles untouched.
 */
void RenderThreadPool::Stop()
{
        /* Set thread data to indicate the threads should stop if they are running */
    for (size_t i = 0; i < m_ThreadStateMutex.size(); ++i)
    {
        nn::os::LockMutex(&m_ThreadStateMutex[i]);
        m_ThreadData[i].m_ThreadState = TUTORIAL_THREAD_STATE_STOP;
        nn::os::UnlockMutex(&m_ThreadStateMutex[i]);
    }

        /*
         * The thread handles only refer to live threads between a Run
         * and the following Stop; waiting on or destroying a handle
         * outside of that window would operate on a thread that was
         * never created or was already destroyed.
         */
    if (!m_Running)
    {
        return;
    }

        /* Wait for each thread to be done running, then destroy it. */
    for (size_t i = 0; i < m_ThreadHandles.size(); ++i)
    {
        nn::os::WaitThread(&m_ThreadHandles[i]);
        nn::os::DestroyThread(&m_ThreadHandles[i]);
    }

    m_Running = false;
}

/*
 * RenderThreadPool::GetNextWorkItem
 * ---------------------------------
 * Atomically grabs the next available work item
 * from the work queue.
 */
std::pair<int, GraphicsObject*> RenderThreadPool::GetNextWorkItem()
{
    nn::os::LockMutex(&m_WorkMutex);

    std::pair<int, GraphicsObject*> res;

        /* If there's no work left, return null data. */
    if(static_cast<size_t>(m_WorkIndex) >= m_pWorkQueue->size())
    {
        res.first = m_WorkIndex;
        res.second = NULL;

        nn::os::UnlockMutex(&m_WorkMutex);

        return res;
    }

        /* Get the work item at the current index. */
    res.first = m_WorkIndex;
    res.second = (*m_pWorkQueue)[m_WorkIndex];

        /* Increment the current work index. */
    ++m_WorkIndex;

    nn::os::UnlockMutex(&m_WorkMutex);

        /* Return the data to be processed. */
    return res;
}

/*
 * RenderThreadPool::WriteThreadOutput
 * -----------------------------------
 * Atomically write the resulting command handle from
 * from a work item at the given index.
 */
void RenderThreadPool::WriteThreadOutput(int index, const NVNcommandHandle& handle)
{
    nn::os::LockMutex(&m_WriteMutex);

    (*m_pOutput)[index] = handle;

    nn::os::UnlockMutex(&m_WriteMutex);
}

/*
 * RenderThread
 * ------------
 * This is the funstion that each work thread runs.
 * The function runs until there are no more work
 * items left in the queue to process.
 */
void RenderThread(void* pParam)
{
    ThreadData* data = reinterpret_cast<ThreadData*>(pParam);

        /* Grab the command buffer data. */
    ManagedCommandBuffer* managedCommandBuffer = data->m_pManagedCommandBuffer;
    NVNcommandBuffer* commandBuffer = managedCommandBuffer->GetCommandBuffer();

    bool run = true;
    while(run)
    {
            /* Check if thread's state has changed */
        nn::os::LockMutex(data->m_ThreadStateCheckMutex);
        TutorialThreadState state = data->m_ThreadState;
        nn::os::UnlockMutex(data->m_ThreadStateCheckMutex);

            /* Determine thread behavior based on state */
        switch (state)
        {
        case TUTORIAL_THREAD_STATE_RUN:
            {
                    /* Get the first work item. */
                std::pair<int, GraphicsObject*> currentWorkItem = data->m_pParentPool->GetNextWorkItem();

                    /* Run while there is still work to be done. */
                while (currentWorkItem.second != NULL)
                {
                        /* Update the object's uniform buffers. */
                    currentWorkItem.second->UpdateUniforms();

                        /* Start recording commands to draw the object. */
                    nvnCommandBufferBeginRecording(commandBuffer);

                        /* Bind the render states for the object. */
                    currentWorkItem.second->BindState(commandBuffer);

                        /* Draw the object. */
                    currentWorkItem.second->Draw(commandBuffer);

                        /*
                        * Grab the command handle for the newly recorded
                        * commands and write it to the output list at the
                        * same index as the work item.
                        */
                    NVNcommandHandle result = nvnCommandBufferEndRecording(commandBuffer);
                    data->m_pParentPool->WriteThreadOutput(currentWorkItem.first, result);

                        /* Grab the next available work item. */
                    currentWorkItem = data->m_pParentPool->GetNextWorkItem();
                }

                    /* Put thread to Wait state */
                nn::os::LockMutex(data->m_ThreadStateCheckMutex);
                TutorialThreadState& currentState = data->m_ThreadState;
                if (currentState == TUTORIAL_THREAD_STATE_RUN)
                {
                    currentState = TUTORIAL_THREAD_STATE_WAIT;
                }
                nn::os::UnlockMutex(data->m_ThreadStateCheckMutex);
            }
            break;

        case TUTORIAL_THREAD_STATE_WAIT:
                /*
                 * Nothing to do so yield and wait until state changes
                 */
            nn::os::YieldThread();
            break;

        case TUTORIAL_THREAD_STATE_STOP:
                /* Rendering is stopped; break out of loop and clean up */
            run = false;
            break;

        default:
            NN_ASSERT(false, "Unknown thread state encountered in render thread pool\n");
        }
    }
}
