﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cmath>
#include <cstdio>
#include <nvn/nvn_FuncPtrInline.h>
#include <nvn/nvn_FuncPtrImpl.h>

#include <nn/fs.h>
#include <nn/util/util_Matrix.h>
#include <nn/util/util_Vector.h>
#include <nn/nn_SdkAssert.h>
#include <nn/nn_Log.h>
#include <nn/os.h>
#include <nn/os/os_Thread.h>
#include <nn/os/os_Mutex.h>
#ifdef _WIN32 // For nvn debugger dll loading
    #ifndef WIN32_LEAN_AND_MEAN
    #define WIN32_LEAN_AND_MEAN
    #endif
    #ifndef NOMINMAX
    #define NOMINMAX
    #endif
    #include <nn/nn_Windows.h>
#endif

#include <string>

#include <nvngdSupport/UniformBufferManager.h>
#include <nvngdSupport/TutorialBaseClass.h>
#include <nvngdSupport/ShaderTypeEnum.h>
#include <nvngdSupport/MultiThreadedAssetFileLoadingHelper.h>
#include <nvngdSupport/TextureIDManager.h>
#include <nvngdSupport/ManagedCommandBuffer.h>
#include <nvngdSupport/FrameBufferManager.h>

/* Number of cube assets that are loaded and drawn; also sizes the per-cube uniform buffer arrays. */
static const int    g_CubesToRender = 9;
/* Approximation of pi, used only to derive g_ToRad. */
static const float  g_Pi = 3.14159f;
/* Multiply an angle in degrees by this factor to obtain radians. */
static const float  g_ToRad = g_Pi / 180.0f;

/* Chunk count handed to the ManagedCommandBuffer constructor in Init(). */
static const int    g_NumChunks = 2;

/* Command / control memory chunk sizes handed to the ManagedCommandBuffer constructor (presumably in bytes). */
static const size_t g_CommandMemoryChunkSize = 32 * 1024;
static const size_t g_ControlMemoryChunkSize = 4 * 1024;

/* Number of color render targets created in Resize() and handed to the NVNwindow. */
static const int    g_NumColorBuffers = 2;

/*
 * FrameBufferMemoryManagement
 * ---------------------------
 * Tutorial application class that renders g_CubesToRender textured cubes.
 * Command memory and uniform memory are multi-buffered through
 * ManagedCommandBuffer / UniformBufferManager, and both are registered
 * with a FrameBufferedSyncManager in Init(). The color and depth render
 * targets share a single memory pool allocated once in Init().
 */
class FrameBufferMemoryManagement : public TutorialBaseClass
{
    NN_DISALLOW_COPY(FrameBufferMemoryManagement);

    public:
        FrameBufferMemoryManagement();
        virtual ~FrameBufferMemoryManagement();

        /* Creates the device, queue, state objects and managers, and loads the assets. */
        virtual void Init(PFNNVNBOOTSTRAPLOADERPROC pLoader, NVNnativeWindow nativeWindow);
        /* Releases everything created by Init() and Resize(). */
        virtual void Shutdown();

        /* Records, submits and presents one frame. The millisec argument is not used. */
        virtual void Draw(uint64_t millisec);
        /* (Re)creates the render targets and the NVNwindow for the given size. */
        virtual void Resize(int width, int height);

    private:
        /* Callback installed on the device when the NVN debug layer is enabled. */
        static void NVNAPIENTRY DebugLayerCallback(
            NVNdebugCallbackSource source,
            NVNdebugCallbackType type,
            int id,
            NVNdebugCallbackSeverity severity,
            const char* message,
            void* user
            );

        void SetupUniformBlock();
        void UpdateSimpleTexturedModelUniformBlock(ManagedUniformBuffer* pVertexUniformBuffer, ManagedUniformBuffer* pFragmentUniformBuffer, nn::util::Matrix4x4fType& modelMatrix, nn::util::Matrix4x4fType& projectionMatrix, NVNtextureHandle handle);
        void MultiThreadedLoad();
        int PopulateCommandBuffer();
        int UpdateRenderTargets();

        /* Core NVN objects. m_pQueueMemory is the backing store handed to the queue builder. */
        NVNdevice                            m_Device;
        NVNqueue                             m_Queue;
        void*                                m_pQueueMemory;

        /* Multi-buffered command buffer and the handle submitted in Draw(). */
        ManagedCommandBuffer*                m_pManagedCommandBuffer;
        NVNcommandHandle                     m_CommandHandle;

        /* Render target textures and the builders configured in Init() / Resize(). */
        NVNtextureBuilder                    m_RenderTargetBuilder;
        NVNtexture*                          m_RenderTargets[g_NumColorBuffers];
        NVNtexture*                          m_pDepthBuffer;
        NVNsamplerBuilder                    m_SamplerBuilder;
        NVNbufferBuilder                     m_BufferBuilder;

        /* Asset data; m_DataHolders receives one entry per cube in MultiThreadedLoad(). */
        MultiThreadedAssetFileLoadingHelper* m_pMultiThreadedAssetLoader;
        std::vector<AssetFileDataHolder*>    m_DataHolders;

        /* Current surface size, updated in Resize(). */
        int                                  m_ScreenWidth;
        int                                  m_ScreenHeight;

        /* Render state objects bound while recording the command buffer. */
        NVNblendState                        m_BlendState;
        NVNchannelMaskState                  m_ChannelMaskState;
        NVNcolorState                        m_ColorState;
        NVNdepthStencilState                 m_DepthStencilState;
        NVNmultisampleState                  m_MultisampleState;
        NVNpolygonState                      m_PolygonState;

        /* Per-cube vertex / fragment uniform buffers created through the uniform buffer manager. */
        UniformBufferManager*                m_pUniformBufferManager;
        ManagedUniformBuffer*                m_ManagedVertexUniformBuffers[g_CubesToRender];
        ManagedUniformBuffer*                m_ManagedFragmentUniformBuffers[g_CubesToRender];

        /* Inserts fences and swaps the registered multi-buffered memory each frame. */
        FrameBufferedSyncManager*            m_pFrameBufferedSyncManager;

        TextureIDManager*                    m_pTextureIDManager;

        /* Single pool holding all color targets followed by the depth target. */
        MemoryPool*                          m_pRenderTargetMemoryPool;

        /* Window state; m_WindowSync is signaled by nvnWindowAcquireTexture and waited on in Draw(). */
        NVNwindow*                           m_pWindow;
        NVNwindowBuilder                     m_WindowBuilder;
        int                                  m_CurrentWindowIndex;
        NVNsync                              m_WindowSync;

        /* Rotation accumulator advanced every frame in Draw(); used as an angle in degrees. */
        float                                m_RotY;

        /* Storage sizes queried in Init() for a 1920x1080 color / depth target. */
        size_t                               m_ColorTargetSize;
        size_t                               m_DepthTargetSize;
};

/*
 * FrameBufferMemoryManagement Constructor
 * -----------------------------------------
 * Puts every pointer member into a null state and zeroes the scalar
 * bookkeeping members. No NVN objects are created here.
 */
FrameBufferMemoryManagement::FrameBufferMemoryManagement() :
    m_pQueueMemory(NULL),
    m_pManagedCommandBuffer(NULL),
    m_CommandHandle(0),
    m_pDepthBuffer(NULL),
    m_pMultiThreadedAssetLoader(NULL),
    m_ScreenWidth(0),
    m_ScreenHeight(0),
    m_pUniformBufferManager(NULL),
    m_pFrameBufferedSyncManager(NULL),
    m_pTextureIDManager(NULL),
    m_pRenderTargetMemoryPool(NULL),
    m_pWindow(NULL),
    m_CurrentWindowIndex(-1),
    m_RotY(0.0f),
    m_ColorTargetSize(0),
    m_DepthTargetSize(0)
{
        /* No color render target exists until Resize() is called. */
    for (NVNtexture*& pRenderTarget : m_RenderTargets)
    {
        pRenderTarget = NULL;
    }

        /* The per-cube uniform buffers are created later by SetupUniformBlock(). */
    for (int cube = 0; cube != g_CubesToRender; ++cube)
    {
        m_ManagedFragmentUniformBuffers[cube] = NULL;
        m_ManagedVertexUniformBuffers[cube] = NULL;
    }
}

/*
 * FrameBufferMemoryManagement::SetupUniformBlock
 * ------------------------------------------------
 * Requests one vertex-stage and one fragment-stage uniform buffer per
 * cube from the uniform buffer manager, each sized to hold the matching
 * SimpleTexturedModel uniform block.
 */
void FrameBufferMemoryManagement::SetupUniformBlock()
{
    const size_t vertexBlockSize   = sizeof(SimpleTexturedModel::BlockVSUniformBlockData);
    const size_t fragmentBlockSize = sizeof(SimpleTexturedModel::BlockFSUniformBlockData);

    for (int cube = 0; cube < g_CubesToRender; ++cube)
    {
        m_ManagedVertexUniformBuffers[cube]   = m_pUniformBufferManager->CreateUniformBuffer(vertexBlockSize);
        m_ManagedFragmentUniformBuffers[cube] = m_pUniformBufferManager->CreateUniformBuffer(fragmentBlockSize);
    }
}

/*
 * FrameBufferMemoryManagement::UpdateSimpleTexturedModelUniformBlock
 * --------------------------------------------------------------------
 * Fills the vertex-stage block (model, view and projection matrices) and
 * the fragment-stage block (bindless texture handle) and copies each one
 * into the mapped memory of its managed uniform buffer. The view matrix
 * is always the identity.
 */
void FrameBufferMemoryManagement::UpdateSimpleTexturedModelUniformBlock(ManagedUniformBuffer* pVertexUniformBuffer, ManagedUniformBuffer* pFragmentUniformBuffer, nn::util::Matrix4x4fType& modelMatrix, nn::util::Matrix4x4fType& projectionMatrix, NVNtextureHandle handle)
{
        /* Vertex stage: fetch the destination, then build the block locally. */
    void* pVertexDestination = pVertexUniformBuffer->GetMappedPointer();

    SimpleTexturedModel::BlockVSUniformBlockData vsBlock;
    nn::util::Float4x4 storedMatrix;

        /* Model matrix. */
    nn::util::MatrixStore(&storedMatrix, modelMatrix);
    vsBlock.SetUniform_u_modelMtx(*reinterpret_cast<float(*)[16]>(&storedMatrix));

        /* View matrix (identity). */
    nn::util::Matrix4x4fType viewMatrix;
    nn::util::MatrixIdentity(&viewMatrix);
    nn::util::MatrixStore(&storedMatrix, viewMatrix);
    vsBlock.SetUniform_u_viewMtx(*reinterpret_cast<float(*)[16]>(&storedMatrix));

        /* Projection matrix. */
    nn::util::MatrixStore(&storedMatrix, projectionMatrix);
    vsBlock.SetUniform_u_projMtx(*reinterpret_cast<float(*)[16]>(&storedMatrix));

    memcpy(pVertexDestination, &vsBlock, sizeof(vsBlock));

        /* Fragment stage: only the bindless texture handle is needed. */
    void* pFragmentDestination = pFragmentUniformBuffer->GetMappedPointer();

    SimpleTexturedModel::BlockFSUniformBlockData fsBlock;
    fsBlock.SetUniform_u_bindlessTex(handle);

    memcpy(pFragmentDestination, &fsBlock, sizeof(fsBlock));
}

/*
 * FrameBufferMemoryManagement Destructor
 * ----------------------------------------
 * Empty destructor. It releases nothing itself; the NVN objects and
 * heap allocations are cleaned up by Shutdown().
 */
FrameBufferMemoryManagement::~FrameBufferMemoryManagement()
{
}

#ifndef _WIN32
//#define NVN_GRAPHICS_DEBUGGER
#endif

#ifdef NVN_GRAPHICS_DEBUGGER
extern "C" PFNNVNGENERICFUNCPTRPROC nvnDeviceGetToolsProcAddress(NVNdevice *device, const char* name);
#endif

/*!
* FrameBufferMemoryManagement::Init
* ----------------------
* Initialize NVN, load asset files, and create objects needed for the
* application to run.
*
* pLoader      - Bootstrap loader used to look up the NVN entry points.
*                Not used when NVN_GRAPHICS_DEBUGGER is defined.
* nativeWindow - Native window handle stored in the window builder. The
*                NVNwindow itself is created later, in Resize().
*/
void FrameBufferMemoryManagement::Init(PFNNVNBOOTSTRAPLOADERPROC pLoader, NVNnativeWindow nativeWindow)
{
#ifdef NVN_GRAPHICS_DEBUGGER
    /* Load procs from NVNGD instead of from the driver, this must be called once prior to nvnDeviceInitialize. */
    nvnLoadCProcs(nullptr, (PFNNVNDEVICEGETPROCADDRESSPROC)nvnDeviceGetToolsProcAddress);

    NVNdeviceBuilder deviceBuilder;
    nvnDeviceBuilderSetDefaults(&deviceBuilder);
    int deviceFlags = NVN_DEVICE_FLAG_DEBUG_ENABLE_LEVEL_4_BIT;
    nvnDeviceBuilderSetFlags(&deviceBuilder, deviceFlags);

    if (nvnDeviceInitialize(&m_Device, &deviceBuilder) == false)
    {
        NN_ASSERT(0, "nvnDeviceInitialize");
    }

    nvnDeviceSetDebugLabel(&m_Device, "Tutorial06_Device");

    /* Never call nvnLoadCProcs after nvnDeviceInitialize. */
#else /* NVN_GRAPHICS_DEBUGGER */
    /* Fetch the two entry points needed to bootstrap the remaining NVN function pointers. */
    pfnc_nvnDeviceInitialize = reinterpret_cast<PFNNVNDEVICEINITIALIZEPROC>((*pLoader)("nvnDeviceInitialize"));
    pfnc_nvnDeviceGetProcAddress = reinterpret_cast<PFNNVNDEVICEGETPROCADDRESSPROC>((*pLoader)("nvnDeviceGetProcAddress"));
    if (!pfnc_nvnDeviceInitialize)
    {
        /* This can happen if an NVN driver is not installed on a Windows PC. */
        NN_ASSERT(0, "BootstrapLoader failed to find nvnDeviceInitialize");
    }

    /* First load with a NULL device; the procs are reloaded with the real device after it is initialized. */
    nvnLoadCProcs(NULL, pfnc_nvnDeviceGetProcAddress);

    /* The driver's major version must match exactly; its minor version must be at least the SDK's. */
    int MajorVersion, MinorVersion;
    nvnDeviceGetInteger(NULL, NVN_DEVICE_INFO_API_MAJOR_VERSION, &MajorVersion);
    nvnDeviceGetInteger(NULL, NVN_DEVICE_INFO_API_MINOR_VERSION, &MinorVersion);

    if (MajorVersion != NVN_API_MAJOR_VERSION || MinorVersion < NVN_API_MINOR_VERSION)
    {
        NN_ASSERT(0, "NVN SDK not supported by current driver.");
    }

    /* If debug or develop is enabled, turn on NVN's debug layer. */
    int deviceFlags = 0;
#if defined(NN_SDK_BUILD_DEBUG) || defined(NN_SDK_BUILD_DEVELOP)
    deviceFlags = NVN_DEVICE_FLAG_DEBUG_ENABLE_LEVEL_4_BIT;
#endif

    NVNdeviceBuilder deviceBuilder;
    nvnDeviceBuilderSetDefaults(&deviceBuilder);
    nvnDeviceBuilderSetFlags(&deviceBuilder, deviceFlags);

    if (nvnDeviceInitialize(&m_Device, &deviceBuilder) == false)
    {
        /*
        * This can fail for a few reasons; the most likely on Horizon is
        * insufficient device memory.
        */
        NN_ASSERT(0, "nvnDeviceInitialize");
    }

    nvnLoadCProcs(&m_Device, pfnc_nvnDeviceGetProcAddress);
#endif /* NVN_GRAPHICS_DEBUGGER */
        /* Setup the debug callback for the debug layer. */
    if (deviceFlags & NVN_DEVICE_FLAG_DEBUG_ENABLE_LEVEL_4_BIT)
    {
        nvnDeviceInstallDebugCallback(
            &m_Device,
            reinterpret_cast<PFNNVNDEBUGCALLBACKPROC>(&DebugLayerCallback),
            NULL, // For testing purposes; any pointer is OK here.
            NVN_TRUE // NVN_TRUE = Enable the callback.
            );
    }

        /* Initialize the queue. Compute memory is disabled by setting its size to 0. */
    NVNqueueBuilder queueBuilder;
    nvnQueueBuilderSetDevice(&queueBuilder, &m_Device);
    nvnQueueBuilderSetDefaults(&queueBuilder);
    nvnQueueBuilderSetComputeMemorySize(&queueBuilder, 0);

        /* Use the minimum command memory size reported by the device, also as the flush threshold. */
    int minQueueCommandMemorySize = 0;
    nvnDeviceGetInteger(&m_Device, NVN_DEVICE_INFO_QUEUE_COMMAND_MEMORY_MIN_SIZE, &minQueueCommandMemorySize);
    nvnQueueBuilderSetCommandMemorySize(&queueBuilder, minQueueCommandMemorySize);
    nvnQueueBuilderSetCommandFlushThreshold(&queueBuilder, minQueueCommandMemorySize);

    size_t neededQueueMemorySize = nvnQueueBuilderGetQueueMemorySize(&queueBuilder);

    if ((neededQueueMemorySize % NVN_MEMORY_POOL_STORAGE_GRANULARITY) != 0)
    {
        NN_ASSERT(0, "Memory size reported for queue is not the proper granularity");
    }

        /* On Windows targets no queue memory is supplied; elsewhere it is allocated here and freed in Shutdown(). */
#if defined( NN_BUILD_TARGET_PLATFORM_OS_WIN )
    m_pQueueMemory = NULL;
#else
    m_pQueueMemory = AlignedAllocate(neededQueueMemorySize, NVN_MEMORY_POOL_STORAGE_ALIGNMENT);
#endif

    nvnQueueBuilderSetQueueMemory(&queueBuilder, m_pQueueMemory, neededQueueMemorySize);

    if (nvnQueueInitialize(&m_Queue, &queueBuilder) == false)
    {
        NN_ASSERT(0, "nvnQueueInitialize failed");
    }

        /*! Initialize the window sync. */
    if (!nvnSyncInitialize(&m_WindowSync, &m_Device))
    {
        NN_ASSERT(0, "Failed to initialize window sync");
    }

        /*
         * Create a managed Command Buffer.  This contains the NVNcommandBuffer object and the
         * command and control memory pools.  This class is then handed to a FrameBufferSyncManager
         * which manages the syncs to safely buffer the memory and minimizes the amount of time
         * that is spent waiting on commands to complete. The number of chunks should correspond
         * to the number of render targets in the NVNwindow to prevent wasting memory or unnecessarily
         * waiting on a sync due to having too few chunks. The default number of chunks is 2.
         */
    m_pManagedCommandBuffer = new ManagedCommandBuffer(&m_Device, g_CommandMemoryChunkSize, g_ControlMemoryChunkSize, g_NumChunks);

        /* Blend State */
    nvnBlendStateSetDefaults(&m_BlendState);

        /* Channel Mask State */
    nvnChannelMaskStateSetDefaults(&m_ChannelMaskState);

        /* Color State */
    nvnColorStateSetDefaults(&m_ColorState);

        /* Depth Stencil State: depth test and depth write enabled, LESS comparison. */
    nvnDepthStencilStateSetDefaults(&m_DepthStencilState);

    nvnDepthStencilStateSetDepthTestEnable(&m_DepthStencilState, NVN_TRUE);
    nvnDepthStencilStateSetDepthWriteEnable(&m_DepthStencilState, NVN_TRUE);
    nvnDepthStencilStateSetDepthFunc(&m_DepthStencilState, NVNdepthFunc::NVN_DEPTH_FUNC_LESS);

        /* Multisample state */
    nvnMultisampleStateSetDefaults(&m_MultisampleState);

        /* Polygon state: counter-clockwise front faces, back-face culling, filled polygons. */
    nvnPolygonStateSetDefaults(&m_PolygonState);
    nvnPolygonStateSetFrontFace(&m_PolygonState, NVNfrontFace::NVN_FRONT_FACE_CCW);
    nvnPolygonStateSetCullFace(&m_PolygonState, NVNface::NVN_FACE_BACK);
    nvnPolygonStateSetPolygonMode(&m_PolygonState, NVNpolygonMode::NVN_POLYGON_MODE_FILL);

        /* Builders */
    nvnSamplerBuilderSetDevice(&m_SamplerBuilder, &m_Device);
    nvnSamplerBuilderSetDefaults(&m_SamplerBuilder);

    nvnBufferBuilderSetDevice(&m_BufferBuilder, &m_Device);
    nvnBufferBuilderSetDefaults(&m_BufferBuilder);

        /*
         * Set up the texture builder for the render target. No texture is created
         * here; the builder is only used to query the storage size of a 1920x1080
         * RGBA8 color target. Resize() uses this size as the per-target offset
         * into the render target memory pool.
         */
    nvnTextureBuilderSetDevice(&m_RenderTargetBuilder, &m_Device);
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_DISPLAY_BIT | NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, 1920, 1080);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_RGBA8);
    m_ColorTargetSize = nvnTextureBuilderGetStorageSize(&m_RenderTargetBuilder);

        /* Same query for a 1920x1080 DEPTH32F depth target. */
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, 1920, 1080);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_DEPTH32F);
    m_DepthTargetSize = nvnTextureBuilderGetStorageSize(&m_RenderTargetBuilder);

        /* Allocate the render target memory: room for every color target plus one depth target. */
    m_pRenderTargetMemoryPool = new MemoryPool();
    m_pRenderTargetMemoryPool->Init(
        NULL,
        m_ColorTargetSize * g_NumColorBuffers + m_DepthTargetSize,
        NVN_MEMORY_POOL_FLAGS_CPU_NO_ACCESS_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT | NVN_MEMORY_POOL_FLAGS_COMPRESSIBLE_BIT,
        &m_Device);

    m_pTextureIDManager = new TextureIDManager(&m_Device);

        /*
         * Create the uniform buffer manager. This class handles safely writing
         * per frame uniform data to the uniform buffers by buffering the
         * memory pool through multiple chunks.  Syncing to protect the buffered
         * memory is handled through the FrameBufferSyncManager class. The default
         * number of chunks is 2.
         *
         * More details in UniformBufferManager.h/.cpp and
         * ManagerUniformBuffer.h/.cpp.
         */
    m_pUniformBufferManager = new UniformBufferManager(&m_Device, 32 * 1024);
    SetupUniformBlock();

        /*
         * Creates the sync manager.  This class takes a pointer to the classes that
         * derive from FrameBufferedMemoryManager and swaps their memory safely with
         * an array of syncs to prevent overwriting memory currently in use.
         *
         * The two classes that are compatible with the sync manager are:
         *  - ManagedCommandBuffer
         *  - UniformBufferManager
         */
    m_pFrameBufferedSyncManager = new FrameBufferedSyncManager(&m_Device, &m_Queue);
    m_pFrameBufferedSyncManager->RegisterMemoryManager(m_pUniformBufferManager);
    m_pFrameBufferedSyncManager->RegisterMemoryManager(m_pManagedCommandBuffer);

        /* Load assets. */
    MultiThreadedLoad();

        /* Prepare the window builder; the textures are attached and the window is initialized in Resize(). */
    nvnWindowBuilderSetDefaults(&m_WindowBuilder);
    nvnWindowBuilderSetDevice(&m_WindowBuilder, &m_Device);
    nvnWindowBuilderSetNativeWindow(&m_WindowBuilder, nativeWindow);
}//NOLINT(impl/function_size)

/*
 * LoadAssetFiles
 * --------------
 * Multi threaded asset loading thread function
 */
void LoadAssetFiles(void* pParam )
{
    MultiThreadedAssetFileLoadingHelper* loader = (MultiThreadedAssetFileLoadingHelper*)pParam;
    loader->LoadAssetFile();
}

/*
 * FrameBufferMemoryManagement::MultiThreadedLoad
 * ------------------------------------------------
 * This function sets up multiple worker threads to
 * load asset files in parallel. Each thread handles
 * loading in the data from the binary file and setiing
 * up the NVN objects for each section of the file.
 */
void FrameBufferMemoryManagement::MultiThreadedLoad()
{
        /* Initialize the asset loaders. */
    AssetLoaderArg* args[g_CubesToRender];
    MultiThreadedAssetFileLoadingHelper* loaders[g_CubesToRender];

    nn::os::MutexType poolMutex;
    nn::os::InitializeMutex(&poolMutex, false, 0);

    size_t cacheSize = 0;
    nn::Result result = nn::fs::QueryMountRomCacheSize(&cacheSize);
    NN_ASSERT( result.IsSuccess() );

    char* mountRomCacheBuffer = new(std::nothrow) char[cacheSize];
    NN_ASSERT_NOT_NULL(mountRomCacheBuffer);

    result = nn::fs::MountRom("rom", mountRomCacheBuffer, cacheSize);
    NN_ABORT_UNLESS_RESULT_SUCCESS(result);

#ifdef _WIN32
    size_t threadStackSize = 8 * 1024;
    char* threadStack = (char*)AlignedAllocate(g_CubesToRender * threadStackSize, 4096);
#else
    size_t threadStackSize = 1024 * 1024;
    char* threadStack = reinterpret_cast<char*>(AlignedAllocate(g_CubesToRender * threadStackSize, 4096));
    NN_ASSERT(threadStack);
#endif

    nn::fs::FileHandle fileHandle;
    result = nn::fs::OpenFile(&fileHandle, "rom:/uniformManagerConfigAssetFiles.txt", nn::fs::OpenMode_Read);
    NN_ASSERT(result.IsSuccess());

    int64_t fileSize;
    result = nn::fs::GetFileSize(&fileSize, fileHandle);
    NN_ASSERT(result.IsSuccess());
    char* fileData = reinterpret_cast<char*>(malloc(static_cast<size_t>(fileSize + 1)));
    memset(fileData, 0, static_cast<size_t>(fileSize + 1));

    size_t out;
    result = nn::fs::ReadFile(&out, fileHandle, 0, fileData, static_cast<size_t>(fileSize));

    std::string fileName[g_CubesToRender];
    uint32_t start = 0;
    uint32_t end = 0;

    for (int i = 0; i < g_CubesToRender; ++i)
    {
        while (fileData[end] != ' ')
        {
            ++end;
        }

        fileName[i] = std::string(fileData + start, end - start);
        start = ++end;

        args[i] = new AssetLoaderArg(&m_Device, fileName[i].c_str(), &poolMutex, m_pTextureIDManager);
        loaders[i] = new MultiThreadedAssetFileLoadingHelper(args[i]);
    }

    nn::os::ThreadType hThreadArray[g_CubesToRender];

    NN_ASSERT(result.IsSuccess());

    for (int i = 0; i < g_CubesToRender; ++i)
    {
        result = nn::os::CreateThread(
            &hThreadArray[i],                   // Address of ThreadType
            LoadAssetFiles,                     // Function pointer to call
            loaders[i],                         // Data to pass
            threadStack + threadStackSize * i,  // Stack memory for thread to use
            threadStackSize,                    // Size of stack
            nn::os::DefaultThreadPriority);     // Priority

        NN_ASSERT(result.IsSuccess());
        nn::os::StartThread(&hThreadArray[i]);
    }

        /* Wait for the threads to be done. */
    for (int i = 0; i < g_CubesToRender; ++i)
    {
        nn::os::WaitThread(&hThreadArray[i]);
    }

        /* Grab the data holders for each file and do some setup. */
    for (int i = 0; i < g_CubesToRender; ++i)
    {
        AssetFileDataHolder* dataHolder = loaders[i]->GetAssetFileDataHolder();
        m_DataHolders.push_back(dataHolder);

        std::vector<NVNProgramData*>& programData = m_DataHolders[i]->GetProgramData();
        ShaderTypes::ShaderType shaderType = programData[0]->m_ShaderType;

        m_DataHolders[i]->SetupAttributeStatesNVN(ShaderTypes::Attributes_GetAttributeLocationFunction(shaderType));
    }

    nn::os::FinalizeMutex(&poolMutex);
    for (int i = 0; i < g_CubesToRender; ++i)
    {
        delete args[i];
        delete loaders[i];
        nn::os::DestroyThread(&hThreadArray[i]);
    }

    AlignedDeallocate(threadStack);
    free(fileData);

    nn::fs::CloseFile(fileHandle);
    nn::fs::Unmount("rom");
    delete[] mountRomCacheBuffer;
}

/*
 * FrameBufferMemoryManagement::Shutdown
 * ---------------------------------------
 * This method cleans up all nvn objects and dynamically allocated memory.
 *
 * The queue is drained first so that no submitted work is still pending
 * when objects are finalized. The window is finalized before the render
 * targets it was given, and the queue and device are finalized last.
 */
void FrameBufferMemoryManagement::Shutdown()
{
    nvnQueueFinish(&m_Queue);
    nvnSyncFinalize(&m_WindowSync);

    if (m_pManagedCommandBuffer != NULL)
    {
        delete m_pManagedCommandBuffer;
        m_pManagedCommandBuffer = NULL;
    }

        /* The NVNwindow must be finalized before the render targets it owns. */
    if (m_pWindow != NULL)
    {
        nvnWindowFinalize(m_pWindow);
        delete m_pWindow;
        m_pWindow = NULL;
    }

    for(int i = 0; i < g_NumColorBuffers; ++i)
    {
        if (m_RenderTargets[i] != NULL)
        {
            nvnTextureFinalize(m_RenderTargets[i]);
            delete m_RenderTargets[i];
            m_RenderTargets[i] = NULL;
        }
    }

    if (m_pDepthBuffer != NULL)
    {
        nvnTextureFinalize(m_pDepthBuffer);
        delete m_pDepthBuffer;
        m_pDepthBuffer = NULL;
    }

    if (m_pTextureIDManager != NULL)
    {
        delete m_pTextureIDManager;
        m_pTextureIDManager = NULL;
    }

    if (m_pUniformBufferManager != NULL)
    {
        delete m_pUniformBufferManager;
        m_pUniformBufferManager = NULL;
    }

    if (m_pFrameBufferedSyncManager != NULL)
    {
        delete m_pFrameBufferedSyncManager;
        m_pFrameBufferedSyncManager = NULL;
    }

    for (size_t i = 0; i < m_DataHolders.size(); ++i)
    {
        if (m_DataHolders[i] != NULL)
        {
            delete m_DataHolders[i];
            m_DataHolders[i] = NULL;
        }
    }
        /*
         * Drop the now-null entries. MultiThreadedLoad() appends with push_back
         * and then indexes from 0, so stale entries left here would be
         * dereferenced as NULL if Init() were called again after Shutdown().
         */
    m_DataHolders.clear();

    if (m_pMultiThreadedAssetLoader != NULL)
    {
        delete m_pMultiThreadedAssetLoader;
        m_pMultiThreadedAssetLoader = NULL;
    }

        /* The pool backing the render targets goes away only after the textures were finalized above. */
    if(m_pRenderTargetMemoryPool)
    {
        m_pRenderTargetMemoryPool->Shutdown();
        delete m_pRenderTargetMemoryPool;
        m_pRenderTargetMemoryPool = NULL;
    }

    nvnQueueFinalize(&m_Queue);
    if (m_pQueueMemory)
    {
        AlignedDeallocate(m_pQueueMemory);
        m_pQueueMemory = NULL;
    }

    nvnDeviceFinalize(&m_Device);
}

/*
 * FrameBufferMemoryManagement::Draw
 * -----------------------------------
 * This method updates uniform data and submits the commands
 * recorded in the command buffer to the queue and presents
 * the render target to the screen.
 */
void FrameBufferMemoryManagement::Draw(uint64_t /*millisec*/)
{
        /* Get the current render target and setup/submit a command buffer to set it. */
        /* Create the command buffer to render the cubes. */
    int currentRenderTargetIndex = PopulateCommandBuffer();

    static float xPos[] = {-1.5f, 0.0f, 1.5f, -1.5f, 0.0f, 1.5f, -1.5f, 0.0f, 1.5f};
    static float yPos[] = {1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, -1.0f, -1.0f, -1.0f};

    nn::util::Matrix4x4fType projMat44;
    nn::util::MatrixPerspectiveFieldOfViewRightHanded(&projMat44, 60.0f * g_ToRad, static_cast<float>(m_ScreenWidth) / static_cast<float>(m_ScreenHeight), 1.0f, 1000.0f);

        /* Update the uniform data. */
    for(int i = 0; i < g_CubesToRender; ++i)
    {
        nn::util::Matrix4x3fType simpleTranslateMat;
        nn::util::MatrixIdentity(&simpleTranslateMat);
        nn::util::Vector3fType simpleTranslateVector;
        nn::util::VectorSet(&simpleTranslateVector, xPos[i], yPos[i], -3.0f);
        nn::util::MatrixSetTranslate(&simpleTranslateMat, simpleTranslateVector);

        nn::util::Matrix4x3fType simpleRotateXMat;
        nn::util::MatrixIdentity(&simpleRotateXMat);
        nn::util::Vector3fType simpleRotateXVector;
        nn::util::VectorSet(&simpleRotateXVector, 30.0f * g_ToRad, 0.0f, 0.0f);
        nn::util::MatrixSetRotateXyz(&simpleRotateXMat, simpleRotateXVector);

        nn::util::Matrix4x3fType simpleRotateYMat;
        nn::util::MatrixIdentity(&simpleRotateYMat);
        nn::util::Vector3fType simpleRotateYVector;
        nn::util::VectorSet(&simpleRotateYVector, 0.0f, m_RotY * (i + 1) * g_ToRad, 0.0f);
        nn::util::MatrixSetRotateXyz(&simpleRotateYMat, simpleRotateYVector);

        nn::util::Matrix4x3fType simpleScaleMat;
        nn::util::MatrixIdentity(&simpleScaleMat);
        nn::util::Vector3fType simpleScaleVector;
        nn::util::VectorSet(&simpleScaleVector, 0.5f, 0.5f, 0.5f);
        nn::util::MatrixSetScale(&simpleScaleMat, simpleScaleVector);

        nn::util::Matrix4x3fType simpleModelMat;
        nn::util::Matrix4x3fType tempMat1;
        nn::util::Matrix4x3fType tempMat2;
        nn::util::MatrixMultiply(&tempMat1, simpleScaleMat, simpleRotateYMat);
        nn::util::MatrixMultiply(&tempMat2, tempMat1, simpleRotateXMat);
        nn::util::MatrixMultiply(&simpleModelMat, tempMat2, simpleTranslateMat);

        nn::util::Matrix4x4fType simpleModelMat44;
        nn::util::MatrixConvert(&simpleModelMat44, simpleModelMat);

        std::vector<NVNTextureData*>& textureDataSimple = m_DataHolders[i]->GetTextureData();
        UpdateSimpleTexturedModelUniformBlock(m_ManagedVertexUniformBuffers[i], m_ManagedFragmentUniformBuffers[i], simpleModelMat44, projMat44, textureDataSimple[0]->m_TextureHandle);
    }

    m_RotY += 0.15f;

        /*
         * Insert a fence in the sync manager to track
         * when the current chunk in the multi buffer
         * is done being used. This sync is checked at a
         * later frame to minimize waiting.
         */
    m_pFrameBufferedSyncManager->InsertFence();

        /*!
         * Wait on sync that was received in UpdateRenderTargets now that we are
         * actually ready to use the render target
         */
    nvnSyncWait(&m_WindowSync, NVN_WAIT_TIMEOUT_MAXIMUM);

        /* Submit the commands to the queue. */
    nvnQueueSubmitCommands(&m_Queue, 1, &m_CommandHandle);

        /* Present the texture to the screen. */
    nvnQueuePresentTexture(&m_Queue, m_pWindow, currentRenderTargetIndex);

        /* Swap the multi buffered memory. */
    m_pFrameBufferedSyncManager->SwapPools();
}

/*
 * FrameBufferMemoryManagement::UpdateRenderTargets
 * --------------------------------------------------
 * Gets the index of the current render target from the NVNwindow
 * and records a command buffer that sets it up to be rendered to.
 */
int FrameBufferMemoryManagement::UpdateRenderTargets()
{
        /* Get next render target to be used */
    NVNwindowAcquireTextureResult result = nvnWindowAcquireTexture(m_pWindow, &m_WindowSync, &m_CurrentWindowIndex);

    NN_ASSERT(result == NVN_WINDOW_ACQUIRE_TEXTURE_RESULT_SUCCESS);

        /* Record the command buffer to set the target. */
    nvnCommandBufferSetRenderTargets(m_pManagedCommandBuffer->GetCommandBuffer(), 1, &m_RenderTargets[m_CurrentWindowIndex], NULL, m_pDepthBuffer, NULL);

        /* Return the index. */
    return m_CurrentWindowIndex;
}

/*
 * FrameBufferMemoryManagement::Resize
 * -------------------------------------
 * This method is called everytime the window is resized and is passed
 * the new size of the window. It frees the old render target and creates a new
 * render target with the new screen size.
 */
void FrameBufferMemoryManagement::Resize(int width, int height)
{
        /* Check for the window being minimized or having no visible surface. */
    if (width == 0 || height == 0)
    {
        return;
    }

    m_ScreenHeight = height;
    m_ScreenWidth  = width;

        /* If it's the first time Resize is called, allocate the NVNwindow. */
    if (m_pWindow == NULL)
    {
        m_pWindow = new NVNwindow;
    }
        /*
         * Otherwise finalize (free) the NVNwindow used for the previous window size.
         * The NVNWindow must be finalized before a render target it owns is finalized.
         */
    else
    {
        nvnWindowFinalize(m_pWindow);
    }

        /* Set up the builder for the render target. */
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_DISPLAY_BIT | NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, m_ScreenWidth, m_ScreenHeight);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_RGBA8);

    for(int i = 0; i < g_NumColorBuffers; ++i)
    {
            /* If it's the first time Resize is called, allocate the render target. */
        if (!m_RenderTargets[i])
        {
            m_RenderTargets[i] = new NVNtexture;
        }
            /* Otherwise finalize (free) the render target used for the previous window size. */
        else
        {
            nvnTextureFinalize(m_RenderTargets[i]);
        }

        nvnTextureBuilderSetStorage(&m_RenderTargetBuilder, m_pRenderTargetMemoryPool->GetMemoryPool(), m_ColorTargetSize * i);

            /* Create the texture using the current state of the texture builder. */
        nvnTextureInitialize(m_RenderTargets[i], &m_RenderTargetBuilder);
    }

    if (!m_pDepthBuffer)
    {
        m_pDepthBuffer = new NVNtexture;
    }
    else
    {
        nvnTextureFinalize(m_pDepthBuffer);
    }

        /* Initialize depth buffer for render target. */
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, m_ScreenWidth, m_ScreenHeight);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_DEPTH32F);
    nvnTextureBuilderSetStorage(&m_RenderTargetBuilder, m_pRenderTargetMemoryPool->GetMemoryPool(), m_ColorTargetSize * g_NumColorBuffers);

    if (!nvnTextureInitialize(m_pDepthBuffer, &m_RenderTargetBuilder))
    {
        NN_ASSERT(0, "nvnTextureInitialize failed");
    }

        /* Pass off the render targets to the window. */
    nvnWindowBuilderSetTextures(&m_WindowBuilder, g_NumColorBuffers, m_RenderTargets);
    nvnWindowInitialize(m_pWindow, &m_WindowBuilder);
}

/*
 * FrameBufferMemoryManagement::PopulateCommandBuffer
 * --------------------------------------------------
 * Builds the command buffer to render the cubes.
 */
int FrameBufferMemoryManagement::PopulateCommandBuffer()
{
    int currentRenderTargetIndex = -1;
        /// Starts the recording of a new set of commands for the given command buffer.
    m_pManagedCommandBuffer->BeginRecording();
    {
        NVNcommandBuffer* commandBuffer = m_pManagedCommandBuffer->GetCommandBuffer();
        currentRenderTargetIndex = UpdateRenderTargets();

            /* Bind the texture and sampler descriptor pools. */
        m_pTextureIDManager->SetSamplerPool(commandBuffer);
        m_pTextureIDManager->SetTexturePool(commandBuffer);

            /* Sets the scissor rectangle and viewport to the full screen */
        nvnCommandBufferSetScissor(commandBuffer, 0, 0, m_ScreenWidth, m_ScreenHeight);
        nvnCommandBufferSetViewport(commandBuffer, 0, 0, m_ScreenWidth, m_ScreenHeight);

            /* Clears the currently set render target at a given index. */
        float clear_color[4] = { 0.4f, 0.55f, 0.6f, 1.0f };
        nvnCommandBufferClearColor(commandBuffer, 0, clear_color, NVN_CLEAR_COLOR_MASK_RGBA);
        nvnCommandBufferClearDepthStencil(commandBuffer, 1.0, NVN_TRUE, 0, 0);

            /* Bind the render state objects. */
        nvnCommandBufferBindBlendState(commandBuffer, &m_BlendState);
        nvnCommandBufferBindChannelMaskState(commandBuffer, &m_ChannelMaskState);
        nvnCommandBufferBindColorState(commandBuffer, &m_ColorState);
        nvnCommandBufferBindDepthStencilState(commandBuffer, &m_DepthStencilState);
        nvnCommandBufferBindMultisampleState(commandBuffer, &m_MultisampleState);
        nvnCommandBufferBindPolygonState(commandBuffer, &m_PolygonState);
        nvnCommandBufferSetSampleMask(commandBuffer, static_cast<uint32_t>(~0));

        for(int i = 0; i < g_CubesToRender; ++i)
        {
            std::vector<NVNModelData*>&   modelData   = m_DataHolders[i]->GetModelData();
            std::vector<NVNProgramData*>& programData = m_DataHolders[i]->GetProgramData();

            ShaderTypes::ShaderType shaderType = programData[0]->m_ShaderType;

            Model* model = &modelData[0]->m_Model;
            NVNbufferAddress vboAddr = nvnBufferGetAddress(&modelData[0]->m_VertexBuffer);

                /* Bind the vertex buffer(s). */
            for(size_t j = 0; j < model->m_VertexAttributes.size(); ++j)
            {
                VertexAttribute& attr = model->m_VertexAttributes[j];
                nvnCommandBufferBindVertexBuffer(commandBuffer, attr.m_Location, vboAddr + modelData[0]->m_VertexAttributeBufferOffsets[j], attr.m_DataSize);
            }

                /* Bind the uniform buffers. */
            nvnCommandBufferBindUniformBuffer(commandBuffer,
                                              NVN_SHADER_STAGE_VERTEX,
                                              ShaderTypes::BlockVS_GetBinding(NVN_SHADER_STAGE_VERTEX, shaderType),
                                              m_ManagedVertexUniformBuffers[i]->GetCurrentBufferAddress(),
                                              sizeof(SimpleTexturedModel::BlockVSUniformBlockData));

            nvnCommandBufferBindUniformBuffer(commandBuffer,
                                              NVN_SHADER_STAGE_FRAGMENT,
                                              ShaderTypes::BlockFS_GetBinding(NVN_SHADER_STAGE_FRAGMENT, shaderType),
                                              m_ManagedFragmentUniformBuffers[i]->GetCurrentBufferAddress(),
                                              sizeof(SimpleTexturedModel::BlockFSUniformBlockData));

                /* Bind the vertex states. */
            nvnCommandBufferBindVertexAttribState(commandBuffer, static_cast<int>(modelData[0]->m_VertexAttributeStates.size()), &modelData[0]->m_VertexAttributeStates[0]);
            nvnCommandBufferBindVertexStreamState(commandBuffer, static_cast<int>(modelData[0]->m_VertexStreamStates.size()), &modelData[0]->m_VertexStreamStates[0]);

                /* Bind the shader program. */
            nvnCommandBufferBindProgram(commandBuffer, &programData[0]->m_Program, programData[0]->m_ShaderStages);

                /* Draw the primitives. */
            NVNindexType indexType = (NVNindexType)model->m_IndexData.m_IndexType;
            uint32_t numIndices = model->m_IndexData.m_DataSize / model->m_IndexData.m_Stride;
            nvnCommandBufferDrawElements(commandBuffer,
                                         (NVNdrawPrimitive)model->m_NvnDrawPrimitiveType,
                                         indexType,
                                         numIndices,
                                         nvnBufferGetAddress(&modelData[0]->m_IndexBuffer));
        }
    }

    m_CommandHandle = m_pManagedCommandBuffer->EndRecording();

    return currentRenderTargetIndex;
}

/*
 * FrameBufferMemoryManagement::DebugLayerCallback (static)
 * --------------------------------------------------------
 * Logs every field of a debug message (source, type, id and severity
 * as hexadecimal, followed by the message text) and then asserts
 * unconditionally, so any invocation stops execution in builds where
 * NN_ASSERT is active.
 *
 * NOTE(review): presumably registered with the NVN debug layer as its
 * message callback - confirm at the registration site.
 *
 * source, type, id, severity - values describing the message; printed
 *                              verbatim without interpretation.
 * message                    - text logged with %s.
 * user                       - user data pointer; asserted to be NULL.
 */
void FrameBufferMemoryManagement::DebugLayerCallback(
    NVNdebugCallbackSource source,
    NVNdebugCallbackType type,
    int id,
    NVNdebugCallbackSeverity severity,
    const char* message,
    void* user
    )
{
        /* No user data is expected to accompany the callback. */
    NN_ASSERT(user == NULL);

    NN_LOG("NVN Debug Layer Callback:\n");
    NN_LOG("  source:       0x%08x\n", source);
    NN_LOG("  type:         0x%08x\n", type);
    NN_LOG("  id:           0x%08x\n", id);
    NN_LOG("  severity:     0x%08x\n", severity);
    NN_LOG("  message:      %s\n",     message);
        /* Fail on every reported message, regardless of severity. */
    NN_ASSERT(false);
}

/*
 * t
 * -
 * Returns a pointer to the single FrameBufferMemoryManagement instance,
 * exposed through its TutorialBaseClass base. The instance is a
 * function-local static, so it is constructed on the first call and the
 * same object is returned on every later call.
 */
TutorialBaseClass* t()
{
    static FrameBufferMemoryManagement s_Tutorial;
    return &s_Tutorial;
}

/*
 * nnMain
 * ------
 * Program entry point, declared with C linkage. Its only action is to
 * call TutorialRun().
 *
 * NOTE(review): TutorialRun() is defined outside this file; presumably
 * it drives the tutorial object returned by t() - confirm.
 */
extern "C" void nnMain()
{
    TutorialRun();
}
