﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cmath>
#include <cstdio>
#include <nvn/nvn_FuncPtrInline.h>
#include <nvn/nvn_FuncPtrImpl.h>

#include <nn/fs.h>
#include <nn/util/util_Matrix.h>
#include <nn/util/util_Vector.h>
#include <nn/nn_SdkAssert.h>
#include <nn/nn_Log.h>
#include <nn/os.h>
#include <nn/os/os_Thread.h>
#include <nn/os/os_Mutex.h>
#ifdef _WIN32 // For nvn debugger dll loading
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <nn/nn_Windows.h>
#endif

#include <string>

#include <nvngdSupport/UniformBufferManager.h>
#include <nvngdSupport/TutorialBaseClass.h>
#include <nvngdSupport/ShaderTypeEnum.h>
#include <nvngdSupport/AssetFileLoadingHelper.h>
#include <nvngdSupport/TextureIDManager.h>
#include <nvngdSupport/ManagedCommandBuffer.h>
#include <nvngdSupport/FrameBufferManager.h>
#include <lopProfiler/LOP_Profiler.h>

/*
* ComputeInfo
* -----------
* Groups the NVN objects used by the compute pass: the compute program, the
* buffer and memory pool that hold its shader code, and the particle
* position/velocity buffers with their memory pools.
*
* NOTE(review): InitComputeShader() obtains the storage for this object with
* AlignedAllocate() and a cast rather than with new, so the constructor is
* not run on that path - confirm this is intentional.
*/
class ComputeInfo
{
public:
    ComputeInfo();
    ~ComputeInfo();

    // Program + UBO
    NVNprogram                              m_ComputeProgram;
    // Memory pool providing the storage for m_ComputeShaderBuffer.
    MemoryPool*                             m_pComputeShaderBufferMemoryPool;

    // Buffer holding the compiled shader code; its address is stored in m_ComputeShaderData.data.
    NVNbuffer                               m_ComputeShaderBuffer;
    // Control/data pair handed to nvnProgramSetShaders().
    NVNshaderData                           m_ComputeShaderData;

    // Particle info
    // Mapped and read by testLop_Compute::Draw() to place each particle.
    NVNbuffer                               m_ParticlePositions;
    MemoryPool*                             m_pPositionMemoryPool;
    // Presumably the per-particle velocities consumed by the compute shader - TODO confirm.
    NVNbuffer                               m_ParticleVelocities;
    MemoryPool*                             m_pVelocityMemoryPool;

    // Presumably records the compute dispatch into pCmdBuf - definition not visible here.
    void Dispatch(NVNcommandBuffer* pCmdBuf);
    // Presumably fills data[0..length) with random values scaled by modifier - definition not visible here.
    void RandomizeData(float* data, size_t length, float modifier = 1.0f);
};

/* Number of particles; sizes the per-particle uniform buffer arrays and the update loop in Draw(). */
static const int    g_NumParticles = 64;
/* Presumably the byte size of each particle buffer - not referenced in the visible code, TODO confirm. */
static const int    g_ParticleBufferSize = 8 * 1024;

static const float  g_Pi = 3.14159f;
/* Multiply an angle in degrees by this factor to obtain radians. */
static const float  g_ToRad = g_Pi / 180.0f;

/* Chunk count passed to the managed command buffer, uniform buffer manager and frame sync manager. */
static const int    g_NumChunks = 2;

/* Per-chunk command/control memory sizes handed to ManagedCommandBuffer. */
static const size_t g_CommandMemoryChunkSize = 16 * 1024 * 1024;
static const size_t g_ControlMemoryChunkSize = 512 * 1024;

/* Memory size handed to UniformBufferManager. */
static const size_t g_UniformBufferManagerMemorySize = 256 * 1024;

/* Number of color render targets kept in testLop_Compute::m_RenderTargets. */
static const int    g_NumColorBuffers = 2;
/* Cache buffer allocated in Init() and passed to nn::fs::MountRom(). */
char*               g_MountRomCacheBuffer = NULL;

/* Selects the automated-test paths in Draw() and TestDraw() instead of on-screen metric output. */
#define TEST_LOP

/*
* testLop_Compute
* ---------------
* TutorialBaseClass implementation that renders a set of textured cubes whose
* positions are read from the compute particle buffer, and (when USE_LOP is
* defined) profiles the submitted work with lop::LOP_Profiler.
*
* NOTE: m_MetricValues and m_ContinueRendering are declared only when USE_LOP
* is defined, while SetContinueRendering(), ContinueRendering() and
* GetMetricValues() reference them unconditionally, so this class requires
* USE_LOP to compile.
*/
class testLop_Compute : public TutorialBaseClass
{
    NN_DISALLOW_COPY(testLop_Compute);

public:
    testLop_Compute();
    virtual ~testLop_Compute();
    virtual void Init(PFNNVNBOOTSTRAPLOADERPROC pLoader, NVNnativeWindow nativeWindow);
    virtual void Shutdown();

    virtual void Draw(uint64_t millisec);
    virtual bool TestDraw(uint64_t millisec);
    virtual void Resize(int width, int height);

    /* Sets the flag polled by DrawHelper() to decide whether to keep calling Draw(). */
    void SetContinueRendering(bool c)
    {
        m_ContinueRendering = c;
    }

    /* Returns false once Draw() has decoded all profiler passes (TEST_LOP path). */
    bool ContinueRendering()
    {
        return m_ContinueRendering;
    }

    /* Gives access to the (name, value) metric pairs filled by the profiler in Draw(). */
    std::vector<std::pair<std::string, double> >& GetMetricValues()
    {
        return m_MetricValues;
    }

private:
    /* Callback installed with nvnDeviceInstallDebugCallback() when the debug layer is enabled. */
    static void NVNAPIENTRY DebugLayerCallback(
        NVNdebugCallbackSource source,
        NVNdebugCallbackType type,
        int id,
        NVNdebugCallbackSeverity severity,
        const char* message,
        void* user
    );

    void SetupUniformBlock();
    void UpdateSimpleTexturedModelUniformBlock(ManagedUniformBuffer* pVertexUniformBuffer, ManagedUniformBuffer* pFragmentUniformBuffer, nn::util::Matrix4x4fType& modelMatrix, nn::util::Matrix4x4fType& projectionMatrix, NVNtextureHandle handle);

    /* Draw() uses the return value as the index of the render target to present. */
    int PopulateCommandBuffer();
    /* Acquires the next window texture and returns its index. */
    int UpdateRenderTargets();
    void InitComputeShader();

    NVNdevice                            m_Device;
    NVNqueue                             m_Queue;
    /* Backing memory for m_Queue; left NULL on Windows builds (see Init()). */
    void*                                m_pQueueMemory;

    ManagedCommandBuffer*                m_pManagedCommandBuffer;
    /* Handle submitted to the queue in Draw(). */
    NVNcommandHandle                     m_CommandHandle;

    NVNtextureBuilder                    m_RenderTargetBuilder;
    NVNtexture*                          m_RenderTargets[g_NumColorBuffers];
    NVNtexture*                          m_pDepthBuffer;
    NVNsamplerBuilder                    m_SamplerBuilder;
    NVNbufferBuilder                     m_BufferBuilder;

    AssetFileLoadingHelper*              m_pAssetLoader;
    AssetFileDataHolder*                 m_pDataHolder;

    int                                  m_ScreenWidth;
    int                                  m_ScreenHeight;

    NVNblendState                        m_BlendState;
    NVNchannelMaskState                  m_ChannelMaskState;
    NVNcolorState                        m_ColorState;
    NVNdepthStencilState                 m_DepthStencilState;
    NVNmultisampleState                  m_MultisampleState;
    NVNpolygonState                      m_PolygonState;

    UniformBufferManager*                m_pUniformBufferManager;
    /* One vertex/fragment uniform buffer pair per particle, created in SetupUniformBlock(). */
    ManagedUniformBuffer*                m_ManagedVertexUniformBuffers[g_NumParticles];
    ManagedUniformBuffer*                m_ManagedFragmentUniformBuffers[g_NumParticles];

    DebugTextRenderer*                   m_pDebugTextRenderer;
    FrameBufferedSyncManager*            m_pFrameBufferedSyncManager;

    TextureIDManager*                    m_pTextureIDManager;

    /* Pool sized in Init() for g_NumColorBuffers color targets plus one depth target. */
    MemoryPool*                          m_pRenderTargetMemoryPool;

    NVNwindow*                           m_pWindow;
    NVNwindowBuilder                     m_WindowBuilder;
    /* Index written by nvnWindowAcquireTexture(); -1 until a texture has been acquired. */
    int                                  m_CurrentWindowIndex;
    /* Sync signalled by the window acquire and waited on in Draw() before submitting. */
    NVNsync                              m_WindowSync;

    /* Base Y rotation in degrees; Draw() scales it by (particle index + 1). */
    float                                m_RotY;

    size_t                               m_ColorTargetSize;
    size_t                               m_DepthTargetSize;
    ComputeInfo*                         m_ComputeInfo;

#ifdef USE_LOP
    lop::LOP_Profiler*                   m_pProfiler;
    /* Profiler configuration produced by PrepareOfflineData() and used for BeginSession()/DecodeCounters(). */
    std::vector<uint8_t>                 m_ConfigImage;
    std::vector<lop::MetricSpec>         m_SelectedMetrics;
    std::vector<std::pair<std::string, double> >    m_MetricValues;
    bool                                            m_ContinueRendering;
#endif
};

/*
* testLop_Compute Constructor
* ---------------------------
* Puts every pointer and scalar member into a known "not yet created" state.
* The NVN objects themselves are created later, in Init().
*/
testLop_Compute::testLop_Compute() :
    m_pQueueMemory(NULL),
    m_pManagedCommandBuffer(NULL),
    m_CommandHandle(0),
    m_pDepthBuffer(NULL),
    m_pAssetLoader(NULL),
    m_pDataHolder(NULL),
    m_ScreenWidth(0),
    m_ScreenHeight(0),
    m_pUniformBufferManager(NULL),
    m_pDebugTextRenderer(NULL),
    m_pFrameBufferedSyncManager(NULL),
    m_pTextureIDManager(NULL),
    m_pRenderTargetMemoryPool(NULL),
    m_pWindow(NULL),
    m_CurrentWindowIndex(-1),
    m_RotY(0.0f),
    m_ColorTargetSize(0),
    m_DepthTargetSize(0),
    m_ComputeInfo(NULL)
#ifdef USE_LOP
    , m_pProfiler(NULL)
    , m_ContinueRendering(true)
#endif
{
    /* No color render target has been created yet. */
    NVNtexture** const ppTargetsEnd = m_RenderTargets + g_NumColorBuffers;
    for (NVNtexture** ppTarget = m_RenderTargets; ppTarget != ppTargetsEnd; ++ppTarget)
    {
        *ppTarget = NULL;
    }

    /* Per-particle uniform buffers are handed out later by SetupUniformBlock(). */
    int slot = 0;
    while (slot < g_NumParticles)
    {
        m_ManagedVertexUniformBuffers[slot] = NULL;
        m_ManagedFragmentUniformBuffers[slot] = NULL;
        ++slot;
    }
}

/*
* FrameBufferMemoryManagement::SetupUniformBlock
* ------------------------------------------------
* Setup the managed uniform blocks with the appropriate size.
*/
void testLop_Compute::SetupUniformBlock()
{
    for (int i = 0; i < g_NumParticles; ++i)
    {
        m_ManagedVertexUniformBuffers[i] = m_pUniformBufferManager->CreateUniformBuffer(sizeof(SimpleTexturedModel::BlockVSUniformBlockData));
        m_ManagedFragmentUniformBuffers[i] = m_pUniformBufferManager->CreateUniformBuffer(sizeof(SimpleTexturedModel::BlockFSUniformBlockData));
    }
}

/*
* testLop_Compute::UpdateSimpleTexturedModelUniformBlock
* ------------------------------------------------------
* Writes one particle's uniform data into its managed uniform buffers.
*
* pVertexUniformBuffer   - receives the model, view (identity) and projection matrices.
* pFragmentUniformBuffer - receives the bindless texture handle.
* modelMatrix            - model transform for the particle.
* projectionMatrix       - projection transform shared by all particles.
* handle                 - texture handle stored in the fragment block.
*/
void testLop_Compute::UpdateSimpleTexturedModelUniformBlock(ManagedUniformBuffer* pVertexUniformBuffer, ManagedUniformBuffer* pFragmentUniformBuffer, nn::util::Matrix4x4fType& modelMatrix, nn::util::Matrix4x4fType& projectionMatrix, NVNtextureHandle handle)
{
    /* The setters take a float[16]; view the stored 4x4 matrix through this pointer type. */
    typedef float (*Float16ArrayPtr)[16];

    /* ---- Vertex stage ---- */

    /* Destination: the currently mapped location of the managed vertex buffer. */
    void* pVertexDestination = pVertexUniformBuffer->GetMappedPointer();

    SimpleTexturedModel::BlockVSUniformBlockData vsBlock;
    nn::util::Float4x4 scratch;

    /* Model matrix. */
    nn::util::MatrixStore(&scratch, modelMatrix);
    vsBlock.SetUniform_u_modelMtx(*reinterpret_cast<Float16ArrayPtr>(&scratch));

    /* The view matrix is always the identity. */
    nn::util::Matrix4x4fType identityView;
    nn::util::MatrixIdentity(&identityView);
    nn::util::MatrixStore(&scratch, identityView);
    vsBlock.SetUniform_u_viewMtx(*reinterpret_cast<Float16ArrayPtr>(&scratch));

    /* Projection matrix. */
    nn::util::MatrixStore(&scratch, projectionMatrix);
    vsBlock.SetUniform_u_projMtx(*reinterpret_cast<Float16ArrayPtr>(&scratch));

    memcpy(pVertexDestination, &vsBlock, sizeof(vsBlock));

    /* ---- Fragment stage ---- */

    /* Destination: the currently mapped location of the managed fragment buffer. */
    void* pFragmentDestination = pFragmentUniformBuffer->GetMappedPointer();

    SimpleTexturedModel::BlockFSUniformBlockData fsBlock;
    fsBlock.SetUniform_u_bindlessTex(handle);

    memcpy(pFragmentDestination, &fsBlock, sizeof(fsBlock));
}

/*
* testLop_Compute Destructor
* --------------------------
* Intentionally empty: the resources created in Init() are released by
* Shutdown(), not here.
*/
testLop_Compute::~testLop_Compute()
{
}

/*
* Optional NVN Graphics Debugger support (non-Windows builds only).
* Un-commenting the define below makes Init() load the NVN entry points
* through nvnDeviceGetToolsProcAddress instead of the bootstrap loader.
*/
#ifndef _WIN32
//#define NVN_GRAPHICS_DEBUGGER
#endif

#ifdef NVN_GRAPHICS_DEBUGGER
/* Provided externally; used by Init() as the proc-address source for nvnLoadCProcs(). */
extern "C" PFNNVNGENERICFUNCPTRPROC nvnDeviceGetToolsProcAddress(NVNdevice *device, const char* name);
#endif

/*!
* testLop_Compute::Init
* ---------------------
* Initialize NVN, load asset files, and create objects needed for the
* application to run.
*
* pLoader      - bootstrap loader used to look up nvnDeviceInitialize and
*                nvnDeviceGetProcAddress (unused when NVN_GRAPHICS_DEBUGGER
*                is defined).
* nativeWindow - native window handle stored in the window builder.
*
* Order of work: device -> debug callback -> queue -> window sync -> command
* buffer -> render states -> builders -> render target memory -> texture ID
* and uniform buffer managers -> frame sync manager -> ROM mount and asset
* load -> debug text renderer -> (USE_LOP) profiler session -> window builder.
*/
void testLop_Compute::Init(PFNNVNBOOTSTRAPLOADERPROC pLoader, NVNnativeWindow nativeWindow)
{
#ifdef NVN_GRAPHICS_DEBUGGER
    /* Load procs from NVNGD instead of from the driver, this must be called once prior to nvnDeviceInitialize. */
    nvnLoadCProcs(nullptr, (PFNNVNDEVICEGETPROCADDRESSPROC)nvnDeviceGetToolsProcAddress);

    NVNdeviceBuilder deviceBuilder;
    nvnDeviceBuilderSetDefaults(&deviceBuilder);

    int deviceFlags = NVN_DEVICE_FLAG_DEBUG_ENABLE_LEVEL_4_BIT;

    nvnDeviceBuilderSetFlags(&deviceBuilder, deviceFlags);

    if (nvnDeviceInitialize(&m_Device, &deviceBuilder) == false)
    {
        NN_ASSERT(0, "nvnDeviceInitialize");
    }

    nvnDeviceSetDebugLabel(&m_Device, "Tutorial03_Device");

    /* Never call nvnLoadCProcs after nvnDeviceInitialize. */
#else /* NVN_GRAPHICS_DEBUGGER */
    pfnc_nvnDeviceInitialize = reinterpret_cast<PFNNVNDEVICEINITIALIZEPROC>((*pLoader)("nvnDeviceInitialize"));
    pfnc_nvnDeviceGetProcAddress = reinterpret_cast<PFNNVNDEVICEGETPROCADDRESSPROC>((*pLoader)("nvnDeviceGetProcAddress"));
    if (!pfnc_nvnDeviceInitialize)
    {
        /* This can happen if an NVN driver is not installed on a Windows PC. */
        NN_ASSERT(0, "BootstrapLoader failed to find nvnDeviceInitialize");
    }

    /* First load with a NULL device so the version query below can be made. */
    nvnLoadCProcs(NULL, pfnc_nvnDeviceGetProcAddress);

    int MajorVersion, MinorVersion;
    nvnDeviceGetInteger(NULL, NVN_DEVICE_INFO_API_MAJOR_VERSION, &MajorVersion);
    nvnDeviceGetInteger(NULL, NVN_DEVICE_INFO_API_MINOR_VERSION, &MinorVersion);

    if (MajorVersion != NVN_API_MAJOR_VERSION || MinorVersion < NVN_API_MINOR_VERSION)
    {
        NN_ASSERT(0, "NVN SDK not supported by current driver.");
    }

    /* If debug or develop is enabled, turn on NVN's debug layer. */
    int deviceFlags = 0;
#if defined(NN_SDK_BUILD_DEBUG) || defined(NN_SDK_BUILD_DEVELOP)
    deviceFlags = NVN_DEVICE_FLAG_DEBUG_ENABLE_LEVEL_4_BIT;
#endif

    NVNdeviceBuilder deviceBuilder;
    nvnDeviceBuilderSetDefaults(&deviceBuilder);
    nvnDeviceBuilderSetFlags(&deviceBuilder, deviceFlags);

    if (nvnDeviceInitialize(&m_Device, &deviceBuilder) == false)
    {
        /*
        * This can fail for a few reasons; the most likely on Horizon is
        * insufficient device memory.
        */
        NN_ASSERT(0, "nvnDeviceInitialize");
    }

    /* Reload the entry points now that a device exists. */
    nvnLoadCProcs(&m_Device, pfnc_nvnDeviceGetProcAddress);
#endif /* NVN_GRAPHICS_DEBUGGER */
    /* Setup the debug callback for the debug layer. */
    if (deviceFlags & NVN_DEVICE_FLAG_DEBUG_ENABLE_LEVEL_4_BIT)
    {
        nvnDeviceInstallDebugCallback(
            &m_Device,
            reinterpret_cast<PFNNVNDEBUGCALLBACKPROC>(&DebugLayerCallback),
            NULL, // For testing purposes; any pointer is OK here.
            NVN_TRUE // NVN_TRUE = Enable the callback.
        );
    }

    /* Initialize the queue. */
    NVNqueueBuilder queueBuilder;
    nvnQueueBuilderSetDevice(&queueBuilder, &m_Device);
    nvnQueueBuilderSetDefaults(&queueBuilder);
    int computeMemoryNeeded;
    nvnDeviceGetInteger(&m_Device, NVNdeviceInfo::NVN_DEVICE_INFO_QUEUE_COMPUTE_MEMORY_MIN_SIZE, &computeMemoryNeeded);
    nvnQueueBuilderSetComputeMemorySize(&queueBuilder, computeMemoryNeeded);

    /* Use the minimum command memory size for both the command memory and the flush threshold. */
    int minQueueCommandMemorySize = 0;
    nvnDeviceGetInteger(&m_Device, NVN_DEVICE_INFO_QUEUE_COMMAND_MEMORY_MIN_SIZE, &minQueueCommandMemorySize);
    nvnQueueBuilderSetCommandMemorySize(&queueBuilder, minQueueCommandMemorySize);
    nvnQueueBuilderSetCommandFlushThreshold(&queueBuilder, minQueueCommandMemorySize);

    size_t neededQueueMemorySize = nvnQueueBuilderGetQueueMemorySize(&queueBuilder);

    if ((neededQueueMemorySize % NVN_MEMORY_POOL_STORAGE_GRANULARITY) != 0)
    {
        NN_ASSERT(0, "Memory size reported for queue is not the proper granularity");
    }

    /* Windows builds pass NULL queue memory; other targets allocate it here (freed in Shutdown()). */
#if defined( NN_BUILD_TARGET_PLATFORM_OS_WIN )
    m_pQueueMemory = NULL;
#else
    m_pQueueMemory = AlignedAllocate(neededQueueMemorySize, NVN_MEMORY_POOL_STORAGE_ALIGNMENT);
#endif

    nvnQueueBuilderSetQueueMemory(&queueBuilder, m_pQueueMemory, neededQueueMemorySize);

    if (nvnQueueInitialize(&m_Queue, &queueBuilder) == false)
    {
        NN_ASSERT(0, "nvnQueueInitialize failed");
    }

    /*! Initialize the window sync. */
    if (!nvnSyncInitialize(&m_WindowSync, &m_Device))
    {
        NN_ASSERT(0, "Failed to initialize window sync");
    }

    m_pManagedCommandBuffer = new ManagedCommandBuffer(&m_Device, g_CommandMemoryChunkSize, g_ControlMemoryChunkSize, g_NumChunks);

    /* Blend State */
    nvnBlendStateSetDefaults(&m_BlendState);

    /* Channel Mask State */
    nvnChannelMaskStateSetDefaults(&m_ChannelMaskState);

    /* Color State */
    nvnColorStateSetDefaults(&m_ColorState);

    /* Depth Stencil State */
    nvnDepthStencilStateSetDefaults(&m_DepthStencilState);

    nvnDepthStencilStateSetDepthTestEnable(&m_DepthStencilState, NVN_TRUE);
    nvnDepthStencilStateSetDepthWriteEnable(&m_DepthStencilState, NVN_TRUE);
    nvnDepthStencilStateSetDepthFunc(&m_DepthStencilState, NVNdepthFunc::NVN_DEPTH_FUNC_LESS);

    /* Multisample state */
    nvnMultisampleStateSetDefaults(&m_MultisampleState);

    /* Polygon state */
    nvnPolygonStateSetDefaults(&m_PolygonState);
    nvnPolygonStateSetFrontFace(&m_PolygonState, NVNfrontFace::NVN_FRONT_FACE_CCW);
    nvnPolygonStateSetCullFace(&m_PolygonState, NVNface::NVN_FACE_BACK);
    nvnPolygonStateSetPolygonMode(&m_PolygonState, NVNpolygonMode::NVN_POLYGON_MODE_FILL);

    /* Builders */
    nvnSamplerBuilderSetDevice(&m_SamplerBuilder, &m_Device);
    nvnSamplerBuilderSetDefaults(&m_SamplerBuilder);

    nvnBufferBuilderSetDevice(&m_BufferBuilder, &m_Device);
    nvnBufferBuilderSetDefaults(&m_BufferBuilder);

    /* Set up the texture builder for the render target. */
    /* The builder is configured twice only to query storage sizes: first for a color target... */
    nvnTextureBuilderSetDevice(&m_RenderTargetBuilder, &m_Device);
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_DISPLAY_BIT | NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, 1280, 720);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_RGBA8);
    m_ColorTargetSize = nvnTextureBuilderGetStorageSize(&m_RenderTargetBuilder);

    /* ...then for the depth target. */
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, 1280, 720);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_DEPTH32F);
    m_DepthTargetSize = nvnTextureBuilderGetStorageSize(&m_RenderTargetBuilder);

    /* Allocate the render target memory. */
    m_pRenderTargetMemoryPool = new MemoryPool();
    m_pRenderTargetMemoryPool->Init(
        NULL,
        m_ColorTargetSize * g_NumColorBuffers + m_DepthTargetSize,
        NVN_MEMORY_POOL_FLAGS_CPU_NO_ACCESS_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT | NVN_MEMORY_POOL_FLAGS_COMPRESSIBLE_BIT,
        &m_Device);

    m_pTextureIDManager = new TextureIDManager(&m_Device);

    m_pUniformBufferManager = new UniformBufferManager(&m_Device, g_UniformBufferManagerMemorySize, g_NumChunks);
    SetupUniformBlock();

    m_pFrameBufferedSyncManager = new FrameBufferedSyncManager(&m_Device, &m_Queue, g_NumChunks);
    m_pFrameBufferedSyncManager->RegisterMemoryManager(m_pUniformBufferManager);
    m_pFrameBufferedSyncManager->RegisterMemoryManager(m_pManagedCommandBuffer);

    /* Load assets. */
    size_t cacheSize = 0;
    nn::Result result = nn::fs::QueryMountRomCacheSize(&cacheSize);
    NN_ASSERT(result.IsSuccess());

    g_MountRomCacheBuffer = new(std::nothrow) char[cacheSize];
    NN_ASSERT_NOT_NULL(g_MountRomCacheBuffer);

    result = nn::fs::MountRom("rom", g_MountRomCacheBuffer, cacheSize);
    NN_ABORT_UNLESS_RESULT_SUCCESS(result);

    /*! Create a new asset loader. */
    m_pAssetLoader = new AssetFileLoadingHelper(&m_Device, m_pTextureIDManager);

    /*! Load in the asset file for the tutorial. */
    m_pDataHolder = m_pAssetLoader->LoadAssetFile("cube.out");
    m_pDataHolder->SetupAttributeStatesNVN(SimpleTexturedModel::Attributes::GetAttributeLocation);
    m_pDataHolder->SetupTextureSamplerHandle(&m_Device, m_pTextureIDManager, &m_SamplerBuilder);

    /////////////////////////////
    /////////////////////////////
    // Debug Text Renderer
    /////////////////////////////
    /////////////////////////////

    m_pDebugTextRenderer = DebugTextRenderer::GetInstance();

    m_pDebugTextRenderer->Init(&m_Device, &m_Queue, m_pTextureIDManager, m_pFrameBufferedSyncManager);
    m_pDebugTextRenderer->SetDrawEnable(true);
    m_pDebugTextRenderer->SetSpacing(false);
    m_pDebugTextRenderer->SetGridSize(120.0f, 36.0f);

#ifdef USE_LOP
    /////////////////////////////
    /////////////////////////////
    // Profiler
    /////////////////////////////
    /////////////////////////////

    /* The profiler is constructed in place inside explicitly aligned storage. */
    void* place = AlignedAllocate(sizeof(lop::LOP_Profiler), NN_ALIGNOF(lop::LOP_Profiler));
    NN_ASSERT_NOT_NULL(place);
    m_pProfiler = new(place) lop::LOP_Profiler();
    m_pProfiler->Initialize(&m_Device, &m_Queue);
    m_pProfiler->InitializeChipDesc();

    // Basic 3D Activity
    /* nullptr-terminated; metricPrint below holds one entry per name, in the same order. */
    static const char* const ppMetricNames[] = {
        "gr__cycles_active",
        "mmu__cycles_active",

        "gpmsd__input_active_compute",
        "gpu__dispatch_count",
        "sked__dispatch_count",
        "sked__dispatch_active_scg0",

        "sm__cycles_active",
        "sm__cycles_active_3d",
        "sm__cycles_active_3d_ps",

        "sm__cycles_active_compute",
        "sm__cycles_active_cs",
        "sm__warps_active_cs",
        "sm__warps_launched_cs",
        "sm__threads_launched_cs",
        "smsp__inst_executed_cs",

        nullptr
    };

    int metricPrint[] = {
        lop::MetricPrint_Avg,                                               // gr__cycles_active
        lop::MetricPrint_Avg | lop::MetricPrint_Pct_Peak_Sustained_Elapsed, // mmu__cycles_active

        lop::MetricPrint_Avg,                                               // gpmsd__input_active_compute
        lop::MetricPrint_Avg,                                               // gpu__dispatch_count
        lop::MetricPrint_Avg,                                               // sked__dispatch_count
        lop::MetricPrint_Avg,                                               // sked__dispatch_active_scg0

        lop::MetricPrint_Avg | lop::MetricPrint_Pct_Peak_Sustained_Elapsed, // sm__cycles_active
        lop::MetricPrint_Avg | lop::MetricPrint_Pct_Peak_Sustained_Elapsed, // sm__cycles_active_3d
        lop::MetricPrint_Avg | lop::MetricPrint_Pct_Peak_Sustained_Elapsed, // sm__cycles_active_3d_ps

        lop::MetricPrint_Avg,                                               // sm__cycles_active_compute
        lop::MetricPrint_Avg,                                               // sm__cycles_active_cs
        lop::MetricPrint_Sum | lop::MetricPrint_Avg,                        // sm__warps_active_cs
        lop::MetricPrint_Sum | lop::MetricPrint_Avg,                        // sm__warps_launched_cs
        lop::MetricPrint_Sum | lop::MetricPrint_Avg,                        // sm__threads_launched_cs
        lop::MetricPrint_Sum | lop::MetricPrint_Avg,                        // smsp__inst_executed_cs
    };

    /*
    * The call fills m_SelectedMetrics, so it must not live inside the assert
    * expression: it has to run even in builds where NN_ASSERT is compiled out.
    */
    if (!m_pProfiler->GenerateMetricSpecs(ppMetricNames, metricPrint, m_SelectedMetrics))
    {
        NN_ASSERT(0, "GenerateMetricSpecs failed");
    }
    m_pProfiler->PrepareOfflineData(m_ConfigImage, m_SelectedMetrics);

    m_pProfiler->BeginSession(m_ConfigImage);

    /////////////////////////////
    /////////////////////////////
    // Profiler
    /////////////////////////////
    /////////////////////////////
#endif

    /* Only the builder is prepared here; m_pWindow itself is not created in this function. */
    nvnWindowBuilderSetDefaults(&m_WindowBuilder);
    nvnWindowBuilderSetDevice(&m_WindowBuilder, &m_Device);
    nvnWindowBuilderSetNativeWindow(&m_WindowBuilder, nativeWindow);
}//NOLINT(impl/function_size)

/*
* testLop_Compute::Shutdown
* -------------------------
* Releases the objects created by Init(): ends the profiler session, drains
* the queue, then tears down the renderer helpers, window, render targets,
* managers, assets and memory pools before finalizing the queue and device.
*/
void testLop_Compute::Shutdown()
{
#ifdef USE_LOP
    if (m_pProfiler)
    {
        m_pProfiler->EndSession();

        /*
        * The profiler was constructed with placement new into AlignedAllocate()
        * storage, so its destructor must be invoked explicitly before the
        * storage is returned.
        */
        m_pProfiler->~LOP_Profiler();
        AlignedDeallocate(m_pProfiler);
        m_pProfiler = NULL;
    }
#endif

    /* Make sure the GPU is idle before any resource is destroyed. */
    nvnQueueFinish(&m_Queue);
    nvnSyncFinalize(&m_WindowSync);

    /* The debug text renderer comes from GetInstance(), so it is cleaned up rather than deleted. */
    if (m_pDebugTextRenderer != NULL)
    {
        m_pDebugTextRenderer->CleanUp();
        m_pDebugTextRenderer = NULL;
    }

    if (m_pManagedCommandBuffer != NULL)
    {
        delete m_pManagedCommandBuffer;
        m_pManagedCommandBuffer = NULL;
    }

    if (m_pWindow != NULL)
    {
        nvnWindowFinalize(m_pWindow);
        delete m_pWindow;
        m_pWindow = NULL;
    }

    for (int i = 0; i < g_NumColorBuffers; ++i)
    {
        if (m_RenderTargets[i] != NULL)
        {
            nvnTextureFinalize(m_RenderTargets[i]);
            delete m_RenderTargets[i];
            m_RenderTargets[i] = NULL;
        }
    }

    if (m_pDepthBuffer != NULL)
    {
        nvnTextureFinalize(m_pDepthBuffer);
        delete m_pDepthBuffer;
        m_pDepthBuffer = NULL;
    }

    if (m_pTextureIDManager != NULL)
    {
        delete m_pTextureIDManager;
        m_pTextureIDManager = NULL;
    }

    if (m_pUniformBufferManager != NULL)
    {
        delete m_pUniformBufferManager;
        m_pUniformBufferManager = NULL;
    }

    if (m_pFrameBufferedSyncManager != NULL)
    {
        delete m_pFrameBufferedSyncManager;
        m_pFrameBufferedSyncManager = NULL;
    }

    if (m_pDataHolder != NULL)
    {
        delete m_pDataHolder;
        m_pDataHolder = NULL;
    }

    if (m_pAssetLoader != NULL)
    {
        delete m_pAssetLoader;
        m_pAssetLoader = NULL;
    }

    /*
    * NOTE(review): only the ComputeInfo storage is returned here. The NVN
    * program/buffers and the memory pools it points to are not finalized in
    * this function - confirm they are released elsewhere.
    */
    if (m_ComputeInfo != NULL)
    {
        AlignedDeallocate(m_ComputeInfo);
        m_ComputeInfo = NULL;
    }

    if (m_pRenderTargetMemoryPool)
    {
        m_pRenderTargetMemoryPool->Shutdown();
        delete m_pRenderTargetMemoryPool;
        m_pRenderTargetMemoryPool = NULL;
    }

    /* The queue memory may only be freed after the queue itself is finalized. */
    nvnQueueFinalize(&m_Queue);
    if (m_pQueueMemory)
    {
        AlignedDeallocate(m_pQueueMemory);
        m_pQueueMemory = NULL;
    }

    nvnDeviceFinalize(&m_Device);
}

void testLop_Compute::Draw(uint64_t /*millisec*/)
{
    /*
     * Get the current render target and setup/submit a command buffer to set it.
     * Create the command buffer to render the cubes.
     */
    int currentRenderTargetIndex = PopulateCommandBuffer();

    nn::util::Matrix4x4fType projMat44;
    nn::util::MatrixPerspectiveFieldOfViewRightHanded(&projMat44, 60.0f * g_ToRad, static_cast<float>(m_ScreenWidth) / static_cast<float>(m_ScreenHeight), 1.0f, 1000.0f);

    float* positions = reinterpret_cast<float*>(nvnBufferMap(&m_ComputeInfo->m_ParticlePositions));

    /* Update the uniform data. */
    for (int i = 0; i < g_NumParticles; ++i)
    {
        nn::util::Matrix4x3fType simpleTranslateMat;
        nn::util::MatrixIdentity(&simpleTranslateMat);
        nn::util::Vector3fType simpleTranslateVector;
        nn::util::VectorSet(&simpleTranslateVector, positions[0 + (i * 4)], positions[1 + (i * 4)], positions[2 + (i * 4)] - 3.0f);
        nn::util::MatrixSetTranslate(&simpleTranslateMat, simpleTranslateVector);

        nn::util::Matrix4x3fType simpleRotateXMat;
        nn::util::MatrixIdentity(&simpleRotateXMat);
        nn::util::Vector3fType simpleRotateXVector;
        nn::util::VectorSet(&simpleRotateXVector, 30.0f * g_ToRad, 0.0f, 0.0f);
        nn::util::MatrixSetRotateXyz(&simpleRotateXMat, simpleRotateXVector);

        nn::util::Matrix4x3fType simpleRotateYMat;
        nn::util::MatrixIdentity(&simpleRotateYMat);
        nn::util::Vector3fType simpleRotateYVector;
        nn::util::VectorSet(&simpleRotateYVector, 0.0f, m_RotY * (i + 1) * g_ToRad, 0.0f);
        nn::util::MatrixSetRotateXyz(&simpleRotateYMat, simpleRotateYVector);

        nn::util::Matrix4x3fType simpleScaleMat;
        nn::util::MatrixIdentity(&simpleScaleMat);
        nn::util::Vector3fType simpleScaleVector;
        nn::util::VectorSet(&simpleScaleVector, 0.05f, 0.05f, 0.05f);
        nn::util::MatrixSetScale(&simpleScaleMat, simpleScaleVector);

        nn::util::Matrix4x3fType simpleModelMat;
        nn::util::Matrix4x3fType tempMat1;
        nn::util::Matrix4x3fType tempMat2;
        nn::util::MatrixMultiply(&tempMat1, simpleScaleMat, simpleRotateYMat);
        nn::util::MatrixMultiply(&tempMat2, tempMat1, simpleRotateXMat);
        nn::util::MatrixMultiply(&simpleModelMat, tempMat2, simpleTranslateMat);

        nn::util::Matrix4x4fType simpleModelMat44;
        nn::util::MatrixConvert(&simpleModelMat44, simpleModelMat);

        std::vector<NVNTextureData*>& textureDataSimple = m_pDataHolder->GetTextureData();
        UpdateSimpleTexturedModelUniformBlock(m_ManagedVertexUniformBuffers[i], m_ManagedFragmentUniformBuffers[i], simpleModelMat44, projMat44, textureDataSimple[0]->m_TextureHandle);
    }

    /*
    * Insert a fence in the sync manager to track
    * when the current chunk in the multi buffer
    * is done being used. This sync is checked at a
    * later frame to minimize waiting.
    */
    m_pFrameBufferedSyncManager->InsertFence();

    /*!
    * Wait on sync that was received in UpdateRenderTargets now that we are
    * actually ready to use the render target
    */
    nvnSyncWait(&m_WindowSync, NVN_WAIT_TIMEOUT_MAXIMUM);

    /*! Submit the command buffer to render the cube. */
#ifdef USE_LOP
    NVPA_Bool allPassesDecoded = false;
    static bool first = true;
    int runs = 0;
    do
    {
        m_pProfiler->BeginPass();
        {
            /* Submit the commands to the queue. */
            nvnQueueSubmitCommands(&m_Queue, 1, &m_CommandHandle);
        }
        m_pProfiler->EndPass();
        nvnQueueFinish(&m_Queue);
        m_pProfiler->DecodeCounters(m_ConfigImage, allPassesDecoded);

        ++runs;

        if (allPassesDecoded)
        {
#ifdef TEST_LOP
            m_pProfiler->UnpackRawMetrics_Test(m_SelectedMetrics, m_MetricValues);
            m_ContinueRendering = false;
#else
            m_pDebugTextRenderer->SetColor(1.0f, 1.0f, 1.0f, 1.0f);
            m_pDebugTextRenderer->Printf(0, 0, "Runs: %i", runs);

            std::string outputString = "%-35s = %8.0f\n";
            m_pProfiler->UnpackRawMetrics(m_SelectedMetrics, false, m_pDebugTextRenderer, outputString, 1, false);
#endif
            m_pProfiler->ProfilerInitializeCounterData();
            break;
        }
    } while (!allPassesDecoded && !first);

    first = false;
#ifndef TEST_LOP
    m_pDebugTextRenderer->Draw(m_RenderTargets[currentRenderTargetIndex], m_ScreenWidth, m_ScreenHeight);
#endif
#else
    nvnQueueSubmitCommands(&m_Queue, 1, &m_CommandHandle);
#endif

    /* Present the texture to the screen. */
    nvnQueuePresentTexture(&m_Queue, m_pWindow, currentRenderTargetIndex);

    /* Swap the multi buffered memory. */
    m_pFrameBufferedSyncManager->SwapPools();
}

/*
* DrawHelper
* ----------
* Discards any previously collected metric values, then keeps drawing frames
* until the object reports that rendering should stop. The continue flag is
* set back to true afterwards so the helper can be called again.
*/
void DrawHelper(testLop_Compute* com)
{
    /* Start from an empty result set. */
    com->GetMetricValues().clear();

    for (;;)
    {
        if (!com->ContinueRendering())
        {
            break;
        }

        com->Draw(0);
    }

    /* Re-arm the flag for the next run. */
    com->SetContinueRendering(true);
}

/*
* ComputeTest.test_Compute
* ------------------------
* Renders until the profiler has decoded all passes, then checks selected
* entries of the unpacked metric values.
*
* NOTE(review): the indices below depend on the order in which
* UnpackRawMetrics_Test() emits values for the metrics selected in Init().
* Presumably 4/5 are the gpu/sked dispatch counts and 17/19 the launched
* warp/thread sums - confirm against the profiler implementation.
*/
TEST(ComputeTest, test_Compute)
{
    testLop_Compute* com = reinterpret_cast<testLop_Compute*>(t());
    std::vector<std::pair<std::string, double> >& metricValues = com->GetMetricValues();

    DrawHelper(com);

    /* Fail cleanly instead of reading out of bounds if fewer values were produced. */
    ASSERT_GT(metricValues.size(), static_cast<size_t>(19));

    ASSERT_EQ(1.0, metricValues[4].second);
    ASSERT_EQ(1.0, metricValues[5].second);

    ASSERT_EQ(static_cast<double>(g_NumParticles), metricValues[17].second);
    ASSERT_EQ(metricValues[17].second * 16.0, metricValues[19].second);
}

/*
* testLop_Compute::TestDraw
* -------------------------
* With TEST_LOP defined: runs all registered tests, ends the profiler session
* and exits through nnt::Exit() with the test result. Otherwise draws a single
* frame. The millisec argument is unused.
*/
bool testLop_Compute::TestDraw(uint64_t /*millisec*/)
{
#ifdef TEST_LOP
    const int testStatus = RUN_ALL_TESTS();

    /* Close the profiler session before handing the status to the test runner exit. */
    m_pProfiler->EndSession();
    nnt::Exit(testStatus);
#else
    Draw(0);
#endif

    return true;
}

int testLop_Compute::UpdateRenderTargets()
{
    /* Get next render target to be used */
    NVNwindowAcquireTextureResult result = nvnWindowAcquireTexture(m_pWindow, &m_WindowSync, &m_CurrentWindowIndex);

    NN_ASSERT(result == NVN_WINDOW_ACQUIRE_TEXTURE_RESULT_SUCCESS);

    /* Record the command buffer to set the target. */
    nvnCommandBufferSetRenderTargets(m_pManagedCommandBuffer->GetCommandBuffer(), 1, &m_RenderTargets[m_CurrentWindowIndex], NULL, m_pDepthBuffer, NULL);

    /* Return the index. */
    return m_CurrentWindowIndex;
}

void testLop_Compute::InitComputeShader()
{
        // Load + Compile
    char* pShaderSources;

    nn::fs::FileHandle computeShaderFileHandle;
    nn::Result result = nn::fs::OpenFile(&computeShaderFileHandle, "rom:/testComputeShader.cs", nn::fs::OpenMode_Read);
    NN_ASSERT(result.IsSuccess());

    int64_t computeFileSize = 0;
    result = nn::fs::GetFileSize(&computeFileSize, computeShaderFileHandle);
    NN_ASSERT(result.IsSuccess());

    pShaderSources = reinterpret_cast<char*>(malloc(static_cast<size_t>(computeFileSize + 1)));
    memset(pShaderSources, 0, static_cast<size_t>(computeFileSize + 1));

    size_t computeReadBytes = 0;
    result = nn::fs::ReadFile(&computeReadBytes, computeShaderFileHandle, 0, pShaderSources, static_cast<size_t>(computeFileSize));
    NN_ASSERT(result.IsSuccess());

    GLSLCcompileObject compileObject;
    uint8_t initializeResult = glslcInitialize(&compileObject);
    NN_ASSERT(initializeResult == 1, "Failed to initialize glslc compile object\n");

    GLSLCinput& glslcInput = compileObject.input;
    glslcInput.sources = &pShaderSources;
    glslcInput.count = 1;
    NVNshaderStage stage = NVN_SHADER_STAGE_COMPUTE;
    glslcInput.stages = &stage;

        /* Various options available for compiling the shaders */
    GLSLCoptions& glslcOptions = compileObject.options;
    glslcOptions.optionFlags.glslSeparable = true;
    glslcOptions.optionFlags.outputAssembly = false;
    glslcOptions.optionFlags.outputGpuBinaries = true;
    glslcOptions.optionFlags.outputPerfStats = true;
    glslcOptions.optionFlags.outputShaderReflection = true;
    glslcOptions.optionFlags.outputDebugInfo = GLSLC_DEBUG_LEVEL_G0;
#ifdef _WIN32
    glslcOptions.optionFlags.outputThinGpuBinaries = 0;
#else
    glslcOptions.optionFlags.outputThinGpuBinaries = 1;
#endif

    uint8_t compileRet = glslcCompile(&compileObject);
    if (compileObject.lastCompiledResults->compilationStatus->infoLog)
    {
        NN_LOG(compileObject.lastCompiledResults->compilationStatus->infoLog);
    }
    NN_ASSERT(compileRet && compileObject.lastCompiledResults->compilationStatus->success == 1);

    // Post compile + Uniform
    GLSLCoutput* pGlslcOutput = compileObject.lastCompiledResults->glslcOutput;

    const GLSLCgpuCodeHeader& pGpuHeader = (pGlslcOutput->headers[0].gpuCodeHeader);
    const uint8_t* pData = (reinterpret_cast<const uint8_t*>(pGlslcOutput)) + pGpuHeader.common.dataOffset;

    // Shader Program
    char* shaderDataArray = reinterpret_cast<char*>(AlignedAllocate(4096, NVN_MEMORY_POOL_STORAGE_ALIGNMENT));

    memcpy(shaderDataArray, const_cast<uint8_t*>(pData + pGpuHeader.dataOffset), pGpuHeader.dataSize);

    m_ComputeInfo = (ComputeInfo*)AlignedAllocate(sizeof(ComputeInfo), 32);// new ComputeInfo();
    m_ComputeInfo->m_pComputeShaderBufferMemoryPool = new MemoryPool();
    m_ComputeInfo->m_pComputeShaderBufferMemoryPool->Init(
        shaderDataArray,
        4096,
        NVN_MEMORY_POOL_FLAGS_CPU_NO_ACCESS_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT | NVN_MEMORY_POOL_FLAGS_SHADER_CODE_BIT,
        &m_Device);

    nvnBufferBuilderSetDefaults(&m_BufferBuilder);
    nvnBufferBuilderSetStorage(&m_BufferBuilder, m_ComputeInfo->m_pComputeShaderBufferMemoryPool->GetMemoryPool(), 0, pGpuHeader.dataSize);

    nvnBufferInitialize(&m_ComputeInfo->m_ComputeShaderBuffer, &m_BufferBuilder);

    m_ComputeInfo->m_ComputeShaderData.control = const_cast<uint8_t*>(pData + pGpuHeader.controlOffset);
    m_ComputeInfo->m_ComputeShaderData.data = nvnBufferGetAddress(&m_ComputeInfo->m_ComputeShaderBuffer);

    NN_ASSERT(nvnProgramInitialize(&m_ComputeInfo->m_ComputeProgram, &m_Device));
    NN_ASSERT(nvnProgramSetShaders(&m_ComputeInfo->m_ComputeProgram, 1, &m_ComputeInfo->m_ComputeShaderData));
    nn::fs::CloseFile(computeShaderFileHandle);

    srand(0);

        // Particle setup
        // Position
    float* positionBuf = reinterpret_cast<float*>(AlignedAllocate(g_ParticleBufferSize, 4 * sizeof(float)));// new float[g_ParticleBufferSize];
    memset(positionBuf, 0, g_ParticleBufferSize);
    m_ComputeInfo->RandomizeData(positionBuf, g_NumParticles * 4, 0.2f);

    m_ComputeInfo->m_pPositionMemoryPool = new MemoryPool();
    m_ComputeInfo->m_pPositionMemoryPool->Init(positionBuf, g_ParticleBufferSize, NVN_MEMORY_POOL_FLAGS_CPU_UNCACHED_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT, &m_Device);
    nvnBufferBuilderSetDefaults(&m_BufferBuilder);
    nvnBufferBuilderSetStorage(&m_BufferBuilder, m_ComputeInfo->m_pPositionMemoryPool->GetMemoryPool(), 0, g_NumParticles * 4 * sizeof(float));

    nvnBufferInitialize(&m_ComputeInfo->m_ParticlePositions, &m_BufferBuilder);

        // Velocity
    float* velocityBuf = reinterpret_cast<float*>(AlignedAllocate(g_ParticleBufferSize, 4 * sizeof(float)));// new float[g_ParticleBufferSize];
    memset(velocityBuf, 0, g_ParticleBufferSize);
    m_ComputeInfo->RandomizeData(velocityBuf, g_NumParticles * 4, 0.04f);

    m_ComputeInfo->m_pVelocityMemoryPool = new MemoryPool();
    m_ComputeInfo->m_pVelocityMemoryPool->Init(velocityBuf, g_ParticleBufferSize, NVN_MEMORY_POOL_FLAGS_CPU_UNCACHED_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT, &m_Device);
    nvnBufferBuilderSetDefaults(&m_BufferBuilder);
    nvnBufferBuilderSetStorage(&m_BufferBuilder, m_ComputeInfo->m_pVelocityMemoryPool->GetMemoryPool(), 0, g_NumParticles * 4 * sizeof(float));

    nvnBufferInitialize(&m_ComputeInfo->m_ParticleVelocities, &m_BufferBuilder);

    AlignedDeallocate(positionBuf);
    AlignedDeallocate(velocityBuf);

    nn::fs::Unmount("rom");
    delete[] g_MountRomCacheBuffer;
}

void testLop_Compute::Resize(int width, int height)
{
    /* Check for the window being minimized or having no visible surface. */
    if (width == 0 || height == 0)
    {
        return;
    }

    m_ScreenHeight = height;
    m_ScreenWidth = width;

    /* If it's the first time Resize is called, allocate the NVNwindow. */
    if (m_pWindow == NULL)
    {
        m_pWindow = new NVNwindow;
    }
    /*
    * Otherwise finalize (free) the NVNwindow used for the previous window size.
    * The NVNWindow must be finalized before a render target it owns is finalized.
    */
    else
    {
        nvnWindowFinalize(m_pWindow);
    }

    /* Set up the builder for the render target. */
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_DISPLAY_BIT | NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, m_ScreenWidth, m_ScreenHeight);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_RGBA8);

    for (int i = 0; i < g_NumColorBuffers; ++i)
    {
        /* If it's the first time Resize is called, allocate the render target. */
        if (!m_RenderTargets[i])
        {
            m_RenderTargets[i] = new NVNtexture;
        }
        /* Otherwise finalize (free) the render target used for the previous window size. */
        else
        {
            nvnTextureFinalize(m_RenderTargets[i]);
        }

        nvnTextureBuilderSetStorage(&m_RenderTargetBuilder, m_pRenderTargetMemoryPool->GetMemoryPool(), m_ColorTargetSize * i);

        /* Create the texture using the current state of the texture builder. */
        nvnTextureInitialize(m_RenderTargets[i], &m_RenderTargetBuilder);
    }

    if (!m_pDepthBuffer)
    {
        m_pDepthBuffer = new NVNtexture;
    }
    else
    {
        nvnTextureFinalize(m_pDepthBuffer);
    }

    /* Initialize depth buffer for render target. */
    nvnTextureBuilderSetDefaults(&m_RenderTargetBuilder);
    nvnTextureBuilderSetFlags(&m_RenderTargetBuilder, NVN_TEXTURE_FLAGS_COMPRESSIBLE_BIT);
    nvnTextureBuilderSetSize2D(&m_RenderTargetBuilder, m_ScreenWidth, m_ScreenHeight);
    nvnTextureBuilderSetTarget(&m_RenderTargetBuilder, NVN_TEXTURE_TARGET_2D);
    nvnTextureBuilderSetFormat(&m_RenderTargetBuilder, NVN_FORMAT_DEPTH32F);
    nvnTextureBuilderSetStorage(&m_RenderTargetBuilder, m_pRenderTargetMemoryPool->GetMemoryPool(), m_ColorTargetSize * g_NumColorBuffers);

    if (!nvnTextureInitialize(m_pDepthBuffer, &m_RenderTargetBuilder))
    {
        NN_ASSERT(0, "nvnTextureInitialize failed");
    }

    /* Pass off the render targets to the window. */
    nvnWindowBuilderSetTextures(&m_WindowBuilder, g_NumColorBuffers, m_RenderTargets);
    nvnWindowInitialize(m_pWindow, &m_WindowBuilder);

    InitComputeShader();
}

/*
* FrameBufferMemoryManagement::PopulateCommandBuffer
* --------------------------------------------------
* Builds the command buffer to render the cubes.
*/
int testLop_Compute::PopulateCommandBuffer()
{
    int currentRenderTargetIndex = -1;
    /// Starts the recording of a new set of commands for the given command buffer.
    m_pManagedCommandBuffer->BeginRecording();
    {
        NVNcommandBuffer* pCommandBuffer = m_pManagedCommandBuffer->GetCommandBuffer();
        nvnCommandBufferPushDebugGroup(pCommandBuffer, "Draw");

        m_ComputeInfo->Dispatch(pCommandBuffer);

        currentRenderTargetIndex = UpdateRenderTargets();

        /* Bind the texture and sampler descriptor pools. */
        m_pTextureIDManager->SetSamplerPool(pCommandBuffer);
        m_pTextureIDManager->SetTexturePool(pCommandBuffer);

        /* Sets the scissor rectangle and viewport to the full screen */
        nvnCommandBufferSetScissor(pCommandBuffer, 0, 0, m_ScreenWidth, m_ScreenHeight);
        nvnCommandBufferSetViewport(pCommandBuffer, 0, 0, m_ScreenWidth, m_ScreenHeight);

        /* Clears the currently set render target at a given index. */
        float clear_color[4] = { 0.4f, 0.55f, 0.6f, 1.0f };
        nvnCommandBufferClearColor(pCommandBuffer, 0, clear_color, NVN_CLEAR_COLOR_MASK_RGBA);
        nvnCommandBufferClearDepthStencil(pCommandBuffer, 1.0, NVN_TRUE, 0, 0);

        /* Bind the render state objects. */
        nvnCommandBufferBindBlendState(pCommandBuffer, &m_BlendState);
        nvnCommandBufferBindChannelMaskState(pCommandBuffer, &m_ChannelMaskState);
        nvnCommandBufferBindColorState(pCommandBuffer, &m_ColorState);
        nvnCommandBufferBindDepthStencilState(pCommandBuffer, &m_DepthStencilState);
        nvnCommandBufferBindMultisampleState(pCommandBuffer, &m_MultisampleState);
        nvnCommandBufferBindPolygonState(pCommandBuffer, &m_PolygonState);
        nvnCommandBufferSetSampleMask(pCommandBuffer, static_cast<uint32_t>(~0));

        for (int i = 0; i < g_NumParticles; ++i)
        {
            std::vector<NVNModelData*>&   modelData = m_pDataHolder->GetModelData();
            std::vector<NVNProgramData*>& programData = m_pDataHolder->GetProgramData();

            ShaderTypes::ShaderType shaderType = programData[0]->m_ShaderType;

            Model* model = &modelData[0]->m_Model;
            NVNbufferAddress vboAddr = nvnBufferGetAddress(&modelData[0]->m_VertexBuffer);

            /* Bind the vertex buffer(s). */
            for (size_t j = 0; j < model->m_VertexAttributes.size(); ++j)
            {
                VertexAttribute& attr = model->m_VertexAttributes[j];
                nvnCommandBufferBindVertexBuffer(pCommandBuffer, attr.m_Location, vboAddr + modelData[0]->m_VertexAttributeBufferOffsets[j], attr.m_DataSize);
            }

            /* Bind the uniform buffers. */
            nvnCommandBufferBindUniformBuffer(pCommandBuffer,
                NVN_SHADER_STAGE_VERTEX,
                ShaderTypes::BlockVS_GetBinding(NVN_SHADER_STAGE_VERTEX, shaderType),
                m_ManagedVertexUniformBuffers[i]->GetCurrentBufferAddress(),
                sizeof(SimpleTexturedModel::BlockVSUniformBlockData));

            nvnCommandBufferBindUniformBuffer(pCommandBuffer,
                NVN_SHADER_STAGE_FRAGMENT,
                ShaderTypes::BlockFS_GetBinding(NVN_SHADER_STAGE_FRAGMENT, shaderType),
                m_ManagedFragmentUniformBuffers[i]->GetCurrentBufferAddress(),
                sizeof(SimpleTexturedModel::BlockFSUniformBlockData));

            /* Bind the vertex states. */
            nvnCommandBufferBindVertexAttribState(pCommandBuffer, static_cast<int>(modelData[0]->m_VertexAttributeStates.size()), &modelData[0]->m_VertexAttributeStates[0]);
            nvnCommandBufferBindVertexStreamState(pCommandBuffer, static_cast<int>(modelData[0]->m_VertexStreamStates.size()), &modelData[0]->m_VertexStreamStates[0]);

            /* Bind the shader program. */
            nvnCommandBufferBindProgram(pCommandBuffer, &programData[0]->m_Program, programData[0]->m_ShaderStages);

            /* Draw the primitives. */
            NVNindexType indexType = (NVNindexType)model->m_IndexData.m_IndexType;
            uint32_t numIndices = model->m_IndexData.m_DataSize / model->m_IndexData.m_Stride;
            nvnCommandBufferDrawElements(
                pCommandBuffer,
                (NVNdrawPrimitive)model->m_NvnDrawPrimitiveType,
                indexType,
                numIndices,
                nvnBufferGetAddress(&modelData[0]->m_IndexBuffer));
        }

        nvnCommandBufferPopDebugGroup(pCommandBuffer);
    }

    m_CommandHandle = m_pManagedCommandBuffer->EndRecording();

    return currentRenderTargetIndex;
}

/*
* testLop_Compute::DebugLayerCallback (static)
* --------------------------------------------
* Logs every field of an NVN debug layer message. The user pointer is
* expected to be NULL.
*/
void testLop_Compute::DebugLayerCallback(NVNdebugCallbackSource source, NVNdebugCallbackType type, int id,
                                         NVNdebugCallbackSeverity severity, const char* message, void* user)
{
    NN_ASSERT(user == NULL);

    /* The numeric fields are printed as 8-digit hexadecimal values. */
    const unsigned int sourceValue = static_cast<unsigned int>(source);
    const unsigned int typeValue = static_cast<unsigned int>(type);
    const unsigned int idValue = static_cast<unsigned int>(id);
    const unsigned int severityValue = static_cast<unsigned int>(severity);

    NN_LOG("NVN Debug Layer Callback:\n");
    NN_LOG("  source:       0x%08x\n", sourceValue);
    NN_LOG("  type:         0x%08x\n", typeValue);
    NN_LOG("  id:           0x%08x\n", idValue);
    NN_LOG("  severity:     0x%08x\n", severityValue);
    NN_LOG("  message:      %s\n", message);
}

/* Returns the single testLop_Compute instance, which is constructed on the first call. */
TutorialBaseClass* t()
{
    static testLop_Compute s_Tutorial;

    TutorialBaseClass* pTutorial = &s_Tutorial;
    return pTutorial;
}

/* Program entry point: hands the host arguments to Google Test, then runs the tutorial. */
extern "C" void nnMain()
{
    int    hostArgc = nnt::GetHostArgc();
    char** hostArgv = nnt::GetHostArgv();

    /* InitGoogleTest takes the argument count by pointer. */
    ::testing::InitGoogleTest(&hostArgc, hostArgv);

    TutorialRun(true);
}

/* Starts with no memory pools attached; the NVN objects are left for the owner to initialize. */
ComputeInfo::ComputeInfo()
    : m_pComputeShaderBufferMemoryPool(nullptr)
    , m_pPositionMemoryPool(nullptr)
    , m_pVelocityMemoryPool(nullptr)
{
}

/*
* Finalizes the NVN objects and releases the memory pools that back them.
*
* The constructor leaves every pool pointer NULL, so each resource group is
* skipped when its pool was never created. This relies on a buffer only
* being initialized after its pool exists, which is how
* testLop_Compute::InitComputeShader() sets them up.
*/
ComputeInfo::~ComputeInfo()
{
    /* Shader code buffer and program, then the pool holding the code. */
    if (m_pComputeShaderBufferMemoryPool != NULL)
    {
        nvnBufferFinalize(&m_ComputeShaderBuffer);
        nvnProgramFinalize(&m_ComputeProgram);
        m_pComputeShaderBufferMemoryPool->Shutdown();
        delete m_pComputeShaderBufferMemoryPool;
        m_pComputeShaderBufferMemoryPool = NULL;
    }

    /* Both particle buffers are finalized before either of their pools goes away. */
    if (m_pPositionMemoryPool != NULL)
    {
        nvnBufferFinalize(&m_ParticlePositions);
    }
    if (m_pVelocityMemoryPool != NULL)
    {
        nvnBufferFinalize(&m_ParticleVelocities);
    }

    if (m_pPositionMemoryPool != NULL)
    {
        m_pPositionMemoryPool->Shutdown();
        delete m_pPositionMemoryPool;
        m_pPositionMemoryPool = NULL;
    }
    if (m_pVelocityMemoryPool != NULL)
    {
        m_pVelocityMemoryPool->Shutdown();
        delete m_pVelocityMemoryPool;
        m_pVelocityMemoryPool = NULL;
    }
}

/*
* Records the compute pass into pCmdBuf: binds the compute program, binds the
* position buffer to storage slot 0 and the velocity buffer to slot 1,
* dispatches g_NumParticles x 1 x 1 work groups, and ends with a barrier.
*/
void ComputeInfo::Dispatch(NVNcommandBuffer* pCmdBuf)
{
    /* Four floats per particle in each storage buffer. */
    const size_t particleDataSize = g_NumParticles * 4 * sizeof(float);

    nvnCommandBufferPushDebugGroup(pCmdBuf, "Draw");
    nvnCommandBufferBindProgram(pCmdBuf, &m_ComputeProgram, NVN_SHADER_STAGE_COMPUTE_BIT);

    /* No uniform buffer is bound for the compute stage (a previous binding is disabled). */

    nvnCommandBufferBindStorageBuffer(
        pCmdBuf,
        NVN_SHADER_STAGE_COMPUTE,
        0,
        nvnBufferGetAddress(&m_ParticlePositions),
        particleDataSize);
    nvnCommandBufferBindStorageBuffer(
        pCmdBuf,
        NVN_SHADER_STAGE_COMPUTE,
        1,
        nvnBufferGetAddress(&m_ParticleVelocities),
        particleDataSize);

    nvnCommandBufferDispatchCompute(pCmdBuf, g_NumParticles, 1, 1);

    nvnCommandBufferBarrier(pCmdBuf, NVN_BARRIER_ORDER_PRIMITIVES_BIT | NVN_BARRIER_INVALIDATE_SHADER_BIT);
    nvnCommandBufferPopDebugGroup(pCmdBuf);
}

/*
* Fills data[0 .. length) with rand()-based values in the range
* [-modifier, +modifier]. The sequence depends on the current srand() seed.
*/
void ComputeInfo::RandomizeData(float* data, size_t length, float modifier /*= 1.0f*/)
{
    float* const pEnd = data + length;

    for (float* pValue = data; pValue != pEnd; ++pValue)
    {
        /* rand() scaled to [0, 1], then recentred and stretched to [-1, 1] * modifier. */
        const float unit = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
        *pValue = (unit - 0.5f) * 2.0f * modifier;
    }
}
