﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/nn_Abort.h>
#include <nn/nn_Assert.h>
#include <nn/os/os_Thread.h>
#include <nn/os/os_Tick.h>
#include <nn/gfx/gfx_BufferLayout.h>
#include <nn/gfx/gfx_GpuAddress.h>
#include <nn/gfx/gfx_Queue.h>

#include "gfxUtilGpuBenchmark_GpuBenchmark.h"
#include "gfxUtilGpuBenchmark_ResHelpers.h"
#include "gfxUtilGpuBenchmark_ResourceAllocator.h"

namespace nnt { namespace gfx { namespace util {

// Constructs the benchmark with the "gfx objects initialized" flag cleared.
GpuBenchmark::GpuBenchmark()
: m_GetGfxObjectsInitialized(false)
{
}

// Asserts that the "gfx objects initialized" flag is clear at destruction.
// In this base class the flag is set by InitializeGfxObjects() and cleared by
// FinalizeGfxObjects(), so the assert fires when the latter was not called.
GpuBenchmark::~GpuBenchmark()
{
    NN_ASSERT(!m_GetGfxObjectsInitialized);
}

// Base implementation: performs no work; pResourceAllocator is unused.
void GpuBenchmark::Initialize(ResourceAllocator* pResourceAllocator)
{
    NN_UNUSED(pResourceAllocator);
}

// Base implementation: performs no work; pResourceAllocator is unused.
void GpuBenchmark::Finalize(ResourceAllocator* pResourceAllocator)
{
    NN_UNUSED(pResourceAllocator);
}

// Base implementation: creates no gfx objects and only records that
// initialization happened. Both parameters are unused here.
// The destructor asserts that this flag has been cleared again.
void GpuBenchmark::InitializeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    NN_UNUSED(pResourceAllocator);
    NN_UNUSED(pDevice);
    m_GetGfxObjectsInitialized = true;
}

// Base implementation: releases nothing and only clears the flag set by
// InitializeGfxObjects(). Both parameters are unused here.
void GpuBenchmark::FinalizeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    NN_UNUSED(pResourceAllocator);
    NN_UNUSED(pDevice);
    m_GetGfxObjectsInitialized = false;
}

// Base implementation: records no commands; pTestCommandBuffer is unused.
// RecordGpuBenchmarkCommandList() calls this once before any DoBenchmark() call.
void GpuBenchmark::PreBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    NN_UNUSED(pTestCommandBuffer);
}

// Base implementation: records no commands; both parameters are unused.
// RecordGpuBenchmarkCommandList() calls this for the warm-up and measured runs.
void GpuBenchmark::DoBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer, int runCount)
{
    NN_UNUSED(pTestCommandBuffer);
    NN_UNUSED(runCount);
}

// Base implementation: records no commands; pTestCommandBuffer is unused.
void GpuBenchmark::RenderDebug(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    NN_UNUSED(pTestCommandBuffer);
}

// Base implementation: ignores every argument and aborts via NN_ABORT().
// NOTE(review): presumably concrete benchmarks are expected to override this;
// confirm against the class declaration.
void GpuBenchmark::PrintResults(
    nn::TimeSpan cpuTimeElapsed, nn::TimeSpan gpuTimeElapsed,
    int runCount, nn::gfx::util::DebugFontTextWriter* pDebugFontTextWriter)
{
    NN_UNUSED(cpuTimeElapsed);
    NN_UNUSED(gpuTimeElapsed);
    NN_UNUSED(runCount);
    NN_UNUSED(pDebugFontTextWriter);
    NN_ABORT();
}


// Base implementation: ignores pCommandBuffer and aborts via NN_ABORT().
// RecordGpuBenchmarkCommandList() calls this after the second timestamp write.
void GpuBenchmark::CopyResultToBuffer(nn::gfx::CommandBuffer* pCommandBuffer)
{
    NN_UNUSED(pCommandBuffer);
    NN_ABORT();
}

// Base implementation: writes an empty result (null pointer, size 0) to both
// output parameters and then aborts via NN_ABORT().
// Both pOutBuffer and pOutBufferSize are dereferenced unconditionally.
void GpuBenchmark::MapResultBuffer(void** pOutBuffer, size_t* pOutBufferSize)
{
    *pOutBuffer = nullptr;
    *pOutBufferSize = 0;
    NN_ABORT();
}

// Base implementation: aborts via NN_ABORT().
void GpuBenchmark::UnmapResultBuffer()
{
    NN_ABORT();
}


// Base implementation: aborts via NN_ABORT().
// The "invalid" return value after the abort is only a placeholder so the
// function still has a return statement.
const char* GpuBenchmark::GetName() const
{
    NN_ABORT();
    return "invalid";
}

// Base implementation: aborts via NN_ABORT().
// The BenchmarkType_Invalid return value after the abort is only a placeholder
// so the function still has a return statement.
BenchmarkType GpuBenchmark::GetType() const
{
    NN_ABORT();
    return BenchmarkType_Invalid;
}

// Base implementation: reports that the benchmark has no properties (returns 0).
int GpuBenchmark::GetPropertyCount() const
{
    return 0;
}

// Base implementation (const overload): writes nothing to ppDestinationArray
// and returns 0. Both parameters are unused.
int GpuBenchmark::FillPropertyList(
    const GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize) const
{
    NN_UNUSED(ppDestinationArray);
    NN_UNUSED(destinationArrayMaxSize);
    return 0;
}

// Base implementation (non-const overload): writes nothing to
// ppDestinationArray and returns 0. Both parameters are unused.
int GpuBenchmark::FillPropertyList(GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize)
{
    NN_UNUSED(ppDestinationArray);
    NN_UNUSED(destinationArrayMaxSize);
    return 0;
}

// Base implementation: never finds a property; returns nullptr for any
// propertyName, which is unused.
GpuBenchmarkPropertyHolder* GpuBenchmark::FindPropertyByName(const char* propertyName)
{
    NN_UNUSED(propertyName);
    return nullptr;
}


// Base implementation: ignores index and aborts via NN_ABORT().
// This class reports zero properties through GetPropertyCount(), so presumably
// no index is valid here.
//
// Returns nullptr after the abort purely as a placeholder, matching GetName()
// and GetType(): the function must not fall off its end without returning a
// value in any build where NN_ABORT() is not treated as noreturn.
GpuBenchmarkPropertyHolder* GpuBenchmark::GetPropertyByIndex(int index)
{
    NN_UNUSED(index);
    NN_ABORT();
    return nullptr;
}

// Creates the benchmark command buffer and reserves the command memory and
// control memory that RecordGpuBenchmarkCommandList() later attaches to it.
//
// pRuntimeGfxObjects  Receives the command buffer plus its memory bookkeeping
//                     (pool, offset and size for command memory; pointer and
//                     size for control memory).
// pResourceAllocator  Provides the command-buffer pool memory and the control memory.
// pDevice             Used to initialize the command buffer and query alignments.
void InitializeRuntimeGfxObjectsCommandBuffer(
    RuntimeGfxObjects* pRuntimeGfxObjects,
    ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    const size_t commandMemorySize = 0x800000;
    const size_t controlMemorySize = 0x100000;

    CommandBufferData* pBenchmarkCommandBuffer = &pRuntimeGfxObjects->benchmarkCommandBuffer;

    // CommandBuffer
    {
        nn::gfx::CommandBuffer::InfoType commandbufferInfo;
        commandbufferInfo.SetDefault();
        commandbufferInfo.SetQueueCapability(nn::gfx::QueueCapability_Graphics
            | nn::gfx::QueueCapability_Compute | nn::gfx::QueueCapability_Copy);
        commandbufferInfo.SetCommandBufferType(nn::gfx::CommandBufferType_Direct);

        pBenchmarkCommandBuffer->commandBuffer.Initialize(pDevice, commandbufferInfo);
    }

    // CommandMemory
    {
        size_t alignment = nn::gfx::CommandBuffer::GetCommandMemoryAlignment(pDevice);
        size_t size = commandMemorySize;

        ptrdiff_t commandMemoryOffset =
            pResourceAllocator->AllocatePoolMemory(MemoryPoolType_CommandBuffer, size, alignment);

        pBenchmarkCommandBuffer->pCommandMemoryPool = pResourceAllocator->GetMemoryPool(MemoryPoolType_CommandBuffer);
        pBenchmarkCommandBuffer->commandMemoryOffset = commandMemoryOffset;
        // Record the size that was actually allocated. Storing the (smaller)
        // control memory size here would make AddCommandMemory() expose only
        // part of the reserved region to the command buffer.
        pBenchmarkCommandBuffer->commandMemorySize = size;
    }

    // ControlMemory
    {
        size_t alignment = nn::gfx::CommandBuffer::GetControlMemoryAlignment(pDevice);
        size_t size = controlMemorySize;
        pBenchmarkCommandBuffer->pControlMemory = pResourceAllocator->AllocateMemory(size, alignment);
        pBenchmarkCommandBuffer->controlMemorySize = size;
        NN_SDK_ASSERT_NOT_NULL(pBenchmarkCommandBuffer->pControlMemory);
    }
}

// Builds the default pipeline state objects stored in pRuntimeGfxObjects:
//   - vertex state:        one buffer, float3 at offset 0 (slot 0) followed by
//                          float2 (slot 1), stride of five floats
//   - blend state:         a single target with blending disabled
//   - rasterizer state:    no culling, scissor enabled, depth clip disabled
//   - depth-stencil state: depth test and depth write disabled
//
// Vertex and blend state go through the Initialize*State helpers, which take
// pResourceAllocator; the other two are initialized directly on pDevice.
void InitializeRuntimeGfxObjectsDefaultState(
    RuntimeGfxObjects* pRuntimeGfxObjects,
    ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    // Vertex state (VertexStateType_Float3_Float2)
    {
        const int attributeCount = 2;
        const int vertexBufferCount = 1;
        const size_t float2AttributeOffset = sizeof(float) * 3;
        ptrdiff_t vertexStride = sizeof(float) * 5;

        nn::gfx::VertexAttributeStateInfo attributeInfos[attributeCount];

        nn::gfx::VertexAttributeStateInfo& float3Attribute = attributeInfos[0];
        float3Attribute.SetDefault();
        float3Attribute.SetBufferIndex(0);
        float3Attribute.SetFormat(nn::gfx::AttributeFormat_32_32_32_Float);
        float3Attribute.SetOffset(0);
        float3Attribute.SetShaderSlot(0);

        nn::gfx::VertexAttributeStateInfo& float2Attribute = attributeInfos[1];
        float2Attribute.SetDefault();
        float2Attribute.SetBufferIndex(0);
        float2Attribute.SetFormat(nn::gfx::AttributeFormat_32_32_Float);
        float2Attribute.SetOffset(float2AttributeOffset);
        float2Attribute.SetShaderSlot(1);

        nn::gfx::VertexBufferStateInfo vertexBufferInfo;
        vertexBufferInfo.SetDefault();
        vertexBufferInfo.SetStride(vertexStride);

        // The attribute and buffer descriptions are handed over as arrays, so
        // they stay in scope until InitializeVertexState() has been called.
        nn::gfx::VertexState::InfoType vertexStateInfo;
        vertexStateInfo.SetDefault();
        vertexStateInfo.SetVertexAttributeStateInfoArray(attributeInfos, attributeCount);
        vertexStateInfo.SetVertexBufferStateInfoArray(&vertexBufferInfo, vertexBufferCount);

        InitializeVertexState(
            &pRuntimeGfxObjects->defaultVertexState, vertexStateInfo,
            pResourceAllocator, pDevice);
    }

    // Blend state (BlendStateType_Disabled)
    {
        nn::gfx::BlendTargetStateInfo blendTargetInfo;
        blendTargetInfo.SetDefault();
        blendTargetInfo.SetBlendEnabled(false);

        nn::gfx::BlendState::InfoType blendStateInfo;
        blendStateInfo.SetDefault();
        blendStateInfo.SetBlendTargetStateInfoArray(&blendTargetInfo, 1);

        InitializeBlendState(
            &pRuntimeGfxObjects->defaultBlendState, blendStateInfo,
            pResourceAllocator, pDevice);
    }

    // Rasterizer state (RasterizerStateType_FillSolid_CullNone)
    {
        nn::gfx::RasterizerState::InfoType rasterizerStateInfo;
        rasterizerStateInfo.SetDefault();
        rasterizerStateInfo.SetCullMode(nn::gfx::CullMode_None);
        rasterizerStateInfo.SetScissorEnabled(true);
        rasterizerStateInfo.SetDepthClipEnabled(false);

        pRuntimeGfxObjects->defaultRasterizerState.Initialize(pDevice, rasterizerStateInfo);
    }

    // Depth-stencil state (DepthStencilStateType_Disabled)
    {
        nn::gfx::DepthStencilState::InfoType depthStencilStateInfo;
        depthStencilStateInfo.SetDefault();
        depthStencilStateInfo.SetDepthTestEnabled(false);
        depthStencilStateInfo.SetDepthWriteEnabled(false);

        pRuntimeGfxObjects->defaultDepthStencilState.Initialize(pDevice, depthStencilStateInfo);
    }
}

// Sets up every runtime object the benchmark harness needs, in this order:
// command buffer and its memory, default pipeline state, a query buffer sized
// for two timestamps, the command fence, and finally the cached descriptor
// pool pointers obtained from pResourceAllocator.
void InitializeRuntimeGfxObjects(
    RuntimeGfxObjects* pRuntimeGfxObjects, nn::gfx::Device* pDevice,
    ResourceAllocator* pResourceAllocator)
{
    InitializeRuntimeGfxObjectsCommandBuffer(pRuntimeGfxObjects, pResourceAllocator, pDevice);
    InitializeRuntimeGfxObjectsDefaultState(pRuntimeGfxObjects, pResourceAllocator, pDevice);

    // Timestamp buffer: RecordGpuBenchmarkCommandList() writes one timestamp
    // before and one after the measured run.
    const size_t timestampSlotCount = 2;
    nn::gfx::Buffer::InfoType queryBufferInfo;
    queryBufferInfo.SetDefault();
    queryBufferInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_QueryBuffer);
    queryBufferInfo.SetSize(sizeof(nn::gfx::TimestampBuffer) * timestampSlotCount);

    InitializeBuffer(
        &pRuntimeGfxObjects->timestampBuffer, queryBufferInfo,
        pResourceAllocator, MemoryPoolType_QueryBuffer,
        pDevice);
    pRuntimeGfxObjects->timestampBufferSize = queryBufferInfo.GetSize();

    // Fence passed to Queue::ExecuteCommand() when the benchmark is submitted.
    nn::gfx::Fence::InfoType fenceInfo;
    fenceInfo.SetDefault();
    pRuntimeGfxObjects->commandFence.Initialize(pDevice, fenceInfo);

    // Cache the descriptor pools so command recording can bind them directly.
    pRuntimeGfxObjects->pBufferViewDescriptorPool =
        pResourceAllocator->GetDescriptorPool(nn::gfx::DescriptorPoolType_BufferView);
    pRuntimeGfxObjects->pTextureViewDescriptorPool =
        pResourceAllocator->GetDescriptorPool(nn::gfx::DescriptorPoolType_TextureView);
    pRuntimeGfxObjects->pSamplerDescriptorPool =
        pResourceAllocator->GetDescriptorPool(nn::gfx::DescriptorPoolType_Sampler);
}

// Releases everything InitializeRuntimeGfxObjects() set up, in roughly the
// reverse order of creation, and clears the associated bookkeeping fields so
// no stale pointer, offset or size remains in pRuntimeGfxObjects.
//
// pRuntimeGfxObjects  Objects to tear down.
// pDevice             Device the objects were initialized on.
// pResourceAllocator  Allocator the memory was obtained from.
void FinalizeRuntimeGfxObjects(
    RuntimeGfxObjects* pRuntimeGfxObjects, nn::gfx::Device* pDevice,
    ResourceAllocator* pResourceAllocator)
{
    // The descriptor pools were only cached here, so just drop the pointers.
    pRuntimeGfxObjects->pBufferViewDescriptorPool = nullptr;
    pRuntimeGfxObjects->pTextureViewDescriptorPool = nullptr;
    pRuntimeGfxObjects->pSamplerDescriptorPool = nullptr;

    pRuntimeGfxObjects->commandFence.Finalize(pDevice);

    FinalizeBuffer(&pRuntimeGfxObjects->timestampBuffer, pResourceAllocator, pDevice);
    pRuntimeGfxObjects->timestampBufferSize = 0;

    pRuntimeGfxObjects->defaultDepthStencilState.Finalize(pDevice);
    pRuntimeGfxObjects->defaultRasterizerState.Finalize(pDevice);
    FinalizeBlendState(&pRuntimeGfxObjects->defaultBlendState, pResourceAllocator, pDevice);
    FinalizeVertexState(&pRuntimeGfxObjects->defaultVertexState, pResourceAllocator, pDevice);

    // Finalize the command buffer before freeing the memory attached to it.
    pRuntimeGfxObjects->benchmarkCommandBuffer.commandBuffer.Finalize(pDevice);

    pResourceAllocator->FreeMemory(pRuntimeGfxObjects->benchmarkCommandBuffer.pControlMemory);
    pRuntimeGfxObjects->benchmarkCommandBuffer.pControlMemory = nullptr;
    // Clear the size together with the pointer, as is done for command memory.
    pRuntimeGfxObjects->benchmarkCommandBuffer.controlMemorySize = 0;

    pResourceAllocator->FreePoolMemory(
        MemoryPoolType_CommandBuffer, pRuntimeGfxObjects->benchmarkCommandBuffer.commandMemoryOffset);
    pRuntimeGfxObjects->benchmarkCommandBuffer.pCommandMemoryPool = nullptr;
    pRuntimeGfxObjects->benchmarkCommandBuffer.commandMemoryOffset = 0;
    pRuntimeGfxObjects->benchmarkCommandBuffer.commandMemorySize = 0;
}

// Records the complete benchmark sequence into the runtime benchmark command
// buffer:
//   1. reset the command buffer and attach its command/control memory
//   2. invalidate GPU memory, bind the descriptor pools and default state
//   3. PreBenchmark(), then an optional warm-up run (only if warmupRunCount > 0)
//   4. timestamp slot 0, the measured run, timestamp slot 1
//   5. CopyResultToBuffer()
//
// The two timestamps are written into pRuntimeGfxObjects->timestampBuffer and
// read back by RunGpuBenchmarkCommandList().
void RecordGpuBenchmarkCommandList(
    GpuBenchmark* pGpuBenchmark, RuntimeGfxObjects* pRuntimeGfxObjects,
    int warmupRunCount, int measureRunCount)
{
    CommandBufferData* pBenchmarkData = &pRuntimeGfxObjects->benchmarkCommandBuffer;
    nn::gfx::CommandBuffer* pCommands = &pBenchmarkData->commandBuffer;

    nn::gfx::GpuAddress timestampAddress;
    pRuntimeGfxObjects->timestampBuffer.GetGpuAddress(&timestampAddress);

    // Reset the command buffer and attach its backing memory before recording.
    pCommands->Reset();
    pCommands->AddCommandMemory(
        pBenchmarkData->pCommandMemoryPool,
        pBenchmarkData->commandMemoryOffset,
        pBenchmarkData->commandMemorySize);
    pCommands->AddControlMemory(
        pBenchmarkData->pControlMemory,
        pBenchmarkData->controlMemorySize);

    pCommands->Begin();

    pCommands->InvalidateMemory(
        nn::gfx::GpuAccess_ShaderCode | nn::gfx::GpuAccess_Descriptor |
        nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_VertexBuffer |
        nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_ColorBuffer);

    // Descriptor pools
    pCommands->SetDescriptorPool(pRuntimeGfxObjects->pBufferViewDescriptorPool);
    pCommands->SetDescriptorPool(pRuntimeGfxObjects->pTextureViewDescriptorPool);
    pCommands->SetDescriptorPool(pRuntimeGfxObjects->pSamplerDescriptorPool);

    // Default pipeline state
    pCommands->SetVertexState(&pRuntimeGfxObjects->defaultVertexState);
    pCommands->SetBlendState(&pRuntimeGfxObjects->defaultBlendState);
    pCommands->SetRasterizerState(&pRuntimeGfxObjects->defaultRasterizerState);
    pCommands->SetDepthStencilState(&pRuntimeGfxObjects->defaultDepthStencilState);

    pGpuBenchmark->PreBenchmark(pCommands);

    // Warm-up runs are recorded ahead of the first timestamp, so they fall
    // outside the measured interval.
    if (warmupRunCount >= 1)
    {
        pGpuBenchmark->DoBenchmark(pCommands, warmupRunCount);
    }

    // Slot 0: start of the measured interval.
    pCommands->WriteTimestamp(timestampAddress);

    pGpuBenchmark->DoBenchmark(pCommands, measureRunCount);

    // Slot 1: end of the measured interval.
    timestampAddress.Offset(sizeof(nn::gfx::TimestampBuffer));
    pCommands->WriteTimestamp(timestampAddress);

    pGpuBenchmark->CopyResultToBuffer(pCommands);

    pCommands->End();
}

// Submits the recorded benchmark command buffer, waits for the queue to finish
// and reports how long the run took.
//
// pOutCpuDuration     Receives the system-tick time between submission and the
//                     return of Queue::Sync().
// pOutGpuDuration     Receives the duration between the two timestamps stored
//                     in pRuntimeGfxObjects->timestampBuffer.
// pQueue              Queue the command buffer is executed on.
// pRuntimeGfxObjects  Holds the command buffer, fence and timestamp buffer.
//
// The calling thread runs at nn::os::HighestThreadPriority while it submits
// and waits; its previous priority is restored before the timestamps are read.
void RunGpuBenchmarkCommandList(
    nn::TimeSpan* pOutCpuDuration, nn::TimeSpan* pOutGpuDuration,
    nn::gfx::Queue* pQueue, RuntimeGfxObjects* pRuntimeGfxObjects)
{
    const int savedPriority =
        nn::os::ChangeThreadPriority(nn::os::GetCurrentThread(), nn::os::HighestThreadPriority);

    // Timed section: submit, flush and block until the queue has drained.
    nn::os::Tick submitTick = nn::os::GetSystemTick();
    pQueue->ExecuteCommand(
        &pRuntimeGfxObjects->benchmarkCommandBuffer.commandBuffer,
        &pRuntimeGfxObjects->commandFence);
    pQueue->Flush();
    pQueue->Sync();
    nn::os::Tick completeTick = nn::os::GetSystemTick();

    nn::os::ChangeThreadPriority(nn::os::GetCurrentThread(), savedPriority);

    // Read back the two timestamps: index 0 was written before the measured
    // run and index 1 after it.
    const nn::gfx::TimestampBuffer* pTimestamps =
        pRuntimeGfxObjects->timestampBuffer.Map<nn::gfx::TimestampBuffer>();
    pRuntimeGfxObjects->timestampBuffer.InvalidateMappedRange(0, pRuntimeGfxObjects->timestampBufferSize);
    nn::TimeSpan gpuDuration = nn::gfx::GetDuration(pTimestamps[0].GetValue(), pTimestamps[1].GetValue());
    pRuntimeGfxObjects->timestampBuffer.Unmap();

    *pOutCpuDuration = (completeTick - submitTick).ToTimeSpan();
    *pOutGpuDuration = gpuDuration;
}

// Records the benchmark's debug rendering into a caller-supplied command buffer.
//
// Binds the same descriptor pools and default pipeline state that
// RecordGpuBenchmarkCommandList() uses, then delegates to
// GpuBenchmark::RenderDebug(). Unlike that function, this one does not
// Reset/Begin/End pCommandBuffer.
// NOTE(review): presumably the caller owns the command buffer's recording
// scope; confirm at the call site.
void RenderGpuBenchmarkDebug(
    GpuBenchmark* pGpuBenchmark,
    RuntimeGfxObjects* pRuntimeGfxObjects,
    nn::gfx::CommandBuffer* pCommandBuffer)
{
    // Narrower invalidation set than the benchmark recording path uses.
    pCommandBuffer->InvalidateMemory(
        nn::gfx::GpuAccess_Descriptor | nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_ColorBuffer);

    // Descriptor pools
    pCommandBuffer->SetDescriptorPool(pRuntimeGfxObjects->pBufferViewDescriptorPool);
    pCommandBuffer->SetDescriptorPool(pRuntimeGfxObjects->pTextureViewDescriptorPool);
    pCommandBuffer->SetDescriptorPool(pRuntimeGfxObjects->pSamplerDescriptorPool);

    // Default pipeline state
    pCommandBuffer->SetVertexState(&pRuntimeGfxObjects->defaultVertexState);
    pCommandBuffer->SetBlendState(&pRuntimeGfxObjects->defaultBlendState);
    pCommandBuffer->SetRasterizerState(&pRuntimeGfxObjects->defaultRasterizerState);
    pCommandBuffer->SetDepthStencilState(&pRuntimeGfxObjects->defaultDepthStencilState);

    pGpuBenchmark->RenderDebug(pCommandBuffer);
}


// Classifies an elapsed time against an inclusive [min, max] reference range.
//
// timeElapsed         Measured duration; compared in nanoseconds.
// referenceResultMin  Lower bound in nanoseconds (inclusive).
// referenceResultMax  Upper bound in nanoseconds (inclusive); asserted to be
//                     at least referenceResultMin.
//
// Returns ValidationResult_MissBelow when the value is under the range,
// ValidationResult_MissOver when it is above, ValidationResult_Pass otherwise.
ValidationResult ValidateResultMinMax(
    nn::TimeSpan timeElapsed,
    uint64_t referenceResultMin,
    uint64_t referenceResultMax)
{
    NN_ASSERT(referenceResultMax >= referenceResultMin);

    const uint64_t elapsedNanoSeconds = timeElapsed.GetNanoSeconds();

    ValidationResult verdict = ValidationResult_Pass;
    if (elapsedNanoSeconds < referenceResultMin)
    {
        verdict = ValidationResult_MissBelow;
    }
    else if (elapsedNanoSeconds > referenceResultMax)
    {
        verdict = ValidationResult_MissOver;
    }

    return verdict;
}

// Classifies an elapsed time against the range
//   [mean - factor * stddev, mean + factor * stddev]
// where the lower bound is clamped to 0 instead of wrapping around.
//
// timeElapsed                       Measured duration.
// referenceResultMean               Reference mean; passed to ValidateResultMinMax(),
//                                   which compares in nanoseconds.
// referenceResultStandardDeviation  Reference standard deviation, same unit.
// factor                            Multiplier applied to the standard deviation;
//                                   converted to uint64_t before multiplying.
//
// Returns whatever ValidateResultMinMax() reports for that range.
ValidationResult ValidateResultStandardDeviation(
    nn::TimeSpan timeElapsed,
    uint64_t referenceResultMean,
    uint64_t referenceResultStandardDeviation,
    int factor)
{
    const uint64_t tolerance = referenceResultStandardDeviation * static_cast<uint64_t>(factor);

    // Clamp at zero: an unsigned subtraction would wrap when the tolerance
    // is not smaller than the mean.
    uint64_t lowerBound = 0;
    if (referenceResultMean > tolerance)
    {
        lowerBound = referenceResultMean - tolerance;
    }
    const uint64_t upperBound = referenceResultMean + tolerance;

    return ValidateResultMinMax(timeElapsed, lowerBound, upperBound);
}

} } } // namespace nnt { namespace gfx { namespace util {
