﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/gfx/util/gfx_DebugFontTextWriter.h>

#include "gfxUtilGpuBenchmark_GpuBenchmarkShader.h"

#include "gfxUtilGpuBenchmark_ResHelpers.h"
#include "gfxUtilGpuBenchmark_PropertyMacros.h"
#include "gfxUtilGpuBenchmark_ResourceAllocator.h"
#include "gfxUtilGpuBenchmark_ComputeHeavyShaderVariationIndex.h"

#include "gfxUtilGpuBenchmark_BuiltinHeavyShader.h"
#include "gfxUtilGpuBenchmark_BuiltinRenderQuadShader.h"

namespace nnt { namespace gfx { namespace util {

// Display name of this benchmark; this is the string returned by GetName().
const char* GpuBenchmarkShader::ClassName = "Shader";

// Construction does no work of its own; properties are registered in Initialize()
// and GPU objects are created in InitializeGfxObjects().
GpuBenchmarkShader::GpuBenchmarkShader() = default;

// Destruction does no work of its own; resources are released explicitly through
// FinalizeGfxObjects() and Finalize().
GpuBenchmarkShader::~GpuBenchmarkShader() = default;

// Registers the four tunable properties of the shader benchmark in m_PropertyArray:
// GridSize, VertexTransformCount, RenderSize and PixelTransformCount.
//
// Each property is declared through BENCHMARK_PROPERTY_ENUM_DEFINITION, which receives
// the property holder, a display name, the value type, the resource allocator, the
// getter/setter pair and four label/value choices.
// NOTE(review): the macro's expansion is not visible in this file; presumably it binds
// the accessors to the holder and stores the list of selectable values - confirm.
void GpuBenchmarkShader::Initialize(ResourceAllocator* pResourceAllocator)
{
    // NOTE(review): the allocator is passed to every macro invocation below, so this
    // looks redundant unless the macro ignores the argument in some configuration.
    NN_UNUSED(pResourceAllocator);

    // GridSize: selectable values 128 / 256 / 512 / 1024.
    GpuBenchmarkPropertyHolder* pPropertyGridSize = m_PropertyArray.Get(Property_GridSize);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyGridSize, "GridSize",
        int, pResourceAllocator,
        GpuBenchmarkShader::GetGridSize,
        GpuBenchmarkShader::SetGridSize,
        "128", 128,
        "256", 256,
        "512", 512,
        "1024", 1024);

    // VertexTransformCount: selectable values 8 / 16 / 32 / 64 (ShaderTransformCount enum).
    GpuBenchmarkPropertyHolder* pPropertyVertexTransformCount = m_PropertyArray.Get(Property_VertexTransformCount);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyVertexTransformCount, "VertexTransformCount",
        ShaderTransformCount, pResourceAllocator,
        GpuBenchmarkShader::GetVertexTransformCount,
        GpuBenchmarkShader::SetVertexTransformCount,
        "8", ShaderTransformCount_8,
        "16", ShaderTransformCount_16,
        "32", ShaderTransformCount_32,
        "64", ShaderTransformCount_64);

    // RenderSize: selectable values 256 / 512 / 1024 / 2048. The same value is used for
    // both the width and the height of the render target (see InitializeGfxObjects).
    GpuBenchmarkPropertyHolder* pPropertyRenderSize = m_PropertyArray.Get(Property_RenderSize);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyRenderSize, "RenderSize",
        int, pResourceAllocator,
        GpuBenchmarkShader::GetRenderSize,
        GpuBenchmarkShader::SetRenderSize,
        "256", 256,
        "512", 512,
        "1024", 1024,
        "2048", 2048);

    // PixelTransformCount: selectable values 8 / 16 / 32 / 64 (ShaderTransformCount enum).
    GpuBenchmarkPropertyHolder* pPropertyPixelTransformCount = m_PropertyArray.Get(Property_PixelTransformCount);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyPixelTransformCount, "PixelTransformCount",
        ShaderTransformCount, pResourceAllocator,
        GpuBenchmarkShader::GetPixelTransformCount,
        GpuBenchmarkShader::SetPixelTransformCount,
        "8", ShaderTransformCount_8,
        "16", ShaderTransformCount_16,
        "32", ShaderTransformCount_32,
        "64", ShaderTransformCount_64);
}

// Finalizes every property holder stored in m_PropertyArray, in index order.
// The resource allocator is not needed for this and is ignored.
void GpuBenchmarkShader::Finalize(ResourceAllocator* pResourceAllocator)
{
    NN_UNUSED(pResourceAllocator);

    int propertyIndex = 0;
    while (propertyIndex < m_PropertyArray.GetCount())
    {
        GpuBenchmarkPropertyHolder* pProperty = m_PropertyArray.Get(propertyIndex);
        pProperty->Finalize();
        ++propertyIndex;
    }
}

// Creates the GPU objects used by the shader benchmark, after initializing the base class:
//   - a square color render target (m_RenderSize x m_RenderSize) together with the
//     buffer that CopyResultToBuffer() copies the rendered image into,
//   - the built-in "heavy" shader container,
//   - a constant buffer filled with 4x4 identity matrices and its descriptor slot,
//   - (debug builds only) a quad shader plus a texture view and sampler on the render
//     texture, used by RenderDebug().
//
// pResourceAllocator  Allocator used for memory and descriptor slots.
// pDevice             Device the objects are created on.
void GpuBenchmarkShader::InitializeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    GpuBenchmark::InitializeGfxObjects(pResourceAllocator, pDevice);

    // The helper returns the size of the output copy buffer; it is reported to callers
    // of MapResultBuffer().
    m_OutputCopyBufferSize = InitializeColorRenderTarget(
        &m_RenderTexture, &m_OutputCopyBuffer,
        &m_RenderTextureColorTargetView, &m_ViewportScissorState,
        m_RenderSize, m_RenderSize, m_RenderFormat, m_TileMode,
        pResourceAllocator, pDevice);

    InitializeResShader(&m_ResShader, g_HeavyShaderData, sizeof(g_HeavyShaderData), pResourceAllocator, pDevice);
    // The container must hold ShaderTransformCount_Max^2 variations
    // (presumably one per vertex/pixel transform-count pair - confirm).
    NN_ASSERT(m_ResShader.pResShaderContainer->GetShaderVariationCount() == (ShaderTransformCount_Max * ShaderTransformCount_Max));

    nn::gfx::Buffer::InfoType constantBufferInfo;
    {
        constantBufferInfo.SetDefault();
        constantBufferInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_ConstantBuffer);
        constantBufferInfo.SetSize(m_ConstantBufferSize);
    }
    InitializeBuffer(&m_ConstantBuffer, constantBufferInfo, pResourceAllocator, MemoryPoolType_ConstantBuffer, pDevice);
    {
        float* pConstantBufferMemory = nn::util::BytePtr(m_ConstantBuffer.Map()).Get<float>();
        memset(pConstantBufferMemory, 0, m_ConstantBufferSize);

        // Each matrix occupies 16 floats; setting elements 0, 5, 10 and 15 of a zeroed
        // block writes the diagonal of a 4x4 identity matrix.
        for (int i = 0; i < m_ConstantBufferMatrixCount; ++i)
        {
            pConstantBufferMemory[0] = 1.0f;
            pConstantBufferMemory[5] = 1.0f;
            pConstantBufferMemory[10] = 1.0f;
            pConstantBufferMemory[15] = 1.0f;
            pConstantBufferMemory += 16;
        }

        // Flush the CPU writes while the range is still mapped, then release the mapping.
        // The original code flushed after Unmap(), which only works on backends that do
        // not require the range to be mapped at flush time.
        m_ConstantBuffer.FlushMappedRange(0, m_ConstantBufferSize);
        m_ConstantBuffer.Unmap();
    }

    m_ConstantBufferDescriptorSlotIndex = pResourceAllocator->AllocateAndSetBufferViewToDescriptorPool(
        &m_ConstantBuffer, constantBufferInfo.GetSize(), &m_ConstantBufferDescriptorSlot);

#if defined(NN_GFXUTIL_GPUBENCHMARK_SHADER_DEBUG)
    // Debug visualization: resources needed to draw the render texture as a quad.
    InitializeResShader(&m_RenderQuadResShader, g_RenderQuadShaderData, sizeof(g_RenderQuadShaderData), pResourceAllocator, pDevice);

    nn::gfx::TextureView::InfoType renderTextureViewInfo;
    {
        renderTextureViewInfo.SetDefault();
        renderTextureViewInfo.SetTexturePtr(&m_RenderTexture);
        renderTextureViewInfo.SetImageDimension(nn::gfx::ImageDimension_2d);
        renderTextureViewInfo.SetImageFormat(m_RenderFormat);
        renderTextureViewInfo.EditSubresourceRange().EditMipRange().SetMipCount(1);
    }
    m_RenderTextureView.Initialize(pDevice, renderTextureViewInfo);

    m_RenderTextureViewDescriptorSlotIndex = pResourceAllocator->AllocateAndSetTextureViewToDescriptorPool(
        &m_RenderTextureView, &m_RenderTextureViewDescriptorSlot);


    // Point-filtered, repeating sampler for the debug blit.
    nn::gfx::Sampler::InfoType renderTextureSamplerInfo;
    {
        renderTextureSamplerInfo.SetDefault();
        renderTextureSamplerInfo.SetFilterMode(nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint);
        renderTextureSamplerInfo.SetAddressU(nn::gfx::TextureAddressMode_Repeat);
        renderTextureSamplerInfo.SetAddressV(nn::gfx::TextureAddressMode_Repeat);
        renderTextureSamplerInfo.SetAddressW(nn::gfx::TextureAddressMode_Repeat);
    }
    m_RenderTextureSampler.Initialize(pDevice, renderTextureSamplerInfo);
    m_RenderTextureSamplerDescriptorSlotIndex = pResourceAllocator->AllocateAndSetSamplerToDescriptorPool(
        &m_RenderTextureSampler, &m_RenderTextureSamplerDescriptorSlot);
#endif
}

// Releases everything created by InitializeGfxObjects(), in the reverse order of
// creation: debug-only objects first, then the constant buffer, the heavy shader,
// the color render target, and finally the base class objects.
// Each freed descriptor slot index is reset to -1.
void GpuBenchmarkShader::FinalizeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    // Value stored in a slot index member once its slot has been returned to the pool.
    const int releasedSlotIndex = -1;

#if defined(NN_GFXUTIL_GPUBENCHMARK_SHADER_DEBUG)
    // Debug quad shader, texture view and sampler.
    FinalizeResShader(&m_RenderQuadResShader, pResourceAllocator, pDevice);

    m_RenderTextureView.Finalize(pDevice);
    pResourceAllocator->FreeDescriptorSlots(
        nn::gfx::DescriptorPoolType_TextureView, m_RenderTextureViewDescriptorSlotIndex);
    m_RenderTextureViewDescriptorSlotIndex = releasedSlotIndex;

    m_RenderTextureSampler.Finalize(pDevice);
    pResourceAllocator->FreeDescriptorSlots(
        nn::gfx::DescriptorPoolType_Sampler, m_RenderTextureSamplerDescriptorSlotIndex);
    m_RenderTextureSamplerDescriptorSlotIndex = releasedSlotIndex;
#endif

    // Constant buffer and its buffer-view descriptor slot.
    FinalizeBuffer(&m_ConstantBuffer, pResourceAllocator, pDevice);
    pResourceAllocator->FreeDescriptorSlots(
        nn::gfx::DescriptorPoolType_BufferView, m_ConstantBufferDescriptorSlotIndex);
    m_ConstantBufferDescriptorSlotIndex = releasedSlotIndex;

    // Heavy shader container.
    FinalizeResShader(&m_ResShader, pResourceAllocator, pDevice);

    // Render texture, output copy buffer, color target view and viewport/scissor state.
    FinalizeColorRenderTarget(
        &m_RenderTexture, &m_OutputCopyBuffer,
        &m_RenderTextureColorTargetView, &m_ViewportScissorState,
        pResourceAllocator, pDevice);

    GpuBenchmark::FinalizeGfxObjects(pResourceAllocator, pDevice);
}

// Records the state setup that precedes the measured draws:
// clears the benchmark render target to opaque blue, binds it with its
// viewport/scissor state, selects the heavy-shader variation matching the current
// pixel/vertex transform counts, and binds the constant buffer to the vertex and
// pixel stages.
void GpuBenchmarkShader::PreBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    nn::gfx::ColorTargetView* pColorTarget = &m_RenderTextureColorTargetView;

    // Render target setup.
    pTestCommandBuffer->ClearColor(pColorTarget, 0.0f, 0.0f, 1.0f, 1.0f, nullptr);
    pTestCommandBuffer->SetRenderTargets(1, &pColorTarget, nullptr);
    pTestCommandBuffer->SetViewportScissorState(&m_ViewportScissorState);

    // Shader selection: container -> variation -> program -> shader.
    const int variationIndex = ComputeHeavyShaderVariationIndex(m_PixelTransformCount, m_VertexTransformCount);
    auto* pVariation = m_ResShader.pResShaderContainer->GetResShaderVariation(variationIndex);
    auto* pProgram = pVariation->GetResShaderProgram(m_ResShader.codeType);
    nn::gfx::Shader* pHeavyShader = pProgram->GetShader();
    pTestCommandBuffer->SetShader(pHeavyShader, nn::gfx::ShaderStageBit_All);

    // The same constant buffer is bound at the same slot for both stages.
    const nn::gfx::ShaderStage constantBufferStages[] =
    {
        nn::gfx::ShaderStage_Vertex,
        nn::gfx::ShaderStage_Pixel,
    };
    for (const nn::gfx::ShaderStage stage : constantBufferStages)
    {
        pTestCommandBuffer->SetConstantBuffer(m_ConstantBufferSlotIndex, stage, m_ConstantBufferDescriptorSlot);
    }
}

// Records runCount identical draw calls. Each draw is a 4-vertex triangle strip;
// the third Draw argument is the grid size shifted left by two bits and the fourth
// is the grid size squared.
// NOTE(review): presumably the shader derives the grid size from the shifted value
// and places one quad per grid cell - confirm against the heavy shader source.
void GpuBenchmarkShader::DoBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer, int runCount)
{
    const int gridSize = m_GridSize;
    const int shiftedGridSize = gridSize << 2;
    const int gridCellCount = gridSize * gridSize;

    for (int remainingRuns = runCount; remainingRuns > 0; --remainingRuns)
    {
        pTestCommandBuffer->Draw(
            nn::gfx::PrimitiveTopology_TriangleStrip,
            4, shiftedGridSize,
            gridCellCount, 0);
    }
}

// Debug visualization of the benchmark output. Does nothing unless
// NN_GFXUTIL_GPUBENCHMARK_SHADER_DEBUG is defined; when it is, draws a single
// 4-vertex triangle strip with the quad shader, sampling the benchmark render texture.
void GpuBenchmarkShader::RenderDebug(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    NN_UNUSED(pTestCommandBuffer);

#if defined(NN_GFXUTIL_GPUBENCHMARK_SHADER_DEBUG)
    const int quadShaderVariationIndex = 3; // instance + blit texture
    auto* pVariation = m_RenderQuadResShader.pResShaderContainer->GetResShaderVariation(quadShaderVariationIndex);
    auto* pProgram = pVariation->GetResShaderProgram(m_RenderQuadResShader.codeType);
    nn::gfx::Shader* pQuadShader = pProgram->GetShader();
    pTestCommandBuffer->SetShader(pQuadShader, nn::gfx::ShaderStageBit_All);

    pTestCommandBuffer->SetTextureAndSampler(
        0, nn::gfx::ShaderStage_Pixel,
        m_RenderTextureViewDescriptorSlot,
        m_RenderTextureSamplerDescriptorSlot);

    // Quad parameters are packed into one integer: scale in bits 8..15,
    // X offset in bits 16..23, Y offset in bits 24 and up.
    // NOTE(review): presumably unpacked by the quad shader - confirm.
    const int scale = 2;
    const int offsetX = 1;
    const int offsetY = 1;
    const int packedQuadParameters = (scale << 8) | (offsetX << 16) | (offsetY << 24);

    pTestCommandBuffer->Draw(
        nn::gfx::PrimitiveTopology_TriangleStrip, 4,
        packedQuadParameters, 1, 0);
#endif
}


// Prints the timing results of a benchmark run to the debug font writer:
// average fill rate in megapixels per second, average GPU/CPU time per run and
// total GPU/CPU time, all times in nanoseconds.
//
// cpuTimeElapsed        Total CPU time spent over all runs.
// gpuTimeElapsed        Total GPU time spent over all runs.
// runCount              Number of runs the totals cover; must be positive.
// pDebugFontTextWriter  Writer that receives the formatted text.
void GpuBenchmarkShader::PrintResults(nn::TimeSpan cpuTimeElapsed, nn::TimeSpan gpuTimeElapsed, int runCount, nn::gfx::util::DebugFontTextWriter* pDebugFontTextWriter)
{
    NN_ASSERT(runCount > 0);
    if (runCount <= 0)
    {
        // Still guard the divisions below in case the assert is compiled out.
        return;
    }

    const uint64_t runCountValue = static_cast<uint64_t>(runCount);

    const uint64_t gpuTimeElapsedValueInNs = static_cast<uint64_t>(gpuTimeElapsed.GetNanoSeconds());
    const uint64_t cpuTimeElapsedValueInNs = static_cast<uint64_t>(cpuTimeElapsed.GetNanoSeconds());

    const uint64_t gpuTimeElapsedAvgValueInNs = gpuTimeElapsedValueInNs / runCountValue;
    const uint64_t cpuTimeElapsedAvgValueInNs = cpuTimeElapsedValueInNs / runCountValue;

    // Every run covers the full m_RenderSize x m_RenderSize target. Widen before
    // multiplying so the product is computed in 64 bits.
    const uint64_t renderSize = static_cast<uint64_t>(m_RenderSize);
    const uint64_t pixelCount = runCountValue * renderSize * renderSize;

    // Mpix/s == pixels per microsecond == pixels * 1000 / nanoseconds.
    // Scaling the numerator (instead of dividing the time by 1000 first) avoids a
    // division by zero for GPU times below one microsecond and keeps more precision.
    uint64_t gpuFillrateAvgMPixPerSec = 0;
    if (gpuTimeElapsedValueInNs > 0)
    {
        gpuFillrateAvgMPixPerSec = (pixelCount * 1000) / gpuTimeElapsedValueInNs;
    }

    // uint64_t is not 'unsigned long' on every target, so print through an explicit
    // 'unsigned long long' conversion with the matching %llu specifier.
    pDebugFontTextWriter->Print("gpu fillRateAvgMPixPerSec:%12llu\n", static_cast<unsigned long long>(gpuFillrateAvgMPixPerSec));
    pDebugFontTextWriter->Print("gpu time (ns): %8llu\n", static_cast<unsigned long long>(gpuTimeElapsedAvgValueInNs));
    pDebugFontTextWriter->Print("cpu time (ns): %8llu\n", static_cast<unsigned long long>(cpuTimeElapsedAvgValueInNs));
    pDebugFontTextWriter->Print("total gpu time (ns): %12llu\n", static_cast<unsigned long long>(gpuTimeElapsedValueInNs));
    pDebugFontTextWriter->Print("total cpu time (ns): %12llu\n", static_cast<unsigned long long>(cpuTimeElapsedValueInNs));
}

// Records a copy of the whole render texture (render size x render size, default
// subresource) into m_OutputCopyBuffer. The copy is preceded by a texture-access
// memory invalidation and followed by a write-access one.
void GpuBenchmarkShader::CopyResultToBuffer(nn::gfx::CommandBuffer* pCommandBuffer)
{
    const int imageExtent = GetRenderSize();

    nn::gfx::BufferTextureCopyRegion copyRegion;
    copyRegion.SetDefault();

    // Buffer-side layout.
    copyRegion.SetBufferImageHeight(imageExtent);
    copyRegion.SetBufferImageWidth(imageExtent);

    // Texture-side region.
    auto& textureRegion = copyRegion.EditTextureCopyRegion();
    textureRegion.SetDefault();
    textureRegion.SetWidth(imageExtent);
    textureRegion.SetHeight(imageExtent);
    textureRegion.EditSubresource().SetDefault();

    pCommandBuffer->InvalidateMemory(nn::gfx::GpuAccess_Texture);
    pCommandBuffer->CopyImageToBuffer(&m_OutputCopyBuffer, &m_RenderTexture, copyRegion);
    pCommandBuffer->InvalidateMemory(nn::gfx::GpuAccess_Write);
}

// Maps the output copy buffer and reports its address and size.
// The mapping stays active until UnmapResultBuffer() is called.
//
// pOutBuffer      Receives the mapped address of m_OutputCopyBuffer.
// pOutBufferSize  Receives m_OutputCopyBufferSize.
void GpuBenchmarkShader::MapResultBuffer(void** pOutBuffer, size_t* pOutBufferSize)
{
    void* const pMappedResult = m_OutputCopyBuffer.Map();

    *pOutBuffer = pMappedResult;
    *pOutBufferSize = m_OutputCopyBufferSize;
}

// Releases the mapping of the output copy buffer obtained through MapResultBuffer().
void GpuBenchmarkShader::UnmapResultBuffer()
{
    auto& resultBuffer = m_OutputCopyBuffer;
    resultBuffer.Unmap();
}

// Returns the benchmark's display name (the static ClassName string).
const char* GpuBenchmarkShader::GetName() const
{
    const char* const pBenchmarkName = ClassName;
    return pBenchmarkName;
}

// Identifies this benchmark as BenchmarkType_Shader.
BenchmarkType GpuBenchmarkShader::GetType() const
{
    const BenchmarkType benchmarkType = BenchmarkType_Shader;
    return benchmarkType;
}

// Returns the number of properties held in m_PropertyArray.
int GpuBenchmarkShader::GetPropertyCount() const
{
    const int propertyCount = m_PropertyArray.GetCount();
    return propertyCount;
}

// Read-only overload: forwards to m_PropertyArray.FillPropertyList() with the
// destination array and its capacity, and returns that call's result.
int GpuBenchmarkShader::FillPropertyList(const GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize) const
{
    const int fillResult = m_PropertyArray.FillPropertyList(ppDestinationArray, destinationArrayMaxSize);
    return fillResult;
}

// Mutable overload: forwards to m_PropertyArray.FillPropertyList() with the
// destination array and its capacity, and returns that call's result.
int GpuBenchmarkShader::FillPropertyList(GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize)
{
    const int fillResult = m_PropertyArray.FillPropertyList(ppDestinationArray, destinationArrayMaxSize);
    return fillResult;
}

// Looks a property up by name in m_PropertyArray and returns whatever that lookup yields.
GpuBenchmarkPropertyHolder* GpuBenchmarkShader::FindPropertyByName(const char* propertyName)
{
    GpuBenchmarkPropertyHolder* pMatchingProperty = m_PropertyArray.FindPropertyByName(propertyName);
    return pMatchingProperty;
}

// Returns the property holder stored at the given index of m_PropertyArray.
GpuBenchmarkPropertyHolder* GpuBenchmarkShader::GetPropertyByIndex(int index)
{
    GpuBenchmarkPropertyHolder* pProperty = m_PropertyArray.Get(index);
    return pProperty;
}

} } } // namespace nnt { namespace gfx { namespace util {
