﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/gfx/util/gfx_DebugFontTextWriter.h>

#include "gfxUtilGpuBenchmark_GpuBenchmarkVertexFetch.h"

#include "gfxUtilGpuBenchmark_ResHelpers.h"
#include "gfxUtilGpuBenchmark_PropertyMacros.h"
#include "gfxUtilGpuBenchmark_ComputeVertexFetchShaderVariationIndex.h"
#include "gfxUtilGpuBenchmark_ResourceAllocator.h"

#include "gfxUtilGpuBenchmark_BuiltinVertexFetchShader.h"

#if defined(NN_GFXUTIL_GPUBENCHMARK_VERTEXFETCH_DEBUG)
#include "gfxUtilGpuBenchmark_ComputeRenderQuadShaderVariationIndex.h"
#include "gfxUtilGpuBenchmark_BuiltinRenderQuadShader.h"
#endif

// List of the vertex attribute formats exposed by this benchmark, written as
// alternating (display name, nn::gfx::AttributeFormat value) pairs.
// It is passed as the trailing arguments of the BENCHMARK_PROPERTY_ENUM_DEFINITION*
// macros. Every format listed here has a matching case in GetAttributeSizeInBytes();
// keep both in sync when adding a format.
#define ATTRIBUTE_FORMAT_ENUM_VALUES                                                        \
    "8_Unorm",                  nn::gfx::AttributeFormat_8_Unorm,                           \
    "16_Unorm",                 nn::gfx::AttributeFormat_16_Unorm,                          \
    "8_8_8_8_Unorm",            nn::gfx::AttributeFormat_8_8_8_8_Unorm,                     \
    "10_10_10_2_Unorm",         nn::gfx::AttributeFormat_10_10_10_2_Unorm,                  \
    "16_16_16_16_Unorm",        nn::gfx::AttributeFormat_16_16_16_16_Unorm,                 \
    "32_32_32_32_Uint",         nn::gfx::AttributeFormat_32_32_32_32_Uint,                  \
    "16_16_16_16_UintToFloat",  nn::gfx::AttributeFormat_16_16_16_16_UintToFloat,           \
    "32_32_32_32_Float",        nn::gfx::AttributeFormat_32_32_32_32_Float


// Defines an indexed enum property named "Attribute<_n>Format" whose choices are
// ATTRIBUTE_FORMAT_ENUM_VALUES. The expansion fetches the holder from m_PropertyArray
// using the enumerator Property_Attribute<_n>Format, so it can only be expanded inside
// a GpuBenchmarkVertexFetch member function.
// NOTE(review): this macro is not referenced in the visible part of this file, and
// Property_Attribute<_n>Format is not used anywhere else here - confirm whether it is
// still needed.
#define PROPERTY_VERTEX_ATTRIBUTE(_n)                                                                                           \
    GpuBenchmarkPropertyHolder* pPropertyAttribute ## _n ## Format = m_PropertyArray.Get(Property_Attribute ## _n ## Format);   \
    BENCHMARK_PROPERTY_ENUM_DEFINITION_INDEXED(                                                                                 \
        pPropertyAttribute ## _n ## Format, "Attribute" #_n "Format",                                                           \
        nn::gfx::AttributeFormat, int, _n,                                                                                      \
        GpuBenchmarkVertexFetch::GetAttributeFormat,                                                                            \
        GpuBenchmarkVertexFetch::SetAttributeFormat,                                                                            \
        ATTRIBUTE_FORMAT_ENUM_VALUES);

namespace nnt { namespace gfx { namespace util {

// Returns the number of bytes occupied by one vertex attribute of the given format.
// Only the formats offered through ATTRIBUTE_FORMAT_ENUM_VALUES are handled; any other
// value reaches NN_UNEXPECTED_DEFAULT.
int GetAttributeSizeInBytes(nn::gfx::AttributeFormat format)
{
    int sizeInBytes = 0;

    switch (format)
    {
    // One 8-bit component.
    case nn::gfx::AttributeFormat_8_Unorm:
        sizeInBytes = 1;
        break;

    // One 16-bit component.
    case nn::gfx::AttributeFormat_16_Unorm:
        sizeInBytes = 2;
        break;

    // 32 bits in total.
    case nn::gfx::AttributeFormat_8_8_8_8_Unorm:
    case nn::gfx::AttributeFormat_10_10_10_2_Unorm:
        sizeInBytes = 4;
        break;

    // Four 16-bit components.
    case nn::gfx::AttributeFormat_16_16_16_16_Unorm:
    case nn::gfx::AttributeFormat_16_16_16_16_UintToFloat:
        sizeInBytes = 8;
        break;

    // Four 32-bit components.
    case nn::gfx::AttributeFormat_32_32_32_32_Uint:
    case nn::gfx::AttributeFormat_32_32_32_32_Float:
        sizeInBytes = 16;
        break;

    default:
        {
            NN_UNEXPECTED_DEFAULT;
        }
    }

    return sizeInBytes;
}

// Name of this benchmark; this is the string returned by GetName().
const char* GpuBenchmarkVertexFetch::ClassName = "VertexFetch";


// Sets the default property values: a single attribute in 8_Unorm format and a
// vertex grid size of 256. The vertex stride starts at 0 and is computed from the
// attribute format and count in InitializeGfxObjects(). No graphics object is
// created here.
GpuBenchmarkVertexFetch::GpuBenchmarkVertexFetch()
: m_AttributeCount(1)
, m_AttributeFormat(nn::gfx::AttributeFormat_8_Unorm)
, m_VertexGridSize(256)
, m_VertexStride(0)
, m_RenderTexture()
, m_RenderTextureTargetView()
, m_RenderTextureViewportScissorState()
, m_DrawQuadResShader()
, m_VertexBuffer()
, m_PropertyArray()
{
}

// Intentionally empty: graphics objects are released by FinalizeGfxObjects() and the
// property holders by Finalize().
GpuBenchmarkVertexFetch::~GpuBenchmarkVertexFetch()
{
}

// Defines the three configurable properties of the benchmark (AttributeCount,
// AttributeFormat and VertexGridSize) in m_PropertyArray and binds each of them to
// its getter/setter pair. No graphics object is created here; see InitializeGfxObjects().
void GpuBenchmarkVertexFetch::Initialize(ResourceAllocator* pResourceAllocator)
{
    // pResourceAllocator is also handed to the enum-definition macros below;
    // presumably NN_UNUSED silences a warning in case they do not reference it.
    NN_UNUSED(pResourceAllocator);

    // AttributeCount
    // Integer range property; the trailing arguments are presumably (min, max, step),
    // i.e. 1 to m_MaxAttributeCount in steps of 1 - confirm against the macro definition.
    GpuBenchmarkPropertyHolder* pPropertyAttributeCount = m_PropertyArray.Get(Property_AttributeCount);
    BENCHMARK_PROPERTY_INTEGER_RANGE_DEFINITION(
        pPropertyAttributeCount, "AttributeCount",
        GpuBenchmarkVertexFetch::GetAttributeCount,
        GpuBenchmarkVertexFetch::SetAttributeCount,
        1, m_MaxAttributeCount, 1);

    // AttributeFormat
    // Enum property; the choices are the pairs listed in ATTRIBUTE_FORMAT_ENUM_VALUES.
    GpuBenchmarkPropertyHolder* pPropertyAttributeFormat = m_PropertyArray.Get(Property_AttributeFormat);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyAttributeFormat, "AttributeFormat",
        nn::gfx::AttributeFormat, pResourceAllocator,
        GpuBenchmarkVertexFetch::GetAttributeFormat,
        GpuBenchmarkVertexFetch::SetAttributeFormat,
        ATTRIBUTE_FORMAT_ENUM_VALUES);

    // GridSize
    // Enum property restricted to the values 64, 128 and 256.
    GpuBenchmarkPropertyHolder* pPropertyVertexGridSize = m_PropertyArray.Get(Property_VertexGridSize);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyVertexGridSize, "VertexGridSize",
        int, pResourceAllocator,
        GpuBenchmarkVertexFetch::GetVertexGridSize,
        GpuBenchmarkVertexFetch::SetVertexGridSize,
        "64", 64,
        "128", 128,
        "256", 256);
}

// Finalizes every property holder stored in m_PropertyArray.
// pResourceAllocator is not used by this function.
void GpuBenchmarkVertexFetch::Finalize(ResourceAllocator* pResourceAllocator)
{
    NN_UNUSED(pResourceAllocator);

    int propertyIndex = 0;
    while (propertyIndex < m_PropertyArray.GetCount())
    {
        GpuBenchmarkPropertyHolder* pProperty = m_PropertyArray.Get(propertyIndex);
        pProperty->Finalize();
        ++propertyIndex;
    }
}

// Creates every graphics object used by the benchmark, based on the current
// property values:
//  - the color render target together with its output copy buffer,
//  - the vertex fetch shader,
//  - a zero-filled vertex buffer and the matching vertex state,
//  - a constant buffer holding the vertex grid size, and its descriptor slot,
//  - (NN_GFXUTIL_GPUBENCHMARK_VERTEXFETCH_DEBUG only) the shader, texture view and
//    sampler used by RenderDebug().
// FinalizeGfxObjects() releases the same objects.
void GpuBenchmarkVertexFetch::InitializeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    GpuBenchmark::InitializeGfxObjects(pResourceAllocator, pDevice);

    m_OutputCopyBufferSize = InitializeColorRenderTarget(
        &m_RenderTexture, &m_OutputCopyBuffer,
        &m_RenderTextureTargetView, &m_RenderTextureViewportScissorState,
        m_RenderSize, m_RenderSize, m_RenderFormat, m_TileMode,
        pResourceAllocator, pDevice);

    InitializeResShader(&m_DrawQuadResShader, g_VertexFetchShaderData, sizeof(g_VertexFetchShaderData), pResourceAllocator, pDevice);

    // All attributes use the same format and are stored back to back in one buffer.
    m_VertexStride = GetAttributeSizeInBytes(m_AttributeFormat) * m_AttributeCount;

    // Widen before multiplying so the byte size is computed in size_t.
    size_t vertexBufferSizeInBytes = static_cast<size_t>(m_VertexStride) * GetVertexCount();

    nn::gfx::Buffer::InfoType vertexBufferInfo;
    {
        vertexBufferInfo.SetDefault();
        vertexBufferInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_VertexBuffer);
        vertexBufferInfo.SetSize(vertexBufferSizeInBytes);
    }

    // The vertex data itself is irrelevant to the measurement: fill it with zeros.
    InitializeBuffer(&m_VertexBuffer, vertexBufferInfo, pResourceAllocator, MemoryPoolType_Data, pDevice);
    void* pVertexBuffer = m_VertexBuffer.Map();
    memset(pVertexBuffer, 0, vertexBufferInfo.GetSize());
    m_VertexBuffer.Unmap();
    m_VertexBuffer.FlushMappedRange(0, vertexBufferInfo.GetSize());

    // vertexStateInfo is given the addresses of these objects below. They are declared
    // at function scope so that they are still alive when InitializeVertexState()
    // reads vertexStateInfo, in case the info keeps the pointers instead of copying.
    nn::gfx::VertexAttributeStateInfo attributeStateInfo[m_MaxAttributeCount];
    nn::gfx::VertexBufferStateInfo vertexBufferStateInfo;

    nn::gfx::VertexState::InfoType vertexStateInfo;
    {
        ptrdiff_t vertexOffset = 0;
        for (int attributeIndex = 0; attributeIndex < m_AttributeCount; ++attributeIndex)
        {
            nn::gfx::VertexAttributeStateInfo* pVertexAttributeStateInfo = &attributeStateInfo[attributeIndex];
            // Start from defaults so that fields not set below are well defined,
            // as is done for every other info object in this function.
            pVertexAttributeStateInfo->SetDefault();
            pVertexAttributeStateInfo->SetBufferIndex(0);
            pVertexAttributeStateInfo->SetFormat(m_AttributeFormat);
            pVertexAttributeStateInfo->SetOffset(vertexOffset);
            pVertexAttributeStateInfo->SetShaderSlot(attributeIndex);

            vertexOffset += GetAttributeSizeInBytes(m_AttributeFormat);
        }
        // The accumulated attribute offsets must add up to the stride computed above.
        NN_ASSERT(vertexOffset == m_VertexStride);

        vertexBufferStateInfo.SetDefault();
        vertexBufferStateInfo.SetStride(m_VertexStride);

        vertexStateInfo.SetDefault();
        vertexStateInfo.SetVertexAttributeStateInfoArray(attributeStateInfo, m_AttributeCount);
        vertexStateInfo.SetVertexBufferStateInfoArray(&vertexBufferStateInfo, 1);
    }
    InitializeVertexState(&m_VertexState, vertexStateInfo, pResourceAllocator, pDevice);

    // Constant buffer holding a single int: the vertex grid size.
    nn::gfx::Buffer::InfoType constantBufferInfo;
    {
        constantBufferInfo.SetDefault();
        constantBufferInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_ConstantBuffer);
        constantBufferInfo.SetSize(sizeof(int));
    }
    InitializeBuffer(&m_ConstantBuffer, constantBufferInfo, pResourceAllocator, MemoryPoolType_ConstantBuffer, pDevice);
    int* pConstantBuffer = nn::util::BytePtr(m_ConstantBuffer.Map()).Get<int>();
    *pConstantBuffer = m_VertexGridSize;
    m_ConstantBuffer.Unmap();
    m_ConstantBuffer.FlushMappedRange(0, constantBufferInfo.GetSize());

    m_ConstantBufferDescriptorSlotIndex = pResourceAllocator->AllocateAndSetBufferViewToDescriptorPool(
        &m_ConstantBuffer, constantBufferInfo.GetSize(), &m_ConstantBufferDescriptorSlot);

#if defined(NN_GFXUTIL_GPUBENCHMARK_VERTEXFETCH_DEBUG)
    // Objects used only by RenderDebug(): a shader, plus a texture view and a sampler
    // on the render target.
    InitializeResShader(&m_RenderQuadResShader, g_RenderQuadShaderData, sizeof(g_RenderQuadShaderData), pResourceAllocator, pDevice);

    nn::gfx::TextureView::InfoType renderTextureViewInfo;
    {
        renderTextureViewInfo.SetDefault();
        renderTextureViewInfo.SetTexturePtr(&m_RenderTexture);
        renderTextureViewInfo.SetImageDimension(nn::gfx::ImageDimension_2d);
        renderTextureViewInfo.SetImageFormat(m_RenderFormat);
        renderTextureViewInfo.EditSubresourceRange().EditMipRange().SetMipCount(1);
    }
    m_RenderTextureView.Initialize(pDevice, renderTextureViewInfo);

    m_RenderTextureViewDescriptorSlotIndex = pResourceAllocator->AllocateAndSetTextureViewToDescriptorPool(
        &m_RenderTextureView, &m_RenderTextureViewDescriptorSlot);


    nn::gfx::Sampler::InfoType renderTextureSamplerInfo;
    {
        renderTextureSamplerInfo.SetDefault();
        renderTextureSamplerInfo.SetFilterMode(nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint);
        renderTextureSamplerInfo.SetAddressU(nn::gfx::TextureAddressMode_Repeat);
        renderTextureSamplerInfo.SetAddressV(nn::gfx::TextureAddressMode_Repeat);
        renderTextureSamplerInfo.SetAddressW(nn::gfx::TextureAddressMode_Repeat);
    }
    m_RenderTextureSampler.Initialize(pDevice, renderTextureSamplerInfo);
    m_RenderTextureSamplerDescriptorSlotIndex = pResourceAllocator->AllocateAndSetSamplerToDescriptorPool(
        &m_RenderTextureSampler, &m_RenderTextureSamplerDescriptorSlot);
#endif
}

// Releases the objects created by InitializeGfxObjects(), in the reverse order of
// their creation, and resets the descriptor slot indices to -1.
void GpuBenchmarkVertexFetch::FinalizeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
#if defined(NN_GFXUTIL_GPUBENCHMARK_VERTEXFETCH_DEBUG)
    // Objects used only by RenderDebug().
    FinalizeResShader(&m_RenderQuadResShader, pResourceAllocator, pDevice);
    m_RenderTextureView.Finalize(pDevice);
    pResourceAllocator->FreeDescriptorSlots(nn::gfx::DescriptorPoolType_TextureView, m_RenderTextureViewDescriptorSlotIndex);
    m_RenderTextureViewDescriptorSlotIndex = -1;

    m_RenderTextureSampler.Finalize(pDevice);
    pResourceAllocator->FreeDescriptorSlots(nn::gfx::DescriptorPoolType_Sampler, m_RenderTextureSamplerDescriptorSlotIndex);
    m_RenderTextureSamplerDescriptorSlotIndex = -1;
#endif

    FinalizeBuffer(&m_ConstantBuffer, pResourceAllocator, pDevice);
    pResourceAllocator->FreeDescriptorSlots(nn::gfx::DescriptorPoolType_BufferView, m_ConstantBufferDescriptorSlotIndex);
    // Reset to -1 like the other slot indices above, so the freed slot is not
    // mistaken for a valid one.
    m_ConstantBufferDescriptorSlotIndex = -1;

    FinalizeVertexState(&m_VertexState, pResourceAllocator, pDevice);
    FinalizeBuffer(&m_VertexBuffer, pResourceAllocator, pDevice);

    FinalizeResShader(&m_DrawQuadResShader, pResourceAllocator, pDevice);

    FinalizeColorRenderTarget(
        &m_RenderTexture, &m_OutputCopyBuffer,
        &m_RenderTextureTargetView, &m_RenderTextureViewportScissorState,
        pResourceAllocator, pDevice);

    GpuBenchmark::FinalizeGfxObjects(pResourceAllocator, pDevice);
}

// Records into pTestCommandBuffer the state used by the draws of DoBenchmark():
// clears and binds the render target, then sets the viewport/scissor state, the
// vertex state, the shader variation matching the attribute count, the vertex buffer
// and the constant buffer.
void GpuBenchmarkVertexFetch::PreBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    nn::gfx::ColorTargetView* pTestTarget = &m_RenderTextureTargetView;

    pTestCommandBuffer->ClearColor(pTestTarget, 0.0f, 0.0f, 1.0f, 1.0f, nullptr);

    pTestCommandBuffer->SetRenderTargets(1, &pTestTarget, nullptr);
    pTestCommandBuffer->SetViewportScissorState(&m_RenderTextureViewportScissorState);

    pTestCommandBuffer->SetVertexState(&m_VertexState);

    // The shader variation is selected from the attribute count (1 attribute -> argument 0).
    int shaderVariationIndex = ComputeVertexFetchShaderVariationIndex(m_AttributeCount - 1);
    nn::gfx::Shader* pShader = m_DrawQuadResShader.pResShaderContainer->GetResShaderVariation(shaderVariationIndex)->GetResShaderProgram(m_DrawQuadResShader.codeType)->GetShader();
    pTestCommandBuffer->SetShader(pShader, nn::gfx::ShaderStageBit_All);

    // Same computation as the buffer allocation in InitializeGfxObjects(): widen the
    // stride before multiplying so both sizes always agree and the product is not
    // evaluated in a narrower type.
    size_t vertexBufferSizeInBytes = static_cast<size_t>(m_VertexStride) * GetVertexCount();
    nn::gfx::GpuAddress vertexBufferGpuAddress;
    m_VertexBuffer.GetGpuAddress(&vertexBufferGpuAddress);
    pTestCommandBuffer->SetVertexBuffer(0, vertexBufferGpuAddress, m_VertexStride, vertexBufferSizeInBytes);

    pTestCommandBuffer->SetConstantBuffer(0, nn::gfx::ShaderStage_Vertex, m_ConstantBufferDescriptorSlot);
}

// Records runCount identical triangle-list draws of GetVertexCount() vertices into
// pTestCommandBuffer. runCount is asserted to be positive.
void GpuBenchmarkVertexFetch::DoBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer, int runCount)
{
    NN_ASSERT(runCount > 0);

    // The vertex count is the same for every draw: query it once.
    const int drawVertexCount = GetVertexCount();

    int remainingRuns = runCount;
    while (remainingRuns > 0)
    {
        pTestCommandBuffer->Draw(nn::gfx::PrimitiveTopology_TriangleList, drawVertexCount, 0);
        --remainingRuns;
    }
}

// Debug visualization. Does nothing unless NN_GFXUTIL_GPUBENCHMARK_VERTEXFETCH_DEBUG
// is defined; in that case it draws a 4-vertex triangle strip that samples the
// benchmark render target with the render-quad shader.
void GpuBenchmarkVertexFetch::RenderDebug(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    NN_UNUSED(pTestCommandBuffer);

#if defined(NN_GFXUTIL_GPUBENCHMARK_VERTEXFETCH_DEBUG)
    // instance + blit texture
    const int quadVariationIndex = ComputeRenderQuadShaderVariationIndex(1, 1);
    nn::gfx::Shader* pQuadShader =
        m_RenderQuadResShader.pResShaderContainer
            ->GetResShaderVariation(quadVariationIndex)
            ->GetResShaderProgram(m_RenderQuadResShader.codeType)
            ->GetShader();
    pTestCommandBuffer->SetShader(pQuadShader, nn::gfx::ShaderStageBit_All);

    pTestCommandBuffer->SetTextureAndSampler(
        0, nn::gfx::ShaderStage_Pixel,
        m_RenderTextureViewDescriptorSlot,
        m_RenderTextureSamplerDescriptorSlot);

    // Scale and offsets are packed into the third Draw argument (bits 8, 16 and 24);
    // presumably the render-quad shader decodes them - confirm against the shader.
    const int quadScale = 3;
    const int quadOffsetX = 2;
    const int quadOffsetY = 2;
    const int packedDrawParameter = (quadScale << 8) | (quadOffsetX << 16) | (quadOffsetY << 24);

    pTestCommandBuffer->Draw(nn::gfx::PrimitiveTopology_TriangleStrip, 4, packedDrawParameter);
#endif
}


// Prints the benchmark results with pDebugFontTextWriter: the vertex throughput
// (vertices per second of GPU time), the average GPU and CPU time per run, and the
// total GPU and CPU time, all times in nanoseconds.
//   cpuTimeElapsed / gpuTimeElapsed : total time measured over all runs.
//   runCount                        : number of runs measured; asserted to be positive.
void GpuBenchmarkVertexFetch::PrintResults(nn::TimeSpan cpuTimeElapsed, nn::TimeSpan gpuTimeElapsed, int runCount, nn::gfx::util::DebugFontTextWriter* pDebugFontTextWriter)
{
    NN_ASSERT(runCount > 0);

    uint64_t gpuTimeElapsedValueInNs = static_cast<uint64_t>(gpuTimeElapsed.GetNanoSeconds());
    uint64_t cpuTimeElapsedValueInNs = static_cast<uint64_t>(cpuTimeElapsed.GetNanoSeconds());

    uint64_t gpuTimeElapsedAvgValueInNs = gpuTimeElapsedValueInNs / static_cast<uint64_t>(runCount);
    uint64_t cpuTimeElapsedAvgValueInNs = cpuTimeElapsedValueInNs / static_cast<uint64_t>(runCount);

    // Left at 0 when no GPU time was measured, to avoid dividing by zero.
    uint64_t verticesPerSec = 0;
    if (gpuTimeElapsedValueInNs > 0)
    {
        // Widen each factor before multiplying: the product of the vertex count and
        // the run count can exceed the range of int.
        const uint64_t totalVertexCount = static_cast<uint64_t>(GetVertexCount()) * static_cast<uint64_t>(runCount);
        verticesPerSec = (totalVertexCount * 1000000000LLU) / gpuTimeElapsedValueInNs;
    }

    // uint64_t is not "unsigned long" on every target, so the values are passed as
    // unsigned long long and printed with %llu.
    pDebugFontTextWriter->Print("verticesPerSec:%12llu\n", static_cast<unsigned long long>(verticesPerSec));
    pDebugFontTextWriter->Print("gpu time (ns): %8llu\n", static_cast<unsigned long long>(gpuTimeElapsedAvgValueInNs));
    pDebugFontTextWriter->Print("cpu time (ns): %8llu\n", static_cast<unsigned long long>(cpuTimeElapsedAvgValueInNs));
    pDebugFontTextWriter->Print("total gpu time (ns): %12llu\n", static_cast<unsigned long long>(gpuTimeElapsedValueInNs));
    pDebugFontTextWriter->Print("total cpu time (ns): %12llu\n", static_cast<unsigned long long>(cpuTimeElapsedValueInNs));
}

// Records into pCommandBuffer a copy of the whole render target
// (m_RenderSize x m_RenderSize) into m_OutputCopyBuffer, with a memory invalidation
// before and after the copy.
void GpuBenchmarkVertexFetch::CopyResultToBuffer(nn::gfx::CommandBuffer* pCommandBuffer)
{
    int renderSize = m_RenderSize;

    nn::gfx::BufferTextureCopyRegion copyRegion;
    copyRegion.SetDefault();
    copyRegion.SetBufferImageHeight(renderSize);
    copyRegion.SetBufferImageWidth(renderSize);

    // Texture side of the copy: the full render target, default subresource.
    auto& textureRegion = copyRegion.EditTextureCopyRegion();
    textureRegion.SetDefault();
    textureRegion.SetWidth(renderSize);
    textureRegion.SetHeight(renderSize);
    textureRegion.EditSubresource().SetDefault();

    pCommandBuffer->InvalidateMemory(nn::gfx::GpuAccess_Texture);
    pCommandBuffer->CopyImageToBuffer(&m_OutputCopyBuffer, &m_RenderTexture, copyRegion);
    pCommandBuffer->InvalidateMemory(nn::gfx::GpuAccess_Write);
}

// Maps the output copy buffer and returns its address in *pOutBuffer and its size in
// *pOutBufferSize. The buffer stays mapped until UnmapResultBuffer() is called.
void GpuBenchmarkVertexFetch::MapResultBuffer(void** pOutBuffer, size_t* pOutBufferSize)
{
    void* const pMappedResult = m_OutputCopyBuffer.Map();

    *pOutBuffer = pMappedResult;
    *pOutBufferSize = m_OutputCopyBufferSize;
}

// Unmaps the output copy buffer that was mapped by MapResultBuffer().
void GpuBenchmarkVertexFetch::UnmapResultBuffer()
{
    m_OutputCopyBuffer.Unmap();
}

// Returns the name of this benchmark, i.e. ClassName.
const char* GpuBenchmarkVertexFetch::GetName() const
{
    const char* const pBenchmarkName = ClassName;
    return pBenchmarkName;
}

// Returns the type identifier of this benchmark (BenchmarkType_VertexFetch).
BenchmarkType GpuBenchmarkVertexFetch::GetType() const
{
    const BenchmarkType benchmarkType = BenchmarkType_VertexFetch;
    return benchmarkType;
}

// Returns the number of entries reported by m_PropertyArray.
int GpuBenchmarkVertexFetch::GetPropertyCount() const
{
    const int propertyCount = m_PropertyArray.GetCount();
    return propertyCount;
}

// Read-only variant: forwards to m_PropertyArray.FillPropertyList() with the given
// destination array and capacity, and returns its result (presumably the number of
// entries written - confirm against the property array implementation).
int GpuBenchmarkVertexFetch::FillPropertyList(const GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize) const
{
    const int fillResult = m_PropertyArray.FillPropertyList(ppDestinationArray, destinationArrayMaxSize);
    return fillResult;
}

// Mutable variant: forwards to m_PropertyArray.FillPropertyList() with the given
// destination array and capacity, and returns its result (presumably the number of
// entries written - confirm against the property array implementation).
int GpuBenchmarkVertexFetch::FillPropertyList(GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize)
{
    const int fillResult = m_PropertyArray.FillPropertyList(ppDestinationArray, destinationArrayMaxSize);
    return fillResult;
}

// Looks up a property by name through m_PropertyArray.FindPropertyByName() and
// returns whatever that lookup returns.
GpuBenchmarkPropertyHolder* GpuBenchmarkVertexFetch::FindPropertyByName(const char* propertyName)
{
    GpuBenchmarkPropertyHolder* pFoundProperty = m_PropertyArray.FindPropertyByName(propertyName);
    return pFoundProperty;
}

// Returns the property holder stored at the given index of m_PropertyArray.
GpuBenchmarkPropertyHolder* GpuBenchmarkVertexFetch::GetPropertyByIndex(int index)
{
    GpuBenchmarkPropertyHolder* pProperty = m_PropertyArray.Get(index);
    return pProperty;
}

} } } // namespace nnt { namespace gfx { namespace util {
