﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/


#include <nn/nn_Assert.h>
#include <nn/gfx/util/gfx_DebugFontTextWriter.h>

#include "gfxUtilGpuBenchmark_GpuBenchmarkFillrate.h"

#include "gfxUtilGpuBenchmark_ResHelpers.h"
#include "gfxUtilGpuBenchmark_PropertyMacros.h"
#include "gfxUtilGpuBenchmark_ComputeRenderQuadShaderVariationIndex.h"

#include "gfxUtilGpuBenchmark_BuiltinRenderQuadShader.h"

// 307 MHz, 14.4 pixels/cycle
// -> SOL = 307 * 1000000 * 14.4 = 4420800000 pixels/s

namespace nnt { namespace gfx { namespace util {

namespace {

// Vertex data uploaded to the vertex buffer when m_PrimitiveType is
// PrimitiveType_Triangle (see InitializeGfxObjects). It describes a single
// triangle that is drawn with PrimitiveTopology_TriangleList in DoBenchmark.
// NOTE(review): presumably each entry is position (x, y, z) followed by a
// texture coordinate (u, v), in which case the triangle extends past the
// [-1, 1] square so that it covers it entirely - confirm against the
// declaration of DefaultVertex.
const DefaultVertex g_TriangleVertexBufferData[] =
{
    { -1.0f, -1.0f, 1.0f, 0.0f, 0.0f, },
    { -1.0f,  3.0f, 1.0f, 0.0f, 2.0f, },
    { 3.0f, -1.0f, 1.0f, 2.0f, 0.0f, },
};

} // anonymous namespace

// Name of this benchmark; this is the string returned by GetName().
const char* GpuBenchmarkFillrate::ClassName = "Fillrate";

// Sets the default configuration of the benchmark: a 4096 x 4096
// R8_G8_B8_A8_Unorm render target with optimal tiling, drawn with the quad
// primitive and without any depth buffer.
GpuBenchmarkFillrate::GpuBenchmarkFillrate()
{
    // Color render target (m_RenderSize is used for both width and height).
    m_RenderFormat = nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm;
    m_TileMode = nn::gfx::TileMode_Optimal;
    m_RenderSize = 4096;

    // ImageFormat_Undefined is the value tested everywhere else in this file
    // to decide that no depth buffer is used.
    m_DepthBufferFormat = nn::gfx::ImageFormat_Undefined;

    m_PrimitiveType = PrimitiveType_Quad;
}

// Nothing is released here: properties are finalized by Finalize() and GPU
// objects by FinalizeGfxObjects().
GpuBenchmarkFillrate::~GpuBenchmarkFillrate() = default;

// Defines the configurable properties of the benchmark in m_PropertyArray:
// RenderSize, RenderFormat, TileMode, PrimitiveType and DepthBufferFormat.
// Each property is associated with its name and with the matching
// Get*/Set* member functions of this class.
//
// pResourceAllocator : passed to the enum property definitions.
//
// NOTE(review): what the BENCHMARK_PROPERTY_* macros do with their arguments
// is not visible from this file - see gfxUtilGpuBenchmark_PropertyMacros.h.
void GpuBenchmarkFillrate::Initialize(ResourceAllocator* pResourceAllocator)
{
    // NOTE(review): pResourceAllocator is passed to the enum property macros
    // below, so this presumably only silences a warning in configurations
    // where those macros do not use it - confirm.
    NN_UNUSED(pResourceAllocator);

    // RenderSize
    // NOTE(review): the three integers are presumably minimum, maximum and
    // step - confirm against the macro definition.
    GpuBenchmarkPropertyHolder* pPropertyRenderSize = m_PropertyArray.Get(Property_RenderSize);
    BENCHMARK_PROPERTY_INTEGER_RANGE_DEFINITION(
        pPropertyRenderSize, "RenderSize",
        GpuBenchmarkFillrate::GetRenderSize,
        GpuBenchmarkFillrate::SetRenderSize,
        1024, 4096, 1024);

    // RenderFormat
    // Format of the color render target, listed as name / value pairs.
    GpuBenchmarkPropertyHolder* pPropertyRenderFormat = m_PropertyArray.Get(Property_RenderFormat);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyRenderFormat, "RenderFormat",
        nn::gfx::ImageFormat, pResourceAllocator,
        GpuBenchmarkFillrate::GetRenderFormat,
        GpuBenchmarkFillrate::SetRenderFormat,
        "ImageFormat_R8_G8_B8_A8_UnormSrgb", nn::gfx::ImageFormat_R8_G8_B8_A8_UnormSrgb,
        "ImageFormat_R8_G8_B8_A8_Unorm", nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm,
        "ImageFormat_R16_G16_B16_A16_Float", nn::gfx::ImageFormat_R16_G16_B16_A16_Float);

    // TileMode
    // Note that InitializeGfxObjectsRender overrides this value with
    // TileMode_Optimal whenever a depth buffer is enabled.
    GpuBenchmarkPropertyHolder* pPropertyTileMode = m_PropertyArray.Get(Property_TileMode);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyTileMode, "TileMode",
        nn::gfx::TileMode, pResourceAllocator,
        GpuBenchmarkFillrate::GetTileMode,
        GpuBenchmarkFillrate::SetTileMode,
        "TileMode_Optimal", nn::gfx::TileMode_Optimal,
        "TileMode_Linear", nn::gfx::TileMode_Linear);

    // PrimitiveType
    // Selects between the full-screen quad and the single triangle geometry.
    GpuBenchmarkPropertyHolder* pPropertyPrimitiveType = m_PropertyArray.Get(Property_PrimitiveType);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyPrimitiveType, "PrimitiveType",
        PrimitiveType, pResourceAllocator,
        GpuBenchmarkFillrate::GetPrimitiveType,
        GpuBenchmarkFillrate::SetPrimitiveType,
        "Quad", GpuBenchmarkFillrate::PrimitiveType_Quad,
        "Triangle", GpuBenchmarkFillrate::PrimitiveType_Triangle);

    // DepthBufferFormat
    // "Off" maps to ImageFormat_Undefined, which the rest of this file treats
    // as "no depth buffer".
    GpuBenchmarkPropertyHolder* pPropertyDepthBufferFormat = m_PropertyArray.Get(Property_DepthBufferFormat);
    BENCHMARK_PROPERTY_ENUM_DEFINITION(
        pPropertyDepthBufferFormat, "DepthBufferFormat",
        nn::gfx::ImageFormat, pResourceAllocator,
        GpuBenchmarkFillrate::GetDepthBufferFormat,
        GpuBenchmarkFillrate::SetDepthBufferFormat,
        "Off", nn::gfx::ImageFormat_Undefined,
        "D24_Unorm_S8_Uint", nn::gfx::ImageFormat_D24_Unorm_S8_Uint,
        "D32_Float", nn::gfx::ImageFormat_D32_Float);
}


// Finalizes every property stored in m_PropertyArray.
//
// pResourceAllocator : not used by this function.
void GpuBenchmarkFillrate::Finalize(ResourceAllocator* pResourceAllocator)
{
    NN_UNUSED(pResourceAllocator);

    int propertyIndex = 0;
    while (propertyIndex < m_PropertyArray.GetCount())
    {
        GpuBenchmarkPropertyHolder* pProperty = m_PropertyArray.Get(propertyIndex);
        pProperty->Finalize();
        ++propertyIndex;
    }
}

// Creates the objects the benchmark renders into:
//  - the color render target together with its output copy buffer, target
//    view and viewport/scissor state,
//  - when m_DepthBufferFormat is not ImageFormat_Undefined, the depth-stencil
//    texture and its view,
//  - in NN_GFXUTIL_GPUBENCHMARK_FILLRATE_DEBUG builds only, the texture views,
//    sampler and descriptor slots used by RenderDebug().
//
// pResourceAllocator : allocator used for the textures and descriptor slots.
// pDevice            : device the objects are initialized on.
void GpuBenchmarkFillrate::InitializeGfxObjectsRender(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    // A depth buffer forces optimal tiling; the resulting tile mode is applied
    // to the color target as well as to the depth texture.
    nn::gfx::TileMode tileMode = m_TileMode;
    if (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined)
        tileMode = nn::gfx::TileMode_Optimal;

    // Square color target of m_RenderSize x m_RenderSize. The returned size is
    // the one reported later by MapResultBuffer().
    m_OutputCopyBufferSize = InitializeColorRenderTarget(
        &m_RenderTexture, &m_OutputCopyBuffer,
        &m_RenderTextureTargetView, &m_RenderTextureViewportScissorState,
        m_RenderSize, m_RenderSize, m_RenderFormat, tileMode,
        pResourceAllocator, pDevice);

    // Optional depth-stencil target with the same dimensions as the color target.
    if (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined)
    {
        nn::gfx::Texture::InfoType depthTextureInfo;
        {
            depthTextureInfo.SetDefault();
            // GpuAccess_Texture is requested in addition to GpuAccess_DepthStencil;
            // the debug path below creates a texture view on this texture.
            depthTextureInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_DepthStencil);
            depthTextureInfo.SetWidth(m_RenderSize);
            depthTextureInfo.SetHeight(m_RenderSize);
            depthTextureInfo.SetDepth(1);
            depthTextureInfo.SetImageFormat(m_DepthBufferFormat);
            depthTextureInfo.SetMipCount(1);
            depthTextureInfo.SetArrayLength(1);
            depthTextureInfo.SetImageStorageDimension(nn::gfx::ImageStorageDimension_2d);
            depthTextureInfo.SetTileMode(tileMode);
        }

        InitializeTexture(&m_DepthStencilTexture, depthTextureInfo, pResourceAllocator, MemoryPoolType_RenderTarget, pDevice);

        nn::gfx::DepthStencilView::InfoType depthStencilViewInfo;
        {
            depthStencilViewInfo.SetDefault();
            depthStencilViewInfo.SetTexturePtr(&m_DepthStencilTexture);
            depthStencilViewInfo.SetImageDimension(nn::gfx::ImageDimension_2d);
        }

        m_DepthStencilView.Initialize(pDevice, depthStencilViewInfo);
    }

#if defined(NN_GFXUTIL_GPUBENCHMARK_FILLRATE_DEBUG)
    // Debug only: texture view on the color target so that RenderDebug() can
    // sample it.
    nn::gfx::TextureView::InfoType renderTextureViewInfo;
    {
        renderTextureViewInfo.SetDefault();
        renderTextureViewInfo.SetTexturePtr(&m_RenderTexture);
        renderTextureViewInfo.SetImageDimension(nn::gfx::ImageDimension_2d);
        renderTextureViewInfo.SetImageFormat(m_RenderFormat);
        renderTextureViewInfo.EditSubresourceRange().EditMipRange().SetMipCount(1);
    }
    m_RenderTextureView.Initialize(pDevice, renderTextureViewInfo);

    m_RenderTextureViewDescriptorSlotIndex = pResourceAllocator->AllocateAndSetTextureViewToDescriptorPool(
        &m_RenderTextureView, &m_RenderTextureViewDescriptorSlot);

    if (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined)
    {
        // Debug only: texture view on the depth-stencil texture, declared as
        // R8_G8_B8_A8_Unorm with an Alpha/Blue/Green/Red channel mapping.
        // NOTE(review): presumably this reinterprets the raw depth bits so
        // they can be displayed as a color - confirm.
        nn::gfx::TextureView::InfoType depthStencilTextureViewInfo;
        {
            depthStencilTextureViewInfo.SetDefault();
            depthStencilTextureViewInfo.SetTexturePtr(&m_DepthStencilTexture);
            depthStencilTextureViewInfo.SetImageDimension(nn::gfx::ImageDimension_2d);
            depthStencilTextureViewInfo.SetImageFormat(nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm);
            depthStencilTextureViewInfo.SetChannelMapping(
                nn::gfx::ChannelMapping_Alpha,
                nn::gfx::ChannelMapping_Blue,
                nn::gfx::ChannelMapping_Green,
                nn::gfx::ChannelMapping_Red);

            depthStencilTextureViewInfo.EditSubresourceRange().EditMipRange().SetMipCount(1);
        }
        m_DepthStencilTextureView.Initialize(pDevice, depthStencilTextureViewInfo);

        m_DepthStencilTextureViewDescriptorSlotIndex = pResourceAllocator->AllocateAndSetTextureViewToDescriptorPool(
            &m_DepthStencilTextureView, &m_DepthStencilTextureViewDescriptorSlot);
    }

    // Debug only: point-filtered, repeating sampler shared by both debug views.
    nn::gfx::Sampler::InfoType renderTextureSamplerInfo;
    {
        renderTextureSamplerInfo.SetDefault();
        renderTextureSamplerInfo.SetFilterMode(nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint);
        renderTextureSamplerInfo.SetAddressU(nn::gfx::TextureAddressMode_Repeat);
        renderTextureSamplerInfo.SetAddressV(nn::gfx::TextureAddressMode_Repeat);
        renderTextureSamplerInfo.SetAddressW(nn::gfx::TextureAddressMode_Repeat);
    }
    m_DebugTextureSampler.Initialize(pDevice, renderTextureSamplerInfo);
    m_DebugTextureSamplerDescriptorSlotIndex = pResourceAllocator->AllocateAndSetSamplerToDescriptorPool(
        &m_DebugTextureSampler, &m_DebugTextureSamplerDescriptorSlot);
#endif
}

// Creates all the GPU objects used by the benchmark, in this order: base
// class objects, render targets, vertex buffer (depending on
// m_PrimitiveType), shader, and depth-stencil state.
//
// pResourceAllocator : allocator used for the buffers, textures and shader.
// pDevice            : device the objects are initialized on.
void GpuBenchmarkFillrate::InitializeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    GpuBenchmark::InitializeGfxObjects(pResourceAllocator, pDevice);

    NN_ASSERT(m_RenderSize > 1);

    InitializeGfxObjectsRender(pResourceAllocator, pDevice);

    // Geometry: m_VertexBufferSize is recorded because PreBenchmark() needs it
    // when binding the vertex buffer.
    switch (m_PrimitiveType)
    {
    case PrimitiveType_Triangle:
        InitializeVertexBuffer(&m_VertexBuffer, g_TriangleVertexBufferData, sizeof(g_TriangleVertexBufferData), pResourceAllocator, pDevice);
        m_VertexBufferSize = sizeof(g_TriangleVertexBufferData);
        break;

    case PrimitiveType_Quad:
        InitializeFullScreenQuadVertexBuffer(&m_VertexBuffer, pResourceAllocator, pDevice);
        m_VertexBufferSize = g_RectangleVertexBufferDataSize;
        break;

    default:
        NN_UNEXPECTED_DEFAULT;
    }

    InitializeResShader(&m_ResShader, g_RenderQuadShaderData, sizeof(g_RenderQuadShaderData), pResourceAllocator, pDevice);

    // Depth test and depth write are both enabled only when a depth buffer
    // exists. The comparison function is Always, so the test never rejects.
    const bool isDepthEnabled = (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined);

    nn::gfx::DepthStencilState::InfoType depthStencilStateInfo;
    depthStencilStateInfo.SetDefault();
    depthStencilStateInfo.SetDepthTestEnabled(isDepthEnabled);
    depthStencilStateInfo.SetDepthComparisonFunction(nn::gfx::ComparisonFunction_Always);
    depthStencilStateInfo.SetDepthWriteEnabled(isDepthEnabled);

    m_DepthStencilState.Initialize(pDevice, depthStencilStateInfo);
}

// Releases the GPU objects created by InitializeGfxObjects(): depth-stencil
// state, shader, vertex buffer, debug-only views/sampler, optional depth
// buffer, color render target, and finally the base class objects.
//
// pResourceAllocator : allocator the resources and descriptor slots are returned to.
// pDevice            : device the objects were initialized on.
void GpuBenchmarkFillrate::FinalizeGfxObjects(ResourceAllocator* pResourceAllocator, nn::gfx::Device* pDevice)
{
    // Depth objects exist only when a depth format was selected.
    const bool hasDepthBuffer = (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined);

    m_DepthStencilState.Finalize(pDevice);
    FinalizeResShader(&m_ResShader, pResourceAllocator, pDevice);
    FinalizeBuffer(&m_VertexBuffer, pResourceAllocator, pDevice);

#if defined(NN_GFXUTIL_GPUBENCHMARK_FILLRATE_DEBUG)
    // Debug sampler and its descriptor slot.
    m_DebugTextureSampler.Finalize(pDevice);
    pResourceAllocator->FreeDescriptorSlots(nn::gfx::DescriptorPoolType_Sampler, m_DebugTextureSamplerDescriptorSlotIndex);
    m_DebugTextureSamplerDescriptorSlotIndex = -1;

    // Debug view on the depth-stencil texture.
    if (hasDepthBuffer)
    {
        pResourceAllocator->FreeDescriptorSlots(nn::gfx::DescriptorPoolType_TextureView, m_DepthStencilTextureViewDescriptorSlotIndex);
        m_DepthStencilTextureViewDescriptorSlotIndex = -1;
        m_DepthStencilTextureView.Finalize(pDevice);
    }

    // Debug view on the color render target.
    pResourceAllocator->FreeDescriptorSlots(nn::gfx::DescriptorPoolType_TextureView, m_RenderTextureViewDescriptorSlotIndex);
    m_RenderTextureViewDescriptorSlotIndex = -1;
    m_RenderTextureView.Finalize(pDevice);
#endif

    if (hasDepthBuffer)
    {
        m_DepthStencilView.Finalize(pDevice);
        FinalizeTexture(&m_DepthStencilTexture, pResourceAllocator, pDevice);
    }

    FinalizeColorRenderTarget(
        &m_RenderTexture, &m_OutputCopyBuffer,
        &m_RenderTextureTargetView, &m_RenderTextureViewportScissorState,
        pResourceAllocator, pDevice);

    GpuBenchmark::FinalizeGfxObjects(pResourceAllocator, pDevice);
}

// Records the commands that set up the state needed by DoBenchmark():
// clears the targets, binds them together with the viewport/scissor and
// depth-stencil states, then binds the shader and the vertex buffer.
//
// pTestCommandBuffer : command buffer the commands are recorded into.
void GpuBenchmarkFillrate::PreBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    const bool hasDepthBuffer = (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined);

    nn::gfx::ColorTargetView* pColorTarget = &m_RenderTextureTargetView;
    // Stays null when no depth buffer is used, so that none gets bound below.
    nn::gfx::DepthStencilView* pDepthTarget = hasDepthBuffer ? &m_DepthStencilView : nullptr;

    // Clears.
    pTestCommandBuffer->ClearColor(pColorTarget, 0.5f, 0.5f, 0.5f, 0.5f, nullptr);
    if (hasDepthBuffer)
    {
        pTestCommandBuffer->ClearDepthStencil(
            pDepthTarget, 1.0f, 0x00,
            nn::gfx::DepthStencilClearMode_DepthStencil, nullptr);
    }

    // Output state.
    pTestCommandBuffer->SetRenderTargets(1, &pColorTarget, pDepthTarget);
    pTestCommandBuffer->SetViewportScissorState(&m_RenderTextureViewportScissorState);
    pTestCommandBuffer->SetDepthStencilState(&m_DepthStencilState);

    // Shader: variation selected by ComputeRenderQuadShaderVariationIndex(0, 0).
    const int variationIndex = ComputeRenderQuadShaderVariationIndex(0, 0);
    nn::gfx::Shader* pShader = m_ResShader.pResShaderContainer
        ->GetResShaderVariation(variationIndex)
        ->GetResShaderProgram(m_ResShader.codeType)
        ->GetShader();
    pTestCommandBuffer->SetShader(pShader, nn::gfx::ShaderStageBit_All);

    // Geometry created by InitializeGfxObjects().
    nn::gfx::GpuAddress vertexBufferAddress;
    m_VertexBuffer.GetGpuAddress(&vertexBufferAddress);
    pTestCommandBuffer->SetVertexBuffer(0, vertexBufferAddress, sizeof(DefaultVertex), m_VertexBufferSize);
}

// Records the measured workload: runCount draw calls of the selected
// primitive (a 4-vertex triangle strip for the quad, a 3-vertex triangle
// list for the triangle).
//
// pTestCommandBuffer : command buffer the draw calls are recorded into.
// runCount           : number of draw calls to record.
void GpuBenchmarkFillrate::DoBenchmark(nn::gfx::CommandBuffer* pTestCommandBuffer, int runCount)
{
    for (int runIndex = 0; runIndex < runCount; ++runIndex)
    {
        switch (m_PrimitiveType)
        {
        case PrimitiveType_Triangle:
            pTestCommandBuffer->Draw(nn::gfx::PrimitiveTopology_TriangleList, 3, 0);
            break;

        case PrimitiveType_Quad:
            pTestCommandBuffer->Draw(nn::gfx::PrimitiveTopology_TriangleStrip, 4, 0);
            break;

        default:
            NN_UNEXPECTED_DEFAULT;
            break;
        }
    }
}

// Debug visualization. Does nothing unless
// NN_GFXUTIL_GPUBENCHMARK_FILLRATE_DEBUG is defined; in that case it draws
// the color render target and, when a depth buffer is used, the
// depth-stencil texture next to it, using shader variation 3.
//
// pTestCommandBuffer : command buffer the debug draws are recorded into.
void GpuBenchmarkFillrate::RenderDebug(nn::gfx::CommandBuffer* pTestCommandBuffer)
{
    NN_UNUSED(pTestCommandBuffer);

#if defined(NN_GFXUTIL_GPUBENCHMARK_FILLRATE_DEBUG)
    const int blitShaderVariationIndex = 3; // instance + blit texture
    nn::gfx::Shader* pBlitShader = m_ResShader.pResShaderContainer
        ->GetResShaderVariation(blitShaderVariationIndex)
        ->GetResShaderProgram(m_ResShader.codeType)
        ->GetShader();
    pTestCommandBuffer->SetShader(pBlitShader, nn::gfx::ShaderStageBit_All);

    // Placement parameters packed into the third argument of Draw():
    // scale in bits 8.., X offset in bits 16.., Y offset in bits 24...
    // NOTE(review): how the shader decodes this value is not visible here.
    const int scale = 3;
    const int offsetX = 1;
    const int offsetY = 1;
    const int scaleAndOffsetYBits = (scale << 8) | (offsetY << 24);

    // Color render target.
    pTestCommandBuffer->SetTextureAndSampler(
        0, nn::gfx::ShaderStage_Pixel,
        m_RenderTextureViewDescriptorSlot,
        m_DebugTextureSamplerDescriptorSlot);
    pTestCommandBuffer->Draw(
        nn::gfx::PrimitiveTopology_TriangleStrip, 4,
        scaleAndOffsetYBits | (offsetX << 16), 1, 0);

    // Depth-stencil texture, one X offset unit further.
    if (m_DepthBufferFormat != nn::gfx::ImageFormat_Undefined)
    {
        pTestCommandBuffer->SetTextureAndSampler(
            0, nn::gfx::ShaderStage_Pixel,
            m_DepthStencilTextureViewDescriptorSlot,
            m_DebugTextureSamplerDescriptorSlot);
        pTestCommandBuffer->Draw(
            nn::gfx::PrimitiveTopology_TriangleStrip, 4,
            scaleAndOffsetYBits | ((offsetX + 1) << 16), 1, 0);
    }
#endif
}


// Prints the benchmark results: average fill rate in megapixels per second,
// average GPU/CPU time per run and total GPU/CPU time, all times in
// nanoseconds.
//
// cpuTimeElapsed       : total CPU time spent for all the runs.
// gpuTimeElapsed       : total GPU time spent for all the runs.
// runCount             : number of runs measured; must be greater than 0.
// pDebugFontTextWriter : writer the text is printed with.
//
// The fill rate is reported as 0 when the GPU time is shorter than one
// microsecond, since it cannot be computed at that resolution.
void GpuBenchmarkFillrate::PrintResults(nn::TimeSpan cpuTimeElapsed, nn::TimeSpan gpuTimeElapsed, int runCount, nn::gfx::util::DebugFontTextWriter* pDebugFontTextWriter)
{
    NN_ASSERT(runCount > 0);

    const uint64_t runCountValue = static_cast<uint64_t>(runCount);

    const uint64_t gpuTimeElapsedValueInNs = static_cast<uint64_t>(gpuTimeElapsed.GetNanoSeconds());
    const uint64_t cpuTimeElapsedValueInNs = static_cast<uint64_t>(cpuTimeElapsed.GetNanoSeconds());

    const uint64_t gpuTimeElapsedAvgValueInNs = gpuTimeElapsedValueInNs / runCountValue;
    const uint64_t cpuTimeElapsedAvgValueInNs = cpuTimeElapsedValueInNs / runCountValue;

    // Widen before multiplying so that the product is never computed in int.
    const uint64_t renderSize = static_cast<uint64_t>(m_RenderSize);
    const uint64_t pixelCount = runCountValue * renderSize * renderSize;

    // Pixels per microsecond is the same as megapixels per second.
    // The guard has to be on the microsecond value: it is the actual divisor
    // and it is 0 for any elapsed time below 1000 ns.
    const uint64_t gpuTimeElapsedValueInUs = gpuTimeElapsedValueInNs / 1000;
    uint64_t gpuFillrateAvgMPixPerSec = 0;
    if (gpuTimeElapsedValueInUs > 0)
    {
        gpuFillrateAvgMPixPerSec = pixelCount / gpuTimeElapsedValueInUs;
    }

    // NOTE(review): %lu matches uint64_t only where unsigned long is 64 bits
    // wide - confirm for every platform this is built for.
    pDebugFontTextWriter->Print("fillRateAvgMPixPerSec:%12lu\n", gpuFillrateAvgMPixPerSec);
    pDebugFontTextWriter->Print("gpu time (ns): %8lu\n", gpuTimeElapsedAvgValueInNs);
    pDebugFontTextWriter->Print("cpu time (ns): %8lu\n", cpuTimeElapsedAvgValueInNs);
    pDebugFontTextWriter->Print("total gpu time (ns): %12lu\n", gpuTimeElapsedValueInNs);
    pDebugFontTextWriter->Print("total cpu time (ns): %12lu\n", cpuTimeElapsedValueInNs);
}

// Records the copy of the whole color render target into m_OutputCopyBuffer,
// so that the rendered image can be read back through MapResultBuffer().
//
// pCommandBuffer : command buffer the copy is recorded into.
void GpuBenchmarkFillrate::CopyResultToBuffer(nn::gfx::CommandBuffer* pCommandBuffer)
{
    // The render target is square: the same value is used for width and height.
    const int edgeLength = GetRenderSize();

    nn::gfx::BufferTextureCopyRegion copyRegion;
    copyRegion.SetDefault();
    copyRegion.SetBufferImageHeight(edgeLength);
    copyRegion.SetBufferImageWidth(edgeLength);

    auto& textureRegion = copyRegion.EditTextureCopyRegion();
    textureRegion.SetDefault();
    textureRegion.SetWidth(edgeLength);
    textureRegion.SetHeight(edgeLength);
    textureRegion.EditSubresource().SetDefault();

    // The copy is bracketed by memory invalidations: texture access before,
    // write access after.
    pCommandBuffer->InvalidateMemory(nn::gfx::GpuAccess_Texture);
    pCommandBuffer->CopyImageToBuffer(&m_OutputCopyBuffer, &m_RenderTexture, copyRegion);
    pCommandBuffer->InvalidateMemory(nn::gfx::GpuAccess_Write);
}

// Maps the output copy buffer and returns its address and size.
// UnmapResultBuffer() unmaps it again.
//
// pOutBuffer     : receives the mapped address of m_OutputCopyBuffer.
// pOutBufferSize : receives m_OutputCopyBufferSize.
void GpuBenchmarkFillrate::MapResultBuffer(void** pOutBuffer, size_t* pOutBufferSize)
{
    void* const pMappedMemory = m_OutputCopyBuffer.Map();

    *pOutBuffer = pMappedMemory;
    *pOutBufferSize = m_OutputCopyBufferSize;
}

// Unmaps the output copy buffer that MapResultBuffer() mapped.
void GpuBenchmarkFillrate::UnmapResultBuffer()
{
    m_OutputCopyBuffer.Unmap();
}

// Returns the name of this benchmark (ClassName, i.e. "Fillrate").
const char* GpuBenchmarkFillrate::GetName() const
{
    return ClassName;
}

// Returns the type identifier of this benchmark: BenchmarkType_Fillrate.
BenchmarkType GpuBenchmarkFillrate::GetType() const
{
    return BenchmarkType_Fillrate;
}

// Returns the number of properties held in m_PropertyArray.
int GpuBenchmarkFillrate::GetPropertyCount() const
{
    return m_PropertyArray.GetCount();
}

// Const overload: forwards to m_PropertyArray.FillPropertyList() and returns
// its result.
//
// ppDestinationArray      : array receiving the property pointers.
// destinationArrayMaxSize : capacity of ppDestinationArray.
// NOTE(review): the return value is presumably the number of entries
// written - confirm against the property array implementation.
int GpuBenchmarkFillrate::FillPropertyList(const GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize) const
{
    return m_PropertyArray.FillPropertyList(ppDestinationArray, destinationArrayMaxSize);
}

// Non-const overload: forwards to m_PropertyArray.FillPropertyList() and
// returns its result.
//
// ppDestinationArray      : array receiving the property pointers.
// destinationArrayMaxSize : capacity of ppDestinationArray.
// NOTE(review): the return value is presumably the number of entries
// written - confirm against the property array implementation.
int GpuBenchmarkFillrate::FillPropertyList(GpuBenchmarkPropertyHolder** ppDestinationArray, int destinationArrayMaxSize)
{
    return m_PropertyArray.FillPropertyList(ppDestinationArray, destinationArrayMaxSize);
}

// Looks a property up by name; forwards to m_PropertyArray.FindPropertyByName().
//
// propertyName : name of the property, e.g. "RenderSize".
// NOTE(review): the result for an unknown name is decided by the property
// array implementation (presumably a null pointer) - confirm.
GpuBenchmarkPropertyHolder* GpuBenchmarkFillrate::FindPropertyByName(const char* propertyName)
{
    return m_PropertyArray.FindPropertyByName(propertyName);
}

// Returns the property stored at the given index of m_PropertyArray.
//
// index : index of the property. No range check is done here; it is passed
//         to m_PropertyArray.Get() as is.
GpuBenchmarkPropertyHolder* GpuBenchmarkFillrate::GetPropertyByIndex(int index)
{
    return m_PropertyArray.Get(index);
}

} } } // namespace nnt { namespace gfx { namespace util {
