﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/fs.h>
#include <nn/nn_Assert.h>
#include <nn/nn_Log.h>
#include <nn/oe/oe_OperationStateControl.h>
#include <nn/crypto/crypto_Md5Generator.h>
#include <nn/crypto/crypto_Sha1Generator.h>
#include <nn/crypto/crypto_Sha256Generator.h>

#include <nns/gfx/gfx_GraphicsFramework.h>

#include "testGfxUtil_Application.h"
#include "testGfxUtil_ApplicationCommandLine.h"
#if defined(NNT_GFX_UTIL_ENABLE_LOP)
#include "testGfxUtil_LopIntegration.h"
#endif

#include "gfxUtilGpuBenchmark_Factory.h"
#include "gfxUtilGpuBenchmark_Property.h"
#include "gfxUtilGpuBenchmark_ResourceAllocator.h"

namespace {

// Convenience helper: records the benchmark command list with the given
// warm-up and run counts, then runs it right away.
// It simply chains RecordGpuBenchmarkCommandList() and RunGpuBenchmarkCommandList().
void RecordAndRunGpuBenchmarkCommandList(
    ApplicationTestData* pTestData, nnt::gfx::util::GpuBenchmark* pBenchmark,
    int warmUpCount, int runCount)
{
    RecordGpuBenchmarkCommandList(pTestData, pBenchmark, warmUpCount, runCount);
    RunGpuBenchmarkCommandList(pTestData);
}

// Print callback that writes the message to the log through NN_LOG.
// pContext is not used; it exists only to match the print callback signature.
void PrintToLog(void* pContext, const char* message)
{
    NN_UNUSED(pContext);
    NN_LOG("%s", message);
}

void PrintToDebugFont(void* pContext, const char* s)
{
    nn::gfx::util::DebugFontTextWriter* pDebugFontWriter = static_cast<nn::gfx::util::DebugFontTextWriter*>(pContext);
    pDebugFontWriter->Print(s);
}


// Mount name handed to nn::fs::MountHost() and nn::fs::Unmount() for host file access.
const char* g_HostMountPoint = "host";
// True after MountHostIO() succeeded; reset to false by UnmountHostIO().
bool g_HostMountCompleted = false;

// Mounts the host's "./" directory under g_HostMountPoint.
// - Does nothing when the mount was already completed.
// - A mount failure is logged and leaves g_HostMountCompleted false.
// - On NX builds the first entries of "host:/" are listed in the log; a failure
//   while listing is logged but does not undo the mount.
void MountHostIO()
{
    if (g_HostMountCompleted)
    {
        return;
    }

    nn::Result result = nn::fs::MountHost(g_HostMountPoint, "./");
    if (result.IsFailure())
    {
        NN_LOG("Cannot mount host file system\n");
        return;
    }

    g_HostMountCompleted = true;

#if defined(NN_PLATFORM_NX)
    // Only the first EntryCountMax entries are listed.
    const int EntryCountMax = 100;
    nn::fs::DirectoryEntry entryList[EntryCountMax];
    nn::fs::DirectoryHandle h = {};
    result = nn::fs::OpenDirectory(&h, "host:/", nn::fs::OpenDirectoryMode_All);
    if (result.IsFailure())
    {
        NN_LOG("Cannot open host root directory\n");
        return;
    }

    int64_t n = 0;
    result = nn::fs::ReadDirectory(&n, entryList, h, EntryCountMax);
    if (result.IsSuccess())
    {
        NN_LOG("%d entry found.\n", static_cast<int>(n));
        for (int64_t i = 0; i < n; i++)
        {
            auto& e = entryList[i];
            NN_LOG("  %s%s\n", e.name, (e.directoryEntryType == nn::fs::DirectoryEntryType_Directory ? "/" : ""));
        }
    }
    else
    {
        NN_LOG("Cannot read host root directory\n");
    }

    // Release the directory handle; it was previously left open.
    nn::fs::CloseDirectory(h);
#endif
}

// Unmounts the host file system if MountHostIO() mounted it; otherwise a no-op.
void UnmountHostIO()
{
    if (!g_HostMountCompleted)
    {
        return;
    }

    nn::fs::Unmount(g_HostMountPoint);
    g_HostMountCompleted = false;
}


void* PrimitiveRendererAllocateFunction(size_t size, size_t alignment, void* pUserData)
{
    NN_ASSERT_NOT_NULL(pUserData);
    nns::gfx::GraphicsFramework* pGfw = reinterpret_cast<nns::gfx::GraphicsFramework*>(pUserData);
    return pGfw->AllocateMemory(size, alignment);
}

void PrimitiveRendererFreeFunction(void* ptr, void* pUserData)
{
    NN_ASSERT_NOT_NULL(pUserData);
    nns::gfx::GraphicsFramework* pGfw = reinterpret_cast<nns::gfx::GraphicsFramework*>(pUserData);
    pGfw->FreeMemory(ptr);
}


// Creates the graphics objects the application itself uses for on-screen output:
// the debug font writer and the primitive renderer, both stored in pTestData.
// Memory and descriptor slots are taken from pGfw. FinalizeTestGfxObjects() releases them.
// In debug builds, asserts that no benchmark is currently alive.
void InitializeTestGfxObjects(ApplicationTestData* pTestData, nns::gfx::GraphicsFramework* pGfw)
{
#if defined(NN_SDK_BUILD_DEBUG)
    NN_ASSERT(pTestData->createdBenchmarkCount == 0);
#endif

    // Initialize the debug font
    {
        const int TextWriterCharCountMax = 32 * 1024;

        nn::gfx::util::DebugFontTextWriterInfo info;
        info.SetDefault();
        info.SetCharCountMax(TextWriterCharCountMax);
        info.SetBufferCount(pGfw->GetBufferCount());
        info.SetUserMemoryPoolEnabled(false);

        // Query the heap size for this configuration, then allocate it from the framework.
        pTestData->debugFontWriterHeapSize =
            nn::gfx::util::DebugFontTextWriter::GetRequiredMemorySize(pTestData->pDevice, info);

        pTestData->pDebugFontWriterHeap = pGfw->AllocateMemory(pTestData->debugFontWriterHeapSize, sizeof(int));

        pTestData->debugFontWriter.Initialize(
            pTestData->pDevice, info,
            pTestData->pDebugFontWriterHeap, pTestData->debugFontWriterHeapSize,
            nullptr, 0, 0);

        // One texture-view slot and one sampler slot are reserved for the font.
        pTestData->debugFontWriterTextureSlotIndex = pGfw->AllocateDescriptorSlot(nn::gfx::DescriptorPoolType_TextureView, 1);
        pTestData->debugFontWriterSamplerSlotIndex = pGfw->AllocateDescriptorSlot(nn::gfx::DescriptorPoolType_Sampler, 1);

        pTestData->debugFontWriter.SetDisplayWidth(pGfw->GetDisplayWidth());
        pTestData->debugFontWriter.SetDisplayHeight(pGfw->GetDisplayHeight());

        pTestData->debugFontWriter.SetTextureDescriptor(
            pGfw->GetDescriptorPool(nn::gfx::DescriptorPoolType_TextureView), pTestData->debugFontWriterTextureSlotIndex);
        pTestData->debugFontWriter.SetSamplerDescriptor(
            pGfw->GetDescriptorPool(nn::gfx::DescriptorPoolType_Sampler), pTestData->debugFontWriterSamplerSlotIndex);
    }
    // Initialize the primitive renderer; it allocates through pGfw via PrimitiveRendererAllocateFunction.
    {
        nns::gfx::PrimitiveRenderer::RendererInfo info;
        info.SetDefault();
        info.SetAllocator(PrimitiveRendererAllocateFunction, pGfw);
        info.SetAdditionalBufferSize(1024 * 4);
        info.SetMultiBufferQuantity(pGfw->GetBufferCount());

        pTestData->m_pPrimitiveRenderer = nns::gfx::PrimitiveRenderer::CreateRenderer(pGfw->GetDevice(), info);
        pTestData->m_pPrimitiveRenderer->SetScreenWidth(pGfw->GetDisplayWidth());
        pTestData->m_pPrimitiveRenderer->SetScreenHeight(pGfw->GetDisplayHeight());
    }

}

// Releases what InitializeTestGfxObjects() created: the primitive renderer first,
// then the debug font writer together with its descriptor slots and heap memory.
// In debug builds, asserts that every benchmark has already been finalized.
void FinalizeTestGfxObjects(ApplicationTestData* pTestData, nns::gfx::GraphicsFramework* pGfw)
{
#if defined(NN_SDK_BUILD_DEBUG)
    NN_ASSERT(pTestData->createdBenchmarkCount == 0);
#endif

    nns::gfx::PrimitiveRenderer::DestroyRenderer(
        pTestData->m_pPrimitiveRenderer,
        pGfw->GetDevice(), PrimitiveRendererFreeFunction, pGfw);

    // Finalize the debug font
    pTestData->debugFontWriter.Finalize();
    pGfw->FreeDescriptorSlot(nn::gfx::DescriptorPoolType_TextureView, pTestData->debugFontWriterTextureSlotIndex);
    pGfw->FreeDescriptorSlot(nn::gfx::DescriptorPoolType_Sampler, pTestData->debugFontWriterSamplerSlotIndex);
    pGfw->FreeMemory(pTestData->pDebugFontWriterHeap);
}


void BenchmarkApplicationMainLoop(const ApplicationConfiguration* pApplicationConfiguration, ApplicationTestData* pTestData)
{
    ApplicationMode mode = pApplicationConfiguration->initialApplicationMode;
    bool mustExit = false;

    while (!mustExit)
    {
#if defined(NN_SDK_BUILD_DEBUG)
        pTestData->pResourceAllocator->PushMemoryPoolAllocatorStatus();
#endif

        switch (mode)
        {
        case ApplicationMode_SelectMode:
            {
                mode = DoSelectMode(pTestData);
            }
            break;
        case ApplicationMode_Interactive:
            {
                DoInteractiveMode(
                    pTestData,
                    pApplicationConfiguration->interactiveModeInitialBenchmark,
                    pApplicationConfiguration->interactiveModeOutputFile);
                mode = pApplicationConfiguration->exitAfterTest ? ApplicationMode_Exit : ApplicationMode_SelectMode;
            }
            break;
        case ApplicationMode_Replay:
            {
                DoReplayMode(
                    pTestData,
                    pApplicationConfiguration->replayModeInputFile);
                mode = pApplicationConfiguration->exitAfterTest ? ApplicationMode_Exit : ApplicationMode_SelectMode;
            }
            break;
        case ApplicationMode_FindMaxPower:
            {
                DoFindMaxPowerMode(
                    pTestData,
                    pApplicationConfiguration->findMaxPowerUpdateMode,
                    pApplicationConfiguration->findMaxPowerModeOutputFilePath,
                    pApplicationConfiguration->findMaxPowerModeBenchmarkMask);
                mode = pApplicationConfiguration->exitAfterTest ? ApplicationMode_Exit : ApplicationMode_SelectMode;
            }
            break;
        case ApplicationMode_UpdateTimings:
            {
                DoUpdateTimingsMode(
                    pTestData,
                    pApplicationConfiguration->updateTimingsModeInputFile,
                    pApplicationConfiguration->updateTimingsModeOutputFile,
                    pApplicationConfiguration->updateTimingsModeTestCaseFilter);
                mode = pApplicationConfiguration->exitAfterTest ? ApplicationMode_Exit : ApplicationMode_SelectMode;
            }
            break;

#if defined(NNT_GFX_UTIL_ENABLE_LOP)
        case ApplicationMode_Profile:
            {
                DoProfileMode(pTestData, pApplicationConfiguration->interactiveModeInitialBenchmark);
                mode = pApplicationConfiguration->exitAfterTest ? ApplicationMode_Exit : ApplicationMode_SelectMode;
            }
            break;
#endif
        case ApplicationMode_Exit:
            {
                mustExit = true;
            }
            break;
        default:
            NN_UNEXPECTED_DEFAULT;
        }

#if defined(NN_SDK_BUILD_DEBUG)
        pTestData->pResourceAllocator->PopAndCompareMemoryPoolAllocatorStatus();
        pTestData->pResourceAllocator->PrintMemoryPoolAllocatorMaxUsage();
#endif
    }
}

// Sets up everything the benchmark application needs, runs the main loop, and tears it all down.
// Setup: resource allocator (backed by the framework's allocation functions),
// ApplicationTestData, a dedicated benchmark queue with its completion semaphore,
// the application's own gfx objects and the benchmark runtime gfx objects.
// Teardown releases these objects again after the main loop returns.
void RunBenchmarkApplication(nns::gfx::GraphicsFramework* pGfw, const ApplicationConfiguration* pApplicationConfiguration)
{
    nnt::gfx::util::ResourceAllocator::InfoType resourceAllocatorInfo;
    resourceAllocatorInfo.SetDefault();
    resourceAllocatorInfo.SetMemoryAllocator(
        pGfw->GetAllocateFunction(),
        pGfw->GetFreeFunction(),
        pGfw->GetAllocateFunctionUserData());

    nnt::gfx::util::ResourceAllocator resourceAllocator;
    resourceAllocator.Initialize(pGfw->GetDevice(), resourceAllocatorInfo);

    // The test data object itself is created through the resource allocator.
    ApplicationTestData* pTestData = resourceAllocator.NewObject<ApplicationTestData>();

    pTestData->pHostContext = pGfw;
    pTestData->frameCounter = 0;
    pTestData->pResourceAllocator = &resourceAllocator;
    pTestData->pDevice = pGfw->GetDevice();
    pTestData->platformMeasurementTracker.Initialize();

    // Queue used to submit benchmark work; it requests graphics, compute and copy capabilities.
    pTestData->benchmarkQueueInfo.SetDefault();
    pTestData->benchmarkQueueInfo.SetCapability(nn::gfx::QueueCapability_Graphics
        | nn::gfx::QueueCapability_Compute | nn::gfx::QueueCapability_Copy);
    pTestData->benchmarkQueue.Initialize(pGfw->GetDevice(), pTestData->benchmarkQueueInfo);

    // Semaphore set on the benchmark queue in BeginFrame() and synced by the framework's queue.
    nn::gfx::Semaphore::InfoType benchmarkQueueCompletionSemaphoreInfo;
    benchmarkQueueCompletionSemaphoreInfo.SetDefault();
    pTestData->benchmarkQueueCompletionSemaphore.Initialize(pGfw->GetDevice(), benchmarkQueueCompletionSemaphoreInfo);

    InitializeTestGfxObjects(pTestData, pGfw);

    InitializeRuntimeGfxObjects(
        &pTestData->runtimeGfxObjects,
        pTestData->pDevice,
        pTestData->pResourceAllocator);

    BenchmarkApplicationMainLoop(pApplicationConfiguration, pTestData);

    pTestData->platformMeasurementTracker.Finalize();

    FinalizeRuntimeGfxObjects(
        &pTestData->runtimeGfxObjects, pTestData->pDevice,
        pTestData->pResourceAllocator);

    FinalizeTestGfxObjects(pTestData, pGfw);

    pTestData->benchmarkQueue.Finalize(pGfw->GetDevice());
    pTestData->benchmarkQueueCompletionSemaphore.Finalize(pGfw->GetDevice());

    // pTestData came from the allocator, so it is deleted before the allocator is finalized.
    resourceAllocator.DeleteObject(pTestData);
    pTestData = nullptr;

    resourceAllocator.Finalize(pGfw->GetDevice());
}


} // anonymous namespace


// Returns the value of g_HostMountCompleted, i.e. whether the host file system is currently mounted.
bool IsHostIoRootMounted()
{
    return g_HostMountCompleted;
}


// Returns true when filePath begins with "<g_HostMountPoint>:/", contains at least
// one more character after that prefix, and the host file system is mounted.
// Returns false in every other case.
bool TestFilePathMountPointExist(const char* filePath)
{
    const int mountNameLength = static_cast<int>(strlen(g_HostMountPoint));
    const int pathLength = static_cast<int>(strlen(filePath));

    // The path must be strictly longer than the mount name followed by ":/".
    if (pathLength <= (mountNameLength + 2))
    {
        return false;
    }

    // Check for the presence of g_HostMountPoint + ":/" at the start of the path.
    if (memcmp(filePath, g_HostMountPoint, mountNameLength) != 0)
    {
        return false;
    }
    if ((filePath[mountNameLength] != ':') || (filePath[mountNameLength + 1] != '/'))
    {
        return false;
    }

    return IsHostIoRootMounted();
}

// Clears the benchmark result history: zeroes the stored results and resets
// both the write index and the recorded-result counter to 0.
void ResetHistory(ApplicationTestData* pTestData)
{
    memset(&pTestData->benchmarkResultHistory, 0, sizeof(pTestData->benchmarkResultHistory));
    pTestData->benchmarkResultHistoryIndex = 0;
    pTestData->benchmarkResultHistoryCount = 0;
}

// Stores one CPU/GPU timing pair (in nanoseconds) at the current write position of
// the result history, then advances the write position, wrapping around at
// BenchmarkResultHistoryLength so that the oldest entry gets overwritten next.
// The total number of results recorded since the last reset is counted separately.
void AddResultToHistory(ApplicationTestData* pTestData, nn::TimeSpan cpuTimeElapsed, nn::TimeSpan gpuTimeElapsed)
{
    auto& slot = pTestData->benchmarkResultHistory[pTestData->benchmarkResultHistoryIndex];
    slot.byName.cpuTime = cpuTimeElapsed.GetNanoSeconds();
    slot.byName.gpuTime = gpuTimeElapsed.GetNanoSeconds();

    pTestData->benchmarkResultHistoryIndex =
        (pTestData->benchmarkResultHistoryIndex + 1) % ApplicationTestData::BenchmarkResultHistoryLength;
    ++pTestData->benchmarkResultHistoryCount;
}

// Returns the history entry located just before the current write position,
// wrapping to the last slot when the write position is 0.
const ApplicationGpuBenchmarkResult* GetPreviousResult(ApplicationTestData* pTestData)
{
    const int candidate = pTestData->benchmarkResultHistoryIndex - 1;
    const int previousIndex = (candidate < 0)
        ? candidate + ApplicationTestData::BenchmarkResultHistoryLength
        : candidate;
    return &pTestData->benchmarkResultHistory[previousIndex];
}

// Returns how many history entries currently hold results: the number of
// recorded results, capped at BenchmarkResultHistoryLength.
int GetBenchmarkHistoryCount(const ApplicationTestData* pTestData)
{
    if (pTestData->benchmarkResultHistoryCount < ApplicationTestData::BenchmarkResultHistoryLength)
    {
        return pTestData->benchmarkResultHistoryCount;
    }
    return ApplicationTestData::BenchmarkResultHistoryLength;
}

// Computes, for every value slot, the integer average over the valid history entries
// and writes it to pOutResult.
// An empty history yields 0 for every value instead of dividing by zero.
// The running sum is checked for unsigned overflow with NN_ASSERT.
void ComputeAverageFromHistory(
    ApplicationGpuBenchmarkResult* pOutResult,
    const ApplicationTestData* pTestData)
{
    const ApplicationGpuBenchmarkResult* benchmarkResultHistory = pTestData->benchmarkResultHistory;
    const int historyCount = GetBenchmarkHistoryCount(pTestData);

    for (int valueIndex = 0; valueIndex < ApplicationGpuBenchmarkResult::ValueCount; ++valueIndex)
    {
        uint64_t sum = 0;

        for (int historyIndex = 0; historyIndex < historyCount; ++historyIndex)
        {
            const uint64_t newSum = sum + benchmarkResultHistory[historyIndex].value[valueIndex];
            NN_ASSERT(newSum >= sum);
            sum = newSum;
        }

        // No samples means there is nothing to average; report 0.
        uint64_t average = 0;
        if (historyCount > 0)
        {
            average = sum / static_cast<uint64_t>(historyCount);
        }
        pOutResult->value[valueIndex] = average;
    }
}

// Finds, for every value slot, the smallest and largest value among the valid
// history entries and writes them to pOutResultMin / pOutResultMax.
// The history must contain at least one entry (asserted).
void ComputeMinMaxFromHistory(
    ApplicationGpuBenchmarkResult* pOutResultMin,
    ApplicationGpuBenchmarkResult* pOutResultMax,
    const ApplicationTestData* pTestData)
{
    const ApplicationGpuBenchmarkResult* pSamples = pTestData->benchmarkResultHistory;
    const int sampleCount = GetBenchmarkHistoryCount(pTestData);
    NN_ASSERT(sampleCount > 0);

    for (int valueIndex = 0; valueIndex < ApplicationGpuBenchmarkResult::ValueCount; ++valueIndex)
    {
        // Seed both extremes with the first sample, then scan the rest.
        uint64_t lowest = pSamples[0].value[valueIndex];
        uint64_t highest = lowest;

        for (int sampleIndex = 1; sampleIndex < sampleCount; ++sampleIndex)
        {
            const uint64_t sample = pSamples[sampleIndex].value[valueIndex];
            lowest = std::min(lowest, sample);
            highest = std::max(highest, sample);
        }

        pOutResultMin->value[valueIndex] = lowest;
        pOutResultMax->value[valueIndex] = highest;
    }
}

void FillDistributionBinFromHistory(
    const ApplicationTestData* pTestData,
    ApplicationGpuBenchmarkResult* pDistributionBin,
    int distributionBinCount,
    ApplicationGpuBenchmarkResult* pDistributionCount,
    ApplicationGpuBenchmarkResult* pMin, ApplicationGpuBenchmarkResult* pMax)
{
    const ApplicationGpuBenchmarkResult* benchmarkResultHistory = pTestData->benchmarkResultHistory;
    int historyCount = GetBenchmarkHistoryCount(pTestData);
    memset(pDistributionBin, 0, sizeof(ApplicationGpuBenchmarkResult) * distributionBinCount);

    if (historyCount > 1)
    {
        for (int valueIndex = 0; valueIndex < ApplicationGpuBenchmarkResult::ValueCount; ++valueIndex)
        {
            pDistributionCount->value[valueIndex] = historyCount;

            uint64_t min = benchmarkResultHistory[0].value[valueIndex];
            uint64_t max = min;
            for (int historyIndex = 1; historyIndex < historyCount; ++historyIndex)
            {
                uint64_t value = benchmarkResultHistory[historyIndex].value[valueIndex];
                min = std::min(min, value);
                max = std::max(max, value);
            }

            pMin->value[valueIndex] = min;
            pMax->value[valueIndex] = max;

            uint64_t spanLength = (max - min) + 1;

            for (int historyIndex = 0; historyIndex < historyCount; ++historyIndex)
            {
                uint64_t binIndexResult = ((benchmarkResultHistory[historyIndex].value[valueIndex] - min) * distributionBinCount) / spanLength;
                int binIndex = static_cast<int>(binIndexResult);
                NN_ASSERT(binIndex >= 0);
                NN_ASSERT(binIndex < distributionBinCount);
                pDistributionBin[binIndex].value[valueIndex]++;
            }
        }
    }
    else
    {
        for (int valueIndex = 0; valueIndex < ApplicationGpuBenchmarkResult::ValueCount; ++valueIndex)
        {
            pDistributionBin[0].value[valueIndex] = 1;
        }
    }
}


// Computes, for every value slot, the integer mean and the sample standard deviation
// (divisor: count - 1) over the valid history entries.
// Both outputs are set to 0 for every value when fewer than two entries exist.
// All intermediate arithmetic is unsigned; sums and squares are checked for
// wrap-around with NN_ASSERT.
void ComputeStandardDeviationFromHistory(
    ApplicationGpuBenchmarkResult* pOutMean,
    ApplicationGpuBenchmarkResult* pOutStandardDeviation,
    const ApplicationTestData* pTestData)
{
    const ApplicationGpuBenchmarkResult* benchmarkResultHistory = pTestData->benchmarkResultHistory;
    const int historyCount = GetBenchmarkHistoryCount(pTestData);

    for (int valueIndex = 0; valueIndex < ApplicationGpuBenchmarkResult::ValueCount; ++valueIndex)
    {
        pOutMean->value[valueIndex] = 0;
        pOutStandardDeviation->value[valueIndex] = 0;
    }

    if (historyCount <= 1)
    {
        return;
    }

    const uint64_t sampleCount = static_cast<uint64_t>(historyCount);

    for (int valueIndex = 0; valueIndex < ApplicationGpuBenchmarkResult::ValueCount; ++valueIndex)
    {
        uint64_t sum = 0;
        for (int historyIndex = 0; historyIndex < historyCount; ++historyIndex)
        {
            const uint64_t newSum = sum + benchmarkResultHistory[historyIndex].value[valueIndex];
            NN_ASSERT(newSum >= sum);
            sum = newSum;
        }

        const uint64_t mean = sum / sampleCount;

        uint64_t squareDiffSum = 0;
        for (int historyIndex = 0; historyIndex < historyCount; ++historyIndex)
        {
            const uint64_t sample = benchmarkResultHistory[historyIndex].value[valueIndex];

            // Use the unsigned absolute difference: squaring a signed difference would be
            // undefined behaviour on overflow, which would also defeat the check below.
            const uint64_t absDiff = (sample > mean) ? (sample - mean) : (mean - sample);
            const uint64_t squaredDiff = absDiff * absDiff;
            NN_ASSERT((absDiff == 0) || ((squaredDiff / absDiff) == absDiff));

            const uint64_t newSum = squareDiffSum + squaredDiff;
            NN_ASSERT(newSum >= squareDiffSum);
            squareDiffSum = newSum;
        }

        const uint64_t variance = squareDiffSum / (sampleCount - 1);
        const uint64_t standardDeviation = static_cast<uint64_t>(sqrt(static_cast<double>(variance)));

        pOutMean->value[valueIndex] = mean;
        pOutStandardDeviation->value[valueIndex] = standardDeviation;
    }
}

// Validates one test result against a mean / standard deviation pair and updates
// the validation counters:
// - cpuMiss / gpuMiss are incremented when the respective time does not pass,
// - totalMiss is incremented once when at least one of them does not pass,
// - testCount is always incremented.
void UpdateResultValidationStandardDeviation(
    ApplicationGpuBenchmarkValidation* pApplicationGpuBenchmarkValidation,
    const ApplicationGpuBenchmarkResult* pTestResult,
    const ApplicationGpuBenchmarkResult* pMean,
    const ApplicationGpuBenchmarkResult* pStandardDeviation,
    int factor)
{
    const bool cpuMissed =
        nnt::gfx::util::ValidateResultStandardDeviation(
            pTestResult->GetCpuTimeElapsed(),
            pMean->byName.cpuTime, pStandardDeviation->byName.cpuTime,
            factor) != nnt::gfx::util::ValidationResult_Pass;

    const bool gpuMissed =
        nnt::gfx::util::ValidateResultStandardDeviation(
            pTestResult->GetGpuTimeElapsed(),
            pMean->byName.gpuTime, pStandardDeviation->byName.gpuTime,
            factor) != nnt::gfx::util::ValidationResult_Pass;

    if (cpuMissed)
    {
        pApplicationGpuBenchmarkValidation->cpuMiss++;
    }
    if (gpuMissed)
    {
        pApplicationGpuBenchmarkValidation->gpuMiss++;
    }
    if (cpuMissed || gpuMissed)
    {
        pApplicationGpuBenchmarkValidation->totalMiss++;
    }

    pApplicationGpuBenchmarkValidation->testCount++;
}


// Creates the benchmark named by the given JSON test case, applies the test case
// configuration to it, initializes its gfx objects and resets the result history.
// Returns the created benchmark; release it with FinalizeBenchmark().
// In debug builds the memory pool allocator status is pushed twice (before creation
// and after gfx initialization); FinalizeBenchmark() performs the two matching pops.
nnt::gfx::util::GpuBenchmark* InitializeBenchmarkFromTestCase(
    ApplicationTestData* pTestData,
    nnt::gfx::util::json::TestCaseIterator* pTestCaseIterator)
{
#if defined(NN_SDK_BUILD_DEBUG)
    pTestData->pResourceAllocator->PushMemoryPoolAllocatorStatus();
#endif
    // NOTE(review): pTestCaseIterator is used here before the nullptr check below;
    // confirm whether a null iterator is actually a supported input.
    const char* testCaseName = nnt::gfx::util::json::GetTestCaseName(pTestCaseIterator);

    // NOTE(review): the result of CreateBenchmarkFromName is used without a null check;
    // confirm how it reports an unknown name.
    nnt::gfx::util::GpuBenchmark* pGpuBenchmark =
        nnt::gfx::util::CreateBenchmarkFromName(testCaseName, pTestData->pResourceAllocator);
    pGpuBenchmark->Initialize(pTestData->pResourceAllocator);

    // The configuration is applied before the gfx objects are initialized.
    if (pTestCaseIterator != nullptr)
    {
        nnt::gfx::util::json::ApplyTestCaseConfiguration(pTestCaseIterator, pGpuBenchmark);
    }

    pGpuBenchmark->InitializeGfxObjects(pTestData->pResourceAllocator, pTestData->pDevice);

#if defined(NN_SDK_BUILD_DEBUG)
    pTestData->pResourceAllocator->PushMemoryPoolAllocatorStatus();
    pTestData->pResourceAllocator->UpdateMemoryPoolAllocatorMaxUsage();
    pTestData->createdBenchmarkCount++;
#endif

    ResetHistory(pTestData);

    return pGpuBenchmark;
}

// Creates the benchmark called testName, optionally sets its properties from
// pPropertyValueArray (one value per property, in property index order), initializes
// its gfx objects and resets the result history.
// pPropertyValueArray may be nullptr to keep the property values as they are after Initialize();
// otherwise propertyValueArrayCount must be at least the benchmark's property count (asserted).
// Returns the created benchmark; release it with FinalizeBenchmark().
// In debug builds the memory pool allocator status is pushed twice (before creation
// and after gfx initialization); FinalizeBenchmark() performs the two matching pops.
nnt::gfx::util::GpuBenchmark* InitializeBenchmarkFromConfiguration(
    ApplicationTestData* pTestData, const char* testName,
    const int* pPropertyValueArray, int propertyValueArrayCount)
{
#if defined(NN_SDK_BUILD_DEBUG)
    pTestData->pResourceAllocator->PushMemoryPoolAllocatorStatus();
#endif

    // NOTE(review): the result of CreateBenchmarkFromName is used without a null check;
    // confirm how it reports an unknown name.
    nnt::gfx::util::GpuBenchmark* pGpuBenchmark =
        nnt::gfx::util::CreateBenchmarkFromName(testName, pTestData->pResourceAllocator);
    pGpuBenchmark->Initialize(pTestData->pResourceAllocator);

    // Properties are set before the gfx objects are initialized.
    if (pPropertyValueArray != nullptr)
    {
        int propertyCount = pGpuBenchmark->GetPropertyCount();
        NN_ASSERT(propertyCount <= propertyValueArrayCount);
        for (int i = 0; i < propertyCount; ++i)
        {
            pGpuBenchmark->GetPropertyByIndex(i)->Set(pPropertyValueArray[i]);
        }
    }

    pGpuBenchmark->InitializeGfxObjects(pTestData->pResourceAllocator, pTestData->pDevice);

#if defined(NN_SDK_BUILD_DEBUG)
    pTestData->pResourceAllocator->PushMemoryPoolAllocatorStatus();
    pTestData->pResourceAllocator->UpdateMemoryPoolAllocatorMaxUsage();
    pTestData->createdBenchmarkCount++;
#endif

    ResetHistory(pTestData);

    return pGpuBenchmark;
}

// Releases a benchmark created by one of the InitializeBenchmarkFrom*() functions:
// finalizes its gfx objects, finalizes the benchmark and destroys it.
// In debug builds the allocator status is popped and compared once before and once
// after the release, mirroring the two pushes done at initialization, and the
// created-benchmark counter is decremented.
void FinalizeBenchmark(ApplicationTestData* pTestData, nnt::gfx::util::GpuBenchmark* pBenchmark)
{
#if defined(NN_SDK_BUILD_DEBUG)
    pTestData->pResourceAllocator->PopAndCompareMemoryPoolAllocatorStatus();
#endif

    pBenchmark->FinalizeGfxObjects(pTestData->pResourceAllocator, pTestData->pDevice);
    pBenchmark->Finalize(pTestData->pResourceAllocator);
    DestroyBenchmark(pBenchmark, pTestData->pResourceAllocator);

#if defined(NN_SDK_BUILD_DEBUG)
    NN_ASSERT(pTestData->createdBenchmarkCount > 0);
    pTestData->pResourceAllocator->PopAndCompareMemoryPoolAllocatorStatus();
    pTestData->createdBenchmarkCount--;
#endif
}

// Signature of the text sinks used by the printing helpers:
// first argument is an opaque context, second the null-terminated text to print.
typedef void PrintFunc(void*, const char*);

// Prints message prefixed with "[X] " when isSelected is true, "[ ] " otherwise,
// through pPrinfFunc with pPrintContext as its context.
// The formatted line is limited to 255 characters; anything longer is cut off.
void PrintWithSelectionMarker(PrintFunc pPrinfFunc, void* pPrintContext, bool isSelected, const char* message)
{
    const char marker = isSelected ? 'X' : ' ';

    char line[256];
    snprintf(line, sizeof(line), "[%c] %s", marker, message);

    pPrinfFunc(pPrintContext, line);
}

// Prints the benchmark name followed by one line per property ("name (type): value")
// through pPrinfFunc / pPrintContext.
// selectedPropertyIndex:
// - g_DisableDrawSelection: lines are printed without selection markers,
// - -1: the benchmark name line is marked as selected,
// - i >= 0: the line of property i is marked as selected.
// Every line is built in a 256-byte buffer. A line that does not fit is cut so that
// it still ends with '\n' and a terminator; this is reported by NN_ASSERT in builds
// where assertions are enabled.
void PrintBenchmarkInformation(
    const nnt::gfx::util::GpuBenchmark* pBenchmark, int selectedPropertyIndex,
    PrintFunc pPrinfFunc, void* pPrintContext)
{
    // Temporary array receiving the property list; it lives on the stack of this call.
    const nnt::gfx::util::GpuBenchmarkPropertyHolder** pDestinationArray = nullptr;
    int propertyCount = pBenchmark->GetPropertyCount();
    pDestinationArray = static_cast<const nnt::gfx::util::GpuBenchmarkPropertyHolder**>(alloca(propertyCount * sizeof(nnt::gfx::util::GpuBenchmarkPropertyHolder*)));
    NN_ASSERT(pDestinationArray != nullptr);
    pBenchmark->FillPropertyList(pDestinationArray, propertyCount);

    const int bufferSize = 256;
    // Highest index where the trailing '\n' may be stored while keeping room for '\0'.
    const int lastNewlineIndex = bufferSize - 2;
    char sBuffer[bufferSize];
    int bufferIndex = 0;
    bool isTruncated = false;

    // Advances bufferIndex by the snprintf result. snprintf returns the length the text
    // would have had, so the index is clamped to keep later writes inside sBuffer.
    const auto advance = [&bufferIndex, &isTruncated, lastNewlineIndex](int written)
    {
        if (written > 0)
        {
            bufferIndex += written;
        }
        if (bufferIndex > lastNewlineIndex)
        {
            isTruncated = true;
            bufferIndex = lastNewlineIndex;
        }
    };

    snprintf(sBuffer, bufferSize, "Benchmark: %s\n", pBenchmark->GetName());
    if (selectedPropertyIndex != g_DisableDrawSelection)
        PrintWithSelectionMarker(pPrinfFunc, pPrintContext, selectedPropertyIndex == -1, sBuffer);
    else
        pPrinfFunc(pPrintContext, sBuffer);

    for (int i = 0; i < propertyCount; ++i)
    {
        const nnt::gfx::util::GpuBenchmarkPropertyHolder* pPropertyHolder = pDestinationArray[i];

        const char* propertyName = pPropertyHolder->GetName();

        nnt::gfx::util::PropertyType type = pPropertyHolder->GetType();
        NN_ASSERT(type < nnt::gfx::util::PropertyType_Max);
        const char* typeName = GetPropertyTypeName(type);

        bufferIndex = 0;
        isTruncated = false;
        advance(snprintf(sBuffer + bufferIndex, bufferSize - bufferIndex, "%s (%s): ", propertyName, typeName));

        switch (type)
        {
        case nnt::gfx::util::PropertyType_IntegerRange:
            {
                advance(snprintf(sBuffer + bufferIndex, bufferSize - bufferIndex, "%d", pPropertyHolder->Get()));
            }
            break;

        case nnt::gfx::util::PropertyType_Enumeration:
            {
                // The enumeration value is shown by its element name.
                int value = pPropertyHolder->Get();
                const char* elementName = pPropertyHolder->ToEnum()->GetElementNameAt(value);
                advance(snprintf(sBuffer + bufferIndex, bufferSize - bufferIndex, "%s", elementName));
            }
            break;

        case nnt::gfx::util::PropertyType_Invalid:
        default:
            {
                NN_UNEXPECTED_DEFAULT;
            }
            break;
        }

        // Same condition as before: the line must leave room for '\n' and '\0'.
        NN_ASSERT(!isTruncated);
        sBuffer[bufferIndex] = '\n';
        sBuffer[bufferIndex + 1] = '\0';

        if (selectedPropertyIndex != g_DisableDrawSelection)
            PrintWithSelectionMarker(pPrinfFunc, pPrintContext, selectedPropertyIndex == i, sBuffer);
        else
            pPrinfFunc(pPrintContext, sBuffer);
    }
}

// Writes the benchmark name and its properties to the log, without selection markers.
void LogBenchmarkInformation(const nnt::gfx::util::GpuBenchmark* pBenchmark)
{
    PrintBenchmarkInformation(
        pBenchmark, g_DisableDrawSelection,
        &PrintToLog, nullptr);
}

// Overload that prints the marked message with the given debug font writer.
void PrintWithSelectionMarker(nn::gfx::util::DebugFontTextWriter* pDebugFontWriter, bool isSelected, const char* message)
{
    PrintWithSelectionMarker(&PrintToDebugFont, pDebugFontWriter, isSelected, message);
}

// Overload that prints the benchmark name and properties with the given debug font writer.
// selectedProperyIndex is forwarded unchanged as the selection index.
void PrintBenchmarkInformation(
    nn::gfx::util::DebugFontTextWriter* pDebugFontWriter,
    const nnt::gfx::util::GpuBenchmark* pBenchmark, int selectedProperyIndex)
{
    PrintBenchmarkInformation(
        pBenchmark, selectedProperyIndex,
        &PrintToDebugFont, pDebugFontWriter);
}

// Forwards to nnt::gfx::util::RecordGpuBenchmarkCommandList using the runtime
// gfx objects stored in pTestData.
void RecordGpuBenchmarkCommandList(
    ApplicationTestData* pTestData,
    nnt::gfx::util::GpuBenchmark* pBenchmark,
    int warmUpCount, int runCount)
{
    nnt::gfx::util::RecordGpuBenchmarkCommandList(
        pBenchmark, &pTestData->runtimeGfxObjects,
        warmUpCount, runCount);
}


// Runs the benchmark command list on the benchmark queue.
// The CPU and GPU times of the run are stored in pTestData->previousRunCpuTimeElapsed /
// previousRunGpuTimeElapsed and appended to the result history, then the platform
// measurement tracker is updated.
void RunGpuBenchmarkCommandList(ApplicationTestData* pTestData)
{
    nnt::gfx::util::RunGpuBenchmarkCommandList(
        &pTestData->previousRunCpuTimeElapsed, &pTestData->previousRunGpuTimeElapsed,
        &pTestData->benchmarkQueue, &pTestData->runtimeGfxObjects);

    AddResultToHistory(pTestData, pTestData->previousRunCpuTimeElapsed, pTestData->previousRunGpuTimeElapsed);

    pTestData->platformMeasurementTracker.Update();
}

// Runs the benchmark for approximately targetDuration of GPU time.
// A first run of measureRunCount iterations estimates the cost of one iteration; the
// number of iterations needed for targetDuration is derived from it (at least 1) and
// executed in batches of at most 2048 iterations.
// Outputs: *pOutTotalTestDuration receives the summed GPU time of the batches and
// *pOutTotalRunCount the number of iterations executed in them.
// Returns false when the measured total differs from targetDuration by more than 5%
// and by more than the expected duration of one iteration.
// Also returns false, without running the batches, when the estimation run reports a
// non-positive GPU time; in that case the outputs describe the estimation run itself.
bool RunGpuBenchmarkForDuration(
    nn::TimeSpan* pOutTotalTestDuration,
    int* pOutTotalRunCount,
    ApplicationTestData* pTestData,
    nnt::gfx::util::GpuBenchmark* pBenchmark,
    int measureRunCount, int warmUpCount,
    nn::TimeSpan targetDuration)
{
    RecordAndRunGpuBenchmarkCommandList(pTestData, pBenchmark, warmUpCount, measureRunCount);

    nn::TimeSpan measureDuration = pTestData->previousRunGpuTimeElapsed;
    const int64_t measureDurationNs = measureDuration.GetNanoSeconds();
    if (measureDurationNs <= 0)
    {
        // The run count cannot be extrapolated from this measurement (it would divide by zero).
        NN_LOG("Measured GPU time is not positive (%lld ns), cannot estimate the run count\n",
            static_cast<long long>(measureDurationNs));
        *pOutTotalTestDuration = measureDuration;
        *pOutTotalRunCount = measureRunCount;
        return false;
    }

    int totalRunCount = static_cast<int>((targetDuration.GetNanoSeconds() * static_cast<int64_t>(measureRunCount)) / measureDurationNs);
    if (totalRunCount < 1)
    {
        totalRunCount = 1;
    }
    nn::TimeSpan testDuration = nn::TimeSpan(0);

    // Split the work into batches of at most maxRunCount iterations.
    const int maxRunCount = 2048;
    int runCount = totalRunCount;
    while (runCount > maxRunCount)
    {
        RecordAndRunGpuBenchmarkCommandList(pTestData, pBenchmark, warmUpCount, maxRunCount);
        testDuration += pTestData->previousRunGpuTimeElapsed;
        runCount -= maxRunCount;
    }
    RecordAndRunGpuBenchmarkCommandList(pTestData, pBenchmark, warmUpCount, runCount);
    testDuration += pTestData->previousRunGpuTimeElapsed;

    *pOutTotalTestDuration = testDuration;
    *pOutTotalRunCount = totalRunCount;

    // Accept the result when it is within 5% of the target or within one average iteration.
    float targetDurationAsFloat = static_cast<float>(targetDuration.GetMicroSeconds());
    float totalRunCountASFloat = static_cast<float>(totalRunCount);
    float testDurationAsFloat = static_cast<float>(testDuration.GetMicroSeconds());
    float averageRunDuration = targetDurationAsFloat / totalRunCountASFloat;
    float diffDuration = fabsf(targetDurationAsFloat - testDurationAsFloat);
    if ((diffDuration > (targetDurationAsFloat * 0.05))
        && (diffDuration > averageRunDuration))
    {
        NN_LOG("Difference between measured (%lld us) and expected (%lld us) time is large (runCount:%d)\n",
            static_cast<long long>(testDuration.GetMicroSeconds()),
            static_cast<long long>(targetDuration.GetMicroSeconds()), totalRunCount);
        return false;
    }

    return true;
}

// Calls RunGpuBenchmarkForDuration() until it succeeds, at most retryCount times.
// The first attempt uses 4 iterations for the estimation run and every further
// attempt 4 more. When at least one attempt failed, a summary (in microseconds) and
// the benchmark configuration are logged.
// Note: retryCount is the maximum number of attempts; with retryCount <= 0 the
// benchmark is not run at all and the output parameters are left untouched.
void RunGpuBenchmarkForDurationWithRetry(
    nn::TimeSpan* pOutTotalTestDuration, int* pOutTotalRunCount,
    ApplicationTestData* pTestData,
    nnt::gfx::util::GpuBenchmark* pBenchmark,
    int warmUpCount, nn::TimeSpan targetDuration, int retryCount)
{
    int retryIndex = 0;
    int measureRunCount = 4;
    while (retryIndex < retryCount)
    {
        if (RunGpuBenchmarkForDuration(
            pOutTotalTestDuration, pOutTotalRunCount,
            pTestData, pBenchmark,
            measureRunCount, warmUpCount, targetDuration))
        {
            break;
        }

        measureRunCount += 4;
        retryIndex++;
    }

    if (retryIndex > 0)
    {
        // The values are microseconds, so they are labelled "us" (they were mislabelled "ns").
        NN_LOG(
            "RunGpuBenchmarkForDurationWithRetry: measured:%lldus expected:%lldus runCount:%d retryCount:%d\n",
            pOutTotalTestDuration->GetMicroSeconds(), targetDuration.GetMicroSeconds(),
            *pOutTotalRunCount, retryIndex);
        LogBenchmarkInformation(pBenchmark);
    }
}


// Starts a display frame: links the benchmark queue to the framework's queue through
// the completion semaphore, acquires the target texture, begins the frame on the
// framework and prepares the root command buffer (memory invalidation, descriptor
// pools, cleared render target, viewport/scissor) and the primitive renderer.
// Must be paired with EndFrame().
void BeginFrame(ApplicationTestData* pTestData)
{
    nns::gfx::GraphicsFramework* pGfw =
        reinterpret_cast<nns::gfx::GraphicsFramework*>(pTestData->pHostContext);

    // frameIndex is fixed to 0, so buffer index 0 is used for every frame.
    int frameIndex = 0;
    int bufferCount = pGfw->GetBufferCount();
    int currentBufferIndex = frameIndex % bufferCount;

    // Flush the benchmark queue with the completion semaphore set and have the
    // framework's queue sync on that semaphore.
    pTestData->benchmarkQueue.SetSemaphore(&pTestData->benchmarkQueueCompletionSemaphore);
    pTestData->benchmarkQueue.Flush();
    pGfw->GetQueue()->SyncSemaphore(&pTestData->benchmarkQueueCompletionSemaphore);

    nn::gfx::SyncResult frameSyncResult;

    pGfw->AcquireTexture(currentBufferIndex);
    pGfw->QueueWaitSync(currentBufferIndex);
    // Wait for the display with a 2 second timeout; anything but success is treated as a bug.
    frameSyncResult = pGfw->WaitDisplaySync(currentBufferIndex, nn::TimeSpan::FromSeconds(2));
    NN_ASSERT(frameSyncResult == nn::gfx::SyncResult_Success);

    pGfw->BeginFrame(currentBufferIndex);

    nn::gfx::CommandBuffer* pRootCommandBuffer = pGfw->GetRootCommandBuffer(currentBufferIndex);
    nn::gfx::ColorTargetView* pScreenTarget = pGfw->GetColorTargetView();

    pRootCommandBuffer->InvalidateMemory(
        nn::gfx::GpuAccess_ShaderCode | nn::gfx::GpuAccess_Descriptor |
        nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_VertexBuffer |
        nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_ColorBuffer);

    pRootCommandBuffer->SetDescriptorPool(
        pGfw->GetDescriptorPool(nn::gfx::DescriptorPoolType_BufferView));
    pRootCommandBuffer->SetDescriptorPool(
        pGfw->GetDescriptorPool(nn::gfx::DescriptorPoolType_TextureView));
    pRootCommandBuffer->SetDescriptorPool(
        pGfw->GetDescriptorPool(nn::gfx::DescriptorPoolType_Sampler));

    // Clear the screen target to opaque black and render to it.
    pRootCommandBuffer->ClearColor(pScreenTarget, 0.0f, 0.0f, 0.0f, 1.0f, nullptr);
    pRootCommandBuffer->SetRenderTargets(1, &pScreenTarget, nullptr);
    pRootCommandBuffer->SetViewportScissorState(pGfw->GetViewportScissorState());

    pTestData->m_pPrimitiveRenderer->Update(currentBufferIndex);
    pTestData->m_pPrimitiveRenderer->SetDefaultParameters();
}

// Closes the frame opened by BeginFrame.
//
// Draws the debug font text, optionally the benchmark debug output, then ends
// the frame, executes the root command buffer, queues the present, and waits
// up to two seconds for the GPU sync before incrementing the frame counter.
//
// pTestData:     application state; pHostContext is expected to point to the
//                nns::gfx::GraphicsFramework instance.
// pGpuBenchmark: benchmark whose debug output is rendered; may be nullptr, and
//                is skipped unless GetGfxObjectsInitialized() reports true.
void EndFrame(ApplicationTestData* pTestData, nnt::gfx::util::GpuBenchmark* pGpuBenchmark)
{
    nns::gfx::GraphicsFramework* const pFramework =
        reinterpret_cast<nns::gfx::GraphicsFramework*>(pTestData->pHostContext);

    // The frame index is fixed at zero, so the same buffer slot is used every frame.
    const int frameIndex = 0;
    const int bufferIndex = frameIndex % pFramework->GetBufferCount();

    nn::gfx::CommandBuffer* const pCommandBuffer = pFramework->GetRootCommandBuffer(bufferIndex);

    pTestData->debugFontWriter.Draw(pCommandBuffer);

    const bool hasBenchmarkDebugOutput =
        (pGpuBenchmark != nullptr) && pGpuBenchmark->GetGfxObjectsInitialized();
    if (hasBenchmarkDebugOutput)
    {
        RenderGpuBenchmarkDebug(pGpuBenchmark, &pTestData->runtimeGfxObjects, pCommandBuffer);
    }

    pFramework->EndFrame(bufferIndex);
    pFramework->ExecuteCommand(bufferIndex);

    pFramework->QueuePresentTexture(1);

    const nn::gfx::SyncResult gpuSyncResult =
        pFramework->WaitGpuSync(bufferIndex, nn::TimeSpan::FromSeconds(2));
    NN_ASSERT(gpuSyncResult == nn::gfx::SyncResult_Success);

    ++pTestData->frameCounter;
}

// Returns the framework's root command buffer for the buffer slot used by
// BeginFrame/EndFrame (the frame index is fixed at zero).
//
// pTestData: application state; pHostContext is expected to point to the
//            nns::gfx::GraphicsFramework instance.
nn::gfx::CommandBuffer* GetRootCommandBuffer(ApplicationTestData* pTestData)
{
    nns::gfx::GraphicsFramework* const pFramework =
        reinterpret_cast<nns::gfx::GraphicsFramework*>(pTestData->pHostContext);

    const int frameIndex = 0;
    const int bufferIndex = frameIndex % pFramework->GetBufferCount();

    return pFramework->GetRootCommandBuffer(bufferIndex);
}

// Renders one complete frame: BeginFrame immediately followed by EndFrame.
// Nothing is recorded in between, so the frame contains only what those two
// calls draw (the debug font text and, via EndFrame, the benchmark debug output).
//
// pTestData:     application state forwarded to both calls.
// pGpuBenchmark: forwarded to EndFrame.
void DrawFrame(ApplicationTestData* pTestData, nnt::gfx::util::GpuBenchmark* pGpuBenchmark)
{
    // Open the frame and set up the root command buffer.
    BeginFrame(pTestData);

    // Draw the overlays, submit and present.
    EndFrame(pTestData, pGpuBenchmark);
}

// Measures a benchmark and stores the measurement as a test case in a JSON document.
//
// The benchmark command list is recorded once and then run for
// BenchmarkResultHistoryLength + 128 frames while progress is drawn on screen.
// The mean and standard deviation of the CPU/GPU times are computed from the
// result history, the benchmark result buffer is hashed with SHA-1, and all of
// it is written through CreateOrUpdateBenchmarkTestCase.
//
// pTestData:     application state (queues, history, debug font writer).
// pGpuBenchmark: benchmark to measure.
// warmUpCount:   warm-up count forwarded to the command list recording and stored in the test case.
// testRunCount:  run count forwarded to the command list recording and stored in the test case.
// pJsonDocument: document receiving the created or updated test case.
// pTestId:       identifier forwarded to CreateOrUpdateBenchmarkTestCase.
void CreateTestCaseData(
    ApplicationTestData* pTestData, nnt::gfx::util::GpuBenchmark* pGpuBenchmark,
    int warmUpCount, int testRunCount,
    nnt::gfx::util::json::Document* pJsonDocument, const char* pTestId)
{
    using HashGenerator = nn::crypto::Sha1Generator;

    ResetHistory(pTestData);

    // Flush and sync the benchmark queue before recording the new command list.
    pTestData->benchmarkQueue.Flush();
    pTestData->benchmarkQueue.Sync();

    RecordGpuBenchmarkCommandList(pTestData, pGpuBenchmark, warmUpCount, testRunCount);

    const int frameWarmUpCount = 128;
    const int totalFrameCount = ApplicationTestData::BenchmarkResultHistoryLength + frameWarmUpCount;
    for (int frame = 0; frame < totalFrameCount; ++frame)
    {
        RunGpuBenchmarkCommandList(pTestData);

        // Show progress and the benchmark description while measuring.
        pTestData->debugFontWriter.SetCursor(0, 0);
        pTestData->debugFontWriter.Print("Creating test case data: %d / %d\n", frame, totalFrameCount);
        PrintBenchmarkInformation(&pTestData->debugFontWriter, pGpuBenchmark, g_DisableDrawSelection);
        DrawFrame(pTestData, pGpuBenchmark);
    }

    // Summarize the recorded history.
    ApplicationGpuBenchmarkResult mean;
    ApplicationGpuBenchmarkResult standardDeviation;
    ComputeStandardDeviationFromHistory(&mean, &standardDeviation, pTestData);

    nnt::gfx::util::json::BenchmarkTestResult measuredResult;
    measuredResult.cpuTimeAverage = mean.byName.cpuTime;
    measuredResult.cpuTimeStandardDeviation = standardDeviation.byName.cpuTime;
    measuredResult.gpuTimeAverage = mean.byName.gpuTime;
    measuredResult.gpuTimeStandardDeviation = standardDeviation.byName.gpuTime;

    // Hash of the benchmark result buffer, stored alongside the timings.
    uint8_t resultHash[HashGenerator::HashSize] = {};
    pGpuBenchmark->HashResultBuffer<HashGenerator>(resultHash, HashGenerator::HashSize);

    NN_LOG("Test data for benchmark %s\n", pGpuBenchmark->GetName());
    NN_LOG("cpu mean %8ld standard_deviation:%8d\n",
        measuredResult.cpuTimeAverage, measuredResult.cpuTimeStandardDeviation);
    NN_LOG("gpu mean %8ld standard_deviation:%8d\n",
        measuredResult.gpuTimeAverage, measuredResult.gpuTimeStandardDeviation);

    nnt::gfx::util::json::CreateOrUpdateBenchmarkTestCase(
        pJsonDocument, pGpuBenchmark, &measuredResult,
        resultHash, HashGenerator::HashSize,
        warmUpCount, testRunCount, pTestId);
}

// Runs the benchmark described by a stored test case and checks it against the
// expected data recorded in that test case.
//
// The benchmark is created from the test case, run for
// BenchmarkResultHistoryLength + 128 frames, and every frame's previous result
// is fed into a validation accumulator together with the expected average and
// standard deviation. The test passes when the SHA-1 hash of the result buffer
// matches the stored hash and both the CPU and GPU miss percentages do not
// exceed the fail threshold. The benchmark is finalized before returning.
//
// pTestData:         application state (queues, history, debug font writer).
// pTestCaseIterator: test case providing benchmark setup and expected results.
// Returns true when the test case passed, false otherwise.
bool ValidateTestCaseData(ApplicationTestData* pTestData, nnt::gfx::util::json::TestCaseIterator* pTestCaseIterator)
{
    using HashGenerator = nn::crypto::Sha1Generator;

    // Load the expected timings stored in the test case.
    nnt::gfx::util::json::BenchmarkTestResult storedResult;
    ApplicationGpuBenchmarkResult expectedMean;
    ApplicationGpuBenchmarkResult expectedDeviation;

    nnt::gfx::util::json::GetTestCaseExpectedResult(pTestCaseIterator, &storedResult);

    expectedMean.byName.cpuTime = storedResult.cpuTimeAverage;
    expectedMean.byName.gpuTime = storedResult.gpuTimeAverage;
    expectedDeviation.byName.cpuTime = storedResult.cpuTimeStandardDeviation;
    expectedDeviation.byName.gpuTime = storedResult.gpuTimeStandardDeviation;

    const int warmUpCount = nnt::gfx::util::json::GetTestCaseWarmUpCount(pTestCaseIterator);
    const int repeatCount = nnt::gfx::util::json::GetTestCaseRepeatCount(pTestCaseIterator);

    nnt::gfx::util::GpuBenchmark* const pGpuBenchmark =
        InitializeBenchmarkFromTestCase(pTestData, pTestCaseIterator);

    ResetHistory(pTestData);

    RecordGpuBenchmarkCommandList(pTestData, pGpuBenchmark, warmUpCount, repeatCount);

    ApplicationGpuBenchmarkValidation validation;
    validation.Clear();

    // Maximum accepted miss percentage for both CPU and GPU timings.
    const double failThreshold = 10;
    // NOTE(review): presumably the number of standard deviations tolerated
    // around the expected average - confirm against UpdateResultValidationStandardDeviation.
    const int factor = 3;

    const int frameWarmUpCount = 128;
    const int totalFrameCount = ApplicationTestData::BenchmarkResultHistoryLength + frameWarmUpCount;
    for (int frame = 0; frame < totalFrameCount; ++frame)
    {
        RunGpuBenchmarkCommandList(pTestData);

        const ApplicationGpuBenchmarkResult* pPreviousResult = GetPreviousResult(pTestData);
        UpdateResultValidationStandardDeviation(
            &validation, pPreviousResult,
            &expectedMean, &expectedDeviation, factor);

        // Show progress and the benchmark description while validating.
        pTestData->debugFontWriter.SetCursor(0, 0);
        pTestData->debugFontWriter.Print("Validating test case data: %d / %d\n", frame, totalFrameCount);
        PrintBenchmarkInformation(&pTestData->debugFontWriter, pGpuBenchmark, g_DisableDrawSelection);
        DrawFrame(pTestData, pGpuBenchmark);
    }

    // Compare the hash of the produced result buffer with the stored reference.
    uint8_t actualHash[HashGenerator::HashSize] = {};
    pGpuBenchmark->HashResultBuffer<HashGenerator>(actualHash, HashGenerator::HashSize);

    uint8_t expectedHash[HashGenerator::HashSize] = {};
    const int expectedHashSize =
        nnt::gfx::util::json::GetTestCaseOutputBufferHash(pTestCaseIterator, expectedHash, HashGenerator::HashSize);

    bool isHashMatching = false;
    if (expectedHashSize == HashGenerator::HashSize)
    {
        isHashMatching = (memcmp(actualHash, expectedHash, HashGenerator::HashSize) == 0);
    }

    NN_LOG("Ouput hash compare result: %s\n", isHashMatching ? "Passed" : "Failed");

    const bool isTestPassed =
        isHashMatching
        && (validation.GetCpuMissPercentage() <= failThreshold)
        && (validation.GetGpuMissPercentage() <= failThreshold);

    // One-line summary: benchmark name followed by the CPU and GPU miss percentages.
    const int summaryBufferSize = 128;
    char summaryBuffer[summaryBufferSize];
    snprintf(
        summaryBuffer, summaryBufferSize, "%s cpu %2.2f gpu %2.2f",
        pGpuBenchmark->GetName(),
        validation.GetCpuMissPercentage(),
        validation.GetGpuMissPercentage());

    NN_LOG("Test result: %-50s => %s\n", summaryBuffer, isTestPassed ? "Passed" : "Failed");

    LogBenchmarkInformation(pGpuBenchmark);

    FinalizeBenchmark(pTestData, pGpuBenchmark);

    return isTestPassed;
}

// Application entry point.
//
// Mounts the host file system, parses the command line, initializes the
// graphics system and the graphics framework (1280x720, double buffered),
// runs the benchmark application, then tears everything down again.
// If the command line cannot be parsed, the help text is printed and the
// function returns without initializing graphics.
extern "C" void nnMain()
{
    // Ideal core 0 with affinity mask 1.
    // NOTE(review): presumably restricts the main thread to core 0 - confirm.
    nn::os::SetThreadCoreMask(nn::os::GetCurrentThread(), 0, 1);
    MountHostIO();

    ApplicationConfiguration applicationConfiguration;
    applicationConfiguration.SetDefault();
    if (!ParseCommandLine(nn::os::GetHostArgc(), nn::os::GetHostArgv(), &applicationConfiguration))
    {
        NN_LOG("Command line error\n");
        PrintCommandLineHelp();
        // Release the host mount taken above so the error path does not
        // leave it mounted, mirroring the normal shutdown sequence.
        UnmountHostIO();
        return;
    }

    const int ScreenWidth = 1280;
    const int ScreenHeight = 720;
    const int BufferCount = 2;


    const size_t GraphicsSystemMemorySize = 8 * 1024 * 1024;
    nns::gfx::GraphicsFramework::InitializeGraphicsSystem(GraphicsSystemMemorySize);

#if defined(NNT_GFX_UTIL_ENABLE_LOP)
    profiler::InitializeLopLibrary();
#endif

    nns::gfx::GraphicsFramework::FrameworkInfo fwInfo;
    fwInfo.SetDefault();
    fwInfo.SetDisplayWidth(ScreenWidth);
    fwInfo.SetDisplayHeight(ScreenHeight);
    fwInfo.SetBufferCount(BufferCount);
    fwInfo.SetSwapChainBufferCount(BufferCount);

    fwInfo.SetRootCommandBufferCommandMemorySize(8 * 1024 * 1024);
    // Full gfx debug mode only in SDK debug builds.
#if defined(NN_SDK_BUILD_DEBUG)
    fwInfo.SetDebugMode(nn::gfx::DebugMode_Full);
#else
    fwInfo.SetDebugMode(nn::gfx::DebugMode_Disable);
#endif

    fwInfo.SetDescriptorPoolSlotCount(nn::gfx::DescriptorPoolType_BufferView, 16);
    fwInfo.SetDescriptorPoolSlotCount(nn::gfx::DescriptorPoolType_TextureView, 1024);
    fwInfo.SetDescriptorPoolSlotCount(nn::gfx::DescriptorPoolType_Sampler, 1024);

    nns::gfx::GraphicsFramework gfw;
    gfw.Initialize(fwInfo);

    RunBenchmarkApplication(&gfw, &applicationConfiguration);

    gfw.Finalize();

    UnmountHostIO();
}

