﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <string>
#include <nn/nn_Log.h>
#include <nn/nn_Assert.h>
#include <nn/nn_Result.h>
#include <nn/fs.h>
#include <nn/util/util_Vector.h>
#include <lopProfiler/LOP_Profiler.h>
#include <nvngdSupport/TutorialUtil.h>
#ifdef USE_LOP

namespace lop
{
// Constructs a profiler with no device, queue, arenas or chip name attached
// and with the pass count and buffer sizes zeroed. Initialize() stores the
// device and queue later.
LOP_Profiler::LOP_Profiler()
    : m_NumRequiredPasses(0)
    , m_pDevice(nullptr)
    , m_pQueue(nullptr)
    , m_pTraceArena(nullptr)
    , m_pComputeArena(nullptr)
#ifdef _WIN32
    , m_pMetricsContext(nullptr)
#endif
    , m_pChipName(nullptr)
    , m_TraceBufferSize(0)
    , m_ComputeBufferSize(0)
{
}

// Ends any session through EndSession(), relying on that function's default
// argument. EndSession() itself returns early when no queue is attached.
LOP_Profiler::~LOP_Profiler()
{
    this->EndSession();
}

// Brings up the NVPA layers in order (host, target, NVN driver), registers the
// range commands on the supplied queue, and then remembers the device and
// queue for later calls. Each NVPA step is only checked with NN_ASSERT.
//
// pDevice_ : NVN device stored in m_pDevice.
// pQueue_  : NVN queue handed to NVPA and stored in m_pQueue.
void LOP_Profiler::Initialize(NVNdevice* pDevice_, NVNqueue* pQueue_)
{
    NVPA_Status status;

    // Host-side library.
    status = NVPA_InitializeHost();
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // Target-side library.
    status = NVPA_InitializeTarget();
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // NVN driver hooks.
    status = NVPA_NVNC_LoadDriver();
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // Range commands for the queue that will be profiled.
    status = NVPA_NVNC_QueueInitializeRangeCommands(pQueue_);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    m_pQueue = pQueue_;
    m_pDevice = pDevice_;
}

// Queries the chip name of device 0 and initializes m_ChipDesc with the metric
// tables that match it (GM20B on __NX__; GM200/GM204/GM206 on _WIN32).
//
// Returns true when m_ChipDesc was initialized. Returns false when the device
// query fails, no device is reported, no chip name is returned, or the chip is
// not one of the supported names. The NN_ASSERTs still fire first in builds
// where they are enabled; the explicit returns keep builds without asserts
// from reading an unset device count or calling strcmp on a null chip name.
bool LOP_Profiler::InitializeChipDesc()
{
    // Define global variables as function-statics.  All global variables are POD types.
#if defined(__NX__)
#define NV_METRICS_CLASS nv::metrics::gm20b::AllMetrics
    NVPA_DEFINE_METRIC_DESCS(NV_GM20B_ALL_METRIC_DESCS, s_metricsGM20B);
#undef NV_METRICS_CLASS
    static nv::metrics::MetricsStorage<nv::metrics::gm20b::AllMetrics> s_metricsStorage[2];
#elif defined(_WIN32)
#define NV_METRICS_CLASS nv::metrics::gm204::AllMetrics
    NVPA_DEFINE_METRIC_DESCS(NV_GM204_ALL_METRIC_DESCS, s_metricsGM204);
#undef NV_METRICS_CLASS
    static nv::metrics::MetricsStorage<nv::metrics::gm204::AllMetrics> s_metricsStorage[2];
#else
#error "Unsupported platform."
#endif

    m_ChipDesc = {};

    size_t numDevices = 0;
    NVPA_Status nvpaStatus = NVPA_GetDeviceCount(&numDevices);
    NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS && numDevices);
    if (nvpaStatus != NVPA_STATUS_SUCCESS || !numDevices)
    {
        return false;
    }

    // Only the first reported device is inspected.
    const size_t deviceIndex = 0;
    const char* pDeviceName = nullptr;
    nvpaStatus = NVPA_Device_GetNames(deviceIndex, &pDeviceName, &m_pChipName);
    NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);
    if (nvpaStatus != NVPA_STATUS_SUCCESS || !m_pChipName)
    {
        return false;
    }

    // One storage buffer per entry of s_metricsStorage.
    char* pStorageBuffer[2] = { s_metricsStorage[0].buffer, s_metricsStorage[1].buffer };

    // The empty "if (false)" lets every platform-specific branch below start
    // with "else if" regardless of which one the preprocessor keeps.
    if (false) {}
#if defined(__NX__)
    else if (!strcmp(m_pChipName, "GM20B"))
    {
        nv::metrics::ChipDescInitialize<nv::metrics::gm20b::AllMetrics>(m_ChipDesc, m_pChipName, pStorageBuffer, s_metricsGM20B, s_metricsGM20B_count);
    }
#elif defined(_WIN32)
    else if (!strcmp(m_pChipName, "GM200") || !strcmp(m_pChipName, "GM204") || !strcmp(m_pChipName, "GM206"))
    {
        nv::metrics::ChipDescInitialize<nv::metrics::gm204::AllMetrics>(m_ChipDesc, m_pChipName, pStorageBuffer, s_metricsGM204, s_metricsGM204_count);
    }
#endif
    else
    {
        // Unsupported GPU.
        NN_ASSERT(false);
        return false;
    }

    return true;
}

void LOP_Profiler::SelectAllMetrics(std::vector<MetricSpec>& selectedMetrics)
{
    selectedMetrics.clear();
    for (const nv::metrics::MetricDesc* pMetricDesc = m_ChipDesc.pMetricDescs; pMetricDesc->pName; ++pMetricDesc)
    {
        selectedMetrics.push_back(MetricSpec{ pMetricDesc, MetricPrint_ALL, true });
    }
}

bool LOP_Profiler::GenerateMetricSpecs(const char* const* ppMetricNames, int* pPrint, std::vector<MetricSpec>& metricSpecs)
{
    const nv::metrics::MetricDesc* const pFirst = m_ChipDesc.pMetricDescs;
    const nv::metrics::MetricDesc* const pLast = pFirst + m_ChipDesc.numMetricDescs;

    int print = 0;

    for (const char* const* ppMetricName = ppMetricNames; *ppMetricName; ++ppMetricName)
    {
        const nv::metrics::MetricDesc* pMetricDesc = std::lower_bound(pFirst, pLast, *ppMetricName, nv::metrics::MetricDescComparator());

        if (pMetricDesc == pLast || strcmp(pMetricDesc->pName, *ppMetricName))
        {
            return false;
        }

        NN_ASSERT(pMetricDesc);

        int printFormat = pPrint ? pPrint[print++] : MetricPrint_ALL;

        const MetricSpec metricSpec = { pMetricDesc, printFormat, true };
        metricSpecs.push_back(metricSpec);
    }
    return true;
}

// Appends one MetricSpec to metricSpecs for every name in metricNames.
//
// metricNames : metric names to look up. Being a std::set, they are visited
//               in sorted order, so printType entries pair with the names in
//               that sorted order.
// printType   : print masks, consumed one per name. Names for which no entry
//               is left (including the case of an empty vector) fall back to
//               MetricPrint_ALL instead of reading past the end of the vector.
// metricSpecs : receives the specs (not cleared first).
//
// Returns false as soon as a name has no exact match in
// m_ChipDesc.pMetricDescs; specs appended before that point stay in
// metricSpecs. Returns true when every name was matched.
bool LOP_Profiler::GenerateLoadedMetricSpecs(std::set<std::string> metricNames, std::vector<int> printType, std::vector<MetricSpec>& metricSpecs)
{
    const nv::metrics::MetricDesc* const pFirst = m_ChipDesc.pMetricDescs;
    const nv::metrics::MetricDesc* const pLast = pFirst + m_ChipDesc.numMetricDescs;

    size_t printIndex = 0;

    for (const std::string& metricName : metricNames)
    {
        // lower_bound only yields a candidate; strcmp confirms an exact match.
        const nv::metrics::MetricDesc* pMetricDesc = std::lower_bound(pFirst, pLast, metricName.c_str(), nv::metrics::MetricDescComparator());

        if (pMetricDesc == pLast || strcmp(pMetricDesc->pName, metricName.c_str()))
        {
            return false;
        }

        NN_ASSERT(pMetricDesc);

        // Bounds-checked: a printType shorter than metricNames must not be
        // indexed past its end.
        const int printFormat = (printIndex < printType.size()) ? printType[printIndex++] : MetricPrint_ALL;

        const MetricSpec metricSpec = { pMetricDesc, printFormat, true };
        metricSpecs.push_back(metricSpec);
    }
    return true;
}

bool LOP_Profiler::PrepareOfflineData(std::vector<uint8_t>& configImage, std::vector<MetricSpec>& selectedMetrics)
{
    /////////////////////////////////
    // Initialize RawMetricsConfig //
    /////////////////////////////////
    NVPA_RawMetricsConfigOptions metricsConfigOptions = { NVPA_RAW_METRICS_CONFIG_OPTIONS_STRUCT_SIZE };
    metricsConfigOptions.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    metricsConfigOptions.pChipName = m_ChipDesc.pChipName;

    NVPA_RawMetricsConfig* pRawMetricsConfig;
    NVPA_Status nvpaStatus = NVPA_RawMetricsConfig_Create(&metricsConfigOptions, &pRawMetricsConfig);
    NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);

    ///////////////////////////////////
    // Initialize CounterDataBuilder //
    ///////////////////////////////////
    NVPA_CounterDataBuilderOptions counterDataBuilderOptions = { NVPA_COUNTER_DATA_BUILDER_OPTIONS_STRUCT_SIZE };
    counterDataBuilderOptions.pChipName = m_ChipDesc.pChipName;

    NVPA_CounterDataBuilder* pCounterDataBuilder;
    nvpaStatus = NVPA_CounterDataBuilder_Create(&counterDataBuilderOptions, &pCounterDataBuilder);
    NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);

    /////////////////////////////////////////////
    // Initialize raw metrics to be configured //
    /////////////////////////////////////////////
    nv::metrics::ChipDescResetContexts(m_ChipDesc, nan(""));
    for (NVPA_Bool isolated = 0; isolated < 2; ++isolated)
    {
        m_ChipDesc.pRawMetricsContexts[isolated]->configuring = true;
    }

    /////////////////
    // Add Metrics //
    /////////////////
    for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
    {
        const MetricSpec& metricSpec = selectedMetrics[ii];
        metricSpec.pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);
    }

    //////////////////////////
    // Resolve Dependencies //
    //////////////////////////
    ResolveDependencies(pRawMetricsConfig, pCounterDataBuilder);

    /////////////////////////
    // Create Config Image //
    /////////////////////////
    CreateConfigImage(configImage, pRawMetricsConfig);

    ////////////////////////////////
    // Create Counter Prefix Data //
    ////////////////////////////////
    CreateCounterDataPrefix(pCounterDataBuilder);

    return true;
}

bool LOP_Profiler::PrepareLoadedOfflineData(std::vector<uint8_t>& counterDataPrefixImage, std::vector<MetricSpec>& selectedMetrics)
{
    /////////////////////////////////////////////
    // Initialize raw metrics to be configured //
    /////////////////////////////////////////////
    nv::metrics::ChipDescResetContexts(m_ChipDesc, nan(""));
    for (NVPA_Bool isolated = 0; isolated < 2; ++isolated)
    {
        m_ChipDesc.pRawMetricsContexts[isolated]->configuring = true;
    }

    /////////////////
    // Add Metrics //
    /////////////////
    for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
    {
        const MetricSpec& metricSpec = selectedMetrics[ii];
        metricSpec.pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);
    }

    ////////////////////////////
    //// Resolve Dependencies //
    ////////////////////////////
    m_CounterDataPrefix = counterDataPrefixImage;

    ProfilerInitializeCounterData();

    return true;
}

// Registers every raw metric that has a non-zero count in either raw-metrics
// context with both the raw metrics config and the counter data builder. All
// requests are added inside a single pass group.
//
// pRawMetricsConfig : config that receives the metric requests.
// pCounterBuilder   : counter data builder that receives the same requests.
//
// Always returns true; NVPA failures are only checked with NN_ASSERT.
bool LOP_Profiler::ResolveDependencies(NVPA_RawMetricsConfig* pRawMetricsConfig, NVPA_CounterDataBuilder* pCounterBuilder)
{
    NVPA_RawMetricsPassGroupOptions passGroupOptions = { NVPA_RAW_METRICS_PASS_GROUP_OPTIONS_STRUCT_SIZE };
    NVPA_Status nvpaStatus = NVPA_RawMetricsConfig_BeginPassGroup(pRawMetricsConfig, &passGroupOptions);
    NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);

    for (NVPA_Bool isolated = 0; isolated < 2; ++isolated)
    {
        auto& pContext = m_ChipDesc.pRawMetricsContexts[isolated];

        for (uint32_t rawIdx = 0; rawIdx < m_ChipDesc.numRawMetricIds; ++rawIdx)
        {
            // A zero count means no selected metric touched this raw metric.
            const uint16_t usageCount = pContext->pCounts[rawIdx];
            if (usageCount)
            {
                NVPA_RawMetricRequest request = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE };
                request.pMetricName = m_ChipDesc.ppRawMetricNames[rawIdx];
                request.isolated = isolated;
                request.keepInstances = true;

                nvpaStatus = NVPA_RawMetricsConfig_AddMetrics(pRawMetricsConfig, &request, 1);
                NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);
                nvpaStatus = NVPA_CounterDataBuilder_AddMetrics(pCounterBuilder, &request, 1);
                NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);
            }
        }
    }

    nvpaStatus = NVPA_RawMetricsConfig_EndPassGroup(pRawMetricsConfig);
    NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);

    return true;
}

// Generates the config image from pRawMetricsConfig, copies it into
// configImage, and then destroys pRawMetricsConfig (the caller must not use
// the config afterwards).
//
// configImage       : cleared, then resized to the image size and filled.
// pRawMetricsConfig : config to generate from; destroyed before returning.
//
// Always returns true; NVPA failures are only checked with NN_ASSERT.
bool LOP_Profiler::CreateConfigImage(std::vector<uint8_t>& configImage, NVPA_RawMetricsConfig* pRawMetricsConfig)
{
    configImage.clear();

    NVPA_Status status = NVPA_RawMetricsConfig_GenerateConfigImage(pRawMetricsConfig);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // First call: no buffer, ask only for the required size.
    size_t requiredBytes = 0;
    status = NVPA_RawMetricsConfig_GetConfigImage(pRawMetricsConfig, 0, nullptr, &requiredBytes);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // Second call: copy the image into the sized buffer.
    configImage.resize(requiredBytes);
    status = NVPA_RawMetricsConfig_GetConfigImage(pRawMetricsConfig, configImage.size(), configImage.data(), nullptr);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    status = NVPA_RawMetricsConfig_Destroy(pRawMetricsConfig);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    return true;
}

// Copies the counter data prefix from pCounterBuilder into m_CounterDataPrefix
// and then destroys pCounterBuilder (the caller must not use the builder
// afterwards).
//
// pCounterBuilder : builder to read the prefix from; destroyed before returning.
//
// Always returns true; NVPA failures are only checked with NN_ASSERT.
bool LOP_Profiler::CreateCounterDataPrefix(NVPA_CounterDataBuilder* pCounterBuilder)
{
    m_CounterDataPrefix.clear();

    // Initialized to zero (as CreateConfigImage does for its size) so that a
    // failed size query cannot feed an indeterminate value to resize() when
    // asserts are compiled out.
    size_t counterDataPrefixSize = 0;
    NVPA_Status status = NVPA_CounterDataBuilder_GetCounterDataPrefix(pCounterBuilder, 0, nullptr, &counterDataPrefixSize);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // data() rather than &v[0]: the latter is undefined on an empty vector.
    m_CounterDataPrefix.resize(counterDataPrefixSize);
    status = NVPA_CounterDataBuilder_GetCounterDataPrefix(pCounterBuilder, m_CounterDataPrefix.size(), m_CounterDataPrefix.data(), nullptr);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    status = NVPA_CounterDataBuilder_Destroy(pCounterBuilder);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);
    return true;
}

// Starts an NVPA profiling session on m_pQueue using the given config image.
//
// Steps: initialize the counter data, pick default trace/compute buffer sizes
// when none has been set yet, allocate the trace and compute arenas, fill in
// m_SessionOptions, begin the session, and derive m_NumRequiredPasses from the
// pass counts stored in the config image. m_FrameNumber is reset to 0.
//
// configImage : config image passed to NVPA; must not be empty.
//
// Returns false (non-_WIN32 builds only) when m_PerfmonBuffer is too small for
// the computed perfmon buffer size; note that both arenas have already been
// allocated at that point. Returns true otherwise.
bool LOP_Profiler::BeginSession(std::vector<uint8_t>& configImage)
{
    ProfilerInitializeCounterData();

    // Fixed sizing assumptions.
    const size_t maxRangesPerFrame = 10;
    const size_t maxDispatchesPerFrame = 1;
    const size_t numNestingLevels = 1;
    const size_t avgDynamicStringLength = 0;    // only the static string API is used
    const size_t numTraceBuffers = 4;

    // A size of zero means no size has been chosen yet; a non-zero size
    // (DecodeCounters() grows these members) is kept as is.
    if (m_TraceBufferSize == 0)
    {
        m_TraceBufferSize = NVPA_NVN_TRACE_BUFFER_PAD_SIZE + (NVPA_NVN_TRACE_RECORD_SIZE + avgDynamicStringLength) + 4096;
    }
    if (m_ComputeBufferSize == 0)
    {
        m_ComputeBufferSize = NVPA_NVN_COMPUTE_BUFFER_PAD_SIZE + maxDispatchesPerFrame * NVPA_NVN_COMPUTE_RECORD_SIZE;
    }

    // One arena per kind, each holding numTraceBuffers buffers.
    m_pTraceArena = new MemoryPool();
    m_pTraceArena->Init(NULL, numTraceBuffers * m_TraceBufferSize, NVN_MEMORY_POOL_FLAGS_CPU_UNCACHED_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT, m_pDevice);

    m_pComputeArena = new MemoryPool();
    m_pComputeArena->Init(NULL, numTraceBuffers * m_ComputeBufferSize, NVN_MEMORY_POOL_FLAGS_CPU_UNCACHED_BIT | NVN_MEMORY_POOL_FLAGS_GPU_CACHED_BIT, m_pDevice);

#if defined(_WIN32)
    // Upper bound for Maxwell architecture GPUs.
    const size_t numPerfmons = 100;
#elif defined(__HOS__)
    // The NX has 7 perfmon units.
    const size_t numPerfmons = 7;
#endif

    const size_t perfmonBufferMinSize = maxRangesPerFrame * numPerfmons * (2 * NVPA_NVN_PERFMON_RECORD_SIZE);
    const size_t perfmonBufferSize = Align(perfmonBufferMinSize, NVN_MEMORY_POOL_STORAGE_ALIGNMENT);
#if defined(_WIN32)
    uint8_t* pPerfmonBuffer = nullptr;
#else
    // The perfmon buffer is the statically sized member; bail out if it
    // cannot hold the computed size.
    uint8_t* pPerfmonBuffer = &m_PerfmonBuffer[0];
    if (perfmonBufferSize > sizeof(m_PerfmonBuffer))
    {
        NN_LOG("FAILED: statically sized g_profiler.perfmonBuffer is too small; need %d bytes\n", static_cast<int>(perfmonBufferSize));
        return false;
    }
#endif

    // General session settings.
    m_SessionOptions = NVPA_NVNC_SessionOptions();
    m_SessionOptions.structSize = NVPA_NVNC_SESSION_OPTIONS_STRUCT_SIZE;
    m_SessionOptions.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
    m_SessionOptions.pConfig = configImage.data();
    m_SessionOptions.configSize = configImage.size();
    m_SessionOptions.numTraceBuffers = numTraceBuffers;

    // Trace arena: size, CPU mapping, GPU address and owning pool.
    m_SessionOptions.traceBufferSize = m_TraceBufferSize;
    m_SessionOptions.pTraceArena = static_cast<uint8_t*>(nvnMemoryPoolMap(m_pTraceArena->GetMemoryPool()));
    m_SessionOptions.traceArenaGpuAddress = nvnMemoryPoolGetBufferAddress(m_pTraceArena->GetMemoryPool());
    m_SessionOptions.pTraceArenaMemoryPool = m_pTraceArena->GetMemoryPool();

    // Compute arena: same four fields.
    m_SessionOptions.computeBufferSize = m_ComputeBufferSize;
    m_SessionOptions.pComputeArena = static_cast<uint8_t*>(nvnMemoryPoolMap(m_pComputeArena->GetMemoryPool()));
    m_SessionOptions.computeArenaGpuAddress = nvnMemoryPoolGetBufferAddress(m_pComputeArena->GetMemoryPool());
    m_SessionOptions.pComputeArenaMemoryPool = m_pComputeArena->GetMemoryPool();

    // Perfmon buffer and nesting configuration.
    m_SessionOptions.pPerfmonBuffer = pPerfmonBuffer;
    m_SessionOptions.perfmonBufferSize = perfmonBufferSize;
    m_SessionOptions.finishOnEndPass = false;
    m_SessionOptions.minNestingLevel = 1;
    m_SessionOptions.numNestingLevels = numNestingLevels;

    NVPA_Status status = NVPA_NVNC_BeginSession(m_pQueue, &m_SessionOptions);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // Isolated passes are repeated once per nesting level.
    size_t pipelinedPassCount = 0;
    size_t isolatedPassCount = 0;
    status = NVPA_NVNC_Config_GetNumPasses(configImage.data(), &pipelinedPassCount, &isolatedPassCount);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    m_FrameNumber = 0;
    m_NumRequiredPasses = pipelinedPassCount + isolatedPassCount * numNestingLevels;

    return true;
}

// Ends the current profiling session and releases the trace and compute
// arenas allocated by BeginSession().
//
// shutdown : when true, m_pQueue is cleared afterwards so later calls
//            (including the one made by the destructor) return early.
//
// Returns false when no queue is attached; true otherwise.
//
// The arena pointers are reset to nullptr once released. This makes the
// function safe to call again after EndSession(false), and safe to call when
// BeginSession() never ran: in both cases there are no arenas, so the NVPA
// session teardown and the arena release are skipped instead of touching null
// or already-deleted pools.
bool LOP_Profiler::EndSession(bool shutdown)
{
    if (!m_pQueue)
    {
        return false;
    }
    nvnQueueFinish(m_pQueue);

    // Arenas exist only between BeginSession() and the matching EndSession().
    if (m_pComputeArena || m_pTraceArena)
    {
        NVPA_Status status = NVPA_NVNC_EndSession(m_pQueue);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        if (m_pComputeArena)
        {
            m_pComputeArena->Shutdown();
            delete m_pComputeArena;
            m_pComputeArena = nullptr;
        }

        if (m_pTraceArena)
        {
            m_pTraceArena->Shutdown();
            delete m_pTraceArena;
            m_pTraceArena = nullptr;
        }
    }

    if (shutdown)
    {
        m_pQueue = nullptr;
    }
    return true;
}

// Restarts the profiling session: ends the current one while keeping the
// queue attached (shutdown == false), then begins a new one with configImage.
// Returns whatever BeginSession() returns; the EndSession() result is ignored.
bool LOP_Profiler::ResetSession(std::vector<uint8_t>& configImage)
{
    EndSession(false);
    return BeginSession(configImage);
}

bool LOP_Profiler::BeginPass()
{
    NVPA_Status status = NVPA_NVNC_BeginPass(m_pQueue);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);
    return true;
}

// Ends the current profiling pass on m_pQueue and then fetches the queue and
// trace-buffer debug statistics. The statistics are not used by this function
// beyond the status checks (presumably kept for inspection in a debugger).
//
// Always returns true; NVPA failures are only checked with NN_ASSERT.
bool LOP_Profiler::EndPass()
{
    NVPA_Bool allPassesSubmitted = 0;
    NVPA_Status status = NVPA_NVNC_EndPass(m_pQueue, &allPassesSubmitted);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    // The queue stats must be fetched first: traceBufferIndexRead is only
    // meaningful after NVPA_NVNC_QueueGetDebugStats has filled the struct in.
    // Querying the trace buffer before that always used the zero-initialized
    // index.
    NVPA_NVNC_QueueDebugStats queueDebugStats = { NVPA_NVNC_QUEUE_DEBUG_STATS_STRUCT_SIZE };
    status = NVPA_NVNC_QueueGetDebugStats(m_pQueue, &queueDebugStats);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    NVPA_NVNC_TraceBufferDebugStats bufferDebugStats = { NVPA_NVNC_TRACE_BUFFER_DEBUG_STATS_STRUCT_SIZE };
    status = NVPA_NVNC_GetTraceBufferDebugStats(queueDebugStats.traceBufferIndexRead, &bufferDebugStats);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    return true;
}

// Decodes the collected counters into m_CounterDataImage.
//
// configImage      : config image the session was started with; also used to
//                    restart the session when data was dropped.
// allPassesDecoded : set to the decoder's allPassesDecoded flag, or to false
//                    when trace/compute bytes were dropped.
//
// When the decoder reports dropped trace or compute bytes, the corresponding
// buffer sizes are grown by the dropped amounts and the session is restarted
// through ResetSession().
//
// Returns true on success. Returns false when the session had to be restarted
// and ResetSession() failed (previously that failure was silently ignored).
bool LOP_Profiler::DecodeCounters(std::vector<uint8_t>& configImage, NVPA_Bool& allPassesDecoded)
{
    NVPA_NVNC_DecodeCountersOptions decodeOptions = {};
    decodeOptions.structSize = NVPA_NVNC_DECODE_COUNTERS_OPTIONS_STRUCT_SIZE;
    decodeOptions.pConfig = configImage.data();
    decodeOptions.configSize = configImage.size();
    decodeOptions.pCounterDataImage = m_CounterDataImage.data();
    decodeOptions.counterDataImageSize = m_CounterDataImage.size();
    decodeOptions.pCounterDataScratchBuffer = m_CounterDataScratch.data();
    decodeOptions.counterDataScratchBufferSize = m_CounterDataScratch.size();

    NVPA_NVNC_DecodeCountersStats decodeStats = {};
    decodeStats.structSize = NVPA_NVNC_DECODE_COUNTERS_STATS_STRUCT_SIZE;

    NVPA_Status status = NVPA_NVNC_DecodeCounters(&decodeOptions, &decodeStats);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    const size_t traceBytesDropped = decodeStats.numTraceBytesDropped;
    const size_t computeBytesDropped = decodeStats.numComputeBytesDropped;

    if (traceBytesDropped || computeBytesDropped)
    {
        // Buffers were too small: enlarge them by what was lost and restart
        // the session so BeginSession() allocates arenas of the new size.
        m_TraceBufferSize += traceBytesDropped;
        m_ComputeBufferSize += computeBytesDropped;
        allPassesDecoded = false;

        return ResetSession(configImage);
    }

    allPassesDecoded = decodeStats.allPassesDecoded;
    return true;
}

bool LOP_Profiler::UnpackRawMetrics(std::vector<MetricSpec>& selectedMetrics, bool allMetricsSelected, DebugTextRenderer* pDebugTextRenderer, const std::string& outputFormat, int linesToSkip, bool outputFile)
{
    nv::metrics::ChipDescResetContexts(m_ChipDesc, nan(""));
    std::vector<const char*> descriptions(16); // temp buffer for description string pointers

    NVPA_NVNC_UnpackRawMetricsOptions options = { NVPA_NVNC_UNPACK_RAW_METRICS_OPTIONS_STRUCT_SIZE };
    options.pCounterDataImage = &m_CounterDataImage[0];
    // options.rangeIndex is assigned in the loop
    options.numRawMetrics = m_ChipDesc.numRawMetricIds;
    options.pRawMetricIds = m_ChipDesc.pRawMetricIds;

    size_t numRanges;
    NVPA_Status status = NVPA_CounterData_GetNumRanges(options.pCounterDataImage, &numRanges);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    std::string outputText;

    for (size_t rangeIndex = 0; rangeIndex < numRanges; ++rangeIndex)
    {
        size_t numDescriptions;
        status = NVPA_CounterData_GetRangeDescriptions(options.pCounterDataImage, rangeIndex, 0, nullptr, &numDescriptions);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        if (descriptions.size() < numDescriptions)
        {
            const size_t numNeeded = numDescriptions - descriptions.size();
            descriptions.insert(descriptions.end(), numNeeded, nullptr);
        }
        status = NVPA_CounterData_GetRangeDescriptions(options.pCounterDataImage, rangeIndex, descriptions.size(), &descriptions[0], &numDescriptions);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        for (int isolated = 0; isolated < 2; ++isolated)
        {
            auto& pRawMetrics = m_ChipDesc.pRawMetricsContexts[isolated];
            options.rangeIndex = rangeIndex;
            options.isolated = static_cast<NVPA_Bool>(isolated);
            options.pRawMetricValues = pRawMetrics->pValues;
            options.pHwUnitCounts = pRawMetrics->pCounts;
            status = NVPA_NVNC_CounterData_UnpackRawMetrics(&options);
            NN_ASSERT(status == NVPA_STATUS_SUCCESS);
        }

        if (allMetricsSelected)
        {
            for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
            {
                const MetricSpec& metricSpec = selectedMetrics[ii];
                const nv::metrics::MetricValue metricValue = metricSpec.pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);

                const std::string metricName = lop::ssprintf("%s%s", metricSpec.pMetricDesc->pName, metricSpec.isolated ? "$" : "&");

                const std::string sumName = metricName + ".sum";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", sumName.c_str(), metricValue.sum);

                const std::string avgName = metricName + ".avg";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", avgName.c_str(), metricValue.avg);

                const std::string peakName = metricName + ".peak_sustained";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", peakName.c_str(), metricValue.peak_sustained);

                const std::string cyclesName = metricName + ".cycles_elapsed";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", cyclesName.c_str(), metricValue.cycles_elapsed);

                const std::string sumPerCycleName = metricName + ".sum_per_cycle_elapsed";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", sumPerCycleName.c_str(), metricValue.sum_per_cycle_elapsed());

                const std::string avgPerCycleName = metricName + ".avg_per_cycle_elapsed";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", avgPerCycleName.c_str(), metricValue.avg_per_cycle_elapsed());

                const std::string pctName = metricName + ".pct_of_peak_sustained_elapsed";
                outputText += lop::ssprintf("    %-90s = %16.2f\n", pctName.c_str(), metricValue.pct_of_peak_sustained_elapsed());
            }
        }
        else
        {
            const nv::metrics::MetricDesc* pFirst = m_ChipDesc.pMetricDescs;
            const nv::metrics::MetricDesc* pLast = pFirst + m_ChipDesc.numMetricDescs;

            int row = linesToSkip;

            nn::util::Vector4f color[] = { { 1.0f, 1.0f, 1.0f, 1.0f }, { 0.85f, 0.85f, 0.85f, 1.0f } };

            for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
            {
                const MetricSpec& metricSpec = selectedMetrics[ii];

                auto* pMetricDesc = std::lower_bound(pFirst, pLast, metricSpec.pMetricDesc->pName, nv::metrics::MetricDescComparator());
                if (pMetricDesc == pLast || strcmp(pMetricDesc->pName, metricSpec.pMetricDesc->pName))
                {
                    NN_ASSERT(false, "Metric not found\n");
                }
                else
                {
                    const nv::metrics::MetricValue metricValue = pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);
                    const std::string metricName = metricSpec.pMetricDesc->pName;

                    int printCounter = MetricPrint_Sum;

                    // Sum
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/S";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.sum);
                    }
                    printCounter <<= 1;

                    // Avg
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/A";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.avg);
                    }
                    printCounter <<= 1;

                    // Avg peak sustained
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/Aps";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.peak_sustained);
                    }
                    printCounter <<= 1;

                    // Avg cycles elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/Ce";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.cycles_elapsed);
                    }
                    printCounter <<= 1;

                    // Sum per cycle elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/Sce";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.sum_per_cycle_elapsed());
                    }
                    printCounter <<= 1;

                    // Avg per cycle elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/Ace";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.avg_per_cycle_elapsed());
                    }
                    printCounter <<= 1;

                    // Pct of peak sustained elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + "/Ppse";
                        nn::util::Vector4f& c = color[row % 2];
                        pDebugTextRenderer->SetColor(c.GetX(), c.GetY(), c.GetZ(), c.GetW());
                        outputFormat.c_str();
                        pDebugTextRenderer->Printf(0, static_cast<float>(row++), outputFormat.c_str(), name.c_str(), metricValue.pct_of_peak_sustained_elapsed());
                    }
                    printCounter <<= 1;
                }
            }
        }
    }

    if (allMetricsSelected && outputFile)
    {
        nn::Result result = nn::fs::MountHost("host", "C:/");
        if (nn::fs::ResultPathNotFound::Includes(result))
        {
            NN_ASSERT(false, "Target directory not found.\n");
        }
        else if (nn::fs::ResultTargetNotFound::Includes(result))
        {
            NN_ASSERT(false, "Host PC not found.\n");
        }

        NN_ASSERT(result.IsSuccess());

        result = nn::fs::CreateDirectory("host:/Lop_Test_Output");

        // Save out Counter data image

#ifdef _WIN32
        std::string metricOutputName = lop::ssprintf("host:/Lop_Test_Output/metrics_%s.txt", m_pChipName);
#else
        std::string metricOutputName = "host:/Lop_Test_Output/metrics_NX.txt";
#endif
        result = nn::fs::DeleteFile(metricOutputName.c_str());
        result = nn::fs::CreateFile(metricOutputName.c_str(), outputText.length());

        nn::fs::FileHandle outputFileHandle;
        result = nn::fs::OpenFile(&outputFileHandle, metricOutputName.c_str(), nn::fs::OpenMode_Write);
        result = nn::fs::WriteFile(outputFileHandle, 0, outputText.c_str(), outputText.length(), nn::fs::WriteOption());
        result = nn::fs::FlushFile(outputFileHandle);

        nn::fs::CloseFile(outputFileHandle);

        nn::fs::Unmount("host");

        NN_LOG("-----------------------------------------------------------\n");
        NN_LOG("File has been output at C:\\Lop_Test_Output\\%s\n", metricOutputName.c_str());
        NN_LOG("-----------------------------------------------------------\n");
    }
    return true;
}//NOLINT(impl/function_size)

bool LOP_Profiler::UnpackRawMetrics_Test(std::vector<MetricSpec>& selectedMetrics, std::vector<std::pair<std::string, double> >& metricValues)
{
    nv::metrics::ChipDescResetContexts(m_ChipDesc, nan(""));
    std::vector<const char*> descriptions(16); // temp buffer for description string pointers

    NVPA_NVNC_UnpackRawMetricsOptions options = { NVPA_NVNC_UNPACK_RAW_METRICS_OPTIONS_STRUCT_SIZE };
    options.pCounterDataImage = &m_CounterDataImage[0];
    // options.rangeIndex is assigned in the loop
    options.numRawMetrics = m_ChipDesc.numRawMetricIds;
    options.pRawMetricIds = m_ChipDesc.pRawMetricIds;

    size_t numRanges;
    NVPA_Status status = NVPA_CounterData_GetNumRanges(options.pCounterDataImage, &numRanges);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    std::string outputText;

    for (size_t rangeIndex = 0; rangeIndex < numRanges; ++rangeIndex)
    {
        size_t numDescriptions;
        status = NVPA_CounterData_GetRangeDescriptions(options.pCounterDataImage, rangeIndex, 0, nullptr, &numDescriptions);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        if (descriptions.size() < numDescriptions)
        {
            const size_t numNeeded = numDescriptions - descriptions.size();
            descriptions.insert(descriptions.end(), numNeeded, nullptr);
        }
        status = NVPA_CounterData_GetRangeDescriptions(options.pCounterDataImage, rangeIndex, descriptions.size(), &descriptions[0], &numDescriptions);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        for (int isolated = 0; isolated < 2; ++isolated)
        {
            auto& pRawMetrics = m_ChipDesc.pRawMetricsContexts[isolated];
            options.rangeIndex = rangeIndex;
            options.isolated = static_cast<NVPA_Bool>(isolated);
            options.pRawMetricValues = pRawMetrics->pValues;
            options.pHwUnitCounts = pRawMetrics->pCounts;
            status = NVPA_NVNC_CounterData_UnpackRawMetrics(&options);
            NN_ASSERT(status == NVPA_STATUS_SUCCESS);
        }

        {
            const nv::metrics::MetricDesc* pFirst = m_ChipDesc.pMetricDescs;
            const nv::metrics::MetricDesc* pLast = pFirst + m_ChipDesc.numMetricDescs;

            for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
            {
                const MetricSpec& metricSpec = selectedMetrics[ii];

                auto* pMetricDesc = std::lower_bound(pFirst, pLast, metricSpec.pMetricDesc->pName, nv::metrics::MetricDescComparator());
                if (pMetricDesc == pLast || strcmp(pMetricDesc->pName, metricSpec.pMetricDesc->pName))
                {
                    NN_ASSERT(false, "Metric not found\n");
                }
                else
                {
                    const nv::metrics::MetricValue metricValue = pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);
                    const std::string metricName = metricSpec.pMetricDesc->pName; // lop::ssprintf("%s%s", metricSpec.pMetricDesc->pName, metricSpec.isolated ? "$" : "&");

                    int printCounter = MetricPrint_Sum;

                    // Sum
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".sum";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.sum));
                    }
                    printCounter <<= 1;

                    // Avg
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".avg";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.avg));
                    }
                    printCounter <<= 1;

                    // Avg peak sustained
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".peak_sustained";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.peak_sustained));
                    }
                    printCounter <<= 1;

                    // Avg cycles elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".cycles_elapsed";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.cycles_elapsed));
                    }
                    printCounter <<= 1;

                    // Sum per cycle elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".sum_per_cycle_elapsed";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.sum_per_cycle_elapsed()));
                    }
                    printCounter <<= 1;

                    // Avg per cycle elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".avg_per_cycle_elapsed";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.avg_per_cycle_elapsed()));
                    }
                    printCounter <<= 1;

                    // Pct of peak sustained elapsed
                    if (metricSpec.printStats & printCounter)
                    {
                        const std::string name = metricName + ".pct_of_peak_sustained_elapsed";
                        metricValues.push_back(std::pair<std::string, double>(name, metricValue.pct_of_peak_sustained_elapsed()));
                    }
                    printCounter <<= 1;
                }
            }
        }
    }

    return true;
}

bool LOP_Profiler::UnpackAllMetrics(std::vector<MetricSpec>& selectedMetrics)
{
    nv::metrics::ChipDescResetContexts(m_ChipDesc, nan(""));
    std::vector<const char*> descriptions(16); // temp buffer for description string pointers

    NVPA_NVNC_UnpackRawMetricsOptions options = { NVPA_NVNC_UNPACK_RAW_METRICS_OPTIONS_STRUCT_SIZE };
    options.pCounterDataImage = &m_CounterDataImage[0];
    // options.rangeIndex is assigned in the loop
    options.numRawMetrics = m_ChipDesc.numRawMetricIds;
    options.pRawMetricIds = m_ChipDesc.pRawMetricIds;

    size_t numRanges;
    NVPA_Status status = NVPA_CounterData_GetNumRanges(options.pCounterDataImage, &numRanges);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    std::string outputText;

    for (size_t rangeIndex = 0; rangeIndex < numRanges; ++rangeIndex)
    {
        size_t numDescriptions;
        status = NVPA_CounterData_GetRangeDescriptions(options.pCounterDataImage, rangeIndex, 0, nullptr, &numDescriptions);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        if (descriptions.size() < numDescriptions)
        {
            const size_t numNeeded = numDescriptions - descriptions.size();
            descriptions.insert(descriptions.end(), numNeeded, nullptr);
        }
        status = NVPA_CounterData_GetRangeDescriptions(options.pCounterDataImage, rangeIndex, descriptions.size(), &descriptions[0], &numDescriptions);
        NN_ASSERT(status == NVPA_STATUS_SUCCESS);

        for (int isolated = 0; isolated < 2; ++isolated)
        {
            auto& pRawMetrics = m_ChipDesc.pRawMetricsContexts[isolated];
            options.rangeIndex = rangeIndex;
            options.isolated = static_cast<NVPA_Bool>(isolated);
            options.pRawMetricValues = pRawMetrics->pValues;
            options.pHwUnitCounts = pRawMetrics->pCounts;
            status = NVPA_NVNC_CounterData_UnpackRawMetrics(&options);
            NN_ASSERT(status == NVPA_STATUS_SUCCESS);
        }

        for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
        {
            const MetricSpec& metricSpec = selectedMetrics[ii];
            const nv::metrics::MetricValue metricValue = metricSpec.pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);

            EXPECT_FALSE(isnan(metricValue.sum));
            EXPECT_FALSE(isnan(metricValue.avg));
            EXPECT_FALSE(isnan(metricValue.peak_sustained));
            EXPECT_FALSE(isnan(metricValue.cycles_elapsed));
            EXPECT_FALSE(isnan(metricValue.sum_per_cycle_elapsed()));
            EXPECT_FALSE(isnan(metricValue.avg_per_cycle_elapsed()));
            EXPECT_FALSE(isnan(metricValue.pct_of_peak_sustained_elapsed()));
        }
    }
    return true;
}
//NOLINT(impl/function_size)

bool LOP_Profiler::ProfilerInitializeCounterData()
{
    NVPA_NVNC_CounterDataImageOptions counterDataImageOptions = { NVPA_NVNC_COUNTER_DATA_IMAGE_OPTIONS_STRUCT_SIZE };
    counterDataImageOptions.pCounterDataPrefix = &m_CounterDataPrefix[0];
    counterDataImageOptions.counterDataPrefixSize = m_CounterDataPrefix.size();
    counterDataImageOptions.maxNumRanges = (uint32_t)16;
    counterDataImageOptions.maxNumRangeTreeNodes = (uint32_t)16;
    counterDataImageOptions.maxRangeNameLength = 64;

    size_t counterDataImageSize;
    NVPA_Status status = NVPA_NVNC_CalculateCounterDataImageSize(&counterDataImageOptions, &counterDataImageSize);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    m_CounterDataImage.resize(counterDataImageSize);
    status = NVPA_NVNC_InitializeCounterDataImage(&counterDataImageOptions, m_CounterDataImage.size(), &m_CounterDataImage[0]);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    const uint8_t* pCounterDataImage = &m_CounterDataImage[0];
    size_t counterDataScratchBufferSize;
    status = NVPA_NVNC_CalculateCounterDataScratchBufferSize(pCounterDataImage, &counterDataScratchBufferSize);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    m_CounterDataScratch.resize(counterDataScratchBufferSize);
    status = NVPA_NVNC_InitializeCounterDataScratchBuffer(pCounterDataImage, m_CounterDataScratch.size(), &m_CounterDataScratch[0]);
    NN_ASSERT(status == NVPA_STATUS_SUCCESS);

    return true;
}

bool LOP_Profiler::GenerateAllMetricRequests(const NVPA_RawMetricsConfig * pRawMetricsConfig, const std::vector<MetricSpec>& selectedMetrics, std::vector<NVPA_RawMetricRequest>& metricRequests)
{
    ChipDescResetContexts(m_ChipDesc, nan(""));
    for (uint32_t isolated = 0; isolated < 2; ++isolated)
    {
        m_ChipDesc.pRawMetricsContexts[isolated]->configuring = true;
    }

    for (size_t ii = 0; ii < selectedMetrics.size(); ++ii)
    {
        const MetricSpec& metricSpec = selectedMetrics[ii];
        metricSpec.pMetricDesc->metricEvalFn(m_ChipDesc.pRawMetricsContexts[metricSpec.isolated]);
    }

    for (uint32_t isolated = 0; isolated < 2; ++isolated)
    {
        for (uint32_t rawCounterIdx = 0; rawCounterIdx < m_ChipDesc.numRawMetricIds; ++rawCounterIdx)
        {
            const uint16_t isRawCounterNeeded = m_ChipDesc.pRawMetricsContexts[isolated]->pCounts[rawCounterIdx];
            if (isRawCounterNeeded)
            {
                const uint64_t rawMetricId = m_ChipDesc.pRawMetricIds[rawCounterIdx];
                const char* pMetricName = nullptr;
                NVPA_Status nvpaStatus = NVPA_RawMetricsConfig_GetMetricNameFromCounterId(pRawMetricsConfig, rawMetricId, &pMetricName);
                NN_ASSERT(nvpaStatus == NVPA_STATUS_SUCCESS);

                NVPA_RawMetricRequest metricRequest = { NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE };
                metricRequest.pMetricName = pMetricName;
                metricRequest.isolated = static_cast<NVPA_Bool>(isolated);
                metricRequest.keepInstances = true;
                metricRequests.push_back(metricRequest);
            }
        }
    }

    return true;
}

bool LOP_Profiler::SaveRawMetrics(const std::vector<MetricSpec>& metricSpecs)
{
    if (metricSpecs.empty())
    {
        return false;
    }

    nn::Result result = nn::fs::MountHost("host", "C:/siglo_tree/sdk/Samples/Sources/Applications/NvnTutorial03AssetFileLoading");
    if (nn::fs::ResultPathNotFound::Includes(result))
    {
        NN_ASSERT(false, "Target directory not found.\n");
    }
    else if (nn::fs::ResultTargetNotFound::Includes(result))
    {
        NN_ASSERT(false, "Host PC not found.\n");
    }

    NN_ASSERT(result.IsSuccess());

    result = nn::fs::CreateDirectory("host:/Out");

    // Save out Counter data image
    result = nn::fs::DeleteFile("host:/Out/counterImage");
    result = nn::fs::CreateFile("host:/Out/counterImage", m_CounterDataImage.size());

    nn::fs::FileHandle counterImageFile;
    result = nn::fs::OpenFile(&counterImageFile, "host:/Out/counterImage", nn::fs::OpenMode_Write);
    result = nn::fs::WriteFile(counterImageFile, 0, &m_CounterDataImage[0], m_CounterDataImage.size(), nn::fs::WriteOption());
    result = nn::fs::FlushFile(counterImageFile);

    nn::fs::CloseFile(counterImageFile);

    // Save out metric names
    result = nn::fs::DeleteFile("host:/Out/counterNames.txt");

    std::string counterNames;
    for (size_t i = 0; i < metricSpecs.size(); ++i)
    {
        //counterNames += metricSpecs[i].pName;
        counterNames += "\n";
    }

    result = nn::fs::CreateFile("host:/Out/counterNames.txt", counterNames.length());

    nn::fs::FileHandle counterNamesFile;
    result = nn::fs::OpenFile(&counterNamesFile, "host:/Out/counterNames.txt", nn::fs::OpenMode_Write);
    result = nn::fs::WriteFile(counterNamesFile, 0, counterNames.c_str(), counterNames.length(), nn::fs::WriteOption());
    result = nn::fs::FlushFile(counterNamesFile);

    nn::fs::CloseFile(counterNamesFile);

    nn::fs::Unmount("host");

    return true;
}
}
#endif
