﻿// NOLINT(style/copyright)
/*
 * Copyright 2014-2016 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.
 *
 * This software and the information contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
 * of a form of NVIDIA software license agreement.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users.   This source code is a "commercial item" as
 * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
 * "commercial computer  software"  and "commercial computer software
 * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 *
 * Any use of this source code in individual and commercial software must
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 */

#include <gfx/demo.h>
#include <nvperfapi.h>
#include <string>
#include <vector>
#include <cstdlib>
#include <cstdio>

#include "nvperfapi_profiler.h"
#include "nvperfapi_debug.h"

namespace NvPerfApiSamples {

    namespace Profiler
    {
        /// Initializes the PerfWorks library and the graphics-API specific hooks, then
        /// validates (and optionally prints) basic properties of every reported device.
        ///
        /// @param APILoadFn  Callback invoked right after NVPA_Init() to load the
        ///                   graphics-API specific part of the library.
        /// @param verbosity  When greater than 0, the name, chip name and SM count of
        ///                   each device are printed through DEMOPrintf.
        /// @return Always true. How failures are reported depends on INSTRUMENT_NVPA and
        ///         detail::Check, which are defined outside this file.
        bool Init(NVPA_Status(*APILoadFn)(), uint8_t verbosity)
        {
            // NVPA GUIDE
            //    Initialize the perfworks library
            INSTRUMENT_NVPA(NVPA_Init());

            // NVPA GUIDE
            //    Initialize graphics API specific hooks
            INSTRUMENT_NVPA(APILoadFn());

            // NVPA GUIDE
            //    The rest is for debugging only.
            size_t numDevices = 0;
            INSTRUMENT_NVPA(NVPA_GetNumDevices(&numDevices));

            detail::Check(numDevices, "Failed to initialize NvPerf (no devices found)");

            // The index has the same type as numDevices to avoid a mixed-width comparison.
            for (size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex)
            {
                char const* pDeviceName = nullptr;
                INSTRUMENT_NVPA(NVPA_Device_GetName(deviceIndex, &pDeviceName));
                detail::Check(pDeviceName, "Failed to initialize NvPerf (no device name found)");
                if (verbosity > 0)
                    DEMOPrintf( "Device: %s\n", pDeviceName );

                // A separate, freshly nulled pointer is used so the check below cannot
                // pass on the device name left over from the previous query.
                char const* pChipName = nullptr;
                INSTRUMENT_NVPA(NVPA_Device_GetChipName(deviceIndex, &pChipName));
                detail::Check(pChipName, "Failed to initialize NvPerf (no chip name found)");
                if (verbosity > 0)
                    DEMOPrintf( "Chip:   %s\n", pChipName );

                uint64_t sms = 0;
                INSTRUMENT_NVPA(NVPA_Device_GetAttribute(deviceIndex, NVPA_DEVICE_ATTR_NUM_SM, &sms));
                detail::Check(sms, "Failed to initialize NvPerf (device has no SMs)");
                if (verbosity > 0)
                {
                    // The 64-bit attribute is narrowed explicitly so the argument matches
                    // the %u conversion; passing a uint64_t to %d is a varargs type mismatch.
                    DEMOPrintf( "SMs:    %u\n", static_cast<unsigned int>(sms) );
                }
            }

            return true;
        }

        /// Builds an NVPA_Config for one device with the requested metrics enabled.
        ///
        /// @param deviceIndex   Index of the device the activity is created for.
        /// @param activityKind  Activity kind set on the activity options.
        /// @param metricsFirst  Start of the range of metrics to enable.
        /// @param metricsLast   One-past-the-end of the range of metrics to enable.
        /// @param verbosity     Greater than 0 prints the enabled metrics and the number
        ///                      of required passes; greater than 1 additionally prints
        ///                      every metric available on the activity.
        /// @return The config produced by NVPA_Config_Create.
        ///         NOTE(review): this function never destroys it, so the caller
        ///         presumably owns it - confirm.
        NVPA_Config* CreateConfig(
            size_t deviceIndex,
            NVPA_ActivityKind activityKind,
            MetricSpec const* metricsFirst,
            MetricSpec const* metricsLast,
            uint8_t verbosity)
        {
            // NVPA GUIDE CONFIGURE METRICS
            //   Metric configuration has this top level design:
            //        CREATE ActivityOptions
            //        CREATE Activity
            //        CREATE MetricOptions
            //        for each metric:
            //           ENABLE METRIC
            //        CREATE Config

            // NVPA GUIDE
            //    Here we only create a configuration for the active device for the gpu context
            const char* pDeviceName = nullptr;
            INSTRUMENT_NVPA(NVPA_Device_GetName(deviceIndex, &pDeviceName));
            detail::CheckNeq(static_cast<const char*>(nullptr), pDeviceName, "Error returning device name!");

            // NVPA GUIDE CREATE ActivityOptions
            //    The activity defines how the collection will occur which limits what kind
            //    of data can be collected.  The options object is used to configure the
            //    creation of the activity below.
            NVPA_ActivityOptions* pActivityOptions = nullptr;
            INSTRUMENT_NVPA(NVPA_ActivityOptions_Create(&pActivityOptions));
            INSTRUMENT_NVPA(NVPA_ActivityOptions_SetActivityKind(pActivityOptions, activityKind));

            // NVPA GUIDE CREATE Activity
            //     This creates an activity by associating it with a device.  The activity
            //     determines which metrics are available for the (device, activity-kind)
            //     pair and that then can be enabled for collection.
            NVPA_Activity* pActivity = nullptr;
            INSTRUMENT_NVPA(NVPA_Activity_CreateForDevice(deviceIndex, pActivityOptions, &pActivity));

            INSTRUMENT_NVPA(NVPA_ActivityOptions_Destroy(pActivityOptions));

            // NVPA GUIDE
            //     For debugging only, this code snippet demonstrates how to query all available
            //     metrics from the activity and print them to stdout.
            if (verbosity > 1)
            {
                size_t numMetrics = 0;
                INSTRUMENT_NVPA(NVPA_Activity_GetNumMetrics(pActivity, &numMetrics));
                detail::Check(numMetrics, "Failed to initialize NvPerf (no metrics found)");

                // data() is used instead of &v[0] because it is well defined even when
                // the vector is empty.
                std::vector<NVPA_MetricId> metricIds(numMetrics);
                INSTRUMENT_NVPA(NVPA_Activity_GetMetricIds(pActivity, numMetrics, metricIds.data(), nullptr));

                std::vector<char const*> metricNames(numMetrics);
                INSTRUMENT_NVPA(NVPA_GetMetricNames(numMetrics, metricIds.data(), metricNames.data()));

                std::vector<char const*> descriptions(numMetrics);
                INSTRUMENT_NVPA(NVPA_GetMetricDescriptions(
                    numMetrics,
                    metricIds.data(),
                    descriptions.data()));

                DEMOPrintf( "All Metrics:\n" );
                for (size_t index = 0; index < numMetrics; ++index)
                {
                    DEMOPrintf( "    %s (%s)\n", metricNames[ index ], descriptions[ index ] );
                }
            }

            // NVPA GUIDE CREATE MetricOptions
            //     The metrics options defines properties on enabled metrics.
            //
            //     Serialized - Forces synchronizations between ranges.  Draw calls within
            //                  a range will be pipelined but draw calls in different ranges
            //                  are performed serially
            //                  NOTE: the added synchronization will have performance
            //                        consequences for the application.  Serialization allows
            //                        for atomic measuring of ranges but because of the added
            //                        synchs, the results could be inaccurate.
            //     Pipelined - No synchronization between ranges.  All draw calls will be
            //                 pipelined.
            NVPA_MetricOptions* pMetricOptions = nullptr;
            INSTRUMENT_NVPA(NVPA_MetricOptions_Create(&pMetricOptions));

            for (MetricSpec const* it = metricsFirst; it != metricsLast; ++it)
            {
                // The same options object is reused; only its serialized flag is updated
                // to match the metric about to be enabled.
                INSTRUMENT_NVPA(NVPA_MetricOptions_SetSerialized(pMetricOptions, it->serialized));

                // NVPA GUIDE ENABLE METRIC
                //    Perfworks internally manages all metrics through a metric id.  The id
                //    for a particular metric name must first be queried from the activity.
                //
                //    Once the metric id is determined, the metric can be enabled on the activity.
                NVPA_MetricId metricId = 0;
                INSTRUMENT_NVPA(NVPA_Activity_FindMetricByName(pActivity, it->name.c_str(), &metricId));

                // Value-initialized so the check below never reads an indeterminate value.
                NVPA_MetricEnableError metricEnableError{};
                INSTRUMENT_NVPA(NVPA_Activity_EnableMetric(pActivity, metricId, pMetricOptions, &metricEnableError));
                detail::CheckEq(NVPA_METRIC_ENABLE_ERROR_NONE, metricEnableError, "Error enabling metric: " + it->name + ", reason: " + NvPerfApiSamples::detail::ToStr(metricEnableError));
            }
            // NVPA GUIDE
            //     Once the metrics are enabled, the MetricOptions object is no longer needed and can safely be freed.
            INSTRUMENT_NVPA(NVPA_MetricOptions_Destroy(pMetricOptions));

            // NVPA GUIDE
            //     For debugging only.  This code snippet demonstrates how to query all enabled metrics from an activity.
            //     This is for demonstration purposes only as we don't have to query for enabled metrics here since we just
            //     enabled them above.
            if (verbosity > 0)
            {
                size_t numEnabledMetrics = 0;
                INSTRUMENT_NVPA(NVPA_Activity_GetNumEnabledMetrics(pActivity, &numEnabledMetrics));

                DEMOPrintf( "Enabled Metrics:\n" );
                if (numEnabledMetrics)
                {
                    std::vector<NVPA_Bool> serialized(numEnabledMetrics);
                    INSTRUMENT_NVPA(NVPA_Activity_GetEnabledMetricsSerialized(pActivity, numEnabledMetrics, serialized.data(), nullptr));

                    std::vector<NVPA_MetricId> metricIds(numEnabledMetrics);
                    INSTRUMENT_NVPA(NVPA_Activity_GetEnabledMetricIds(pActivity, numEnabledMetrics, metricIds.data(), nullptr));

                    std::vector<const char*> metricNames(numEnabledMetrics);
                    INSTRUMENT_NVPA(NVPA_GetMetricNames(numEnabledMetrics, metricIds.data(), metricNames.data()));

                    // Metrics that are not serialized are marked with a trailing '$'.
                    for (size_t i = 0; i < metricNames.size(); ++i)
                    {
                        DEMOPrintf( "    %s%s\n", metricNames[ i ], ( serialized[ i ] ? "" : "$" ) );
                    }
                }
                else
                {
                    DEMOPrintf( "    [None]\n" );
                }

                // NVPA GUIDE
                //   For a set of enabled metrics, this call will tell you the number of passes that would be required to
                //   successfully collect all the metric data from the gpu.  This can be used later to repeat a frame
                //   numRequiredPasses times.  See IsMetricDataReady() for an online method of determining if a frame has
                //   been repeated enough times for successful collection of all enabled metrics.
                size_t numRequiredPasses = 0;
                INSTRUMENT_NVPA(NVPA_Activity_GetNumRequiredPasses(pActivity, &numRequiredPasses));
                // Narrowed explicitly so the argument matches %u; passing a size_t to %d
                // is a varargs type mismatch.
                DEMOPrintf( "Required passes: %u\n", static_cast<unsigned int>(numRequiredPasses) );
            }

            // NVPA GUIDE CREATE Config
            //    Now that we have setup the activity and enabled our interesting metrics, we
            //    create the Config that will manage metric collection.
            NVPA_Config* pConfig = nullptr;
            INSTRUMENT_NVPA(NVPA_Config_Create(pActivity, &pConfig));
            INSTRUMENT_NVPA(NVPA_Activity_Destroy(pActivity));

            return pConfig;
        }

        namespace Detail
        {
            /// Selects which banner and per-entry label StackDataStr() emits.
            enum class StackDataKind
            {
                Stacks, ///< Emits the "=STACKS==" banner and "rangeIds" labels.
                Ranges  ///< Emits the "=RANGES==" banner and "rangeId" labels.
            };

            /// Formats a value exactly like printf("%9.6f") but sizes the result
            /// dynamically: a large double can need several hundred characters, which
            /// would overflow any small fixed-size buffer.
            inline std::string FormatMetricValue(double value)
            {
                // The first call only measures; the second one writes into a buffer of
                // exactly the required size (plus the terminating NUL).
                int const length = std::snprintf(nullptr, 0, "%9.6f", value);
                if (length <= 0)
                {
                    return std::string();
                }

                std::vector<char> scratch(static_cast<size_t>(length) + 1);
                std::snprintf(scratch.data(), scratch.size(), "%9.6f", value);
                return std::string(scratch.data(), static_cast<size_t>(length));
            }

            /// Appends one "|------ <name> = <value>\n" line to out, laid out like
            /// printf("|------ %-50s = %16s\n") but without any length limit on the
            /// metric name or the value text.
            inline void AppendMetricLine(std::string& out, char const* pMetricName, double value)
            {
                size_t const nameWidth = 50;
                size_t const valueWidth = 16;

                // A null name is printed as a placeholder instead of being dereferenced.
                std::string const name = pMetricName ? pMetricName : "(null)";
                std::string const valueText = FormatMetricValue(value);

                out += "|------ ";
                out += name;
                if (name.size() < nameWidth)
                {
                    out.append(nameWidth - name.size(), ' ');      // left-justified name
                }
                out += " = ";
                if (valueText.size() < valueWidth)
                {
                    out.append(valueWidth - valueText.size(), ' '); // right-justified value
                }
                out += valueText;
                out += '\n';
            }

            /// Renders the contents of a NVPA_StackData object as text.
            ///
            /// For every stack id the output holds one line listing the range ids of
            /// that stack, followed by one line per enabled metric with its value.
            ///
            /// @param pStackData  Stack data to read; its config provides the enabled metrics.
            /// @param dataKind    Chooses the banner and the label in front of the range ids.
            /// @return The formatted text, or an empty string when the stack data
            ///         reports no config.
            inline std::string StackDataStr(NVPA_StackData const* pStackData, StackDataKind dataKind)
            {
                std::string retval;

                NVPA_Config const* pConfig = nullptr;
                INSTRUMENT_NVPA(NVPA_StackData_GetConfig(pStackData, &pConfig));
                if (!pConfig)
                {
                    return retval;
                }

                size_t numEnabledMetrics = 0;
                INSTRUMENT_NVPA(NVPA_Config_GetNumEnabledMetrics(pConfig, &numEnabledMetrics));

                // data() is used throughout instead of &v[0]: indexing an empty vector is
                // undefined, and any of the counts queried here may legitimately be zero.
                std::vector<NVPA_Bool> enabledMetricSerialized(numEnabledMetrics);
                INSTRUMENT_NVPA(NVPA_Config_GetEnabledMetricsSerialized(pConfig, numEnabledMetrics, enabledMetricSerialized.data(), nullptr));

                std::vector<NVPA_MetricId> metricIds(numEnabledMetrics);
                INSTRUMENT_NVPA(NVPA_Config_GetEnabledMetricIds(pConfig, numEnabledMetrics, metricIds.data(), nullptr));

                std::vector<const char*> metricNames(numEnabledMetrics);
                INSTRUMENT_NVPA(NVPA_GetMetricNames(numEnabledMetrics, metricIds.data(), metricNames.data()));

                size_t numStackIds = 0;
                INSTRUMENT_NVPA(NVPA_StackData_GetNumStackIds(pStackData, &numStackIds));

                std::vector<NVPA_StackId> stackIds(numStackIds);
                INSTRUMENT_NVPA(NVPA_StackData_GetStackIds(pStackData, stackIds.size(), stackIds.data(), nullptr));

                size_t maxStackSize = 0;
                INSTRUMENT_NVPA(NVPA_StackData_GetMaxStackSize(pStackData, &maxStackSize));

                bool const isRangeView = (dataKind == StackDataKind::Ranges);
                if (isRangeView)
                {
                    retval += "|=RANGES==|=========|=========|=========|=========|=========|=========|=========\n";
                }
                else
                {
                    retval += "|=STACKS==|=========|=========|=========|=========|=========|=========|=========\n";
                }

                // Scratch buffers are allocated once and reset for every stack.
                std::vector<double> values;
                std::vector<uint32_t> errors;
                std::vector<NVPA_RangeId> rangeIds;

                for (NVPA_StackId const stackId : stackIds)
                {
                    values.assign(numEnabledMetrics, 0.0);
                    errors.assign(numEnabledMetrics, 0u);
                    INSTRUMENT_NVPA(NVPA_StackData_GetMetricValues(pStackData, stackId, numEnabledMetrics, values.data(), errors.data(), nullptr));

                    retval += isRangeView ? "|-- rangeId  = " : "|-- rangeIds = ";

                    rangeIds.assign(maxStackSize, NVPA_RangeId());
                    size_t numRanges = 0;
                    INSTRUMENT_NVPA(NVPA_StackData_GetRangeIds(pStackData, stackId, maxStackSize, rangeIds.data(), &numRanges));

                    // NOTE(review): it is not visible here whether numRanges can exceed
                    // the capacity passed in, so it is bounded before indexing.
                    if (numRanges > rangeIds.size())
                    {
                        numRanges = rangeIds.size();
                    }

                    for (size_t rangeIdx = 0; rangeIdx < numRanges; ++rangeIdx)
                    {
                        // Range ids are printed as 32-bit unsigned values.
                        retval += std::to_string(static_cast<uint32_t>(rangeIds[rangeIdx]));
                        retval += ", ";
                    }
                    retval += '\n';

                    for (size_t enabledIndex = 0; enabledIndex < numEnabledMetrics; ++enabledIndex)
                    {
                        AppendMetricLine(retval, metricNames[enabledIndex], values[enabledIndex]);
                    }
                }

                return retval;
            }
        }

        std::string CollectAndPrint( const NVPA_StackData* pStackData )
        {
            // NVPA GUIDE QUERY METRICS
            //   Perfworks writes metrics into a NVPA_StackData object in a format that is
            //   efficient for Perfworks which doesn't necessarily match the ranges set up
            //   by the user. So there are two methods for retrieving this data.
            //   1) QUERY METRICS STACK DATA
            //        query the NVPA_StackData directly to retrieve the internal stack view
            //   2) QUERY METRICS RANGE DATA
            //        create a NVPA_StackData from a base NVPA_StackData but with a range view
            //
            //   Perfworks allows ranges to be nested, ie:
            //
            //   NVPA_Object_PushRange(100);
            //   // draw_calls_A()
            //   NVPA_Object_PushRange(200);
            //   // draw_calls_B()
            //   NVPA_Object_PopRange(); // pop 200
            //   // draw_calls_C()
            //   NVPA_Object_PopRange(); // pop 100
            //
            //   This gives the following timeline based view of the work:
            //
            //               Stack 1  |  Stack 2  | Stack 1
            //   Range 200            | --- B --- |
            //   Range 100  | -- A -- | --------- | -- C -- |
            //
            //   In this example there are 2 stacks, simply called stacks 1 and 2.
            //
            //   When serializing:
            //   Stack 1 contains the values for draw_calls_A(), draw_calls_B(), and draw_calls_C().
            //   Stack 2 contains the values for draw_calls_B().
            //   Range 100 contains the values for draw calls A + B + C.
            //   Range 200 contains the values for draw calls B.
            //   Stack 1 is Range 100.
            //   Stack 2 is Range 100, 200.
            //
            //   When pipelining:
            //   Stack 1 contains the values for draw_calls_A() and draw_calls_C().
            //   Stack 2 contains the values for draw_calls_B().
            //   Range 100 contains the values for draw calls A + B + C.
            //   Range 200 contains the values for draw calls B.
            //   Stack 1 is Range 100.
            //   Stack 2 is Range 100, 200.
            //
            //   If nested ranges are not used, then the range data view will match the original stack data view.

            // NVPA GUIDE QUERY METRICS STACK DATA
            //   Demonstrates querying the metrics as the original stack data view
            std::string retval = Detail::StackDataStr(pStackData, Detail::StackDataKind::Stacks);

            // NVPA GUIDE QUERY METRICS RANGE DATA
            //    Demonstrates how to create a range data view out of the original stack data view.
            NVPA_StackDataOptions* pStackDataOptions = nullptr;
            INSTRUMENT_NVPA(NVPA_StackDataOptions_Create(&pStackDataOptions));
            NVPA_StackData* pRangeData = nullptr;
            INSTRUMENT_NVPA(NVPA_StackData_CreateStackData(pStackData, pStackDataOptions, &pRangeData));
            INSTRUMENT_NVPA(NVPA_StackDataOptions_Destroy(pStackDataOptions));

            INSTRUMENT_NVPA(NVPA_StackData_AccumulateRangeData(pStackData, 0, nullptr, pRangeData));

            retval += Detail::StackDataStr(pRangeData, Detail::StackDataKind::Ranges);
            INSTRUMENT_NVPA(NVPA_StackData_Release(pRangeData));

            return retval;
        }
    }
}
