﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <algorithm>

#include <nn/nn_Common.h>

NN_PRAGMA_PUSH_WARNINGS
#pragma GCC diagnostic ignored "-Wsign-conversion"
#include <nn/nn_Result.h>
#include <nn/nn_Version.h>
#include <nn/init.h>
#include <nn/os.h>
#include <nn/profiler/profiler_Api.h>
#include <nn/profiler/profiler_Result.h>
NN_PRAGMA_POP_WARNINGS

#include "pmu/profiler_PerfCounterGroups.h"
#include "pmu/profiler_PerfCounterThread.h"
#include "profiler_CodeRewriting.h"
#include "profiler_CommMessages.h"
#include "profiler_Comms.h"
#include "profiler_CommsIpc.h"
#include "profiler_Core.h"
#include "profiler_DataStream.h"
#include "profiler_Defines.h"
#include "profiler_HeaderWriter.h"
#include "profiler_IpcEvent.h"
#include "profiler_LibModule.h"
#include "profiler_LibPrivate.h"
#include "profiler_Logging.h"
#include "profiler_Memory.h"
#include "profiler_Messages.h"
#include "profiler_ResultPrivate.h"
#include "profiler_SamplingThread.h"
#include "profiler_SamplingOffset.h"
#include "profiler_StringTable.h"
#include "profiler_TargetApplication.h"
#include "profiler_ThreadPriorities.h"
#include "profiler_Time.h"
#include "profiler_Vsync.h"


// We assume that this is true in many places. Make sure that it is actually true.
NN_STATIC_ASSERT(sizeof(uintptr_t) <= sizeof(uint64_t));


namespace nn { namespace profiler {


void DumpSettings(SettingsFromThePcGui* settings);


//-------------------------------------------------------------------
// Internal Variables
//-------------------------------------------------------------------
namespace /*anonymous*/
{
    // Alignment and size (in bytes) of the stack allocated for the profiler core
    // thread in StartProfilerCore().
    const int CoreStackAlignment = nn::os::ThreadStackAlignment;
    const size_t CoreStackSize = 8 * 1024;

    // NOTE(review): not referenced anywhere in the visible part of this file.
    enum ProfileStartType : int
    {
        ProfileStartType_Error,
        ProfileStartType_Pccom,
    };

    // State shared by the functions in this file.
    struct Globals
    {
        // Settings packet; a pointer to it is handed to CheckForBegin() and
        // returned by GetProfilerSettingsPointer().
        SettingsFromThePcGui settings;
        // Receives the requested time between samples as its base value when an
        // out-of-process profile starts.
        SamplingOffset samplingOffset;
    };
    // Allocated and constructed at the start of ProfilerMainThread().
    Globals* globals;

    // Thread object and stack memory of the profiler core thread
    // (created in StartProfilerCore()).
    struct CoreThread
    {
        nn::os::ThreadType thread;
        void* stack;
    } coreThread;


    // Applies the profiling mode requested by the settings packet to the current
    // target application: out-of-process when UseOutOfProcessSampling is set,
    // in-process otherwise. Returns whatever SetProfilingMode() reports.
    nn::Result CheckProfilingMode(SettingsFromThePcGui* settings)
    {
        const bool wantsOutOfProcess =
            (settings->flags & SettingsFromThePcGui::UseOutOfProcessSampling) != 0;
        const ProfilingMode requestedMode =
            wantsOutOfProcess ? ProfilingMode_OutOfProcess : ProfilingMode_InProcess;

        return TargetApplication::GetCurrent()->SetProfilingMode(requestedMode);
    }


    // VerifySettings
    //  Validates the settings packet received from the PC GUI and corrects, in
    //  place, any values that can be corrected:
    //   - Packets older than ProfilerMinGuiVersion are rejected (a version
    //     mismatch notification is sent to the PC).
    //   - An instrumented profile forces UseSimple, clears SampleByPerfCounter and
    //     drops the instrumentation addresses when they are not valid.
    //   - All performance counters are disabled when the PMU is not available.
    //   - coreMask is restricted to the core mask of the target application.
    //  Returns false if the settings cannot be used, true otherwise.
    bool VerifySettings(SettingsFromThePcGui* settings)
    {
        INFO_LOG("Initial settings:\n--\n");
        DumpSettings(settings);
        INFO_LOG("--\n");

        // Verify that the version numbers match
        // If they do not, return back an error
        if (settings->version < ProfilerMinGuiVersion)
        {
            SendNotificationToPC(
                NotificationErrorVersionMismatch,
                nn::profiler::ResultInvalidArgument());
            INFO_LOG("Incompatible plugin version\n"
                "\tSettings Version : %d\n"
                "\tExpected at least: %d\n",
                settings->version, ProfilerMinGuiVersion);
            return false;
        }

        // Fix up invalid flag settings
        // If attempting an instrumented profile, give this priority
        if (settings->flags & SettingsFromThePcGui::UseInstrumented)
        {
            settings->flags |= SettingsFromThePcGui::UseSimple;
            settings->flags &= static_cast<uint32_t>(~SettingsFromThePcGui::SampleByPerfCounter);

            auto app = TargetApplication::GetCurrent();

            // Instrument
            // Both the trampoline and the function to instrument must be valid code
            // addresses. For out-of-process sampling the target must additionally be
            // attached to the library and must itself accept both addresses.
            if (!IsValidCodeAddress(static_cast<uintptr_t>(settings->arm_trampoline1)) ||
                !IsValidCodeAddress(static_cast<uintptr_t>(settings->func_to_instrument)) ||
                ((settings->flags & SettingsFromThePcGui::UseOutOfProcessSampling) &&
                    (app->IsAttachedToLibrary() == false ||
                     app->IsValidCodeAddress(settings->arm_trampoline1).IsFailure() ||
                     app->IsValidCodeAddress(settings->func_to_instrument).IsFailure())))
            {
                // The address fields are integers, so they are printed with an
                // integer conversion rather than %p (which requires a void*).
                WARNING_LOG("Clearing instrumentation values. Previously: 0x%llx -> 0x%llx\n",
                    static_cast<unsigned long long>(settings->func_to_instrument),
                    static_cast<unsigned long long>(settings->arm_trampoline1));

                // A code-block only profile is being attempted
                settings->arm_trampoline1 = 0;
                settings->func_to_instrument = 0;
            }
        }

        // Only allow Performance Counters to be enabled if the PMU access has been enabled by
        // the user in Dev Menu.
        // NOTE(review): only the first counter slot is inspected to decide whether the
        // user tried to use the PMU - confirm the GUI always fills slot 0 first.
        bool triedUsingPmu = (settings->perf_counters[0] != pmu::PerfCounter_Disabled);
        if (!pmu::IsAvailable() && triedUsingPmu)
        {
            SendNotificationToPC(NotificationWarningPmuNotAvailable, nn::profiler::ResultInvalidArgument());

            for (int i = 0; i < pmu::PerformanceCounterCount; ++i)
            {
                settings->perf_counters[i] = pmu::PerfCounter_Disabled;
            }
            settings->perf_counter_cycle = 0;
        }

        // Ensure that the mask of cores to record only contains valid values
        settings->coreMask &= TargetApplication::GetCurrent()->GetCoreMask();

        INFO_LOG("Settings to be used for profiling\n--\n");
        DumpSettings(settings);
        INFO_LOG("--\n");

        return true;
    }


    //-------------------------------------------------------------------
    // Profiler Header setup
    //-------------------------------------------------------------------
    // Re-initializes the master header and writes the values that describe this
    // profile: header/runtime/SDK versions, target SDK version and pointer size,
    // the recorded core mask, the timer frequency and the base time.
    void SetupMasterHeader(SettingsFromThePcGui* settings)
    {
        Header* masterHeader = GetMasterHeader();
        masterHeader->Initialize();
        masterHeader->WriteControlValueOnly(HeaderSpecialValues_MasterHeaderBegin);

        // Versions of the header format, of the profiler runtime and of its SDK.
        masterHeader->Write(HeaderSpecialValues_HeaderVersion, ProfilerHeaderVersion);
        masterHeader->Write(HeaderSpecialValues_RuntimeVersion, ProfilerRuntimeVersion);
        masterHeader->Write(
            HeaderSpecialValues_RuntimeSdkVersion,
            static_cast<uint32_t>(NN_SDK_CURRENT_VERSION_NUMBER));

        // Properties of the application being profiled.
        auto targetApp = TargetApplication::GetCurrent();
        masterHeader->Write(HeaderSpecialValues_ApplicationSdkVersion, targetApp->GetSdkVersion());
        masterHeader->Write(
            HeaderSpecialValues_ApplicationPointerSize,
            static_cast<uint32_t>(targetApp->GetPointerSize()));

        masterHeader->Write(HeaderSpecialValues_CoresRecorded, settings->coreMask);

        // The units-per-second value is written before the base time so that the
        // base time can be properly converted to microseconds.
        masterHeader->Write(HeaderSpecialValues_TimeUnitsPerSecond, GetTimeFrequency());
        masterHeader->Write(HeaderSpecialValues_BaseTime, GetCurrentTime());
    }


    // Runs one complete out-of-process profile, from the start request to the
    // transfer of the recorded data to the PC.
    //
    // Sequence: reset the sample buffers, wait for the sampling threads, verify the
    // settings, pick the core for the profiler's own threads, write the master
    // header, notify the PC, optionally instrument a function, start sampling,
    // block until the stop event is signaled, stop sampling, remove the
    // instrumentation, wait for the per-core buffers to close and send the data.
    //
    // Returns ResultInvalidArgument when VerifySettings() rejects the settings,
    // nn::ResultSuccess() otherwise.
    nn::Result TakeOutOfProcessProfile()
    {
        SampleBuffers::GetInstance()->Reset();

        // Dynamic memory setup may mean that the sampling threads are not quite ready yet.
        // Wait for them to be initialized before attempting to start profiling.
        while (!AreSamplingThreadsInitialized())
        {
            nn::os::SleepThread(nn::TimeSpan::FromMicroSeconds(100));
        }

        //TargetApplication::GetCurrent()->StartProfiling(
        //    globals->settings.requested_time_between_samples_in_nanoseconds);

        DEBUG_LOG("** received start request\n");

        TargetApplication::GetCurrent()->FindCodeRegions();

        if (VerifySettings(&globals->settings) == false)
        {
            INFO_LOG("Invalid settings... Restarting loop.\n");
            return nn::profiler::ResultInvalidArgument();
        }

        // When only bit 3 of the core mask is requested, the core, vsync and watch
        // threads are moved to core 2; otherwise they run on ProfilerPrimaryCore.
        // NOTE(review): presumably this keeps the profiler's own threads off the
        // single core being sampled - confirm.
        if (globals->settings.IsOutOfProcess() && (globals->settings.coreMask == (1 << 3)))
        {
            ChangeCoreThreadAffinity(2);
            ChangeVsyncThreadAffinity(2);
            TargetApplication::GetCurrent()->ChangeWatchThreadAffinity(2);
        }
        else
        {
            ChangeCoreThreadAffinity(ProfilerPrimaryCore);
            ChangeVsyncThreadAffinity(ProfilerPrimaryCore);
            TargetApplication::GetCurrent()->ChangeWatchThreadAffinity(ProfilerPrimaryCore);
        }
        nn::os::YieldThread();

        SetupMasterHeader(&globals->settings);

        if (IsPCConnected())
        {
            DEBUG_LOG("profiler: Sending message that profiling has started\n");
            SendProfilingHasBegun();
            DEBUG_LOG("profiler: message sent\n");
        }

        globals->samplingOffset.SetBaseValue(
            globals->settings.requested_time_between_samples_in_nanoseconds);

        DUMP_CURRENT_LINE();

        // Instrument
        if (globals->settings.flags & SettingsFromThePcGui::UseInstrumented
            && IsValidCodeAddress(static_cast<uintptr_t>(globals->settings.func_to_instrument)))
        {
            nn::Result result;
            DEBUG_LOG("Attempting to instrument\n");
            SetTrampolineAddresses(&globals->settings.arm_trampoline1, 1);
            result = InstrumentFunction(globals->settings.func_to_instrument, 0);
            if (result.IsFailure())
            {
                ERROR_LOG("Failed to instrument\n");
                DumpResultInformation(LOG_AS_ERROR, result);
                // Clearing the address makes the "Undo Instrument" check below
                // skip the removal, because 0 is expected to fail IsValidCodeAddress().
                globals->settings.func_to_instrument = 0;
            }
        }

        // and then... begin.
        DEBUG_LOG("** main loop / stage 2.\n");

        // MAIN PROFILING LOOP
        DUMP_CURRENT_LINE();

        StartProfilingSamplingThreads(&globals->settings);

        SetProfilerStatus(ProfilerStatus_Profiling);

        // Sampling runs on the sampling threads; this thread just blocks until a
        // stop is requested.
        nn::os::WaitEvent(GetStopEvent());

        DUMP_CURRENT_LINE();

        StopProfilingSamplingThreads();

        DUMP_CURRENT_LINE();

        TargetApplication::GetCurrent()->StopProfiling();

        DUMP_CURRENT_LINE();

        SetProfilerStatus(ProfilerStatus_Transferring);

        CheckForNewModules();
        CloseActiveModules();

        // Undo Instrument
        if (globals->settings.flags & SettingsFromThePcGui::UseInstrumented
            && IsValidCodeAddress(static_cast<uintptr_t>(globals->settings.func_to_instrument)))
        {
            InstrumentRemoval(0);
        }

        // Wait for all buffers to be closed out
        WaitCoresClosed();

        // If you use core mask 0xF (all cores), please uncomment this code.
        // If not, htcs::Send() returns HTCS_ECONNRESET. Sako is investigating.
        //nn::os::SleepThread(nn::TimeSpan::FromMilliSeconds(100));

        // Now send the raw data.
        DUMP_CURRENT_LINE();
        DEBUG_LOG("** main loop / stage 3.\n");
        {
            // SendDataToPC() may return no event; only when it returns one do we
            // wait on it and then reset the sample buffers.
            nn::os::EventType *other_event = SendDataToPC();
            if (other_event != nullptr)
            {
                nn::os::WaitEvent(other_event);
                SampleBuffers::GetInstance()->Reset();
            }
        }

        TargetApplication::GetCurrent()->DumpTimers();
        // Transfer is now completed.

        DUMP_CURRENT_LINE();
        SetProfilerStatus(ProfilerStatus_Active);
        DUMP_CURRENT_LINE();

        return nn::ResultSuccess();
    }


    // Finishes an in-process profile: removes any instrumentation, forwards a
    // Stop message to the in-process library, closes the instrumentation buffer
    // and waits for the transfer-complete event while the PC stays connected and
    // the library stays initialized. Always returns nn::ResultSuccess().
    nn::Result TransferInProcessProfile()
    {
        nn::os::ClearEvent(GetTransferCompleteEvent());

        SetProfilerStatus(ProfilerStatus_Transferring);

        // Undo Instrument (only while attached and only if a function was instrumented)
        if (TargetApplication::GetCurrent()->IsAttached()
            && (globals->settings.flags & SettingsFromThePcGui::UseInstrumented)
            && IsValidCodeAddress(static_cast<uintptr_t>(globals->settings.func_to_instrument)))
        {
            InstrumentRemoval(0);
        }

        // Forward the stop request through the IPC event queue.
        if (TargetApplication::GetCurrent()->IsLibraryInitialized())
        {
            IpcEventInfo stopRequest;
            stopRequest.event = IpcEvent_PcMessageForward;
            stopRequest.info.pcMessageForward.message = ProfilerCommMessage_Stop;
            stopRequest.info.pcMessageForward.size = 0;
            stopRequest.info.pcMessageForward.smallPayload = 0;

            auto eventQueue = GetIpcEventQueue();
            eventQueue->Push(&stopRequest);
        }

        CloseInstrumentationBuffer();

        // Wait in one second slices so that a dropped PC connection or a library
        // shutdown ends the wait instead of blocking forever.
        bool transferFinished = false;
        while (!transferFinished
            && IsPCConnected()
            && TargetApplication::GetCurrent()->IsLibraryInitialized())
        {
            transferFinished = nn::os::TimedWaitEvent(
                GetTransferCompleteEvent(),
                nn::TimeSpan::FromSeconds(1));
        }

        SetProfilerStatus(ProfilerStatus_Active);

        return nn::ResultSuccess();
    }


    // Runs one in-process profile: verifies the settings, prepares the
    // instrumentation buffer, optionally instruments a function, forwards a Start
    // message through the IPC event queue, blocks until the stop event is signaled
    // and then hands over to TransferInProcessProfile().
    // Returns ResultInvalidArgument when the settings are rejected, otherwise the
    // result of TransferInProcessProfile().
    nn::Result TakeInProcessProfile()
    {
        DUMP_CURRENT_LINE();

        SampleBuffers::GetInstance()->Reset();

        if (!VerifySettings(&globals->settings))
        {
            INFO_LOG("Invalid settings... Restarting loop.\n");
            return nn::profiler::ResultInvalidArgument();
        }

        PrepareInstrumentationBuffer(&globals->settings);

        // Instrument (only while attached and only with a valid function address)
        const bool shouldInstrument =
            TargetApplication::GetCurrent()->IsAttached()
            && (globals->settings.flags & SettingsFromThePcGui::UseInstrumented)
            && IsValidCodeAddress(static_cast<uintptr_t>(globals->settings.func_to_instrument));
        if (shouldInstrument)
        {
            DEBUG_LOG("Attempting to instrument\n");
            SetTrampolineAddresses(&globals->settings.arm_trampoline1, 1);

            nn::Result instrumentResult = InstrumentFunction(globals->settings.func_to_instrument, 0);
            if (instrumentResult.IsFailure())
            {
                ERROR_LOG("Failed to instrument\n");
                DumpResultInformation(LOG_AS_ERROR, instrumentResult);
                // Forget the address so that no removal is attempted later.
                globals->settings.func_to_instrument = 0;
            }
        }

        // Forward the start request through the IPC event queue.
        {
            IpcEventInfo startRequest;
            startRequest.event = IpcEvent_PcMessageForward;
            startRequest.info.pcMessageForward.message = ProfilerCommMessage_Start;
            startRequest.info.pcMessageForward.size = sizeof(SettingsFromThePcGui);
            startRequest.info.pcMessageForward.smallPayload = 0;

            auto eventQueue = GetIpcEventQueue();
            eventQueue->Push(&startRequest);
        }

        SetProfilerStatus(ProfilerStatus_Profiling);

        nn::os::WaitEvent(GetStopEvent());

        return TransferInProcessProfile();
    }
}


void StartProfilerCore()
{
    nn::os::EventType waitStart;
    nn::os::InitializeEvent(&waitStart, false, nn::os::EventClearMode_AutoClear);

    coreThread.stack = Memory::GetInstance()->Allocate(CoreStackSize, CoreStackAlignment);
    nn::Result result = nn::os::CreateThread(
        &coreThread.thread,
        ProfilerMainThread,
        &waitStart,
        coreThread.stack,
        CoreStackSize,
        ThreadPriority_Core,
        ProfilerPrimaryCore
    );
    NN_ABORT_UNLESS_RESULT_SUCCESS(result);

    nn::os::StartThread(&coreThread.thread);

    nn::os::WaitEvent(&waitStart);

    SetProfilerStatus(ProfilerStatus_Active);

    nn::os::FinalizeEvent(&waitStart);
}


void ProfilerMainThread(/* nn::os::EventType* */ void* arg)
{
    nn::Result result;

    INFO_LOG("Made it into ProfilerMainThread!\n");

    globals = Memory::GetInstance()->Allocate<Globals>();
    memset(globals, 0, sizeof(Globals));
    new (globals) Globals;

    bool sampleBuffersAvailable = SampleBuffers::GetInstance()->IsInitialized();

    if (sampleBuffersAvailable)
    {
        InitializeLib();
    }

    // Give the profiler some time to stabilize
    nn::os::SleepThread(nn::TimeSpan::FromMilliSeconds(100));

    // Dump debugging information
    nn::os::SetThreadName(nn::os::GetCurrentThread(), "[profiler] Main Thread");
    DumpThreadInformation();

    // start PCCOM
    InitializeCommunicationsLayer();
    DEBUG_LOG("Communications online\n");

    // Set up our thread that will be in charge of getting performance counter
    //  data
    DEBUG_LOG("Setting up performance counter threads\n");
    nn::profiler::pmu::Initialize();

    // Give PCCOM some time to start up
    nn::os::SleepThread(nn::TimeSpan::FromMilliSeconds(100));

    DEBUG_LOG("NX CPU Profiler Started\n");

    TargetApplication::Initialize();
    DEBUG_LOG("Target application initialized\n");

    // Initialize Core and Master Headers
    if (sampleBuffersAvailable)
    {
        InitializeHeaders();
    }

    ResetInstrumentation();

    DEBUG_LOG("Starting sampling threads\n");
    result = InitializeSamplingThreads();
    if (nn::profiler::ResultNotInitialized::Includes(result))
    {
        WARNING_LOG("Sample buffers are not initialized!\n");
        DumpResultInformation(LOG_AS_WARNING, result);
    }
    else if (result.IsFailure())
    {
        ERROR_LOG("Error starting sampling threads!\n");
        DumpResultInformation(LOG_AS_ERROR, result);
        NN_ABORT();
    }

    if (sampleBuffersAvailable)
    {
        InitializeVsyncSupport();
    }

    DEBUG_LOG("Initialization nominal\n");

    if (arg != nullptr)
    {
        // After we signal this event it will be destroyed, don't attempt to use it again
        nn::os::EventType* waitEvent = reinterpret_cast<nn::os::EventType*>(arg);
        nn::os::SignalEvent(waitEvent);
        arg = nullptr;
    }

    DUMP_CURRENT_LINE();
    while (NN_STATIC_CONDITION(true))
    {
        nn::Result result;

        DEBUG_LOG("** main loop / stage 1.\n");
        nn::os::EventType *beginEvent = CheckForBegin(&globals->settings);
        DUMP_CURRENT_LINE();
        nn::os::WaitEvent(beginEvent);

        result = CheckProfilingMode(&globals->settings);

        if (result.IsSuccess())
        {
            const ProfilingMode mode = TargetApplication::GetCurrent()->GetProfilingMode();
            if (mode == ProfilingMode_OutOfProcess)
            {
                result = TakeOutOfProcessProfile();
            }
            else if (mode == ProfilingMode_InProcess)
            {
                result = TakeInProcessProfile();
            }
            else
            {
                nn::Result r = nn::profiler::ResultInvalidProfilerStatus();
                ERROR_LOG("No profiling client available!\n");
                DumpResultInformation(LOG_AS_ERROR, r);
                SendNotificationToPC(NotificationErrorNoProfilingClient, r);
            }
        }

        if (result.IsFailure())
        {
            ERROR_LOG("Error taking profile\n");
            DumpResultInformation(LOG_AS_ERROR, result);
            SendNotificationToPC(NotificationErrorDidNotProfile, result);
        }

        nn::os::ClearEvent(beginEvent);
    }
} // NOLINT(impl/function_size)


// Returns a pointer to the settings stored in the file-wide state.
// The state is allocated at the start of ProfilerMainThread(), so this must not
// be called before that thread has run.
SettingsFromThePcGui* GetProfilerSettingsPointer()
{
    SettingsFromThePcGui* const activeSettings = &globals->settings;
    return activeSettings;
}


// Writes the contents of a settings packet to the info log: version, flags,
// every performance counter slot, the counter cycle, the instrumentation
// addresses and the core mask.
void DumpSettings(SettingsFromThePcGui* settings)
{
    NN_UNUSED(settings); // when logging is disabled

    INFO_LOG("Settings:\n");
    INFO_LOG(" Version: %d\n", settings->version);
    // flags and the two address fields are integers, so they are printed with an
    // integer conversion; %p requires a void* argument.
    INFO_LOG(" Flags: 0x%llx\n", static_cast<unsigned long long>(settings->flags));
    INFO_LOG(" Perf Counters:\n");
    for (int i = 0; i < pmu::PerformanceCounterCount; ++i)
    {
        INFO_LOG("    %d: 0x%02x\n", i, settings->perf_counters[i]);
    }
    INFO_LOG(" PerfCounterCycle: %d\n", settings->perf_counter_cycle);
    INFO_LOG(" Instrument: 0x%llx\n", static_cast<unsigned long long>(settings->func_to_instrument));
    INFO_LOG(" Trampoline: 0x%llx\n", static_cast<unsigned long long>(settings->arm_trampoline1));
    INFO_LOG(" Profile Cores: %x\n", settings->coreMask);
}


// Moves the profiler core thread to the given core.
// core must be in [0, SupportedCoreCount); this is only checked by an SDK assert.
void ChangeCoreThreadAffinity(int core)
{
    NN_SDK_ASSERT(core >= 0 && core < static_cast<int>(SupportedCoreCount));

    // Mask with only the bit of the requested core set.
    const int singleCoreMask = (1 << core);
    nn::os::SetThreadCoreMask(&coreThread.thread, core, singleCoreMask);
}


// Grows the process heap and splits the new memory into the profiler's second
// heap (the first ProfilerExtraMemory bytes) and the sample buffers (the rest).
//
// initialStartup: true when called during start-up. In that case a larger amount
//                 of free memory is required, and the library, headers, sampling
//                 threads and vsync support are NOT initialized here.
//
// Returns true if the second heap is initialized when the function returns
// (including the case where it already was), false otherwise.
// Aborts if the primary memory is not initialized, if the second heap or the
// sample buffers cannot be set up on the newly allocated block, or (when
// !initialStartup) if the sampling threads fail to start.
bool DynamicMemorySetup(bool initialStartup)
{
    NN_ABORT_UNLESS(Memory::GetInstance()->IsInitialized());

    // If the second heap is already initialized, we have already done this.
    if (Memory::GetInstance()->IsSecondHeapInitialized())
    {
        DEBUG_LOG("Attempted dynamic memory setup, but the process was already completed.\n");
        return true;
    }

    nn::Result result;

    nn::os::MemoryInfo memInfo;
    nn::os::QueryMemoryInfo(&memInfo);
    INFO_LOG("Memory Info:\n");
    INFO_LOG("  Allocated Memory Heap Size : %ld\n", memInfo.allocatedMemoryHeapSize);
    INFO_LOG("  Program Size               : %ld\n", memInfo.programSize);
    INFO_LOG("  Thread Count               : %d\n", memInfo.threadCount);
    INFO_LOG("  Total Available Memory Size: %ld\n", memInfo.totalAvailableMemorySize);
    INFO_LOG("  Total Memory Heap Size     : %ld\n", memInfo.totalMemoryHeapSize);
    INFO_LOG("  Total Thread Stack Size    : %ld\n", memInfo.totalThreadStackSize);
    INFO_LOG("  Total Used Memory Size     : %ld\n", memInfo.totalUsedMemorySize);

    size_t requestedSize = nn::profiler::MinimumBufferSize;
    const size_t AbsoluteMinimumMemorySize = nn::profiler::ProfilerExtraMemory +
        (nn::profiler::SampleBufferIndex_MAX * nn::profiler::SampleMemoryBlockSize);

    // We cannot pass testing in 4GB-sdk mode. Make sure we have more than enough space.
    // On my SDEV by default in 6GB, there is 380MB free or so. So, let's check on 256MB.
    // At start-up the threshold is the requested size plus 192 MiB; afterwards it is
    // twice the absolute minimum.
    size_t requiredMemorySize = AbsoluteMinimumMemorySize * 2;
    if (initialStartup) { requiredMemorySize = requestedSize +  64 * 1024 * 1024 * 3; }

    if (memInfo.totalAvailableMemorySize > requiredMemorySize)
    {
        // Never take more than half of the available memory to allow other processes to start.
        requestedSize = std::min(static_cast<uint64_t>(requestedSize), memInfo.totalAvailableMemorySize / 2);
        requestedSize = nn::util::align_down(requestedSize, nn::os::MemoryHeapUnitSize);

        // Final sanity check that we will be getting enough memory to take an out-of-process profile.
        if (requestedSize < AbsoluteMinimumMemorySize)
        {
            WARNING_LOG("Requested size ends up smaller than the absolute minimum: %ld < %ld\n",
                requestedSize,
                AbsoluteMinimumMemorySize);
            return false;
        }

        result = nn::os::SetMemoryHeapSize(requestedSize);
        if (result.IsSuccess())
        {
            uintptr_t memory;
            result = nn::os::AllocateMemoryBlock(&memory, requestedSize);
            if (result.IsSuccess())
            {
                // The front of the block becomes the second heap ...
                void* heapMem = reinterpret_cast<void*>(memory);
                size_t heapSize = nn::profiler::ProfilerExtraMemory;

                bool success = nn::profiler::Memory::GetInstance()->AddSecondHeap(heapMem, heapSize);
                NN_ABORT_UNLESS(success, "Failed to initialize memory heap\n");

                // ... and everything behind it is used for the sample buffers.
                void* sampleBuffer = reinterpret_cast<void*>(memory + heapSize);
                size_t sampleBufferSize = requestedSize - heapSize;
                success = nn::profiler::SampleBuffers::GetInstance()->Initialize(sampleBuffer, sampleBufferSize);
                NN_ABORT_UNLESS(success, "Could not initalize sample buffers\n");
            }
            else
            {
                // Report the failure instead of silently returning false.
                // NOTE(review): the heap size set above is not rolled back here - confirm
                // whether it should be.
                ERROR_LOG("Failed to allocate the profiler memory block\n");
                DumpResultInformation(LOG_AS_ERROR, result);
            }
        }
        else
        {
            // Report the failure instead of silently returning false.
            ERROR_LOG("Failed to set the memory heap size\n");
            DumpResultInformation(LOG_AS_ERROR, result);
        }
    }
    else
    {
        ERROR_LOG("Insufficient memory available to perform dynamic setup: has %ld, wanted: %ld\n",
            memInfo.totalAvailableMemorySize,
            requiredMemorySize);
    }

    // After start-up, the pieces that depend on the new memory are initialized here.
    if (!initialStartup && Memory::GetInstance()->IsSecondHeapInitialized())
    {
        InitializeLib();
        InitializeHeaders();
        result = InitializeSamplingThreads();
        if (result.IsFailure())
        {
            ERROR_LOG("Error starting sampling threads!\n");
            DumpResultInformation(LOG_AS_ERROR, result);
            NN_ABORT_UNLESS_RESULT_SUCCESS(result);
        }
        InitializeVsyncSupport();
    }

    return Memory::GetInstance()->IsSecondHeapInitialized();
}


} // profiler
} // nn
