﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <atomic>
#include <cstring>

#include <nn/nn_Common.h>
#include <nn/os/os_Thread.h>
#include <nn/os/os_Event.h>
#include <nn/ro/detail/ro_RoModule.h>
#include <nn/util/util_BitUtil.h>
#include <nn/profiler.h>
#include <nn/profiler/profiler_Control.private.h>

#include "pmu/profiler_PerfCounterGroups.h"
#include "profiler_Comms.h"
#include "profiler_DataStream.h"
#include "profiler_Defines.h"
#include "profiler_Hipc.h"
#include "profiler_LibModule.h"
#include "profiler_LibPrivate.h"
#include "profiler_Logging.h"
#include "profiler_Main.h"
#include "profiler_Memory.h"
#include "profiler_MemoryServer.h"
#include "profiler_RecordMethods.h"
#include "profiler_ResultPrivate.h"
#include "profiler_SampleRates.h"
#include "profiler_SamplingThread.h"
#include "profiler_StringTable.h"
#include "profiler_TargetApplication.h"
#include "profiler_Workarea.h"


namespace nn { namespace profiler {

// Forward declarations of the instrumentation trampoline hooks.
// No definition of InstrumentTrampoline() is visible in this file.
void EnableInstrumentationTrampoline();
void InstrumentTrampoline();

#ifndef NN_BUILD_CONFIG_CPU_CORTEX_A57_AARCH64
// Stub used when NN_BUILD_CONFIG_CPU_CORTEX_A57_AARCH64 is not defined: enabling the
// trampoline does nothing.
// NOTE(review): the Cortex-A57 AArch64 definition is presumably supplied elsewhere -- confirm.
void EnableInstrumentationTrampoline()
{
    // do nothing
}
#endif

namespace /*anonymous*/
{
    // Current profiler state; written by SetProfilerStatus() and read by GetProfilerStatus().
    std::atomic<ProfilerStatus> sProfilerStatus;

    // Alignment and size of the stack handed to the profiler main thread that
    // detail::Initialize() creates.
    const int MAIN_STACK_ALIGNMENT = nn::os::ThreadStackAlignment;
    const size_t MAIN_STACK_SIZE = 16 * 1024;
    // Stack memory and thread object of the profiler main thread. Both are allocated in
    // detail::Initialize() and released in Cleanup().
    void *sMainThreadStack = nullptr;
    nn::os::ThreadType* sMainThreadType;

    // Optional hook set by RegisterFinalizeCallbackGraphicsFeatures(); Finalize() invokes it
    // once and then clears it.
    FinalizeCallback FinalizeCallbackGraphicsFeatures;

    // String table used by RegisterStringId(); created in detail::Initialize() and freed in
    // Cleanup().
    StringTable* gMarkerNames;

    // Tears down what detail::Initialize() sets up: the main thread and its stack, the
    // marker-name table, the library/IPC session, the transfer memory, the profiler
    // process, the target application record, the sample buffers and finally the
    // allocator itself.
    //
    // Initialize() also calls this on failure paths, one of which runs before the main
    // thread objects have been allocated. Every pointer is therefore checked before it is
    // dereferenced or freed.
    void Cleanup()
    {
        if (sMainThreadType != nullptr)
        {
            // Only a started thread has to be asked to exit and then joined.
            if (sMainThreadType->_state == nn::os::ThreadType::State_Started)
            {
                FinalizeMainThread();
                nn::os::WaitThread(sMainThreadType);
                nn::os::DestroyThread(sMainThreadType);
            }

            Memory::GetInstance()->Free(sMainThreadType);
            sMainThreadType = nullptr;
        }

        if (sMainThreadStack != nullptr)
        {
            Memory::GetInstance()->Free(sMainThreadStack);
            sMainThreadStack = nullptr;
        }

        if (gMarkerNames != nullptr)
        {
            Memory::GetInstance()->Free(gMarkerNames);
            gMarkerNames = nullptr;
        }

#if defined(NN_PROFILER_USE_EXTERNAL_PROCESS)
        if (IsProfilerProcessInitialized())
        {
            FinalizeProfilerProcess();
        }
#endif
        // A failure to shut the library session down is logged but does not stop the
        // rest of the teardown.
        nn::Result result = LibraryFinalize();
        if (result.IsFailure())
        {
            ERROR_LOG("Error attempting to shutdown IPC.\n");
            DumpResultInformation(LOG_AS_ERROR, result);
        }

        result = DestroyTransferMemory();
        NN_SDK_ASSERT(result.IsSuccess()); // This call should never return anything other than success

        // NOTE(review): when NN_PROFILER_USE_EXTERNAL_PROCESS is defined this may be the
        // second FinalizeProfilerProcess() call made by this function -- confirm that the
        // call tolerates being repeated.
        FinalizeProfilerProcess();

        TargetApplication::Finalize();

        // Fetch the buffer address before SampleBuffers is finalized, then hand the
        // memory back and shut the allocator down last.
        void* sampleBuffer = SampleBuffers::GetInstance()->GetStartAddress();
        SampleBuffers::GetInstance()->Finalize();
        Memory::GetInstance()->Free(sampleBuffer);
        Memory::GetInstance()->Finalize();
    }

    // Decides whether the calling thread may write a record into the instrumentation
    // buffer right now.
    //
    // Returns the instrumentation work area with its recordCount already incremented (the
    // caller must decrement it when done) and stores the current core number in *pOutCore.
    // Returns nullptr, leaving *pOutCore untouched, when profiling is not running, when
    // the current core or the instrumentation buffer is not being recorded, or when the
    // buffers could not be expanded; in the last case a stop request is sent as well.
    WorkArea* ShouldRecordCheck(uint32_t* pOutCore)
    {
        const bool isProfiling = (GetProfilerStatus() == ProfilerStatus_Profiling);
        if (!isProfiling)
        {
            return nullptr;
        }

        const uint32_t currentCore = static_cast<uint32_t>(nn::os::GetCurrentCoreNumber());
        WorkArea* const area = GetWorkAreaForCore(SampleBufferIndex_Instrumentation);

        // Both the calling core and the instrumentation buffer must be enabled.
        const bool coreEnabled = (area->record_cores & (1 << currentCore)) != 0;
        if (!coreEnabled)
        {
            return nullptr;
        }
        const bool instrumentationEnabled =
            (area->record_cores & (1 << SampleBufferIndex_Instrumentation)) != 0;
        if (!instrumentationEnabled)
        {
            return nullptr;
        }

        // Count this writer before checking buffer space; undo the count on failure.
        ++area->recordCount;
        const bool haveSpace = CheckAndExpandBuffersIfNeeded(SampleBufferIndex_Instrumentation);
        if (haveSpace)
        {
            *pOutCore = currentCore;
            return area;
        }

        --area->recordCount;
        //StopProfilingSamplingThreads();
        SendBasicIpcMessage(ProfilerIpcMessage_StopProfiling, 0);
        return nullptr;
    }

    // Like ShouldRecordCheck(), but independent of the calling core: on success *pOutCore
    // receives the lowest core index below SupportedCoreCount whose bit is set in
    // record_cores, or 0 when none is set.
    //
    // Returns the instrumentation work area with recordCount incremented (the caller
    // decrements it), or nullptr when profiling is not running, the instrumentation buffer
    // is not being recorded, or the buffers could not be expanded.
    WorkArea* GetFirstActiveCore(uint32_t* pOutCore)
    {
        if (GetProfilerStatus() != ProfilerStatus_Profiling)
        {
            return nullptr;
        }

        WorkArea* const area = GetWorkAreaForCore(SampleBufferIndex_Instrumentation);
        const bool instrumentationEnabled =
            (area->record_cores & (1 << SampleBufferIndex_Instrumentation)) != 0;
        if (!instrumentationEnabled)
        {
            return nullptr;
        }

        ++area->recordCount;
        if (!CheckAndExpandBuffersIfNeeded(SampleBufferIndex_Instrumentation))
        {
            --area->recordCount;
            //StopProfilingSamplingThreads();
            SendBasicIpcMessage(ProfilerIpcMessage_StopProfiling, 0);
            return nullptr;
        }

        // Scan upwards for the first recorded core; fall back to core 0.
        uint32_t firstCore = 0;
        uint32_t candidate = 0;
        while (candidate < SupportedCoreCount)
        {
            if ((area->record_cores & (1 << candidate)) != 0)
            {
                firstCore = candidate;
                break;
            }
            ++candidate;
        }
        *pOutCore = firstCore;

        return area;
    }
}



namespace detail {

    /**
    *  @brief Brings the profiler library online using caller-supplied memory.
    *
    *  @param [in] memory      Start of the buffer given to the profiler. Must not be null.
    *  @param [in] memorySize  Size of @a memory in bytes.
    *  @param [in] sdkVersion  SDK version stored on the target application and passed to
    *                          LibraryInitialize().
    *
    *  @return nn::ResultSuccess() on success. Otherwise ResultNullArgument,
    *          ResultAlreadyDone, ResultInitFailedCore3, ResultTooManyCores,
    *          ResultInvalidArgument, ResultMemoryAllocationFailure, or the failure returned
    *          by LibraryInitialize(), nn::os::CreateThread() or GetMainThreadLastError().
    *
    *  @details
    *  The buffer is aligned up to nn::os::GuardedStackAlignment, handed to the Memory
    *  allocator, and the sample buffers and marker-name table are carved out of it.
    *  The library session is then opened and the profiler main thread is created and
    *  started; this function blocks until that thread signals that it has started.
    *  On failure everything that was set up is released again and the profiler status
    *  stays ProfilerStatus_Offline.
    */
    nn::Result Initialize(void* memory, size_t memorySize, uint32_t sdkVersion)
    {
        // Validate arguments passed in
        if (memory == nullptr) { return nn::profiler::ResultNullArgument(); }
        if (GetProfilerStatus() != ProfilerStatus_Offline) { return nn::profiler::ResultAlreadyDone(); }
        if (nn::os::GetCurrentCoreNumber() == 3) { return nn::profiler::ResultInitFailedCore3(); }

        FinalizeCallbackGraphicsFeatures = nullptr;

        bool success;
        nn::Result result = nn::ResultSuccess();

        // Reject core masks that use any bit at or above SupportedCoreCount.
        uint32_t coreMask = static_cast<uint32_t>(nn::os::GetThreadAvailableCoreMask());
        TargetApplication::SetCoreMask(coreMask);
        if ((coreMask & ~((1 << (SupportedCoreCount)) - 1)) != 0) { return nn::profiler::ResultTooManyCores(); }

        size_t coreCount = static_cast<size_t>(nn::util::popcount(coreMask));
        if (coreCount > SupportedCoreCount) { return nn::profiler::ResultTooManyCores(); }

        // VSync is automatically recorded into Instrumentation Buffer, make sure we have space for it.
        size_t sampleBufferCount = coreCount + 1;

        // The buffer must hold one sample block per buffer plus the profiler's own overhead.
        size_t AbsoluteMinimumMemorySize = ProfilerExtraMemory + (sampleBufferCount * SampleMemoryBlockSize);
        if (memorySize < AbsoluteMinimumMemorySize) { return nn::profiler::ResultInvalidArgument(); }

        size_t sampleBufferSize = nn::util::align_down(memorySize - ProfilerExtraMemory, SampleMemoryBlockSize);
        if (sampleBufferSize < (sampleBufferCount * SampleMemoryBlockSize)) { return nn::profiler::ResultInvalidArgument(); }

        // Align the start of the buffer. A whole alignment unit is given up whenever the
        // start had to move, which is never less than the bytes actually skipped.
        void *alignedMem = reinterpret_cast<void*>(nn::util::align_up(reinterpret_cast<uintptr_t>(memory), nn::os::GuardedStackAlignment));
        if (alignedMem != memory) { memorySize -= nn::os::GuardedStackAlignment; }
        memory = alignedMem;

        success = Memory::GetInstance()->Initialize(memory, memorySize);
        if (!success) { return nn::profiler::ResultMemoryAllocationFailure(); }

        void* sampleBuffer = Memory::GetInstance()->Allocate(sampleBufferSize, nn::os::GuardedStackAlignment);
        success = (sampleBuffer != nullptr) &&
            SampleBuffers::GetInstance()->Initialize(sampleBuffer, sampleBufferSize);
        if (!success)
        {
            // Undo the allocator setup so that a later Initialize() call does not run on
            // top of a half-initialized allocator.
            if (sampleBuffer != nullptr) { Memory::GetInstance()->Free(sampleBuffer); }
            Memory::GetInstance()->Finalize();
            return nn::profiler::ResultMemoryAllocationFailure();
        }

        gMarkerNames = reinterpret_cast<StringTable*>(Memory::GetInstance()->Allocate(sizeof(StringTable)));
        if (gMarkerNames == nullptr)
        {
            // Constructing a StringTable at a null address is not an option; unwind what
            // has been set up so far instead.
            SampleBuffers::GetInstance()->Finalize();
            Memory::GetInstance()->Free(sampleBuffer);
            Memory::GetInstance()->Finalize();
            return nn::profiler::ResultMemoryAllocationFailure();
        }
        new (gMarkerNames) StringTable;

        ModuleInitialize();
        EnableInstrumentationTrampoline();

        TargetApplication::Initialize();
        TargetApplication::GetCurrent()->SetSdkVersion(sdkVersion);

#if defined(NN_PROFILER_USE_EXTERNAL_PROCESS)
        if (TargetApplication::GetCoreCount() > 1 &&
            sdkVersion >= NN_SDK_VERSION_NUMBER(3, 0, 0, 0))
        {
            InitializeProfilerProcess();
        }
#endif
        // NOTE(review): when NN_PROFILER_USE_EXTERNAL_PROCESS is defined this may be the
        // second InitializeProfilerProcess() call -- confirm that repeating it is intended.
        InitializeProfilerProcess();
        result = LibraryInitialize(sdkVersion);
        if (result.IsFailure())
        {
            Cleanup();
            SetProfilerStatus(ProfilerStatus_Offline);
            return result;
        }

        SetProfilerStatus(GetProfilerStatusFromServer());
        NN_SDK_ASSERT(GetProfilerStatus() != ProfilerStatus_Offline);

        // From here on out we need to be sure to undo anything that has been initialized
        // if there is a problem that occurred.
        nn::os::EventType waitForMainThreadStart;
        nn::os::InitializeEvent(&waitForMainThreadStart, false, nn::os::EventClearMode_AutoClear);

        int priority = nn::os::HighestThreadPriority + 2;
        //if (nn::os::GetCurrentCoreNumber() == 3) { priority = nn::os::LowestThreadPriority; }
        // NOTE(review): neither of the two allocations below is checked for failure before
        // the result is used.
        sMainThreadStack = Memory::GetInstance()->Allocate(MAIN_STACK_SIZE, MAIN_STACK_ALIGNMENT);
        sMainThreadType = Memory::GetInstance()->Allocate<nn::os::ThreadType>();
        new (sMainThreadType) nn::os::ThreadType;
        result = nn::os::CreateThread(
            sMainThreadType,
            &nn::profiler::ProfilerMainThread,
            &waitForMainThreadStart,
            sMainThreadStack,
            MAIN_STACK_SIZE,
            priority);

        if (result.IsSuccess())
        {
            // The event is passed to the thread as its argument; wait on it, then pick up
            // whatever error the thread reported.
            nn::os::StartThread(sMainThreadType);
            nn::os::WaitEvent(&waitForMainThreadStart);
            result = GetMainThreadLastError();
        }

        nn::os::FinalizeEvent(&waitForMainThreadStart);

        if (result.IsSuccess())
        {
            // Apply the default settings: all available cores, callstacks, no performance
            // counters, SampleRate_ByTime100x.
            SetProfilerStatus(GetProfilerStatusFromServer());
            SetProfileSettings(
                coreMask,
                Flags_Callstack,
                PerformanceCounterGroup_Disabled,
                SampleRate_ByTime100x);
        }
        else
        {
            Cleanup();
            SetProfilerStatus(ProfilerStatus_Offline);
        }
        return result;
    }

    // Sends ProfilerIpcMessage_ForceAttachToInProcess (with argument 0) and returns the
    // result of that send.
    nn::Result ForceAttachToInProcess()
    {
        const nn::Result sendResult = nn::profiler::SendBasicIpcMessage(
            nn::profiler::ProfilerIpcMessage_ForceAttachToInProcess,
            0);
        return sendResult;
    }
}

// Stores the hook that Finalize() calls before tearing the profiler down.
// A later registration replaces an earlier one; passing nullptr removes the hook.
void RegisterFinalizeCallbackGraphicsFeatures(FinalizeCallback callback)
{
    FinalizeCallbackGraphicsFeatures = callback;
}

// Shuts the profiler down and returns it to ProfilerStatus_Offline.
// Does nothing when the profiler is already offline. Always returns nn::ResultSuccess().
nn::Result Finalize()
{
    if (GetProfilerStatus() == ProfilerStatus_Offline)
    {
        return nn::ResultSuccess();
    }

    // Run the registered graphics hook once, then clear it.
    if (FinalizeCallbackGraphicsFeatures != nullptr)
    {
        FinalizeCallbackGraphicsFeatures();
        FinalizeCallbackGraphicsFeatures = nullptr;
    }

    ModuleFinalize();
    FinalizeMemoryProfilerServer();
    Cleanup();
    SetProfilerStatus(ProfilerStatus_Offline);

    return nn::ResultSuccess();
}



// Atomically stores the new profiler status.
void SetProfilerStatus(ProfilerStatus status)
{
    sProfilerStatus.store(status);
}



// Atomically reads the current profiler status.
ProfilerStatus GetProfilerStatus()
{
    const ProfilerStatus current = sProfilerStatus.load();
    return current;
}



// Returns the marker-name string table pointer as currently stored; it is null after
// Cleanup() has released the table.
StringTable* GetMarkerNamesStringTable()
{
    StringTable* const table = gMarkerNames;
    return table;
}



// Records a heartbeat marker on the current core.
// Returns ResultInvalidArgument() when heartbeat lies outside [0, Heartbeats_MAX],
// ResultSkippedRecord() when ShouldRecordCheck() declines the record, and otherwise the
// result of the underlying record call.
nn::Result RecordHeartbeat(Heartbeats heartbeat)
{
    const bool inRange = (heartbeat >= 0) && (heartbeat <= Heartbeats_MAX);
    if (!inRange)
    {
        return ResultInvalidArgument();
    }

    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    auto outcome = RecordHeartbeat(area, heartbeat, currentCore);
    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}


void RecordVsync()
{
    auto settings = GetProfilerSettingsPointer();
    if (settings->IsInProcess())
    {
        uint32_t core;
        WorkArea* ws = GetFirstActiveCore(&core);
        if (ws != nullptr)
        {
            RecordHeartbeat(ws, nn::profiler::Heartbeats_Vsync, core);
            ws->recordCount--;
        }
    }
}


/**
*  @brief Registers a string with the profiler as an ID for User Data or Code Blocks.
*
*  @param [in] pMarkerName The null-terminated string for which to generate an ID.
*
*  @return An ID to use with either User Data or Code Blocks.
*          On error, returns UINT64_MAX.
*
*  @details
*  This function registers a string with the profiler and returns an ID associated with the string.
*  The returned value can be passed in as an ID for either User Data or Code Blocks.
*  This allows for dynamic strings to be displayed in the GUI for these markers.
*
*  Strings up to 254 bytes may be registered.
*  Strings longer than this value will be truncated.
*  The string registered should be null-terminated.
*  Using the same string twice will result in the same ID returned for both strings.
*
*  The profiler must be initialized before attempting to register a string.
*  Otherwise, the error value will be returned.
*  The error value may also be returned if the internal storage buffer for strings is full.
*
*  An example call to this function is:
*  @code
*  char str[10];
*  snprintf(str, sizeof(str), "Example");
*  uint64_t id = nn::profiler::RegisterStringId(str);
*  nn::profiler::EnterCodeBlock(id);
*  nn::profiler::RecordData(id, 12345uL);
*  nn::profiler::ExitCodeBlock(id);
*  @endcode
*/
uint64_t RegisterStringId(const char* pMarkerName)
{
    // A null name cannot be registered; the status is not even queried in that case.
    if (pMarkerName == nullptr)
    {
        return UINT64_MAX;
    }

    // The string table only exists while the profiler is initialized.
    if (GetProfilerStatus() == ProfilerStatus_Offline)
    {
        return UINT64_MAX;
    }

    const uintptr_t tableValue = gMarkerNames->AddString(pMarkerName);
    // The value must fit in the 64-bit ID without truncation.
    NN_STATIC_ASSERT(sizeof(tableValue) <= sizeof(uint64_t));
    return static_cast<uint64_t>(tableValue);
}



// Records an integer user data point (PayloadEvents_UserPlotInt) under the given ID for
// the calling thread.
// Returns ResultSkippedRecord() when ShouldRecordCheck() declines the record, otherwise
// the result of the underlying record call.
nn::Result RecordData(uint64_t id, uint64_t data)
{
    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    const nn::os::ThreadId threadId = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    auto outcome = RecordData(area, PayloadEvents_UserPlotInt, threadId, id, &data, currentCore);

    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}



// Records a floating-point user data point (PayloadEvents_UserPlotFloat) under the given
// ID for the calling thread.
// Returns ResultSkippedRecord() when ShouldRecordCheck() declines the record, otherwise
// the result of the underlying record call.
nn::Result RecordData(uint64_t id, double data)
{
    uint32_t core;
    WorkArea *ws = ShouldRecordCheck(&core);
    if (ws == nullptr)
    {
        return ResultSkippedRecord();
    }

    // The payload travels as a pointer to 64 raw bits. Copy the double's object
    // representation into a real uint64_t rather than casting the pointer: accessing a
    // double through a uint64_t* breaks the strict-aliasing rule.
    NN_STATIC_ASSERT(sizeof(uint64_t) == sizeof(double));
    uint64_t rawBits;
    std::memcpy(&rawBits, &data, sizeof(rawBits));

    nn::os::ThreadId tid = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    auto result = RecordData(
        ws,
        PayloadEvents_UserPlotFloat,
        tid,
        id,
        &rawBits,
        core);
    // Balance the increment made by ShouldRecordCheck().
    ws->recordCount--;
    return result;
}



// Records an integer user data point (PayloadEvents_UserPlotInt) under a string ID.
// The string is registered through RegisterStringId() only once the record has been
// accepted.
// Returns ResultNullArgument() for a null id, ResultSkippedRecord() when
// ShouldRecordCheck() declines the record, otherwise the result of the underlying record
// call.
nn::Result RecordData(const char* id, uint64_t data)
{
    if (id == nullptr)
    {
        return ResultNullArgument();
    }

    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    const nn::os::ThreadId threadId = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    const uint64_t nameId = RegisterStringId(id);
    auto outcome = RecordData(area, PayloadEvents_UserPlotInt, threadId, nameId, &data, currentCore);

    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}



// Records a floating-point user data point (PayloadEvents_UserPlotFloat) under a string
// ID. The string is registered through RegisterStringId() only once the record has been
// accepted.
// Returns ResultNullArgument() for a null id, ResultSkippedRecord() when
// ShouldRecordCheck() declines the record, otherwise the result of the underlying record
// call.
nn::Result RecordData(const char* id, double data)
{
    if (id == nullptr)
    {
        return ResultNullArgument();
    }

    uint32_t core;
    WorkArea *ws = ShouldRecordCheck(&core);
    if (ws == nullptr)
    {
        return ResultSkippedRecord();
    }

    // The payload travels as a pointer to 64 raw bits. Copy the double's object
    // representation into a real uint64_t rather than casting the pointer: accessing a
    // double through a uint64_t* breaks the strict-aliasing rule.
    NN_STATIC_ASSERT(sizeof(uint64_t) == sizeof(double));
    uint64_t rawBits;
    std::memcpy(&rawBits, &data, sizeof(rawBits));

    nn::os::ThreadId tid = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    uint64_t name = RegisterStringId(id);
    auto result = RecordData(
        ws,
        PayloadEvents_UserPlotFloat,
        tid,
        name,
        &rawBits,
        core);
    // Balance the increment made by ShouldRecordCheck().
    ws->recordCount--;
    return result;
}



// Registers the string as an ID, remembers it in m_Id and records entry into the code
// block. The result of EnterCodeBlock() is not checked.
ScopedCodeBlock::ScopedCodeBlock(const char* id) NN_NOEXCEPT
    : m_Id(RegisterStringId(id))
{
    EnterCodeBlock(m_Id);
}



// Records that the calling thread entered the code block identified by id.
// Returns ResultSkippedRecord() when ShouldRecordCheck() declines the record, otherwise
// the result of RecordCodeBlock().
nn::Result EnterCodeBlock(uint64_t id)
{
    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    const nn::os::ThreadId threadId = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    auto outcome = RecordCodeBlock(area, threadId, id, true, currentCore);

    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}



// Records that the calling thread left the code block identified by id.
// Returns ResultSkippedRecord() when ShouldRecordCheck() declines the record, otherwise
// the result of RecordCodeBlock().
nn::Result ExitCodeBlock(uint64_t id)
{
    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    const nn::os::ThreadId threadId = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    auto outcome = RecordCodeBlock(area, threadId, id, false, currentCore);

    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}



// Records that the calling thread entered the code block named by the string id.
// The string is registered through RegisterStringId() only once the record has been
// accepted.
// Returns ResultNullArgument() for a null id, ResultSkippedRecord() when
// ShouldRecordCheck() declines the record, otherwise the result of RecordCodeBlock().
nn::Result EnterCodeBlock(const char* id)
{
    if (id == nullptr)
    {
        return ResultNullArgument();
    }

    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    const nn::os::ThreadId threadId = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    const uint64_t nameId = RegisterStringId(id);
    auto outcome = RecordCodeBlock(area, threadId, nameId, true, currentCore);

    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}



// Records that the calling thread left the code block named by the string id.
// The string is registered through RegisterStringId() only once the record has been
// accepted.
// Returns ResultNullArgument() for a null id, ResultSkippedRecord() when
// ShouldRecordCheck() declines the record, otherwise the result of RecordCodeBlock().
nn::Result ExitCodeBlock(const char* id)
{
    if (id == nullptr)
    {
        return ResultNullArgument();
    }

    uint32_t currentCore;
    WorkArea* const area = ShouldRecordCheck(&currentCore);
    if (area == nullptr)
    {
        return ResultSkippedRecord();
    }

    const nn::os::ThreadId threadId = TargetApplication::GetThreadId(nn::os::GetCurrentThread());
    const uint64_t nameId = RegisterStringId(id);
    auto outcome = RecordCodeBlock(area, threadId, nameId, false, currentCore);

    // Balance the increment made by ShouldRecordCheck().
    --area->recordCount;
    return outcome;
}



void EnterInstrumentedFunction()
{
    uint32_t core;
    WorkArea *ws = ShouldRecordCheck(&core);
    if (ws == nullptr)
    {
        return;
    }

    // Instrumentation is currently mutually exclusive to sampling.
    // As such, we will need to register threads to get their names as we see the thread.
    auto thread = nn::os::GetCurrentThread();
    nn::os::ThreadId tid = TargetApplication::GetThreadId(thread);
    TargetApplication::GetCurrent()->RegisterThread(thread);

    RecordInstrumentedHit(ws, tid, static_cast<uintptr_t>(ws->settings->func_to_instrument), true, core);
    ws->recordCount--;
}



void ExitInstrumentedFunction()
{
    uint32_t core;
    WorkArea *ws = ShouldRecordCheck(&core);
    if (ws == nullptr)
    {
        return;
    }

    // Instrumentation is currently mutually exclusive to sampling.
    // As such, we will need to register threads to get their names as we see the thread.
    auto thread = nn::os::GetCurrentThread();
    nn::os::ThreadId tid = TargetApplication::GetThreadId(thread);
    TargetApplication::GetCurrent()->RegisterThread(thread);

    RecordInstrumentedHit(ws, tid, static_cast<uintptr_t>(ws->settings->func_to_instrument), false, core);
    ws->recordCount--;
}



// Requests that profiling start and, if the request was sent successfully, blocks until
// it has started.
// Returns ResultNotInitialized() when the profiler is offline, ResultAlreadyDone() when
// its status is anything other than ProfilerStatus_Active, otherwise the result of
// sending the start message.
nn::Result StartProfiling()
{
    const ProfilerStatus current = GetProfilerStatus();
    if (current == ProfilerStatus_Offline)
    {
        return ResultNotInitialized();
    }
    if (current != ProfilerStatus_Active)
    {
        return ResultAlreadyDone();
    }

    auto profileSettings = GetProfilerSettingsPointer();

    nn::Result sendResult = SendBasicIpcMessage(ProfilerIpcMessage_StartProfiling, 0);
    if (!sendResult.IsSuccess())
    {
        return sendResult;
    }

    if (profileSettings->flags & Flags_OutOfProcessProfiling)
    {
        // Out-of-process: poll the status, yielding between checks, until profiling is on.
        for (;;)
        {
            if (GetProfilerStatus() == ProfilerStatus_Profiling)
            {
                break;
            }
            nn::os::YieldThread();
        }
    }
    else
    {
        nn::profiler::WaitProfilingStarted();
    }
    return sendResult;

    //auto pSettings = GetProfilerSettingsPointer();
    //nn::os::EventType* beginEvent = CheckForBegin(pSettings);
    //nn::os::SignalEvent(beginEvent);

    //nn::profiler::WaitProfilingStarted();

    //return nn::ResultSuccess();
}



// Requests that profiling stop and, if the request was sent successfully, blocks until
// it has stopped.
// Returns ResultNotInitialized() when the profiler is offline, ResultAlreadyDone() when
// its status is anything other than ProfilerStatus_Profiling, otherwise the result of
// sending the stop message.
nn::Result StopProfiling()
{
    const ProfilerStatus current = GetProfilerStatus();
    if (current == ProfilerStatus_Offline)
    {
        return ResultNotInitialized();
    }
    if (current != ProfilerStatus_Profiling)
    {
        return ResultAlreadyDone();
    }

    auto profileSettings = GetProfilerSettingsPointer();

    nn::Result sendResult = SendBasicIpcMessage(ProfilerIpcMessage_StopProfiling, 0);
    if (!sendResult.IsSuccess())
    {
        return sendResult;
    }

    if (profileSettings->flags & Flags_OutOfProcessProfiling)
    {
        // Out-of-process: poll the status, yielding between checks, until profiling is off.
        for (;;)
        {
            if (GetProfilerStatus() != ProfilerStatus_Profiling)
            {
                break;
            }
            nn::os::YieldThread();
        }
    }
    else
    {
        nn::profiler::WaitCoresClosed();
    }
    return sendResult;

    //StopProfilingSamplingThreads();

    //nn::profiler::WaitCoresClosed();

    //return nn::ResultSuccess();
}



// Applies new global profile settings and, on success, refreshes the local settings
// structure from the global ones.
// Returns ResultNotInitialized() when the profiler is offline, the failure from
// SetGlobalProfileSettings() if applying fails, otherwise the result of
// GetGlobalProfileSettings().
nn::Result SetProfileSettings(
    uint32_t affinityMask,
    uint32_t flags,
    PerformanceCounterGroup performanceCounterGroup,
    SampleRate sampleRate)
{
    if (GetProfilerStatus() == ProfilerStatus_Offline)
    {
        return ResultNotInitialized();
    }

    nn::Result outcome = SetGlobalProfileSettings(affinityMask, flags, performanceCounterGroup, sampleRate);
    if (!outcome.IsSuccess())
    {
        return outcome;
    }

    // Read the settings back so the local copy matches what was applied.
    auto localSettings = GetProfilerSettingsPointer();
    outcome = GetGlobalProfileSettings(localSettings, sizeof(*localSettings));
    return outcome;
}


} // profiler
} // nn
