﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cstdlib>
#include <algorithm>

#include <nn/nn_Common.h>
#include <nn/os/os_Types.h>
#include <nn/os/os_Thread.h>
#include <nn/pcm/pcm.h>

#include "pmu/profiler_PerfCounterGroups.h"
#include "pmu/profiler_PerfCounterThread.h"
#include "profiler_CommMessages.h"
#include "profiler_DataStream.h"
#include "profiler_Defines.h"
#include "profiler_Logging.h"
#include "profiler_Memory.h"
#include "profiler_RecordMethods.h"
#include "profiler_ResultPrivate.h"
#include "profiler_TargetApplication.h"
#include "profiler_Time.h"
#include "profiler_Workarea.h"
#include "profiler_WriteToBuffer.h"


namespace nn { namespace profiler {

namespace /*anonymous*/
{
    // Marker values that WalkStack_FramePointer writes in place of a real
    // return address / stack depth.
    constexpr int EndStackMarker = 0;            // terminates the list of recorded frames
    constexpr int NotInStackMarker = -1;         // depth stored next to the link register value
    constexpr int BadFramePointerMarker = -2;    // frame pointer failed the stack bounds check
    constexpr int LastDitchSentinelMarker = -3;  // write pointer reached ws->sentinel + LastDitchSentinel

    // Space reserved for one performance counter record: 4 bytes for the
    // cycle count plus 4 bytes for each of the pmu::PerformanceCounterCount counters.
    const size_t SizeofPerformanceCounterData = (pmu::PerformanceCounterCount + 1) * 4;
} // anonymous

//-------------------------------------------------------------------
// Forward declarations
//-------------------------------------------------------------------
// Walks the sampled thread's frame-pointer chain and appends the recorded
// frames to the work area's buffer.
void WalkStack_FramePointer(WorkArea* ws, uintptr_t lr, uintptr_t sp);

// Reads the performance counters for 'core' and writes them starting at 'writePtr'.
void RecordPerfCounters(uint8_t* writePtr, uint32_t core, bool wasSamplerThread);


//-------------------------------------------------------------------
// "Global" variables
//-------------------------------------------------------------------


//-------------------------------------------------------------------
// Inline functions
//-------------------------------------------------------------------


//-------------------------------------------------------------------
// Global functions
//-------------------------------------------------------------------
// Appends a heartbeat record to the work area's buffer.
//
// 16 bytes are claimed from ws->curPtr and filled, in order, with:
//   - the event word (PayloadEvents_HeartbeatBase OR'd with 'heartbeat')
//   - the value returned by GetCurrentTime()
//   - 'core'
// Always returns nn::ResultSuccess().
nn::Result RecordHeartbeat(
    WorkArea* ws,
    Heartbeats heartbeat,
    uint32_t core)
{
    const ptrdiff_t RecordSize = 16;
    const uint32_t eventWord = PayloadEvents_HeartbeatBase | heartbeat;

    // Claim the bytes before taking the timestamp; fetch_add is presumed to
    // return the pre-increment pointer (as std::atomic does).
    uint8_t* cursor = ws->curPtr.fetch_add(RecordSize);

    cursor = WriteToBuffer(cursor, eventWord);
    cursor = WriteToBuffer(cursor, GetCurrentTime());
    cursor = WriteToBuffer(cursor, core);

    return nn::ResultSuccess();
}



// Appends a generic data record to the work area's buffer.
//
// 40 bytes are claimed from ws->curPtr and filled, in order, with:
// 'event', GetCurrentTime(), 'core', 'threadId', 'id' and the 64-bit value
// that 'data' points to.
//
// 'data' is dereferenced unconditionally, so it must not be null.
// Always returns nn::ResultSuccess().
nn::Result RecordData(
    WorkArea* ws,
    uint32_t event,
    nn::os::ThreadId threadId,
    uint64_t id,
    uint64_t *data,
    uint32_t core)
{
    const ptrdiff_t RecordSize = 40;

    uint8_t* cursor = ws->curPtr.fetch_add(RecordSize);

    // Header: event, timestamp, core.
    cursor = WriteToBuffer(cursor, event);
    cursor = WriteToBuffer(cursor, GetCurrentTime());
    cursor = WriteToBuffer(cursor, core);

    // Payload: owning thread, caller-supplied id, caller-supplied value.
    cursor = WriteToBuffer(cursor, threadId);
    cursor = WriteToBuffer(cursor, id);
    cursor = WriteToBuffer(cursor, *data);

    return nn::ResultSuccess();
}



// Appends a code block enter/exit record to the work area's buffer.
//
// The record starts with a 32 byte header: event word
// (PayloadEvents_CodeBlockEnter or PayloadEvents_CodeBlockExit depending on
// 'enteredBlock'), GetCurrentTime(), 'core', 'threadId' and 'blockId'.
// When WorkArea::IsUsingPerfCounters is set, SizeofPerformanceCounterData
// additional bytes are reserved and filled by RecordPerfCounters.
//
// Always returns nn::ResultSuccess().
nn::Result RecordCodeBlock(
    WorkArea* ws,
    nn::os::ThreadId threadId,
    uint64_t blockId,
    bool enteredBlock,
    uint32_t core)
{
    VERBOSE_LOG("Recording Code Block\n");

    uint32_t event = enteredBlock ? PayloadEvents_CodeBlockEnter : PayloadEvents_CodeBlockExit;

    // Query the flag exactly once. The answer decides both how many bytes are
    // reserved and whether the counters are written; reading it twice would
    // let the two decisions disagree if the flag changed in between, leaving
    // the record larger or smaller than its reservation.
    bool withPerfCounters = false;
    ptrdiff_t offset = 32;
    if (ws->fields.GetBit(WorkArea::IsUsingPerfCounters))
    {
        withPerfCounters = true;
        offset += SizeofPerformanceCounterData;
    }

    uint8_t* writePtr = ws->curPtr.fetch_add(offset);
    writePtr = WriteToBuffer(writePtr, event);
    writePtr = WriteToBuffer(writePtr, GetCurrentTime());
    writePtr = WriteToBuffer(writePtr, core);
    writePtr = WriteToBuffer(writePtr, threadId);
    writePtr = WriteToBuffer(writePtr, blockId);

    if (withPerfCounters)
    {
        // NOTE(review): the counters are read for ws->core_number, while the
        // header above stores the 'core' argument - confirm this is intended.
        RecordPerfCounters(writePtr, ws->core_number, false);
    }

    return nn::ResultSuccess();
}



// Appends an instrumentation enter/exit record to the work area's buffer.
//
// The record starts with a 32 byte header: event word
// (PayloadEvents_InstrumentEnter or PayloadEvents_InstrumentExit depending on
// 'enteredBlock'), GetCurrentTime(), 'core', 'threadId' and
// 'instrumentedAddress' widened to 64 bits.
// When WorkArea::IsUsingPerfCounters is set, SizeofPerformanceCounterData
// additional bytes are reserved and filled by RecordPerfCounters.
//
// Always returns nn::ResultSuccess().
nn::Result RecordInstrumentedHit(
    WorkArea* ws,
    nn::os::ThreadId threadId,
    uintptr_t instrumentedAddress,
    bool enteredBlock,
    uint32_t core)
{
    VERBOSE_LOG("Recording Instrumented Hit\n");

    uint32_t event = enteredBlock ? PayloadEvents_InstrumentEnter : PayloadEvents_InstrumentExit;

    // Query the flag exactly once. The answer decides both how many bytes are
    // reserved and whether the counters are written; reading it twice would
    // let the two decisions disagree if the flag changed in between, leaving
    // the record larger or smaller than its reservation.
    bool withPerfCounters = false;
    ptrdiff_t offset = 32;
    if (ws->fields.GetBit(WorkArea::IsUsingPerfCounters))
    {
        withPerfCounters = true;
        offset += SizeofPerformanceCounterData;
    }

    uint8_t* writePtr = ws->curPtr.fetch_add(offset);
    writePtr = WriteToBuffer(writePtr, event);
    writePtr = WriteToBuffer(writePtr, GetCurrentTime());
    writePtr = WriteToBuffer(writePtr, core);
    writePtr = WriteToBuffer(writePtr, threadId);
    writePtr = WriteToBuffer(writePtr, static_cast<uint64_t>(instrumentedAddress)); // force 64-bit

    if (withPerfCounters)
    {
        // NOTE(review): the counters are read for ws->core_number, while the
        // header above stores the 'core' argument - confirm this is intended.
        RecordPerfCounters(writePtr, ws->core_number, false);
    }

    return nn::ResultSuccess();
}



// Appends the header of a sample record for ws->thread_to_profile.
//
// The thread is registered with the current TargetApplication, then
// 12 + sizeof(thread id) bytes are claimed from ws->curPtr and filled with:
// the event word, the thread id and GetCurrentTime().
// The event word is PayloadEvents_Sample, with PayloadEvents_ThreadInSystemCall
// OR'd in when WorkArea::ThreadInSystemCall is set.
//
// Always returns nn::ResultSuccess().
nn::Result RecordSampleHeader(WorkArea* ws)
{
    // Resolve the id first, then register the thread with the application.
    nn::os::ThreadId sampledThreadId = TargetApplication::GetThreadId(ws->thread_to_profile);
    TargetApplication::GetCurrent()->RegisterThread(ws->thread_to_profile);

    uint32_t eventWord = static_cast<uint32_t>(PayloadEvents_Sample);
    if (ws->fields.GetBit(WorkArea::ThreadInSystemCall))
    {
        eventWord |= PayloadEvents_ThreadInSystemCall;
    }

    uint8_t* cursor = ws->curPtr.fetch_add(sizeof(sampledThreadId) + 12);

    cursor = WriteToBuffer(cursor, eventWord);
    cursor = WriteToBuffer(cursor, sampledThreadId);
    cursor = WriteToBuffer(cursor, GetCurrentTime());

    return nn::ResultSuccess();
}



// Claims SizeofPerformanceCounterData bytes from ws->curPtr and has the
// performance counters for ws->core_number written into them. The low-level
// writer is invoked with wasSamplerThread = true.
// Always returns nn::ResultSuccess().
nn::Result RecordPerfCounters(WorkArea* ws)
{
    uint8_t* const destination = ws->curPtr.fetch_add(SizeofPerformanceCounterData);

    RecordPerfCounters(destination, ws->core_number, true);
    return nn::ResultSuccess();
}



// Appends the sampled program counter (ws->context.pc) to the work area's
// buffer, claiming exactly sizeof(ws->context.pc) bytes from ws->curPtr.
// Always returns nn::ResultSuccess().
nn::Result RecordPC(WorkArea* ws)
{
    const size_t pcSize = sizeof(ws->context.pc);

    uint8_t* cursor = ws->curPtr.fetch_add(pcSize);
    cursor = WriteToBuffer(cursor, ws->context.pc);

    return nn::ResultSuccess();
}



// Appends the current stack depth of ws->thread_to_profile as a 32-bit value.
//
// The depth is (stack start - ws->context.sp), truncated to 32 bits. If the
// stack start cannot be looked up for the thread, 0xFFFFFFFF is written instead.
// Always returns nn::ResultSuccess().
nn::Result RecordStackDepth(WorkArea* ws)
{
    uintptr_t stackTop = 0;
    const bool found = TargetApplication::GetCurrent()->GetStackStartFromThreadId(
        ws->thread_to_profile,
        &stackTop);

    uint32_t depth;
    if (found)
    {
        const uintptr_t stackPointer = ws->context.sp;
        depth = static_cast<uint32_t>(stackTop - stackPointer);
    }
    else
    {
        // Lookup failed: emit the "unknown depth" value.
        depth = 0xFFFFFFFF;
    }

    uint8_t* cursor = ws->curPtr.fetch_add(sizeof(depth));
    cursor = WriteToBuffer(cursor, depth);

    return nn::ResultSuccess();
}



// Records the call stack of the sampled thread by running the frame-pointer
// walker with the link register and stack pointer captured in ws->context.
// Always returns nn::ResultSuccess().
nn::Result RecordStack(WorkArea* ws)
{
    const uintptr_t linkRegister = ws->context.lr;
    const uintptr_t stackPointer = ws->context.sp;

    WalkStack_FramePointer(ws, linkRegister, stackPointer);

    return nn::ResultSuccess();
}



// Record function that writes nothing: it ignores the work area and
// unconditionally returns ResultSampleLoopComplete.
nn::Result RecordDefaultFuncPtr(WorkArea* ws)
{
    // The work area is intentionally not touched.
    NN_UNUSED(ws);

    return ResultSampleLoopComplete();
}



//-------------------------------------------------------------------
// Stack walking functions
//-------------------------------------------------------------------
// Records a frame-pointer based call stack for ws->thread_to_profile.
//
// Output, appended starting at ws->curPtr (field widths follow the types
// passed to WriteToBuffer):
//   uint32_t   stack depth at 'sp' (stack start - sp), or 0 when the stack
//              start could not be looked up
//   uintptr_t  'lr', followed by uint32_t NotInStackMarker
//   { uintptr_t return address, uint32_t depth }  for each walked frame whose
//              saved frame pointer is non-zero
//   optionally one { marker, marker } pair when the walk is aborted:
//              BadFramePointerMarker or LastDitchSentinelMarker
//   uintptr_t  EndStackMarker, followed by uint32_t EndStackMarker
//
// Unlike the Record* functions, the size is not known up front, so
// ws->curPtr is read once on entry and stored back on exit instead of being
// advanced with fetch_add.
//
// The frame chain is only walked when the stack start is non-zero. The walk
// starts at ws->context.fp and stops at a null frame pointer.
void WalkStack_FramePointer(
    WorkArea* ws,
    uintptr_t lr,
    uintptr_t sp)
{
    uint8_t* writePtr = ws->curPtr;
    uintptr_t stack_start = 0;
    {
        bool success = TargetApplication::GetCurrent()->GetStackStartFromThreadId(
            ws->thread_to_profile,
            &stack_start);
        if (success)
        {
            uint32_t stack_depth = static_cast<uint32_t>(stack_start - sp);
            writePtr = WriteToBuffer(writePtr, stack_depth);
        }
        else
        {
            writePtr = WriteToBuffer(writePtr, static_cast<uint32_t>(0));
        }
    }

    // Store the link register
    // when storing the value in LR, the stack depth does not change
    writePtr = WriteToBuffer(writePtr, lr);
    writePtr = WriteToBuffer(writePtr, static_cast<uint32_t>(NotInStackMarker)); // explicit 32-bit value

    if (stack_start != 0)
    {
        // Each frame record read below is two pointer-sized words:
        // [0] = saved (caller's) frame pointer, [1] = saved return address.
        const uintptr_t frameRecordSize = 2 * sizeof(uintptr_t);

        uintptr_t *topfp = reinterpret_cast<uintptr_t*>(ws->context.fp);
        while (topfp != nullptr)
        {
            uintptr_t topfp_uintptr = reinterpret_cast<uintptr_t>(topfp);

            // The whole frame record must lie inside [sp, stack_start).
            // Checking only that the frame pointer itself is below
            // stack_start would allow the reads of topfp[0] / topfp[1] to run
            // past the end of the stack when the pointer sits in the last
            // few bytes. The subtraction cannot wrap because it is only
            // evaluated after topfp_uintptr < stack_start has been established.
            const bool frameInStack =
                ws->context.sp <= topfp_uintptr &&
                topfp_uintptr < stack_start &&
                (stack_start - topfp_uintptr) >= frameRecordSize;
            if (!frameInStack)
            {
                writePtr = WriteToBuffer(writePtr, static_cast<uintptr_t>(BadFramePointerMarker));
                writePtr = WriteToBuffer(writePtr, static_cast<uint32_t>(BadFramePointerMarker));
                break;
            }

            // Stop before running out of buffer; the terminator written after
            // the loop still has to fit.
            if (writePtr >= (ws->sentinel + LastDitchSentinel))
            {
                writePtr = WriteToBuffer(writePtr, static_cast<uintptr_t>(LastDitchSentinelMarker));
                writePtr = WriteToBuffer(writePtr, static_cast<uint32_t>(LastDitchSentinelMarker));
                break;
            }

            uintptr_t frame_lr = *(topfp + 1);
            uintptr_t frame_fp = *(topfp + 0);

            // Depth is measured from the stack start to the saved return address slot.
            uint32_t depth = static_cast<uint32_t>(stack_start - reinterpret_cast<uintptr_t>(topfp + 1));

            // A frame whose saved frame pointer is zero ends the chain and is not recorded.
            if (frame_fp != 0)
            {
                writePtr = WriteToBuffer(writePtr, frame_lr);
                writePtr = WriteToBuffer(writePtr, depth);
            }

            topfp = reinterpret_cast<uintptr_t*>(frame_fp);
        }
    }

    // Null terminate the function list
    writePtr = WriteToBuffer(writePtr, static_cast<uintptr_t>(EndStackMarker));
    writePtr = WriteToBuffer(writePtr, static_cast<uint32_t>(EndStackMarker)); // explicit 32-bit value
    ws->curPtr = writePtr;
}




void RecordPerfCounters(
    uint8_t* writePtr,
    uint32_t core,
    bool wasSamplerThread)
{

    nn::profiler::pmu::PerformanceCounters data;
    nn::profiler::pmu::ReadCounters(data, core, wasSamplerThread);

    // Store in temporaries to let the compiler know that it can perform
    // all of the loads before doing the stores, saving a few cycles.
    const uint32_t cycles = static_cast<uint32_t>(data.cycles);

    writePtr = WriteToBuffer(writePtr, cycles);
    for (int i = 0; i < pmu::PerformanceCounterCount; ++i)
    {
        writePtr = WriteToBuffer(writePtr, data.counters[i]);
    }
}


} // profiler
} // nn
