﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/perf/perf_LoadMeter.h>
#include <nn/nn_SdkLog.h>
#include <nn/nn_SdkAssert.h>

#if defined(NN_GFX_CONFIG_INCLUDE_NVN)
#include <nvn/nvn_FuncPtrInline.h>
#endif

#if defined(NN_GFX_CONFIG_INCLUDE_GL)
#include <GL/glew.h>
#include <nn/nn_Windows.h>
#endif

// The NN_SDK_LOG family of macros becomes unavailable in Release builds.
// As a temporary workaround, a copy of NN_SDK_LOG is kept here internally.
// The macro fills in a ::nn::diag::LogMetaData (source location, module name "$",
// Info severity, verbosity 0) and forwards the printf-style arguments to
// ::nn::diag::detail::LogImpl.
#define NN_PERF_DUMP_LOG(...)                                    \
    do                                                                  \
    {                                                                   \
        ::nn::diag::LogMetaData logMetaData;                            \
        logMetaData.sourceInfo.lineNumber = __LINE__;                   \
        logMetaData.sourceInfo.fileName = __FILE__;                     \
        logMetaData.sourceInfo.functionName = NN_CURRENT_FUNCTION_NAME; \
        logMetaData.moduleName = "$";                                   \
        logMetaData.severity = ::nn::diag::LogSeverity_Info;            \
        logMetaData.verbosity = 0;                                      \
        logMetaData.useDefaultLocaleCharset = false;                    \
        logMetaData.pAdditionalData = static_cast<void*>(0);            \
        logMetaData.additionalDataBytes = 0;                            \
        ::nn::diag::detail::LogImpl(logMetaData, __VA_ARGS__);          \
    } while (NN_STATIC_CONDITION(0))


namespace nn
{
namespace perf
{

// Constructs a meter with default settings: a 60 fps reference frame rate, a green
// display color, buffer 0 as the current (write) buffer, buffer 1 as the reference
// (read) buffer, and no section overflow recorded. The name buffer is zero-filled.
LoadMeterBase::LoadMeterBase() NN_NOEXCEPT
    : m_FrameRate( 60.f ),
    m_Color(nn::util::Color4u8::Green()),
    m_CurrentBufferIndex( 0 ),
    m_ReferenceBufferIndex( 1 ),
    m_OverCount( 0 )
{
    // NOTE(review): assumes m_Name is at least 128 bytes -- confirm against the header.
    memset(m_Name, 0, 128);
}

// Binds the caller-supplied work memory to the meter's internal placement arrays.
//
// info       : meter configuration (buffer count, maximum section count); copied into m_Info.
// pMemory    : work memory, must be non-NULL and aligned to GetBufferAlignment().
// memorySize : size of pMemory; asserted to be at least CalculateBufferSize(info).
//
// The memory is carved up sequentially: the per-buffer section arrays, the per-buffer
// section counters (zeroed here), and the per-buffer final end ticks.
void LoadMeterBase::Initialize(const LoadMeterInfo& info, void* pMemory, size_t memorySize) NN_NOEXCEPT
{
    NN_SDK_ASSERT(memorySize >= LoadMeterBase::CalculateBufferSize(info));
    NN_SDK_ASSERT(pMemory != NULL);
    NN_UNUSED(memorySize);

    m_Info = info;
    const int bufferCount = m_Info.GetBufferCount();
    const int sectionCountMax = m_Info.GetSectionCountMax();

    // 'cursor' walks forward through the work memory; 'head' remembers where it started
    // so the total consumption can be verified at the end.
    nn::util::BytePtr cursor( pMemory );
    nn::util::BytePtr head = cursor;
    NN_SDK_ASSERT(cursor.IsAligned(LoadMeterBase::GetBufferAlignment()));

    // Outer array: one section array per buffer.
    size_t workSize = m_SectionArray.CalculateWorkMemorySize(bufferCount);
    m_SectionArray.ResetWorkMemory(cursor.Get(), workSize, bufferCount);
    cursor.Advance(workSize);

    // Inner arrays: sectionCountMax sections for each buffer.
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        cursor.AlignUp(NN_ALIGNOF(Section));
        workSize = m_SectionArray[bufferIndex].CalculateWorkMemorySize(sectionCountMax);
        m_SectionArray[bufferIndex].ResetWorkMemory(cursor.Get(), workSize, sectionCountMax);
        cursor.Advance(workSize);
    }

    // Per-buffer section counters, all starting at zero.
    cursor.AlignUp(NN_ALIGNOF(int));
    workSize = m_SectionCount.CalculateWorkMemorySize(bufferCount);
    m_SectionCount.ResetWorkMemory(cursor.Get(), workSize, bufferCount);
    cursor.Advance(workSize);
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        m_SectionCount[bufferIndex] = 0;
    }

    // Per-buffer tick of the last section end.
    cursor.AlignUp(NN_ALIGNOF(nn::os::Tick));
    workSize = m_FinalEnd.CalculateWorkMemorySize(bufferCount);
    m_FinalEnd.ResetWorkMemory(cursor.Get(), workSize, bufferCount);
    cursor.Advance(workSize);

    // The amount actually consumed must not exceed what CalculateBufferSize() promised.
    NN_SDK_ASSERT(head.Distance(cursor.Get()) <= static_cast<ptrdiff_t>(LoadMeterBase::CalculateBufferSize(info)), "memory shortage");
    NN_UNUSED(head);
}

// Detaches every placement array from the work memory handed to Initialize(),
// in the reverse order of their setup. The inner section arrays are released
// before the outer array that holds them.
void LoadMeterBase::Finalize() NN_NOEXCEPT
{
    m_FinalEnd.ResetWorkMemory();
    m_SectionCount.ResetWorkMemory();

    const int bufferCount = m_Info.GetBufferCount();
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        m_SectionArray[bufferIndex].ResetWorkMemory();
    }

    m_SectionArray.ResetWorkMemory();
}

// Returns the alignment required for the work memory passed to Initialize(),
// which is the alignment of Section.
size_t LoadMeterBase::GetBufferAlignment() NN_NOEXCEPT
{
    return NN_ALIGNOF(Section);
}

// Computes an upper bound on the work memory Initialize() needs for the given
// configuration. For every array that Initialize() aligns before placing, one full
// alignment's worth of padding is budgeted in addition to the array's own size.
size_t LoadMeterBase::CalculateBufferSize(LoadMeterInfo info) NN_NOEXCEPT
{
    typedef nn::util::PlacementArray<Section> SectionArray;
    typedef nn::util::PlacementArray<SectionArray> SectionArrayPerBuffer;
    typedef nn::util::PlacementArray<int> CountArray;
    typedef nn::util::PlacementArray<nn::os::Tick> TickArray;

    const int bufferCount = info.GetBufferCount();
    const int sectionCountMax = info.GetSectionCountMax();

    // Outer array of per-buffer section arrays.
    size_t total = SectionArrayPerBuffer::CalculateWorkMemorySize(bufferCount);

    // Each buffer's section array, plus alignment padding.
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        total += NN_ALIGNOF(Section);
        total += SectionArray::CalculateWorkMemorySize(sectionCountMax);
    }

    // Per-buffer section counters.
    total += NN_ALIGNOF(int);
    total += CountArray::CalculateWorkMemorySize(bufferCount);

    // Per-buffer final end ticks.
    total += NN_ALIGNOF(nn::os::Tick);
    total += TickArray::CalculateWorkMemorySize(bufferCount);

    return total;
}

// Returns the summed duration of all top-level sections recorded in the reference
// buffer. Nested sections (those with a parent) are skipped so their time is not
// counted twice.
nn::TimeSpan LoadMeterBase::GetLastTotalSpan() const NN_NOEXCEPT
{
    nn::os::Tick accumulated;

    for( int index = 0; index < GetLastSectionCount(); ++index )
    {
        const Section& current = m_SectionArray[ m_ReferenceBufferIndex ][ index ];

        // Only top-level sections (parent == -1) contribute to the total.
        if ( current.parent != -1 )
        {
            continue;
        }
        accumulated += ( current.end - current.begin );
    }

    return accumulated.ToTimeSpan();
}

// Fills in the next free section slot of the current buffer with the start tick and
// the supplied attributes. This function does not advance the section counter itself.
//
// name   : label stored in the section (the pointer is stored, not a copy of the string).
// tag    : user tag stored in the section.
// color  : display color stored in the section.
// parent : index of the enclosing section, or -1 for a top-level section.
// depth  : nesting depth stored in the section.
void LoadMeterBase::BeginSection( const char* name, uint32_t tag, const nn::util::Color4u8Type& color, int parent, int depth) NN_NOEXCEPT
{
    const int slot = m_SectionCount[ m_CurrentBufferIndex ];
    NN_SDK_ASSERT( m_Info.GetSectionCountMax() > slot );

    Section& entry = m_SectionArray[m_CurrentBufferIndex][ slot ];
    entry.begin  = nn::os::GetSystemTick();
    entry.name   = name;
    entry.tag    = tag;
    entry.color  = color;
    entry.parent = parent;
    entry.depth  = depth;
}

// Stamps the end tick on the given section of the current buffer and records the same
// tick as the buffer's final end. If the clock reading is earlier than the section's
// begin tick, the end is clamped to the begin tick so the duration is never negative.
void LoadMeterBase::EndSection(int sectionIndex) NN_NOEXCEPT
{
    nn::os::Tick now = nn::os::GetSystemTick();
    Section& target = m_SectionArray[m_CurrentBufferIndex][ sectionIndex ];

    const bool endsBeforeBegin = ( now.GetInt64Value() - target.begin.GetInt64Value() < 0 );
    if ( endsBeforeBegin )
    {
        now = target.begin;
    }

    target.end = now;
    m_FinalEnd[ m_CurrentBufferIndex ] = now;
}

// Advances the meter to the next frame: the previous reference buffer becomes the
// current (write) buffer, the reference index moves on to the following buffer
// (wrapping to 0 after the last one), the new current buffer's section count and the
// overflow counter are cleared, and Next() is propagated to every attached meter.
void LoadMeterBase::Next() NN_NOEXCEPT
{
    NN_SDK_ASSERT( m_OverCount == 0, "Section is not enough, Please add %d or more sections.", m_OverCount );

    // Swap the buffers that store the frame results.
    m_CurrentBufferIndex = m_ReferenceBufferIndex;
    const bool isLastBuffer = ( m_ReferenceBufferIndex == m_Info.GetBufferCount() - 1 );
    if ( isLastBuffer )
    {
        m_ReferenceBufferIndex = 0;
    }
    else
    {
        ++m_ReferenceBufferIndex;
    }

    m_OverCount = 0;
    m_SectionCount[ m_CurrentBufferIndex ] = 0;

    // Attached meters advance together with this one.
    LoadMeterList::iterator child = m_LoadMeterList.begin();
    while ( child != m_LoadMeterList.end() )
    {
        child->Next();
        ++child;
    }
}

// Appends the given meter to this meter's list of attached meters.
// 'meter' is dereferenced unconditionally, so it must not be NULL.
void LoadMeterBase::AttachLoadMeter( LoadMeterBase* meter ) NN_NOEXCEPT
{
    LoadMeterBase& attached = *meter;
    m_LoadMeterList.push_back( attached );
}

// Removes the given meter from this meter's list of attached meters.
// 'meter' is dereferenced unconditionally, so it must not be NULL.
// NOTE(review): presumably 'meter' must currently be in the list -- confirm against callers.
void LoadMeterBase::DetachLoadMeter( LoadMeterBase* meter ) NN_NOEXCEPT
{
    LoadMeterList::iterator position = m_LoadMeterList.iterator_to( *meter );
    m_LoadMeterList.erase( position );
}

// Logs the last measured total span of this meter, followed by that of every attached
// meter. Each line shows the meter name, the span in milliseconds, and the span as a
// percentage of one frame at m_FrameRate (the frame time is truncated to whole
// microseconds before the percentage is computed).
void LoadMeterBase::Dump() NN_NOEXCEPT
{
    const int64_t frameBudgetUs = static_cast<int64_t>( 1000000.f / m_FrameRate );

    // This meter first.
    const nn::TimeSpan ownSpan = GetLastTotalSpan();
    const float ownPercent = 100.f * ownSpan.GetMicroSeconds() / frameBudgetUs;
    NN_PERF_DUMP_LOG("[ %s ] %10.3f ms (%f%%)\n", GetName(), static_cast<float>(ownSpan.GetMicroSeconds() / 1000.f), ownPercent );

    // Then each attached meter, in list order.
    LoadMeterList::iterator child = m_LoadMeterList.begin();
    while ( child != m_LoadMeterList.end() )
    {
        const nn::TimeSpan childSpan = child->GetLastTotalSpan();
        const float childPercent = 100.f * childSpan.GetMicroSeconds() / frameBudgetUs;
        NN_PERF_DUMP_LOG("[ %s ] %10.3f ms (%f%%)\n", child->GetName(), static_cast<float>(childSpan.GetMicroSeconds() / 1000.f), childPercent );
        ++child;
    }
}

// Sets up the GPU meter: creates one timestamp query buffer per frame buffer, carves
// the CPU-side bookkeeping arrays out of the supplied work memory, initializes the
// mutex, and finally initializes the LoadMeterBase part from the remaining work memory.
//
// pDevice          : graphics device used to create the query buffers; stored in m_Device.
// info             : meter configuration (buffer count, section count max, core count).
// pMemory          : work memory, non-NULL, aligned to GpuMeter::GetBufferAlignment().
// memorySize       : size of pMemory; must be at least GpuMeter::CalculateBufferSize(info).
// pMemoryPool      : memory pool backing the query buffers (used only when
//                    nn::gfx::Buffer::IsMemoryPoolRequired is true).
// memoryPoolOffset : offset into pMemoryPool at which the first query buffer is placed.
// memoryPoolSize   : pool bytes available; must be at least
//                    GpuMeter::CalculateQueryBufferSize(pDevice, info).
void GpuMeter::Initialize( nn::gfx::Device* pDevice, const LoadMeterInfo& info, void* pMemory, size_t memorySize, nn::gfx::MemoryPool* pMemoryPool, ptrdiff_t memoryPoolOffset, size_t memoryPoolSize) NN_NOEXCEPT
{
    NN_SDK_ASSERT(pMemory != NULL);
    // The work memory has to hold everything this function places, which is what
    // GpuMeter::CalculateBufferSize() describes (and what the final check below uses).
    // Checking against CpuMeter's size would let an undersized buffer through.
    NN_SDK_ASSERT(memorySize >= GpuMeter::CalculateBufferSize(info));
    NN_SDK_ASSERT(memoryPoolSize >= GpuMeter::CalculateQueryBufferSize(pDevice, info));
    NN_UNUSED(memorySize);
    NN_UNUSED(memoryPoolSize);
    size_t initialOffset = memoryPoolOffset;

    m_Info = info;
    m_GetCoreNumberFunction = nn::os::GetCurrentCoreNumber;
    nn::util::BytePtr memory( pMemory );
    nn::util::BytePtr initMemory = memory;
    NN_SDK_ASSERT(memory.IsAligned(GpuMeter::GetBufferAlignment()));

    // Array of query buffer objects, one per frame buffer.
    size_t size = m_QueryBuffer.CalculateWorkMemorySize(m_Info.GetBufferCount());
    m_QueryBuffer.ResetWorkMemory(memory.Get(), size, m_Info.GetBufferCount());
    memory.Advance(size);

    // Each section needs two timestamps (begin and end).
    size_t queryBufferSize = sizeof( nn::gfx::TimestampBuffer ) * 2 * m_Info.GetSectionCountMax();
    nn::gfx::Buffer::InfoType bufferInfo;
    bufferInfo.SetDefault();
    bufferInfo.SetSize( queryBufferSize );
    bufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_QueryBuffer );
    for(int i = 0; i < m_Info.GetBufferCount(); ++i)
    {
        if( NN_STATIC_CONDITION( nn::gfx::Buffer::IsMemoryPoolRequired ) )
        {
            // Place the buffer in the pool, then move the offset past it and re-align
            // for the next buffer.
            m_QueryBuffer[i].Initialize( pDevice, bufferInfo, pMemoryPool, memoryPoolOffset, bufferInfo.GetSize() );
            memoryPoolOffset += bufferInfo.GetSize();
            memoryPoolOffset = nn::util::align_up( memoryPoolOffset, nn::gfx::Buffer::GetBufferAlignment( pDevice, bufferInfo ) );
        }
        else
        {
            m_QueryBuffer[i].Initialize( pDevice, bufferInfo, NULL, 0, 0 );
        }
    }

    // CPU-side copy of one query buffer's timestamps (filled in Next()).
    memory.AlignUp(NN_ALIGNOF(nn::gfx::TimestampBuffer));
    size = m_TimestampBuffer.CalculateWorkMemorySize(2 * m_Info.GetSectionCountMax());
    m_TimestampBuffer.ResetWorkMemory(memory.Get(), size, 2 * m_Info.GetSectionCountMax());
    memory.Advance(size);

    // Per-buffer, per-core section counters, all starting at zero.
    memory.AlignUp(NN_ALIGNOF(nn::util::PlacementArray<int>));
    size = m_SectionCountCore.CalculateWorkMemorySize(m_Info.GetBufferCount());
    m_SectionCountCore.ResetWorkMemory(memory.Get(), size, m_Info.GetBufferCount());
    memory.Advance(size);
    for(int i = 0; i < m_Info.GetBufferCount(); ++i)
    {
        memory.AlignUp(NN_ALIGNOF(int));
        size = m_SectionCountCore[i].CalculateWorkMemorySize(m_Info.GetCoreCount());
        m_SectionCountCore[i].ResetWorkMemory(memory.Get(), size, m_Info.GetCoreCount());
        memory.Advance(size);
        for(int f = 0; f < m_Info.GetCoreCount(); ++f)
        {
            m_SectionCountCore[i][f] = 0;
        }
    }

    // Per-core index of the currently open section; -1 means no open section.
    memory.AlignUp(NN_ALIGNOF(int));
    size = m_CurrentSectionIndexCore.CalculateWorkMemorySize(m_Info.GetCoreCount());
    m_CurrentSectionIndexCore.ResetWorkMemory(memory.Get(), size, m_Info.GetCoreCount());
    memory.Advance(size);
    for(int i = 0; i < m_Info.GetCoreCount(); ++i)
    {
        m_CurrentSectionIndexCore[i] = -1;
    }

    // Per-core display color, defaulting to the meter's color.
    memory.AlignUp(NN_ALIGNOF(nn::util::Color4u8));
    size = m_CoreColor.CalculateWorkMemorySize(m_Info.GetCoreCount());
    m_CoreColor.ResetWorkMemory(memory.Get(), size, m_Info.GetCoreCount());
    memory.Advance(size);
    for(int i = 0; i < m_Info.GetCoreCount(); ++i)
    {
        m_CoreColor[i] = m_Color;
    }

    // Per-core nesting depth, starting at zero.
    // NOTE(review): this aligns to PlacementArray<int> while CalculateBufferSize()
    // budgets NN_ALIGNOF(int) padding for this array -- confirm the intended alignment.
    memory.AlignUp(NN_ALIGNOF(nn::util::PlacementArray<int>));
    size = m_CoreDepth.CalculateWorkMemorySize(m_Info.GetCoreCount());
    m_CoreDepth.ResetWorkMemory(memory.Get(), size, m_Info.GetCoreCount());
    memory.Advance(size);
    for(int i = 0; i < m_Info.GetCoreCount(); ++i)
    {
        m_CoreDepth[i] = 0;
    }

    nn::os::InitializeMutex( &m_Mutex, false, 0 );

    // The base class takes the remainder of the work memory.
    memory.AlignUp(LoadMeterBase::GetBufferAlignment());
    LoadMeterBase::Initialize(m_Info, memory.Get(), LoadMeterBase::CalculateBufferSize(info));
    memory.Advance(LoadMeterBase::CalculateBufferSize(info));

    m_Device = pDevice;

    // Verify that neither the work memory nor the memory pool usage exceeded the
    // sizes reported by the corresponding Calculate* functions.
    NN_SDK_ASSERT(initMemory.Distance(memory.Get()) <= static_cast<ptrdiff_t>(GpuMeter::CalculateBufferSize(info)), "memory shortage");
    NN_SDK_ASSERT(memoryPoolOffset - initialOffset <= GpuMeter::CalculateQueryBufferSize(pDevice, info));
    NN_UNUSED(initMemory);
    NN_UNUSED(initialOffset);
}

// Tears down what Initialize() set up: clears the stored device pointer, finalizes
// the LoadMeterBase part, detaches the bookkeeping arrays from the work memory, and
// finalizes each GPU query buffer using pDevice.
// NOTE(review): m_Mutex is initialized in Initialize() but not finalized here --
// confirm it is released elsewhere.
void GpuMeter::Finalize( nn::gfx::Device* pDevice) NN_NOEXCEPT
{
    m_Device = NULL;

    LoadMeterBase::Finalize();

    // Per-core arrays.
    m_CoreDepth.ResetWorkMemory();
    m_CoreColor.ResetWorkMemory();
    m_CurrentSectionIndexCore.ResetWorkMemory();

    const int bufferCount = m_Info.GetBufferCount();

    // Inner per-core counters first, then the outer per-buffer array.
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        m_SectionCountCore[bufferIndex].ResetWorkMemory();
    }
    m_SectionCountCore.ResetWorkMemory();

    m_TimestampBuffer.ResetWorkMemory();

    // Finalize every query buffer before releasing the array that holds them.
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        m_QueryBuffer[bufferIndex].Finalize( pDevice );
    }
    m_QueryBuffer.ResetWorkMemory();
}

// Returns the alignment required for the work memory passed to GpuMeter::Initialize(),
// which is the alignment of nn::gfx::Buffer (the first array placed in that memory).
size_t GpuMeter::GetBufferAlignment() NN_NOEXCEPT
{
    return NN_ALIGNOF(nn::gfx::Buffer);
}

// Computes an upper bound on the work memory GpuMeter::Initialize() needs for the
// given configuration: the GPU meter's own arrays, one alignment's worth of padding
// ahead of each aligned array, and the LoadMeterBase requirement.
size_t GpuMeter::CalculateBufferSize(LoadMeterInfo info) NN_NOEXCEPT
{
    typedef nn::util::PlacementArray<int> IntArray;

    const int bufferCount = info.GetBufferCount();
    const int coreCount = info.GetCoreCount();

    // Query buffer objects, one per frame buffer.
    size_t total = nn::util::PlacementArray<nn::gfx::Buffer>::CalculateWorkMemorySize(bufferCount);

    // CPU-side timestamp copy. Four timestamps per section are budgeted here.
    total += NN_ALIGNOF(nn::gfx::TimestampBuffer);
    total += nn::util::PlacementArray<nn::gfx::TimestampBuffer>::CalculateWorkMemorySize(4 * info.GetSectionCountMax());

    // Per-buffer array of per-core section counters.
    total += NN_ALIGNOF(IntArray);
    total += nn::util::PlacementArray<IntArray>::CalculateWorkMemorySize(bufferCount);
    for (int bufferIndex = 0; bufferIndex < bufferCount; ++bufferIndex)
    {
        total += NN_ALIGNOF(int);
        total += IntArray::CalculateWorkMemorySize(coreCount);
    }

    // Per-core current section indices.
    total += NN_ALIGNOF(int);
    total += IntArray::CalculateWorkMemorySize(coreCount);

    // Per-core colors.
    total += NN_ALIGNOF(nn::util::Color4u8);
    total += nn::util::PlacementArray<nn::util::Color4u8>::CalculateWorkMemorySize(coreCount);

    // Per-core nesting depths.
    total += NN_ALIGNOF(int);
    total += IntArray::CalculateWorkMemorySize(coreCount);

    // Base class portion.
    total += LoadMeterBase::GetBufferAlignment();
    total += LoadMeterBase::CalculateBufferSize(info);

    return total;
}

// Returns the alignment the device requires for one timestamp query buffer sized for
// the given configuration (two timestamps per section).
size_t GpuMeter::GetQueryBufferAlignment( nn::gfx::Device* pDevice, LoadMeterInfo info) NN_NOEXCEPT
{
    const size_t bytesPerQueryBuffer = sizeof( nn::gfx::TimestampBuffer ) * 2 * info.GetSectionCountMax();

    // Describe the buffer exactly as Initialize() creates it, then ask the device.
    nn::gfx::Buffer::InfoType queryBufferInfo;
    queryBufferInfo.SetDefault();
    queryBufferInfo.SetSize( bytesPerQueryBuffer );
    queryBufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_QueryBuffer );

    return nn::gfx::Buffer::GetBufferAlignment( pDevice, queryBufferInfo );
}

// Returns the number of memory pool bytes to reserve for all timestamp query buffers:
// (size of one query buffer + its required alignment) multiplied by the buffer count.
size_t GpuMeter::CalculateQueryBufferSize( nn::gfx::Device* pDevice, LoadMeterInfo info) NN_NOEXCEPT
{
    const size_t bytesPerQueryBuffer = sizeof( nn::gfx::TimestampBuffer ) * 2 * info.GetSectionCountMax();

    nn::gfx::Buffer::InfoType queryBufferInfo;
    queryBufferInfo.SetDefault();
    queryBufferInfo.SetSize( bytesPerQueryBuffer );
    queryBufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_QueryBuffer );

    // One buffer plus a full alignment of slack for the padding between buffers.
    size_t paddedSize = queryBufferInfo.GetSize();
    const size_t requiredAlignment = nn::gfx::Buffer::GetBufferAlignment( pDevice, queryBufferInfo );
    paddedSize += requiredAlignment;

    return paddedSize * info.GetBufferCount();
}

// Opens a new measured section using the color registered for the calling core.
// Writes a begin-timestamp command into pCommandBuffer at the slot belonging to the
// next free section of the current buffer, records the section on the CPU side, and
// makes it the calling core's current (innermost) section.
//
// pCommandBuffer : command buffer that receives the timestamp write.
// name           : section label.
// tag            : user tag stored with the section.
void GpuMeter::BeginMeasure( nn::gfx::CommandBuffer* pCommandBuffer, const char* name, uint32_t tag ) NN_NOEXCEPT
{
    const int core = m_GetCoreNumberFunction();
    NN_SDK_ASSERT( core < m_Info.GetCoreCount() );
    const nn::util::Color4u8& coreColor = m_CoreColor[core];

    nn::gfx::GpuAddress timestampAddress;
    m_QueryBuffer[m_CurrentBufferIndex].GetGpuAddress( &timestampAddress );

    // Section bookkeeping is shared between cores, so it is updated under the mutex.
    nn::os::LockMutex( &m_Mutex );
    {
        const int newSectionIndex = m_SectionCount[ m_CurrentBufferIndex ];

        // Each section owns two consecutive timestamps; the begin stamp is the first.
        const ptrdiff_t beginOffset = newSectionIndex * sizeof( nn::gfx::TimestampBuffer ) * 2;
        timestampAddress.Offset( beginOffset );
        pCommandBuffer->WriteTimestamp( timestampAddress );

        // The previously open section on this core becomes the parent.
        LoadMeterBase::BeginSection( name, tag, coreColor, m_CurrentSectionIndexCore[core], m_CoreDepth[core] );
        m_CurrentSectionIndexCore[core] = newSectionIndex;

        ++m_SectionCountCore[ m_CurrentBufferIndex ][core];
        ++m_SectionCount[ m_CurrentBufferIndex ];
        ++m_CoreDepth[core];
    }
    nn::os::UnlockMutex( &m_Mutex );
}

// Opens a new measured section with an explicitly supplied color.
// Writes a begin-timestamp command into pCommandBuffer at the slot belonging to the
// next free section of the current buffer, records the section on the CPU side, and
// makes it the calling core's current (innermost) section.
//
// pCommandBuffer : command buffer that receives the timestamp write.
// name           : section label.
// tag            : user tag stored with the section.
// color          : display color stored with the section.
void GpuMeter::BeginMeasure( nn::gfx::CommandBuffer* pCommandBuffer, const char* name, uint32_t tag, const nn::util::Color4u8Type& color ) NN_NOEXCEPT
{
    const int core = m_GetCoreNumberFunction();
    NN_SDK_ASSERT( core < m_Info.GetCoreCount() );

    nn::gfx::GpuAddress timestampAddress;
    m_QueryBuffer[m_CurrentBufferIndex].GetGpuAddress( &timestampAddress );

    // Section bookkeeping is shared between cores, so it is updated under the mutex.
    nn::os::LockMutex( &m_Mutex );
    {
        const int newSectionIndex = m_SectionCount[ m_CurrentBufferIndex ];

        // Each section owns two consecutive timestamps; the begin stamp is the first.
        const ptrdiff_t beginOffset = newSectionIndex * sizeof( nn::gfx::TimestampBuffer ) * 2;
        timestampAddress.Offset( beginOffset );
        pCommandBuffer->WriteTimestamp( timestampAddress );

        // The previously open section on this core becomes the parent.
        LoadMeterBase::BeginSection( name, tag, color, m_CurrentSectionIndexCore[core], m_CoreDepth[core] );
        m_CurrentSectionIndexCore[core] = newSectionIndex;

        ++m_SectionCountCore[ m_CurrentBufferIndex ][core];
        ++m_SectionCount[ m_CurrentBufferIndex ];
        ++m_CoreDepth[core];
    }
    nn::os::UnlockMutex( &m_Mutex );
}

// Closes the calling core's innermost open section: writes an end-timestamp command
// into commandBuffer at that section's second timestamp slot, stamps the CPU-side end
// tick, and makes the section's parent the core's current section again.
void GpuMeter::EndMeasure( nn::gfx::CommandBuffer* commandBuffer ) NN_NOEXCEPT
{
    const int core = m_GetCoreNumberFunction();
    NN_SDK_ASSERT( core < m_Info.GetCoreCount() );

    nn::os::LockMutex( &m_Mutex );
    {
        const int closingIndex = m_CurrentSectionIndexCore[core];

        nn::gfx::GpuAddress timestampAddress;
        m_QueryBuffer[m_CurrentBufferIndex].GetGpuAddress( &timestampAddress );

        // The end stamp sits right after the section's begin stamp.
        const ptrdiff_t endOffset = closingIndex * sizeof( nn::gfx::TimestampBuffer ) * 2 + sizeof( nn::gfx::TimestampBuffer );
        timestampAddress.Offset( endOffset );
        commandBuffer->WriteTimestamp( timestampAddress );

        Section& closing = m_SectionArray[m_CurrentBufferIndex][ closingIndex ];
        LoadMeterBase::EndSection( closingIndex );

        // Pop back to the enclosing section (or -1 when this was a top-level one).
        m_CurrentSectionIndexCore[core] = closing.parent;
        --m_CoreDepth[core];
    }
    nn::os::UnlockMutex( &m_Mutex );
}

// Advances the GPU meter to the next frame.
//
// Resets the per-core state, rotates the buffers via LoadMeterBase::Next(), then reads
// back the GPU timestamps of the new reference buffer, converts them into system-tick
// based begin/end values for each recorded section, stores the latest end tick in
// m_FinalEnd, and finally sorts the sections by begin tick.
void GpuMeter::Next() NN_NOEXCEPT
{
    for(int i = 0; i < m_Info.GetCoreCount(); ++i)
    {
        this->m_CurrentSectionIndexCore[i] = -1;
        this->m_SectionCountCore[ this->m_CurrentBufferIndex ][i] = 0;
        this->m_CoreDepth[i] = 0;
    }
    LoadMeterBase::Next();

    // Copy the timestamps written into the GPU buffer.
    size_t currentBufferSize =  m_Info.GetSectionCountMax() * sizeof( nn::gfx::TimestampBuffer ) * 2;
    void* pMapped = m_QueryBuffer[this->m_ReferenceBufferIndex].Map();
    m_QueryBuffer[this->m_ReferenceBufferIndex].InvalidateMappedRange( 0, currentBufferSize );
    memcpy( m_TimestampBuffer.data(), pMapped, currentBufferSize );
    m_QueryBuffer[this->m_ReferenceBufferIndex].Unmap();

    // Convert the timestamps that were read back into units comparable with system ticks.
    // NX hardware version
#if defined NN_BUILD_CONFIG_OS_SUPPORTS_HORIZON
    nn::os::Tick finalEnd( 0 );
    uint64_t finalTimeStamp = 0;
    for(int i = 0; i < this->m_SectionCount[ this->m_ReferenceBufferIndex ]; ++i)
    {
        Section& destSection = this->m_SectionArray[ this->m_ReferenceBufferIndex ][i];

        // Timestamps are stored as (begin, end) pairs per section.
        NVNcounterData beginNvnCounter;
        NVNcounterData endNvnCounter;
        beginNvnCounter.timestamp = m_TimestampBuffer[2 * i].GetValue();
        endNvnCounter.timestamp = m_TimestampBuffer[2 * i + 1].GetValue();
        beginNvnCounter.counter = 0;
        endNvnCounter.counter = 0;
        int64_t beginTimeStamp = nvnDeviceGetTimestampInNanoseconds(m_Device->ToData()->pNvnDevice, &beginNvnCounter);
        int64_t endTimeStamp = nvnDeviceGetTimestampInNanoseconds(m_Device->ToData()->pNvnDevice, &endNvnCounter);
        destSection.begin = nn::os::ConvertToTick( nn::TimeSpan::FromNanoSeconds( beginTimeStamp ) );
        destSection.end = nn::os::ConvertToTick( nn::TimeSpan::FromNanoSeconds( endTimeStamp ) );

        // Track the section with the latest raw end timestamp.
        if (finalTimeStamp < endNvnCounter.timestamp)
        {
            finalTimeStamp = endNvnCounter.timestamp;
            finalEnd = destSection.end;
        }
        // Clamp so that a section never has a negative duration.
        if (  destSection.end.GetInt64Value() - destSection.begin.GetInt64Value() < 0 )
        {
            destSection.end = destSection.begin;
        }
    }
    this->m_FinalEnd[ this->m_ReferenceBufferIndex ] = finalEnd;

#elif defined NN_BUILD_CONFIG_OS_SUPPORTS_WIN32
    // A (CPU tick, GPU timestamp) pair sampled now is used as the common origin for
    // mapping GPU timestamps onto the CPU tick timeline.
    nn::os::Tick initialCpuTick;
    int64_t initialGpuTimeStamp;

    // Generic version
#if defined( NN_GFX_CONFIG_INCLUDE_GL )
    HDC hDc = static_cast< HDC >( m_Device->ToData()->hDc );
    HGLRC hRc = static_cast< HGLRC >( m_Device->ToData()->renderingContext.hGlRc);

    NN_SDK_ASSERT_NOT_NULL( hDc );
    NN_SDK_ASSERT_NOT_NULL( &m_Device->ToData()->renderingContext );
    NN_SDK_ASSERT_NOT_NULL( m_Device->ToData()->renderingContext.hGlRc );

    // Remember the caller's GL context so it can be restored afterwards.
    HDC currentDc = ::wglGetCurrentDC();
    HGLRC currentGlRc = ::wglGetCurrentContext();

    BOOL result = ::wglMakeCurrent( hDc, hRc );
    NN_SDK_ASSERT( result == TRUE, "GetLastError: 0x%08X\n", GetLastError() );

    GLint64 glTimeStamp;
    glGetInteger64v( GL_TIMESTAMP, &glTimeStamp );
    initialCpuTick = nn::os::GetSystemTick();
    initialGpuTimeStamp = glTimeStamp;

    result = ::wglMakeCurrent( currentDc, currentGlRc );
    NN_SDK_ASSERT( result == TRUE, "GetLastError: 0x%08X\n", GetLastError() );
    NN_UNUSED( result );

    // NXOnWin version
#elif defined NN_GFX_CONFIG_INCLUDE_NVN
    initialCpuTick = nn::os::GetSystemTick();
    initialGpuTimeStamp = nvnDeviceGetCurrentTimestampInNanoseconds(m_Device->ToData()->pNvnDevice);

#endif
    nn::os::Tick finalEnd( 0 );
    int64_t finalTimeStamp = 0;
    for(int i = 0; i < this->m_SectionCount[ this->m_ReferenceBufferIndex ]; ++i)
    {
        Section& destSection = this->m_SectionArray[ this->m_ReferenceBufferIndex ][i];

        // Timestamps are stored as (begin, end) pairs per section.
        int64_t beginTimeStamp = m_TimestampBuffer[2 * i].GetValue();
        int64_t endTimeStamp = m_TimestampBuffer[2 * i + 1].GetValue();

        destSection.begin = initialCpuTick + nn::os::ConvertToTick( nn::TimeSpan::FromNanoSeconds( beginTimeStamp - initialGpuTimeStamp) );
        destSection.end = destSection.begin + nn::os::ConvertToTick( nn::TimeSpan::FromNanoSeconds( endTimeStamp - beginTimeStamp ) );

        // Track the section with the latest raw end timestamp.
        if (finalTimeStamp < endTimeStamp)
        {
            finalTimeStamp = endTimeStamp;
            finalEnd = destSection.end;
        }

        // Clamp so that a section never has a negative duration.
        if ( destSection.end.GetInt64Value() - destSection.begin.GetInt64Value() < 0 )
        {
            destSection.end = destSection.begin;
        }
    }
    this->m_FinalEnd[ this->m_ReferenceBufferIndex ] = finalEnd;
#endif

    // Sort into command execution order.
    // std::sort works on the half-open range [first, last), so 'last' must point one
    // past the final recorded section; pointing at the final section itself would
    // leave that section out of the sort.
    const int referenceSectionCount = this->m_SectionCount[ this->m_ReferenceBufferIndex ];
    if(referenceSectionCount > 0)
    {
        Section* pFirstSection = this->m_SectionArray[ this->m_ReferenceBufferIndex ].data();
        std::sort(
            pFirstSection,
            pFirstSection + referenceSectionCount,
            CompareSectionBeginTick
            );
    }
}

} // namespace perf
} // namespace nn
