﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/perf/perf_Profile.h>
#include <nn/util/util_FormatString.h>
#include <nn/nn_SdkLog.h>

// The NN_SDK_LOG family of macros becomes unavailable in Release builds.
// As a stopgap, a copy of NN_SDK_LOG is kept privately in this file.
//
// NN_PERF_DUMP_LOG(format, ...) fills in an nn::diag::LogMetaData record
// (source location, module name "$", Info severity) and forwards it together
// with the printf-style arguments to nn::diag::detail::LogImpl.
#define NN_PERF_DUMP_LOG(...)                                    \
    do                                                                  \
    {                                                                   \
        ::nn::diag::LogMetaData logMetaData;                            \
        logMetaData.sourceInfo.lineNumber = __LINE__;                   \
        logMetaData.sourceInfo.fileName = __FILE__;                     \
        logMetaData.sourceInfo.functionName = NN_CURRENT_FUNCTION_NAME; \
        logMetaData.moduleName = "$";                                   \
        logMetaData.severity = ::nn::diag::LogSeverity_Info;            \
        logMetaData.verbosity = 0;                                      \
        logMetaData.useDefaultLocaleCharset = false;                    \
        logMetaData.pAdditionalData = static_cast<void*>(0);            \
        logMetaData.additionalDataBytes = 0;                            \
        ::nn::diag::detail::LogImpl(logMetaData, __VA_ARGS__);          \
    } while (NN_STATIC_CONDITION(0))

namespace nn
{
namespace perf
{
// The LoadMeterCenter instance. Set by Initialize() (placement-new into the
// caller's buffer) and reset to NULL by Finalize().
LoadMeterCenter* LoadMeterCenter::m_Instance = NULL;

// Creates the LoadMeterCenter instance inside pMemory and sets up the frame,
// CPU and GPU meters in the bytes that follow it.
//
// pDevice          - gfx device forwarded to the GPU meter setup.
// info             - configuration; copied into the instance.
// pMemory          - work buffer; asserted non-NULL and aligned to GetBufferAlignment().
// memorySize       - size of pMemory; asserted >= CalculateBufferSize(info).
// pMemoryPool      - memory pool forwarded to the GPU meter setup.
// memoryPoolOffset - offset inside pMemoryPool forwarded to the GPU meter setup.
// memoryPoolSize   - asserted >= CalculateMemoryPoolSize(pDevice, info).
//
// Asserts that no instance exists yet and that both CPU and GPU section
// maxima in info are greater than zero.
void LoadMeterCenter::Initialize( nn::gfx::Device* pDevice, nn::perf::LoadMeterCenterInfo& info, void* pMemory, size_t memorySize, nn::gfx::MemoryPool* pMemoryPool, ptrdiff_t memoryPoolOffset, size_t memoryPoolSize ) NN_NOEXCEPT
{
    NN_SDK_ASSERT(m_Instance == NULL);
    NN_SDK_ASSERT(pMemory != NULL);
    NN_SDK_ASSERT( memorySize >= LoadMeterCenter::CalculateBufferSize(info) );
    NN_SDK_ASSERT( memoryPoolSize >= LoadMeterCenter::CalculateMemoryPoolSize(pDevice, info) );
    NN_UNUSED(memorySize);
    NN_UNUSED(memoryPoolSize);

    // bufferHead stays at the start of the buffer; cursor walks forward.
    nn::util::BytePtr bufferHead( pMemory );
    nn::util::BytePtr cursor( pMemory );
    NN_SDK_ASSERT(cursor.IsAligned(LoadMeterCenter::GetBufferAlignment()));

    // The instance itself occupies the first bytes of the buffer.
    m_Instance = new(cursor.Get()) LoadMeterCenter();
    cursor.Advance(sizeof(LoadMeterCenter));

    LoadMeterCenter& center = LoadMeterCenter::GetInstance();
    center.m_GetCoreNumberFunction = nn::os::GetCurrentCoreNumber;

    center.m_Info = info;
    NN_SDK_ASSERT(center.m_Info.GetCpuSectionCountMax() > 0);
    NN_SDK_ASSERT(center.m_Info.GetGpuSectionCountMax() > 0);

    center.m_ProfiledCpuSectionCount = 0;
    center.m_ProfiledGpuSectionCount = 0;
    center.m_FrameRate = 60.f;
    center.m_IsEnabled = true;

    // Frame meter work area.
    cursor.AlignUp(LoadMeterCenter::GetFrameMeterBufferAlignment());
    center.InitializeFrameMeter(cursor.Get());
    cursor.Advance(LoadMeterCenter::CalculateFrameMeterBufferSize(info));

    // CPU meters work area.
    cursor.AlignUp(LoadMeterCenter::GetCpuMeterBufferAlignment());
    center.InitializeCpuMeter(cursor.Get());
    cursor.Advance(LoadMeterCenter::CalculateCpuMeterBufferSize(info));

    // GPU meter work area.
    cursor.AlignUp(LoadMeterCenter::GetGpuMeterBufferAlignment());
    center.InitializeGpuMeter( cursor.Get(), pDevice, pMemoryPool, memoryPoolOffset);
    cursor.Advance(LoadMeterCenter::CalculateGpuMeterBufferSize(info));

    NN_SDK_ASSERT(bufferHead.Distance(cursor.Get()) <= static_cast<ptrdiff_t>(LoadMeterCenter::CalculateBufferSize(info)), "memory shortage");
    NN_UNUSED(bufferHead);
}

// Creates the LoadMeterCenter instance inside pMemory and sets up the frame
// and CPU meters only; the stored GPU section maximum is forced to 0.
//
// info       - configuration; copied into the instance.
// pMemory    - work buffer; asserted non-NULL and aligned to GetBufferAlignment().
// memorySize - size of pMemory; asserted >= CalculateBufferSize(info).
//
// Asserts that no instance exists yet, that at least one core meter or user
// meter is requested, and that the CPU section maximum is greater than zero.
void LoadMeterCenter::Initialize( nn::perf::LoadMeterCenterInfo& info, void* pMemory, size_t memorySize ) NN_NOEXCEPT
{
    NN_SDK_ASSERT(m_Instance == NULL);
    NN_SDK_ASSERT(pMemory != NULL);
    NN_SDK_ASSERT( memorySize >= LoadMeterCenter::CalculateBufferSize(info) );
    NN_UNUSED(memorySize);

    // bufferHead stays at the start of the buffer; cursor walks forward.
    nn::util::BytePtr bufferHead( pMemory );
    nn::util::BytePtr cursor( pMemory );
    NN_SDK_ASSERT(cursor.IsAligned(LoadMeterCenter::GetBufferAlignment()));

    // The instance itself occupies the first bytes of the buffer.
    m_Instance = new(cursor.Get()) LoadMeterCenter();
    cursor.Advance(sizeof(LoadMeterCenter));

    LoadMeterCenter& center = LoadMeterCenter::GetInstance();
    center.m_GetCoreNumberFunction = nn::os::GetCurrentCoreNumber;

    center.m_Info = info;
    NN_SDK_ASSERT(center.m_Info.GetCoreCount() > 0 || center.m_Info.GetUserMeterCount() > 0);
    NN_SDK_ASSERT(center.m_Info.GetCpuSectionCountMax() > 0);

    // No GPU meter in this configuration.
    center.m_Info.SetGpuSectionCountMax(0);
    center.m_ProfiledCpuSectionCount = 0;
    center.m_ProfiledGpuSectionCount = 0;
    center.m_FrameRate = 60.f;
    center.m_IsEnabled = true;

    // Frame meter work area.
    cursor.AlignUp(LoadMeterCenter::GetFrameMeterBufferAlignment());
    center.InitializeFrameMeter(cursor.Get());
    cursor.Advance(LoadMeterCenter::CalculateFrameMeterBufferSize(info));

    // CPU meters work area.
    cursor.AlignUp(LoadMeterCenter::GetCpuMeterBufferAlignment());
    center.InitializeCpuMeter(cursor.Get());
    cursor.Advance(LoadMeterCenter::CalculateCpuMeterBufferSize(info));

    NN_SDK_ASSERT(bufferHead.Distance(cursor.Get()) <= static_cast<ptrdiff_t>(LoadMeterCenter::CalculateBufferSize(info)), "memory shortage");
    NN_UNUSED(bufferHead);
}

// Tears down the CPU side of the instance: releases the CPU section array,
// detaches every user and core meter from the frame meter, finalizes them,
// releases their arrays, and finally clears the instance pointer.
//
// NOTE(review): the frame meter is not finalized here and the destructor of
// the placement-new'd instance is not invoked - confirm this is intended.
void LoadMeterCenter::Finalize() NN_NOEXCEPT
{
    LoadMeterCenter& center = LoadMeterCenter::GetInstance();

    center.m_ProfileCpuSectionArray.ResetWorkMemory();

    const int userMeterCount = center.m_Info.GetUserMeterCount();
    if (userMeterCount > 0)
    {
        for (int index = 0; index < userMeterCount; ++index)
        {
            center.m_FrameMeter.DetachLoadMeter( &center.m_CpuUserMeterArray[index] );
            center.m_CpuUserMeterArray[index].Finalize();
        }
        center.m_CpuUserMeterArray.ResetWorkMemory();
    }

    const int coreMeterCount = center.m_Info.GetCoreCount();
    if (coreMeterCount > 0)
    {
        for (int index = 0; index < coreMeterCount; ++index)
        {
            center.m_FrameMeter.DetachLoadMeter( &center.m_CpuCoreMeterArray[index] );
            center.m_CpuCoreMeterArray[index].Finalize();
        }
        center.m_CpuCoreMeterArray.ResetWorkMemory();
    }

    // Assigning NULL unconditionally has the same effect as the guarded reset.
    m_Instance = NULL;
}

// Tears down the GPU meter and then the CPU side of the instance.
//
// pDevice - gfx device passed to the GPU meter's Finalize.
//
// The GPU section array is released and the GPU meter is detached from the
// frame meter and finalized first; the remaining work (CPU section array,
// user/core meters, clearing the instance pointer) is the same sequence that
// Finalize() performs, so it is delegated to that overload.
void LoadMeterCenter::Finalize(nn::gfx::Device* pDevice) NN_NOEXCEPT
{
    LoadMeterCenter& center = LoadMeterCenter::GetInstance();

    center.m_ProfileGpuSectionArray.ResetWorkMemory();
    center.m_FrameMeter.DetachLoadMeter( &center.m_GpuMeter );
    center.m_GpuMeter.Finalize( pDevice );

    LoadMeterCenter::Finalize();
}

// Returns the alignment required for the work buffer passed to Initialize(),
// which is the alignment of LoadMeterCenter itself.
size_t LoadMeterCenter::GetBufferAlignment() NN_NOEXCEPT
{
    const size_t alignment = NN_ALIGNOF(LoadMeterCenter);
    return alignment;
}

// Returns the work buffer size Initialize() needs for the given configuration.
//
// The total is the size of the instance plus, for each sub-area (frame meter,
// CPU meters, and GPU meter when the GPU section maximum is positive), its
// alignment value and its buffer size. Adding the alignment value leaves room
// for the AlignUp performed before each sub-area.
size_t LoadMeterCenter::CalculateBufferSize(LoadMeterCenterInfo info) NN_NOEXCEPT
{
    size_t total = sizeof(LoadMeterCenter);

    // Frame meter.
    total += LoadMeterCenter::GetFrameMeterBufferAlignment();
    total += LoadMeterCenter::CalculateFrameMeterBufferSize(info);

    // CPU meters.
    total += LoadMeterCenter::GetCpuMeterBufferAlignment();
    total += LoadMeterCenter::CalculateCpuMeterBufferSize(info);

    // GPU meter, only when GPU sections are requested.
    const bool usesGpuMeter = info.GetGpuSectionCountMax() > 0;
    if (usesGpuMeter)
    {
        total += LoadMeterCenter::GetGpuMeterBufferAlignment();
        total += LoadMeterCenter::CalculateGpuMeterBufferSize(info);
    }

    return total;
}

// Returns the GPU query buffer alignment for the given configuration.
//
// pDevice - gfx device forwarded to GpuMeter::GetQueryBufferAlignment.
// info    - supplies the GPU section maximum and GPU buffer count.
size_t LoadMeterCenter::GetMemoryPoolAlignment(nn::gfx::Device* pDevice, LoadMeterCenterInfo info) NN_NOEXCEPT
{
    LoadMeterInfo gpuMeterInfo;
    gpuMeterInfo.SetSectionCountMax( info.GetGpuSectionCountMax() );
    gpuMeterInfo.SetBufferCount( info.GetGpuBufferCount() );

    const size_t alignment = GpuMeter::GetQueryBufferAlignment(pDevice, gpuMeterInfo);
    return alignment;
}

// Returns the memory pool size needed for the GPU meter's query buffer:
// the query buffer size plus the query buffer alignment value.
//
// pDevice - gfx device forwarded to the GpuMeter queries.
// info    - supplies the GPU section maximum and GPU buffer count.
size_t LoadMeterCenter::CalculateMemoryPoolSize(nn::gfx::Device* pDevice, LoadMeterCenterInfo info) NN_NOEXCEPT
{
    LoadMeterInfo gpuMeterInfo;
    gpuMeterInfo.SetSectionCountMax( info.GetGpuSectionCountMax() );
    gpuMeterInfo.SetBufferCount( info.GetGpuBufferCount() );

    const size_t queryBufferSize = GpuMeter::CalculateQueryBufferSize(pDevice, gpuMeterInfo);
    const size_t queryBufferAlignment = GpuMeter::GetQueryBufferAlignment(pDevice, gpuMeterInfo);
    return queryBufferSize + queryBufferAlignment;
}

// Writes the accumulated profiling results to the log.
//
// Output order: a table header, the frame meter total, then for every CPU
// meter (core meters first, user meters after) its total followed by the
// profiled sections whose meter name matches, and finally the same for the
// GPU meter when the GPU section maximum is positive.
//
// Each total is printed in milliseconds together with its percentage of one
// frame period (1,000,000 us / m_FrameRate).
//
// The table header is logged as a single literal instead of being assembled
// in a stack buffer and passed as the format argument; the always-empty
// second buffer that used to be logged has been removed.
void LoadMeterCenter::Dump() NN_NOEXCEPT
{
    LoadMeterCenter& center = LoadMeterCenter::GetInstance();

    NN_PERF_DUMP_LOG("\nTime: [ms]\n");
    NN_PERF_DUMP_LOG(
        "|     total|   call| average|     max|     min|"
        "  tag| name |\n");

    // One frame period in microseconds; used as the 100% reference.
    int64_t referenceMicroSeconds = static_cast<int64_t>( 1000000.f / center.m_FrameRate );
    nn::TimeSpan totalSpan = center.m_FrameMeter.GetLastTotalSpan();
    float rate = 100.f * totalSpan.GetMicroSeconds() / referenceMicroSeconds;
    NN_PERF_DUMP_LOG("[ %s ] %9.3f ms (%f%%)\n", center.m_FrameMeter.GetName(), static_cast<float>(totalSpan.GetMicroSeconds() / 1000.f), rate );

    // CPU results: core meters occupy indices [0, coreCount), user meters follow.
    const int coreCount = center.m_Info.GetCoreCount();
    const int cpuMeterCount = coreCount + center.m_Info.GetUserMeterCount();
    for(int meterIndex = 0; meterIndex < cpuMeterCount; ++meterIndex)
    {
        CpuMeter* loadMeter = NULL;
        if(meterIndex < coreCount)
        {
            loadMeter = &center.m_CpuCoreMeterArray[meterIndex];
        }
        else
        {
            loadMeter = &center.m_CpuUserMeterArray[meterIndex - coreCount];
        }
        totalSpan = loadMeter->GetLastTotalSpan();
        rate = 100.f * totalSpan.GetMicroSeconds() / referenceMicroSeconds;
        NN_PERF_DUMP_LOG("[ %s ] %10.3f ms (%f%%)\n", loadMeter->GetName(), static_cast<float>(totalSpan.GetMicroSeconds() / 1000.f), rate );

        // Print only the sections that were recorded for this meter.
        for( int i = 0; i < center.m_ProfiledCpuSectionCount; ++i )
        {
            ProfileSection* profileSection = &center.m_ProfileCpuSectionArray[i];

            if( !nn::util::Strncmp( profileSection->meterName, loadMeter->GetName(), 128) )
            {
                center.DumpDetails( profileSection );
            }
        }
    }

    // GPU results.
    if(center.m_Info.GetGpuSectionCountMax() > 0)
    {
        LoadMeterBase* loadMeter = &center.m_GpuMeter;
        totalSpan = loadMeter->GetLastTotalSpan();
        rate = 100.f * totalSpan.GetMicroSeconds() / referenceMicroSeconds;
        NN_PERF_DUMP_LOG("[ %s ] %10.3f ms (%f%%)\n", loadMeter->GetName(), static_cast<float>(totalSpan.GetMicroSeconds() / 1000.f), rate );

        for( int i = 0; i < center.m_ProfiledGpuSectionCount; ++i )
        {
            ProfileSection* profileSection = &center.m_ProfileGpuSectionArray[i];
            if( !nn::util::Strncmp( profileSection->meterName, loadMeter->GetName(), 128) )
            {
                center.DumpDetails( profileSection );
            }
        }
    }
}

// Sets up the frame meter in the work area starting at pMemory.
//
// pMemory - start of the frame meter work area; asserted to be aligned to
//           GetFrameMeterBufferAlignment(), the same value Initialize() aligns
//           the cursor to before calling this function.
//
// The frame meter is configured with a single section and the CPU buffer
// count from m_Info, named "Frame", and given the current frame rate.
void LoadMeterCenter::InitializeFrameMeter(void* pMemory) NN_NOEXCEPT
{
    nn::util::BytePtr memory( pMemory );
    nn::util::BytePtr initMemory = memory;
    NN_SDK_ASSERT(memory.IsAligned(LoadMeterCenter::GetFrameMeterBufferAlignment()));

    LoadMeterInfo info;
    info.SetSectionCountMax(1);
    info.SetBufferCount(m_Info.GetCpuBufferCount());

    // Computed once; used both for initialization and for advancing the cursor.
    const size_t meterBufferSize = CpuMeter::CalculateBufferSize(info);

    m_FrameMeter.Initialize(info, memory.Get(), meterBufferSize);
    m_FrameMeter.SetName("Frame");
    m_FrameMeter.SetColor(nn::util::Color4u8(16,16,16,150));
    m_FrameMeter.SetFrameRate(m_FrameRate);
    memory.Advance(meterBufferSize);

    NN_SDK_ASSERT(initMemory.Distance(memory.Get()) <= static_cast<ptrdiff_t>(LoadMeterCenter::CalculateFrameMeterBufferSize(m_Info)), "memory shortage");
    NN_UNUSED(initMemory);
}

// Sets up the per-core and user CPU meters and the CPU result table in the
// work area starting at pMemory.
//
// pMemory - start of the CPU meter work area; asserted to be aligned to
//           GetCpuMeterBufferAlignment().
//
// Layout, in order: the core meter array and each core meter's buffer, the
// user meter array and each user meter's buffer, then the ProfileSection
// array sized for sectionCountMax entries per meter. Every meter is attached
// to the frame meter and named "CPU<n>" or "USER<n>".
//
// The name is formatted with the full size of the local buffer. The previous
// limits of 5 and 6 bytes truncated "CPU10" to "CPU1" and "USER10" to "USER1",
// so meters with an index of 10 or more received names that collide with
// other meters; results are matched by meter name in Profile() and Dump().
void LoadMeterCenter::InitializeCpuMeter(void* pMemory) NN_NOEXCEPT
{
    int coreMeterCount = m_Info.GetCoreCount();
    int userMeterCount = m_Info.GetUserMeterCount();
    int meterCount = coreMeterCount + userMeterCount;
    int sectionCountMax = m_Info.GetCpuSectionCountMax();

    nn::util::BytePtr memory( pMemory );
    nn::util::BytePtr initMemory = memory;
    NN_SDK_ASSERT(memory.IsAligned(LoadMeterCenter::GetCpuMeterBufferAlignment()));

    size_t memorySize;
    if(coreMeterCount > 0)
    {
        // Storage for the array of core meters.
        memory.AlignUp(CpuMeter::GetBufferAlignment());
        memorySize = m_CpuCoreMeterArray.CalculateWorkMemorySize(coreMeterCount);
        m_CpuCoreMeterArray.ResetWorkMemory(memory.Get(), memorySize, coreMeterCount);
        memory.Advance(memorySize);
        for(int i = 0; i < coreMeterCount; ++i)
        {
            LoadMeterInfo info;
            info.SetSectionCountMax( sectionCountMax );
            info.SetBufferCount(m_Info.GetCpuBufferCount());

            // Work buffer of this core meter.
            memory.AlignUp(CpuMeter::GetBufferAlignment());
            memorySize = CpuMeter::CalculateBufferSize(info);
            m_CpuCoreMeterArray[i].Initialize( info, memory.Get(), memorySize );
            memory.Advance(memorySize);

            m_CpuCoreMeterArray[i].SetFrameRate(m_FrameRate);
            m_FrameMeter.AttachLoadMeter( &m_CpuCoreMeterArray[i] );

            char name[128];
            nn::util::SNPrintf(name, sizeof(name), "CPU%d", i);
            m_CpuCoreMeterArray[i].SetName( name );
        }
    }

    if(userMeterCount > 0)
    {
        // Storage for the array of user meters.
        memory.AlignUp(CpuMeter::GetBufferAlignment());
        memorySize = m_CpuUserMeterArray.CalculateWorkMemorySize(userMeterCount);
        m_CpuUserMeterArray.ResetWorkMemory(memory.Get(), memorySize, userMeterCount);
        memory.Advance(memorySize);
        for(int i = 0; i < userMeterCount; ++i)
        {
            LoadMeterInfo info;
            info.SetSectionCountMax( sectionCountMax );
            info.SetBufferCount(m_Info.GetCpuBufferCount());

            // Work buffer of this user meter.
            memory.AlignUp(CpuMeter::GetBufferAlignment());
            memorySize = CpuMeter::CalculateBufferSize(info);
            m_CpuUserMeterArray[i].Initialize( info, memory.Get(), memorySize );
            memory.Advance(memorySize);

            m_CpuUserMeterArray[i].SetFrameRate(m_FrameRate);
            m_FrameMeter.AttachLoadMeter( &m_CpuUserMeterArray[i] );

            char name[128];
            nn::util::SNPrintf(name, sizeof(name), "USER%d", i);
            m_CpuUserMeterArray[i].SetName( name );
        }
    }

    // Reserve the memory that stores the aggregated results.
    memory.AlignUp(NN_ALIGNOF(ProfileSection));
    memorySize = m_ProfileCpuSectionArray.CalculateWorkMemorySize(sectionCountMax * meterCount);
    m_ProfileCpuSectionArray.ResetWorkMemory(memory.Get(), memorySize, sectionCountMax * meterCount);
    memory.Advance(memorySize);
    NN_SDK_ASSERT(initMemory.Distance(memory.Get()) <= static_cast<ptrdiff_t>(LoadMeterCenter::CalculateCpuMeterBufferSize(m_Info)), "memory shortage");
    NN_UNUSED(initMemory);
}

// Sets up the GPU meter and the GPU result table in the work area starting
// at pMemory.
//
// pMemory          - start of the GPU meter work area; asserted to be aligned
//                    to GetGpuMeterBufferAlignment().
// pDevice          - gfx device forwarded to the GPU meter.
// pMemoryPool      - memory pool forwarded to the GPU meter.
// memoryPoolOffset - offset inside pMemoryPool forwarded to the GPU meter.
//
// The meter is attached to the frame meter, named "GPU" and given the current
// frame rate; the ProfileSection array that follows holds one entry per GPU
// section.
void LoadMeterCenter::InitializeGpuMeter( void* pMemory, nn::gfx::Device* pDevice, nn::gfx::MemoryPool* pMemoryPool, ptrdiff_t memoryPoolOffset ) NN_NOEXCEPT
{
    const int gpuSectionCountMax = m_Info.GetGpuSectionCountMax();

    // areaHead stays at the start of the area; cursor walks forward.
    nn::util::BytePtr areaHead( pMemory );
    nn::util::BytePtr cursor( pMemory );
    NN_SDK_ASSERT(cursor.IsAligned(LoadMeterCenter::GetGpuMeterBufferAlignment()));

    LoadMeterInfo meterInfo;
    meterInfo.SetSectionCountMax( gpuSectionCountMax );
    meterInfo.SetBufferCount(m_Info.GetGpuBufferCount());
    meterInfo.SetCoreCount(m_Info.GetCoreCount());

    const size_t meterBufferSize = GpuMeter::CalculateBufferSize(meterInfo);
    const size_t queryBufferSize = GpuMeter::CalculateQueryBufferSize(pDevice, meterInfo);
    m_GpuMeter.Initialize( pDevice, meterInfo, cursor.Get(), meterBufferSize, pMemoryPool, memoryPoolOffset, queryBufferSize);
    cursor.Advance(meterBufferSize);

    m_FrameMeter.AttachLoadMeter( &m_GpuMeter );
    m_GpuMeter.SetName("GPU");
    m_GpuMeter.SetFrameRate(m_FrameRate);

    // Reserve the memory that stores the aggregated results.
    cursor.AlignUp(NN_ALIGNOF(ProfileSection));
    const size_t resultTableSize = m_ProfileGpuSectionArray.CalculateWorkMemorySize(gpuSectionCountMax);
    m_ProfileGpuSectionArray.ResetWorkMemory(cursor.Get(), resultTableSize, gpuSectionCountMax);
    cursor.Advance(resultTableSize);

    NN_SDK_ASSERT(areaHead.Distance(cursor.Get()) <= static_cast<ptrdiff_t>(LoadMeterCenter::CalculateGpuMeterBufferSize(m_Info)), "memory shortage");
    NN_UNUSED(areaHead);
}

// Returns the alignment of the frame meter work area, which is the CpuMeter
// buffer alignment.
size_t LoadMeterCenter::GetFrameMeterBufferAlignment() NN_NOEXCEPT
{
    const size_t alignment = CpuMeter::GetBufferAlignment();
    return alignment;
}

// Returns the work area size of the frame meter: the CpuMeter buffer size for
// one section and the CPU buffer count taken from info.
size_t LoadMeterCenter::CalculateFrameMeterBufferSize(LoadMeterCenterInfo info) NN_NOEXCEPT
{
    LoadMeterInfo frameMeterInfo;
    frameMeterInfo.SetSectionCountMax(1);
    frameMeterInfo.SetBufferCount(info.GetCpuBufferCount());

    const size_t bufferSize = CpuMeter::CalculateBufferSize(frameMeterInfo);
    return bufferSize;
}

// Returns the alignment of the CPU meter work area, which is the CpuMeter
// buffer alignment.
size_t LoadMeterCenter::GetCpuMeterBufferAlignment() NN_NOEXCEPT
{
    const size_t alignment = CpuMeter::GetBufferAlignment();
    return alignment;
}

// Returns the size of the CPU meter work area for the given configuration.
//
// For each non-empty meter group (core meters, then user meters) the total
// includes the meter array storage plus, per meter, its buffer size; every
// piece is preceded by one CpuMeter alignment value of slack. The
// ProfileSection array (sectionCountMax entries per meter) and its alignment
// slack are added at the end.
size_t LoadMeterCenter::CalculateCpuMeterBufferSize(LoadMeterCenterInfo info) NN_NOEXCEPT
{
    const int coreMeterCount = info.GetCoreCount();
    const int userMeterCount = info.GetUserMeterCount();
    const int sectionCountMax = info.GetCpuSectionCountMax();

    // Core and user meters are laid out the same way, so both groups share
    // one pass of the sizing code.
    const int groupMeterCounts[2] = { coreMeterCount, userMeterCount };

    size_t total = 0;
    for (int group = 0; group < 2; ++group)
    {
        const int groupMeterCount = groupMeterCounts[group];
        if (!(groupMeterCount > 0))
        {
            continue;
        }

        // Meter array storage.
        total += CpuMeter::GetBufferAlignment();
        total += nn::util::PlacementArray<CpuMeter>::CalculateWorkMemorySize(groupMeterCount);

        // Work buffer of each meter in the group.
        for (int meter = 0; meter < groupMeterCount; ++meter)
        {
            LoadMeterInfo meterInfo;
            meterInfo.SetSectionCountMax( sectionCountMax );
            meterInfo.SetBufferCount(info.GetCpuBufferCount());
            total += CpuMeter::GetBufferAlignment();
            total += CpuMeter::CalculateBufferSize(meterInfo);
        }
    }

    // Result table shared by all CPU meters.
    const int totalMeterCount = coreMeterCount + userMeterCount;
    total += NN_ALIGNOF(ProfileSection);
    total += nn::util::PlacementArray<ProfileSection>::CalculateWorkMemorySize(sectionCountMax * totalMeterCount);
    return total;
}

// Returns the alignment of the GPU meter work area, which is the GpuMeter
// buffer alignment.
size_t LoadMeterCenter::GetGpuMeterBufferAlignment() NN_NOEXCEPT
{
    const size_t alignment = GpuMeter::GetBufferAlignment();
    return alignment;
}

// Returns the size of the GPU meter work area for the given configuration:
// the GpuMeter buffer size, one ProfileSection alignment value of slack, and
// the ProfileSection array holding one entry per GPU section.
size_t LoadMeterCenter::CalculateGpuMeterBufferSize(LoadMeterCenterInfo info) NN_NOEXCEPT
{
    const int gpuSectionCountMax = info.GetGpuSectionCountMax();

    LoadMeterInfo gpuMeterInfo;
    gpuMeterInfo.SetSectionCountMax( gpuSectionCountMax );
    gpuMeterInfo.SetBufferCount(info.GetGpuBufferCount());
    gpuMeterInfo.SetCoreCount(info.GetCoreCount());

    size_t total = GpuMeter::CalculateBufferSize(gpuMeterInfo);
    total += NN_ALIGNOF(ProfileSection);
    total += nn::util::PlacementArray<ProfileSection>::CalculateWorkMemorySize(gpuSectionCountMax);
    return total;
}

// Aggregates the profiled sections of the given unit across all meters.
// Equivalent to the four-argument overload with a NULL meter name, which
// disables filtering by meter.
LoadMeterCenter::ProfileSection LoadMeterCenter::GetResultAll(UnitType unit, const char* sectionName, uint32_t tag ) NN_NOEXCEPT
{
    const char* const anyMeter = NULL;
    return GetResultAll(unit, anyMeter, sectionName, tag);
}

// Aggregates the profiled sections of the given unit that pass all filters.
//
// unit        - selects the CPU or GPU result table.
// meterName   - only entries with this meter name are counted; NULL matches all.
// sectionName - only entries with this section name are counted; NULL matches all.
// tag         - only entries with this tag are counted; 0 matches all.
//
// Names are compared over at most 128 characters. The returned ProfileSection
// starts default-constructed; call counts and elapsed times of matching
// entries are summed and the max/min elapsed times are folded in.
LoadMeterCenter::ProfileSection LoadMeterCenter::GetResultAll(UnitType unit, const char* meterName, const char* sectionName, uint32_t tag ) NN_NOEXCEPT
{
    ProfileSection total;
    LoadMeterCenter& center = LoadMeterCenter::GetInstance();

    const int entryCount = center.GetProfiledSectionCount(unit);
    ProfileSection* entries = center.GetProfileSectionArray(unit);

    for (int index = 0; index < entryCount; ++index)
    {
        const ProfileSection& entry = entries[index];

        // Each filter is skipped when its argument is the "match all" value.
        if (meterName != NULL && nn::util::Strncmp(meterName, entry.meterName, 128))
        {
            continue;
        }
        if (sectionName != NULL && nn::util::Strncmp(sectionName, entry.sectionName, 128))
        {
            continue;
        }
        if (tag != 0 && tag != entry.tag)
        {
            continue;
        }

        total.callCount += entry.callCount;
        total.elapsedTime += entry.elapsedTime;
        total.maxElapsedTime = std::max(total.maxElapsedTime, entry.maxElapsedTime);
        total.minElapsedTime = std::min(total.minElapsedTime, entry.minElapsedTime);
    }
    return total;
}

// Returns the start of the result table for the given unit: the CPU table
// for UnitType_Cpu, the GPU table for any other value.
LoadMeterCenter::ProfileSection* LoadMeterCenter::GetProfileSectionArray( UnitType unit ) NN_NOEXCEPT
{
    if (unit != UnitType_Cpu)
    {
        return m_ProfileGpuSectionArray.data();
    }
    return m_ProfileCpuSectionArray.data();
}

// Returns the number of entries in use in the result table for the given
// unit: the CPU count for UnitType_Cpu, the GPU count for any other value.
int LoadMeterCenter::GetProfiledSectionCount( UnitType unit ) NN_NOEXCEPT
{
    if (unit != UnitType_Cpu)
    {
        return m_ProfiledGpuSectionCount;
    }
    return m_ProfiledCpuSectionCount;
}

// Stores the number of entries in use in the result table for the given
// unit: the CPU count for UnitType_Cpu, the GPU count for any other value.
void LoadMeterCenter::SetProfiledSectionCount( UnitType unit, int value ) NN_NOEXCEPT
{
    if (unit != UnitType_Cpu)
    {
        m_ProfiledGpuSectionCount = value;
        return;
    }
    m_ProfiledCpuSectionCount = value;
}

// Folds the last results of every meter of the given unit into that unit's
// result table.
//
// unit - UnitType_Gpu processes the GPU meter (only when the GPU section
//        maximum is positive); any other value processes all core meters
//        followed by all user meters.
//
// Each section is identified by (meter name, section name, tag), with names
// compared over at most 128 characters. A section that already has an entry
// is accumulated into it; otherwise a new entry is appended.
//
// The table capacity is the element count given to ResetWorkMemory during
// initialization (CPU: section maximum x meter count, GPU: section maximum).
// A new section that would not fit is skipped instead of being written past
// the end of the table.
void LoadMeterCenter::Profile( UnitType unit ) NN_NOEXCEPT
{
    const bool isGpuUnit = (unit == UnitType_Gpu);
    const int coreCount = m_Info.GetCoreCount();
    const int cpuMeterCount = coreCount + m_Info.GetUserMeterCount();

    int maxMeter = 0;
    if(isGpuUnit)
    {
        maxMeter = (m_Info.GetGpuSectionCountMax() > 0) ? 1 : 0;
    }
    else
    {
        maxMeter = cpuMeterCount;
    }

    // Capacity follows the same CPU/other split as GetProfileSectionArray().
    const int capacity = (unit == UnitType_Cpu)
        ? m_Info.GetCpuSectionCountMax() * cpuMeterCount
        : m_Info.GetGpuSectionCountMax();

    ProfileSection* profileSectionArray = GetProfileSectionArray(unit);

    for(int meterIndex = 0; meterIndex < maxMeter; ++meterIndex)
    {
        LoadMeterBase* loadMeter = NULL;
        if(isGpuUnit)
        {
            loadMeter = &m_GpuMeter;
        }
        else if(meterIndex < coreCount)
        {
            loadMeter = &m_CpuCoreMeterArray[meterIndex];
        }
        else
        {
            loadMeter = &m_CpuUserMeterArray[meterIndex - coreCount];
        }

        for(int f = 0; f < loadMeter->GetLastSectionCount(); ++f)
        {
            LoadMeterBase::Section result = loadMeter->GetLastResult(f);
            // A section without a name is treated as the string "NULL".
            if( result.name == NULL )
            {
                result.name = "NULL";
            }

            // Re-read every time: the count grows as new entries are appended.
            const int profiledSectionCount = GetProfiledSectionCount(unit);

            bool isOverlap = false;
            int insertIndex = profiledSectionCount;

            // Look for an existing entry with the same meter, section and tag.
            for(int i = 0; i < profiledSectionCount; ++i)
            {
                if( !nn::util::Strncmp(loadMeter->GetName(), profileSectionArray[i].meterName, 128)
                    && !nn::util::Strncmp(result.name, profileSectionArray[i].sectionName, 128)
                    && result.tag == profileSectionArray[i].tag )
                {
                    insertIndex = i;
                    isOverlap = true;
                    break;
                }
            }

            // Table is full and this is a new section: drop it rather than
            // write outside the array.
            if( !isOverlap && insertIndex >= capacity )
            {
                continue;
            }

            nn::TimeSpan elapsedTime;
            elapsedTime = ( result.end - result.begin ).ToTimeSpan();
            profileSectionArray[insertIndex].sectionName = result.name;
            profileSectionArray[insertIndex].tag = result.tag;
            profileSectionArray[insertIndex].meterName = loadMeter->GetName();
            profileSectionArray[insertIndex].callCount++;
            profileSectionArray[insertIndex].elapsedTime += elapsedTime;
            profileSectionArray[insertIndex].maxElapsedTime = std::max(profileSectionArray[insertIndex].maxElapsedTime, elapsedTime);
            profileSectionArray[insertIndex].minElapsedTime = std::min(profileSectionArray[insertIndex].minElapsedTime, elapsedTime);

            if(isOverlap == false)
            {
                SetProfiledSectionCount(unit, profiledSectionCount + 1);
            }
        }
    }
}

// Resets every entry in use in the result table of the given unit and sets
// the unit's entry count back to 0.
//
// Each entry gets zero elapsed time and call count, NULL names, tag 0, a
// minimum elapsed time of 999 seconds and a maximum elapsed time built from
// std::numeric_limits<nn::TimeSpan>::min().
//
// NOTE(review): the value of std::numeric_limits<nn::TimeSpan>::min() depends
// on whether a numeric_limits specialization exists for nn::TimeSpan -
// confirm it yields the intended initial maximum.
void LoadMeterCenter::Clear(UnitType unit) NN_NOEXCEPT
{
    const int usedCount = GetProfiledSectionCount(unit);
    ProfileSection* entries = GetProfileSectionArray(unit);

    for (int index = 0; index < usedCount; ++index)
    {
        ProfileSection& entry = entries[index];
        entry.meterName = NULL;
        entry.sectionName = NULL;
        entry.tag = 0;
        entry.callCount = 0;
        entry.elapsedTime = nn::TimeSpan( 0 );
        entry.maxElapsedTime = nn::TimeSpan( std::numeric_limits<nn::TimeSpan>::min() );
        entry.minElapsedTime = nn::TimeSpan::FromSeconds( 999 );
    }
    SetProfiledSectionCount(unit, 0);
}

// Logs one table row for a profiled section: total time, call count, average,
// maximum and minimum time (all times in milliseconds), the tag, and the
// section name when it is not NULL.
//
// profileSection - entry to print; must not be NULL (it is dereferenced
//                  unconditionally).
//
// The row is emitted through a single literal format string. The section
// name is caller-supplied text, so it is passed as a "%s" argument: it must
// never be used as the format itself, and it is no longer concatenated into a
// fixed 256-byte stack buffer that a long name could overrun.
void LoadMeterCenter::DumpDetails( ProfileSection* profileSection) NN_NOEXCEPT
{
    // Microseconds to milliseconds.
    const float elapsedTime = profileSection->elapsedTime.GetMicroSeconds() / 1000.f;
    const float maxElapsedTime = profileSection->maxElapsedTime.GetMicroSeconds() / 1000.f;
    const float minElapsedTime = profileSection->minElapsedTime.GetMicroSeconds() / 1000.f;
    const float averageTime = elapsedTime / static_cast<float>(profileSection->callCount);

    // A missing name prints as nothing, matching the previous output.
    const char* sectionName = (profileSection->sectionName != NULL) ? profileSection->sectionName : "";

    NN_PERF_DUMP_LOG("| %9.3f| %6d| %7.3f| %7.3f| %7.3f| %4d|%s\n",
        elapsedTime,
        profileSection->callCount,
        averageTime,
        maxElapsedTime,
        minElapsedTime,
        profileSection->tag,
        sectionName);
}

} // namespace perf
} // namespace nn
