﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

//-----------------------------------------------------------------------------
// メモリレイテンシ測定テスト
//-----------------------------------------------------------------------------

#include <random>

#include <nn/nn_Common.h>
#include <nn/nn_Abort.h>
#include <nn/nn_Log.h>

#include <nn/oe.h>
#include <nn/os.h>
#include <nn/os/os_MemoryAttribute.h>

namespace
{
    // Size in bytes that one list node must occupy.
    // NOTE(review): 64 is taken to be the cache line size of the target CPU - confirm.
    const int CacheLineSize = 64;
    // Node of the doubly linked list that the latency test walks.
    // NN_ALIGNAS(CacheLineSize) is presumably what pads the node up to CacheLineSize
    // bytes; the static assertion below verifies the resulting size.
    struct NN_ALIGNAS(CacheLineSize) List
    {
        List* pPrev; // Previous node
        List* pNext; // Next node; this is the link followed by Search()
        int dummy; // Value stored to suppress optimization
    };

    // Each node must be exactly CacheLineSize bytes.
    NN_STATIC_ASSERT(sizeof(List) == CacheLineSize);

    // How the list nodes are linked before a measurement.
    // nnMain() requests shuffling from InitializeList() only for AccessPattern_Random.
    enum AccessPattern
    {
        AccessPattern_Sequential, // Nodes stay linked in array order
        AccessPattern_Random, // Links are rearranged after the list is built
    };

    // Test set: describes one measurement case
    struct TestSetEntry
    {
        const char* name; // Label printed in front of the result
        size_t size; // Number of bytes of g_ListMemory used to build the list
        int loop; // Number of pNext hops performed by Search()
        bool enableCache; // false: the region is switched to MemoryAttribute_Uncached while measuring
        AccessPattern accessPattern; // AccessPattern_Random: the list is shuffled before measuring
    };
    // Measurement cases, executed in order by nnMain().
    // Columns: label, list size in bytes, hop count, cache enabled, access pattern.
    // The label is only text: the cache mode and access pattern that are actually
    // measured come from the last two columns, so they must agree with the label.
    const TestSetEntry g_TestSet[] = {
        { " 32KB (TX1 L1 Size)              ",          32 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { "128KB                            ",         128 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { "512KB                            ",         512 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { "  2MB (TX1 L2 Size)              ",    2 * 1024 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { "  8MB                            ",    8 * 1024 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { " 32MB (Cached & Sequential)      ",   32 * 1024 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { " 32MB (Cached & Random)          ",   32 * 1024 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Random },
        { " 32MB (Uncached & Sequential)    ",   32 * 1024 * 1024, 10 * 1000 * 1000, false, AccessPattern_Sequential },
        { " 32MB (Uncached & Random)        ",   32 * 1024 * 1024, 10 * 1000 * 1000, false, AccessPattern_Random },
        { "128MB (Cached & Sequential)      ",  128 * 1024 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Sequential },
        { "128MB (Cached & Random)          ",  128 * 1024 * 1024, 10 * 1000 * 1000, true,  AccessPattern_Random },
        { "128MB (Uncached & Sequential)    ",  128 * 1024 * 1024, 10 * 1000 * 1000, false, AccessPattern_Sequential },
        { "128MB (Uncached & Random)        ",  128 * 1024 * 1024, 10 * 1000 * 1000, false, AccessPattern_Random },
    };

    // Memory for the linked list.
    // InitializeList() builds the list in the first TestSetEntry::size bytes of this
    // array; nothing visible here checks that size against ListMemorySize.
    // NOTE(review): the page alignment is presumably needed because nnMain() passes
    // this address to nn::os::SetMemoryAttribute() - confirm against the API.
    const size_t ListMemorySize = 128 * 1024 * 1024;
    NN_ALIGNAS(nn::os::MemoryPageSize) List g_ListMemory[ListMemorySize / sizeof(List)];

    // To suppress optimization: area that receives a value computed from the
    // contents of the working memory. nnMain() stores the result of Search() here
    // through the pointer-to-volatile.
    int g_Dummy;
    volatile int* g_pDummy = &g_Dummy;

    // Relinks the circular list so that p2 comes immediately before p1.
    //
    // When p2 directly follows p1 (p0->p1->p2->p3) the result is p0->p2->p1->p3,
    // i.e. the two nodes trade places. When the nodes are not adjacent, p2 is taken
    // out of its current position and reinserted in front of p1. In every case both
    // pNext and pPrev stay consistent and no node drops out of the cycle, so the
    // number of nodes reachable through pNext is unchanged.
    //
    // p1, p2: nodes of the same well-formed circular doubly linked list.
    //         Passing the same node twice is allowed and leaves the list untouched.
    void Swap(List* p1, List* p2)
    {
        if (p1 == p2)
        {
            // Relinking a node in front of itself would leave it pointing at itself.
            return;
        }

        // Unlink p2 from its current position.
        List* const pBefore = p2->pPrev;
        List* const pAfter = p2->pNext;
        pBefore->pNext = pAfter;
        pAfter->pPrev = pBefore;

        // Fetch the predecessor of p1 only after the unlink: if p2 was that
        // predecessor, p1->pPrev has just been updated to the node before p2.
        List* const p0 = p1->pPrev;

        // Insert p2 between p0 and p1.
        p0->pNext = p2;
        p2->pPrev = p0;
        p2->pNext = p1;
        p1->pPrev = p2;
    }

    void InitializeList(size_t size, bool enableShuffle)
    {
        NN_ABORT_UNLESS_EQUAL(0U, size % sizeof(List));
        const size_t listLength = size / sizeof(List);

        std::mt19937_64 mt;

        // 循環リストの作成
        for (size_t i = 0; i < listLength; i++)
        {
            const size_t prevIndex = (i - 1 + listLength) % listLength;
            const size_t nextIndex = (i + 1) % listLength;
            g_ListMemory[i].pPrev = &g_ListMemory[prevIndex];
            g_ListMemory[i].pNext = &g_ListMemory[nextIndex];
            g_ListMemory[i].dummy = static_cast<int>(mt());
        }

        // 循環リストの要素を適当につなぎ変える
        if (enableShuffle)
        {
            for (size_t i = 0; i < listLength; i++)
            {
                auto p1 = &g_ListMemory[mt() % listLength];
                auto p2 = &g_ListMemory[mt() % listLength];
                Swap(p1, p2);
            }
        }
    }

    int Search(int loop)
    {
        List* p = &g_ListMemory[0];
        for (int i = 0; i < loop; i++)
        {
            p = p->pNext;
        }
        return p->dummy;
    }
}

// Entry point: runs every case of g_TestSet and logs one result line per case.
//
// For each case the list is rebuilt, the region is optionally switched to the
// uncached attribute for the duration of the walk, and the tick difference around
// Search() is reported as total time (ms) and time per hop (ns).
extern "C" void nnMain()
{
#if defined(NN_BUILD_CONFIG_SPEC_NX)
    nn::oe::DisableRecording();
#endif

    NN_LOG("%24s%16s%16s%16s\n", "", "LoopCount", "ExecutionTime", "Latency");
    for (const auto& entry : g_TestSet)
    {
        const bool shuffle = (entry.accessPattern == AccessPattern_Random);
        InitializeList(entry.size, shuffle);

        const bool uncached = !entry.enableCache;
        const auto listAddress = reinterpret_cast<uintptr_t>(g_ListMemory);

        if (uncached)
        {
            nn::os::SetMemoryAttribute(listAddress, entry.size, nn::os::MemoryAttribute_Uncached);
        }

        // Only the list walk itself is timed.
        auto tickBegin = nn::os::GetSystemTick();
        auto walkResult = Search(entry.loop);
        auto tickEnd = nn::os::GetSystemTick();

        if (uncached)
        {
            // Restore the normal attribute before the next case reuses the region.
            nn::os::SetMemoryAttribute(listAddress, entry.size, nn::os::MemoryAttribute_Normal);
        }

        // Consume the result through the volatile pointer so the walk is not optimized away.
        *g_pDummy = walkResult;

        auto elapsed = nn::os::ConvertToTimeSpan(tickEnd - tickBegin);
        auto executionTime = static_cast<float>(elapsed.GetMicroSeconds()) / 1000;
        auto latency = static_cast<float>(elapsed.GetNanoSeconds()) / entry.loop;

        NN_LOG("%15s: %10d loop %9.3f ms %9.3f ns\n", entry.name, entry.loop, executionTime, latency);
    }

#if defined(NN_BUILD_CONFIG_SPEC_NX)
    nn::oe::EnableRecording();
#endif
}
