﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/init.h>
#include <nn/nn_Abort.h>
#include <nn/nn_Assert.h>
#include <nn/nn_Log.h>
#include <nn/os.h>
#include <nn/os/os_VirtualAddressMemory.h>
#include <nn/util/util_TinyMt.h>

#include <nnt.h>

#include "test_VirtualAddressMemoryCommon.h"

// Only 64-bit targets are supported: the build is rejected unless one of the
// two 64-bit CPU configuration macros checked below is defined.
#if !(defined(NN_OS_CPU_ARM_AARCH64_ARMV8A) || defined(NN_BUILD_CONFIG_CPU_X64))
    #error "This program supports only 64bit architecture."
#endif

//---------------------------------------------------------------------------
//  Multithreaded stress tests for the VirtualAddressMemory feature
//---------------------------------------------------------------------------

namespace
{
    // Size in bytes (64 MiB) of the static buffer registered as the allocator heap.
    const size_t HeapSize = 64UL * 1024 * 1024;

    // Backing storage handed to nn::init::InitializeAllocator() in nninitStartup().
    uint8_t g_Heap[HeapSize];
}

namespace nnt { namespace os { namespace VirtualAddressMemory {

namespace
{
    // Stack size in bytes passed to nn::os::CreateThread() for every worker thread.
    const size_t StackSize = 4096;
    // Upper bound on the number of worker threads; it sizes g_Stacks and g_Threads.
    const int MaxThreadCount = 64;

    // One stack buffer per worker thread. The array is declared with NN_ALIGNAS(4096)
    // and each row is StackSize (4096) bytes long, so every row begins on a
    // 4096-byte boundary.
    NN_ALIGNAS(4096) uint8_t g_Stacks[MaxThreadCount][StackSize];
    // Thread objects for the worker threads; entry i uses g_Stacks[i] as its stack.
    nn::os::ThreadType g_Threads[MaxThreadCount];

    // Core placement for one worker thread. ParallelProcess() forwards both
    // fields, in this order, to nn::os::SetThreadCoreMask().
    // The tests brace-initialize this struct as { idealCore, affinityMask }.
    struct ThreadCoreMask
    {
        // Ideal core number passed to nn::os::SetThreadCoreMask().
        int idealCore;
        // Affinity mask passed to nn::os::SetThreadCoreMask()
        // (presumably one bit per core - confirm against the nn::os documentation).
        nn::Bit64 affinityMask;
    };

    // Per-worker state: the command list a worker thread executes and the
    // processor it executes it on (see the thread function in ParallelProcess()).
    struct ThreadProperty
    {
        // Executes the commands and records the verification outcome that the
        // worker checks with IsVerificationPassed().
        CommandProcessor processor;
        // Commands for the next ParallelProcess() run; the tests clear and
        // refill this list before each run.
        CommandList commands;
    };

    // Per-worker properties, indexed by worker thread number (0 .. MaxThreadCount - 1).
    ThreadProperty g_Properties[MaxThreadCount];

    // CommandProcessor used directly by the main (test-driving) thread.
    CommandProcessor g_MainProcessor;

    // Runs the command lists stored in `properties` in parallel, one worker
    // thread per entry, and returns once every worker has finished.
    //
    //   threadCount     - number of worker threads; must not exceed MaxThreadCount.
    //   threadCoreMasks - at least threadCount entries; entry i supplies the ideal
    //                     core and affinity mask applied to worker i.
    //   properties      - at least threadCount entries; worker i executes
    //                     properties[i].commands on properties[i].processor and
    //                     reports a gtest failure if the processor's verification
    //                     did not pass.
    //
    // All workers are created and configured before any of them is started.
    // Aborts if threadCount is too large or a thread cannot be created.
    void ParallelProcess(int threadCount, ThreadCoreMask* threadCoreMasks, ThreadProperty* properties)
    {
        NN_ABORT_UNLESS(threadCount <= MaxThreadCount);

        auto threadFunc = [] (void* pArg)
        {
            // pArg is the ThreadProperty* handed to CreateThread() below.
            auto pProperty = static_cast<ThreadProperty*>(pArg);

            pProperty->processor.Process(pProperty->commands.begin(), pProperty->commands.end());
            EXPECT_TRUE(pProperty->processor.IsVerificationPassed());
        };

        for (int i = 0; i < threadCount; i++)
        {
            // Thread creation can fail (e.g. resource exhaustion). Continuing with an
            // uninitialized ThreadType would make the calls below operate on garbage,
            // so stop immediately instead of ignoring the result.
            const auto result = nn::os::CreateThread(&g_Threads[i], threadFunc, &properties[i], g_Stacks[i], StackSize, nn::os::DefaultThreadPriority);
            NN_ABORT_UNLESS(result.IsSuccess());

            nn::os::SetThreadCoreMask(&g_Threads[i], threadCoreMasks[i].idealCore, threadCoreMasks[i].affinityMask);
        }

        // Start the workers only after all of them exist.
        for (int i = 0; i < threadCount; i++)
        {
            nn::os::StartThread(&g_Threads[i]);
        }

        // Join and release every worker so g_Threads / g_Stacks can be reused by the next call.
        for (int i = 0; i < threadCount; i++)
        {
            nn::os::WaitThread(&g_Threads[i]);
            nn::os::DestroyThread(&g_Threads[i]);
        }
    }

    void AddressRegionApiTest(int threadCount, ThreadCoreMask* threadCoreMasks)
    {
        // 各スレッド内のループ回数
        const int loopCount = 100;

        // 各ループで API を呼ぶ回数
        const int apiCallCount = 100;

        // 各スレッドで作成・破棄するリージョンのパラメータ
        const size_t regionSize = 16 * 1024 * 1024;

        // 並列リージョン作成・破棄
        for (int loop = 0; loop < loopCount; loop++)
        {
            NN_LOG("Parallel allocate/free address region (%d).\n", loop);

            for (int threadId = 0; threadId < threadCount; threadId++)
            {
                g_Properties[threadId].commands.clear();

                for (int i = 0; i < apiCallCount; i++)
                {
                    g_Properties[threadId].commands.AddAllocateRegionCommand(0, regionSize);
                    g_Properties[threadId].commands.AddFreeRegionCommand(0);
                }
            }
            ParallelProcess(threadCount, threadCoreMasks, g_Properties);
        }
    }

    // Stress-tests memory page allocation/release inside shared address regions
    // from several threads at once.
    //
    //   threadCount     - number of worker threads; must not exceed MaxThreadCount.
    //   threadCoreMasks - at least threadCount entries with the core placement of
    //                     each worker (forwarded to ParallelProcess()).
    //
    // Flow:
    //   1. The main processor allocates, backs and fills one extra region
    //      (id == threadCount); it is verified at the end to detect corruption
    //      caused by the parallel phase.
    //   2. The main processor allocates one region per worker (ids 0 .. threadCount - 1)
    //      and every worker processor loads the main processor's region info.
    //   3. For loopCount rounds, each worker visits every region in a rotated order
    //      ((threadId + i) % threadCount), allocating and freeing each page in turn,
    //      followed by a sync command on a shared barrier and a 1 ms sleep command.
    //   4. The per-worker regions are freed, the extra region is verified and freed,
    //      and ConfirmResourceUsage() is called.
    void MemoryPageApiTest(int threadCount, ThreadCoreMask* threadCoreMasks)
    {
        NN_ABORT_UNLESS(threadCount <= MaxThreadCount);

        // Number of parallel rounds.
        const int loopCount = 10;

        // Region owned by the main thread, used to detect memory corruption.
        const size_t mainRegionSize = 16 * 1024 * 1024;
        const int mainRegionId = threadCount;

        // Regions and pages that the workers allocate and free.
        const size_t threadRegionSize = 4 * 1024 * 1024;
        const size_t threadPageCount = 64;
        const size_t threadPageSize = threadRegionSize / threadPageCount;

        nn::os::Barrier barrier(threadCount);

        // Create and fill the corruption-check region.
        g_MainProcessor.ProcessAllocateAddressRegion(mainRegionId, mainRegionSize);
        g_MainProcessor.ProcessAllocateMemoryPages(mainRegionId, 0, mainRegionSize);
        g_MainProcessor.ProcessFillMemory(mainRegionId, 0, mainRegionSize);

        // Create one region per worker.
        NN_LOG("Parallel allocate address region.\n");
        for (int i = 0; i < threadCount; i++)
        {
            auto regionId = i;
            g_MainProcessor.ProcessAllocateAddressRegion(regionId, threadRegionSize);
        }
        for (int i = 0; i < threadCount; i++)
        {
            g_Properties[i].processor.LoadRegionInfo(g_MainProcessor);
        }

        // Parallel page allocation / release.
        for (int loop = 0; loop < loopCount; loop++)
        {
            NN_LOG("Parallel allocate/free memory pages (%d).\n", loop);

            for (int threadId = 0; threadId < threadCount; threadId++)
            {
                g_Properties[threadId].commands.clear();

                for (int i = 0; i < threadCount; i++)
                {
                    // Rotate the starting region per worker: at step i the workers
                    // target pairwise different regions.
                    auto regionId = (threadId + i) % threadCount;

                    // size_t index: threadPageCount is a size_t, so an int index would
                    // make this a mixed signed/unsigned comparison.
                    for (size_t pageId = 0; pageId < threadPageCount; pageId++)
                    {
                        const size_t pageOffset = pageId * threadPageSize;

                        g_Properties[threadId].commands.AddAllocatePagesCommand(regionId, pageOffset, threadPageSize);
                        g_Properties[threadId].commands.AddFreePagesCommand(regionId, pageOffset, threadPageSize);
                    }

                    // Presumably makes all workers rendezvous on `barrier` before the
                    // next step - confirm against CommandProcessor.
                    g_Properties[threadId].commands.AddSyncCommand(barrier.GetBase());
                    g_Properties[threadId].commands.AddSleepCommand(nn::TimeSpan::FromMilliSeconds(1));
                }
            }
            ParallelProcess(threadCount, threadCoreMasks, g_Properties);
        }

        // Destroy the per-worker regions.
        NN_LOG("Parallel free address region.\n");
        for (int i = 0; i < threadCount; i++)
        {
            auto regionId = i;
            g_MainProcessor.ProcessFreeAddressRegion(regionId);
        }

        // Check the corruption-check region, then release it.
        g_MainProcessor.ProcessVerifyMemory(mainRegionId, 0, mainRegionSize);
        g_MainProcessor.ProcessFreeMemoryPages(mainRegionId, 0, mainRegionSize);
        g_MainProcessor.ProcessFreeAddressRegion(mainRegionId);
        EXPECT_TRUE(g_MainProcessor.IsVerificationPassed());

        // NOTE(review): the meaning of the 16 MiB argument is defined by the common
        // test helper, not visible here - confirm.
        ConfirmResourceUsage(16 * 1024 * 1024);
    }
}

// Runs both stress scenarios with 64 workers that all use ideal core 0 and
// affinity mask 0x1.
TEST(VirtualAddressMemoryStressParallel, SingleCore)
{
    const int workerCount = 64;

    // Every worker gets the same placement: { idealCore = 0, affinityMask = 1 }.
    ThreadCoreMask threadCoreMasks[workerCount];
    for (auto& placement : threadCoreMasks)
    {
        placement.idealCore = 0;
        placement.affinityMask = 1;
    }

    AddressRegionApiTest(NN_ARRAY_SIZE(threadCoreMasks), threadCoreMasks);
    MemoryPageApiTest(NN_ARRAY_SIZE(threadCoreMasks), threadCoreMasks);
}

// Runs both stress scenarios with 64 workers spread over cores 0..2, each
// worker restricted (by its affinity mask) to its own ideal core.
TEST(VirtualAddressMemoryStressParallel, MultiCore)
{
    const int workerCount = 64;
    const int coreCount = 3;
    // The core sequence 0, 1, 2, 0, ... restarts from core 0 every 16 workers.
    const int groupSize = 16;

    ThreadCoreMask threadCoreMasks[workerCount];
    for (int worker = 0; worker < workerCount; worker++)
    {
        const int core = (worker % groupSize) % coreCount;

        threadCoreMasks[worker].idealCore = core;
        // Single-bit mask for the ideal core: 1, 2 or 4.
        threadCoreMasks[worker].affinityMask = 1ULL << core;
    }

    AddressRegionApiTest(NN_ARRAY_SIZE(threadCoreMasks), threadCoreMasks);
    MemoryPageApiTest(NN_ARRAY_SIZE(threadCoreMasks), threadCoreMasks);
}

// Runs both stress scenarios with 64 workers whose ideal cores cycle over
// 0..2 while every worker shares the affinity mask 0x7.
TEST(VirtualAddressMemoryStressParallel, AffinityMask)
{
    const int workerCount = 64;
    const int coreCount = 3;
    // The ideal-core sequence 0, 1, 2, 0, ... restarts from core 0 every 16 workers.
    const int groupSize = 16;

    // The affinity mask is set to 0x7 so that threads can migrate between cores.
    ThreadCoreMask threadCoreMasks[workerCount];
    for (int worker = 0; worker < workerCount; worker++)
    {
        threadCoreMasks[worker].idealCore = (worker % groupSize) % coreCount;
        threadCoreMasks[worker].affinityMask = 7;
    }

    AddressRegionApiTest(NN_ARRAY_SIZE(threadCoreMasks), threadCoreMasks);
    MemoryPageApiTest(NN_ARRAY_SIZE(threadCoreMasks), threadCoreMasks);
}

}}} // namespace nnt::os::VirtualAddressMemory

// Startup hook: registers the static g_Heap buffer (HeapSize bytes) with the
// nn::init allocator.
extern "C" void nninitStartup()
{
    void* const heapBase = g_Heap;
    ::nn::init::InitializeAllocator(heapBase, HeapSize);
}
