﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/nn_Common.h>
#include <nn/nn_BitTypes.h>
#include "../../../kern_Platform.h"
#include "kern_MemoryMap.h"
#include "../../ARM64/kern_RegisterAccess.h"
#include "../kern_KPageTableDefinition.h"
#include "../../../kern_KTaggedAddress.h"
#include "../../../kern_Utility.h"
#include "../../../kern_KThread.h"
#include "../../../kern_InterruptControllerSelect.h"
#include "kern_SystemControl.h"
#include "../../../kern_Main.h"
#include "../../../init/kern_InitFunctions.h"
#include "kern_InitPageTable.h"
#if defined(NN_BUILD_CONFIG_SOC_TEGRA_K1) || defined(NN_BUILD_CONFIG_SOC_TEGRA_X1)
#include "../../../kern_DevicePageTableSelect.h"
#endif

namespace nn { namespace kern { namespace init { namespace ARMv8A {
// C-linkage declaration of ExceptionVector. Nothing in this file references
// it; Step0() takes the vector base from VBAR_EL1 instead.
extern "C" void ExceptionVector();

namespace {

/**
 * Picks a random, currently unmapped virtual range.
 *
 * Repeatedly draws a value from KSystemControl::GetRandomValue() bounded by
 * [begin, end], rounds it down to a multiple of align, and accepts it once
 * all of the following hold:
 *   - candidate + size does not wrap around,
 *   - the last byte of the range (candidate + size - 1) is <= end,
 *   - pageTable reports the whole range as free.
 *
 * There is no retry limit: the function keeps drawing until a candidate
 * passes all three checks.
 *
 * @param size      Size in bytes of the range to find.
 * @param align     Alignment applied to the candidate start address.
 * @param pageTable Page table queried through IsFree().
 * @param begin     Lower bound passed to the random generator.
 * @param end       Upper bound; compared against the last byte of the range.
 * @return Start address of the accepted range.
 */
KProcessAddress SearchFreeRegion(size_t size, size_t align, const KPageTableBody& pageTable, KProcessAddress begin, KProcessAddress end)
{
    for (;;)
    {
        KProcessAddress candidate = KProcessAddress(RoundDown(KSystemControl::GetRandomValue(GetAsInteger(begin), GetAsInteger(end)), align));

        // Checks run in the same order as the rejection tests they replace,
        // and short-circuit so IsFree() is only queried for in-range candidates.
        const bool accepted =
            !(candidate >= candidate + size) &&
            ((candidate + size - 1) <= end) &&
            pageTable.IsFree(candidate, size);
        if (accepted)
        {
            return candidate;
        }
    }
}

/**
 * Tests whether two address ranges share at least one address.
 *
 * Both ranges are given by their first and last address (inclusive ends):
 * ranges that merely touch at a shared address count as overlapping.
 *
 * @return true when [begin0, end0] and [begin1, end1] intersect.
 */
bool CheckOverlap(KProcessAddress begin0, KProcessAddress end0, KProcessAddress begin1, KProcessAddress end1)
{
    // Range 0 lies entirely below range 1.
    if (end0 < begin1)
    {
        return false;
    }
    // Range 1 lies entirely below range 0.
    if (end1 < begin0)
    {
        return false;
    }
    return true;
}

/**
 * Bump allocator handing out zero-filled physical memory during early init.
 *
 * The allocator owns nothing: it simply advances a cursor (m_Base) upward from
 * the address given to Initialize(). Memory is never returned. The cursor is
 * dereferenced directly through its integer value, so the addresses handed
 * out must be directly addressable at the time of the call.
 *
 * While the cursor equals Null<KPhysicalAddress>() (before Initialize() or
 * after Finalize()) every allocation request halts in a spin loop instead of
 * touching memory.
 */
class InitialPageAllocator :
    public KPageTableBody::PageTableAllocator
{
public:
    InitialPageAllocator(): m_Base(Null<KPhysicalAddress>()) {}

    /** Sets the address from which subsequent allocations are carved. */
    void Initialize(KPhysicalAddress base)
    {
        m_Base = base;
    }

    /** Allocates one zero-filled page of HW_MMU_PAGE_SIZE bytes. */
    KPhysicalAddress Allocate()
    {
        return Allocate(HW_MMU_PAGE_SIZE);
    }

    /**
     * Allocates a zero-filled range.
     *
     * @param size Requested size in bytes; rounded up to a multiple of
     *             HW_MMU_PAGE_SIZE.
     * @return Address of the first byte of the range.
     */
    KPhysicalAddress Allocate(size_t size)
    {
        // Halt if the allocator is not initialized. The empty volatile asm
        // gives the loop a side effect so the optimizer cannot assume it
        // terminates and drop the check.
        while (m_Base == Null<KPhysicalAddress>())
        {
            asm volatile ("" : : : "memory");
        }

        size = RoundUp(size, HW_MMU_PAGE_SIZE);
        KPhysicalAddress ret = m_Base;
        m_Base += size;
        std::memset(reinterpret_cast<void *>(GetAsInteger(ret)), 0, size);
        return ret;
    }

    /** Returns the current cursor, i.e. the first address not yet handed out. */
    KPhysicalAddress GetBase() const { return m_Base; }

    /** Resets the cursor to the null sentinel; later allocations halt. */
    void Finalize()
    {
        m_Base = Null<KPhysicalAddress>();
    }
private:
    KPhysicalAddress m_Base;    // next address to hand out, or the null sentinel
};

// File-local allocator used for every backing page and page-table page
// requested in this file. Step0() initializes it on entry and finalizes it
// once all early mappings are in place.
InitialPageAllocator g_InitPageAllocator;

// Attributes for the normal-memory mappings created in this file:
// inner-shareable, kernel read/write, normal memory type.
// NOTE(review): the meaning of the leading `false` is defined by PteAttr's
// constructor, which is not visible here.
const PteAttr attrRw(
        false,
        PteAttr::Shared_InnerShared,
        PteAttr::Permission_KernelReadWrite,
        PteAttr::Attribute_NormalMemory);

// Attributes for the register (MMIO) mappings created in this file:
// outer-shareable, kernel read/write, nGnRE memory type.
// NOTE(review): the meaning of the leading `false` is defined by PteAttr's
// constructor, which is not visible here.
const PteAttr attrIo(
        false,
        PteAttr::Shared_OuterShared,
        PteAttr::Permission_KernelReadWrite,
        PteAttr::Attribute_nGnRE);

/**
 * Allocates and maps one kernel stack inside the misc region.
 *
 * A window of three stack sizes is searched for, aligned to the stack size,
 * but only the middle third is mapped; the first and last thirds are left
 * unmapped on either side of the stack.
 *
 * @param pt Page table that receives the mapping.
 * @return Lowest virtual address of the mapped stack (callers add the stack
 *         size to obtain the stack bottom).
 */
KProcessAddress MapStack(const KPageTableBody& pt)
{
    // Backing memory is taken before the virtual window is chosen.
    KPhysicalAddress backing = g_InitPageAllocator.Allocate(NN_KERN_THREAD_SVC_STACK_SIZE);

    KProcessAddress window;
    for (;;)
    {
        window = SearchFreeRegion(NN_KERN_THREAD_SVC_STACK_SIZE * 3, NN_KERN_THREAD_SVC_STACK_SIZE, pt,
                KMemoryLayout::GetMiscRegionBegin(), KMemoryLayout::GetMiscRegionEnd() - 1);
        if (pt.IsFree(window, NN_KERN_THREAD_SVC_STACK_SIZE * 3))
        {
            break;
        }
    }

    // Skip the first third of the window so it stays unmapped.
    KProcessAddress stackBase = window + NN_KERN_THREAD_SVC_STACK_SIZE;
    pt.Map(
            stackBase,
            NN_KERN_THREAD_SVC_STACK_SIZE,
            backing,
            attrRw, &g_InitPageAllocator);
    return stackBase;
}


/**
 * Returns the size of main memory in bytes.
 *
 * On NX hardware the value is derived at run time: an SMC (immediate #1,
 * function id 0xC3000008) is issued for the register at
 * NN_KERN_P_ADDR_MC_REGISTER + 0x50 with mask and value both zero, and the
 * low 14 bits of the returned x1 are multiplied by 0x00100000 (1 MiB).
 * A non-zero status in x0 halts the core in a spin loop.
 * NOTE(review): a zero mask presumably makes the call a pure read - confirm
 * against the secure monitor interface.
 *
 * On every other target the compile-time constant
 * NN_KERN_P_ADDR_MAIN_MEMORY_SIZE is returned.
 */
size_t GetMainMemorySize()
{
#if defined NN_BUILD_CONFIG_HARDWARE_NX
    const Bit64     SmcFunctionReadWriteRegister = 0xC3000008;
    const uint64_t  McEmemCfg0PhysAddr = NN_KERN_P_ADDR_MC_REGISTER + 0x50;

    // Arguments and results are passed in x0-x7, each pinned to its register.
    register Bit64 x0 asm("x0") = SmcFunctionReadWriteRegister;
    register Bit64 x1 asm("x1") = McEmemCfg0PhysAddr;
    register Bit64 x2 asm("x2") = 0;    // mask
    register Bit64 x3 asm("x3") = 0;    // value
    register Bit64 x4 asm("x4") = 0;
    register Bit64 x5 asm("x5") = 0;
    register Bit64 x6 asm("x6") = 0;
    register Bit64 x7 asm("x7") = 0;

    asm volatile ("smc #1"
            : "+r"(x0), "+r"(x1), "+r"(x2), "+r"(x3), "+r"(x4), "+r"(x5), "+r"(x6), "+r"(x7)
            :
            : "x8", "x9", "x10", "x11", "x12",
            "x13", "x14", "x15", "x16", "x17", "x18", "cc", "memory");

    // Halt on failure. The empty volatile asm gives the loop a side effect so
    // the optimizer cannot assume it terminates and remove the check.
    while (x0 != 0)
    {
        asm volatile ("" : : : "memory");
    }

    return (x1 & 0x3FFF) * 0x00100000ul;
#else
    return NN_KERN_P_ADDR_MAIN_MEMORY_SIZE;
#endif
}

#if defined(NN_BUILD_CONFIG_HARDWARE_JETSONTX2) || defined(NN_BUILD_CONFIG_HARDWARE_JETSONTK2) || defined(NN_BUILD_CONFIG_HARDWARE_NX)
// Function identifiers passed in x0 by PsciCall().
// 0xC4000003 is the SMC64 CPU_ON function ID in the Arm PSCI specification.
enum PsciFucntion
{
    PsciFucntion_CpuOn = 0xC4000003
};

// Immediate operand of the `smc` instruction issued by PsciCall():
// 1 on NX hardware, 0 on the other targets this section is built for.
enum
{
#if defined(NN_BUILD_CONFIG_HARDWARE_NX)
    SmcNumber = 1
#else
    SmcNumber = 0
#endif
};

/**
 * Issues a secure monitor call with up to seven arguments.
 *
 * The function id goes in x0 and the remaining arguments in x1-x7; each
 * value is pinned to its register before the `smc` instruction, whose
 * immediate is SmcNumber. x8-x18, the condition flags and memory are
 * declared clobbered.
 *
 * @param functionId Function identifier placed in x0.
 * @param param0..param5 Values placed in x1-x6 (default 0).
 * @param clientId   Value placed in x7 (default 0).
 * @return Contents of x0 after the call.
 */
Bit64 PsciCall(
        PsciFucntion functionId,
        Bit64 param0 = 0, Bit64 param1 = 0, Bit64 param2 = 0,
        Bit64 param3 = 0, Bit64 param4 = 0, Bit64 param5 = 0, Bit64 clientId = 0)
{
    register Bit64 x0 asm("x0") = functionId;
    register Bit64 x1 asm("x1") = param0;
    register Bit64 x2 asm("x2") = param1;
    register Bit64 x3 asm("x3") = param2;
    register Bit64 x4 asm("x4") = param3;
    register Bit64 x5 asm("x5") = param4;
    register Bit64 x6 asm("x6") = param5;
    register Bit64 x7 asm("x7") = clientId;

    // %8 is the "I" (immediate) operand SmcNumber; x0-x7 are read and written.
    asm volatile ("smc %8"
            : "+r"(x0), "+r"(x1), "+r"(x2), "+r"(x3), "+r"(x4), "+r"(x5), "+r"(x6), "+r"(x7)
            : "I"(SmcNumber)
            : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "cc", "memory");
    return x0;
}

/**
 * Requests that another core be powered on through PsciCall().
 *
 * @param cpuId      Target core identifier, passed as the first argument.
 * @param entryPoint Address passed as the second argument.
 *
 * Does not return if the call reports a non-zero status: the calling core
 * halts in a spin loop.
 */
void CpuOn(Bit64 cpuId, uintptr_t entryPoint)
{
    const Bit64 result = PsciCall(PsciFucntion_CpuOn, cpuId, entryPoint);

    // Halt on failure. The empty volatile asm gives the loop a side effect so
    // the optimizer cannot assume it terminates and remove the check.
    while (result != 0)
    {
        asm volatile ("" : : : "memory");
    }
}
#endif

/**
 * Cleans the data cache lines covering [addr, addr + size) and then issues a
 * full-system data synchronization barrier.
 *
 * The line size is derived from bits [19:16] of CTR_EL0 as 4 << field bytes.
 * The start address is rounded down to a line boundary, so a partially
 * covered first line is cleaned as well.
 *
 * @param addr First byte of the range.
 * @param size Length of the range in bytes.
 */
void StoreDataCache(const void* addr, size_t size)
{
    Bit64 ctr;
    HW_GET_CTR_EL0(ctr);

    size_t cacheLineSize = (4 << ((ctr >> 16) & 0xF));

    uintptr_t startAddr = reinterpret_cast<uintptr_t>(addr) & ~(cacheLineSize - 1);
    uintptr_t endAddr   = reinterpret_cast<uintptr_t>(addr) + size;
    for (uintptr_t a = startAddr; a < endAddr; a += cacheLineSize)
    {
        // The "memory" clobber stops the compiler from moving the caller's
        // stores to this range past the clean; without it only the final
        // barrier orders them, which is too late for the cache operation.
        asm volatile ("dc cvac, %0" : : "r" (a) : "memory");
    }

    asm volatile ("dsb sy" : : : "memory");
}
}

// Per-core boot parameters. Step0() fills one entry per core with the system
// register values captured on the calling core, that core's L1 page table
// (ttbr1), its initial stack pointer, the entry function (KernelMain) and the
// core number as its argument, then cleans the array out of the data cache.
// NOTE(review): the consumer is not in this file - presumably each core's
// early start-up code reads its entry; the field order is likely relied on
// there, so confirm before reordering.
struct InitArgument
{
    Bit64 ttbr0;
    Bit64 ttbr1;
    Bit64 tcr;
    Bit64 mair;
    Bit64 cpuactlr;
    Bit64 cpuectlr;
    Bit64 sctlr;
    Bit64 vbar;
    Bit64 sp;
    Bit64 entry;
    Bit64 arg;
} g_InitArguments[KCPU::NUM_CORE];

// Physical address of each core's g_InitArguments entry, filled in by Step0()
// as (virtual address - _start + NN_KERN_P_ADDR_CODE). Explicitly placed in
// the .data section.
KPhysicalAddress g_InitArgumentsAddress[KCPU::NUM_CORE] __attribute__((section(".data")));

// Image boundary symbols. Only their addresses are used in this file: _start
// as the base of the kernel image and end as the upper bound of the code region.
extern "C" void _start();
extern "C" void end();

/*
 * Called after the MMU has been enabled and the kernel has been relocated.
 *
 * Lays out the kernel virtual address space and prepares every core for boot:
 *   - starts the early page allocator at freeBase;
 *   - captures the system registers of the calling core;
 *   - picks randomized virtual locations (via SearchFreeRegion) for the linear
 *     map, misc, stack, slab and temp regions, the UART, the interrupt
 *     controller, the device page table registers (Tegra K1/X1 only), the
 *     per-core stacks and the core-local region, mapping those that have
 *     backing memory;
 *   - gives every core other than core 0 its own copy of the L1 page table
 *     and maps the core-local pages into each table;
 *   - fills g_InitArguments / g_InitArgumentsAddress and cleans them out of
 *     the data cache;
 *   - records the heap, kernel and debug physical regions, finalizes the
 *     early allocator and, on supported hardware, powers on the other cores.
 *
 * freeBase: first address handed to the early page allocator.
 */
void Step0(uint64_t freeBase)
{
    g_InitPageAllocator.Initialize(freeBase);

    // Capture the current core's configuration; it is replicated to all cores below.
    Bit64 vbar;
    HW_GET_VBAR_EL1(vbar);

    Bit64 ttbr0;
    HW_GET_TTBR0_EL1(ttbr0);

    Bit64 ttbr1;
    HW_GET_TTBR1_EL1(ttbr1);

    Bit64 tcr;
    HW_GET_TCR_EL1(tcr);

    Bit64 sctlr;
    HW_GET_SCTLR_EL1(sctlr);

    Bit64 cpuectlr;
    HW_GET_CPUECTLR_EL1(cpuectlr);

    Bit64 cpuactlr;
    HW_GET_CPUACTLR_EL1(cpuactlr);

    Bit64 mair;
    HW_GET_MAIR_EL1(mair);

    // Operate on the page table currently installed in TTBR1_EL1.
    KPageTableBody pt;
    pt.Initialize(KPhysicalAddress(ttbr1 & HW_MMU_PFN_MASK), 0, NN_KERN_MMU_L1_PAGE_TABLE_1_SIZE / HW_MMU_PTE_SIZE, true);


    /* ---------------------------------------------------------------------
     *  1G Map Region
     * --------------------------------------------------------------------- */
    // code, misc, stack and slab are all placed inside the 1GB-aligned region that contains the code.
    size_t kernelRegionAlign = 0x40000000;
    size_t kernelRegionSize  = 0x40000000;
    KProcessAddress kernelRegion = RoundDown(reinterpret_cast<uintptr_t>(_start), kernelRegionAlign);
    // Clamp the region so that it does not extend past the end of the kernel address space.
    if (!(kernelRegion + kernelRegionSize - 1 <= NN_KERN_V_ADDR_KERNEL_END - 1))
    {
        kernelRegionSize = KProcessAddress(NN_KERN_V_ADDR_KERNEL_END) - kernelRegion;
    }
    KMemoryLayout::SetKernelRegion(kernelRegion, kernelRegionSize);

    // The slab starts at __slab_pt_start; the page-table heap takes the rest up to NN_KERN_P_ADDR_SLAB_PT_HEAP_END.
    size_t slabReqSize = RoundUp(CalcAllocatorsSize(), HW_MMU_PAGE_SIZE);
    KPhysicalAddress slabPhysStart = reinterpret_cast<uintptr_t>(__slab_pt_start) - reinterpret_cast<uintptr_t>(_start) + NN_KERN_P_ADDR_CODE;
    KPhysicalAddress slabPhysEnd = slabPhysStart + slabReqSize;
    KMemoryLayout::SetPtHeapRegionPhysical(slabPhysEnd, KPhysicalAddress(NN_KERN_P_ADDR_SLAB_PT_HEAP_END) - slabPhysEnd);

    // Decide the linear-map space, in 1GB units.
    size_t linearAlign = 0x40000000;
    size_t mainMemorySize = GetMainMemorySize();
    KPhysicalAddress mainMemoryStart = NN_KERN_P_ADDR_MAIN_MEMORY;
    KPhysicalAddress mainMemoryEnd = mainMemoryStart + mainMemorySize;

    KPhysicalAddress mainMemoryAlignedStart = RoundDown(mainMemoryStart, linearAlign);
    KPhysicalAddress mainMemoryAlignedEnd   = RoundUp(mainMemoryEnd, linearAlign);

    // Retry until the linear window does not collide with the kernel region.
    KProcessAddress linearAlignedRegion;
    do
    {
        linearAlignedRegion = SearchFreeRegion(mainMemoryAlignedEnd - mainMemoryAlignedStart, linearAlign, pt, NN_KERN_V_ADDR_KERNEL, NN_KERN_V_ADDR_KERNEL_END - 1);
    } while (
            CheckOverlap(linearAlignedRegion, linearAlignedRegion + (mainMemoryAlignedEnd - mainMemoryAlignedStart) - 1, kernelRegion, kernelRegion + kernelRegionSize - 1));

    KPhysicalAddress linearPhysEnd = NN_KERN_P_ADDR_MAIN_MEMORY + mainMemorySize - NN_KERN_P_ADDR_RESERVED_HI_SIZE;
    KPhysicalAddress linearPhysStart = slabPhysEnd;

    // Sanity check: the linear region must not start above
    // NN_KERN_P_ADDR_INITIAL_PROCESS_DEST; halt otherwise. The empty volatile
    // asm gives the loop a side effect so the optimizer cannot assume it
    // terminates and remove the check.
    while (!(linearPhysStart <= NN_KERN_P_ADDR_INITIAL_PROCESS_DEST))
    {
        asm volatile ("" : : : "memory");
    }

    KMemoryLayout::SetLinearRegion(
            KVirtualAddress(GetAsInteger(linearAlignedRegion)) + (linearPhysStart - KPhysicalAddress(NN_KERN_P_ADDR_MAIN_MEMORY)),
            linearPhysEnd - linearPhysStart);
    KMemoryLayout::SetLinearRegionPhysical(linearPhysStart, linearPhysEnd - linearPhysStart);
    pt.Map(
            KProcessAddress(GetAsInteger(KMemoryLayout::GetLinearRegionBegin())),
            KMemoryLayout::GetLinearRegionSize(),
            KMemoryLayout::GetLinearRegionPhysicalBegin(),
            attrRw, &g_InitPageAllocator);

    /* ---------------------------------------------------------------------
     *  Kernel Region
     * --------------------------------------------------------------------- */
    // Decide the CODE space, in ASLR-alignment units.
    size_t codeRegionAlign = NN_KERN_ASLR_ALIGN;
    KProcessAddress codeRegion = RoundDown(reinterpret_cast<uintptr_t>(_start), codeRegionAlign);
    KProcessAddress codeRegionEnd = RoundUp(reinterpret_cast<uintptr_t>(end), codeRegionAlign);
    KMemoryLayout::SetCodeRegion(codeRegion, codeRegionEnd - codeRegion);


    // Decide the MISC space, in ASLR-alignment units. Must not overlap the code region.
    size_t miscRegionAlign = NN_KERN_ASLR_ALIGN;
    size_t miscRegionSize = RoundUp(NN_KERN_V_ADDR_MISC_SIZE, miscRegionAlign);
    KProcessAddress miscRegion;
    do
    {
        miscRegion = SearchFreeRegion(miscRegionSize, miscRegionAlign, pt, KMemoryLayout::GetKernelRegionBegin(), KMemoryLayout::GetKernelRegionEnd() - 1);
    } while (
            CheckOverlap(miscRegion, miscRegion + miscRegionSize - 1, KMemoryLayout::GetCodeRegionBegin(), KMemoryLayout::GetCodeRegionEnd() - 1)
            );
    KMemoryLayout::SetMiscRegion(miscRegion, miscRegionSize);


    // Decide the STACK space, in ASLR-alignment units. Must not overlap code or misc.
    size_t stackRegionAlign = NN_KERN_ASLR_ALIGN;
    size_t stackRegionSize = RoundUp(NN_KERN_V_ADDR_STACK_REGION_SIZE, stackRegionAlign);
    KProcessAddress stackRegion;
    do
    {
        stackRegion = SearchFreeRegion(stackRegionSize, stackRegionAlign, pt, KMemoryLayout::GetKernelRegionBegin(), KMemoryLayout::GetKernelRegionEnd() - 1);
    } while (
            CheckOverlap(stackRegion, stackRegion + stackRegionSize - 1, KMemoryLayout::GetCodeRegionBegin(), KMemoryLayout::GetCodeRegionEnd() - 1) ||
            CheckOverlap(stackRegion, stackRegion + stackRegionSize - 1, KMemoryLayout::GetMiscRegionBegin(), KMemoryLayout::GetMiscRegionEnd() - 1)
            );
    KMemoryLayout::SetStackRegion(stackRegion, stackRegionSize);


    // Decide the SLAB space, in ASLR-alignment units. Must not overlap code, misc or stack.
    size_t slabRegionAlign = NN_KERN_ASLR_ALIGN;
    KPhysicalAddress slabPhysAlignedStart = RoundDown(slabPhysStart, slabRegionAlign);
    KPhysicalAddress slabPhysAlignedEnd   = RoundUp(slabPhysEnd, slabRegionAlign);

    // NOTE(review): the search size is one byte larger than the size used by
    // the overlap checks below - confirm whether the "+ 1" is intentional.
    KProcessAddress slabRegion;
    do
    {
        slabRegion = SearchFreeRegion(slabPhysAlignedEnd - slabPhysAlignedStart + 1, slabRegionAlign, pt, KMemoryLayout::GetKernelRegionBegin(), KMemoryLayout::GetKernelRegionEnd() - 1);
    } while (
            CheckOverlap(slabRegion, slabRegion + (slabPhysAlignedEnd - slabPhysAlignedStart) - 1, KMemoryLayout::GetCodeRegionBegin(),  KMemoryLayout::GetCodeRegionEnd()  - 1) ||
            CheckOverlap(slabRegion, slabRegion + (slabPhysAlignedEnd - slabPhysAlignedStart) - 1, KMemoryLayout::GetMiscRegionBegin(),  KMemoryLayout::GetMiscRegionEnd()  - 1) ||
            CheckOverlap(slabRegion, slabRegion + (slabPhysAlignedEnd - slabPhysAlignedStart) - 1, KMemoryLayout::GetStackRegionBegin(), KMemoryLayout::GetStackRegionEnd() - 1)
            );
    // Keep the slab's offset within its aligned physical window when placing it virtually.
    KMemoryLayout::SetSlabRegion(slabRegion + (slabPhysStart - slabPhysAlignedStart), slabPhysEnd - slabPhysStart);
    KMemoryLayout::SetSlabRegionPhysical(slabPhysStart, slabPhysEnd - slabPhysStart);
    pt.Map(
            KMemoryLayout::GetSlabRegionBegin(),
            KMemoryLayout::GetSlabRegionSize(),
            KMemoryLayout::GetSlabRegionPhysicalBegin(),
            attrRw, &g_InitPageAllocator);

    // Decide the Temp space, in ASLR-alignment units. Must not overlap code, misc, stack or slab.
    size_t tempRegionAlign = NN_KERN_ASLR_ALIGN;
    size_t tempRegionSize = RoundUp(NN_KERN_V_ADDR_TEMP_SIZE, tempRegionAlign);
    KProcessAddress tempRegion;
    do
    {
        tempRegion = SearchFreeRegion(tempRegionSize, tempRegionAlign, pt, KMemoryLayout::GetKernelRegionBegin(), KMemoryLayout::GetKernelRegionEnd() - 1);
    } while (
            CheckOverlap(tempRegion, tempRegion + tempRegionSize - 1, KMemoryLayout::GetCodeRegionBegin(),  KMemoryLayout::GetCodeRegionEnd()  - 1) ||
            CheckOverlap(tempRegion, tempRegion + tempRegionSize - 1, KMemoryLayout::GetMiscRegionBegin(),  KMemoryLayout::GetMiscRegionEnd()  - 1) ||
            CheckOverlap(tempRegion, tempRegion + tempRegionSize - 1, KMemoryLayout::GetStackRegionBegin(), KMemoryLayout::GetStackRegionEnd() - 1) ||
            CheckOverlap(tempRegion, tempRegion + tempRegionSize - 1, KMemoryLayout::GetSlabRegionBegin(),  KMemoryLayout::GetSlabRegionEnd()  - 1)
            );
    KMemoryLayout::SetTempRegion(tempRegion, tempRegionSize);


    // uart
    // The search window includes one extra page on each side; only the part
    // after the first page is mapped, so both extra pages stay unmapped.
    KProcessAddress uart;
    do
    {
        uart = SearchFreeRegion(NN_KERN_P_ADDR_UART_REGISTER_SIZE + HW_MMU_PAGE_SIZE * 2, HW_MMU_PAGE_SIZE, pt,
                KMemoryLayout::GetMiscRegionBegin(), KMemoryLayout::GetMiscRegionEnd() - 1);
    } while (!pt.IsFree(uart, NN_KERN_P_ADDR_UART_REGISTER_SIZE + HW_MMU_PAGE_SIZE * 2));
    uart += HW_MMU_PAGE_SIZE;
    pt.Map(
            uart,
            NN_KERN_P_ADDR_UART_REGISTER_SIZE,
            KPhysicalAddress(NN_KERN_P_ADDR_UART_REGISTER),
            attrIo, &g_InitPageAllocator);

    InitializeDebugLog(GetAsInteger(uart));

    // GIC
    // Each register block is mapped the same way as the UART: inside the misc
    // region with an unmapped page before and after it.
    KProcessAddress gicRegister[KInterruptController::NumMaps];
    for (int i = 0; i < KInterruptController::NumMaps; i++)
    {
        do
        {
            gicRegister[i] = SearchFreeRegion(KInterruptController::GetRegisterSize(i) + HW_MMU_PAGE_SIZE * 2, HW_MMU_PAGE_SIZE, pt,
                    KMemoryLayout::GetMiscRegionBegin(), KMemoryLayout::GetMiscRegionEnd() - 1);
        } while (!pt.IsFree(gicRegister[i], KInterruptController::GetRegisterSize(i) + HW_MMU_PAGE_SIZE * 2));
        gicRegister[i] += HW_MMU_PAGE_SIZE;
        pt.Map(
                gicRegister[i],
                KInterruptController::GetRegisterSize(i),
                KInterruptController::GetRegisterAddress(i),
                attrIo, &g_InitPageAllocator);
    }
    KInterruptController::PrepareInitialize(gicRegister);

#if defined(NN_BUILD_CONFIG_SOC_TEGRA_K1) || defined(NN_BUILD_CONFIG_SOC_TEGRA_X1)
    // Device page table registers, mapped the same way as the GIC blocks.
    if (KDevicePageTable::GetRegisterNum() > 0)
    {
        KProcessAddress *pRegisters = static_cast<KProcessAddress*>(__builtin_alloca(KDevicePageTable::GetRegisterNum() * sizeof(KProcessAddress)));
        for (int i = 0; i < KDevicePageTable::GetRegisterNum(); i++)
        {
            do
            {
                pRegisters[i] = SearchFreeRegion(KDevicePageTable::GetRegisterSize(i) + HW_MMU_PAGE_SIZE * 2, HW_MMU_PAGE_SIZE, pt,
                        KMemoryLayout::GetMiscRegionBegin(), KMemoryLayout::GetMiscRegionEnd() - 1);
            } while (!pt.IsFree(pRegisters[i], KDevicePageTable::GetRegisterSize(i) + HW_MMU_PAGE_SIZE * 2));
            pRegisters[i] += HW_MMU_PAGE_SIZE;
            pt.Map(
                    pRegisters[i],
                    KDevicePageTable::GetRegisterSize(i),
                    KDevicePageTable::GetRegisterAddress(i),
                    attrIo, &g_InitPageAllocator);
        }
        KDevicePageTable::PrepareInitialize(pRegisters);
    }
#endif

    // Three stacks per core; MapStack() returns the lowest address, so the
    // stack size is added to obtain the bottom recorded in the layout.
    for (int coreNo = 0; coreNo < KCPU::NUM_CORE; coreNo++)
    {
        KMemoryLayout::SetMainStackBottom(coreNo, MapStack(pt) + NN_KERN_THREAD_SVC_STACK_SIZE);
        KMemoryLayout::SetIdleStackBottom(coreNo, MapStack(pt) + NN_KERN_THREAD_SVC_STACK_SIZE);
        KMemoryLayout::SetExceptionStackBottom(coreNo, MapStack(pt) + NN_KERN_THREAD_SVC_STACK_SIZE);
    }

    /* ---------------------------------------------------------------------
     *  CoreLocal Region
     *      Laid out in the order: current, core0, core1, ...
     * --------------------------------------------------------------------- */
    // Must not overlap the kernel region or the linear-map window.
    KProcessAddress coreLocalRegion;
    do
    {
        coreLocalRegion = SearchFreeRegion(NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE * (KCPU::NUM_CORE + 1) + HW_MMU_PAGE_SIZE * 2, HW_MMU_PAGE_SIZE, pt,
                NN_KERN_V_ADDR_KERNEL, NN_KERN_V_ADDR_KERNEL_END - 1);
    } while (
            CheckOverlap(coreLocalRegion, coreLocalRegion + NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE * (KCPU::NUM_CORE + 1) + HW_MMU_PAGE_SIZE * 2 - 1, kernelRegion, kernelRegion + kernelRegionSize - 1) ||
            CheckOverlap(coreLocalRegion, coreLocalRegion + NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE * (KCPU::NUM_CORE + 1) + HW_MMU_PAGE_SIZE * 2 - 1, linearAlignedRegion, linearAlignedRegion + (mainMemoryAlignedEnd - mainMemoryAlignedStart) - 1)
            );

    // Skip the leading extra page of the search window.
    coreLocalRegion += HW_MMU_PAGE_SIZE;
    KMemoryLayout::SetCoreLocalRegion(coreLocalRegion, NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE * (KCPU::NUM_CORE + 1));

    KPhysicalAddress coreLocalRegionPhysical[KCPU::NUM_CORE] = {};
    for (int coreNo = 0; coreNo < KCPU::NUM_CORE; coreNo++)
    {
        coreLocalRegionPhysical[coreNo] = g_InitPageAllocator.Allocate(NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE);
    }

    // Core 0 keeps the current L1 table; every other core gets a copy of it.
    KPhysicalAddress l1PageTable[KCPU::NUM_CORE] = {};
    l1PageTable[0] = KPhysicalAddress(ttbr1 & HW_MMU_PFN_MASK);
    for (int coreNo = 1; coreNo < KCPU::NUM_CORE; coreNo++)
    {
        l1PageTable[coreNo] = g_InitPageAllocator.Allocate(NN_KERN_MMU_L1_PAGE_TABLE_1_SIZE);
        std::memcpy(reinterpret_cast<void*>(GetAsInteger(l1PageTable[coreNo])), reinterpret_cast<void*>(GetAsInteger(l1PageTable[0])), NN_KERN_MMU_L1_PAGE_TABLE_1_SIZE);
    }

    // In each core's table, slot 0 ("current") maps that core's own page and
    // slots 1..NUM_CORE map the pages of core0, core1, ...
    for (int coreNo = 0; coreNo < KCPU::NUM_CORE; coreNo++)
    {
        KPageTableBody ptbl;
        ptbl.Initialize(l1PageTable[coreNo], 0, NN_KERN_MMU_L1_PAGE_TABLE_1_SIZE / HW_MMU_PTE_SIZE, true);
        ptbl.Map(
                KMemoryLayout::GetCoreLocalRegionBegin(),
                NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE,
                coreLocalRegionPhysical[coreNo],
                attrRw, &g_InitPageAllocator);
        for (int i = 0; i < KCPU::NUM_CORE; i++)
        {
            ptbl.Map(
                    KMemoryLayout::GetCoreLocalRegionBegin() + (i + 1) * NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE,
                    NN_KERN_V_ADDR_CORE_LOCAL_REGION_SIZE,
                    coreLocalRegionPhysical[i],
                    attrRw, &g_InitPageAllocator);
        }
    }

    // Publish the boot parameters of every core and clean them out of the data cache.
    for (int coreNo = 0; coreNo < KCPU::NUM_CORE; coreNo++)
    {
        g_InitArguments[coreNo].ttbr0    = ttbr0;
        g_InitArguments[coreNo].ttbr1    = GetAsInteger(l1PageTable[coreNo]);
        g_InitArguments[coreNo].tcr      = tcr;
        g_InitArguments[coreNo].sctlr    = sctlr;
        g_InitArguments[coreNo].vbar     = vbar;
        g_InitArguments[coreNo].cpuectlr = cpuectlr;
        g_InitArguments[coreNo].cpuactlr = cpuactlr;
        g_InitArguments[coreNo].mair     = mair;
        g_InitArguments[coreNo].sp       = GetAsInteger(KMemoryLayout::GetMainStackBottom(coreNo)) - sizeof(KThread::ParamsOnStack);
        g_InitArguments[coreNo].entry    = reinterpret_cast<uintptr_t>(KernelMain);
        g_InitArguments[coreNo].arg      = coreNo;
    }
    StoreDataCache(g_InitArguments, sizeof(g_InitArguments));

    // Everything the early allocator has not handed out, up to the trace
    // buffers at the end of the linear region, becomes the heap.
    KPhysicalAddress heapRegionPhysical = g_InitPageAllocator.GetBase();
    KPhysicalAddress heapRegionPhysicalEnd = KMemoryLayout::GetLinearRegionPhysicalEnd() - NN_KERN_KTRACE_BUFFER_SIZE - NN_KERN_TRACE_BUFFER_SIZE;
    KMemoryLayout::SetHeapRegionPhysical(heapRegionPhysical, heapRegionPhysicalEnd - heapRegionPhysical);
    KMemoryLayout::SetHeapRegion(KMemoryLayout::ToLinearVirtualAddress(heapRegionPhysical), heapRegionPhysicalEnd - heapRegionPhysical);
    KMemoryLayout::SetKernelRegionPhysical(NN_KERN_P_ADDR_CODE, g_InitPageAllocator.GetBase() - KPhysicalAddress(NN_KERN_P_ADDR_CODE));
    KMemoryLayout::SetDebugRegionPhysical(KMemoryLayout::GetHeapRegionPhysicalEnd(), NN_KERN_KTRACE_BUFFER_SIZE + NN_KERN_TRACE_BUFFER_SIZE);

    // No further early allocations are allowed past this point.
    g_InitPageAllocator.Finalize();

    // Translate each entry's virtual address to physical using its offset from _start.
    for (int coreNo = 0; coreNo < KCPU::NUM_CORE; coreNo++)
    {
        g_InitArgumentsAddress[coreNo] = reinterpret_cast<uintptr_t>(&g_InitArguments[coreNo]) - reinterpret_cast<uintptr_t>(_start) + NN_KERN_P_ADDR_CODE;
    }
    StoreDataCache(g_InitArgumentsAddress, sizeof(g_InitArgumentsAddress));

#if defined(NN_BUILD_CONFIG_HARDWARE_JETSONTX2) || defined(NN_BUILD_CONFIG_HARDWARE_JETSONTK2) || defined(NN_BUILD_CONFIG_HARDWARE_NX)
    // PSCI CPU On
    //  Target CPU
    //      Bits [40:63]: Must be zero
    //      Bits [32:39] Aff3 : Match Aff3 of target core MPIDR
    //      Bits [24:31] Must be zero
    //      Bits [16:23] Aff2 : Match Aff2 of target core MPIDR
    //      Bits [8:15] Aff1 : Match Aff1 of target core MPIDR
    //      Bits [0:7] Aff0 : Match Aff0 of target core MPIDR

    // Turn on CPU1 to CPU3 of the same cluster.
    Bit64 mpidr;
    HW_GET_MPIDR_EL1(mpidr);
    // Keep the current core's Aff1-Aff3 fields; clear Aff0 and the must-be-zero bits.
    Bit64 mpidrAttr = (mpidr & ~(HW_MPIDR_EL1_AFF0_MASK | 0xFFFFFF00FF000000ul));

#if defined(NN_BUILD_CONFIG_HARDWARE_JETSONTK2)
    volatile Bit32& CLK_RST_CONTROLLER_RST_CPUG_CMPLX_CLR_0 = *reinterpret_cast<volatile Bit32*>(0x60006454);
    CLK_RST_CONTROLLER_RST_CPUG_CMPLX_CLR_0 = 0x000E000E;
#endif

    // Power on every core whose Aff0 differs from the calling core's.
    for (int i = 0; i < KCPU::NUM_CORE; i++)
    {
        if (static_cast<int>(mpidr & HW_MPIDR_EL1_AFF0_MASK) != i)
        {
            CpuOn(mpidrAttr | i, NN_KERN_P_ADDR_CODE);
        }
    }
#endif
}

/*
 * Resets the debug registers of the calling core.
 *
 * Sequence, each step separated by "dsb sy; isb":
 *   1. write 0 to MDSCR_EL1;
 *   2. write 0 to OSLAR_EL1;
 *   3. write 0 to every implemented watchpoint and breakpoint control/value
 *      register pair;
 *   4. write 0xffffffff to CONTEXTIDR_EL1;
 *   5. write MDSCR_EL1 with bits 15 and 12 set.
 *
 * The highest implemented register index is read from ID_AA64DFR0_EL1:
 * bits [23:20] for watchpoints and bits [15:12] for breakpoints. Each switch
 * enters at that index and falls through down to the lowest one; the register
 * index must be a literal, hence the unrolled ladder.
 *
 * NOTE(review): per the Arm architecture reference, MDSCR_EL1 bit 15 is MDE
 * and bit 12 is TDCC, and both ID fields hold "count minus one" - confirm
 * against the manual.
 */
void InitializeDebugRegister()
{
    Bit64 dfr0;
    HW_GET_ID_AA64DFR0_EL1(dfr0);
    int wrps = ((dfr0 >> 20) & 0xF);    // highest watchpoint index
    int brps = ((dfr0 >> 12) & 0xF);    // highest breakpoint index

    asm volatile ("dsb sy; isb");
    HW_SET_MDSCR_EL1(0ul);
    asm volatile ("dsb sy; isb");
    HW_SET_OSLAR_EL1(0ul);
    asm volatile ("dsb sy; isb");

    // Clear watchpoint pairs wrps..0. Pair 0 is cleared inside case 0x1, so
    // nothing is cleared when wrps is 0 (unlike breakpoint pair 0 below).
    switch (wrps)
    {
    case 0xF:
        {
            HW_SET_DBGWCR_EL1(15, 0ul);
            HW_SET_DBGWVR_EL1(15, 0ul);
        } NN_FALL_THROUGH;
    case 0xE:
        {
            HW_SET_DBGWCR_EL1(14, 0ul);
            HW_SET_DBGWVR_EL1(14, 0ul);
        } NN_FALL_THROUGH;
    case 0xD:
        {
            HW_SET_DBGWCR_EL1(13, 0ul);
            HW_SET_DBGWVR_EL1(13, 0ul);
        } NN_FALL_THROUGH;
    case 0xC:
        {
            HW_SET_DBGWCR_EL1(12, 0ul);
            HW_SET_DBGWVR_EL1(12, 0ul);
        } NN_FALL_THROUGH;
    case 0xB:
        {
            HW_SET_DBGWCR_EL1(11, 0ul);
            HW_SET_DBGWVR_EL1(11, 0ul);
        } NN_FALL_THROUGH;
    case 0xA:
        {
            HW_SET_DBGWCR_EL1(10, 0ul);
            HW_SET_DBGWVR_EL1(10, 0ul);
        } NN_FALL_THROUGH;
    case 0x9:
        {
            HW_SET_DBGWCR_EL1(9, 0ul);
            HW_SET_DBGWVR_EL1(9, 0ul);
        } NN_FALL_THROUGH;
    case 0x8:
        {
            HW_SET_DBGWCR_EL1(8, 0ul);
            HW_SET_DBGWVR_EL1(8, 0ul);
        } NN_FALL_THROUGH;
    case 0x7:
        {
            HW_SET_DBGWCR_EL1(7, 0ul);
            HW_SET_DBGWVR_EL1(7, 0ul);
        } NN_FALL_THROUGH;
    case 0x6:
        {
            HW_SET_DBGWCR_EL1(6, 0ul);
            HW_SET_DBGWVR_EL1(6, 0ul);
        } NN_FALL_THROUGH;
    case 0x5:
        {
            HW_SET_DBGWCR_EL1(5, 0ul);
            HW_SET_DBGWVR_EL1(5, 0ul);
        } NN_FALL_THROUGH;
    case 0x4:
        {
            HW_SET_DBGWCR_EL1(4, 0ul);
            HW_SET_DBGWVR_EL1(4, 0ul);
        } NN_FALL_THROUGH;
    case 0x3:
        {
            HW_SET_DBGWCR_EL1(3, 0ul);
            HW_SET_DBGWVR_EL1(3, 0ul);
        } NN_FALL_THROUGH;
    case 0x2:
        {
            HW_SET_DBGWCR_EL1(2, 0ul);
            HW_SET_DBGWVR_EL1(2, 0ul);
        } NN_FALL_THROUGH;
    case 0x1:
        {
            HW_SET_DBGWCR_EL1(1, 0ul);
            HW_SET_DBGWVR_EL1(1, 0ul);

            HW_SET_DBGWCR_EL1(0, 0ul);
            HW_SET_DBGWVR_EL1(0, 0ul);
        } NN_FALL_THROUGH;
    default:
        break;
    }
    // Clear breakpoint pairs brps..1; pair 0 is cleared after the switch.
    switch (brps)
    {
    case 0xF:
        {
            HW_SET_DBGBCR_EL1(15, 0ul);
            HW_SET_DBGBVR_EL1(15, 0ul);
        } NN_FALL_THROUGH;
    case 0xE:
        {
            HW_SET_DBGBCR_EL1(14, 0ul);
            HW_SET_DBGBVR_EL1(14, 0ul);
        } NN_FALL_THROUGH;
    case 0xD:
        {
            HW_SET_DBGBCR_EL1(13, 0ul);
            HW_SET_DBGBVR_EL1(13, 0ul);
        } NN_FALL_THROUGH;
    case 0xC:
        {
            HW_SET_DBGBCR_EL1(12, 0ul);
            HW_SET_DBGBVR_EL1(12, 0ul);
        } NN_FALL_THROUGH;
    case 0xB:
        {
            HW_SET_DBGBCR_EL1(11, 0ul);
            HW_SET_DBGBVR_EL1(11, 0ul);
        } NN_FALL_THROUGH;
    case 0xA:
        {
            HW_SET_DBGBCR_EL1(10, 0ul);
            HW_SET_DBGBVR_EL1(10, 0ul);
        } NN_FALL_THROUGH;
    case 0x9:
        {
            HW_SET_DBGBCR_EL1(9, 0ul);
            HW_SET_DBGBVR_EL1(9, 0ul);
        } NN_FALL_THROUGH;
    case 0x8:
        {
            HW_SET_DBGBCR_EL1(8, 0ul);
            HW_SET_DBGBVR_EL1(8, 0ul);
        } NN_FALL_THROUGH;
    case 0x7:
        {
            HW_SET_DBGBCR_EL1(7, 0ul);
            HW_SET_DBGBVR_EL1(7, 0ul);
        } NN_FALL_THROUGH;
    case 0x6:
        {
            HW_SET_DBGBCR_EL1(6, 0ul);
            HW_SET_DBGBVR_EL1(6, 0ul);
        } NN_FALL_THROUGH;
    case 0x5:
        {
            HW_SET_DBGBCR_EL1(5, 0ul);
            HW_SET_DBGBVR_EL1(5, 0ul);
        } NN_FALL_THROUGH;
    case 0x4:
        {
            HW_SET_DBGBCR_EL1(4, 0ul);
            HW_SET_DBGBVR_EL1(4, 0ul);
        } NN_FALL_THROUGH;
    case 0x3:
        {
            HW_SET_DBGBCR_EL1(3, 0ul);
            HW_SET_DBGBVR_EL1(3, 0ul);
        } NN_FALL_THROUGH;
    case 0x2:
        {
            HW_SET_DBGBCR_EL1(2, 0ul);
            HW_SET_DBGBVR_EL1(2, 0ul);
        } NN_FALL_THROUGH;
    case 0x1:
        {
            HW_SET_DBGBCR_EL1(1, 0ul);
            HW_SET_DBGBVR_EL1(1, 0ul);
        } NN_FALL_THROUGH;
    default:
        break;
    }
    // Breakpoint pair 0 is cleared regardless of brps.
    HW_SET_DBGBCR_EL1(0, 0ul);
    HW_SET_DBGBVR_EL1(0, 0ul);

    asm volatile ("dsb sy; isb");
    HW_SET_CONTEXTIDR_EL1(0xfffffffful);

    asm volatile ("dsb sy; isb");
    HW_SET_MDSCR_EL1((1ul << 15) | (1ul << 12));
    asm volatile ("dsb sy; isb");
}

}}}}


