﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include "TestVmJitAarch64.h"

#include <nn/nn_Common.h>
#include <memory>
#include "./Utility.h"

namespace nn { namespace jit { namespace testvm {

// BEGIN_TEMPLATE / END_TEMPLATE bracket the body of every *Template function in this file.
// BEGIN_TEMPLATE introduces the two names the bodies work on:
//   currentStack - working copy of *pCurrentStack
//   currentPc    - working copy of *pPc
// END_TEMPLATE makes the updated values visible through pCurrentStack / pPc again.
// Both macros rely on the enclosing function having parameters named pCurrentStack and pPc.
#ifdef __INTELLISENSE__

    // Variant without inline assembly, selected when __INTELLISENSE__ is defined
    // (presumably only by the IDE's code parser - confirm). The names are plain
    // references to the caller's variables.
    #define BEGIN_TEMPLATE \
        int32_t*& currentStack = *pCurrentStack; \
        int32_t& currentPc = *pPc;

    // Nothing to write back: the references above already alias *pCurrentStack and *pPc.
    #define END_TEMPLATE

#else

    // Real build: currentStack is bound to register x8 and currentPc to w9 using explicit
    // register variables, and both are loaded through the pointer parameters with inline asm.
    // The g_*Code arrays in this file use the same x8 / w9 assignment.
    #define BEGIN_TEMPLATE \
        register int32_t* currentStack asm("x8"); \
        register int32_t currentPc asm("w9"); \
        asm volatile ("ldr %0, [%1]":"=r"(currentStack):"r"(pCurrentStack)); \
        asm volatile ("ldr %w0, [%1]":"=r"(currentPc):"r"(pPc));

    // Stores the (possibly modified) stack pointer and pc back through the pointer parameters.
    #define END_TEMPLATE \
        asm volatile ("str %0, [%1]"::"r"(currentStack), "r"(pCurrentStack)); \
        asm volatile ("str %w0, [%1]"::"r"(currentPc), "r"(pPc));

#endif

namespace {

// Pops one element: moves *pCurrentStack down by one slot and returns the value stored there.
// The stack pointer designates the next free slot, so the top element lives one below it.
// No underflow check is performed.
NN_FORCEINLINE int32_t PopImpl(int32_t** pCurrentStack) NN_NOEXCEPT
{
    int32_t* const pTop = *pCurrentStack - 1;
    *pCurrentStack = pTop;
    return *pTop;
}

// Pushes v: stores it into the slot *pCurrentStack designates, then moves the pointer up by one.
// No overflow check is performed.
NN_FORCEINLINE void PushImpl(int32_t** pCurrentStack, int32_t v) NN_NOEXCEPT
{
    int32_t* const pSlot = *pCurrentStack;
    *pSlot = v;
    *pCurrentStack = pSlot + 1;
}

}

// C++ form of the Nop instruction: the stack is left untouched and the program counter
// advances by one. g_NopCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter (loaded by BEGIN_TEMPLATE, stored by END_TEMPLATE)
//   pCurrentStack - in/out: pointer to the next free stack slot (same load/store handling)
//   pInstruction  - not referenced by this body
NN_NOINLINE void NopTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Nop and Label instructions.
// Only increments the program counter held in w9; the stack pointer (x8) is untouched.
const Bit32 g_NopCode[] =
{
    0x11000529, // add w9, w9, #0x1
};

// C++ form of the Imm instruction: pushes the imm field of the instruction at the current
// pc onto the stack, then advances the pc by one. No stack overflow check is performed.
// g_ImmCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - instruction array, indexed by the current pc
NN_NOINLINE void ImmTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto inst = pInstruction[currentPc];
        PushImpl(&currentStack, inst.imm);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Imm.
// Registers: x8 = stack pointer (next free slot), w9 = pc, x2 = third argument register
// (pInstruction at the call made by RunJitCode), x10 = scratch.
// The sequence addresses instruction entries with a 4-byte stride and reads a signed
// halfword at byte offset 2 of the entry - presumably Instruction::imm; this hard-codes the
// layout of Instruction (TODO confirm against its declaration).
const Bit32 g_ImmCode[] =
{
    0x8b29c84a, // add x10, x2, w9, sxtw #2
    0x11000529, // add w9, w9, #0x1
    0x79c0054a, // ldrsh w10, [x10,#2]
    0xb800450a, // str w10, [x8],#4
};

// C++ form of the Read instruction: pushes a copy of the stack element located inst.imm
// slots below the current stack pointer (imm == 1 designates the current top element),
// then advances the pc by one. The slot is read before the push moves the stack pointer.
// No bounds check is performed on imm or on the stack.
// g_ReadCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - instruction array, indexed by the current pc
NN_NOINLINE void ReadTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto inst = pInstruction[currentPc];
        auto x = currentStack[-inst.imm];
        PushImpl(&currentStack, x);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Read.
// Registers: x8 = stack pointer (next free slot), w9 = pc, x2 = instruction array base,
// x10 / w11 = scratch.
// The signed halfword at byte offset 2 of the 4-byte-strided instruction entry (presumably
// Instruction::imm - TODO confirm layout) is scaled by 4 and subtracted from x8 to find the
// source slot; its value is stored at [x8] and x8 advances by 4.
const Bit32 g_ReadCode[] =
{
    0x8b29c84a, // add x10, x2, w9, sxtw #2
    0x11000529, // add w9, w9, #0x1
    0x7980054a, // ldrsh x10, [x10,#2]
    0xcb0a090a, // sub x10, x8, x10, lsl #2
    0xb940014b, // ldr w11, [x10]
    0x9100110a, // add x10, x8, #0x4
    0xb900010b, // str w11, [x8]
    0xaa0a03e8, // mov x8, x10
};

// C++ form of the Write instruction: pops the top value and stores it into the stack slot
// located inst.imm slots below the stack pointer, then advances the pc by one.
// The target slot is resolved BEFORE the pop, i.e. relative to the stack pointer as it was
// on entry (imm == 1 designates the slot that is being popped).
// No bounds check is performed on imm or on the stack.
// g_WriteCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - instruction array, indexed by the current pc
NN_NOINLINE void WriteTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto inst = pInstruction[currentPc];
        // Bind the destination first; PopImpl below changes currentStack.
        auto& target = currentStack[-inst.imm];
        auto x = PopImpl(&currentStack);
        target = x;
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Write.
// Registers: x8 = stack pointer (next free slot), w9 = pc, x2 = instruction array base,
// x10 / w11 = scratch.
// The destination address (x10) is computed from the entry value of x8, then the top
// element is popped with a pre-decrementing load and stored to the destination.
const Bit32 g_WriteCode[] =
{
    0x8b29c84a, // add x10, x2, w9, sxtw #2
    0x11000529, // add w9, w9, #0x1
    0x7980054a, // ldrsh x10, [x10,#2]
    0xcb0a090a, // sub x10, x8, x10, lsl #2
    0xb85fcd0b, // ldr w11, [x8,#-4]!
    0xb900014b, // str w11, [x10]
};

// C++ form of the Pop instruction: discards the top stack element and advances the pc by
// one. No underflow check is performed.
// g_PopCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void PopTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        PopImpl(&currentStack);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Pop.
// Moves the stack pointer (x8) down by one 4-byte slot and increments the pc (w9).
const Bit32 g_PopCode[] =
{
    0xd1001108, // sub x8, x8, #0x4
    0x11000529, // add w9, w9, #0x1
};

// C++ form of the Dup instruction: duplicates the top stack element (pop once, push the
// value twice) and advances the pc by one. The net effect is one additional element.
// No bounds check is performed.
// g_DupCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void DupTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto x = PopImpl(&currentStack);
        PushImpl(&currentStack, x);
        PushImpl(&currentStack, x);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Dup.
// Registers: x8 = stack pointer (next free slot), w9 = pc, x10 / w11 = scratch.
// Loads the top element from [x8-4], stores it at [x8] and advances x8 by 4.
const Bit32 g_DupCode[] =
{
    0xb85fc10b, // ldur w11, [x8,#-4]
    0x9100110a, // add x10, x8, #0x4
    0x11000529, // add w9, w9, #0x1
    0xb900010b, // str w11, [x8]
    0xaa0a03e8, // mov x8, x10
};

// C++ form of the Swap instruction: exchanges the two topmost stack elements and advances
// the pc by one. The stack depth is unchanged. No underflow check is performed.
// g_SwapCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void SwapTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto x = PopImpl(&currentStack);
        auto y = PopImpl(&currentStack);
        // Pushing in pop order reverses the two elements.
        PushImpl(&currentStack, x);
        PushImpl(&currentStack, y);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Swap.
// Registers: x8 = stack pointer (next free slot), w9 = pc, w10 / w11 = scratch.
// Loads the two topmost elements as a pair and stores them back in reversed order;
// x8 itself is not modified.
const Bit32 g_SwapCode[] =
{
    0x11000529, // add w9, w9, #0x1
    0x297f2d0a, // ldp w10, w11, [x8,#-8]
    0x293f290b, // stp w11, w10, [x8,#-8]
};

// C++ form of the Add instruction: pops two elements, pushes their sum and advances the
// pc by one. The net effect is one element fewer. No underflow check is performed.
// g_AddCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void AddTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto x = PopImpl(&currentStack);
        auto y = PopImpl(&currentStack);
        PushImpl(&currentStack, x + y);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Add.
// Registers: x8 = stack pointer (next free slot), w9 = pc, w10 / w11 = scratch.
// Pops the top element with a pre-decrementing load, adds it to the new top element and
// stores the sum in place of that element (net: x8 decreases by 4).
const Bit32 g_AddCode[] =
{
    0x11000529, // add w9, w9, #0x1
    0xb85fcd0a, // ldr w10, [x8,#-4]!
    0xb85fc10b, // ldur w11, [x8,#-4]
    0x0b0a016a, // add w10, w11, w10
    0xb81fc10a, // stur w10, [x8,#-4]
};

// C++ form of the Mul instruction: pops two elements, pushes their product and advances
// the pc by one. The net effect is one element fewer. No underflow check is performed.
// g_MulCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void MulTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto x = PopImpl(&currentStack);
        auto y = PopImpl(&currentStack);
        PushImpl(&currentStack, x * y);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Mul.
// Registers: x8 = stack pointer (next free slot), w9 = pc, w10 / w11 = scratch.
// Pops the top element with a pre-decrementing load, multiplies it with the new top
// element and stores the product in place of that element (net: x8 decreases by 4).
const Bit32 g_MulCode[] =
{
    0x11000529, // add w9, w9, #0x1
    0xb85fcd0a, // ldr w10, [x8,#-4]!
    0xb85fc10b, // ldur w11, [x8,#-4]
    0x1b0a7d6a, // mul w10, w11, w10
    0xb81fc10a, // stur w10, [x8,#-4]
};

// C++ form of the Neg instruction: replaces the top stack element with its arithmetic
// negation and advances the pc by one. No underflow check is performed.
// g_NegCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void NegTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto x = PopImpl(&currentStack);
        PushImpl(&currentStack, -x);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Neg.
// Registers: x8 = stack pointer (next free slot), w9 = pc, w10 = scratch.
// Negates the top element in place; x8 itself is not modified.
const Bit32 g_NegCode[] =
{
    0xb85fc10a, // ldur w10, [x8,#-4]
    0x11000529, // add w9, w9, #0x1
    0x4b0a03ea, // neg w10, w10
    0xb81fc10a, // stur w10, [x8,#-4]
};

// C++ form of the Not instruction: replaces the top stack element with 1 if it was 0 and
// with 0 otherwise (logical negation), then advances the pc by one.
// No underflow check is performed.
// g_NotCode is the AArch64 machine code counterpart of this body.
//   pPc           - in/out: program counter
//   pCurrentStack - in/out: pointer to the next free stack slot
//   pInstruction  - not referenced by this body
NN_NOINLINE void NotTemplate(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction) NN_NOEXCEPT
{
    BEGIN_TEMPLATE
    {
        auto x = PopImpl(&currentStack);
        PushImpl(&currentStack, x == 0 ? 1 : 0);
        ++currentPc;
    }
    END_TEMPLATE
}

// AArch64 code emitted by WriteInstruction for Not.
// Registers: x8 = stack pointer (next free slot), w9 = pc, w10 = scratch.
// Compares the top element with zero and overwrites it in place with 1 (was zero) or
// 0 (was non-zero); x8 itself is not modified. The sequence modifies the condition flags.
const Bit32 g_NotCode[] =
{
    0xb85fc10a, // ldur w10, [x8,#-4]
    0x7100015f, // cmp w10, #0x0
    0x1a9f17ea, // cset w10, eq
    0x11000529, // add w9, w9, #0x1
    0xb81fc10a, // stur w10, [x8,#-4]
};

// Signature shared by the *Template functions and by every generated code block:
// RunJitCode casts a block's start address to this type in order to call it.
using TemplateFunction = void (*)(int32_t* pPc, int32_t** pCurrentStack, const Instruction* pInstruction);

// Appends the N machine code words of `code` to the output position *pp.
//   pp   - in/out: write cursor; advanced past the words that were written
//   code - fixed-size array of 32-bit instruction words
// Returns the number of BYTES written (N * sizeof(Bit32)).
// The capacity of the destination is not checked.
template <size_t N>
size_t WriteCode(Bit32** pp, const Bit32 (&code)[N]) NN_NOEXCEPT
{
    Bit32* pOut = *pp;
    for (const Bit32& word : code)
    {
        *pOut = word;
        ++pOut;
    }
    *pp = pOut;
    return sizeof(code);
}

// Emits the machine code snippet that corresponds to a single straight-line instruction.
//   pp   - in/out: write cursor; advanced past the emitted words
//   inst - instruction to translate; only its kind selects the snippet (operands are read
//          from the instruction array by the emitted code itself at run time)
// Returns the number of bytes emitted.
// Any kind without a snippet ends up in NN_UNEXPECTED_DEFAULT.
size_t WriteInstruction(Bit32** pp, Instruction inst) NN_NOEXCEPT
{
    switch (inst.kind)
    {
        // Label has no run-time effect of its own and shares the Nop snippet.
        case InstructionKind::Nop:
        case InstructionKind::Label:
            return WriteCode(pp, g_NopCode);

        case InstructionKind::Imm:
            return WriteCode(pp, g_ImmCode);

        case InstructionKind::Read:
            return WriteCode(pp, g_ReadCode);

        case InstructionKind::Write:
            return WriteCode(pp, g_WriteCode);

        case InstructionKind::Pop:
            return WriteCode(pp, g_PopCode);

        case InstructionKind::Dup:
            return WriteCode(pp, g_DupCode);

        case InstructionKind::Swap:
            return WriteCode(pp, g_SwapCode);

        case InstructionKind::Add:
            return WriteCode(pp, g_AddCode);

        case InstructionKind::Mul:
            return WriteCode(pp, g_MulCode);

        case InstructionKind::Neg:
            return WriteCode(pp, g_NegCode);

        case InstructionKind::Not:
            return WriteCode(pp, g_NotCode);

        default: NN_UNEXPECTED_DEFAULT;
    }
}

// Emits code for the run of straight-line instructions that starts at *pPc.
// Emission stops at the first control-flow instruction (End, JmpR, JmpRIf0, Call or Ret);
// no code is emitted for that instruction.
//   pp           - in/out: write cursor; advanced past everything that was emitted
//   instructions - program being compiled; must contain a control-flow instruction at or
//                  after *pPc, because the scan has no other stop condition
//   pPc          - in/out: index of the first instruction on entry, index of the terminating
//                  control-flow instruction on return
// Returns the number of bytes emitted.
size_t WriteBlockContents(Bit32** pp, const Instruction* instructions, int32_t* pPc) NN_NOEXCEPT
{
    auto& pc = *pPc;
    // Accumulate in size_t: WriteInstruction returns size_t and so does this function, so a
    // deduced `int` accumulator would only introduce signed/unsigned round trips.
    size_t ret = 0;
    for (;;)
    {
        const auto inst = instructions[pc];
        switch (inst.kind)
        {
            case InstructionKind::End:
            case InstructionKind::JmpR:
            case InstructionKind::JmpRIf0:
            case InstructionKind::Call:
            case InstructionKind::Ret:
            {
                // Block boundary: leave pc on the control-flow instruction.
                return ret;
            }
            default:
            {
                ret += WriteInstruction(pp, inst);
                ++pc;
                continue;
            }
        }
    }
}

// Prologue written at the start of every generated block (see GenerateBlock).
// Loads the working registers through the first two argument registers: x8 = *x1 (stack
// pointer, from pCurrentStack) and w9 = *x0 (pc, from pPc). Counterpart of BEGIN_TEMPLATE.
const Bit32 g_BeginCode[] =
{
    0xf9400028, // ldr x8, [x1]
    0xb9400009, // ldr w9, [x0]
};

// Epilogue written at the end of every generated block (see GenerateBlock).
// Stores the stack pointer (x8) and pc (w9) back through x1 / x0 and returns to the caller.
// Counterpart of END_TEMPLATE plus the function return.
const Bit32 g_EndCode[] =
{
    0xf9000028, // str x8, [x1]
    0xb9000009, // str w9, [x0]
    0xd65f03c0, // ret
};

// Generates one callable code block into `buffer`: prologue (g_BeginCode), the snippets for
// the straight-line instructions starting at `pc`, and the epilogue (g_EndCode).
//   pInstructionCount - out: number of VM instructions covered by the block
//   buffer            - destination for the machine code; it is written as 32-bit words and
//                       its capacity is not checked here
//   instructions      - program being compiled
//   pc                - index of the first instruction of the block
// Returns the total number of bytes written to `buffer`.
size_t GenerateBlock(uint32_t* pInstructionCount, char* buffer, const Instruction* instructions, int32_t pc) NN_NOEXCEPT
{
    auto cursor = reinterpret_cast<Bit32*>(buffer);
    int32_t endPc = pc;

    const size_t prologueSize = WriteCode(&cursor, g_BeginCode);
    const size_t bodySize = WriteBlockContents(&cursor, instructions, &endPc);
    const size_t epilogueSize = WriteCode(&cursor, g_EndCode);

    // endPc now designates the control-flow instruction that terminated the block.
    *pInstructionCount = static_cast<uint32_t>(endPc - pc);
    return prologueSize + bodySize + epilogueSize;
}

// Executes a generated code block against the machine state in *pContext.
//   p        - start address of a block produced by GenerateBlock
//   pContext - machine context; its pc and stack contents / stackCount are updated
// The block works on a raw "next free slot" pointer, so the element count is converted to
// a pointer before the call and back to a count afterwards.
void RunJitCode(void* p, MachineContext* pContext) NN_NOEXCEPT
{
    auto& state = pContext->state;
    auto stackTop = state.stack + state.stackCount;

    const auto entry = reinterpret_cast<TemplateFunction>(p);
    entry(&pContext->pc, &stackTop, pContext->program.instructions);

    state.stackCount = stackTop - state.stack;
}

// Binds the runner to a machine context and allocates one Info entry per program
// instruction; Run() uses the entry at a block's starting pc to cache its generated code.
//   pContext - machine context to execute; stored, not copied
JitRunner::JitRunner(MachineContext* pContext) NN_NOEXCEPT
    : m_pContext(pContext)
    // Value-initialize the array (note the trailing "()"): Run() tests `!info.f` before any
    // entry has been written, so each entry must start out zeroed. Info is declared outside
    // this file; if it already initializes its members, this changes nothing.
    , m_Infos(new Info[pContext->program.instructionCount]())
{
}

// Pops the top element of the VM stack.
// Returns the popped value, or 0 without touching the stack when the stack is empty.
// The vacated slot is overwritten with 0xCCCCCCCC (presumably a marker that makes stale
// values easy to spot while debugging).
int32_t JitRunner::Pop() NN_NOEXCEPT
{
    auto& state = m_pContext->state;
    if (state.stackCount == 0)
    {
        return 0;
    }

    auto& slot = state.stack[--state.stackCount];
    const auto value = slot;
    slot = 0xCCCCCCCC;
    return value;
}

// Pushes x onto the VM stack.
// When the stack already holds stackCountMax elements the value is silently dropped.
void JitRunner::Push(int32_t x) NN_NOEXCEPT
{
    auto& state = m_pContext->state;
    if (state.stackCount != state.stackCountMax)
    {
        state.stack[state.stackCount] = x;
        ++state.stackCount;
    }
}

// Sets the program counter of the bound machine context to the absolute index `pc`.
// The index is not validated.
void JitRunner::Jump(int32_t pc) NN_NOEXCEPT
{
    auto& currentPc = m_pContext->pc;
    currentPc = pc;
}
// Jumps by `d` instructions relative to the instruction that requested the jump.
// Run() increments pc past that instruction before calling this, hence the "- 1".
void JitRunner::JumpRelative(int32_t d) NN_NOEXCEPT
{
    const int32_t target = m_pContext->pc + d - 1;
    Jump(target);
}

// Executes the program of the bound machine context until an End instruction is reached.
// Control-flow instructions (End, JmpR, JmpRIf0, Call, Ret) are interpreted here. Any other
// instruction starts a straight-line block: on first use the block is compiled with
// GenerateCode() and remembered in m_Infos at its starting pc, then it is executed with
// RunJitCode(), which also advances pc.
void JitRunner::Run() NN_NOEXCEPT
{
    auto& pc = m_pContext->pc;
    for (;;)
    {
        const auto inst = m_pContext->program.instructions[pc];
        switch (inst.kind)
        {
            case InstructionKind::End:
                return;

            case InstructionKind::JmpR:
            {
                // Unconditional relative jump; the offset is taken from the stack.
                ++pc;
                const auto offset = Pop();
                JumpRelative(offset);
                break;
            }

            case InstructionKind::JmpRIf0:
            {
                // Offset is on top of the stack, the tested value right below it.
                ++pc;
                const auto offset = Pop();
                const auto condition = Pop();
                if (condition == 0)
                {
                    JumpRelative(offset);
                }
                break;
            }

            case InstructionKind::Call:
            {
                // The return address (index of the instruction after Call) replaces the
                // popped offset on the stack.
                ++pc;
                const auto offset = Pop();
                Push(pc);
                JumpRelative(offset);
                break;
            }

            case InstructionKind::Ret:
            {
                // Absolute jump to the address found on the stack.
                const auto returnPc = Pop();
                Jump(returnPc);
                break;
            }

            default:
            {
                auto& info = m_Infos[pc];
                if (!info.f)
                {
                    // First visit of this block: compile it and make the new code visible
                    // to instruction fetch before it is called.
                    size_t generatedCodeSize;
                    uint32_t instructionCount;
                    info.f = this->GenerateCode(&generatedCodeSize, &instructionCount, pc);
                    NN_UNUSED(instructionCount);

                    InvalidateInstructionCacheAndMemoryBarrier(reinterpret_cast<uintptr_t>(info.f), generatedCodeSize);
                }
                RunJitCode(info.f, m_pContext);
                break;
            }
        }
    }
}

// Generates the code block starting at `pc` of the given instruction array into `buffer`.
//   pByteSize         - out: number of bytes written to `buffer`
//   pInstructionCount - out: number of VM instructions covered by the block
//   buffer            - destination for the machine code (capacity is not checked)
//   instructions      - program to compile
//   pc                - index of the first instruction of the block
void JitRunner::GenerateCodeImpl(size_t* pByteSize, uint32_t* pInstructionCount, char* buffer, const Instruction* instructions, int32_t pc) NN_NOEXCEPT
{
    const size_t byteSize = GenerateBlock(pInstructionCount, buffer, instructions, pc);
    *pByteSize = byteSize;
}

// Convenience overload: generates the code block starting at `pc` of the program that
// belongs to the bound machine context.
//   pByteSize         - out: number of bytes written to `buffer`
//   pInstructionCount - out: number of VM instructions covered by the block
//   buffer            - destination for the machine code (capacity is not checked)
//   pc                - index of the first instruction of the block
void JitRunner::GenerateCodeImpl(size_t* pByteSize, uint32_t* pInstructionCount, char* buffer, int32_t pc) NN_NOEXCEPT
{
    const Instruction* const instructions = m_pContext->program.instructions;
    GenerateCodeImpl(pByteSize, pInstructionCount, buffer, instructions, pc);
}

}}}
