﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include "kern_AssemblyOffset.h"
#include "../ARM64/kern_Assembly.h"
#include "../ARM64/kern_RegisterDefinition.h"

/*
 * Build-time guard for the "scheduling required" flag offset.
 *
 * The scheduler routine in this file reads and writes that flag with
 * ldarb / stlrb directly through the SchedulingCounter pointer (no offset is
 * applied, because these instructions only accept a bare base register).
 * The flag must therefore be located at offset 0 of the structure.
 */
#if !defined(KSCHEDULER_SCHEDULING_REQUIRED) || (KSCHEDULER_SCHEDULING_REQUIRED != 0)
#error KSCHEDULER_SCHEDULING_REQUIRED should be 0 for stlrb
#endif

/*
 * SAVE_CONTEXT(ctx, tmp0, tmp1)
 *
 * Stores the current register context into the context block pointed to by ctx.
 *
 *   ctx  : register holding the base address of the context block (not modified)
 *   tmp0 : scratch register, clobbered
 *   tmp1 : scratch register, clobbered
 *
 * Saved state:
 *   - x19-x30 at KCONTEXT_CPUREGISTERS
 *   - sp and the current CPACR_EL1 value as one pair at KCONTEXT_SP
 *     (CPACR_EL1 therefore lands in the 8 bytes following the sp slot)
 *   - only when (CPACR_EL1 & HW_CPACR_EL1_FEN_MASK) is non-zero:
 *     FPCR/FPSR as one pair at KCONTEXT_FPCR, and q0-q31 at KCONTEXT_FPUREGISTERS
 *
 * NOTE: the macro defines the fixed local label .Lskip_fpusave, so it can be
 * expanded only once per assembly file; a second expansion would define the
 * label twice.
 */
#define SAVE_CONTEXT(ctx, tmp0, tmp1)                                   \
    mov     tmp0, sp;                                                   \
    mrs     tmp1, cpacr_el1;                                            \
    stp     x19, x20, [ctx, #(KCONTEXT_CPUREGISTERS + 0 * 8 * 2)];      \
    stp     x21, x22, [ctx, #(KCONTEXT_CPUREGISTERS + 1 * 8 * 2)];      \
    stp     x23, x24, [ctx, #(KCONTEXT_CPUREGISTERS + 2 * 8 * 2)];      \
    stp     x25, x26, [ctx, #(KCONTEXT_CPUREGISTERS + 3 * 8 * 2)];      \
    stp     x27, x28, [ctx, #(KCONTEXT_CPUREGISTERS + 4 * 8 * 2)];      \
    stp     x29, x30, [ctx, #(KCONTEXT_CPUREGISTERS + 5 * 8 * 2)];      \
    stp     tmp0, tmp1, [ctx, #(KCONTEXT_SP)];                          \
    and     tmp1, tmp1, #(HW_CPACR_EL1_FEN_MASK);                       \
    cbz     tmp1, .Lskip_fpusave;                                       \
    /* ---- Save FPCR and FPSR (FEN bits are set) */                    \
    mrs     tmp0, fpcr;                                                 \
    mrs     tmp1, fpsr;                                                 \
    stp     tmp0, tmp1, [ctx, #KCONTEXT_FPCR];                          \
    /* ---- Save the FPU data registers q0-q31 */                       \
    stp     q0, q1, [ctx, #KCONTEXT_FPUREGISTERS + 0 * 16 * 2];         \
    stp     q2, q3, [ctx, #KCONTEXT_FPUREGISTERS + 1 * 16 * 2];         \
    stp     q4, q5, [ctx, #KCONTEXT_FPUREGISTERS + 2 * 16 * 2];         \
    stp     q6, q7, [ctx, #KCONTEXT_FPUREGISTERS + 3 * 16 * 2];         \
    stp     q8, q9, [ctx, #KCONTEXT_FPUREGISTERS + 4 * 16 * 2];         \
    stp     q10, q11, [ctx, #KCONTEXT_FPUREGISTERS + 5 * 16 * 2];       \
    stp     q12, q13, [ctx, #KCONTEXT_FPUREGISTERS + 6 * 16 * 2];       \
    stp     q14, q15, [ctx, #KCONTEXT_FPUREGISTERS + 7 * 16 * 2];       \
    stp     q16, q17, [ctx, #KCONTEXT_FPUREGISTERS + 8 * 16 * 2];       \
    stp     q18, q19, [ctx, #KCONTEXT_FPUREGISTERS + 9 * 16 * 2];       \
    stp     q20, q21, [ctx, #KCONTEXT_FPUREGISTERS + 10 * 16 * 2];      \
    stp     q22, q23, [ctx, #KCONTEXT_FPUREGISTERS + 11 * 16 * 2];      \
    stp     q24, q25, [ctx, #KCONTEXT_FPUREGISTERS + 12 * 16 * 2];      \
    stp     q26, q27, [ctx, #KCONTEXT_FPUREGISTERS + 13 * 16 * 2];      \
    stp     q28, q29, [ctx, #KCONTEXT_FPUREGISTERS + 14 * 16 * 2];      \
    stp     q30, q31, [ctx, #KCONTEXT_FPUREGISTERS + 15 * 16 * 2];      \
.Lskip_fpusave:;                                                        \

/*
 * RESTORE_CONTEXT(ctx, tmp0, tmp1)
 *
 * Reloads the register context from the context block pointed to by ctx.
 * Counterpart of SAVE_CONTEXT and uses the same layout.
 *
 *   ctx  : register holding the base address of the context block (not modified)
 *   tmp0 : scratch register, clobbered
 *   tmp1 : scratch register, clobbered
 *
 * Restored state:
 *   - sp, then x19-x30
 *   - CPACR_EL1 from the value stored next to sp, followed by an isb so the
 *     new setting is in effect before any FP/SIMD register is touched
 *   - only when the restored (CPACR_EL1 & HW_CPACR_EL1_FEN_MASK) is non-zero:
 *     FPCR/FPSR and q0-q31
 *
 * NOTE: the macro defines the fixed local label .Lskip_fpurestore, so it can
 * be expanded only once per assembly file; a second expansion would define
 * the label twice.
 */
#define RESTORE_CONTEXT(ctx, tmp0, tmp1)                                \
    ldp     tmp0, tmp1, [ctx, #(KCONTEXT_SP)];                          \
    mov     sp, tmp0;                                                   \
    ldp     x19, x20, [ctx, #(KCONTEXT_CPUREGISTERS + 0 * 8 * 2)];      \
    ldp     x21, x22, [ctx, #(KCONTEXT_CPUREGISTERS + 1 * 8 * 2)];      \
    ldp     x23, x24, [ctx, #(KCONTEXT_CPUREGISTERS + 2 * 8 * 2)];      \
    ldp     x25, x26, [ctx, #(KCONTEXT_CPUREGISTERS + 3 * 8 * 2)];      \
    ldp     x27, x28, [ctx, #(KCONTEXT_CPUREGISTERS + 4 * 8 * 2)];      \
    ldp     x29, x30, [ctx, #(KCONTEXT_CPUREGISTERS + 5 * 8 * 2)];      \
    msr     cpacr_el1, tmp1;                                            \
    isb;                                                                \
    and     tmp1, tmp1, #(HW_CPACR_EL1_FEN_MASK);                       \
    cbz     tmp1, .Lskip_fpurestore;                                    \
    /* ---- Restore FPCR and FPSR (FEN bits are set) */                 \
    ldp     tmp0, tmp1, [ctx, #KCONTEXT_FPCR];                          \
    msr     fpcr, tmp0;                                                 \
    msr     fpsr, tmp1;                                                 \
    /* ---- Restore the FPU data registers q0-q31 */                    \
    ldp     q0, q1, [ctx, #KCONTEXT_FPUREGISTERS + 0 * 16 * 2];         \
    ldp     q2, q3, [ctx, #KCONTEXT_FPUREGISTERS + 1 * 16 * 2];         \
    ldp     q4, q5, [ctx, #KCONTEXT_FPUREGISTERS + 2 * 16 * 2];         \
    ldp     q6, q7, [ctx, #KCONTEXT_FPUREGISTERS + 3 * 16 * 2];         \
    ldp     q8, q9, [ctx, #KCONTEXT_FPUREGISTERS + 4 * 16 * 2];         \
    ldp     q10, q11, [ctx, #KCONTEXT_FPUREGISTERS + 5 * 16 * 2];       \
    ldp     q12, q13, [ctx, #KCONTEXT_FPUREGISTERS + 6 * 16 * 2];       \
    ldp     q14, q15, [ctx, #KCONTEXT_FPUREGISTERS + 7 * 16 * 2];       \
    ldp     q16, q17, [ctx, #KCONTEXT_FPUREGISTERS + 8 * 16 * 2];       \
    ldp     q18, q19, [ctx, #KCONTEXT_FPUREGISTERS + 9 * 16 * 2];       \
    ldp     q20, q21, [ctx, #KCONTEXT_FPUREGISTERS + 10 * 16 * 2];      \
    ldp     q22, q23, [ctx, #KCONTEXT_FPUREGISTERS + 11 * 16 * 2];      \
    ldp     q24, q25, [ctx, #KCONTEXT_FPUREGISTERS + 12 * 16 * 2];      \
    ldp     q26, q27, [ctx, #KCONTEXT_FPUREGISTERS + 13 * 16 * 2];      \
    ldp     q28, q29, [ctx, #KCONTEXT_FPUREGISTERS + 14 * 16 * 2];      \
    ldp     q30, q31, [ctx, #KCONTEXT_FPUREGISTERS + 15 * 16 * 2];      \
.Lskip_fpurestore:;                                                     \


    .text
    .balign 0x40
//------------------------------------------------------------------------------
// nn::kern::KScheduler::Schedule(nn::kern::KScheduler::SchedulingCounter*)
//
// Saves the context of the calling thread, picks the thread stored at
// KSCHEDULER_HIGHEST_THREAD, takes the lock of its context, switches to it and
// returns on that thread's stack. If no thread is available, the routine idles
// until the "scheduling required" flag is raised.
//
// In:
//   x0 = this (KScheduler*)
//   x1 = SchedulingCounter*; the "scheduling required" flag is the byte at
//        offset 0, and the KSCHEDULER_* offsets used below are applied to
//        this pointer
//
// Register roles while inside the scheduling loop:
//   x3  = end of the stack of the thread whose context lock is still to be
//         released, or 0 when there is nothing to release
//   x10 = KContext* belonging to x3 (only meaningful while x3 != 0)
//   x19 = this
//   x20 = SchedulingCounter*
//   x21 = next thread (KThread*); temporarily a copy of x3 around one call
//   x22 = context of the next thread; temporarily a copy of x10 around one call
//
// The callee-saved registers x19-x22 are overwritten here; they are reloaded
// from the target context before returning.
//
// NOTE(review): assumes interrupts are masked on entry (the idle loop only
// unmasks IRQs for a single instruction window) -- confirm against callers.
//------------------------------------------------------------------------------
ENTRY(_ZN2nn4kern10KScheduler8ScheduleEPNS1_17SchedulingCounterE)
    // Save the current context.
    // Compute the pointer to the KContext of the current thread:
    // x3 = sp rounded up to the next NN_KERN_THREAD_SVC_STACK_SIZE boundary
    // (the end of the stack, assuming the stack is aligned to its size --
    // TODO confirm), and the context pointer is read from the parameter area
    // located just below that address.
    add     x3, sp, #NN_KERN_THREAD_SVC_STACK_SIZE
    and     x3, x3, #(0 - NN_KERN_THREAD_SVC_STACK_SIZE)
    ldr     x10, [x3, #-(PARAMS_ON_STACK_SIZE - PARAMS_ON_STACK_CONTEXT)]

    SAVE_CONTEXT(x10, x5, x6)

    // x19-x22 are now saved in the context, so they can hold our state.
    mov     x19, x0
    mov     x20, x1

    // Switch stacks: run the rest of the scheduler on the stack stored at
    // KSCHEDULER_IDLE_STASK (presumably the idle stack of this core).
    ldr     x5, [x20, #KSCHEDULER_IDLE_STASK]
    mov     sp, x5

.Lreschdule:
    //  x3: sp end (0 when there is no context lock to release)
    // x10: ctx
    // x19: this
    // x20: SchedulingCounter
    // If the "interrupt task is runnable" byte is set, clear it and let the
    // scheduler make the interrupt task thread runnable.
    ldrb    w5, [x20, #KSCHEDULER_INTERRUPTTASK_IS_RUNNABLE]
    cbz     w5, .Linterrupt_checked
    strb    wzr, [x20, #KSCHEDULER_INTERRUPTTASK_IS_RUNNABLE]
    mov     x0, x19
    // x3 and x10 are caller-saved; keep them in x21/x22 across the call.
    mov     x21, x3
    mov     x22, x10
    bl      _ZN2nn4kern10KScheduler29InterruptTaskThreadToRunnableEv
    mov     x3, x21
    mov     x10, x22

.Linterrupt_checked:
    // x3 == 0: no previous context is held, nothing to unlock.
    cbz     x3, .Lskip_unlock

    // Do not release the lock of a thread that has been terminated.
    ldrb    w5, [x3, #-(PARAMS_ON_STACK_SIZE - PARAMS_ON_STACK_DPCFLAGS)]
    tbnz    w5, #DPC_FUNC_TERMINATED_SHIFT, .Lskip_unlock

    // Release the context lock with a store-release
    // (original author note: the timing of this unlock still needs tuning).
    add     x5,  x10, #KCONTEXT_LOCK
    stlrb   wzr, [x5]

.Lskip_unlock:
    // this: x19
    // SchedulingCounter: x20
    // Drop any exclusive reservation, then clear the "scheduling required"
    // flag (offset 0) with release semantics before reading the next thread.
    clrex
    stlrb   wzr, [x20] // stlrb   wzr, [x20, #KSCHEDULER_SCHEDULING_REQUIRED]

    // x21 = thread to run next; null means there is nothing to run.
    ldr     x21, [x20, #KSCHEDULER_HIGHEST_THREAD]
    cbz     x21, .Lswitch_idle

    mov     x0, x21
    bl      _ZN2nn4kern7KThread17GetContextPointerEv
    mov     x22, x0

    // Acquire the lock of the next context.
    // x19: this
    // x20: SchedulingCounter
    // x21: pNextThread
    // x22: pNextCtx
    add     x1,  x22, #KCONTEXT_LOCK
    mov     w2, #1
.Llock_loop0:
    // sevl sets the local event register so that the first wfe below does
    // not block.
    sevl
    prfm    pstl1keep, [x1]
.Llock_loop1:
    wfe
    // Try to change the lock byte from 0 to 1 with an exclusive pair.
    ldaxrb  w3, [x1]
    cbnz    w3, .Lcheck_resh        // lock is currently held
    stxrb   w3, w2, [x1]
    cbnz    w3, .Llock_loop1        // exclusive store failed, retry
    b       .Llocked

.Lcheck_resh:
    // The lock is held by someone else. Keep spinning unless a new
    // scheduling request has arrived in the meantime.
    ldarb   w3, [x20] // ldarb   w3, [x20, #KSCHEDULER_SCHEDULING_REQUIRED]
    cbz     w3, .Llock_loop0

    // Switch to idle (SwitchThread(this, nullptr)), then start over from the
    // interrupt check with no context lock to release (x3 = 0).
    mov     x0, x19
    mov     x1, #0
    bl      _ZN2nn4kern10KScheduler12SwitchThreadEPNS0_7KThreadE
    mov     x3, #0
    b       .Lreschdule

.Llocked:
    // The context lock is held: SwitchThread(this, pNextThread).
    // Zero return     -> only undo what the scheduler itself clobbered.
    // Non-zero return -> full context restore.
    // NOTE(review): the exact meaning of the return value is defined by
    // SwitchThread, which is not visible in this file.
    mov     x0, x19
    mov     x1, x21
    bl      _ZN2nn4kern10KScheduler12SwitchThreadEPNS0_7KThreadE
    cbz     w0, .Lrestore_noctx

    // Move this/SchedulingCounter/ctx to caller-saved registers, because
    // RESTORE_CONTEXT overwrites x19-x30 and sp.
    mov     x0, x19
    mov     x1, x20
    mov     x10, x22
    RESTORE_CONTEXT(x10, x3, x4)
    b       .Lrestore_end

.Lrestore_noctx:
    // Restore only the state clobbered by the scheduler: x19-x22, lr and sp.
    mov     x0, x19
    mov     x1, x20
    mov     x10, x22
    ldp     x19, x20, [x10, #(KCONTEXT_CPUREGISTERS + 0 * 8 * 2)]
    ldp     x21, x22, [x10, #(KCONTEXT_CPUREGISTERS + 1 * 8 * 2)]
    // Loads lr and the following 8 bytes as sp in a single pair; this
    // assumes the saved sp is stored immediately after the saved lr.
    ldp     x30, x4,  [x10, #(KCONTEXT_LR)]
    mov     sp, x4

.Lrestore_end:
    // x0: this
    // x1: SchedulingCounter
    // x10: ctx
    // Re-check the flag; if no new request arrived, return on the stack of
    // the thread that was just switched in.
    ldarb   w3, [x1] // ldarb   w3, [x1, #KSCHEDULER_SCHEDULING_REQUIRED]
    cbnz    w3, .Lrestore_without_restore

    ret

    // A new scheduling request arrived: run the scheduling loop again.
    // No SAVE_CONTEXT is performed on this path; x10 still points to the
    // context that was just switched in, and its lock is released at
    // .Linterrupt_checked.
.Lrestore_without_restore:
    mov     x19, x0
    mov     x20, x1

    // Recompute the stack end of the thread just switched in, then switch
    // back to the stack stored at KSCHEDULER_IDLE_STASK.
    add     x3, sp, #NN_KERN_THREAD_SVC_STACK_SIZE
    and     x3, x3, #(0 - NN_KERN_THREAD_SVC_STACK_SIZE)

    ldr     x5, [x1, #KSCHEDULER_IDLE_STASK]
    mov     sp, x5
    b       .Lreschdule

.Lswitch_idle:
    // Nothing to run: switch to idle (SwitchThread(this, nullptr)).
    // x19: this
    // x20: SchedulingCounter
    mov     x0, x19
    mov     x1, #0
    bl      _ZN2nn4kern10KScheduler12SwitchThreadEPNS0_7KThreadE

.Lidle_loop:
    // Wait until the "scheduling required" flag becomes non-zero.
    ldarb   w3, [x20] // ldarb   w3, [x20, #KSCHEDULER_SCHEDULING_REQUIRED]
    cbnz    w3, .Lidle_loop_end
    wfi
    // Unmask IRQs (DAIF.I) and mask them again right away, which opens a
    // short window in which a pending interrupt can be taken.
    msr     daifclr, #2
    msr     daifset, #2
    b       .Lidle_loop
.Lidle_loop_end:
    // Coming from idle there is no context lock to release.
    mov     x3, #0
    b       .Lreschdule
SET_SIZE(_ZN2nn4kern10KScheduler8ScheduleEPNS1_17SchedulingCounterE)

