﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cstring>
#include <algorithm>
#include <nn/nn_Common.h>
#include <nn/nn_SdkAssert.h>
#include <nn/crypto/crypto_AesEncryptor.h>
#include <nn/crypto/detail/crypto_CbcMacImpl.h>
#include "crypto_UpdateImpl.h"

#include <arm_neon.h>

namespace nn { namespace crypto { namespace detail {

// Feeds dataSize bytes starting at pData into the MAC computation by
// forwarding to the shared UpdateImpl helper.
//
// Precondition (checked with NN_SDK_REQUIRES): m_State == State_Initialized.
//
// NOTE(review): UpdateImpl is declared in crypto_UpdateImpl.h; instantiating it
// with void presumably routes block processing to ProcessBlocksGeneric --
// confirm against that header.
void CbcMacImpl::UpdateGeneric(const void* pData, size_t dataSize) NN_NOEXCEPT
{
    NN_SDK_REQUIRES(m_State == State_Initialized, "Invalid state. Please restart from Initialize().");

    UpdateImpl<void>(this, pData, dataSize); // void is only a placeholder template argument
}

// Chains a run of whole input blocks into the running MAC using the cipher
// callback (m_pCipherFunction with m_pCipherContext).
//
//   pData     : start of the input; one 16-byte vector is loaded per block and
//               the read position advances by BlockSize.
//   numBlocks : number of blocks available at pData. All blocks except the
//               last are folded into m_Mac; the last one is copied, still
//               unencrypted, into m_TemporalBlockBuffer.
//
// NOTE(review): when numBlocks <= 0 the loop body never runs, yet BlockSize
// bytes are still copied from pData. Callers presumably always pass
// numBlocks >= 1 -- confirm against UpdateImpl.
void CbcMacImpl::ProcessBlocksGeneric(const void* pData, int numBlocks) NN_NOEXCEPT
{
    // If the temporary buffer already holds a complete block, process it first.
    if (m_BufferedByte == BlockSize)
    {
        ProcessBlock(m_TemporalBlockBuffer);
        m_BufferedByte = 0;
    }

    const uint8_t* pBlock = static_cast<const uint8_t*>(pData);

    // Handle every block except the final one.
    for (int pending = numBlocks - 1; pending > 0; --pending, pBlock += BlockSize)
    {
        // Load the input block as a vector and XOR it with the current MAC,
        // which plays the role of the IV in CBC mode.
        uint8x16_t chained = veorq_u8(vld1q_u8(pBlock), vld1q_u8(m_Mac));

        // The encrypted result is written to m_Mac and becomes the new IV (= MAC).
        m_pCipherFunction(m_Mac, &chained, m_pCipherContext);
    }

    // Keep the last block around as plaintext.
    std::memcpy(m_TemporalBlockBuffer, pBlock, BlockSize);
    m_BufferedByte = BlockSize;
}

// Specialization of Update for AesEncryptor128: feeds dataSize bytes starting
// at pData into the MAC computation by forwarding to the shared UpdateImpl
// helper, instantiated with AesEncryptor128.
//
// Precondition (checked with NN_SDK_REQUIRES): m_State == State_Initialized.
//
// NOTE(review): UpdateImpl is declared in crypto_UpdateImpl.h; this
// instantiation presumably ends up in ProcessBlocks<AesEncryptor128> --
// confirm against that header.
template <>
void CbcMacImpl::Update<AesEncryptor128>(const void* pData, size_t dataSize) NN_NOEXCEPT
{
    NN_SDK_REQUIRES(m_State == State_Initialized, "Invalid state. Please restart from Initialize().");

    UpdateImpl<AesEncryptor128>(this, pData, dataSize);
}

// Specialization of ProcessBlocks for 128-bit AES. It performs the same
// chaining as ProcessBlocksGeneric, but computes the cipher inline with the
// AES intrinsics (vaeseq_u8 / vaesmcq_u8) instead of calling
// m_pCipherFunction, and keeps the running MAC in a vector for the whole loop.
//
//   pData     : start of the input; one 16-byte vector is loaded per block and
//               the read position advances by BlockSize.
//   numBlocks : number of blocks available at pData. All blocks except the
//               last are folded into the MAC; the last one is copied, still
//               unencrypted, into m_TemporalBlockBuffer.
//
// m_pCipherContext is treated as an AesEncryptor128, and eleven 16-byte round
// keys are read from the pointer returned by GetRoundKey().
//
// NOTE(review): when numBlocks <= 0 the loop body never runs, yet BlockSize
// bytes are still copied from pData. Callers presumably always pass
// numBlocks >= 1 -- confirm against UpdateImpl.
template <>
void CbcMacImpl::ProcessBlocks<AesEncryptor128>(const void* pData, int numBlocks) NN_NOEXCEPT
{
    // If the temporary buffer already holds a complete block, process it first.
    // This has to happen before m_Mac is loaded below.
    if (m_BufferedByte == BlockSize)
    {
        ProcessBlock(m_TemporalBlockBuffer);
        m_BufferedByte = 0;
    }

    const uint8_t* pBlock = static_cast<const uint8_t*>(pData);

    // Load all round keys once, up front, so the loop below does not reload them.
    const int RoundKeySize = 16;
    const uint8_t* pRoundKeys = static_cast<const AesEncryptor128*>(m_pCipherContext)->GetRoundKey();
    const uint8x16_t rk0  = vld1q_u8(pRoundKeys);
    const uint8x16_t rk1  = vld1q_u8(pRoundKeys + RoundKeySize);
    const uint8x16_t rk2  = vld1q_u8(pRoundKeys + RoundKeySize * 2);
    const uint8x16_t rk3  = vld1q_u8(pRoundKeys + RoundKeySize * 3);
    const uint8x16_t rk4  = vld1q_u8(pRoundKeys + RoundKeySize * 4);
    const uint8x16_t rk5  = vld1q_u8(pRoundKeys + RoundKeySize * 5);
    const uint8x16_t rk6  = vld1q_u8(pRoundKeys + RoundKeySize * 6);
    const uint8x16_t rk7  = vld1q_u8(pRoundKeys + RoundKeySize * 7);
    const uint8x16_t rk8  = vld1q_u8(pRoundKeys + RoundKeySize * 8);
    const uint8x16_t rk9  = vld1q_u8(pRoundKeys + RoundKeySize * 9);
    const uint8x16_t rk10 = vld1q_u8(pRoundKeys + RoundKeySize * 10);

    // Running MAC, which plays the role of the IV in CBC mode.
    uint8x16_t mac = vld1q_u8(m_Mac);

    // Handle every block except the final one.
    for (int pending = numBlocks - 1; pending > 0; --pending, pBlock += BlockSize)
    {
        // Load the input block as a vector and XOR it with the current MAC.
        uint8x16_t state = veorq_u8(vld1q_u8(pBlock), mac);

        // Encrypt: nine rounds that include the vaesmcq_u8 step ...
        state = vaesmcq_u8(vaeseq_u8(state, rk0));
        state = vaesmcq_u8(vaeseq_u8(state, rk1));
        state = vaesmcq_u8(vaeseq_u8(state, rk2));
        state = vaesmcq_u8(vaeseq_u8(state, rk3));
        state = vaesmcq_u8(vaeseq_u8(state, rk4));
        state = vaesmcq_u8(vaeseq_u8(state, rk5));
        state = vaesmcq_u8(vaeseq_u8(state, rk6));
        state = vaesmcq_u8(vaeseq_u8(state, rk7));
        state = vaesmcq_u8(vaeseq_u8(state, rk8));

        // ... then a last round without it, finished by XOR with the final round key.
        state = vaeseq_u8(state, rk9);

        // The ciphertext is the MAC / IV for the next block.
        mac = veorq_u8(state, rk10);
    }

    // Whatever the vector holds at this point is the MAC; write it back.
    vst1q_u8(m_Mac, mac);

    // Keep the last block around as plaintext.
    std::memcpy(m_TemporalBlockBuffer, pBlock, BlockSize);
    m_BufferedByte = BlockSize;
}

}}} // namespace nn::crypto::detail
