﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cstring>
#include <algorithm>
#include <nn/nn_SdkAssert.h>
#include <nn/nn_Common.h>
#include <nn/crypto/crypto_Config.h>
#include <nn/crypto/detail/crypto_Md5Impl.h>
#include <nn/crypto/detail/crypto_Clear.h>

// MD5 auxiliary bitwise functions; each combines three 32-bit words into one.
// Every macro argument is fully parenthesized (including under '~') so that
// compound argument expressions such as (p | q) expand with the intended meaning.
#define F(x, y, z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~(z))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~(z))))
// Rotates the 32-bit unsigned value x left by n bits.
// n must be in the range 1..31: n == 0 would shift right by a full 32 bits.
#define ROTL(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

namespace nn { namespace crypto { namespace detail {

namespace
{
    // Single round-1 step: sums a, F(b,c,d), the message word x and the constant t,
    // rotates that sum left by s bits and adds b to the result.
    inline Bit32 CalcRound1(Bit32 a, Bit32 b, Bit32 c, Bit32 d, Bit32 x, Bit32 s, Bit32 t) NN_NOEXCEPT
    {
        const Bit32 mixed = a + F(b, c, d) + x + t;
        return ROTL(mixed, s) + b;
    }

    // Single round-2 step: sums a, G(b,c,d), the message word x and the constant t,
    // rotates that sum left by s bits and adds b to the result.
    inline Bit32 CalcRound2(Bit32 a, Bit32 b, Bit32 c, Bit32 d, Bit32 x, Bit32 s, Bit32 t) NN_NOEXCEPT
    {
        const Bit32 mixed = a + G(b, c, d) + x + t;
        return ROTL(mixed, s) + b;
    }

    // Single round-3 step: sums a, H(b,c,d), the message word x and the constant t,
    // rotates that sum left by s bits and adds b to the result.
    inline Bit32 CalcRound3(Bit32 a, Bit32 b, Bit32 c, Bit32 d, Bit32 x, Bit32 s, Bit32 t) NN_NOEXCEPT
    {
        const Bit32 mixed = a + H(b, c, d) + x + t;
        return ROTL(mixed, s) + b;
    }

    // Single round-4 step: sums a, I(b,c,d), the message word x and the constant t,
    // rotates that sum left by s bits and adds b to the result.
    inline Bit32 CalcRound4(Bit32 a, Bit32 b, Bit32 c, Bit32 d, Bit32 x, Bit32 s, Bit32 t) NN_NOEXCEPT
    {
        const Bit32 mixed = a + I(b, c, d) + x + t;
        return ROTL(mixed, s) + b;
    }

    /* Converts a sequence of 32-bit integers into a little-endian byte sequence.
       len is the number of bytes to produce. */
    inline void Encode (Bit8* output, const Bit32* input, size_t len)
    {
#if !defined(NN_BUILD_CONFIG_ENDIAN_BIG)
        // Build is not flagged big-endian: the words are copied byte-for-byte as they are.
        std::memcpy(output, input, len);
#else
        // Big-endian build: emit each word least-significant byte first.
        for (size_t offset = 0; offset < len; offset += 4)
        {
            const Bit32 word = input[offset / 4];
            Bit8* const dst = output + offset;

            dst[0] = static_cast<Bit8>( word        & 0xff);
            dst[1] = static_cast<Bit8>((word >> 8)  & 0xff);
            dst[2] = static_cast<Bit8>((word >> 16) & 0xff);
            dst[3] = static_cast<Bit8>((word >> 24) & 0xff);
        }
#endif
    }
    /* Converts a little-endian byte sequence into a sequence of 32-bit integers.
       len is the number of bytes to consume. */
    inline void Decode (Bit32* output, const Bit8* input, size_t len)
    {
#if !defined(NN_BUILD_CONFIG_ENDIAN_BIG)
        // Build is not flagged big-endian: the bytes are copied as they are.
        std::memcpy(output, input, len);
#else
        // Big-endian build: assemble each word from four bytes, least-significant first.
        for (size_t offset = 0; offset < len; offset += 4)
        {
            const Bit8* const src = input + offset;

            output[offset / 4] = (static_cast<Bit32>(src[0]))       |
                                 (static_cast<Bit32>(src[1]) << 8)  |
                                 (static_cast<Bit32>(src[2]) << 16) |
                                 (static_cast<Bit32>(src[3]) << 24);
        }
#endif
    }
}   // anonymous namespace


// Hands the entire object to ClearMemory on destruction
// (presumably to wipe the buffered input and hash state -- see crypto_Clear.h).
Md5Impl::~Md5Impl() NN_NOEXCEPT
{
    ClearMemory(this, sizeof(Md5Impl));
}

// Resets the context so that a new hash computation can begin.
void Md5Impl::Initialize() NN_NOEXCEPT
{
    // No input has been consumed yet.
    m_size = 0;

    // Starting values of the four 32-bit state words.
    m_x.p.a = 0x67452301;
    m_x.p.b = 0xefcdab89;
    m_x.p.c = 0x98badcfe;
    m_x.p.d = 0x10325476;

    // From here on Update() and GetHash() may be called.
    m_State = State_Initialized;
}

// Feeds `length` bytes starting at `pData` into the hash computation.
//
// Input is accumulated in the internal block buffer m_y; every time the buffer
// holds a complete block, ProcessBlock() is called to fold it into the state.
// Any trailing partial block stays in m_y for the next Update()/GetHash() call.
//
// pData  : input bytes; it is not dereferenced when length is 0.
// length : number of bytes available at pData.
//
// Precondition: Initialize() has been called and GetHash() has not yet finalized
// the context (m_State == State_Initialized).
void Md5Impl::Update(const void* pData, size_t length) NN_NOEXCEPT
{
    NN_SDK_REQUIRES(m_State == State_Initialized, "Invalid state. Please restart from Initialize().");

    const Bit8* p = static_cast<const Bit8*>(pData);

    // Number of bytes already waiting in m_y, and the room left before it is full.
    const size_t buffer_index = static_cast<size_t>(m_size & (BlockSize - 1)); // m_size % BlockSize
    const size_t buffer_space = BlockSize - buffer_index;
    m_size += length;

    /* The new data does not complete a block: stash it in m_y and defer processing. */
    if (buffer_space > length)
    {
        if (length > 0)
        {
            std::memcpy(&(m_y[buffer_index]), p, length);
        }
        return;
    }

    /* Top the buffer up to exactly one block and process it. */
    std::memcpy(&(m_y[buffer_index]), p, buffer_space);
    ProcessBlock();
    p += buffer_space;
    length -= buffer_space;

    /* Process every remaining whole block.
       The count is kept in size_t: narrowing it to int would silently drop
       blocks for very large inputs on targets where size_t is wider than int. */
    for (size_t blocks = length >> BlockSizeShift; blocks > 0; --blocks) // length / BlockSize
    {
        std::memcpy(m_y, p, BlockSize);
        p += BlockSize;
        ProcessBlock();
    }

    /* Keep the leftover (less than one block) for the next call. */
    length &= (BlockSize - 1); // length % BlockSize
    if (length > 0)
    {
        std::memcpy(m_y, p, length);
    }
}

// Writes the HashSize-byte digest to pHash.
// The first call after Update() finalizes the computation; calls made once the
// state is State_Done simply re-emit the stored result.
// hashSize is the capacity of pHash and is only checked by the assertion.
void Md5Impl::GetHash(void* pHash, size_t hashSize) NN_NOEXCEPT
{
    NN_SDK_REQUIRES((m_State == State_Initialized) || (m_State == State_Done), "Invalid state. Please restart from Initialize().");
    NN_SDK_REQUIRES(hashSize >= HashSize, "It requires %d bytes buffer", HashSize);
    NN_UNUSED(hashSize);

    const bool needsFinalization = (m_State == State_Initialized);
    if (needsFinalization)
    {
        // ProcessLastBlock() goes through Update(), so the state must switch only afterwards.
        ProcessLastBlock();
        m_State = State_Done;
    }

    Bit8* const pDigest = static_cast<Bit8*>(pHash);
    Encode(pDigest, m_x.state, HashSize);
}

// Folds the block currently held in m_y into the running state m_x.
//
// The block is decoded into 32-bit words and run through four rounds of sixteen
// steps each. Using the notation [abcd k s i] for
//     a = b + ((a + f(b,c,d) + X[k] + T[i]) <<< s)
// f is F, G, H and I for rounds 1 to 4 respectively, and the register roles
// rotate ABCD -> DABC -> CDAB -> BCDA within every group of four steps.
void Md5Impl::ProcessBlock() NN_NOEXCEPT
{
    // Additive constant T[i] for each of the 64 steps, in step order.
    static const Bit32 stepConstants[] =
    {
        0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
        0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
        0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
        0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
        0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
        0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
        0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
        0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
        0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
        0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
        0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
        0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
        0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
        0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
        0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
        0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
    };

    // Message-word index X[k] for each step of rounds 2, 3 and 4
    // (round 1 simply reads the words in their natural order 0..15).
    static const Bit32 wordOrder[] =
    {
         1,  6, 11,  0,
         5, 10, 15,  4,
         9, 14,  3,  8,
        13,  2,  7, 12,
         5,  8, 11, 14,
         1,  4,  7, 10,
        13,  0,  3,  6,
         9, 12, 15,  2,
         0,  7, 14,  5,
        12,  3, 10,  1,
         8, 15,  6, 13,
         4, 11,  2,  9,
    };

    // Decode the buffered block into 32-bit words.
    Bit32 words[BlockSize / sizeof(Bit32)];
    Decode (words, m_y, BlockSize);

    // Working copies of the state registers.
    Bit32 a = m_x.p.a;
    Bit32 b = m_x.p.b;
    Bit32 c = m_x.p.c;
    Bit32 d = m_x.p.d;

    // Global step counter 0..63; it indexes stepConstants directly and,
    // offset by 16, wordOrder for rounds 2 to 4.
    size_t step = 0;

    // Round 1 (steps 0..15): function F, rotations 7/12/17/22, words in natural order.
    for (; step < 16; step += 4)
    {
        a = CalcRound1(a, b, c, d, words[step],      7, stepConstants[step]);
        d = CalcRound1(d, a, b, c, words[step + 1], 12, stepConstants[step + 1]);
        c = CalcRound1(c, d, a, b, words[step + 2], 17, stepConstants[step + 2]);
        b = CalcRound1(b, c, d, a, words[step + 3], 22, stepConstants[step + 3]);
    }

    // Round 2 (steps 16..31): function G, rotations 5/9/14/20.
    for (; step < 32; step += 4)
    {
        const Bit32* const order = &wordOrder[step - 16];
        a = CalcRound2(a, b, c, d, words[order[0]],  5, stepConstants[step]);
        d = CalcRound2(d, a, b, c, words[order[1]],  9, stepConstants[step + 1]);
        c = CalcRound2(c, d, a, b, words[order[2]], 14, stepConstants[step + 2]);
        b = CalcRound2(b, c, d, a, words[order[3]], 20, stepConstants[step + 3]);
    }

    // Round 3 (steps 32..47): function H, rotations 4/11/16/23.
    for (; step < 48; step += 4)
    {
        const Bit32* const order = &wordOrder[step - 16];
        a = CalcRound3(a, b, c, d, words[order[0]],  4, stepConstants[step]);
        d = CalcRound3(d, a, b, c, words[order[1]], 11, stepConstants[step + 1]);
        c = CalcRound3(c, d, a, b, words[order[2]], 16, stepConstants[step + 2]);
        b = CalcRound3(b, c, d, a, words[order[3]], 23, stepConstants[step + 3]);
    }

    // Round 4 (steps 48..63): function I, rotations 6/10/15/21.
    for (; step < 64; step += 4)
    {
        const Bit32* const order = &wordOrder[step - 16];
        a = CalcRound4(a, b, c, d, words[order[0]],  6, stepConstants[step]);
        d = CalcRound4(d, a, b, c, words[order[1]], 10, stepConstants[step + 1]);
        c = CalcRound4(c, d, a, b, words[order[2]], 15, stepConstants[step + 2]);
        b = CalcRound4(b, c, d, a, words[order[3]], 21, stepConstants[step + 3]);
    }

    // Add this block's result into the running state.
    m_x.p.a += a;
    m_x.p.b += b;
    m_x.p.c += c;
    m_x.p.d += d;
}

// Finalizes the hash: appends the 0x80 padding byte, zero-fills up to the last
// eight bytes of a block, stores the total message length in bits there as two
// 32-bit words (low word first), and processes the resulting block(s).
void Md5Impl::ProcessLastBlock() NN_NOEXCEPT
{
    // Message length in bits, captured before the padding byte is counted into m_size.
    // Widen to 64 bits *before* shifting so that no high bits are lost should m_size
    // be narrower than 64 bits (its declaration is not visible here).
    const uint64_t total_length = static_cast<uint64_t>(m_size) << 3; // bytes to bits

    Bit32 total_length_context[2];
    total_length_context[0] = static_cast<Bit32>(total_length);
    total_length_context[1] = static_cast<Bit32>(total_length >> 32);

    // Append the mandatory padding byte through the normal update path.
    static const Bit8 padding = 0x80;
    Update(&padding, sizeof(Bit8));

    size_t buffer_index = static_cast<size_t>(m_size & (BlockSize - 1)); // m_size % BlockSize
    size_t buffer_space = BlockSize - buffer_index;

    // Not enough room left for the 8-byte length field: zero-fill this block,
    // process it, and continue with a fresh one.
    if (buffer_space < sizeof(uint64_t))
    {
        std::memset(&(m_y[buffer_index]), 0x00, buffer_space);
        ProcessBlock();
        buffer_index = 0;
        buffer_space = BlockSize;
    }

    // Zero everything between the current position and the length field.
    if (buffer_space > sizeof(uint64_t))
    {
        std::memset(&(m_y[buffer_index]), 0x00, buffer_space - sizeof(uint64_t));
    }

    // The length occupies the final eight bytes of the block.
    Encode(&(m_y[BlockSize - sizeof(uint64_t)]), total_length_context, sizeof(total_length_context));

    ProcessBlock();
}

}}}
