﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <nn/g3d/g3d_ShapeObj.h>
#include <nn/g3d/g3d_SkeletonObj.h>
#include <nn/gfx/util/gfx_ObjectDebugLabel.h>
#include <nn/util/util_BitFlagSet.h>
#include <nn/util/util_FloatFormat.h>

namespace nn { namespace g3d {

// Sets this sphere to `sphere` transformed by `mtx`.
// The center goes through VectorTransform; the radius is multiplied by the
// largest of the three scale components extracted from `mtx`.
void Sphere::Transform(const Sphere& sphere, const nn::util::Matrix4x3fType& mtx) NN_NOEXCEPT
{
    VectorTransform(&this->center, sphere.center, mtx);

    nn::util::Vector3fType axisScale;
    MatrixExtractScaleBase(&axisScale, mtx);

    const float scaleX = VectorGetX(axisScale);
    const float scaleY = VectorGetY(axisScale);
    const float scaleZ = VectorGetZ(axisScale);

    // Use the largest per-axis scale so the radius is governed by the most stretched axis.
    const float largestScale = std::max(std::max(scaleX, scaleY), scaleZ);
    this->radius = sphere.radius * largestScale;
}

// VS2013の64bit版での最適化で不具合が発生するので、原因と思われる箇所の最適化を無効にします。
#if(_MSC_VER == 1800)
#if defined(_M_X64)
#pragma optimize("",off)
#endif
#endif
// Sets this sphere to a sphere enclosing both lhs and rhs.
// If the squared center distance is below max((rhs.radius - lhs.radius)^2, 0.0001),
// the sphere with the larger radius is copied as-is (rhs on a tie). Otherwise a new
// center on the line between the two centers and a new radius are computed.
void Sphere::Merge(const Sphere& lhs, const Sphere& rhs) NN_NOEXCEPT
{
    nn::util::Vector3fType centerOffset;
    VectorSubtract(&centerOffset, rhs.center, lhs.center);
    const float centerDistSq = VectorLengthSquared(centerOffset);

    const float radiusGap = rhs.radius - lhs.radius;
    const float containThreshold = std::max(radiusGap * radiusGap, 0.0001f);

    if (centerDistSq < containThreshold)
    {
        // One sphere contains the other, or the centers are extremely close.
        if (lhs.radius > rhs.radius)
        {
            *this = lhs;
        }
        else
        {
            *this = rhs;
        }
        return;
    }

    // dist = distSq * (1 / sqrt(distSq))
    const float centerDist = centerDistSq * nn::util::Rsqrt(centerDistSq);
    const float mergedRadius = 0.5f * (lhs.radius + rhs.radius + centerDist);

    // Move from lhs.center toward rhs.center by (mergedRadius - lhs.radius).
    // lhs.radius is read before this->radius is written, which matters if this aliases lhs.
    VectorMultiply(&centerOffset, centerOffset, (mergedRadius - lhs.radius) / centerDist);
    VectorAdd(&this->center, lhs.center, centerOffset);
    this->radius = mergedRadius;
}
// 最適化を有効に戻します。
#if(_MSC_VER == 1800)
#if defined(_M_X64)
#pragma optimize("",on)
#endif
#endif

// Sets this box to the axis-aligned bounds of the first `count` points of pPointArray.
// pPointArray[0] is read unconditionally, so the array must hold at least one point.
void Aabb::Set(const nn::util::Vector3fType* pPointArray, int count) NN_NOEXCEPT
{
    // Seed both corners with the first point.
    nn::util::Vector3fType lower = pPointArray[0];
    nn::util::Vector3fType upper = pPointArray[0];

    for (int i = 0; i < count; ++i)
    {
        const nn::util::Vector3fType& point = pPointArray[i];
        const float x = VectorGetX(point);
        const float y = VectorGetY(point);
        const float z = VectorGetZ(point);

        VectorSet(&lower,
                  std::min(VectorGetX(lower), x),
                  std::min(VectorGetY(lower), y),
                  std::min(VectorGetZ(lower), z));

        VectorSet(&upper,
                  std::max(VectorGetX(upper), x),
                  std::max(VectorGetY(upper), y),
                  std::max(VectorGetZ(upper), z));
    }

    this->min = lower;
    this->max = upper;
}

// Sets this box to the axis-aligned bounds of a center/extent bounding volume
// transformed by `mtx`.
// The center is transformed by VectorTransform. The new extent is the sum of the
// component-wise absolute values of the matrix X/Y/Z axes, each scaled by the
// matching component of aabb.extent.
void Aabb::Transform(const Bounding& aabb, const nn::util::Matrix4x3fType& mtx) NN_NOEXCEPT
{
    nn::util::Vector3fType worldCenter;
    VectorLoad(&worldCenter, aabb.center);
    VectorTransform(&worldCenter, worldCenter, mtx);

    // Contribution of the X axis.
    nn::util::Vector3fType contribX;
    MatrixGetAxisX(&contribX, mtx);
    VectorSet(&contribX,
              std::abs(VectorGetX(contribX)),
              std::abs(VectorGetY(contribX)),
              std::abs(VectorGetZ(contribX)));
    VectorMultiply(&contribX, contribX, aabb.extent.v[0]);

    // Contribution of the Y axis.
    nn::util::Vector3fType contribY;
    MatrixGetAxisY(&contribY, mtx);
    VectorSet(&contribY,
              std::abs(VectorGetX(contribY)),
              std::abs(VectorGetY(contribY)),
              std::abs(VectorGetZ(contribY)));
    VectorMultiply(&contribY, contribY, aabb.extent.v[1]);

    // Contribution of the Z axis.
    nn::util::Vector3fType contribZ;
    MatrixGetAxisZ(&contribZ, mtx);
    VectorSet(&contribZ,
              std::abs(VectorGetX(contribZ)),
              std::abs(VectorGetY(contribZ)),
              std::abs(VectorGetZ(contribZ)));
    VectorMultiply(&contribZ, contribZ, aabb.extent.v[2]);

    // Accumulate as (X + Y) + Z.
    nn::util::Vector3fType worldExtent;
    VectorAdd(&worldExtent, contribX, contribY);
    VectorAdd(&worldExtent, worldExtent, contribZ);

    VectorSubtract(&this->min, worldCenter, worldExtent);
    VectorAdd(&this->max, worldCenter, worldExtent);
}

// Sets this box to the union of lhs and rhs: the component-wise minimum of the
// two min corners and the component-wise maximum of the two max corners.
void Aabb::Merge(const Aabb& lhs, const Aabb& rhs) NN_NOEXCEPT
{
    const float lowX = std::min(VectorGetX(lhs.min), VectorGetX(rhs.min));
    const float lowY = std::min(VectorGetY(lhs.min), VectorGetY(rhs.min));
    const float lowZ = std::min(VectorGetZ(lhs.min), VectorGetZ(rhs.min));
    VectorSet(&this->min, lowX, lowY, lowZ);

    const float highX = std::max(VectorGetX(lhs.max), VectorGetX(rhs.max));
    const float highY = std::max(VectorGetY(lhs.max), VectorGetY(rhs.max));
    const float highZ = std::max(VectorGetZ(lhs.max), VectorGetZ(rhs.max));
    VectorSet(&this->max, highX, highY, highZ);
}

// Builds the plane through p0, p1 and p2.
// The normal is normalize(cross(p2 - p0, p1 - p0)) and dist is -dot(normal, p0),
// so dot(normal, x) + dist is zero for points on the plane.
// Viewed from the normal direction, p0, p1, p2 are in clockwise order.
void Plane::Set(const nn::util::Vector3fType& p0, const nn::util::Vector3fType& p1, const nn::util::Vector3fType& p2) NN_NOEXCEPT
{
    nn::util::Vector3fType edgeToP2;
    nn::util::Vector3fType edgeToP1;
    VectorSubtract(&edgeToP2, p2, p0);
    VectorSubtract(&edgeToP1, p1, p0);

    nn::util::Vector3fType unnormalized;
    VectorCross(&unnormalized, edgeToP2, edgeToP1);

    VectorNormalize(&this->normal, unnormalized);
    this->dist = -VectorDot(this->normal, p0);
}

// Configures the view volume as a symmetric perspective frustum.
// The near-plane half height is zNear * tan(fovy / 2) (table-based tangent) and the
// half width is that value times `aspect`; the result is forwarded to SetFrustum.
void ViewVolume::SetPerspective(
    float fovy, float aspect, float zNear, float zFar, const nn::util::Matrix4x3fType& viewToWorld) NN_NOEXCEPT
{
    const float tanHalfFovy = nn::util::TanTable(nn::util::RadianToAngleIndex(fovy * 0.5f));
    const float halfHeight = zNear * tanHalfFovy;
    const float halfWidth = halfHeight * aspect;

    const float top = halfHeight;
    const float bottom = -halfHeight;
    const float left = -halfWidth;
    const float right = halfWidth;
    SetFrustum(top, bottom, left, right, zNear, zFar, viewToWorld);
}

// Configures the view volume as a perspective frustum.
// top/bottom/left/right describe the rectangle on the near plane in view space;
// the far rectangle is the near one scaled by zFar / zNear. The eight corners are
// transformed by viewToWorld, then used to fill `aabb` and the six `planes`.
// `planeCount` is set to 6 and `flag` is cleared.
void ViewVolume::SetFrustum(
    float top, float bottom, float left, float right, float zNear, float zFar,
    const nn::util::Matrix4x3fType& viewToWorld) NN_NOEXCEPT
{
    // Indices of the frustum corners inside the local corner array.
    enum
    {
        Corner_NearLeftTop = 0,
        Corner_NearRightTop = 1,
        Corner_NearRightBottom = 2,
        Corner_NearLeftBottom = 3,
        Corner_FarLeftTop = 4,
        Corner_FarRightTop = 5,
        Corner_FarRightBottom = 6,
        Corner_FarLeftBottom = 7
    };
    const int CornerCount = 8;

    // Factor by which the near rectangle grows on the far plane.
    const float farScale = zFar * nn::util::Rcp(zNear);
    const float farTop = top * farScale;
    const float farBottom = bottom * farScale;
    const float farLeft = left * farScale;
    const float farRight = right * farScale;

    // Right-handed system: the view looks down -Z.
    nn::util::Vector3fType corners[CornerCount];
    VectorSet(&corners[Corner_NearLeftTop], left, top, -zNear);
    VectorSet(&corners[Corner_NearRightTop], right, top, -zNear);
    VectorSet(&corners[Corner_NearRightBottom], right, bottom, -zNear);
    VectorSet(&corners[Corner_NearLeftBottom], left, bottom, -zNear);
    VectorSet(&corners[Corner_FarLeftTop], farLeft, farTop, -zFar);
    VectorSet(&corners[Corner_FarRightTop], farRight, farTop, -zFar);
    VectorSet(&corners[Corner_FarRightBottom], farRight, farBottom, -zFar);
    VectorSet(&corners[Corner_FarLeftBottom], farLeft, farBottom, -zFar);

    for (int cornerIndex = 0; cornerIndex < CornerCount; ++cornerIndex)
    {
        VectorTransform(&corners[cornerIndex], corners[cornerIndex], viewToWorld);
    }
    aabb.Set(corners, CornerCount);

    // The eye position is the W axis (translation) of viewToWorld.
    nn::util::Vector3fType eye;
    MatrixGetAxisW(&eye, viewToWorld);

    // Side planes pass through the eye and two near corners.
    planes[0].Set(eye, corners[Corner_NearLeftBottom], corners[Corner_NearLeftTop]); // left
    planes[1].Set(eye, corners[Corner_NearRightTop], corners[Corner_NearRightBottom]); // right
    planes[2].Set(corners[Corner_NearLeftTop], corners[Corner_NearRightTop], corners[Corner_NearRightBottom]); // near
    planes[3].Set(corners[Corner_FarLeftTop], corners[Corner_FarLeftBottom], corners[Corner_FarRightBottom]); // far
    planes[4].Set(eye, corners[Corner_NearLeftTop], corners[Corner_NearRightTop]); // top
    planes[5].Set(eye, corners[Corner_NearRightBottom], corners[Corner_NearLeftBottom]); // bottom
    planeCount = 6;
    flag = 0;
}

// Configures the view volume as an orthographic box.
// The near and far rectangles share the same top/bottom/left/right in view space.
// The eight corners are transformed by viewToWorld, then used to fill `aabb` and
// the six `planes`. `planeCount` is set to 6 and `flag` is cleared.
void ViewVolume::SetOrtho(
    float top, float bottom, float left, float right, float zNear, float zFar,
    const nn::util::Matrix4x3fType& viewToWorld) NN_NOEXCEPT
{
    // Indices of the box corners inside the local corner array.
    enum
    {
        Corner_NearLeftTop = 0,
        Corner_NearRightTop = 1,
        Corner_NearRightBottom = 2,
        Corner_NearLeftBottom = 3,
        Corner_FarLeftTop = 4,
        Corner_FarRightTop = 5,
        Corner_FarRightBottom = 6,
        Corner_FarLeftBottom = 7
    };
    const int CornerCount = 8;

    // Right-handed system: the view looks down -Z.
    nn::util::Vector3fType corners[CornerCount];
    VectorSet(&corners[Corner_NearLeftTop], left, top, -zNear);
    VectorSet(&corners[Corner_NearRightTop], right, top, -zNear);
    VectorSet(&corners[Corner_NearRightBottom], right, bottom, -zNear);
    VectorSet(&corners[Corner_NearLeftBottom], left, bottom, -zNear);
    VectorSet(&corners[Corner_FarLeftTop], left, top, -zFar);
    VectorSet(&corners[Corner_FarRightTop], right, top, -zFar);
    VectorSet(&corners[Corner_FarRightBottom], right, bottom, -zFar);
    VectorSet(&corners[Corner_FarLeftBottom], left, bottom, -zFar);

    for (int cornerIndex = 0; cornerIndex < CornerCount; ++cornerIndex)
    {
        VectorTransform(&corners[cornerIndex], corners[cornerIndex], viewToWorld);
    }
    aabb.Set(corners, CornerCount);

    planes[0].Set(corners[Corner_NearLeftTop], corners[Corner_FarLeftBottom], corners[Corner_FarLeftTop]); // left
    planes[1].Set(corners[Corner_NearRightTop], corners[Corner_FarRightTop], corners[Corner_FarRightBottom]); // right
    planes[2].Set(corners[Corner_NearLeftTop], corners[Corner_NearRightTop], corners[Corner_NearRightBottom]); // near
    planes[3].Set(corners[Corner_FarLeftTop], corners[Corner_FarLeftBottom], corners[Corner_FarRightBottom]); // far
    planes[4].Set(corners[Corner_NearLeftTop], corners[Corner_FarLeftTop], corners[Corner_FarRightTop]); // top
    planes[5].Set(corners[Corner_NearRightBottom], corners[Corner_FarRightBottom], corners[Corner_FarLeftBottom]); // bottom
    planeCount = 6;
    flag = 0;
}

// Returns false as soon as the sphere lies entirely on the positive side of any
// plane (signed center distance greater than the radius); returns true otherwise.
bool ViewVolume::TestIntersection(const Sphere& sphere) const NN_NOEXCEPT
{
    int planeIndex = 0;
    while (planeIndex < this->planeCount)
    {
        const Plane& cullPlane = this->planes[planeIndex];
        const float signedDistance = VectorDot(cullPlane.normal, sphere.center) + cullPlane.dist;
        if (signedDistance > sphere.radius)
        {
            return false;
        }
        ++planeIndex;
    }
    return true;
}

// Classifies the sphere against the view volume planes.
// Returns -1 (outside) if the sphere is entirely on the positive side of any plane,
// 0 (intersecting) if it is not outside but touches or crosses at least one plane,
// and 1 (inside) otherwise.
int ViewVolume::TestIntersectionEx(const Sphere& sphere) const NN_NOEXCEPT
{
    bool touchesAnyPlane = false;
    for (int planeIndex = 0; planeIndex < this->planeCount; ++planeIndex)
    {
        const Plane& cullPlane = this->planes[planeIndex];
        const float signedDistance = VectorDot(cullPlane.normal, sphere.center) + cullPlane.dist;
        if (signedDistance > sphere.radius)
        {
            return -1; // outside
        }
        if (signedDistance >= -sphere.radius)
        {
            touchesAnyPlane = true; // intersecting
        }
    }
    return touchesAnyPlane ? 0 : 1;
}

// Tests an axis-aligned box against the view volume.
// When `flag` is non-zero, the box is first rejected if it does not overlap the
// view volume's own `aabb` on some axis. Then, for every plane, the box corner with
// the smallest signed distance is tested; if it is on the positive side the box is
// entirely outside that plane and false is returned.
bool ViewVolume::TestIntersection(const Aabb& targetAabb) const NN_NOEXCEPT
{
    if (this->flag)
    {
        const bool separatedX =
            VectorGetX(this->aabb.min) > VectorGetX(targetAabb.max) || VectorGetX(targetAabb.min) > VectorGetX(this->aabb.max);
        const bool separatedY =
            VectorGetY(this->aabb.min) > VectorGetY(targetAabb.max) || VectorGetY(targetAabb.min) > VectorGetY(this->aabb.max);
        const bool separatedZ =
            VectorGetZ(this->aabb.min) > VectorGetZ(targetAabb.max) || VectorGetZ(targetAabb.min) > VectorGetZ(this->aabb.max);
        if (separatedX || separatedY || separatedZ)
        {
            return false;
        }
    }

    for (int planeIndex = 0; planeIndex < this->planeCount; ++planeIndex)
    {
        const Plane& cullPlane = this->planes[planeIndex];
        const bool normalXNonNegative = VectorGetX(cullPlane.normal) >= 0.0f;
        const bool normalYNonNegative = VectorGetY(cullPlane.normal) >= 0.0f;
        const bool normalZNonNegative = VectorGetZ(cullPlane.normal) >= 0.0f;

        // Pick, per axis, the box coordinate that minimizes the signed distance.
        nn::util::Vector3fType innermost;
        VectorSet(&innermost,
                  normalXNonNegative ? VectorGetX(targetAabb.min) : VectorGetX(targetAabb.max),
                  normalYNonNegative ? VectorGetY(targetAabb.min) : VectorGetY(targetAabb.max),
                  normalZNonNegative ? VectorGetZ(targetAabb.min) : VectorGetZ(targetAabb.max));

        if (VectorDot(cullPlane.normal, innermost) + cullPlane.dist > 0.0f)
        {
            return false;
        }
    }
    return true;
}

// Classifies an axis-aligned box against the view volume.
// Returns -1 (outside), 0 (intersecting) or 1 (inside).
// When `flag` is non-zero, the box is first rejected if it does not overlap the
// view volume's own `aabb` on some axis. Then, per plane, the corner with the
// smallest signed distance decides "outside" and the corner with the largest signed
// distance decides "intersecting".
int ViewVolume::TestIntersectionEx(const Aabb& targetAabb) const NN_NOEXCEPT
{
    if (this->flag)
    {
        const bool separatedX =
            VectorGetX(this->aabb.min) > VectorGetX(targetAabb.max) || VectorGetX(targetAabb.min) > VectorGetX(this->aabb.max);
        const bool separatedY =
            VectorGetY(this->aabb.min) > VectorGetY(targetAabb.max) || VectorGetY(targetAabb.min) > VectorGetY(this->aabb.max);
        const bool separatedZ =
            VectorGetZ(this->aabb.min) > VectorGetZ(targetAabb.max) || VectorGetZ(targetAabb.min) > VectorGetZ(this->aabb.max);
        if (separatedX || separatedY || separatedZ)
        {
            return -1; // outside
        }
    }

    bool crossesAnyPlane = false;
    for (int planeIndex = 0; planeIndex < this->planeCount; ++planeIndex)
    {
        const Plane& cullPlane = this->planes[planeIndex];
        const bool normalXNonNegative = VectorGetX(cullPlane.normal) >= 0.0f;
        const bool normalYNonNegative = VectorGetY(cullPlane.normal) >= 0.0f;
        const bool normalZNonNegative = VectorGetZ(cullPlane.normal) >= 0.0f;

        // Corner with the smallest signed distance to the plane.
        nn::util::Vector3fType innermost;
        VectorSet(&innermost,
                  normalXNonNegative ? VectorGetX(targetAabb.min) : VectorGetX(targetAabb.max),
                  normalYNonNegative ? VectorGetY(targetAabb.min) : VectorGetY(targetAabb.max),
                  normalZNonNegative ? VectorGetZ(targetAabb.min) : VectorGetZ(targetAabb.max));

        // Opposite corner: the largest signed distance to the plane.
        nn::util::Vector3fType outermost;
        VectorSet(&outermost,
                  normalXNonNegative ? VectorGetX(targetAabb.max) : VectorGetX(targetAabb.min),
                  normalYNonNegative ? VectorGetY(targetAabb.max) : VectorGetY(targetAabb.min),
                  normalZNonNegative ? VectorGetZ(targetAabb.max) : VectorGetZ(targetAabb.min));

        if (VectorDot(cullPlane.normal, innermost) + cullPlane.dist > 0.0f)
        {
            return -1; // outside
        }
        // Once a crossing has been found the second dot product is skipped.
        if (!crossesAnyPlane && VectorDot(cullPlane.normal, outermost) + cullPlane.dist >= 0.0f)
        {
            crossesAnyPlane = true; // intersecting
        }
    }
    return crossesAnyPlane ? 0 : 1;
}

// Writes to pDst the overlapping portions of two SubMeshRange lists and returns the
// number of ranges written (the trailing terminator is not counted).
//
// Each input list is walked until an entry with count == 0 is reached, and a
// terminator entry (index, count and lodLevel all zero) is always written at the end
// of the output. Every output range takes the larger lodLevel of the two ranges it
// came from.
//
// NOTE(review): the walk only ever moves forward, so it appears to assume both input
// lists are sorted by index and non-overlapping within each list, and that pDst has
// room for every produced range plus the terminator - confirm against callers.
int SubMeshRange::And(SubMeshRange* pDst, const SubMeshRange* pLHS, const SubMeshRange* pRHS) NN_NOEXCEPT
{
    NN_SDK_REQUIRES_NOT_NULL(pDst);
    NN_SDK_REQUIRES_NOT_NULL(pLHS);
    NN_SDK_REQUIRES_NOT_NULL(pRHS);

    const SubMeshRange* pLower = pLHS;
    const SubMeshRange* pUpper = pRHS;
    if (pLower->index > pUpper->index) // Let pLower be the range with the smaller index.
    {
        std::swap(pLower, pUpper);
    }

    int rangeCount = 0;
    while (pLower->count != 0 && pUpper->count != 0)
    {
        // One past the last index covered by pLower.
        int end = pLower->index + pLower->count;
        if (end <= pUpper->index)
        {
            // pUpper starts at or after the end of pLower (no overlap), so advance pLower and retry.
            ++pLower;
            if (pLower->index > pUpper->index)
            {
                std::swap(pLower, pUpper);
            }
        }
        else if (end >= pUpper->index + pUpper->count)
        {
            // pUpper is entirely contained in pLower: emit pUpper, then advance pUpper and retry.
            *pDst = *pUpper;
            pDst->lodLevel = (std::max)(pLower->lodLevel, pUpper->lodLevel);
            ++pDst;
            ++pUpper;
            ++rangeCount;
        }
        else
        {
            // pLower and pUpper partially overlap: emit [pUpper->index, end), then advance pLower and retry.
            pDst->index = pUpper->index;
            pDst->count = static_cast<uint16_t>(end - pUpper->index);
            pDst->lodLevel = (std::max)(pLower->lodLevel, pUpper->lodLevel);
            ++pDst;
            ++rangeCount;
            ++pLower;
            std::swap(pLower, pUpper); // Because they overlapped, advancing pLower always inverts the ordering.
        }
    }

    // Terminate the output list.
    pDst->index = pDst->count = pDst->lodLevel = 0;
    return rangeCount;
}

//--------------------------------------------------------------------------------------------------

// Static helper functions used by ShapeObj for clearing and blending vertex
// attribute data. The class declares only static members and nested types.
class ShapeObj::Impl
{
public:
    // Sum of the position, normal, tangent, binormal and color key attribute counts.
    static const int AttributeInfoCount = ResKeyShape::KeyAttr_PositionCount
        + ResKeyShape::KeyAttr_NormalCount
        + ResKeyShape::KeyAttr_TangentCount
        + ResKeyShape::KeyAttr_BinormalCount
        + ResKeyShape::KeyAttr_ColorCount;
    // Keeps AttributeInfoCount in sync with the size of ResKeyShapeData::targetAttribIndices.
    // NOTE(review): the meaning of the extra 2 entries is not visible here - confirm.
    NN_STATIC_ASSERT(AttributeInfoCount + 2 == NN_ARRAY_SIZE(ResKeyShapeData::targetAttribIndices));

    // Zero-fills one attribute (its byte size is derived from `format`) for each of
    // `vertexCount` vertices, starting at pBuffer + offset and stepping by `stride` bytes.
    static void ClearResultBuffer(void* pBuffer, ptrdiff_t offset, uint32_t vertexCount, ptrdiff_t stride, nn::gfx::AttributeFormat format) NN_NOEXCEPT;
    // Adds `weight` times the source attribute to the destination attribute for each of
    // `vertexCount` vertices, dispatching on `format` to a typed BlendShapeImpl.
    static void BlendShape(void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride, void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride, nn::gfx::AttributeFormat format, float weight, uint32_t vertexCount) NN_NOEXCEPT;
private:

    // Signature shared by all BlendShapeImpl instantiations; used for the dispatch pointer.
    typedef void (BlendShapeImplFunctionType)(
        void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride,
        void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride,
        float weight, int elementCount, uint32_t vertexCount
        );

    // Per-storage-type blend kernel; specialized elsewhere in this file for some types.
    template<typename QuantizedType>
    static void BlendShapeImpl(
        void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride,
        void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride,
        float weight, int elementCount, uint32_t vertexCount
    ) NN_NOEXCEPT;

    // Calls a no-argument ResKeyShape getter, selected at compile time, on pResKeyShape.
    template<int (ResKeyShape::* getAttributeIndexFunc)() const>
    static int GetAttributeIndex(const ResKeyShape* pResKeyShape) NN_NOEXCEPT
    {
        return (pResKeyShape->*getAttributeIndexFunc)();
    }

    // Calls an indexed ResKeyShape getter with a compile-time `index` on pResKeyShape.
    template<int (ResKeyShape::* getAttributeIndexFunc)(int) const, int index>
    static int GetAttributeIndex(const ResKeyShape* pResKeyShape) NN_NOEXCEPT
    {
        return (pResKeyShape->*getAttributeIndexFunc)(index);
    }

    // Returns the byte size of one attribute of the given format.
    static size_t GetAttributeSize(nn::gfx::AttributeFormat format) NN_NOEXCEPT;

    // Returns the number of components in one attribute of the given format.
    static int GetElementCount(nn::gfx::AttributeFormat format) NN_NOEXCEPT;
};

// Returns the byte size of one attribute of the given format.
// The channel format is obtained by shifting the type bits out of `format`, then
// bucketed by comparing against boundary channel formats (this relies on the
// numeric ordering of nn::gfx::ChannelFormat). Anything above R32_G32_B32 yields 16.
size_t ShapeObj::Impl::GetAttributeSize(nn::gfx::AttributeFormat format) NN_NOEXCEPT
{
    const nn::gfx::ChannelFormat channel = static_cast<nn::gfx::ChannelFormat>(format >> nn::gfx::TypeFormat_Bits);

    size_t byteSize = 16;
    if (channel <= nn::gfx::ChannelFormat_R8)
    {
        byteSize = 1;
    }
    else if (channel <= nn::gfx::ChannelFormat_R16)
    {
        byteSize = 2;
    }
    else if (channel <= nn::gfx::ChannelFormat_R32)
    {
        byteSize = 4;
    }
    else if (channel <= nn::gfx::ChannelFormat_R32_G32)
    {
        byteSize = 8;
    }
    else if (channel <= nn::gfx::ChannelFormat_R32_G32_B32)
    {
        byteSize = 12;
    }
    return byteSize;
}

// Returns the number of components (1 to 4) of the given attribute format.
// Only the formats listed below are supported; any other format reaches
// NN_UNEXPECTED_DEFAULT.
int ShapeObj::Impl::GetElementCount(nn::gfx::AttributeFormat format) NN_NOEXCEPT
{
    int elementCount = 0;
    switch (format)
    {
    // Single-component formats.
    case nn::gfx::AttributeFormat_8_Unorm:
    case nn::gfx::AttributeFormat_8_Snorm:
    case nn::gfx::AttributeFormat_8_Uint:
    case nn::gfx::AttributeFormat_16_Unorm:
    case nn::gfx::AttributeFormat_16_Float:
    case nn::gfx::AttributeFormat_32_Float:
        elementCount = 1;
        break;

    // Two-component formats.
    case nn::gfx::AttributeFormat_8_8_Unorm:
    case nn::gfx::AttributeFormat_8_8_Snorm:
    case nn::gfx::AttributeFormat_8_8_Uint:
    case nn::gfx::AttributeFormat_16_16_Unorm:
    case nn::gfx::AttributeFormat_16_16_Snorm:
    case nn::gfx::AttributeFormat_16_16_Float:
    case nn::gfx::AttributeFormat_32_32_Float:
        elementCount = 2;
        break;

    // Three-component formats.
    case nn::gfx::AttributeFormat_32_32_32_Float:
        elementCount = 3;
        break;

    // Four-component formats.
    case nn::gfx::AttributeFormat_8_8_8_8_Unorm:
    case nn::gfx::AttributeFormat_8_8_8_8_Snorm:
    case nn::gfx::AttributeFormat_8_8_8_8_Uint:
    case nn::gfx::AttributeFormat_16_16_16_16_Unorm:
    case nn::gfx::AttributeFormat_16_16_16_16_Float:
    case nn::gfx::AttributeFormat_32_32_32_32_Float:
        elementCount = 4;
        break;

    default:
        // unsupported format.
        NN_UNEXPECTED_DEFAULT;
    }
    return elementCount;
}

// Zero-fills one attribute per vertex.
// Starting at pBuffer + offset, writes GetAttributeSize(format) zero bytes and then
// advances by `stride` bytes, repeated `vertexCount` times.
void ShapeObj::Impl::ClearResultBuffer(void* pBuffer, ptrdiff_t offset, uint32_t vertexCount, ptrdiff_t stride, nn::gfx::AttributeFormat format) NN_NOEXCEPT
{
    const size_t bytesPerAttribute = GetAttributeSize(format);

    nn::util::BytePtr cursor(pBuffer, offset);
    for (uint32_t remaining = vertexCount; remaining != 0; --remaining)
    {
        memset(cursor.Get(), 0, bytesPerAttribute);
        cursor.Advance(stride);
    }
}

// Generic blend kernel: for each vertex and each of `elementCount` components,
// adds (source component converted to float, times `weight`, converted back to
// QuantizedType) onto the destination component in place.
// Source and destination cursors start at buffer + offset and advance by their
// respective strides (in bytes) after every vertex.
template<typename QuantizedType>
void ShapeObj::Impl::BlendShapeImpl(
    void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride,
    void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride,
    float weight, int elementCount, uint32_t vertexCount
) NN_NOEXCEPT
{
    nn::util::BytePtr destCursor(pDestBuffer, destOffset);
    nn::util::BytePtr srcCursor(pSrcBuffer, srcOffset);

    for (uint32_t remaining = vertexCount; remaining != 0; --remaining)
    {
        QuantizedType* const pDestElements = destCursor.Get<QuantizedType>();
        const QuantizedType* const pSrcElements = srcCursor.Get<QuantizedType>();

        for (int element = 0; element < elementCount; ++element)
        {
            pDestElements[element] += static_cast<QuantizedType>(static_cast<float>(pSrcElements[element]) * weight);
        }

        destCursor.Advance(destStride);
        srcCursor.Advance(srcStride);
    }
}

// Blend kernel for 16-bit float attributes.
// Each component is stored as a 16-bit value; source and destination are decoded
// with FloatFormat16::Decode, combined as dest + src * weight in float, and the
// result is re-encoded with FloatFormat16::Encode into the destination.
template<>
void ShapeObj::Impl::BlendShapeImpl<nn::util::FloatFormat16>(
    void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride,
    void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride,
    float weight, int elementCount, uint32_t vertexCount
) NN_NOEXCEPT
{
    nn::util::BytePtr destCursor(pDestBuffer, destOffset);
    nn::util::BytePtr srcCursor(pSrcBuffer, srcOffset);

    for (uint32_t remaining = vertexCount; remaining != 0; --remaining)
    {
        int16_t* const pDestElements = destCursor.Get<int16_t>();
        const int16_t* const pSrcElements = srcCursor.Get<int16_t>();

        for (int element = 0; element < elementCount; ++element)
        {
            float blended = nn::util::FloatFormat16::Decode(static_cast<int>(pSrcElements[element])) * weight;
            blended += nn::util::FloatFormat16::Decode(static_cast<int>(pDestElements[element]));
            pDestElements[element] = static_cast<int16_t>(nn::util::FloatFormat16::Encode(blended));
        }

        destCursor.Advance(destStride);
        srcCursor.Advance(srcStride);
    }
}

// Blend kernel for three-component 32-bit float attributes.
// For each vertex, dest.xyz += src.xyz * weight. The destination is copied to a
// local, updated, and written back as a whole. `elementCount` is ignored.
template<>
void ShapeObj::Impl::BlendShapeImpl<nn::util::Float3>(
    void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride,
    void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride,
    float weight, int elementCount, uint32_t vertexCount
) NN_NOEXCEPT
{
    NN_UNUSED(elementCount);

    nn::util::BytePtr destCursor(pDestBuffer, destOffset);
    nn::util::BytePtr srcCursor(pSrcBuffer, srcOffset);

    for (uint32_t remaining = vertexCount; remaining != 0; --remaining)
    {
        nn::util::Float3* pTarget = destCursor.Get<nn::util::Float3>();
        nn::util::Float3 accumulated = *pTarget;
        nn::util::Float3 delta = *srcCursor.Get<nn::util::Float3>();

        accumulated.x += delta.x * weight;
        accumulated.y += delta.y * weight;
        accumulated.z += delta.z * weight;
        *pTarget = accumulated;

        destCursor.Advance(destStride);
        srcCursor.Advance(srcStride);
    }
}

// Blend kernel for four-component 32-bit float attributes.
// For each vertex, dest.xyzw += src.xyzw * weight. The destination is copied to a
// local, updated, and written back as a whole. `elementCount` is ignored.
template<>
void ShapeObj::Impl::BlendShapeImpl<nn::util::Float4>(
    void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride,
    void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride,
    float weight, int elementCount, uint32_t vertexCount
) NN_NOEXCEPT
{
    NN_UNUSED(elementCount);

    nn::util::BytePtr destCursor(pDestBuffer, destOffset);
    nn::util::BytePtr srcCursor(pSrcBuffer, srcOffset);

    for (uint32_t remaining = vertexCount; remaining != 0; --remaining)
    {
        nn::util::Float4* pTarget = destCursor.Get<nn::util::Float4>();
        nn::util::Float4 accumulated = *pTarget;
        nn::util::Float4 delta = *srcCursor.Get<nn::util::Float4>();

        accumulated.x += delta.x * weight;
        accumulated.y += delta.y * weight;
        accumulated.z += delta.z * weight;
        accumulated.w += delta.w * weight;
        *pTarget = accumulated;

        destCursor.Advance(destStride);
        srcCursor.Advance(srcStride);
    }
}

// Adds `weight` times the source attribute onto the destination attribute for
// `vertexCount` vertices.
//
// pDestBuffer/destOffset/destStride and pSrcBuffer/srcOffset/srcStride describe where
// the first attribute lives in each buffer and the byte step between vertices.
// `format` selects both the component count (via GetElementCount) and the typed
// BlendShapeImpl kernel that performs the arithmetic. Formats not listed in the
// switch reach NN_UNEXPECTED_DEFAULT.
//
// NOTE(review): of the signed 16-bit normalized formats only 16_16_Snorm is handled
// here and in GetElementCount - confirm that is intentional.
void ShapeObj::Impl::BlendShape(void* pDestBuffer, ptrdiff_t destOffset, ptrdiff_t destStride, void* pSrcBuffer, ptrdiff_t srcOffset, ptrdiff_t srcStride, nn::gfx::AttributeFormat format, float weight, uint32_t vertexCount) NN_NOEXCEPT
{
    int elementCount = GetElementCount(format);

    // Pick the kernel matching the storage type of one component.
    BlendShapeImplFunctionType* pBlendShapeFunction = NULL;
    switch (format)
    {
    case nn::gfx::AttributeFormat_8_Uint:
    case nn::gfx::AttributeFormat_8_8_Uint:
    case nn::gfx::AttributeFormat_8_8_8_8_Uint:

    case nn::gfx::AttributeFormat_8_Unorm:
    case nn::gfx::AttributeFormat_8_8_Unorm:
    case nn::gfx::AttributeFormat_8_8_8_8_Unorm:
        pBlendShapeFunction = &BlendShapeImpl<uint8_t>;
        break;

    case nn::gfx::AttributeFormat_8_Snorm:
    case nn::gfx::AttributeFormat_8_8_Snorm:
    case nn::gfx::AttributeFormat_8_8_8_8_Snorm:
        pBlendShapeFunction = &BlendShapeImpl<int8_t>;
        break;

    case nn::gfx::AttributeFormat_16_Unorm:
    case nn::gfx::AttributeFormat_16_16_Unorm:
    case nn::gfx::AttributeFormat_16_16_16_16_Unorm:
        pBlendShapeFunction = &BlendShapeImpl<uint16_t>;
        break;

    case nn::gfx::AttributeFormat_16_16_Snorm:
        pBlendShapeFunction = &BlendShapeImpl<int16_t>;
        break;

    case nn::gfx::AttributeFormat_16_Float:
    case nn::gfx::AttributeFormat_16_16_Float:
    case nn::gfx::AttributeFormat_16_16_16_16_Float:
        pBlendShapeFunction = &BlendShapeImpl<nn::util::FloatFormat16>;
        break;

    case nn::gfx::AttributeFormat_32_Float:
    case nn::gfx::AttributeFormat_32_32_Float:
        pBlendShapeFunction = &BlendShapeImpl<float>;
        break;

    case nn::gfx::AttributeFormat_32_32_32_Float:
        pBlendShapeFunction = &BlendShapeImpl<nn::util::Float3>;
        break;

    case nn::gfx::AttributeFormat_32_32_32_32_Float:
        pBlendShapeFunction = &BlendShapeImpl<nn::util::Float4>;
        break;

    default:
        // unsupported format.
        NN_UNEXPECTED_DEFAULT;
    }
    NN_SDK_ASSERT_NOT_NULL(pBlendShapeFunction);

    // The kernel builds its own cursors from the raw buffers and offsets.
    pBlendShapeFunction(
        pDestBuffer, destOffset, destStride,
        pSrcBuffer, srcOffset, srcStride,
        weight, elementCount, vertexCount
    );
}

// Computes the size and alignment of every work-memory block a ShapeObj needs and
// registers all blocks with m_WorkMemory.
// Requires a buffering count greater than zero. Bounding-related blocks get a size
// of zero when bounding is disabled.
void ShapeObj::InitializeArgument::CalculateMemorySize() NN_NOEXCEPT
{
    NN_G3D_REQUIRES(GetBufferingCount() > 0, NN_G3D_RES_GET_NAME(GetResource(), GetName()));

    const ResShape* pResShape = GetResource();
    const bool hasBounding = IsBoundingEnabled();

    // Total number of sub-meshes over all meshes of the shape.
    int totalSubMeshCount = 0;
    for (int meshIndex = 0; meshIndex < pResShape->GetMeshCount(); ++meshIndex)
    {
        totalSubMeshCount += pResShape->GetSubMeshCount(meshIndex);
    }

    // The AABB of the shape itself is not computed.
    // Space is reserved on the assumption that every LOD carries bounding information.
    size_t subBoundingBytes = 0;
    if (hasBounding)
    {
        subBoundingBytes = nn::util::align_up(sizeof(Aabb) * totalSubMeshCount, 4);
    }

    const int viewCount = GetViewCount();
    const int shapeBlockCount = IsViewDependent() ? viewCount : 1;
    const int keyShapeCount = pResShape->GetKeyShapeCount();
    const int bufferingCount = GetBufferingCount();
    size_t userAreaAlignedSize = nn::util::align_up(GetUserAreaSize(), Alignment_Default);
    NN_G3D_ASSERT(userAreaAlignedSize <= ShapeBlock::Size_UserArea, NN_G3D_RES_GET_NAME(GetResource(), GetName()));

    for (int blockIndex = 0; blockIndex < MemoryBlockIndex_End; ++blockIndex)
    {
        m_MemoryBlock[blockIndex].Initialize();
    }

    // Sub-mesh bounding boxes.
    m_MemoryBlock[MemoryBlockIndex_SubBoundingArray].SetAlignment(MatrixVectorAlignment);
    m_MemoryBlock[MemoryBlockIndex_SubBoundingArray].SetSize(subBoundingBytes);

    // One gfx buffer object per shape block per buffering slot.
    m_MemoryBlock[MemoryBlockIndex_ShapeBlockArray].SetSizeBy<nn::gfx::Buffer>(1, shapeBlockCount * bufferingCount);

    // One blend weight per key shape.
    m_MemoryBlock[MemoryBlockIndex_BlendWeightArray].SetAlignment(4);
    m_MemoryBlock[MemoryBlockIndex_BlendWeightArray].SetSizeBy<float>(1, keyShapeCount);

    // One bit per key shape, rounded up to whole 32-bit words, expressed in bytes.
    m_MemoryBlock[MemoryBlockIndex_BlendWeightFlags].SetAlignment(4);
    m_MemoryBlock[MemoryBlockIndex_BlendWeightFlags].SetSize(nn::util::align_up(keyShapeCount, 32) >> 3);

    // Bounding spheres: BoundingCoord_Count entries per mesh.
    m_MemoryBlock[MemoryBlockIndex_BoundingArray].SetAlignment(MatrixVectorAlignment);
    if (!hasBounding)
    {
        m_MemoryBlock[MemoryBlockIndex_BoundingArray].SetSize(0);
    }
    else
    {
        m_MemoryBlock[MemoryBlockIndex_BoundingArray].SetSizeBy<Sphere>(1, BoundingCoord_Count * pResShape->GetMeshCount());
    }

    // Size calculation for the dynamic vertex buffers.
    CalculateDynamicVertexBufferSize();

    m_MemoryBlock[MemoryBlockIndex_UserArea].SetSize(userAreaAlignedSize);

    m_WorkMemory.Initialize();
    for (int blockIndex = 0; blockIndex < MemoryBlockIndex_End; ++blockIndex)
    {
        m_WorkMemory.Append(&m_MemoryBlock[blockIndex]);
    }
}

// Sizes the two memory blocks used for dynamic vertex buffers.
// If the resource's vertex data reports no dynamic vertex buffers, both blocks get
// size zero. Otherwise the pointer block holds one nn::gfx::Buffer* per vertex
// buffer and the array block holds one nn::gfx::Buffer per dynamic vertex buffer
// per buffering slot.
void ShapeObj::InitializeArgument::CalculateDynamicVertexBufferSize() NN_NOEXCEPT
{
    const ResVertex* pResVertex = GetResource()->GetVertex();

    // Collect which vertex buffers need a dynamic counterpart.
    ResVertex::DynamicVertexBufferMask dynamicVertexBufferMask;
    pResVertex->CalculateDynamicVertexBufferIndex(&dynamicVertexBufferMask);

    if (!dynamicVertexBufferMask.IsAllOff())
    {
        const int vertexBufferCount = pResVertex->GetVertexBufferCount();
        const int generateDynamicVertexBufferCount = dynamicVertexBufferMask.CountPopulation();
        NN_SDK_ASSERT_LESS_EQUAL(generateDynamicVertexBufferCount, vertexBufferCount);

        // Compute the sizes of the memory blocks for the dynamic vertex buffers.
        m_MemoryBlock[MemoryBlockIndex_DynamicVertexBufferPtr].SetSizeBy<nn::gfx::Buffer*>(1, vertexBufferCount);
        m_MemoryBlock[MemoryBlockIndex_DynamicVertexBufferPtr].SetAlignment(NN_ALIGNOF(nn::gfx::Buffer*));

        const size_t bufferObjectSizeAlignment = 8;
        m_MemoryBlock[MemoryBlockIndex_DynamicVertexBufferArray].SetSizeBy<nn::gfx::Buffer>(bufferObjectSizeAlignment, generateDynamicVertexBufferCount * GetBufferingCount());
        m_MemoryBlock[MemoryBlockIndex_DynamicVertexBufferArray].SetAlignment(NN_ALIGNOF(nn::gfx::Buffer));
    }
    else
    {
        // Nothing to generate, so no memory is reserved.
        m_MemoryBlock[MemoryBlockIndex_DynamicVertexBufferPtr].SetSize(0);
        m_MemoryBlock[MemoryBlockIndex_DynamicVertexBufferArray].SetSize(0);
    }
}

// Initializes the shape object on top of caller-provided work memory.
// Returns false if the argument's memory sizes have not been calculated or if
// `bufferSize` is smaller than the required work memory size; returns true on success.
// pBuffer must satisfy Alignment_Buffer and may be NULL only when bufferSize is 0.
bool ShapeObj::Initialize(const InitializeArgument& arg, void* pBuffer, size_t bufferSize) NN_NOEXCEPT
{
    NN_G3D_REQUIRES(pBuffer != NULL || bufferSize == 0,   NN_G3D_RES_GET_NAME(arg.GetResource(), GetName()));
    NN_G3D_REQUIRES(IsAligned(pBuffer, Alignment_Buffer), NN_G3D_RES_GET_NAME(arg.GetResource(), GetName()));

    // Fail if sizes were never calculated or the buffer is smaller than required.
    if (!arg.IsMemoryCalculated() || arg.GetWorkMemorySize() > bufferSize)
    {
        return false;
    }

    const bool isViewDependent = arg.IsViewDependent();

    // Member initialization.
    m_pRes = arg.GetResource();
    m_Flag = 0;
    m_ViewCount = static_cast<uint8_t>(arg.GetViewCount());
    m_ViewDependent = isViewDependent ? 1 : 0;
    m_ShapeBlockCount = isViewDependent ? m_ViewCount : 1;
    m_BufferingCount = static_cast<uint8_t>(arg.GetBufferingCount());

    // Carve the typed arrays out of the work buffer.
    m_pShapeBlockArray = arg.GetBuffer<nn::gfx::Buffer>(pBuffer, InitializeArgument::MemoryBlockIndex_ShapeBlockArray);
    m_pBlendWeightArray = arg.GetBuffer<float>(pBuffer, InitializeArgument::MemoryBlockIndex_BlendWeightArray);
    m_pBlendWeightValidFlags = arg.GetBuffer<Bit32>(pBuffer, InitializeArgument::MemoryBlockIndex_BlendWeightFlags);
    m_pBounding = arg.GetBuffer<Sphere>(pBuffer, InitializeArgument::MemoryBlockIndex_BoundingArray);
    m_pSubMeshBounding = arg.GetBuffer<Aabb>(pBuffer, InitializeArgument::MemoryBlockIndex_SubBoundingArray);

    SetupDynamicVertexBuffers(pBuffer, arg);

    m_pUserArea = arg.GetBuffer(pBuffer, InitializeArgument::MemoryBlockIndex_UserArea);
    m_UserAreaSize = nn::util::align_up(arg.GetUserAreaSize(), Alignment_Default);
    m_pUserPtr = NULL;
    m_pBufferPtr = pBuffer;
    m_pMemoryPool = NULL;
    m_MemoryPoolOffset = 0;

    if (m_pBounding != NULL)
    {
        for (int meshIndex = 0; meshIndex < m_pRes->GetMeshCount(); ++meshIndex)
        {
            // Each mesh owns BoundingCoord_Count consecutive spheres.
            Sphere* pMeshSpheres = &m_pBounding[meshIndex * BoundingCoord_Count];

            Sphere& localSphere = pMeshSpheres[BoundingCoord_Local];
            const Bounding& resBounding = m_pRes->GetBounding(meshIndex);
            VectorLoad(&localSphere.center, resBounding.center);
            localSphere.radius = m_pRes->GetRadius(meshIndex);

            // For safety, start the other sphere off as a copy of the local one.
            pMeshSpheres[BoundingCoord_Word] = localSphere;
        }
    }

    ClearBlendWeights();

    return true;
}

// Returns the alignment nn::gfx reports for a constant buffer of sizeof(ShapeBlock)
// bytes on the given device.
size_t ShapeObj::GetBlockBufferAlignment(nn::gfx::Device* pDevice) const NN_NOEXCEPT
{
    nn::gfx::Buffer::InfoType shapeBlockInfo;
    shapeBlockInfo.SetDefault();
    shapeBlockInfo.SetSize(sizeof(ShapeBlock));
    shapeBlockInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_ConstantBuffer);

    const size_t alignment = nn::gfx::Buffer::GetBufferAlignment(pDevice, shapeBlockInfo);
    return alignment;
}

// Returns the total buffer size: the shape block (uniform block) buffers plus the
// dynamic vertex buffers.
size_t ShapeObj::CalculateBlockBufferSize(nn::gfx::Device* pDevice) const NN_NOEXCEPT
{
    // The uniform block is always created.
    const size_t shapeBlockBytes = CalculateShapeBlockBufferSize(pDevice);

    // Buffers that store the results of shape animation.
    const size_t dynamicVertexBytes = CalculateDynamicVertexBufferSize(pDevice);

    return shapeBlockBytes + dynamicVertexBytes;
}

// Returns the bytes needed for all shape block buffers: sizeof(ShapeBlock) rounded up
// to the block buffer alignment, times the shape block count, times the buffering count.
size_t ShapeObj::CalculateShapeBlockBufferSize(nn::gfx::Device* pDevice) const NN_NOEXCEPT
{
    const size_t alignment = GetBlockBufferAlignment(pDevice);
    return nn::util::align_up(sizeof(ShapeBlock), alignment) * GetShapeBlockCount() * m_BufferingCount;
}

// Constructs and initializes every ShapeBlock constant buffer (shape block count x buffering
// count) inside pMemoryPool starting at offset, fills each one with an identity world matrix
// and the vertex skin count, and marks the block buffer as valid.
// memoryPoolSize is not used here.
void ShapeObj::SetupBlockBufferImpl(nn::gfx::Device* pDevice, nn::gfx::MemoryPool* pMemoryPool, ptrdiff_t offset, size_t memoryPoolSize) NN_NOEXCEPT
{
    NN_UNUSED(memoryPoolSize);

    nn::util::Matrix4x3fType identityMtx;
    MatrixIdentity(&identityMtx);
    int skinCount = GetVertexSkinCount();
    ptrdiff_t poolOffset = offset;
    const int blockCount = GetShapeBlockCount();
    for (int blockIndex = 0; blockIndex < blockCount; ++blockIndex)
    {
        for (int bufferingIndex = 0; bufferingIndex < m_BufferingCount; ++bufferingIndex)
        {
            // Describe the constant buffer.
            nn::gfx::Buffer::InfoType info;
            info.SetDefault();
            info.SetSize(sizeof(ShapeBlock));
            info.SetGpuAccessFlags(nn::gfx::GpuAccess_ConstantBuffer);

            // Blocks other than rigid-body ones are not updated later, so the values are written
            // here at initialization time.
            nn::gfx::Buffer* pBlock = new(&m_pShapeBlockArray[blockIndex * m_BufferingCount + bufferingIndex]) nn::gfx::Buffer;
            pBlock->Initialize(pDevice, info, pMemoryPool, poolOffset, sizeof(ShapeBlock));
            nn::gfx::util::SetBufferDebugLabel(pBlock, "g3d_ShapeUniformBlock");
            poolOffset += nn::util::align_up(sizeof(ShapeBlock), nn::gfx::Buffer::GetBufferAlignment(pDevice, info));

            // Map the region and write the initial contents.
            ShapeBlock* pMapped = pBlock->Map< ShapeBlock >();

            // The matrix is stored the same way in both modes; only the swap step differs.
            MatrixStore(&pMapped->worldMtx, identityMtx);
            if (IsBlockSwapEnabled())
            {
                MatrixSwapEndian(&pMapped->worldMtx, pMapped->worldMtx);
                Copy32<true>(&pMapped->vtxSkinCount, &skinCount, sizeof(int32_t) >> 2);
            }
            else
            {
                Copy32<false>(&pMapped->vtxSkinCount, &skinCount, sizeof(int32_t) >> 2);
            }

            pBlock->FlushMappedRange(0, sizeof(ShapeBlock));
            pBlock->Unmap();
        }
    }
    m_Flag |= Flag_BlockBufferValid;
}

// Initializes the dynamic vertex buffers (one per dynamic vertex buffer index and buffering
// slot) inside pMemoryPool starting at offset, marks them valid, and fills each buffering
// slot with the contents of the original vertex buffers.
// Does nothing when this shape has no dynamic vertex buffer table.
// memoryPoolSize is not used here.
void ShapeObj::SetupDynamicVertexBuffer(nn::gfx::Device* pDevice, nn::gfx::MemoryPool* pMemoryPool, ptrdiff_t offset, size_t memoryPoolSize) NN_NOEXCEPT
{
    NN_UNUSED(memoryPoolSize);
    if (!m_pDynamicVertexBufferArrayPtr)
    {
        return;
    }

    NN_G3D_REQUIRES(!IsDynamicVertexBufferValid(), GetName());

    const ResVertex* pResVertex = m_pRes->GetVertex();
    nn::gfx::Buffer::InfoType resultBufferInfo;
    for (int vertexBufferIndex = 0, vertexBufferCount = pResVertex->GetVertexBufferCount(); vertexBufferIndex < vertexBufferCount; ++vertexBufferIndex)
    {
        // Vertex buffers that are not dynamic have a NULL entry.
        if (!m_pDynamicVertexBufferArrayPtr[vertexBufferIndex])
        {
            continue;
        }

        // Start from a copy of the original vertex buffer's info.
        const nn::gfx::Buffer::InfoType* pOriginalBufferInfo = pResVertex->GetVertexBufferInfo(vertexBufferIndex);
        memcpy(&resultBufferInfo, pOriginalBufferInfo, sizeof(nn::gfx::Buffer::InfoType));

        // Add the UnorderedAccessBuffer access flag so that the vertex buffer can be rewritten directly on the GPU.
        resultBufferInfo.SetGpuAccessFlags(resultBufferInfo.GetGpuAccessFlags() | nn::gfx::GpuAccess_UnorderedAccessBuffer);

        size_t bufferSize = resultBufferInfo.GetSize();
        ptrdiff_t align = nn::gfx::Buffer::GetBufferAlignment(pDevice, resultBufferInfo);
        for (int bufferIndex = 0; bufferIndex < m_BufferingCount; ++bufferIndex)
        {
            offset = nn::util::align_up(offset, align);

            // CleanupDynamicVertexBuffer() runs the destructor explicitly, so the object must be
            // constructed again before Initialize() when the buffers are set up a second time.
            // This matches what SetupBlockBufferImpl() does for the shape blocks.
            nn::gfx::Buffer* pDynamicVertexBuffer = new(&m_pDynamicVertexBufferArrayPtr[vertexBufferIndex][bufferIndex]) nn::gfx::Buffer;
            pDynamicVertexBuffer->Initialize(pDevice, resultBufferInfo, pMemoryPool, offset, bufferSize);
            offset += bufferSize;
        }
    }
    m_Flag |= Flag_DynamicVertexBufferValid;

    // Set the initial values.
    for (int bufferIndex = 0; bufferIndex < m_BufferingCount; ++bufferIndex)
    {
        ClearDynamicVertexBuffer(bufferIndex);
    }
}

// Finalizes and destroys every dynamic vertex buffer, then clears the valid flag.
// Does nothing when the dynamic vertex buffers are not currently valid.
void ShapeObj::CleanupDynamicVertexBuffer(nn::gfx::Device* pDevice) NN_NOEXCEPT
{
    if (IsDynamicVertexBufferValid())
    {
        const ResVertex* pVertex = m_pRes->GetVertex();
        const int vertexBufferCount = pVertex->GetVertexBufferCount();
        for (int vbIndex = 0; vbIndex < vertexBufferCount; ++vbIndex)
        {
            // A NULL entry means this vertex buffer has no dynamic counterpart.
            if (m_pDynamicVertexBufferArrayPtr[vbIndex])
            {
                for (int slot = 0; slot < m_BufferingCount; ++slot)
                {
                    nn::gfx::Buffer& buffer = m_pDynamicVertexBufferArrayPtr[vbIndex][slot];
                    buffer.Finalize(pDevice);
                    buffer.nn::gfx::Buffer::~TBuffer();
                }
            }
        }
        m_Flag &= ~Flag_DynamicVertexBufferValid;
    }
}

// Builds the per-vertex-buffer table of dynamic vertex buffers inside the work memory pBuffer.
// Entries for vertex buffers not selected by the dynamic vertex buffer mask are NULL; selected
// entries point at GetBufferingCount() consecutive, freshly constructed nn::gfx::Buffer objects.
// Leaves the table pointer NULL (and does nothing else) when arg provides no table memory.
void ShapeObj::SetupDynamicVertexBuffers(void* pBuffer, const InitializeArgument& arg) NN_NOEXCEPT
{
    NN_SDK_ASSERT(GetBufferingCount() > 0);
    NN_SDK_ASSERT_NOT_NULL(pBuffer);

    m_pDynamicVertexBufferArrayPtr = arg.GetBuffer<nn::gfx::Buffer*>(pBuffer, InitializeArgument::MemoryBlockIndex_DynamicVertexBufferPtr);
    if (m_pDynamicVertexBufferArrayPtr == NULL)
    {
        return;
    }
    NN_SDK_ASSERT(nn::util::is_aligned(reinterpret_cast<uintptr_t>(m_pDynamicVertexBufferArrayPtr), NN_ALIGNOF(nn::gfx::Buffer*)));

    // Collect the indices of the vertex buffers that need a dynamic vertex buffer.
    const ResVertex* pVertex = m_pRes->GetVertex();
    ResVertex::DynamicVertexBufferMask dynamicMask;
    pVertex->CalculateDynamicVertexBufferIndex(&dynamicMask);

    const int vertexBufferCount = pVertex->GetVertexBufferCount();
    nn::gfx::Buffer* pNextBuffer = arg.GetBuffer<nn::gfx::Buffer>(pBuffer, InitializeArgument::MemoryBlockIndex_DynamicVertexBufferArray);
    for (int vbIndex = 0; vbIndex < vertexBufferCount; ++vbIndex)
    {
        if (dynamicMask.Test(vbIndex))
        {
            // This vertex buffer owns the next GetBufferingCount() objects of the array.
            nn::gfx::Buffer* const pBufferingHead = pNextBuffer;
            m_pDynamicVertexBufferArrayPtr[vbIndex] = pBufferingHead;

            const int bufferingCount = GetBufferingCount();
            for (int slot = 0; slot < bufferingCount; ++slot)
            {
                new (&pBufferingHead[slot]) nn::gfx::Buffer();
                ++pNextBuffer;
            }
        }
        else
        {
            m_pDynamicVertexBufferArrayPtr[vbIndex] = NULL;
        }
    }
}

// Returns the number of bytes needed for all dynamic vertex buffers: for each dynamic vertex
// buffer, its size rounded up to the buffer alignment, multiplied by the buffering count.
// Returns 0 when this shape has no dynamic vertex buffer table.
size_t ShapeObj::CalculateDynamicVertexBufferSize(nn::gfx::Device* pDevice) const NN_NOEXCEPT
{
    if (m_pDynamicVertexBufferArrayPtr == NULL)
    {
        return 0ULL;
    }

    NN_SDK_ASSERT_NOT_NULL(pDevice);
    NN_SDK_ASSERT(nn::gfx::IsInitialized(*pDevice));

    size_t totalBytes = 0ULL;
    const ResVertex* pVertex = m_pRes->GetVertex();
    nn::gfx::Buffer::InfoType info;

    const int vertexBufferCount = pVertex->GetVertexBufferCount();
    for (int vbIndex = 0; vbIndex < vertexBufferCount; ++vbIndex)
    {
        // Only vertex buffers with a dynamic counterpart contribute to the size.
        if (m_pDynamicVertexBufferArrayPtr[vbIndex] == NULL)
        {
            continue;
        }

        // Start from a copy of the original vertex buffer's info.
        memcpy(&info, pVertex->GetVertexBufferInfo(vbIndex), sizeof(nn::gfx::Buffer::InfoType));

        // Add the UnorderedAccessBuffer access flag so that the vertex buffer can be rewritten directly on the GPU.
        info.SetGpuAccessFlags(info.GetGpuAccessFlags() | nn::gfx::GpuAccess_UnorderedAccessBuffer);

        totalBytes += nn::util::align_up(info.GetSize(), nn::gfx::Buffer::GetBufferAlignment(pDevice, info)) * m_BufferingCount;
    }
    return totalBytes;
}

// Writes the shape animation result into the dynamic vertex buffers of buffering slot
// bufferIndex. For every attribute slot of the base key shape, the destination attribute is
// cleared via Impl::ClearResultBuffer and then each key shape whose blend weight is at least
// 0.001 is passed to Impl::BlendShape together with its weight.
// Requires at least one key shape. The dynamic vertex buffer table is dereferenced without a
// NULL check, so the caller must ensure it exists.
void ShapeObj::CalculateShapeAnimResult(int bufferIndex) NN_NOEXCEPT
{
    NN_G3D_ASSERT(GetKeyShapeCount() > 0, GetName());

    // Get the base key shape (index 0), which holds the destination information.
    const ResKeyShape* pBaseResKeyShape = GetResKeyShape(0);
    const ResVertex* pResVertex = GetResVertex();
    for (int attributeInfoIndex = 0; attributeInfoIndex < Impl::AttributeInfoCount; ++attributeInfoIndex)
    {
        // Get the destination attribute. The stored value is decremented by one before use.
        // NOTE(review): presumably a stored 0 means "no attribute" and maps to InvalidIndex - confirm.
        int destAttributeIndex = pBaseResKeyShape->ToData().targetAttribIndices[attributeInfoIndex] - 1;
        if (destAttributeIndex == ResKeyShape::InvalidIndex)
        {
            continue;
        }

        const nn::g3d::ResVertexAttr* pDestResVertexAttribute = GetResVertexAttr(destAttributeIndex);
        int destVerexBufferIndex = pDestResVertexAttribute->GetBufferIndex();

        // Skip when no dynamic vertex buffer was created for the destination.
        if (!m_pDynamicVertexBufferArrayPtr[destVerexBufferIndex])
        {
            continue;
        }

        // The destination stays mapped until all key shapes have been processed for this attribute.
        nn::gfx::Buffer& resultBuffer = m_pDynamicVertexBufferArrayPtr[destVerexBufferIndex][bufferIndex];
        void* pResultBuffer = resultBuffer.Map();

        nn::gfx::AttributeFormat destFormat = pDestResVertexAttribute->GetFormat();
        ptrdiff_t destOffset = pDestResVertexAttribute->GetOffset();
        ptrdiff_t destStride = pResVertex->GetVertexBufferStride(destVerexBufferIndex);

        // Clear the destination values.
        Impl::ClearResultBuffer(pResultBuffer, destOffset, pResVertex->GetCount(), destStride, destFormat);

        for (int keyShapeIndex = 0, keyShapeCount = GetKeyShapeCount(); keyShapeIndex < keyShapeCount; ++keyShapeIndex)
        {
            // Key shapes with a weight below 0.001 are skipped.
            float weight = GetBlendWeight(keyShapeIndex);
            if (weight < 0.001f)
            {
                continue;
            }

            // Skip key shapes that have no attribute for this slot.
            const nn::g3d::ResKeyShape* pResKeyShape = GetResKeyShape(keyShapeIndex);
            int srcAttributeIndex = pResKeyShape->ToData().targetAttribIndices[attributeInfoIndex] - 1;
            if (srcAttributeIndex == nn::g3d::ResKeyShape::InvalidIndex)
            {
                continue;
            }

            const nn::g3d::ResVertexAttr* pResVertexAttribute = GetResVertexAttr(srcAttributeIndex);
            int srcBufferIndex = pResVertexAttribute->GetBufferIndex();
            const nn::gfx::Buffer* pSrcBuffer = pResVertex->GetVertexBuffer(srcBufferIndex);

            // The source attribute is expected to use the same format as the destination.
            NN_SDK_ASSERT_EQUAL(pResVertexAttribute->GetFormat(), destFormat);

            // Write the values; the source buffer is mapped only for the duration of the call.
            Impl::BlendShape(
                pResultBuffer, destOffset, destStride,
                pSrcBuffer->Map(), pResVertexAttribute->GetOffset(), pResVertex->GetVertexBufferStride(srcBufferIndex),
                destFormat, weight, pResVertex->GetCount()
            );
            pSrcBuffer->Unmap();
        }

        // Flush the whole destination buffer (size taken from the original vertex buffer info) and unmap it.
        const nn::gfx::Buffer::InfoType* pDestBufferInfo = pResVertex->GetVertexBufferInfo(destVerexBufferIndex);
        resultBuffer.FlushMappedRange(0, pDestBufferInfo->GetSize());
        resultBuffer.Unmap();
    }
}

// Creates the ShapeBlock uniform blocks and the dynamic vertex buffers inside pMemoryPool,
// starting at offset. Returns false when memoryPoolSize is smaller than
// CalculateBlockBufferSize(); returns true otherwise, including when nothing has to be created.
bool ShapeObj::SetupBlockBuffer(nn::gfx::Device* pDevice, nn::gfx::MemoryPool* pMemoryPool, ptrdiff_t offset, size_t memoryPoolSize) NN_NOEXCEPT
{
    NN_G3D_REQUIRES(pMemoryPool != NULL || memoryPoolSize == 0, NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    NN_G3D_REQUIRES(IsBlockBufferValid() == false,              NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    const size_t requiredSize = CalculateBlockBufferSize(pDevice);

    // Fail when the supplied memory is smaller than required.
    if (requiredSize > memoryPoolSize)
    {
        return false;
    }

    // When the required size is 0 there is no nn::gfx::Buffer to create.
    if (requiredSize != 0)
    {
        m_pMemoryPool = pMemoryPool;
        m_MemoryPoolOffset = offset;

        // The uniform blocks come first; the dynamic vertex buffers follow them.
        SetupBlockBufferImpl(pDevice, pMemoryPool, offset, memoryPoolSize);
        const ptrdiff_t shapeBlockBytes = CalculateShapeBlockBufferSize(pDevice);
        SetupDynamicVertexBuffer(pDevice, pMemoryPool, offset + shapeBlockBytes, memoryPoolSize);
    }

    return true;
}

// Finalizes and destroys every ShapeBlock uniform block, cleans up the dynamic vertex
// buffers, and forgets the memory pool recorded by SetupBlockBuffer().
// Requires an initialized device and a currently valid block buffer.
void ShapeObj::CleanupBlockBuffer(nn::gfx::Device* pDevice) NN_NOEXCEPT
{
    NN_SDK_REQUIRES_NOT_NULL(pDevice);
    NN_SDK_REQUIRES(nn::gfx::IsInitialized(*pDevice));
    NN_G3D_REQUIRES(IsBlockBufferValid() == true, NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    const int blockCount = GetShapeBlockCount();
    for (int blockIndex = 0; blockIndex < blockCount; ++blockIndex)
    {
        for (int slot = 0; slot < m_BufferingCount; ++slot)
        {
            nn::gfx::Buffer* pBlock = &m_pShapeBlockArray[blockIndex * m_BufferingCount + slot];
            pBlock->Finalize(pDevice);
            pBlock->nn::gfx::Buffer::~TBuffer();
        }
    }

    m_Flag &= ~Flag_BlockBufferValid;

    CleanupDynamicVertexBuffer(pDevice);

    m_MemoryPoolOffset = 0;
    m_pMemoryPool = NULL;
}

// Recomputes the world-space bounding sphere of mesh meshIndex from its local bounding sphere
// and the world matrices of pSkeleton. Does nothing when no bounding array was allocated.
// Rigid-body shapes use the matrix of their own bone; other shapes transform the local sphere
// by every skin bone and merge the results.
void ShapeObj::CalculateBounding(const SkeletonObj* pSkeleton, int meshIndex) NN_NOEXCEPT
{
    NN_G3D_REQUIRES_RANGE(meshIndex, 0, GetMeshCount(), NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    NN_G3D_REQUIRES(pSkeleton != NULL, NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    if (m_pBounding == NULL)
    {
        return;
    }

    const nn::util::Matrix4x3fType* pBoneWorldMatrices = pSkeleton->GetWorldMtxArray();
    Sphere& worldSphere = m_pBounding[meshIndex * BoundingCoord_Count + BoundingCoord_Word];
    const Sphere& localSphere = m_pBounding[meshIndex * BoundingCoord_Count + BoundingCoord_Local];

    if (IsRigidBody())
    {
        worldSphere.Transform(localSphere, pBoneWorldMatrices[GetBoneIndex()]);
        return;
    }

    // Start from the sphere of the first skin bone, then grow it with the remaining ones.
    const uint16_t* pSkinBoneIndices = m_pRes->GetSkinBoneIndexArray();
    worldSphere.Transform(localSphere, pBoneWorldMatrices[pSkinBoneIndices[0]]);

    const int skinBoneIndexCount = m_pRes->GetSkinBoneIndexCount();
    for (int i = 1; i < skinBoneIndexCount; ++i)
    {
        Sphere boneSphere;
        boneSphere.Transform(localSphere, pBoneWorldMatrices[pSkinBoneIndices[i]]);
        worldSphere.Merge(worldSphere, boneSphere);
    }
}

// Recomputes the world-space AABBs of every sub-mesh of mesh meshIndex by transforming the
// resource sub-mesh boundings with the world matrix of this shape's bone.
// Does nothing when no sub-mesh bounding array was allocated or the shape is not a rigid body.
void ShapeObj::CalculateSubMeshBounding(const SkeletonObj* pSkeleton, int meshIndex) NN_NOEXCEPT
{
    NN_G3D_REQUIRES_RANGE(meshIndex, 0, GetMeshCount(), NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    NN_G3D_REQUIRES(pSkeleton != NULL, NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    if (m_pSubMeshBounding == NULL || !IsRigidBody())
    {
        return;
    }

    // The output AABBs of all meshes are stored back to back; skip those of the preceding meshes.
    int startBoundingIndex = 0;
    for (int index = 0; index < meshIndex; ++index)
    {
        startBoundingIndex += m_pRes->GetSubMeshCount(index);
    }

    const nn::util::Matrix4x3fType* pWorldMtxArray = pSkeleton->GetWorldMtxArray();
    const nn::util::Matrix4x3fType& worldMtx = pWorldMtxArray[GetBoneIndex()];
    const Bounding* pResSubMeshBoundingArray = m_pRes->ToData().pSubMeshBoundingArray.Get();
    Aabb* pSubMeshBounding = &m_pSubMeshBounding[startBoundingIndex];
    // NOTE(review): the extra "+ meshIndex" suggests the resource array holds one additional
    // entry per preceding mesh - confirm against the resource layout.
    pResSubMeshBoundingArray = &pResSubMeshBoundingArray[startBoundingIndex + meshIndex];
    const int subMeshCount = m_pRes->GetSubMeshCount(meshIndex);
#if defined(NN_BUILD_CONFIG_OS_COS)
    // Zero the cache blocks that lie entirely inside the output range.
    size_t startAddress = nn::util::align_up(reinterpret_cast<size_t>(pSubMeshBounding), CACHE_BLOCK_SIZE);
    size_t endAddress = nn::util::align_down(reinterpret_cast<size_t>(pSubMeshBounding + subMeshCount), CACHE_BLOCK_SIZE);
    // When the range does not contain a whole cache block, the aligned start lies past the
    // aligned end; the unsigned difference would wrap to a huge size, so skip the call.
    if (endAddress > startAddress)
    {
        DCZeroRange(reinterpret_cast<void*>(startAddress), endAddress - startAddress);
    }
#endif
    for (int idxSubMesh = 0; idxSubMesh < subMeshCount; ++idxSubMesh)
    {
        const Bounding& aabb = pResSubMeshBoundingArray[idxSubMesh];
        pSubMeshBounding[idxSubMesh].Transform(aabb, worldMtx);
    }
}

// Updates the ShapeBlock uniform block selected by viewIndex and bufferIndex with worldMtx,
// the vertex skin count and the user area, then, when blend weights are valid and shape
// animation calculation is enabled, writes the shape animation result to the dynamic vertex
// buffers of the same buffering slot. Does nothing when the shape has no shape blocks.
void ShapeObj::CalculateShape(int viewIndex, const nn::util::Matrix4x3fType& worldMtx, int bufferIndex) NN_NOEXCEPT
{
    NN_G3D_REQUIRES_RANGE(viewIndex, 0, GetViewCount(),        NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    NN_G3D_REQUIRES_RANGE(bufferIndex, 0, GetBufferingCount(), NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    // Nothing to update.
    if (GetShapeBlockCount() == 0)
    {
        return;
    }

    // Map the buffer.
    nn::gfx::Buffer* pBlock = GetShapeBlock(viewIndex, bufferIndex);
    ShapeBlock* pMapped = pBlock->Map< ShapeBlock >();
    int32_t skinCount = GetVertexSkinCount();
    const int userWordCount = static_cast<int>(m_UserAreaSize >> 2);

    // The matrix is stored the same way in both modes; only the swap step differs.
    MatrixStore(&pMapped->worldMtx, worldMtx);
    if (IsBlockSwapEnabled())
    {
        MatrixSwapEndian(&pMapped->worldMtx, pMapped->worldMtx);
        Copy32<true>(&pMapped->vtxSkinCount, &skinCount, sizeof(int32_t) >> 2);
        Copy32<true>(&pMapped->userFloat, m_pUserArea, userWordCount);
    }
    else
    {
        Copy32<false>(&pMapped->vtxSkinCount, &skinCount, sizeof(int32_t) >> 2);
        Copy32<false>(&pMapped->userFloat, m_pUserArea, userWordCount);
    }
    pBlock->FlushMappedRange(0, sizeof(ShapeBlock));
    pBlock->Unmap();

    // Apply the shape animation result to the vertex buffers.
    const bool needsShapeAnim = HasValidBlendWeight() && IsDynamicVertexBufferValid() && IsShapeAnimCalculationEnabled();
    if (needsShapeAnim)
    {
        CalculateShapeAnimResult(bufferIndex);
    }
}

// Resets the dynamic vertex buffers of buffering slot bufferIndex by copying the full contents
// of the corresponding original vertex buffers into them.
// Requires the dynamic vertex buffers to be valid.
void ShapeObj::ClearDynamicVertexBuffer(int bufferIndex) NN_NOEXCEPT
{
    NN_G3D_REQUIRES(IsDynamicVertexBufferValid(), GetName());
    NN_G3D_REQUIRES_RANGE(bufferIndex, 0, GetBufferingCount(), GetName());

    const nn::g3d::ResVertex* pVertex = m_pRes->GetVertex();
    const int vertexBufferCount = pVertex->GetVertexBufferCount();
    for (int vbIndex = 0; vbIndex < vertexBufferCount; ++vbIndex)
    {
        // Vertex buffers without a dynamic counterpart are left alone.
        nn::gfx::Buffer* pBufferingArray = m_pDynamicVertexBufferArrayPtr[vbIndex];
        if (pBufferingArray == NULL)
        {
            continue;
        }

        const size_t byteCount = pVertex->GetVertexBufferInfo(vbIndex)->GetSize();
        const nn::gfx::Buffer* pSource = pVertex->GetVertexBuffer(vbIndex);
        nn::gfx::Buffer& destination = pBufferingArray[bufferIndex];

        // Copy everything from the original vertex buffer, because some vertex attributes may
        // have no keys.
        void* pDestinationMemory = destination.Map();
        void* pSourceMemory = pSource->Map();
        memcpy(pDestinationMemory, pSourceMemory, byteCount);
        pSource->Unmap();

        destination.FlushMappedRange(0, byteCount);
        destination.Unmap();
    }
}

// Sets every key shape blend weight to 0, clears the per-key-shape valid flag bits, and
// clears Flag_BlendWeightValid.
void ShapeObj::ClearBlendWeights() NN_NOEXCEPT
{
    const int keyShapeCount = GetResource()->GetKeyShapeCount();

    int keyShapeIndex = 0;
    while (keyShapeIndex < keyShapeCount)
    {
        m_pBlendWeightArray[keyShapeIndex] = 0.0f;
        ++keyShapeIndex;
    }

    // One bit per key shape, rounded up to a multiple of 32 bits and converted to bytes.
    const size_t validFlagBytes = nn::util::align_up(keyShapeCount, 32) >> 3;
    memset(m_pBlendWeightValidFlags, 0, validFlagBytes);

    m_Flag &= ~Flag_BlendWeightValid;
}

// Returns the dynamic vertex buffer for vertexBufferIndex in buffering slot bufferIndex, or
// NULL when the shape has no dynamic vertex buffer table or that vertex buffer is not dynamic.
const nn::gfx::Buffer* ShapeObj::GetDynamicVertexBuffer(int vertexBufferIndex, int bufferIndex) const NN_NOEXCEPT
{
    NN_G3D_REQUIRES_RANGE(vertexBufferIndex, 0, GetVertexBufferCount(), GetName());
    NN_G3D_REQUIRES_RANGE(bufferIndex, 0, GetBufferingCount(), GetName());

    if (m_pDynamicVertexBufferArrayPtr == NULL)
    {
        return NULL;
    }

    const nn::gfx::Buffer* pBufferingArray = m_pDynamicVertexBufferArrayPtr[vertexBufferIndex];
    if (pBufferingArray == NULL)
    {
        return NULL;
    }

    return pBufferingArray + bufferIndex;
}

// Returns true when the vertex attribute vertexAttrIndex lives in a vertex buffer that has a
// dynamic counterpart; false otherwise, including when there is no dynamic vertex buffer table.
bool ShapeObj::IsDynamicVertexAttr(int vertexAttrIndex) const NN_NOEXCEPT
{
    NN_G3D_REQUIRES_RANGE(vertexAttrIndex, 0, GetVertexAttrCount(), GetName());

    if (m_pDynamicVertexBufferArrayPtr != NULL)
    {
        const ResVertexAttr* pAttr = GetResVertexAttr(vertexAttrIndex);
        return m_pDynamicVertexBufferArrayPtr[pAttr->GetBufferIndex()] != NULL;
    }

    return false;
}

// Finds the next run of consecutive sub-meshes of mesh meshIndex whose boundings intersect
// volume, resuming from pCtx->nodeIndex. On return pCtx->submeshIndex / pCtx->submeshCount
// describe the run and pCtx->nodeIndex holds the index at which the run stopped.
// Returns false when meshIndex is not below the mesh count or no intersecting sub-mesh remains.
bool ShapeObj::TestSubMeshIntersection(CullingContext* pCtx, const ViewVolume& volume, int meshIndex) const NN_NOEXCEPT
{
    NN_G3D_REQUIRES(pCtx != NULL,               NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    NN_G3D_REQUIRES(m_pSubMeshBounding != NULL, NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    if (meshIndex >= GetMeshCount())
    {
        return false;
    }

    // The AABBs of all meshes are stored back to back; skip those of the preceding meshes.
    int firstBoundingIndex = 0;
    for (int precedingMesh = 0; precedingMesh < meshIndex; ++precedingMesh)
    {
        firstBoundingIndex += m_pRes->GetSubMeshCount(precedingMesh);
    }
    const Aabb* pMeshBoundings = &m_pSubMeshBounding[firstBoundingIndex];

    const int subMeshCount = GetSubMeshCount(meshIndex);

    // Search for the first sub-mesh that should be drawn.
    pCtx->submeshIndex = pCtx->nodeIndex;
    while (pCtx->submeshIndex < subMeshCount
        && !volume.TestIntersection(pMeshBoundings[pCtx->submeshIndex]))
    {
        ++pCtx->submeshIndex;
    }

    // Extend the run until a culled sub-mesh is found, merging the sub-mesh information.
    pCtx->nodeIndex = pCtx->submeshIndex + 1;
    while (pCtx->nodeIndex < subMeshCount
        && volume.TestIntersection(pMeshBoundings[pCtx->nodeIndex]))
    {
        ++pCtx->nodeIndex;
    }
    pCtx->submeshCount = pCtx->nodeIndex - pCtx->submeshIndex;

    return pCtx->submeshIndex < subMeshCount;
}

// Finds the next run of consecutive sub-meshes that intersect volume and share one LOD level,
// resuming from the state stored in pCtx. On success pCtx->submeshIndex, pCtx->submeshCount
// and pCtx->submeshLodLevel describe the run; pCtx->nodeIndex / pCtx->nodeLodLevel hold the
// state for the next call. Returns false when no intersecting sub-mesh remains.
// calcLodLevelFunctor is called with the bounding of each intersecting sub-mesh and this
// shape, and is expected to never return InvaidLodLevel.
bool ShapeObj::TestSubMeshLodIntersection(CullingContext* pCtx, const ViewVolume& volume,
        ICalculateLodLevelFunctor& calcLodLevelFunctor) const NN_NOEXCEPT
{
    NN_G3D_REQUIRES(pCtx != NULL,               NN_G3D_RES_GET_NAME(m_pRes, GetName()));
    NN_G3D_REQUIRES(m_pSubMeshBounding != NULL, NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    int subMeshCount = GetSubMeshCount();
    if (pCtx->nodeIndex >= subMeshCount)
    {
        return false;
    }

    const Aabb* pBounding = NULL;
    // Provisionally start the run at the current node with the LOD level carried over from the
    // previous call.
    pCtx->submeshLodLevel = pCtx->nodeLodLevel;
    pCtx->submeshIndex = pCtx->nodeIndex;
    // Search for the first sub-mesh that should be drawn. This is only needed when no LOD level
    // was carried over.
    if (pCtx->nodeLodLevel == ICalculateLodLevelFunctor::InvaidLodLevel)
    {
        for (; pCtx->nodeIndex < subMeshCount; ++pCtx->nodeIndex)
        {
            pBounding = m_pSubMeshBounding + pCtx->nodeIndex;
            if (volume.TestIntersection(*pBounding))
            {
                break;
            }
        }
        if (pCtx->nodeIndex >= subMeshCount)
        {
            return false;
        }
        pCtx->submeshLodLevel = calcLodLevelFunctor(*pBounding, *this);
        NN_G3D_ASSERT(pCtx->submeshLodLevel != ICalculateLodLevelFunctor::InvaidLodLevel, NN_G3D_RES_GET_NAME(m_pRes, GetName()));
        pCtx->submeshIndex = pCtx->nodeIndex;
    }

    // Merge the sub-meshes that can be drawn together.
    pCtx->nodeLodLevel = ICalculateLodLevelFunctor::InvaidLodLevel;
    for (++pCtx->nodeIndex; pCtx->nodeIndex < subMeshCount; ++pCtx->nodeIndex)
    {
        pBounding = m_pSubMeshBounding + pCtx->nodeIndex;
        if (!volume.TestIntersection(*pBounding))
        {
            // A culled sub-mesh ends the run; step past it so the next call searches from the
            // following sub-mesh.
            pCtx->nodeLodLevel = ICalculateLodLevelFunctor::InvaidLodLevel;
            pCtx->submeshCount = pCtx->nodeIndex++ - pCtx->submeshIndex;
            return true;
        }
        pCtx->nodeLodLevel = calcLodLevelFunctor(*pBounding, *this);
        // Validate the level that was just computed. The previous check re-tested
        // submeshLodLevel, which is already known to be valid at this point.
        NN_G3D_ASSERT(pCtx->nodeLodLevel != ICalculateLodLevelFunctor::InvaidLodLevel, NN_G3D_RES_GET_NAME(m_pRes, GetName()));
        if (pCtx->submeshLodLevel != pCtx->nodeLodLevel)
        {
            // A different LOD level ends the run; the level is kept in nodeLodLevel so the next
            // call starts its run at this sub-mesh without searching again.
            break;
        }
    }

    pCtx->submeshCount = pCtx->nodeIndex - pCtx->submeshIndex;
    return true;
}

// Fills pRangeArray with the runs of visible sub-meshes of mesh meshIndex reported by
// TestSubMeshIntersection(), followed by an all-zero terminator entry.
// Returns the number of runs written, not counting the terminator.
// Each entry's lodLevel is set to meshIndex.
int ShapeObj::MakeSubMeshRange(SubMeshRange* pRangeArray, const ViewVolume& volume, int meshIndex) const NN_NOEXCEPT
{
    NN_G3D_REQUIRES(pRangeArray != NULL, NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    CullingContext cullingContext;
    int rangeCount = 0;
    while (TestSubMeshIntersection(&cullingContext, volume, meshIndex))
    {
        SubMeshRange& range = pRangeArray[rangeCount];
        range.index = static_cast<uint16_t>(cullingContext.submeshIndex);
        range.count = static_cast<uint16_t>(cullingContext.submeshCount);
        range.lodLevel = static_cast<uint16_t>(meshIndex);
        ++rangeCount;
    }

    // Terminate the list with an all-zero entry.
    SubMeshRange& terminator = pRangeArray[rangeCount];
    terminator.lodLevel = 0;
    terminator.count = 0;
    terminator.index = 0;
    return rangeCount;
}

// Fills pRangeArray with the runs of visible sub-meshes reported by
// TestSubMeshLodIntersection(), each tagged with the LOD level of its run, followed by an
// all-zero terminator entry. Returns the number of runs written, not counting the terminator.
int ShapeObj::MakeSubMeshLodRange(SubMeshRange* pRangeArray,
                               const ViewVolume& volume, ICalculateLodLevelFunctor& calcLodLevelFunctor) const NN_NOEXCEPT
{
    NN_G3D_REQUIRES(pRangeArray != NULL, NN_G3D_RES_GET_NAME(m_pRes, GetName()));

    CullingContext cullingContext;
    int rangeCount = 0;
    while (TestSubMeshLodIntersection(&cullingContext, volume, calcLodLevelFunctor))
    {
        SubMeshRange& range = pRangeArray[rangeCount];
        range.index = static_cast<uint16_t>(cullingContext.submeshIndex);
        range.count = static_cast<uint16_t>(cullingContext.submeshCount);
        range.lodLevel = static_cast<uint16_t>(cullingContext.submeshLodLevel);
        ++rangeCount;
    }

    // Terminate the list with an all-zero entry.
    SubMeshRange& terminator = pRangeArray[rangeCount];
    terminator.lodLevel = 0;
    terminator.count = 0;
    terminator.index = 0;
    return rangeCount;
}

}} // namespace nn::g3d

