﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include "GlareEffect.h"

#include <nn/util/util_BytePtr.h>

//---------------------------------------------------------------------------
//  Rounds x up using a low-bit mask derived from base.
//  The result is a multiple of base only when base is a power of two.
//---------------------------------------------------------------------------
inline size_t _RoundUp( const size_t x, const uint32_t base ) NN_NOEXCEPT
{
    const size_t lowBits = static_cast<size_t>( base ) - 1;
    const size_t bumped  = x + lowBits;
    return bumped & ~lowBits;
}

//---------------------------------------------------------------------------
//  Initializes the constant buffer.
//
//  Allocates pool memory through pAllocateFunction, creates a CPU-uncached /
//  GPU-cached memory pool over it and places a single nn::gfx::Buffer in that
//  pool. The buffer holds bufferCount sub-buffers, each bufferSize rounded up
//  to a multiple of 256 bytes.
//
//  pGfxDevice         Device used to create the pool and the buffer.
//  pAllocateFunction  Aligned allocator used for the pool memory.
//  pFreeFunction      Not called here; the memory is released in Finalize().
//  pUserData          Opaque pointer forwarded to pAllocateFunction.
//  bufferSize         Requested size of one sub-buffer in bytes.
//  bufferCount        Number of sub-buffers.
//
//  Returns true on success or when already initialized, and false when the
//  pool memory could not be allocated.
//---------------------------------------------------------------------------
bool ConstantBuffer::Initialize(
    nn::gfx::Device* pGfxDevice,
    nn::AlignedAllocateFunctionWithUserData pAllocateFunction,
    nn::FreeFunctionWithUserData pFreeFunction,
    void*                      pUserData,
    const size_t bufferSize, const uint32_t bufferCount ) NN_NOEXCEPT
{
    NN_SDK_ASSERT_NOT_NULL(pAllocateFunction);
    NN_SDK_ASSERT_NOT_NULL(pFreeFunction);
    NN_UNUSED(pFreeFunction);

    if ( m_IsInitialized ) return true;

    m_pBuffer           = nullptr;
    m_BufferCount       = bufferCount;
    m_BufferSize        = nn::util::align_up( bufferSize, 256 );
    m_MappedPtr         = nullptr;

    // Bytes actually occupied by the nn::gfx::Buffer (all sub-buffers back to back).
    const size_t totalBufferSize = m_BufferCount * m_BufferSize;

    nn::gfx::Buffer::InfoType bfInfo;
    bfInfo.SetDefault();
    bfInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_ConstantBuffer );
    bfInfo.SetSize( totalBufferSize );

    nn::gfx::MemoryPool::InfoType mpInfo;
    mpInfo.SetDefault();
    mpInfo.SetMemoryPoolProperty( nn::gfx::MemoryPoolProperty_CpuUncached | nn::gfx::MemoryPoolProperty_GpuCached );

    const size_t bufferAlign     = nn::gfx::Buffer::GetBufferAlignment( pGfxDevice, bfInfo );
    const size_t poolAlign       = nn::gfx::MemoryPool::GetPoolMemoryAlignment( pGfxDevice, mpInfo );
    const size_t poolGranularity = nn::gfx::MemoryPool::GetPoolMemorySizeGranularity( pGfxDevice, mpInfo );

    // The buffer is placed at pool offset bufferAlign, so reserve that much extra.
    // The pool size is rounded to the size granularity (not the address alignment),
    // matching how the texture pool in GlareEffect::Initialize() is sized.
    const size_t allocedBufferSize =
        nn::util::align_up( totalBufferSize + bufferAlign, poolGranularity );

    m_pBuffer = (*pAllocateFunction)( allocedBufferSize, poolAlign, pUserData );
    NN_SDK_ASSERT_NOT_NULL( m_pBuffer );
    if ( m_pBuffer == nullptr )
    {
        // When the assert above is disabled, report the failure instead of
        // building a memory pool over a null pointer.
        return false;
    }

    mpInfo.SetPoolMemory( m_pBuffer, allocedBufferSize );
    m_GfxMemoryPool.Initialize( pGfxDevice, mpInfo );

    // Pass the size the buffer really uses; offset + size must stay inside the pool.
    m_GfxBuffer.Initialize( pGfxDevice, bfInfo, &m_GfxMemoryPool, bufferAlign, totalBufferSize );

    m_IsInitialized = true;
    return true;
}

//---------------------------------------------------------------------------
//  Finalizes the constant buffer.
//
//  Finalizes the nn::gfx buffer and memory pool, then returns the backing
//  memory through pFreeFunction. Does nothing when not initialized.
//
//  pGfxDevice         Device the buffer and pool were created with.
//  pAllocateFunction  Not used; present for symmetry with Initialize().
//  pFreeFunction      Releases the memory obtained in Initialize().
//  pUserData          Opaque pointer forwarded to pFreeFunction.
//---------------------------------------------------------------------------
void ConstantBuffer::Finalize(
    nn::gfx::Device* pGfxDevice,
    nn::AlignedAllocateFunctionWithUserData pAllocateFunction,
    nn::FreeFunctionWithUserData pFreeFunction,
    void*                      pUserData) NN_NOEXCEPT
{
    NN_SDK_ASSERT_NOT_NULL(pAllocateFunction);
    NN_SDK_ASSERT_NOT_NULL(pFreeFunction);
    NN_UNUSED(pAllocateFunction);

    if ( !m_IsInitialized ) return;

    // Tear down the GPU objects first: the pool was created over m_pBuffer, so
    // the memory must stay valid until both of them are finalized.
    m_GfxBuffer.Finalize( pGfxDevice );
    m_GfxMemoryPool.Finalize( pGfxDevice );

    if( m_pBuffer )
    {
        (*pFreeFunction)( m_pBuffer, pUserData );
        m_pBuffer = nullptr;
    }

    m_IsInitialized = false;
}


//---------------------------------------------------------------------------
//  指定バッファをMapする
//---------------------------------------------------------------------------
void* ConstantBuffer::Map( int index )
{
    if ( m_MappedPtr ) return nullptr;

    void* ptr = m_GfxBuffer.Map();
    nn::util::BytePtr retPtr( ptr );
    retPtr.Advance( m_BufferSize * index );
    m_MappedPtr = retPtr.Get();
    return m_MappedPtr;
}

//---------------------------------------------------------------------------
//  Unmapする
//---------------------------------------------------------------------------
void ConstantBuffer::Unmap()
{
    if ( !m_MappedPtr ) return;

    nn::util::BytePtr retPtr( m_pBuffer );

    m_GfxBuffer.FlushMappedRange( retPtr.Distance( m_MappedPtr ), m_BufferSize );
    m_GfxBuffer.Unmap();
    m_MappedPtr = nullptr;
}

//---------------------------------------------------------------------------
//  Gets the GPU address of the sub-buffer with the given index.
//  Writes the address of the whole buffer to *address and offsets it by
//  index sub-buffer sizes.
//---------------------------------------------------------------------------
void ConstantBuffer::GetGpuAddress( nn::gfx::GpuAddress* address, int index )
{
    const auto subBufferOffset = m_BufferSize * index;

    m_GfxBuffer.GetGpuAddress( address );
    address->Offset( subBufferOffset );
}



// Stringifies its arguments so GLSL can be written inline as C++ tokens.
// The preprocessor collapses whitespace and newlines during stringification, so
// everything passed through this macro becomes a single line of shader source.
#define SHADER_SOURCE( ... ) #__VA_ARGS__

// Copy shader.
// Samples texture0 at v_texCoord.st, writes the texel to o_Color and writes the
// texel's red channel to gl_FragDepth.
static const char COPY_PS_SOURCE[] =
    "#version 430 \n"
    SHADER_SOURCE(
        layout( std140, binding = 1 ) uniform Model
        {
            uniform mat4 u_userMatrix;
            uniform vec4 u_color0;
            uniform vec4 u_color1;
            uniform vec2 u_uv_src;
            uniform vec2 u_uv_size;
            uniform vec4 u_layer;
            float rate;
        };

        layout( binding = 0 ) uniform sampler2D texture0;

        layout( location = 0 ) in vec4 v_texCoord;
        layout( location = 1 ) in vec4 v_color;

        out vec4 o_Color;

        void main()
        {
            vec4 color0 = texture2D(texture0, v_texCoord.st);
            o_Color = color0;
            gl_FragDepth = color0.r;
        }
    );

// Luminance selection shader.
// Computes luminance as a weighted sum of the texel's rgb, divides the color by
// it when it is non-zero, and outputs color * ( max( 1, luminance ) - 1 ) with
// alpha 1. Texels whose luminance does not exceed 1 therefore output black.
static const char LUMINANCE_SELECTION_PS_SOURCE[] =
    "#version 430 \n"
    SHADER_SOURCE(
        layout( std140, binding = 1 ) uniform Model
        {
            uniform mat4 u_userMatrix;
            uniform vec4 u_color0;
            uniform vec4 u_color1;
            uniform vec2 u_uv_src;
            uniform vec2 u_uv_size;
            uniform vec4 u_layer;
            float rate;
        };

        layout( binding = 0 ) uniform sampler2D texture0;

        layout( location = 0 ) in vec4 v_texCoord;
        layout( location = 1 ) in vec4 v_color;

        out vec4 o_Color;

        void main()
        {
            vec4 color0 = texture2D(texture0, v_texCoord.st);

            float luminance = dot( color0.rgb, vec3( 0.298912, 0.586611, 0.114478 ) );
            if ( luminance != 0 ) {
                color0 /= luminance;
            }

            luminance = max( 1, luminance );
            o_Color.xyz = color0.xyz * ( luminance - 1 );
            o_Color.w = 1;
        }
    );

// X-axis Gauss shader.
// Sums nine samples of texture0 along X: the center texel plus four on each
// side, spaced 1..4 texels apart using 1 / TextureSize0.x. The center uses
// Weights0.x; the symmetric pairs use Weights0.y, Weights0.z, Weights0.w and
// Weights1.x from nearest to farthest.
static const char GAUSS_X_PS_SOURCE[] =
    "#version 430 \n"
    SHADER_SOURCE(
        layout( std140, binding = 1 ) uniform Model
        {
            uniform mat4 u_userMatrix;
            uniform vec4 u_color0;
            uniform vec4 u_color1;
            uniform vec2 u_uv_src;
            uniform vec2 u_uv_size;
            uniform vec4 u_layer;
            float rate;
        };

        layout( binding = 0 ) uniform sampler2D texture0;

        layout(std140, binding = 2) uniform TextureParam
        {
            uniform vec2        TextureSize0;
        };

        layout(std140, binding = 3) uniform WeightParam
        {
            uniform vec4 Weights0;
            uniform vec4 Weights1;
        };

        layout( location = 0 ) in vec4 v_texCoord;
        layout( location = 1 ) in vec4 v_color;

        out vec4 o_Color;

        void main()
        {
            vec2 ox_1 = vec2( 1.0 / TextureSize0.x, 0.0 );
            vec2 ox_2 = vec2( 2.0 / TextureSize0.x, 0.0 );
            vec2 ox_3 = vec2( 3.0 / TextureSize0.x, 0.0 );
            vec2 ox_4 = vec2( 4.0 / TextureSize0.x, 0.0 );

            o_Color =  Weights1.x * texture2D( texture0, v_texCoord.st - ox_4 );
            o_Color += Weights0.w * texture2D( texture0, v_texCoord.st - ox_3 );
            o_Color += Weights0.z * texture2D( texture0, v_texCoord.st - ox_2 );
            o_Color += Weights0.y * texture2D( texture0, v_texCoord.st - ox_1 );
            o_Color += Weights0.x * texture2D( texture0, v_texCoord.st );
            o_Color += Weights0.y * texture2D( texture0, v_texCoord.st + ox_1 );
            o_Color += Weights0.z * texture2D( texture0, v_texCoord.st + ox_2 );
            o_Color += Weights0.w * texture2D( texture0, v_texCoord.st + ox_3 );
            o_Color += Weights1.x * texture2D( texture0, v_texCoord.st + ox_4 );
        }
    );

// Y-axis Gauss shader.
// Sums nine samples of texture0 along Y: the center texel plus four on each
// side, spaced 1..4 texels apart using 1 / TextureSize0.y. The center uses
// Weights0.x; the symmetric pairs use Weights0.y, Weights0.z, Weights0.w and
// Weights1.x from nearest to farthest.
static const char GAUSS_Y_PS_SOURCE[] =
    "#version 430 \n"
    SHADER_SOURCE(
        layout( std140, binding = 1 ) uniform Model
        {
            uniform mat4 u_userMatrix;
            uniform vec4 u_color0;
            uniform vec4 u_color1;
            uniform vec2 u_uv_src;
            uniform vec2 u_uv_size;
            uniform vec4 u_layer;
            float rate;
        };

        layout( binding = 0 ) uniform sampler2D texture0;

        layout(std140, binding = 2) uniform TextureParam
        {
            uniform vec2        TextureSize0;
        };

        layout(std140, binding = 3) uniform WeightParam
        {
            uniform vec4 Weights0;
            uniform vec4 Weights1;
        };

        layout( location = 0 ) in vec4 v_texCoord;
        layout( location = 1 ) in vec4 v_color;

        out vec4 o_Color;

        void main()
        {
            vec2 oy_1 = vec2( 0.0, 1.0 / TextureSize0.y );
            vec2 oy_2 = vec2( 0.0, 2.0 / TextureSize0.y );
            vec2 oy_3 = vec2( 0.0, 3.0 / TextureSize0.y );
            vec2 oy_4 = vec2( 0.0, 4.0 / TextureSize0.y );

            o_Color =  Weights1.x * texture2D( texture0, v_texCoord.st - oy_4 );
            o_Color += Weights0.w * texture2D( texture0, v_texCoord.st - oy_3 );
            o_Color += Weights0.z * texture2D( texture0, v_texCoord.st - oy_2 );
            o_Color += Weights0.y * texture2D( texture0, v_texCoord.st - oy_1 );
            o_Color += Weights0.x * texture2D( texture0, v_texCoord.st );
            o_Color += Weights0.y * texture2D( texture0, v_texCoord.st + oy_1 );
            o_Color += Weights0.z * texture2D( texture0, v_texCoord.st + oy_2 );
            o_Color += Weights0.w * texture2D( texture0, v_texCoord.st + oy_3 );
            o_Color += Weights1.x * texture2D( texture0, v_texCoord.st + oy_4 );
        }
    );

//---------------------------------------------------------------------------
//! @brief        Calculates the weights of the Gauss filter.
//!
//! Writes GLARE_GAUSS_LENGTH weights, then divides each by the sum of
//! ( weight * scale ) over all taps.
//!
//! @param[out] weight   Receives the calculated weights.
//! @param[in]  scale    Scale of the glare.
//! @param[in]  tapScale Size of one tap.
//---------------------------------------------------------------------------
static void CalcWeight( float* weight, float scale, float tapScale )
{
    float accumulated = 0.0f;

    for ( int tap = 0; tap < GLARE_GAUSS_LENGTH; ++tap )
    {
        // Tap 0 uses a fixed distance of 3 * scale; every other tap uses 2 * tap * scale.
        const float distance = ( tap == 0 )
            ? 3.0f * scale
            : static_cast<float>(tap) * 2.0f * scale;

        weight[tap] = expf( -(distance * distance) / (2.0f * tapScale * tapScale) );
        accumulated += weight[tap] * scale;
    }

    // Normalize by the scaled sum gathered above.
    for ( int tap = 0; tap < GLARE_GAUSS_LENGTH; ++tap )
    {
        weight[tap] /= accumulated;
    }
}

//---------------------------------------------------------------------------
//! @brief  Performs finalization.
//!
//! Finalizes every graphics object created by Initialize() and returns the
//! memory that Initialize() obtained from the allocator: the state memory
//! handed to SetMemory() and the texture memory pool buffer.
//!
//! @param[in] device             Device the objects were created with.
//! @param[in] pAllocateFunction  Forwarded to the constant buffers.
//! @param[in] pFreeFunction      Releases memory allocated in Initialize().
//! @param[in] pUserData          Opaque pointer forwarded to pFreeFunction.
//---------------------------------------------------------------------------
void GlareEffect::Finalize(
     nn::gfx::Device* device,
     nn::AlignedAllocateFunctionWithUserData pAllocateFunction,
     nn::FreeFunctionWithUserData pFreeFunction,
     void*                      pUserData)
{
    NN_SDK_ASSERT_NOT_NULL(pAllocateFunction);
    NN_SDK_ASSERT_NOT_NULL(pFreeFunction);

    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        m_DownscaledYColorTargetView[i].Finalize( device );
        m_DownscaledYTextureView[i].Finalize( device );
        m_DownscaledYTexture[i].Finalize( device );
    }

    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        m_DownscaledXColorTargetView[i].Finalize( device );
        m_DownscaledXTextureView[i].Finalize( device );
        m_DownscaledXTexture[i].Finalize( device );
    }

    // State objects use memory supplied through SetMemory() in Initialize().
    // Fetch it before finalizing the state, then hand it back to the allocator.
    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        void* pStateMemory = m_DownscaledViewportScissor[i].GetMemory();
        m_DownscaledViewportScissor[i].Finalize( device );
        if ( pStateMemory )
        {
            (*pFreeFunction)( pStateMemory, pUserData );
        }
    }

    {
        void* pStateMemory = m_ViewportScissor.GetMemory();
        m_ViewportScissor.Finalize( device );
        if ( pStateMemory )
        {
            (*pFreeFunction)( pStateMemory, pUserData );
        }
    }

    m_Sampler.Finalize( device );

    {
        void* pStateMemory = m_AddSumBlendState.GetMemory();
        m_AddSumBlendState.Finalize( device );
        if ( pStateMemory )
        {
            (*pFreeFunction)( pStateMemory, pUserData );
        }
    }

    m_GaussWeightBuffer.Finalize( device, pAllocateFunction, pFreeFunction, pUserData );
    m_TextureSizeBuffer.Finalize( device, pAllocateFunction, pFreeFunction, pUserData );
    m_GaussYPsShader.Finalize( device );
    m_GaussXPsShader.Finalize( device );
    m_LuminanceSelectionPsShader.Finalize( device );
    m_CopyPsShader.Finalize( device );

    // The pool must be finalized before the memory it was created over is freed.
    m_GfxTextureMemoryPool.Finalize( device );
    if ( m_pTextureMemoryPooBuffer )
    {
        (*pFreeFunction)( m_pTextureMemoryPooBuffer, pUserData );
        m_pTextureMemoryPooBuffer = nullptr;
    }
}

// Number of elements in a built-in array. The argument must be an actual array,
// not a pointer. The parameter is parenthesized so that expression arguments
// expand with the intended precedence.
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))

//---------------------------------------------------------------------------
//! @brief  Initializes the glare-related resources.
//!
//! Builds the four pixel shaders from the GLSL sources in this file, fills the
//! texture-size and Gauss-weight constant buffers, and creates the additive
//! blend state, the sampler, the texture memory pool, the viewport/scissor
//! states and the downscaled X/Y textures with their views.
//!
//! @param[in] device                          Device used to create every object.
//! @param[in] pAllocateFunction               Aligned allocator for state and pool memory.
//! @param[in] pFreeFunction                   Forwarded to the constant buffers; not called here directly.
//! @param[in] pAllocatorFunctionUserData      Opaque pointer forwarded to the allocator.
//! @param[in] pRegisterTextureViewSlotFunc    Called once per downscaled texture view to obtain its descriptor slot.
//! @param[in] pUnregisterTextureViewSlotFunc  Asserted non-null but otherwise unused here.
//! @param[in] pRegisterSamplerSlotFunc        Called once to obtain the sampler descriptor slot.
//! @param[in] pUnregisterSamplerSlotFunc      Asserted non-null but otherwise unused here.
//! @param[in] pRegisterSlotUserData           Opaque pointer forwarded to the register callbacks.
//! @param[in] width                           Width of the full-size target in pixels.
//! @param[in] height                          Height of the full-size target in pixels.
//---------------------------------------------------------------------------
void GlareEffect::Initialize(
    nn::gfx::Device*           device,
    nn::AlignedAllocateFunctionWithUserData pAllocateFunction,
    nn::FreeFunctionWithUserData pFreeFunction,
    void*                      pAllocatorFunctionUserData,
    RegisterTextureViewSlot    pRegisterTextureViewSlotFunc,
    UnregisterTextureViewSlot  pUnregisterTextureViewSlotFunc,
    RegisterSamplerSlot        pRegisterSamplerSlotFunc,
    UnregisterSamplerSlot      pUnregisterSamplerSlotFunc,
    void*                      pRegisterSlotUserData,
    int width, int height )
{
    NN_SDK_ASSERT_NOT_NULL(pAllocateFunction);
    NN_SDK_ASSERT_NOT_NULL(pFreeFunction);
    NN_SDK_ASSERT_NOT_NULL(pRegisterTextureViewSlotFunc);
    NN_SDK_ASSERT_NOT_NULL(pUnregisterTextureViewSlotFunc);
    NN_SDK_ASSERT_NOT_NULL(pRegisterSamplerSlotFunc);
    NN_SDK_ASSERT_NOT_NULL(pUnregisterSamplerSlotFunc);
    NN_UNUSED(pUnregisterTextureViewSlotFunc);
    NN_UNUSED(pUnregisterSamplerSlotFunc);

    // Scale of each downscaled frame buffer relative to the full-size target.
    float scale[GLARE_BUFFER_MAX] = {
        1.0f /  4.0f,
        1.0f /  8.0f,
        1.0f / 16.0f,
        1.0f / 32.0f,
        1.0f / 64.0f
    };

    m_Width = width;
    m_Height = height;

    // Set the sizes of the downscaled buffers.
    // The sizes are kept as floats here and truncated to int where textures and
    // viewports are created below.
    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        m_DownscaledTextureSize[i].v[0] = static_cast<float>(width)  * scale[i];
        m_DownscaledTextureSize[i].v[1] = static_cast<float>(height) * scale[i];
    }

    // copy pixel shader
    {
        nn::gfx::Shader::InfoType info;
        info.SetDefault();
        info.SetSeparationEnabled( true );

        nn::gfx::ShaderCode psCode;
        psCode.codeSize = static_cast<uint32_t>(strlen( COPY_PS_SOURCE ));
        psCode.pCode    = COPY_PS_SOURCE;

        info.SetShaderCodePtr( nn::gfx::ShaderStage_Pixel, &psCode );
        info.SetSourceFormat(nn::gfx::ShaderSourceFormat_Glsl);
        info.SetCodeType( nn::gfx::ShaderCodeType_Source );
        nn::gfx::ShaderInitializeResult result = m_CopyPsShader.Initialize(device, info);
        NN_SDK_ASSERT( result == nn::gfx::ShaderInitializeResult_Success, "nn::gfx::Shader::Initialize() failed (%d)\n", result);
        NN_UNUSED( result );
    }

    // luminance selection pixel shader
    {
        nn::gfx::Shader::InfoType info;
        info.SetDefault();
        info.SetSeparationEnabled( true );

        nn::gfx::ShaderCode psCode;
        psCode.codeSize = static_cast<uint32_t>(strlen( LUMINANCE_SELECTION_PS_SOURCE ));
        psCode.pCode    = LUMINANCE_SELECTION_PS_SOURCE;

        info.SetShaderCodePtr( nn::gfx::ShaderStage_Pixel, &psCode );
        info.SetSourceFormat(nn::gfx::ShaderSourceFormat_Glsl);
        info.SetCodeType( nn::gfx::ShaderCodeType_Source );
        nn::gfx::ShaderInitializeResult result = m_LuminanceSelectionPsShader.Initialize(device, info);
        NN_SDK_ASSERT( result == nn::gfx::ShaderInitializeResult_Success, "nn::gfx::Shader::Initialize() failed (%d)\n", result);
        NN_UNUSED( result );
    }

    // x gauss shader; also looks up the slots of its two constant buffers
    {
        nn::gfx::Shader::InfoType info;
        info.SetDefault();
        info.SetSeparationEnabled( true );

        nn::gfx::ShaderCode psCode;
        psCode.codeSize = static_cast<uint32_t>(strlen( GAUSS_X_PS_SOURCE ));
        psCode.pCode    = GAUSS_X_PS_SOURCE;

        info.SetShaderCodePtr( nn::gfx::ShaderStage_Pixel, &psCode );
        info.SetSourceFormat(nn::gfx::ShaderSourceFormat_Glsl);
        info.SetCodeType( nn::gfx::ShaderCodeType_Source );
        nn::gfx::ShaderInitializeResult result = m_GaussXPsShader.Initialize(device, info);
        NN_SDK_ASSERT( result == nn::gfx::ShaderInitializeResult_Success, "nn::gfx::Shader::Initialize() failed (%d)\n", result);
        NN_UNUSED( result );

        m_SlotGaussXParam       = m_GaussXPsShader.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "TextureParam" );
        m_SlotGaussXWeightParam = m_GaussXPsShader.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "WeightParam" );
    }

    // y gauss shader; also looks up the slots of its two constant buffers
    {
        nn::gfx::Shader::InfoType info;
        info.SetDefault();
        info.SetSeparationEnabled( true );

        nn::gfx::ShaderCode psCode;
        psCode.codeSize = static_cast<uint32_t>(strlen( GAUSS_Y_PS_SOURCE ));
        psCode.pCode    = GAUSS_Y_PS_SOURCE;

        info.SetShaderCodePtr( nn::gfx::ShaderStage_Pixel, &psCode );
        info.SetSourceFormat(nn::gfx::ShaderSourceFormat_Glsl);
        info.SetCodeType( nn::gfx::ShaderCodeType_Source );
        nn::gfx::ShaderInitializeResult result = m_GaussYPsShader.Initialize(device, info);
        NN_SDK_ASSERT( result == nn::gfx::ShaderInitializeResult_Success, "nn::gfx::Shader::Initialize() failed (%d)\n", result);
        NN_UNUSED( result );

        m_SlotGaussYParam       = m_GaussYPsShader.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "TextureParam" );
        m_SlotGaussYWeightParam = m_GaussYPsShader.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "WeightParam" );
    }

    // constant buffer of downscaled buffer size (one vec2 per downscale level)
    // NOTE(review): the bool returned by ConstantBuffer::Initialize() is ignored here.
    m_TextureSizeBuffer.Initialize( device, pAllocateFunction, pFreeFunction, pAllocatorFunctionUserData, sizeof( float ) * 2, GLARE_BUFFER_MAX );
    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        float* pConstantBuffer = static_cast<float *>( m_TextureSizeBuffer.Map( i ) );
        {
            pConstantBuffer[ 0 ] = static_cast<float>(m_DownscaledTextureSize[i].v[0]);
            pConstantBuffer[ 1 ] = static_cast<float>(m_DownscaledTextureSize[i].v[1]);
        }
        m_TextureSizeBuffer.Unmap();
    }

    // constant buffer of gauss weight (room for eight floats per downscale level)
    // NOTE(review): the bool returned by ConstantBuffer::Initialize() is ignored here.
    m_GaussWeightBuffer.Initialize( device, pAllocateFunction, pFreeFunction, pAllocatorFunctionUserData, sizeof( float ) * 8, GLARE_BUFFER_MAX );

    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        float* pConstantBuffer = static_cast<float *>( m_GaussWeightBuffer.Map( i ) );
        {
            // The weights are computed directly into the mapped buffer.
            float glareScale = ( 1.0f / scale[i] ) / 1.6f;
            CalcWeight( pConstantBuffer, glareScale, glareScale * 3.0f );
        }
        m_GaussWeightBuffer.Unmap();
    }

    // Blend state that performs additive blending (One / One for color and alpha).
    {
        nn::gfx::BlendState::InfoType info;
        info.SetDefault();
        nn::gfx::BlendTargetStateInfo targetInfo;
        {
            targetInfo.SetDefault();
        };
        targetInfo.SetBlendEnabled(true);
        targetInfo.SetSourceColorBlendFactor( nn::gfx::BlendFactor::BlendFactor_One );
        targetInfo.SetDestinationColorBlendFactor( nn::gfx::BlendFactor::BlendFactor_One );
        targetInfo.SetSourceAlphaBlendFactor( nn::gfx::BlendFactor::BlendFactor_One );
        targetInfo.SetDestinationAlphaBlendFactor( nn::gfx::BlendFactor::BlendFactor_One );
        info.SetBlendTargetStateInfoArray( &targetInfo, 1 );
        size_t memorySize = nn::gfx::BlendState::GetRequiredMemorySize( info );
        // NOTE(review): the allocation result goes to SetMemory() without a null check.
        m_AddSumBlendState.SetMemory(  (*pAllocateFunction)(memorySize, nn::gfx::BlendState::RequiredMemoryInfo_Alignment, pAllocatorFunctionUserData), memorySize);
        m_AddSumBlendState.Initialize( device, info );
    }

    // Texture sampler: linear min/mag filtering, clamp-to-edge on all axes.
    {
        nn::gfx::Sampler::InfoType info;
        info.SetDefault();
        info.SetFilterMode(nn::gfx::FilterMode_MinLinear_MagLinear_MipPoint);
        info.SetAddressU(nn::gfx::TextureAddressMode_ClampToEdge);
        info.SetAddressV(nn::gfx::TextureAddressMode_ClampToEdge);
        info.SetAddressW(nn::gfx::TextureAddressMode_ClampToEdge);
        m_Sampler.Initialize(device, info);
        (*pRegisterSamplerSlotFunc)(&m_SamplerDescSlot, m_Sampler, pRegisterSlotUserData);
    }


    // Prepare the memory pool used for the textures.
    //size_t poolSize = nn::util::align_up( 1024 * 1024 * 16, 64 * 1024 );
    //m_pTextureMemoryPooBuffer = allocator->Alloc( poolSize, 4 * 1024 );
    //NN_SDK_ASSERT_NOT_NULL( m_pTextureMemoryPooBuffer );
    {
        nn::gfx::MemoryPool::InfoType mpInfo;
        mpInfo.SetDefault();
        mpInfo.SetMemoryPoolProperty(nn::gfx::MemoryPoolProperty_CpuInvisible | nn::gfx::MemoryPoolProperty_GpuCached | nn::gfx::MemoryPoolProperty_Compressible );

        // NOTE(review): the pool is a fixed 16 MiB (plus alignment slack); nothing below
        // checks that all downscaled textures fit - confirm for large width/height.
        size_t poolAlign = nn::gfx::MemoryPool::GetPoolMemoryAlignment(device, mpInfo);
        size_t poolSize = nn::util::align_up((1024 * 1024 * 16 + poolAlign), nn::gfx::MemoryPool::GetPoolMemorySizeGranularity(device, mpInfo));
        m_pTextureMemoryPooBuffer = (*pAllocateFunction)(poolSize, poolAlign, pAllocatorFunctionUserData);
        NN_SDK_ASSERT_NOT_NULL(m_pTextureMemoryPooBuffer);

        mpInfo.SetPoolMemory(m_pTextureMemoryPooBuffer, poolSize);
        m_GfxTextureMemoryPool.Initialize(device, mpInfo);
    }

    // Running byte offset into the texture memory pool for the textures created below.
    // NOTE(review): this is a local variable declared here, despite the m_ prefix.
    ptrdiff_t m_TextureMemoryPoolOffet = 0;

    // viewport and scissor covering the full-size target
    {
        nn::gfx::ViewportScissorState::InfoType info;
        info.SetDefault();
        info.SetScissorEnabled(true);

        nn::gfx::ViewportStateInfo viewportInfo;
        viewportInfo.SetDefault();
        viewportInfo.SetOriginX(0.f);
        viewportInfo.SetOriginY(0.f);
        viewportInfo.SetWidth(static_cast<float>(m_Width));
        viewportInfo.SetHeight(static_cast<float>(m_Height));

        nn::gfx::ScissorStateInfo scissorInfo;
        scissorInfo.SetDefault();
        scissorInfo.SetOriginX(0);
        scissorInfo.SetOriginY(0);
        scissorInfo.SetWidth(m_Width);
        scissorInfo.SetHeight(m_Height);

        info.SetViewportStateInfoArray(&viewportInfo, 1);
        info.SetScissorStateInfoArray(&scissorInfo, 1);
        size_t memorySize = nn::gfx::ViewportScissorState::GetRequiredMemorySize( info );
        // NOTE(review): the allocation result goes to SetMemory() without a null check.
        m_ViewportScissor.SetMemory( (*pAllocateFunction)(memorySize, nn::gfx::ViewportScissorState::RequiredMemoryInfo_Alignment, pAllocatorFunctionUserData), memorySize);
        m_ViewportScissor.Initialize( device, info );
    }

    // viewport and scissor for each downscaled buffer (sizes truncated to int)
    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        nn::gfx::ViewportScissorState::InfoType info;
        info.SetDefault();
        info.SetScissorEnabled(true);

        nn::gfx::ViewportStateInfo viewportInfo;
        viewportInfo.SetDefault();
        viewportInfo.SetOriginX(0.f);
        viewportInfo.SetOriginY(0.f);
        viewportInfo.SetWidth(static_cast<float>(static_cast<int>(m_DownscaledTextureSize[i].v[0])));
        viewportInfo.SetHeight(static_cast<float>(static_cast<int>(m_DownscaledTextureSize[i].v[1])));

        nn::gfx::ScissorStateInfo scissorInfo;
        scissorInfo.SetDefault();
        scissorInfo.SetOriginX(0);
        scissorInfo.SetOriginY(0);
        scissorInfo.SetWidth(static_cast<int>(m_DownscaledTextureSize[i].v[0]));
        scissorInfo.SetHeight(static_cast<int>(m_DownscaledTextureSize[i].v[1]));

        info.SetViewportStateInfoArray(&viewportInfo, 1);
        info.SetScissorStateInfoArray(&scissorInfo, 1);
        size_t memorySize = nn::gfx::ViewportScissorState::GetRequiredMemorySize( info );
        // NOTE(review): the allocation result goes to SetMemory() without a null check.
        m_DownscaledViewportScissor[i].SetMemory( (*pAllocateFunction)(memorySize, nn::gfx::ViewportScissorState::RequiredMemoryInfo_Alignment, pAllocatorFunctionUserData), memorySize);
        m_DownscaledViewportScissor[i].Initialize( device, info );
    }

    // downscaled buffers for glare x
    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        // Texture: placed in the texture pool at the next suitably aligned offset.
        {
            nn::gfx::Texture::InfoType info;
            info.SetDefault();
            info.SetWidth(  static_cast<int>(m_DownscaledTextureSize[i].v[0]) );
            info.SetHeight( static_cast<int>(m_DownscaledTextureSize[i].v[1]) );
            info.SetGpuAccessFlags( nn::gfx::GpuAccess_ColorBuffer );
            info.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
            info.SetImageFormat(    nn::gfx::ImageFormat_R16_G16_B16_A16_Float );
            info.SetMipCount( 1 );
            m_TextureMemoryPoolOffet = nn::util::align_up( m_TextureMemoryPoolOffet, nn::gfx::Texture::CalculateMipDataAlignment( device, info ) );
            m_DownscaledXTexture[i].Initialize( device, info, &m_GfxTextureMemoryPool, m_TextureMemoryPoolOffet, nn::gfx::Texture::CalculateMipDataSize( device, info ) );
            m_TextureMemoryPoolOffet += nn::gfx::Texture::CalculateMipDataSize( device, info );
        }

        // Texture view used when sampling this buffer.
        {
            nn::gfx::TextureView::InfoType info;
            info.SetDefault();
            info.SetImageDimension( nn::gfx::ImageDimension_2d );
            info.SetImageFormat( nn::gfx::ImageFormat_R16_G16_B16_A16_Float );
            info.SetTexturePtr( &m_DownscaledXTexture[i] );
            m_DownscaledXTextureView[i].Initialize( device, info );
        }

        // Color target view used when rendering into this buffer; the texture view
        // is then registered to obtain its descriptor slot.
        {
            nn::gfx::ColorTargetView::InfoType info;
            info.SetDefault();
            info.SetImageDimension( nn::gfx::ImageDimension_2d );
            info.SetImageFormat(    nn::gfx::ImageFormat_R16_G16_B16_A16_Float );
            info.SetTexturePtr( &m_DownscaledXTexture[i] );
            m_DownscaledXColorTargetView[i].Initialize( device, info );
            (*pRegisterTextureViewSlotFunc)(&m_DownscaledXTextureDescSlot[i], m_DownscaledXTextureView[i], pRegisterSlotUserData);
        }
    }

    // downscaled buffers for glare y (same layout as the x buffers, continuing
    // from the current pool offset)
    for ( int i = 0; i < GLARE_BUFFER_MAX; i++)
    {
        // Texture: placed in the texture pool at the next suitably aligned offset.
        {
            nn::gfx::Texture::InfoType info;
            info.SetDefault();
            info.SetWidth(  static_cast<int>(m_DownscaledTextureSize[i].v[0]) );
            info.SetHeight( static_cast<int>(m_DownscaledTextureSize[i].v[1]) );
            info.SetGpuAccessFlags( nn::gfx::GpuAccess_ColorBuffer );
            info.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
            info.SetImageFormat(    nn::gfx::ImageFormat_R16_G16_B16_A16_Float );
            info.SetMipCount( 1 );
            m_TextureMemoryPoolOffet = nn::util::align_up( m_TextureMemoryPoolOffet, nn::gfx::Texture::CalculateMipDataAlignment( device, info ) );
            m_DownscaledYTexture[i].Initialize( device, info, &m_GfxTextureMemoryPool, m_TextureMemoryPoolOffet, nn::gfx::Texture::CalculateMipDataSize( device, info ) );
            m_TextureMemoryPoolOffet += nn::gfx::Texture::CalculateMipDataSize( device, info );
        }

        // Texture view used when sampling this buffer.
        {
            nn::gfx::TextureView::InfoType info;
            info.SetDefault();
            info.SetImageDimension( nn::gfx::ImageDimension_2d );
            info.SetImageFormat( nn::gfx::ImageFormat_R16_G16_B16_A16_Float );
            info.SetTexturePtr( &m_DownscaledYTexture[i] );
            m_DownscaledYTextureView[i].Initialize( device, info );
        }

        // Color target view used when rendering into this buffer; the texture view
        // is then registered to obtain its descriptor slot.
        {
            nn::gfx::ColorTargetView::InfoType info;
            info.SetDefault();
            info.SetImageDimension( nn::gfx::ImageDimension_2d );
            info.SetImageFormat(    nn::gfx::ImageFormat_R16_G16_B16_A16_Float );
            info.SetTexturePtr( &m_DownscaledYTexture[i] );
            m_DownscaledYColorTargetView[i].Initialize( device, info );
            (*pRegisterTextureViewSlotFunc)( &m_DownscaledYTextureDescSlot[i], m_DownscaledYTextureView[i], pRegisterSlotUserData);
        }
    }
}  // NOLINT(impl/function_size)

//---------------------------------------------------------------------------
//! @brief  Copies a texture ( TextureView -> ColorTarget ).
//!
//! Draws a Y-flipped screen quad that samples srcTextureDescSlot into
//! pDstColorTarget, using the full-size viewport/scissor state.
//!
//! @param[in] commandBuffer       Command buffer the draw is recorded into.
//! @param[in] primitiveRenderer   Renderer used to draw the screen quad.
//! @param[in] pDstColorTarget     Destination color target.
//! @param[in] srcTextureDescSlot  Descriptor slot of the source texture view.
//---------------------------------------------------------------------------
void GlareEffect::CopyTexture( nn::gfx::CommandBuffer*                commandBuffer,
                               nns::gfx::PrimitiveRenderer::Renderer* primitiveRenderer,
                               nn::gfx::ColorTargetView*              pDstColorTarget,
                               nn::gfx::DescriptorSlot                srcTextureDescSlot )
{
    commandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );

    // Bind the destination as the only color target, with no depth-stencil view.
    commandBuffer->SetRenderTargets( 1, &pDstColorTarget, nullptr );

    // Opaque blending over the full-size viewport.
    primitiveRenderer->SetBlendState( commandBuffer, nns::gfx::PrimitiveRenderer::BlendType_Opacity );
    primitiveRenderer->SetDepthStencilState( commandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
    commandBuffer->SetViewportScissorState( &m_ViewportScissor );

    primitiveRenderer->SetProjectionMatrix( &m_ProjMtx );
    primitiveRenderer->SetViewMatrix( &m_ViewMtx );

    const nn::util::Uint8x4 opaqueWhite = {{ 255, 255, 255, 255 }};
    primitiveRenderer->SetColor( opaqueWhite );

    nn::util::Matrix4x3fType identity;
    nn::util::MatrixIdentity( &identity );
    primitiveRenderer->SetModelMatrix( &identity );

    primitiveRenderer->DrawScreenQuadYFlip( commandBuffer, srcTextureDescSlot, m_SamplerDescSlot );
}

//---------------------------------------------------------------------------
//! @brief  Draws the glare.
//!
//! Records, in order: a luminance-selection pass from the source texture into
//! Y buffer 0, a chain of copies into the remaining Y buffers, a horizontal
//! Gaussian pass (Y -> X buffers), a vertical Gaussian pass (X -> Y buffers),
//! an accumulation of the Y buffers down into Y buffer 0 using
//! m_AddSumBlendState, and finally a draw of Y buffer 0 onto the output target
//! with the same blend state.
//!
//! @param[in] pCommandBuffer      Command buffer the commands are recorded into. Must not be null.
//! @param[in] pOutputTarget       Color target that receives the final glare draw. Must not be null.
//! @param[in] pPrimitiveRenderer  Primitive renderer used to set state and draw the screen quads. Must not be null.
//! @param[in] pSrcTextureSlot     Descriptor slot of the texture sampled by the first pass. Must not be null.
//---------------------------------------------------------------------------
void GlareEffect::Draw(
        nn::gfx::CommandBuffer* pCommandBuffer,
        nn::gfx::ColorTargetView*   pOutputTarget,
        nns::gfx::PrimitiveRenderer::Renderer* pPrimitiveRenderer,
        const nn::gfx::DescriptorSlot* pSrcTextureSlot)
{
    NN_SDK_ASSERT_NOT_NULL(pCommandBuffer);
    NN_SDK_ASSERT_NOT_NULL(pOutputTarget);
    NN_SDK_ASSERT_NOT_NULL(pPrimitiveRenderer);
    NN_SDK_ASSERT_NOT_NULL(pSrcTextureSlot);

    const int    levelCount       = GLARE_BUFFER_MAX;
    // Byte sizes passed to SetConstantBuffer for the two Gaussian parameter blocks.
    const size_t sizeParamBytes   = sizeof( float ) * 2 * GLARE_BUFFER_MAX;
    const size_t weightParamBytes = sizeof( float ) * 8 * GLARE_BUFFER_MAX;
    const nn::util::Uint8x4 opaqueWhite = { { 255, 255, 255, 255 } };

    // If matrices or other settings are left over from before this method runs,
    // the rendering comes out wrong, so reset everything to the defaults here once.
    pPrimitiveRenderer->SetDefaultParameters();

    const nn::gfx::RasterizerState* pRasterState = pPrimitiveRenderer->GetRasterizerState(
        nn::gfx::PrimitiveTopologyType_Triangle, nn::gfx::CullMode_Back, nn::gfx::FillMode_Solid );

    // Luminance selection: source texture -> Y buffer 0.
    nn::gfx::ColorTargetView* pRenderTarget = &m_DownscaledYColorTargetView[0];
    pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
    pCommandBuffer->SetRenderTargets( 1, &pRenderTarget, nullptr );
    pCommandBuffer->SetRasterizerState( pRasterState );
    pPrimitiveRenderer->SetBlendState( pCommandBuffer, nns::gfx::PrimitiveRenderer::BlendType_Opacity );
    pPrimitiveRenderer->SetDepthStencilState( pCommandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
    pCommandBuffer->SetViewportScissorState( &m_DownscaledViewportScissor[0] );
    pPrimitiveRenderer->SetProjectionMatrix( &m_ProjMtx );
    pPrimitiveRenderer->SetViewMatrix( &m_ViewMtx );
    pPrimitiveRenderer->SetColor( opaqueWhite );
    pPrimitiveRenderer->SetUserPixelShader( &m_LuminanceSelectionPsShader );
    pPrimitiveRenderer->DrawScreenQuad( pCommandBuffer, *pSrcTextureSlot, m_SamplerDescSlot );

    // Generate the downscaled buffers: each Y buffer is drawn from the previous one.
    for ( int dstLevel = 1; dstLevel < levelCount; ++dstLevel )
    {
        const int srcLevel = dstLevel - 1;

        pRenderTarget = &m_DownscaledYColorTargetView[dstLevel];
        pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
        pCommandBuffer->SetRenderTargets( 1, &pRenderTarget, nullptr );
        pPrimitiveRenderer->SetBlendState( pCommandBuffer, nns::gfx::PrimitiveRenderer::BlendType_Opacity );
        pPrimitiveRenderer->SetDepthStencilState( pCommandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
        pCommandBuffer->SetViewportScissorState( &m_DownscaledViewportScissor[dstLevel] );
        pPrimitiveRenderer->SetUserPixelShader( &m_CopyPsShader );
        pPrimitiveRenderer->DrawScreenQuad( pCommandBuffer, m_DownscaledYTextureDescSlot[srcLevel], m_SamplerDescSlot );
    }

    // Apply the Gaussian in the horizontal direction while copying: Y buffer -> X buffer.
    for ( int level = 0; level < levelCount; ++level )
    {
        // NOTE(review): FlushMemory is issued twice in a row here with nothing recorded
        // in between; the second call looks redundant - confirm before removing.
        pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
        pRenderTarget = &m_DownscaledXColorTargetView[level];
        pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
        pCommandBuffer->SetRenderTargets( 1, &pRenderTarget, nullptr );
        pPrimitiveRenderer->SetBlendState( pCommandBuffer, nns::gfx::PrimitiveRenderer::BlendType_Opacity );
        pPrimitiveRenderer->SetDepthStencilState( pCommandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
        pCommandBuffer->SetViewportScissorState( &m_DownscaledViewportScissor[level] );
        pPrimitiveRenderer->SetUserPixelShader( &m_GaussXPsShader );

        // Bind the per-level texture-size and Gaussian-weight constant buffers to the pixel stage.
        nn::gfx::GpuAddress paramAddress;
        m_TextureSizeBuffer.GetGpuAddress( &paramAddress, level );
        pCommandBuffer->SetConstantBuffer( m_SlotGaussXParam, nn::gfx::ShaderStage::ShaderStage_Pixel, paramAddress, sizeParamBytes );
        m_GaussWeightBuffer.GetGpuAddress( &paramAddress, level );
        pCommandBuffer->SetConstantBuffer( m_SlotGaussXWeightParam, nn::gfx::ShaderStage::ShaderStage_Pixel, paramAddress, weightParamBytes );

        pPrimitiveRenderer->DrawScreenQuad( pCommandBuffer, m_DownscaledYTextureDescSlot[level], m_SamplerDescSlot );
    }

    // Apply the Gaussian in the vertical direction while copying: X buffer -> Y buffer.
    for ( int level = 0; level < levelCount; ++level )
    {
        pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
        pRenderTarget = &m_DownscaledYColorTargetView[level];
        pCommandBuffer->SetRenderTargets( 1, &pRenderTarget, nullptr );
        pPrimitiveRenderer->SetBlendState( pCommandBuffer, nns::gfx::PrimitiveRenderer::BlendType_Opacity );
        pPrimitiveRenderer->SetDepthStencilState( pCommandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
        pCommandBuffer->SetViewportScissorState( &m_DownscaledViewportScissor[level] );
        pPrimitiveRenderer->SetUserPixelShader( &m_GaussYPsShader );

        // Bind the per-level texture-size and Gaussian-weight constant buffers to the pixel stage.
        nn::gfx::GpuAddress paramAddress;
        m_TextureSizeBuffer.GetGpuAddress( &paramAddress, level );
        pCommandBuffer->SetConstantBuffer( m_SlotGaussYParam, nn::gfx::ShaderStage::ShaderStage_Pixel, paramAddress, sizeParamBytes );
        m_GaussWeightBuffer.GetGpuAddress( &paramAddress, level );
        pCommandBuffer->SetConstantBuffer( m_SlotGaussYWeightParam, nn::gfx::ShaderStage::ShaderStage_Pixel, paramAddress, weightParamBytes );

        // NOTE(review): second FlushMemory of this iteration with no draw since the first;
        // possibly redundant - confirm before removing.
        pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
        pPrimitiveRenderer->DrawScreenQuad( pCommandBuffer, m_DownscaledXTextureDescSlot[level], m_SamplerDescSlot );
    }

    // Additively composite the downscaled buffers, from the last level down into level 0.
    for ( int dstLevel = levelCount - 2; dstLevel >= 0; --dstLevel )
    {
        const int srcLevel = dstLevel + 1;

        pRenderTarget = &m_DownscaledYColorTargetView[dstLevel];
        pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
        pCommandBuffer->SetRenderTargets( 1, &pRenderTarget, nullptr );
        pCommandBuffer->SetBlendState( &m_AddSumBlendState );
        pPrimitiveRenderer->SetDepthStencilState( pCommandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
        pCommandBuffer->SetViewportScissorState( &m_DownscaledViewportScissor[dstLevel] );
        pPrimitiveRenderer->SetUserPixelShader( &m_CopyPsShader );
        pPrimitiveRenderer->DrawScreenQuad( pCommandBuffer, m_DownscaledYTextureDescSlot[srcLevel], m_SamplerDescSlot );
    }

    // Copy the glare to the frame buffer (user pixel shader cleared, m_AddSumBlendState still applied).
    pPrimitiveRenderer->SetUserPixelShader( nullptr );
    pCommandBuffer->SetRenderTargets( 1, &pOutputTarget, nullptr );
    pCommandBuffer->SetBlendState( &m_AddSumBlendState );
    pPrimitiveRenderer->SetDepthStencilState( pCommandBuffer, nns::gfx::PrimitiveRenderer::DepthStencilType_DepthNoWriteTest );
    pCommandBuffer->SetViewportScissorState( &m_ViewportScissor );
    pCommandBuffer->FlushMemory( nn::gfx::GpuAccess_ColorBuffer );
    pPrimitiveRenderer->DrawScreenQuad( pCommandBuffer, m_DownscaledYTextureDescSlot[0], m_SamplerDescSlot );
}
