﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

//==============================================================================
//  INCLUDES
//==============================================================================

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include <cfloat>
#include <algorithm>

#include <nnt.h>
#include <nnt/nnt_Argument.h>

#include <gfx/demo.h>

#if NN_GFX_IS_TARGET_GX
#include <cafe/gx2ut.h>
#endif

#if NN_GFX_IS_TARGET_GL
#include <GL/glew.h>
#pragma warning( disable : 4127 )
#endif

#if NN_GFX_IS_TARGET_NVN
#include <nvn/nvn.h>
#include <nvn/nvn_FuncPtrInline.h>
#if !defined( NN_BUILD_CONFIG_OS_SUPPORTS_HORIZON )
#pragma warning( disable : 4127 )
#endif
#endif

//==============================================================================
//  DEFINES
//==============================================================================

// Dimensions of the demo color buffer; each use re-queries DEMOColorBufferInfo.
#define SURFACE_WIDTH           (DEMOColorBufferInfo.GetWidth())
#define SURFACE_HEIGHT          (DEMOColorBufferInfo.GetHeight())

// NOTE(review): not referenced in the visible part of this file; presumably
// controls whether missing shader uniforms are tolerated - confirm.
#define ALLOW_MISSING_UNIFORMS   0 // NOLINT(preprocessor/const)

// Capacity of the SSAO sample kernel. Sizes g_SampleKernel and
// SSAOPsUniforms::uKernelOffsets, and is the maximum of the "Kernel Size" menu item.
static const int MAX_KERNEL_SIZE = 32;

// Projection parameters handed to MTXPerspective() by CameraSetup().
static const float CAM_FOV = 60.0f;
static const float CAM_ZNEAR = 1.0f;
static const float CAM_ZFAR = 300.0f;

// Function-like maximum. The selected argument is evaluated twice, so do not
// pass expressions with side effects.
#define MAX(a, b) ((a) > (b) ? (a) : (b))

//==============================================================================
// PATHS
//==============================================================================

// Shader asset paths (no file extension in the literal).
#define SHADER_ENC_NORMALS  "shaders/ssaoEncNormals"
#define SHADER_SSAO         "shaders/ssao"
#define SHADER_BLUR         "shaders/ssaoBlur5x5"
#define SHADER_OBJ          "shaders/ssaoObj"
#define SHADER_SKYBOX       "shaders/ssaoSkyboxQuad"
// "Hlslcc" counterparts of the shaders above.
// NOTE(review): the code that chooses between the two sets is not visible here.
#define SHADER_ENC_NORMALS_HLSLCC  "shaders/ssaoEncNormalsHlslcc"
#define SHADER_SSAO_HLSLCC         "shaders/ssaoHlslcc"
#define SHADER_BLUR_HLSLCC         "shaders/ssaoBlur5x5Hlslcc"
#define SHADER_OBJ_HLSLCC          "shaders/ssaoObjHlslcc"
#define SHADER_SKYBOX_HLSLCC       "shaders/ssaoSkyboxQuadHlslcc"

// Mesh asset paths; presumably consumed by LoadMesh() - confirm at the call sites.
#define MESH_ROOM           "geometries/cuberoom.scn"
#define MESH_QUAD           "geometries/quad.scn"

// Texture asset paths.
#define TEXTURE_SKYBOX      "textures/cubemap_sky"
#define TEXTURE_CONCRETE    "textures/concrete"

//==============================================================================
// PROTOTYPES
//==============================================================================

// Menu change callbacks. They are declared ahead of g_MenuData because its
// initializer stores their addresses in MenuItemData::OnChanged; each one
// matches the pfnValueChanged signature (takes the f32 value).
void OnMenuItemChangeSSAOSize       ( f32 Value );
void OnMenuItemChangeSSAOKernel     ( f32 Value );
void OnMenuItemChangeNoiseSize      ( f32 Value );

//==============================================================================
// TYPES
//==============================================================================

// Vertex attribute slots. The values index kAttributeNames, kAttributeFormats,
// kAttributeFormatSizes and ShaderData::AttrLocs, and LoadShader() also passes
// them as the attribute / vertex-buffer index.
typedef enum
{
    ATTRIBUTE_POSITION,      // Local coordinates
    ATTRIBUTE_NORMAL,        // Normal vectors
    ATTRIBUTE_TEXCOORD,      // Texture coordinates

    _ATTRIBUTE_COUNT         // Number of entries above; used as an array size
} AttributeType;

//--------------------------------------------------------------------------

// Compute-shader constant-buffer identifiers. The values index kCSUniformNames
// and ShaderData::CSUniformLocs, so the order here must match kCSUniformNames.
typedef enum
{
    CS_UNIFORM_DIMENSIONS,          // looked up as "tiledConvUniform"

    CS_UNIFORM_KERNEL_OFFSETS,
    CS_UNIFORM_KERNEL_SIZE,
    CS_UNIFORM_PROJ_COEFFS,
    CS_UNIFORM_NOISE_SCALE,
    CS_UNIFORM_SAMPLE_RADIUS,
    CS_UNIFORM_DEPTH_BIAS,
    CS_UNIFORM_POWER,
    CS_UNIFORM_TEXEL_SIZE,
    CS_UNIFORM_DIRECTION,
    CS_UNIFORM_ALBEDO,
    CS_UNIFORM_CAMERA_POS,

    _CS_UNIFORM_COUNT               // Number of entries above; used as an array size
} CSUniformType;

//--------------------------------------------------------------------------

// Pixel-shader sampler identifiers. The values index kPSSamplerNames and
// ShaderData::PSSamplerLocs, so the order here must match kPSSamplerNames.
typedef enum
{
    PS_SAMPLER_NORMALS,
    PS_SAMPLER_DEPTH,
    PS_SAMPLER_NOISE,
    PS_SAMPLER_SOURCE,
    PS_SAMPLER_LIGHT,
    PS_SAMPLER_SKYBOX,
    PS_SAMPLER_ALBEDO,

    _PS_SAMPLER_COUNT       // Number of entries above; used as an array size

} PSSampleType;

//--------------------------------------------------------------------------

// Compute-shader sampler/image identifiers. The values index kCSSamplerNames
// and ShaderData::CSSamplerLocs, so the order here must match kCSSamplerNames.
typedef enum
{
    CS_SAMPLER_SRC_TEXTURE,
    CS_SAMPLER_DST_IMAGE,   // on non-GX targets LoadComputeShader() re-queries this slot as an Image interface

    CS_SAMPLER_NORMALS,
    CS_SAMPLER_DEPTH,
    CS_SAMPLER_NOISE,
    CS_SAMPLER_SOURCE,
    CS_SAMPLER_LIGHT,
    CS_SAMPLER_SKYBOX,
    CS_SAMPLER_ALBEDO,

    _CS_SAMPLER_COUNT       // Number of entries above; used as an array size

} CSSampleType;

//--------------------------------------------------------------------------

// Kind of value a menu item holds (stored in MenuItemData::Type). The value
// itself is always kept as an f32 in MenuItemData.
typedef enum
{
    ITEM_TYPE_FLOAT,
    ITEM_TYPE_INT,
    ITEM_TYPE_BOOL,
} ItemType;

//--------------------------------------------------------------------------

// Row indices into g_MenuData. MENU_HEAD is the header row; the rows from
// _MENU_FIRST to _MENU_LAST are the adjustable items.
typedef enum
{
    MENU_HEAD,

    MENU_SIZE,
    MENU_POWER,
    MENU_KERNEL,
    MENU_RADIUS,
    MENU_NOISE_SIZE,
    MENU_DEPTH_SMOOTH,
    MENU_BLUR_ENABLE,
    MENU_SSAO_ENABLE,
    MENU_SSAO_ONLY,

    _MENU_ITEM_COUNT,       // Total number of rows (header + items); size of g_MenuData

    // Range of header rows (currently just MENU_HEAD).
    _MENU_HEADER_FIRST = MENU_HEAD,
    _MENU_HEADER_LAST  = MENU_HEAD,
    _MENU_HEADER_COUNT = _MENU_HEADER_LAST - _MENU_HEADER_FIRST + 1,

    // Inclusive range of adjustable item rows.
    _MENU_FIRST = MENU_SIZE,
    _MENU_LAST  = MENU_SSAO_ONLY,

} MenuItems;

//--------------------------------------------------------------------------

// Timed sections of a frame. _TIME_COUNT sizes g_AvgTimes, and g_Times holds
// two timestamps per entry.
typedef enum
{
    TIME_NORMAL_DEPTH,
    TIME_SSAO,
    TIME_BLUR,
    TIME_OBJECT,

    _TIME_COUNT,            // Number of entries above; used as an array size
} SampleTimes;

//--------------------------------------------------------------------------

// Header found at the start of a mesh asset file. LoadMesh() reinterprets the
// first bytes of the file as this struct and reads only the four counts; the
// vertex data is then taken sequentially from the bytes following the header.
typedef struct _MeshData
{
    s32     VertexCount;    // Number of positions (4 floats each in the file)
    s32     NormalCount;    // Number of normals (4 floats each in the file)
    s32     TexCoordCount;  // Number of texture coordinates (2 floats each in the file)
    s32     Indices;        // Number of u32 indices
    // NOTE(review): the four fields below are not read by the visible code;
    // presumably stored pointers/offsets - confirm against the asset exporter.
    u32     pPos;
    u32     pNrm;
    u32     pTex;
    u32     pIdx;
} MeshData;

//--------------------------------------------------------------------------

// Runtime mesh: element counts copied from the file header plus one GPU buffer
// per stream. LoadMesh() only initializes a buffer whose stream is non-empty,
// and FreeMesh() only finalizes a buffer whose count is non-zero.
typedef struct _Mesh
{
    u32 VertexCount;
    u32 NormalCount;
    u32 TexCoordCount;
    u32 Indices;                    // Number of indices (u32 each)
    DEMOGfxBuffer vertexBuffer;
    DEMOGfxBuffer normalBuffer;
    DEMOGfxBuffer texCoordBuffer;
    DEMOGfxBuffer indexBuffer;
} Mesh;

//--------------------------------------------------------------------------

// Everything kept per loaded shader program: the source path (needed by
// ReloadShader()), the pipeline objects, the uniform blocks and the interface
// slots looked up at load time.
typedef struct _ShaderData
{
    char  Filename[256];                    // Path given to LoadShader()/LoadComputeShader()
    DEMOGfxPipeline demoPipeline;           // Graphics pipeline; its 'shaders' member is also used for compute shaders
    DEMOGfxBuffer vsUniformBlock;           // Allocated by LoadShader() with vsUniformSize bytes
    DEMOGfxBuffer psUniformBlock;           // Allocated by LoadShader() with psUniformSize bytes
    nn::gfx::Pipeline computePipeline;      // Initialized only by LoadComputeShader()
    bool  bComputeShader;                   // Selects the compute or graphics path in FreeShader()/ReloadShader()
    s32   vsUniformSize;
    s32   psUniformSize;
    s32   AttrLocs     [_ATTRIBUTE_COUNT];  // Indexed by AttributeType; LoadShader() treats -1 as "attribute absent"
    s32   VSUniformLocs[1];                 // Slot of the "VSUniforms" constant buffer
    s32   PSUniformLocs[1];                 // Slot of the "PSUniforms" constant buffer
    s32   CSUniformLocs[_CS_UNIFORM_COUNT]; // Indexed by CSUniformType
    s32   PSSamplerLocs[_PS_SAMPLER_COUNT]; // Indexed by PSSampleType
    s32   CSSamplerLocs[_CS_SAMPLER_COUNT]; // Indexed by CSSampleType
} ShaderData;

//--------------------------------------------------------------------------

// Callback type stored in MenuItemData::OnChanged; it takes an f32 value.
typedef void (*pfnValueChanged)(f32);
// One row of g_MenuData. For the header row (MENU_HEAD) the initializer reuses
// Min and Max to hold the first and last item index of the menu.
typedef struct _MenuItemData
{
    char                Name[256];
    f32                 Default;
    f32                 Min;
    f32                 Max;
    f32                 Inc;        // Step size
    ItemType            Type;
    pfnValueChanged     OnChanged;  // May be NULL
    // Not set by the g_MenuData initializer (so it starts at zero).
    // NOTE(review): presumably assigned from Default during setup - confirm.
    f32                 Value;
} MenuItemData;

// Four unsigned 32-bit components; used as a uniform-block member (SSAOBlurUniform).
typedef struct _uvec4 {
    u32 x;
    u32 y;
    u32 z;
    u32 w;
} uvec4;

// Four 32-bit float components; used as a uniform-block member.
typedef struct _vec4 {
    f32 x;
    f32 y;
    f32 z;
    f32 w;
} vec4;

// CPU-side layout of the blur compute uniform block; g_BlurUniform is
// allocated with sizeof( SSAOBlurUniform ) bytes.
typedef struct _SSAOBlurUniform {
    uvec4 dimensions;
} SSAOBlurUniform;

// CPU-side layout of the SSAO vertex-shader uniform block.
// NOTE(review): presumably must match the "VSUniforms" block in the SSAO shader - confirm.
typedef struct _SSAOVsUniforms {
    vec4 uViewRay;
} SSAOVsUniforms;

// CPU-side layout of the SSAO pixel-shader uniform block.
// NOTE(review): presumably must match the "PSUniforms" block in the SSAO shader - confirm.
typedef struct _SSAOPsUniforms {
    vec4        uProjMtx[4];
    vec4        uKernelOffsets[ MAX_KERNEL_SIZE ];
    int         uKernelSize;
    int         uPadding[ 3 ];      // Together with uKernelSize this fills one 16-byte slot
    float       uProjCoeffs[2];
    float       uNoiseScale[2];
    float       uSampleRadius;
    float       uDepthBias;
    float       uPower;
} SSAOPsUniforms;

// CPU-side layout of the vertex-shader uniform block for the normal-encoding
// pass: model, view and projection matrices.
typedef struct _ShaderEncNormalVsUniforms
{
    Mtx44 uModelMtx;
    Mtx44 uViewMtx;
    Mtx44 uProjMtx;
} ShaderEncNormalVsUniforms;

// CPU-side layout of the vertex-shader uniform block for the object pass:
// model, view and projection matrices (same members as ShaderEncNormalVsUniforms).
typedef struct _SSAOObjVsUniforms
{
    Mtx44 uModelMtx;
    Mtx44 uViewMtx;
    Mtx44 uProjMtx;
} SSAOObjVsUniforms;

//==============================================================================
//  PROTOTYPES
//==============================================================================
// File-local forward declarations. The definitions further down omit 'static'
// but keep internal linkage because of these declarations.
// LoadComputeShader(), DestroyNoise() and CreateComputeBuffers() are defined
// below without a prototype here.
static void CameraSetup                ();
static bool LoadMesh                   ( const char* pFilename, Mesh* pDataOut );
static void FreeMesh                   ( Mesh* pMesh );
static bool LoadShader                 ( const char* pFilename, ShaderData* pShaderOut, size_t vsUniformSize, size_t psUniformSize );
static void FreeShader                 ( ShaderData* pShader );
static void ReloadShader               ( ShaderData* pShader );
static void GenerateSampleKernel       ();
static void GenerateNoise              ();
static void CreateRenderTargets        ();
static void DEBUGSetPixelTextureAndSampler       ( const nn::gfx::DescriptorSlot& textureSlot, nn::gfx::DescriptorSlot& samplerSlot, s32 UnitNumber );
static void DrawMesh                   ( Mesh* pMesh, ShaderData* pShader );
static void DrawSceneMeshes            ( ShaderData* pShader );
static void DrawDepthAndNormals        ();
static void DrawSSAO                   ();
static void BlurSSAO                   ();
static void DrawObjects                ();
static void DrawUI                     ();
static void DrawScene                  ();
static void ProcessPad                 ();

//==============================================================================
//  CONSTANTS
//==============================================================================

// Vertex-shader input names, indexed by AttributeType. LoadShader() looks each
// name up in the vertex stage.
const char* kAttributeNames[_ATTRIBUTE_COUNT] =
{
    "aPosition",
    "aNormal",
    "aTexCoord",
};
// Vertex attribute formats, indexed by AttributeType. Positions and normals
// use four floats, texture coordinates two.
const nn::gfx::AttributeFormat kAttributeFormats[_ATTRIBUTE_COUNT] =
{
    nn::gfx::AttributeFormat_32_32_32_32_Float,
    nn::gfx::AttributeFormat_32_32_32_32_Float,
    nn::gfx::AttributeFormat_32_32_Float,
};

// Byte size of one element of each attribute, indexed by AttributeType; must
// stay in step with kAttributeFormats. LoadShader() passes it to
// DEMOGfxInitShaderVertexBuffer().
const size_t kAttributeFormatSizes[_ATTRIBUTE_COUNT] =
{
    sizeof( float ) * 4,
    sizeof( float ) * 4,
    sizeof( float ) * 2,
};

// Pixel-shader sampler names, indexed by PSSampleType. LoadShader() looks each
// name up in the pixel stage.
const char* kPSSamplerNames[_PS_SAMPLER_COUNT] =
{
    "sTexNormals",
    "sTexDepth",
    "sTexNoise",
    "sTexSource",
    "sTexLight",
    "sTexSkybox",
    "sTexAlbedo",
};

// Compute-shader constant-buffer names, indexed by CSUniformType.
// LoadComputeShader() looks each name up in the compute stage.
const char* kCSUniformNames[_CS_UNIFORM_COUNT] =
{
    "tiledConvUniform",

    "uKernelOffsets",
    "uKernelSize",
    "uProjCoeffs",
    "uNoiseScale",
    "uSampleRadius",
    "uDepthBias",
    "uPower",
    "uTexelSize",
    "uDirection",
    "uAlbedo",
    "uCameraPos",
};

// Compute-shader sampler names, indexed by CSSampleType. LoadComputeShader()
// looks each name up as a sampler in the compute stage; on non-GX targets it
// then looks "dstImage" up again as an image.
const char* kCSSamplerNames[_CS_SAMPLER_COUNT] =
{
    "srcTexture",
    "dstImage",

    "sTexNormals",
    "sTexDepth",
    "sTexNoise",
    "sTexSource",
    "sTexLight",
    "sTexSkybox",
    "sTexAlbedo",
};

// Menu table, indexed by MenuItems. Row order must match the MenuItems enum.
// The Value field of every row is left out of the initializer.
_MenuItemData g_MenuData[_MENU_ITEM_COUNT] =
{
    // Header row: the Default slot is unused (0); the Min/Max slots hold the
    // first and last item index.
    // Name                 First Item          Last Item
    // --------------       ------------------  ----------------
    { "Menu"            ,0  , _MENU_FIRST       , _MENU_LAST },

    // Item rows.
    // Name             Default     Min         Max     Increment       Type                Change Callback
    // -------------    -------     ---------   ------  -----------     ----------          ----------------------------
    { "Target Size"     , 0.5f      , 0.25f     ,  1.0f , 0.25f         , ITEM_TYPE_FLOAT   , OnMenuItemChangeSSAOSize      },
    { "Power"           , 3.0f      , 1.0f      , 10.0f , 0.25f         , ITEM_TYPE_FLOAT   , NULL                          },
    { "Kernel Size"     , 8.0f      , 2.0f      , (float)MAX_KERNEL_SIZE, 1.00f         , ITEM_TYPE_INT     , OnMenuItemChangeSSAOKernel    },
    { "Radius"          , 3.0f      , 0.1f      , 10.0f , 0.10f         , ITEM_TYPE_FLOAT   , NULL                          },
    { "Noise Size"      , 4.0f      , 4.0f      , 64.0f , 4.00f         , ITEM_TYPE_INT     , OnMenuItemChangeNoiseSize     },
    { "Depth Smooth"    , 0.3f      , 0.1f      ,  5.0f , 0.10f         , ITEM_TYPE_FLOAT   , NULL                          },
    { "Blur Enabled"    , 1.0       , 0.0f      ,  1.0f , 1.00f         , ITEM_TYPE_BOOL    , NULL                          },
    { "SSAO Enabled"    , 1.0       , 0.0f      ,  1.0f , 1.00f         , ITEM_TYPE_BOOL    , NULL                          },
    { "SSAO Only"       , 0.0       , 0.0f      ,  1.0f , 1.00f         , ITEM_TYPE_BOOL    , NULL                          },
};

//==============================================================================
//  GLOBALS
//==============================================================================

// Scene geometry.
// NOTE(review): presumably loaded from MESH_ROOM / MESH_QUAD; the load calls
// are outside the visible part of the file.
Mesh   g_MeshRoom;
Mesh   g_MeshQuad;

// One ShaderData per shader program used by the demo.
ShaderData  g_ShaderEncNormals;
ShaderData  g_ShaderSSAO;
ShaderData  g_ShaderBlur;
ShaderData  g_ShaderObj;
ShaderData  g_ShaderSkybox;
nn::gfx::DepthStencilState g_DepthReadOnlyState;

// ColorBuffers
// Normal target: color view plus the texture/view/descriptor slot and memory
// pool filled in by CreateRenderTargets().
nn::gfx::ColorTargetView  g_NormalCB;
nn::gfx::Texture          g_NormalResolveTexture;
nn::gfx::TextureView      g_NormalResolveTextureView;
nn::gfx::DescriptorSlot   g_NormalResolveTextureSlot;
DEMOGfxMemPool* g_pNormalCBResolveTexturePool;

// Depth target (D32 float), set up alongside the normal target.
nn::gfx::DepthStencilView  g_DepthBuffer;
nn::gfx::Texture           g_DepthResolveTexture;
nn::gfx::TextureView       g_DepthResolveTextureView;
nn::gfx::DescriptorSlot    g_DepthResolveTextureSlot;
DEMOGfxMemPool*           g_pDepthBufferResolveTexturePool;

// Two SSAO targets. On GX and D3D, CreateRenderTargets() sets up index 0 the
// usual way and index 1 together with an export buffer.
nn::gfx::ColorTargetView  g_SSAOCb[2];
nn::gfx::Texture          g_SSAOResolveTextures[2];
nn::gfx::TextureView      g_SSAOResolveTextureViews[2];
nn::gfx::DescriptorSlot   g_SSAOResolveTextureSlots[2];
DEMOGfxMemPool*           g_pSSAOCbResolveTextures[2];
#if NN_GFX_IS_TARGET_NVN
NVNimageHandle g_ShaderBlurImageHandle;
#endif
#if NN_GFX_IS_TARGET_GX
// Sub-pool shared by g_SSAOResolveTextures[1] and the export buffer.
DEMOGfxMemPool* g_pSSAOResolveTextureExportPool;
nn::gfx::Buffer g_SSAOResolveTextureExportBuffer;
nn::gfx::DescriptorSlot g_SSAOResolveTextureExportBufferSlot;
#endif
#if NN_GFX_IS_TARGET_D3D
nn::gfx::Buffer g_SSAOResolveTextureExportBuffer;
nn::gfx::DescriptorSlot g_SSAOResolveTextureExportBufferSlot;
#endif

// Textures
// Noise texture objects; created by GenerateNoise() and released by DestroyNoise().
nn::gfx::Texture  g_NoiseTexture;
nn::gfx::TextureView  g_NoiseTextureView;
nn::gfx::DescriptorSlot g_NoiseTextureSlot;
DEMOGfxMemPool* g_pNoiseTexturePool;
DEMOGfxBuffer g_NoiseTextureData;       // CPU-written staging data for the noise texture

DEMOGfxTexture g_TextureSkybox;
DEMOGfxTexture g_TextureConcrete;

// Samplers
// NOTE(review): filter/address modes are implied by the names only; the
// initialization is outside the visible part of the file.
nn::gfx::Sampler  g_SamplerPointWrap;
nn::gfx::Sampler  g_SamplerPointClamp;
nn::gfx::Sampler  g_SamplerLinearClamp;
nn::gfx::DescriptorSlot  g_SamplerPointWrapSlot;
nn::gfx::DescriptorSlot  g_SamplerPointClampSlot;
nn::gfx::DescriptorSlot  g_SamplerLinearClampSlot;

// SSAO
// Kernel offsets written by GenerateSampleKernel(); the fourth component of
// every entry is set to 0.
float   g_SampleKernel [MAX_KERNEL_SIZE][4];
// Edge length of the noise texture. GenerateNoise() uses at least 8 regardless
// of this value. Note the "Noise Size" menu item has a default/minimum of 4.
s32     g_NoiseSize     = 8;
// Size of the SSAO render targets (used by CreateRenderTargets()).
s32     g_SSAOWidth     = 0;
s32     g_SSAOHeight    = 0;
// Divisors applied to the SSAO size to get the blur dispatch counts on GX
// (see CreateComputeBuffers()).
int     g_BlurAlignWidth = 0;
int     g_BlurAlignHeight = 0;

nn::gfx::ViewportScissorState g_SSAOViewportScissor;
void* g_pSSAOViewportScissorData;

// Camera
Point3d g_CamPos        = { -30, 30, 30 };
Point3d g_CamLookAt     = {   0,  0,  0 };
// Not read by CameraSetup(), which uses its own local up vector with the same value.
Vec     g_CamUp         = {   0,  1,  0 };
Mtx44   g_Model;
Mtx44   g_View;         // Written by CameraSetup()
Mtx44   g_Proj;         // Written by CameraSetup()

// Menu
// NOTE(review): -1 presumably means "nothing selected"; the menu handling code
// is outside the visible part of the file.
s32     g_MenuHeaderActive = -1;
s32     g_MenuItemActive   = -1;
bool    g_MenuHidden       = false;

// Metrics/Timing
// Two GPU timestamps per SampleTimes entry, and one averaged time per entry.
DEMOGfxGpuTimestamp g_Times[ 2 * _TIME_COUNT ];
f32 g_AvgTimes[_TIME_COUNT];
u32 g_FrameCount = 0;

// SSAO Blur Compute
// Uniform block of sizeof( SSAOBlurUniform ) bytes; allocated by CreateComputeBuffers().
DEMOGfxBuffer g_BlurUniform;
#if NN_GFX_IS_TARGET_GX
// Indirect dispatch arguments; allocated by CreateComputeBuffers().
DEMOGfxBuffer g_BlurDispatchBuffer;
#endif

//==============================================================================
//  INLINE HELPER FUNCTIONS
//==============================================================================

// Returns a pseudo-random value: DEMORand() is scaled by 1/0x7fff and the
// result is remapped linearly onto the span that starts at Min and has length
// (Max - Min).
inline
f32 fRand( f32 Min, f32 Max )
{
    static const f32 s_RandCeiling = 0x7fff;
    static const f32 s_RandScale   = 1.0f / s_RandCeiling;

    const f32 unitValue = ( f32 ) DEMORand() * s_RandScale;
    const f32 span      = Max - Min;

    return unitValue * span + Min;
}

// Linear interpolation: yields a for t == 0 and moves towards b as t grows.
// t is not clamped.
inline
f32 Lerp( f32 a, f32 b, f32 t )
{
    const f32 delta = b - a;
    return a + delta * t;
}

// Builds a Vec from its three components.
inline
Vec VecMake( f32 x, f32 y, f32 z )
{
    const Vec made = { x, y, z };
    return made;
}

// Returns x multiplied by itself.
inline
f32 Sqr( f32 x )
{
    const f32 squared = x * x;
    return squared;
}

// Quantizes a float to a signed 16-bit value: f is remapped with
// (f * 0.5 + 0.5) * 65535 and offset by -32768, so -1 maps to -32768 and +1 to
// 32767. On GX the bytes of the result are reversed before it is returned.
inline
s16 F32toS16( f32 f )
{
    const s16 quantized = static_cast< s16 >( -32768 + ((f * 0.5f + 0.5f ) * 65535) );

#if NN_GFX_IS_TARGET_GX
    return nn::util::reverse_bytes< s16 >( quantized );
#else
    return quantized;
#endif
}

// Converts a GPU tick count to milliseconds by way of
// DEMOGfxGpuTimestamp::TicksToMicroseconds().
inline
f32 GpuTicksToMilliseconds(u64 tick)
{
    static const f32 s_MicrosecondsPerMillisecond = 1000.0f;

    return DEMOGfxGpuTimestamp::TicksToMicroseconds( tick ) / s_MicrosecondsPerMillisecond;
}

//==============================================================================
//  FUNCTIONS
//==============================================================================

void CameraSetup()
{
    f32   pers = CAM_FOV;
    f32 aspect = (f32)SURFACE_WIDTH / (f32)SURFACE_HEIGHT;
    f32  znear = CAM_ZNEAR;
    f32   zfar = CAM_ZFAR;

    // Compute perspective matrix
    MTXPerspective(g_Proj, pers, aspect, znear, zfar);

    const Vec kUp = { 0.0f, 1.0f, 0.0f };

    // Compute lookAt matrix
    Mtx matLookAt;
    MTXLookAt(matLookAt, &g_CamPos, &kUp, &g_CamLookAt );
    MTX34To44(matLookAt, g_View );
}

//--------------------------------------------------------------------------

bool LoadMesh( const char* pFilename, Mesh* pMesh )
{
    nn::gfx::MemoryPool::InfoType memoryPoolInfo;
    u32 Len;
    u8* pFileData = (u8*)DEMOGfxLoadAssetFile(pFilename, &Len);
    u8* pOrigFileData = pFileData;

#if !NN_GFX_IS_TARGET_GX
    // Data is big-endian (Cafe)
    DEMOSwapBuffer32( pFileData, Len );
#endif

    MeshData* pMeshData = reinterpret_cast< MeshData* >( pFileData );
    pMesh->VertexCount = pMeshData->VertexCount;
    pMesh->NormalCount = pMeshData->NormalCount;
    pMesh->TexCoordCount = pMeshData->TexCoordCount;
    pMesh->Indices = pMeshData->Indices;
    pFileData        += sizeof(MeshData);

    u32 size;

    size = sizeof(f32) * 4 * pMesh->VertexCount;
    if ( size )
    {
        pMesh->vertexBuffer.Initialize( size, pFileData, nn::gfx::GpuAccess_VertexBuffer, 0 );
    }
    pFileData += size;

    size = sizeof(f32) * 4 * pMesh->NormalCount;
    if ( size )
    {
        pMesh->normalBuffer.Initialize( size, pFileData, nn::gfx::GpuAccess_VertexBuffer, 0 );
    }
    pFileData += size;

    size = sizeof(f32) * 2 * pMesh->TexCoordCount;
    if ( size )
    {
        pMesh->texCoordBuffer.Initialize( size, pFileData, nn::gfx::GpuAccess_VertexBuffer, 0 );
    }
    pFileData += size;

    size = sizeof(u32) * pMesh->Indices;
    if ( size )
    {
        pMesh->indexBuffer.Initialize( size, pFileData, nn::gfx::GpuAccess_IndexBuffer, 0 );
    }

    DEMOFree( pOrigFileData );

    return true;
}

// Releases the GPU buffers of a mesh. A buffer is finalized only when its
// element count is non-zero, mirroring LoadMesh(), which creates buffers only
// for non-empty streams. The counts themselves are left unchanged.
void FreeMesh( Mesh* pMesh )
{
    const bool hasPositions = ( pMesh->VertexCount   != 0 );
    const bool hasNormals   = ( pMesh->NormalCount   != 0 );
    const bool hasTexCoords = ( pMesh->TexCoordCount != 0 );
    const bool hasIndices   = ( pMesh->Indices       != 0 );

    if ( hasPositions )
    {
        pMesh->vertexBuffer.Finalize();
    }
    if ( hasNormals )
    {
        pMesh->normalBuffer.Finalize();
    }
    if ( hasTexCoords )
    {
        pMesh->texCoordBuffer.Finalize();
    }
    if ( hasIndices )
    {
        pMesh->indexBuffer.Finalize();
    }
}

//--------------------------------------------------------------------------

// Loads a compute shader and prepares its pipeline.
//
// pFilename  : shader asset path. It is remembered in pShaderOut->Filename so
//              ReloadShader() can load it again; a path longer than the buffer
//              is truncated in the stored copy only.
// pShaderOut : receives the shaders, the compute pipeline and the interface
//              slots of every name in kCSUniformNames / kCSSamplerNames.
//
// Always returns true.
bool LoadComputeShader( const char* pFilename, ShaderData* pShaderOut )
{
    // ReloadShader() passes pShaderOut->Filename itself. Copying a string onto
    // itself with strcpy is undefined behaviour, so only copy from a different
    // buffer, and never write past the end of Filename.
    if ( pFilename != pShaderOut->Filename )
    {
        strncpy( pShaderOut->Filename, pFilename, sizeof( pShaderOut->Filename ) - 1 );
        pShaderOut->Filename[ sizeof( pShaderOut->Filename ) - 1 ] = '\0';
    }

    DEMOGfxLoadShadersFromFile( &pShaderOut->demoPipeline.shaders, 0, pFilename );

    nn::gfx::ComputePipelineInfo info;
    info.SetDefault();
    info.SetShaderPtr( pShaderOut->demoPipeline.shaders.GetShader() );
    pShaderOut->computePipeline.Initialize( &DEMODevice, info );

    // Locate CS Uniforms.
    for( s32 i = 0; i < _CS_UNIFORM_COUNT; ++i )
    {
        const char* pName = kCSUniformNames[i];
        ASSERT( pName != NULL );
        pShaderOut->CSUniformLocs[ i ] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(
            nn::gfx::ShaderStage_Compute, nn::gfx::ShaderInterfaceType_ConstantBuffer, pName );
    }

    // Locate CS Samplers.
    for( s32 i = 0; i < _CS_SAMPLER_COUNT; ++i )
    {
        const char* pName = kCSSamplerNames[i];
        ASSERT( pName != NULL );
        pShaderOut->CSSamplerLocs[ i ] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(
            nn::gfx::ShaderStage_Compute, nn::gfx::ShaderInterfaceType_Sampler, pName );
    }

#if !NN_GFX_IS_TARGET_GX
    // Outside GX the destination is an image, not a sampler, so the slot found
    // by the loop above is replaced with the image interface slot.
    pShaderOut->CSSamplerLocs[ CS_SAMPLER_DST_IMAGE ] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(
        nn::gfx::ShaderStage_Compute, nn::gfx::ShaderInterfaceType_Image, "dstImage" );
#endif

    pShaderOut->bComputeShader = true;
    return true;
}

//--------------------------------------------------------------------------

bool LoadShader( const char* pFilename, ShaderData* pShaderOut, size_t vsUniformSize, size_t psUniformSize )
{
    strcpy( pShaderOut->Filename, pFilename );

    DEMOGfxLoadShadersFromFile( &pShaderOut->demoPipeline.shaders, 0, pFilename );

    // Locate Attributes.
    s32 AttribCount = 0;
    for( s32 i = 0; i < _ATTRIBUTE_COUNT; ++i )
    {
        const char* pName      = kAttributeNames[i];
        nn::gfx::AttributeFormat Format = kAttributeFormats[i];
        ASSERT( pName != NULL );

        pShaderOut->AttrLocs[i] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(nn::gfx::ShaderStage_Vertex,
            nn::gfx::ShaderInterfaceType_Input, pName );

        if( pShaderOut->AttrLocs[i] != -1 )
        {
            DEMOGfxInitShaderAttribute( &pShaderOut->demoPipeline.shaders,
                pName, i, 0, Format );
            DEMOGfxInitShaderVertexBuffer( &pShaderOut->demoPipeline.shaders,
                i, static_cast< u32 >( kAttributeFormatSizes[ i ] ), 0 );
            AttribCount++;
        }
    }

    // Locate VS Uniforms.
    pShaderOut->VSUniformLocs[0] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(
        nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_ConstantBuffer, "VSUniforms" );

    // Locate PS Uniforms.
    pShaderOut->PSUniformLocs[0] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(
        nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "PSUniforms" );

    // Locate PS Samplers.
    for( s32 i = 0; i < _PS_SAMPLER_COUNT; ++i )
    {
        const char* pName = kPSSamplerNames[i];
        ASSERT( pName != NULL );
        pShaderOut->PSSamplerLocs[i] = pShaderOut->demoPipeline.shaders.GetInterfaceSlot(
            nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_Sampler, pName );
    }

    // Allocate the uniform blocks
    pShaderOut->vsUniformSize = static_cast< s32 >( vsUniformSize );
    pShaderOut->psUniformSize = static_cast< s32 >( psUniformSize );
    pShaderOut->vsUniformBlock.Initialize( vsUniformSize, NULL, nn::gfx::GpuAccess_ConstantBuffer, 0 );
    pShaderOut->psUniformBlock.Initialize( psUniformSize, NULL, nn::gfx::GpuAccess_ConstantBuffer, 0 );

    // Create the pipeline
    pShaderOut->demoPipeline.Initialize( &DEMODevice );

    pShaderOut->bComputeShader = false;
    return true;
}

//--------------------------------------------------------------------------

// Releases what the matching loader created: for graphics shaders the
// pipeline and both uniform blocks, for compute shaders the compute pipeline
// and the shaders. The Filename, size and bComputeShader fields are left
// untouched.
void FreeShader( ShaderData* pShader )
{
    if ( !pShader->bComputeShader )
    {
        // Graphics shader (LoadShader).
        pShader->demoPipeline.Finalize( &DEMODevice );
        pShader->vsUniformBlock.Finalize();
        pShader->psUniformBlock.Finalize();
        return;
    }

    // Compute shader (LoadComputeShader).
    pShader->computePipeline.Finalize( &DEMODevice );
    DEMOGfxFreeShaders( &pShader->demoPipeline.shaders );
}

//--------------------------------------------------------------------------

// Frees a shader and loads it again from the path and uniform sizes recorded
// when it was first loaded.
//
// The path, the shader kind and the uniform sizes are copied out of *pShader
// before anything is freed. The loaders write into pShader->Filename, so
// handing them that same buffer as the source would make them copy a string
// onto itself, which is undefined behaviour for strcpy.
void ReloadShader( ShaderData* pShader )
{
    char filename[ sizeof( pShader->Filename ) ];
    memcpy( filename, pShader->Filename, sizeof( filename ) );
    filename[ sizeof( filename ) - 1 ] = '\0';

    const bool isCompute     = pShader->bComputeShader;
    const s32  vsUniformSize = pShader->vsUniformSize;
    const s32  psUniformSize = pShader->psUniformSize;

    FreeShader( pShader );

    if ( isCompute )
    {
        LoadComputeShader( filename, pShader );
    }
    else
    {
        LoadShader( filename, pShader, vsUniformSize, psUniformSize );
    }
}

//--------------------------------------------------------------------------

void GenerateSampleKernel()
{
    //
    // We want a random sampling within a normal-aligned unit hemisphere.
    //

    s32 KernelSize = (s32)g_MenuData[MENU_KERNEL].Value;
    for (s32 i = 0; i < KernelSize; ++i)
    {
        Vec K = { fRand(-1.0f, 1.0f),
                  fRand(-1.0f, 1.0f),
                  fRand( 0.0f, 1.0f) };

        Vec Kn;
        VECNormalize( &K, &Kn );

        // Scale to within the hemisphere.
        Vec Ks;
        VECScale( &Kn, &Ks,  fRand( 0.0f, 1.0f ) );

        // We also want the distance to the origin to falloff as we generate more points.
        f32 Scalar = (f32)(i + 1) / (f32)KernelSize;
        Scalar = Lerp( 0.1f, 1.0f, Scalar * Scalar );

        VECScale( &Ks, (Vec*)&g_SampleKernel[i][0], Scalar );
        g_SampleKernel[i][3] = 0.0f;
    }
}

//--------------------------------------------------------------------------

// Releases everything GenerateNoise() created: the texture view, the texture,
// its memory pool and the CPU-side noise data buffer.
// g_pNoiseTexturePool is dereferenced unconditionally, so this must only be
// called after a successful GenerateNoise().
void DestroyNoise()
{
    // The view is finalized before the texture it refers to.
    g_NoiseTextureView.Finalize( &DEMODevice );
    g_NoiseTexture.Finalize( &DEMODevice );
    g_pNoiseTexturePool->Finalize();
    delete g_pNoiseTexturePool;
    g_pNoiseTexturePool = NULL;

    g_NoiseTextureData.Finalize();
}

//--------------------------------------------------------------------------

// Creates the square noise texture sampled by the SSAO pass and fills it with
// random, normalized 2D vectors stored as two signed 16-bit values per texel.
// The edge length is g_NoiseSize, but never less than 8.
// The objects created here are released by DestroyNoise().
void GenerateNoise()
{
    s32 Size = std::max( 8, g_NoiseSize );

    // Creates the texture and its memory pool. No texture view is requested
    // here; a custom one is created just below.
    DEMOGfxSetupTextureBuffer( &g_NoiseTexture, NULL, &g_NoiseTextureSlot, NULL, NULL,
        &g_pNoiseTexturePool, Size, Size, 1, 1, nn::gfx::ImageDimension_2d,
        nn::gfx::ImageFormat_R16_G16_Snorm, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );

    // Special setup so that missing components are selected correctly.
    {
        nn::gfx::TextureView::InfoType info;
        info.SetDefault();
        // The format only has R and G; B reads as 0 and A as 1.
        info.SetChannelMapping( nn::gfx::ChannelMapping_Red, nn::gfx::ChannelMapping_Green, nn::gfx::ChannelMapping_Zero, nn::gfx::ChannelMapping_One );
        info.SetImageDimension( nn::gfx::ImageDimension_2d );
        info.SetImageFormat( nn::gfx::ImageFormat_R16_G16_Snorm );
        info.SetTexturePtr( &g_NoiseTexture );
        g_NoiseTextureView.Initialize( &DEMODevice, info );

        // Register the view and store its descriptor slot in g_NoiseTextureSlot.
        int index = DEMOGfxRegisterTextureView( &g_NoiseTextureView );
        DEMOTextureDescriptorPool.GetDescriptorSlot( &g_NoiseTextureSlot, index );
    }

    s32 N = Size*Size;      // Texel count
    s32 p = 0;              // Write cursor into pWrite, in s16 units
    // Two s16 values per texel.
    g_NoiseTextureData.Initialize( N * 2 * sizeof( s16 ), NULL, nn::gfx::GpuAccess_Read, 0 );
    s16* pWrite = g_NoiseTextureData.Map< s16 >();
    for( s32 i = 0; i < N; ++i )
    {
        // Noise is just some random XY-normalized vectors.
        Vec V = { fRand( -1.0f, 1.0f ),
                  fRand( -1.0f, 1.0f ),
                  0.0f };
        Vec Vn;
        VECNormalize( &V, &Vn );

        // Store as Signed integers since floating-point targets are slow.
        pWrite[p++] = F32toS16(Vn.x);
        pWrite[p++] = F32toS16(Vn.y);
    }

#if NN_GFX_IS_TARGET_GX
    // NOTE(review): GX2TileTexture is given the mapped data pointer directly;
    // its exact semantics are not visible here.
    GX2TileTexture( g_NoiseTextureView.ToData()->pGx2Texture, pWrite );
#elif NN_GFX_IS_TARGET_D3D
    // The image is not flipped on D3D.
#else
    // Flip the image vertically: swap row 'row' with row 'Size - row - 1'.
    // Each texel (two s16 values) is moved as a single uint32_t.
    for ( int row = 0; row < Size / 2; row++ )
    {
        for ( int col = 0; col < Size; col++ )
        {
            uint32_t swapVal = reinterpret_cast< uint32_t* >( pWrite )[ row * Size + col ];
            reinterpret_cast< uint32_t* >( pWrite )[ row * Size + col ] = reinterpret_cast< uint32_t* >( pWrite )[ (Size - row - 1) * Size + col ];
            reinterpret_cast< uint32_t* >( pWrite )[ (Size - row - 1) * Size + col ] = swapVal;
        }
    }
#endif

    g_NoiseTextureData.Unmap();

#if !NN_GFX_IS_TARGET_GX
    // Copy the noise data from the buffer into the texture, then wait on the
    // queue before returning.
    nn::gfx::TextureCopyRegion dstRegion;
    dstRegion.SetDefault();
    dstRegion.SetWidth( Size );
    dstRegion.SetHeight( Size );
    DEMOCommandBuffer.Begin();
    DEMOCommandBuffer.CopyBufferToImage( &g_NoiseTexture, dstRegion, &g_NoiseTextureData.buffer, 0 );
    DEMOCommandBuffer.End();
    DEMOQueue.ExecuteCommand( &DEMOCommandBuffer, NULL );
    DEMOQueue.Sync();
#endif
}

//--------------------------------------------------------------------------

// Allocates the buffers used by the SSAO blur compute pass: the uniform block
// (g_BlurUniform) and, on GX only, the indirect dispatch arguments
// (g_BlurDispatchBuffer).
//
// On GX the dispatch counts are the SSAO target size divided by the blur
// alignment, so g_SSAOWidth/Height and g_BlurAlignWidth/Height must already
// hold their final, non-zero values when this is called.
void CreateComputeBuffers()
{
#if NN_GFX_IS_TARGET_GX
    // Group counts in x, y, z plus one trailing zero word. The casts are
    // explicit because converting a non-constant signed value to uint32_t
    // inside a braced initializer is a narrowing conversion.
    const uint32_t data[] = {
        static_cast< uint32_t >( g_SSAOWidth / g_BlurAlignWidth ),
        static_cast< uint32_t >( g_SSAOHeight / g_BlurAlignHeight ),
        1u,
        0u,
    };
    g_BlurDispatchBuffer.Initialize( sizeof( uint32_t ) * 4, data, nn::gfx::GpuAccess_IndirectBuffer, 0 );
#endif
    g_BlurUniform.Initialize( sizeof( SSAOBlurUniform ), NULL, nn::gfx::GpuAccess_ConstantBuffer, 0 );
}

//--------------------------------------------------------------------------

void CreateRenderTargets()
{
    // Make sure all memory accesses are done
    DEMOQueue.Sync();

    // Normal Color Buffer.
    DEMOGfxSetupTextureBuffer( &g_NormalResolveTexture, &g_NormalResolveTextureView, &g_NormalResolveTextureSlot,
        &g_NormalCB, NULL,
        &g_pNormalCBResolveTexturePool, g_SSAOWidth, g_SSAOHeight, 1, 1, nn::gfx::ImageDimension_2d,
        nn::gfx::ImageFormat_R8_G8_B8_A8_Snorm, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );

    // Depth Buffer.
    DEMOGfxSetupTextureBuffer( &g_DepthResolveTexture, &g_DepthResolveTextureView, &g_DepthResolveTextureSlot, NULL, &g_DepthBuffer,
        &g_pDepthBufferResolveTexturePool, g_SSAOWidth, g_SSAOHeight, 1, 1, nn::gfx::ImageDimension_2d,
        nn::gfx::ImageFormat_D32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );

#if NN_GFX_IS_TARGET_GX
    // FIXME: Turn off hiz for debugging!!!!
    GX2DepthBuffer* pDepth = g_DepthBuffer.ToData()->pGx2DepthBuffer;
    GX2InitDepthBufferHiZPtr( pDepth, NULL );
    GX2InitDepthBufferHiZEnable( pDepth, GX2_FALSE );
    GX2InitDepthBufferRegs( pDepth );
#endif

    // SSAO/Blur Buffers.
#if NN_GFX_IS_TARGET_GX
    // Setup the first resolve texture like normal
    DEMOGfxSetupTextureBuffer( &g_SSAOResolveTextures[ 0 ], &g_SSAOResolveTextureViews[ 0 ], &g_SSAOResolveTextureSlots[ 0 ], &g_SSAOCb[ 0 ], NULL,
        &g_pSSAOCbResolveTextures[ 0 ], g_SSAOWidth, g_SSAOHeight, 1, 1, nn::gfx::ImageDimension_2d,
        nn::gfx::ImageFormat_R32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );

    // Setup the second resolve texture with some extra rules for export buffers
    {
        nn::gfx::Texture::InfoType textureInfo;
        textureInfo.SetDefault();
        textureInfo.SetWidth( g_SSAOWidth );
        textureInfo.SetHeight( g_SSAOHeight );
        textureInfo.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
        textureInfo.SetImageFormat( nn::gfx::ImageFormat_R32_Float );
        textureInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_Texture );
        textureInfo.SetTileMode( nn::gfx::TileMode_Linear );

        size_t alignment = nn::gfx::Texture::CalculateMipDataAlignment( &DEMODevice, textureInfo );
        size_t imageSize = nn::gfx::Texture::CalculateMipDataSize( &DEMODevice, textureInfo );

        nn::gfx::Buffer::InfoType bufferInfo;
        bufferInfo.SetDefault();
        bufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_UnorderedAccessBuffer | nn::gfx::GpuAccess_Texture );
        bufferInfo.SetSize( imageSize );
        alignment = std::max( alignment, nn::gfx::Buffer::GetBufferAlignment( &DEMODevice, bufferInfo ) );

        g_pSSAOResolveTextureExportPool = DEMOGfxSharedPool->AllocSubPool( imageSize, alignment );

        // Create the texture and point it at the shared pool
        g_SSAOResolveTextures[ 1 ].Initialize( &DEMODevice, textureInfo,
            g_pSSAOResolveTextureExportPool->GetPool(),
            g_pSSAOResolveTextureExportPool->GetBaseOffset(),
            g_pSSAOResolveTextureExportPool->GetSize() );

        g_SSAOResolveTextureExportBuffer.Initialize(&DEMODevice, bufferInfo,
            g_pSSAOResolveTextureExportPool->GetPool(),
            g_pSSAOResolveTextureExportPool->GetBaseOffset(),
            g_pSSAOResolveTextureExportPool->GetSize() );

        nn::gfx::GpuAddress gpuAddress;
        g_SSAOResolveTextureExportBuffer.GetGpuAddress( &gpuAddress );
        int slot = DEMOGfxRegisterBufferView( gpuAddress, imageSize );
        DEMOBufferViewDescriptorPool.GetDescriptorSlot( &g_SSAOResolveTextureExportBufferSlot, slot );

        DEMOGfxSetupTextureView( &g_SSAOResolveTextures[ 1 ], &g_SSAOResolveTextureViews[ 1 ],
            &g_SSAOResolveTextureSlots[ 1 ], nn::gfx::ImageDimension_2d,
            nn::gfx::ImageFormat_R32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent );

        DEMOGfxSetupColorView( &g_SSAOResolveTextures[ 1 ], &g_SSAOCb[1], nn::gfx::ImageDimension_2d,
            nn::gfx::ImageFormat_R32_Float );
    }
#elif NN_GFX_IS_TARGET_D3D
    // Setup the first resolve texture like normal
    DEMOGfxSetupTextureBuffer( &g_SSAOResolveTextures[ 0 ], &g_SSAOResolveTextureViews[ 0 ], &g_SSAOResolveTextureSlots[ 0 ], &g_SSAOCb[ 0 ], NULL,
        &g_pSSAOCbResolveTextures[ 0 ], g_SSAOWidth, g_SSAOHeight, 1, 1, nn::gfx::ImageDimension_2d,
        nn::gfx::ImageFormat_R32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );

    // Setup the second resolve texture with some extra rules for export buffers
    {
        nn::gfx::Texture::InfoType textureInfo;
        textureInfo.SetDefault();
        textureInfo.SetWidth( g_SSAOWidth );
        textureInfo.SetHeight( g_SSAOHeight );
        textureInfo.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
        textureInfo.SetImageFormat( nn::gfx::ImageFormat_R32_Float );
        textureInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_ColorBuffer );
        textureInfo.SetTileMode( nn::gfx::TileMode_Linear );

        size_t alignment = nn::gfx::Texture::CalculateMipDataAlignment( &DEMODevice, textureInfo );
        size_t imageSize = nn::gfx::Texture::CalculateMipDataSize( &DEMODevice, textureInfo );

        nn::gfx::Buffer::InfoType bufferInfo;
        bufferInfo.SetDefault();
        bufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_UnorderedAccessBuffer | nn::gfx::GpuAccess_Texture );
        bufferInfo.SetSize( imageSize );
        alignment = std::max( alignment, nn::gfx::Buffer::GetBufferAlignment( &DEMODevice, bufferInfo ) );

        // Create the texture and point it at the shared pool
        g_SSAOResolveTextures[ 1 ].Initialize( &DEMODevice, textureInfo,
            NULL,
            0,
            imageSize );

        g_SSAOResolveTextureExportBuffer.Initialize(&DEMODevice, bufferInfo,
            NULL,
            0,
            imageSize );

        nn::gfx::GpuAddress gpuAddress;
        g_SSAOResolveTextureExportBuffer.GetGpuAddress( &gpuAddress );
        int slot = DEMOGfxRegisterBufferView( gpuAddress, imageSize );
        DEMOBufferViewDescriptorPool.GetDescriptorSlot( &g_SSAOResolveTextureExportBufferSlot, slot );

        DEMOGfxSetupTextureView( &g_SSAOResolveTextures[ 1 ], &g_SSAOResolveTextureViews[ 1 ],
            &g_SSAOResolveTextureSlots[ 1 ], nn::gfx::ImageDimension_2d,
            nn::gfx::ImageFormat_R32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent );

        DEMOGfxSetupColorView( &g_SSAOResolveTextures[ 1 ], &g_SSAOCb[1], nn::gfx::ImageDimension_2d,
            nn::gfx::ImageFormat_R32_Float );
    }
#else
    for( s32 i =0; i < 2; ++i )
    {
        DEMOGfxSetupTextureBuffer( &g_SSAOResolveTextures[ i ], &g_SSAOResolveTextureViews[ i ],
            &g_SSAOResolveTextureSlots[ i ], &g_SSAOCb[ i ], NULL,
            &g_pSSAOCbResolveTextures[ i ], g_SSAOWidth, g_SSAOHeight, 1, 1, nn::gfx::ImageDimension_2d,
            nn::gfx::ImageFormat_R32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );
    }
#endif
#if NN_GFX_IS_TARGET_NVN
    // Register the view again to get a different image handle (NVN requires this)
    int imageID = DEMOGfxRegisterTextureView( &g_SSAOResolveTextureViews[ 1 ] );
    DEMOTextureDescriptorPool.BeginUpdate();
    nvnTexturePoolRegisterImage( DEMOTextureDescriptorPool.ToData()->pDescriptorPool, imageID,
        g_SSAOResolveTextureViews[ 1 ].ToData()->pNvnTexture, g_SSAOResolveTextureViews[ 1 ].ToData()->pNvnTextureView );
    DEMOTextureDescriptorPool.EndUpdate();
    g_ShaderBlurImageHandle = nvnDeviceGetImageHandle( DEMODevice.ToData()->pNvnDevice, imageID );
#endif

    DEMOGfxSetViewportScissorState( &g_SSAOViewportScissor, &g_pSSAOViewportScissorData, 0.0f, 0.0f,
        static_cast< float >( g_SSAOWidth ), static_cast< float >( g_SSAOHeight ), 0.0f, 1.0f,
        static_cast< float >( g_SSAOHeight ), true );
} // NOLINT(impl/function_size)

//--------------------------------------------------------------------------

// Tears down everything created for the off-screen render targets: the normal
// and depth targets, both SSAO resolve targets, the SSAO viewport/scissor
// state and, on the targets that create one, the compute export buffer.
static void DestroyRenderTargets()
{
    // Normal target: view, color target and texture first, then the memory
    // sub-pool that backed the texture.
    g_NormalResolveTextureView.Finalize( &DEMODevice );
    g_NormalCB.Finalize( &DEMODevice );
    g_NormalResolveTexture.Finalize( &DEMODevice );
    g_pNormalCBResolveTexturePool->Finalize();
    delete g_pNormalCBResolveTexturePool;
    g_pNormalCBResolveTexturePool = NULL;

    for ( int i = 0; i < 2; i++ )
    {
        g_SSAOCb[ i ].Finalize( &DEMODevice );
        g_SSAOResolveTextureViews[ i ].Finalize( &DEMODevice );
        g_SSAOResolveTextures[ i ].Finalize( &DEMODevice );

        // Not every target allocates a dedicated pool for each resolve
        // texture: the D3D setup creates the second texture without a pool
        // and the GX setup places it in the export pool released below.
        // Only release a pool that was actually allocated.
        if ( g_pSSAOCbResolveTextures[ i ] != NULL )
        {
            g_pSSAOCbResolveTextures[ i ]->Finalize();
            delete g_pSSAOCbResolveTextures[ i ];
            g_pSSAOCbResolveTextures[ i ] = NULL;
        }
    }

    // Depth target: depth view, resolve view and texture, then its sub-pool.
    g_DepthBuffer.Finalize( &DEMODevice );
    g_DepthResolveTextureView.Finalize( &DEMODevice );
    g_DepthResolveTexture.Finalize( &DEMODevice );
    g_pDepthBufferResolveTexturePool->Finalize();
    delete g_pDepthBufferResolveTexturePool;
    g_pDepthBufferResolveTexturePool = NULL;

    // Viewport/scissor state object and the memory allocated for it.
    g_SSAOViewportScissor.Finalize( &DEMODevice );
    DEMOGfxFreeMEM2( g_pSSAOViewportScissorData );
    g_pSSAOViewportScissorData = NULL;

#if NN_GFX_IS_TARGET_GX
    // The export buffer shares a sub-pool with the second resolve texture;
    // release the buffer first, then the pool.
    g_SSAOResolveTextureExportBuffer.Finalize( &DEMODevice );
    g_pSSAOResolveTextureExportPool->Finalize();
    delete g_pSSAOResolveTextureExportPool;
    g_pSSAOResolveTextureExportPool = NULL;
#elif NN_GFX_IS_TARGET_D3D
    // On D3D the export buffer is initialized without a memory pool, so only
    // the buffer object itself has to be released.
    g_SSAOResolveTextureExportBuffer.Finalize( &DEMODevice );
#endif
}

//--------------------------------------------------------------------------

// Wrapper functions that allow unused variables to be ignored when debugging shaders.

//--------------------------------------------------------------------------

// Binds a texture/sampler pair to a pixel-shader unit on the demo command
// buffer. When ALLOW_MISSING_UNIFORMS is enabled, a unit number of -1 is
// skipped instead of being forwarded to the command buffer.
void DEBUGSetPixelTextureAndSampler( const nn::gfx::DescriptorSlot& textureSlot, nn::gfx::DescriptorSlot& samplerSlot, s32 UnitNumber )
{
#if ALLOW_MISSING_UNIFORMS
    // Nothing to bind for a unit reported as -1.
    if( -1 == UnitNumber )
    {
        return;
    }
#endif

    DEMOCommandBuffer.SetTextureAndSampler(
        UnitNumber,
        nn::gfx::ShaderStage_Pixel,
        textureSlot,
        samplerSlot );
}

// Binds the vertex streams and uniform blocks that pShader uses and records
// an indexed triangle-list draw of pMesh on the demo command buffer.
void DrawMesh( Mesh* pMesh, ShaderData* pShader )
{
    // Positions and normals are bound with a four-float stride, texture
    // coordinates with a two-float stride.
    const size_t vec4Stride = sizeof( f32 ) * 4;
    const size_t vec2Stride = sizeof( f32 ) * 2;

    // Bind only the streams whose attribute location is not -1.
    for( s32 attr = 0; attr < _ATTRIBUTE_COUNT; ++attr )
    {
        if( pShader->AttrLocs[attr] == -1 )
        {
            continue;
        }

        if( attr == ATTRIBUTE_POSITION )
        {
            DEMOCommandBuffer.SetVertexBuffer( attr, pMesh->vertexBuffer.gpuAddress,
                vec4Stride, pMesh->VertexCount * vec4Stride );
        }
        else if( attr == ATTRIBUTE_NORMAL )
        {
            DEMOCommandBuffer.SetVertexBuffer( attr, pMesh->normalBuffer.gpuAddress,
                vec4Stride, pMesh->NormalCount * vec4Stride );
        }
        else if( attr == ATTRIBUTE_TEXCOORD )
        {
            DEMOCommandBuffer.SetVertexBuffer( attr, pMesh->texCoordBuffer.gpuAddress,
                vec2Stride, pMesh->TexCoordCount * vec2Stride );
        }
        // Any other attribute index has no stream to bind.
    }

    // Vertex-stage uniform block; a negative location means there is none to bind.
    const bool hasVsBlock = ( pShader->VSUniformLocs[ 0 ] >= 0 );
    if ( hasVsBlock )
    {
        DEMOCommandBuffer.SetConstantBuffer( pShader->VSUniformLocs[ 0 ], nn::gfx::ShaderStage_Vertex,
            pShader->vsUniformBlock.gpuAddress, pShader->vsUniformBlock.size );
    }

    // Pixel-stage uniform block, same rule.
    const bool hasPsBlock = ( pShader->PSUniformLocs[ 0 ] >= 0 );
    if ( hasPsBlock )
    {
        DEMOCommandBuffer.SetConstantBuffer( pShader->PSUniformLocs[ 0 ], nn::gfx::ShaderStage_Pixel,
            pShader->psUniformBlock.gpuAddress, pShader->psUniformBlock.size );
    }

    // Indexed draw with 32-bit indices, starting at base vertex 0.
    DEMOCommandBuffer.DrawIndexed(
        nn::gfx::PrimitiveTopology_TriangleList,
        nn::gfx::IndexFormat_Uint32,
        pMesh->indexBuffer.gpuAddress,
        pMesh->Indices,
        0 );
}

//--------------------------------------------------------------------------

// Binds the scene textures and draws the scene geometry (only the room mesh)
// with pShader.
// NOTE(review): the sampler units are always read from g_ShaderObj, even when
// a different shader is passed in - confirm this is intentional.
void DrawSceneMeshes( ShaderData* pShader )
{
    const ShaderData& objShader = g_ShaderObj;

    // Room textures: skybox, albedo (concrete) and the SSAO result as light.
    DEBUGSetPixelTextureAndSampler(
        g_TextureSkybox.GetDescriptorSlot(0),
        g_SamplerLinearClampSlot,
        objShader.PSSamplerLocs[PS_SAMPLER_SKYBOX] );
    DEBUGSetPixelTextureAndSampler(
        g_TextureConcrete.GetDescriptorSlot(0),
        g_SamplerLinearClampSlot,
        objShader.PSSamplerLocs[PS_SAMPLER_ALBEDO] );
    DEBUGSetPixelTextureAndSampler(
        g_SSAOResolveTextureSlots[1],
        g_SamplerLinearClampSlot,
        objShader.PSSamplerLocs[PS_SAMPLER_LIGHT] );

    // Room.
    DrawMesh( &g_MeshRoom, pShader );
}

//--------------------------------------------------------------------------

// Records the first pass: draws the scene with the g_ShaderEncNormals pipeline
// into the normal color target (g_NormalCB) and the depth buffer
// (g_DepthBuffer), bracketed by the TIME_NORMAL_DEPTH timestamps.
void DrawDepthAndNormals()
{
    DEMOGfxDebugTagIndent("DrawDepthAndNormals");

    // Setup Render Targets: clear normals to (0, 0, 0, 1) and depth to 1.0,
    // then bind both.
    DEMOCommandBuffer.ClearColor( &g_NormalCB, 0.0f, 0.0f, 0.0f, 1.0f, NULL );
    DEMOCommandBuffer.ClearDepthStencil( &g_DepthBuffer, 1.0f, 0, nn::gfx::DepthStencilClearMode_Depth, NULL );

    nn::gfx::ColorTargetView* renderTargets[] = { &g_NormalCB };
    DEMOCommandBuffer.SetRenderTargets( 1, renderTargets, &g_DepthBuffer );

    // Setup Viewport/Scissor (the SSAO-sized state, not the default one).
    DEMOCommandBuffer.SetViewportScissorState( &g_SSAOViewportScissor );

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode( GX2_SHADER_MODE_UNIFORM_BLOCK );
#endif

    // The scene is drawn with an identity model matrix.
    MTX44Identity( g_Model );

    // Setup Shaders.
    DEMOCommandBuffer.SetPipeline( &g_ShaderEncNormals.demoPipeline.pipeline );

    // Upload model/view/projection matrices to the vertex-stage uniform block.
    ShaderEncNormalVsUniforms* pVsUniforms = g_ShaderEncNormals.vsUniformBlock.Map< ShaderEncNormalVsUniforms >();
    memcpy( pVsUniforms->uModelMtx, g_Model, sizeof( g_Model ) );
    memcpy( pVsUniforms->uViewMtx, g_View, sizeof( g_View ) );
    memcpy( pVsUniforms->uProjMtx, g_Proj, sizeof( g_Proj ) );
#if NN_GFX_IS_TARGET_GX
    // GX2 needs the uniform data endian-swapped before the block is unmapped.
    GX2EndianSwap( pVsUniforms, sizeof(*pVsUniforms));
#endif
    g_ShaderEncNormals.vsUniformBlock.Unmap();

    // Draw the scene between the begin/end timestamps of this pass.
    g_Times[ ( TIME_NORMAL_DEPTH * 2 ) + 0 ].QueryTimestamp();
    DrawSceneMeshes( &g_ShaderEncNormals );
    g_Times[ ( TIME_NORMAL_DEPTH * 2 ) + 1 ].QueryTimestamp();

    // Flush depth writes and move the depth resolve texture from the
    // depth-write to the depth-read state.
    DEMOCommandBuffer.FlushMemory( nn::gfx::GpuAccess_DepthStencil );
    DEMOCommandBuffer.SetTextureStateTransition( &g_DepthResolveTexture, NULL, nn::gfx::TextureState_DepthWrite, nn::gfx::ShaderStageBit_Pixel,
        nn::gfx::TextureState_DepthRead, nn::gfx::ShaderStageBit_Pixel );

    // Disabled: manual GX2 depth expansion, kept for reference.
#if 0 //NN_GFX_IS_TARGET_GX
    // FIXME: Currently SetTextureStateTransition is a no-op so we need to expand ourselves
    GX2UTSetExpandDepthState( GX2_TRUE );
    GX2UTExpandDepthBuffer( g_DepthBuffer.ToData()->pGx2DepthBuffer );
    GX2UTSetExpandDepthState( GX2_FALSE );

    GX2SetShaderMode( GX2_SHADER_MODE_UNIFORM_BLOCK );
#endif

    DEMOGfxDebugTagUndent();
}

//--------------------------------------------------------------------------

void DrawSSAO()
{
    DEMOGfxDebugTagIndent("DrawSSAO");

    if( g_MenuData[MENU_BLUR_ENABLE].Value != 0 )
    {
        DEMOCommandBuffer.ClearColor( &g_SSAOCb[0], 0.0f, 0.0f, 0.0f, 1.0f, NULL );

        DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 0 ], NULL,
            nn::gfx::TextureState_Clear, nn::gfx::ShaderStageBit_All,
            nn::gfx::TextureState_ColorTarget, nn::gfx::ShaderStageBit_All );

        nn::gfx::ColorTargetView* renderTargets[] = { &g_SSAOCb[ 0 ] };
        DEMOCommandBuffer.SetRenderTargets( 1, renderTargets, NULL );
    }
    else
    {
        DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 1 ], NULL,
            nn::gfx::TextureState_ShaderRead, nn::gfx::ShaderStageBit_All,
            nn::gfx::TextureState_Clear, nn::gfx::ShaderStageBit_All );

        DEMOCommandBuffer.ClearColor( &g_SSAOCb[1], 0.0f, 0.0f, 0.0f, 1.0f, NULL );

        DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 1 ], NULL,
            nn::gfx::TextureState_Clear, nn::gfx::ShaderStageBit_All,
            nn::gfx::TextureState_ColorTarget, nn::gfx::ShaderStageBit_All );

        // Draw directly to 1
        nn::gfx::ColorTargetView* renderTargets[] = { &g_SSAOCb[ 1 ] };
        DEMOCommandBuffer.SetRenderTargets( 1, renderTargets, NULL );
    }

    DEMOCommandBuffer.SetViewportScissorState( &g_SSAOViewportScissor );

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_BLOCK);
#endif

    // Setup Shaders.
    DEMOCommandBuffer.SetPipeline( &g_ShaderSSAO.demoPipeline.pipeline );

    // Setup Uniforms.
    float TanHalfFOV = tanf( 0.5f * MTXDegToRad( CAM_FOV ) );
    float Aspect     = (f32)g_SSAOWidth / (f32)g_SSAOHeight;
    float ViewRay[4] =
    {
        TanHalfFOV * Aspect, TanHalfFOV, 0, 0,
    };

#if !NN_GFX_IS_TARGET_GX && !NN_GFX_IS_TARGET_D3D
    ViewRay[1] = -ViewRay[1];
#endif

    SSAOVsUniforms* pVsUniforms = g_ShaderSSAO.vsUniformBlock.Map< SSAOVsUniforms >();
    memcpy( &pVsUniforms->uViewRay, ViewRay, sizeof( float ) * 4 );
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap( &pVsUniforms->uViewRay, sizeof( float ) * 4 );
#endif
    g_ShaderSSAO.vsUniformBlock.Unmap();

    float ProjectionCoeffs[] =
    {
        CAM_ZFAR / (CAM_ZFAR - CAM_ZNEAR),
        -(CAM_ZFAR * CAM_ZNEAR) / (CAM_ZFAR - CAM_ZNEAR),
    };

    float NoiseScale[] =
    {
        static_cast< float >( g_SSAOWidth ) / g_NoiseSize,
        static_cast< float >( g_SSAOHeight ) / g_NoiseSize,
    };

    s32 KernelSize = (s32)g_MenuData[MENU_KERNEL].Value;

    SSAOPsUniforms* pPsUniforms = g_ShaderSSAO.psUniformBlock.Map< SSAOPsUniforms >();
#if !NN_GFX_IS_TARGET_GX
    Mtx44 flipped;
    MTX44ScaleApply(g_Proj, flipped, 1.0, -1.0, 1.0);
    memcpy( pPsUniforms->uProjMtx, flipped, sizeof( g_Proj ) );
#else
    memcpy( pPsUniforms->uProjMtx, g_Proj, sizeof( g_Proj ) );
#endif
    memcpy( pPsUniforms->uKernelOffsets, g_SampleKernel, MAX_KERNEL_SIZE * 4 * sizeof( float ) );
    pPsUniforms->uKernelSize = KernelSize;
    memcpy( pPsUniforms->uProjCoeffs, ProjectionCoeffs, sizeof( float ) * 2 );
    memcpy( pPsUniforms->uNoiseScale, NoiseScale, sizeof( float ) * 2 );
    pPsUniforms->uSampleRadius = g_MenuData[ MENU_RADIUS ].Value;
    pPsUniforms->uDepthBias = g_MenuData[ MENU_DEPTH_SMOOTH ].Value;
    pPsUniforms->uPower = g_MenuData[ MENU_POWER ].Value;
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(pPsUniforms, sizeof(*pPsUniforms));
#endif
    g_ShaderSSAO.psUniformBlock.Unmap();

    DEBUGSetPixelTextureAndSampler( g_NormalResolveTextureSlot, g_SamplerPointClampSlot, g_ShaderSSAO.PSSamplerLocs[PS_SAMPLER_NORMALS] );
    DEBUGSetPixelTextureAndSampler( g_DepthResolveTextureSlot, g_SamplerPointClampSlot, g_ShaderSSAO.PSSamplerLocs[PS_SAMPLER_DEPTH] );

    DEMOCommandBuffer.FlushMemory( nn::gfx::GpuAccess_DepthStencil );
    DEMOCommandBuffer.InvalidateMemory( nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Texture );

    DEBUGSetPixelTextureAndSampler( g_NoiseTextureSlot, g_SamplerPointWrapSlot, g_ShaderSSAO.PSSamplerLocs[PS_SAMPLER_NOISE] );

    // Draw the full-screen quad.
    g_Times[ ( TIME_SSAO * 2 ) + 0 ].QueryTimestamp();
    DrawMesh( &g_MeshQuad, &g_ShaderSSAO );
    g_Times[ ( TIME_SSAO * 2 ) + 1 ].QueryTimestamp();

    if( g_MenuData[MENU_BLUR_ENABLE].Value != 0 )
    {
        DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 0 ], NULL,
            nn::gfx::TextureState_ColorTarget, nn::gfx::ShaderStageBit_All,
            nn::gfx::TextureState_ShaderRead, nn::gfx::ShaderStageBit_All );
    }
    else
    {
        DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 1 ], NULL,
            nn::gfx::TextureState_ColorTarget, nn::gfx::ShaderStageBit_All,
            nn::gfx::TextureState_ShaderRead, nn::gfx::ShaderStageBit_All );
    }

#if NN_GFX_IS_TARGET_D3D
    DEMOCommandBuffer.SetRenderTargets( 0, NULL, NULL );
#endif

    DEMOGfxDebugTagUndent();
}

//--------------------------------------------------------------------------

#if NN_GFX_IS_TARGET_GL
// GL user-command callback: binds mip level 0 of the second SSAO resolve
// texture as a read/write R32F image at the blur shader's destination unit.
static void SetImageCallback( const void* )
{
    const GLuint imageUnit = g_ShaderBlur.CSSamplerLocs[ CS_SAMPLER_DST_IMAGE ];
    const GLuint textureName = g_SSAOResolveTextureViews[ 1 ].ToData()->hTexture;

    glBindImageTexture( imageUnit, textureName, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32F );
}

#endif

//--------------------------------------------------------------------------

// Records the compute-shader blur pass: reads SSAO resolve texture 0 and
// produces the blurred result for SSAO resolve texture 1. How the output is
// bound and made visible afterwards differs per graphics target.
void BlurSSAO()
{
    DEMOGfxDebugTagIndent("BlurSSAO");

#if NN_GFX_IS_TARGET_GX
    // Set shader mode to compute shaders
    GX2SetShaderMode(GX2_SHADER_MODE_COMPUTE_SHADER);
#endif

    // Blur SSAO.
    DEMOCommandBuffer.SetPipeline( &g_ShaderBlur.computePipeline );

    // Fix uniform block setup for shader
    // FIXME: Should wait till this resource is no longer in use before changing
    // any data
    // Source and destination dimensions are both the SSAO target size.
    SSAOBlurUniform* pBlurUniform = g_BlurUniform.Map< SSAOBlurUniform >();
    pBlurUniform->dimensions.x = g_SSAOWidth;  // Src.x
    pBlurUniform->dimensions.y = g_SSAOHeight; // Src.y
    pBlurUniform->dimensions.z = g_SSAOWidth;  // Dst.x
    pBlurUniform->dimensions.w = g_SSAOHeight; // Dst.y
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(pBlurUniform, sizeof(SSAOBlurUniform));
#endif
    g_BlurUniform.Unmap();

    // Setup uniform blocks
    DEMOCommandBuffer.SetConstantBuffer( g_ShaderBlur.CSUniformLocs[ CS_UNIFORM_DIMENSIONS ], nn::gfx::ShaderStage_Compute,
        g_BlurUniform.gpuAddress, g_BlurUniform.size );

    // Setup textures
    DEMOCommandBuffer.SetTextureAndSampler( g_ShaderBlur.CSSamplerLocs[ CS_SAMPLER_SRC_TEXTURE ], nn::gfx::ShaderStage_Compute,
        g_SSAOResolveTextureSlots[ 0 ], g_SamplerLinearClampSlot );

    // Make sure the input texture has been flushed
    DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 0 ], NULL, nn::gfx::TextureState_ColorTarget, nn::gfx::ShaderStageBit_All,
        nn::gfx::TextureState_ShaderRead, nn::gfx::ShaderStageBit_All );

    // Setup output buffer
    // GL binds an image through a user callback, NVN binds an image handle,
    // GX and D3D bind the export buffer as an unordered access buffer.
#if NN_GFX_IS_TARGET_GL
    DEMOCommandBuffer.Gl4SetUserCommand( SetImageCallback, NULL );
#elif NN_GFX_IS_TARGET_NVN
    nvnCommandBufferBindImage( DEMOCommandBuffer.ToData()->pNvnCommandBuffer, NVN_SHADER_STAGE_COMPUTE, g_ShaderBlur.CSSamplerLocs[ CS_SAMPLER_DST_IMAGE ], g_ShaderBlurImageHandle );
#elif NN_GFX_IS_TARGET_GX
    DEMOCommandBuffer.SetUnorderedAccessBuffer( 0, nn::gfx::ShaderStage_Compute, g_SSAOResolveTextureExportBufferSlot );
#elif NN_GFX_IS_TARGET_D3D
    DEMOCommandBuffer.SetUnorderedAccessBuffer( 0, nn::gfx::ShaderStage_Compute, g_SSAOResolveTextureExportBufferSlot );
#endif

    // Run the shader
    // GX dispatches indirectly from g_BlurDispatchBuffer; other targets
    // dispatch (width / align width) x (height / align height) x 1 groups.
    g_Times[ ( TIME_BLUR * 2 ) + 0 ].QueryTimestamp();
#if NN_GFX_IS_TARGET_GX
    DEMOCommandBuffer.DispatchIndirect( g_BlurDispatchBuffer.gpuAddress );
#else
    DEMOCommandBuffer.Dispatch(
        g_SSAOWidth / g_BlurAlignWidth,
        g_SSAOHeight / g_BlurAlignHeight,
        1 );
#endif
    g_Times[ ( TIME_BLUR * 2 ) + 1 ].QueryTimestamp();

#if NN_GFX_IS_TARGET_GX
    // Flush the unordered-access writes and invalidate texture reads.
    DEMOCommandBuffer.FlushMemory( nn::gfx::GpuAccess_UnorderedAccessBuffer );
    DEMOCommandBuffer.InvalidateMemory( nn::gfx::GpuAccess_Texture );
#elif NN_GFX_IS_TARGET_D3D

    // gfx on D3D11 does not support Texture2D unordered access views, so the
    // following sequence manually copies the buffer into the texture.

    // Flush the commands recorded so far.
    DEMOCommandBuffer.End();
    DEMOQueue.ExecuteCommand(&DEMOCommandBuffer, NULL);

    // Copy this pass's output into the texture the next pass reads as input.
    DEMOCommandBuffer.Begin();
    nn::gfx::TextureCopyRegion dstRegion;
    dstRegion.SetDefault();
    dstRegion.SetWidth(g_SSAOWidth);
    dstRegion.SetHeight(g_SSAOHeight);
    DEMOCommandBuffer.CopyBufferToImage( &g_SSAOResolveTextures[ 1 ], dstRegion, &g_SSAOResolveTextureExportBuffer, 0 );
    DEMOCommandBuffer.End();

    // Resume recording commands.
    DEMOQueue.ExecuteCommand(&DEMOCommandBuffer, NULL);
    DEMOCommandBuffer.Begin();

#else
    // Invalidate the export buffer
    DEMOCommandBuffer.SetTextureStateTransition( &g_SSAOResolveTextures[ 1 ], NULL, nn::gfx::TextureState_ShaderWrite, nn::gfx::ShaderStageBit_All,
        nn::gfx::TextureState_ShaderRead, nn::gfx::ShaderStageBit_All );
    DEMOCommandBuffer.InvalidateMemory( nn::gfx::GpuAccess_Texture );
#endif

    // Reset shader mode
#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_BLOCK);
#endif

    DEMOGfxDebugTagUndent();
}

//--------------------------------------------------------------------------

// Records the final shaded pass into the demo color buffer: first a sky quad
// drawn with the g_ShaderSkybox pipeline, then the scene meshes drawn with the
// g_ShaderObj pipeline, bracketed by the TIME_OBJECT timestamps.
void DrawObjects()
{
    DEMOGfxDebugTagIndent("DrawObjects");

    // Clear the color buffer
    DEMOCommandBuffer.ClearColor( DEMOGetColorBufferView(), 0.0f, 0.0f, 0.0f, 1.0f, NULL );

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_BLOCK);
#endif

    // Setup Viewport/Scissor.
    DEMOGfxSetDefaultViewportScissor();

    // Setup Render Targets.
    nn::gfx::ColorTargetView* renderTargets[] = { DEMOGetColorBufferView() };
    DEMOCommandBuffer.SetRenderTargets( 1, renderTargets, &DEMODepthBufferView );

    // Render sky...quad.
    {
        MTX44Identity( g_Model );

        // The sky shader receives the inverse of (projection * view).
        Mtx44 ViewProj, InvViewProj;
        MTX44Concat( g_Proj, g_View, ViewProj );
        MTX44Inverse( ViewProj, InvViewProj );

        // Setup Shaders.
        DEMOCommandBuffer.SetPipeline( &g_ShaderSkybox.demoPipeline.pipeline );

        DEBUGSetPixelTextureAndSampler( g_TextureSkybox.GetDescriptorSlot(0), g_SamplerLinearClampSlot, g_ShaderSkybox.PSSamplerLocs[PS_SAMPLER_SKYBOX] );

        // Setup uniforms
        // Vertex stage: inverse view-projection matrix.
        float* pInvViewProj = g_ShaderSkybox.vsUniformBlock.Map< float >();
        memcpy( pInvViewProj, InvViewProj, sizeof( InvViewProj ) );
#if NN_GFX_IS_TARGET_GX
        GX2EndianSwap( pInvViewProj, sizeof( InvViewProj ) );
#endif
        g_ShaderSkybox.vsUniformBlock.Unmap();

        // Pixel stage: camera position.
        float* pCameraPos = g_ShaderSkybox.psUniformBlock.Map< float >();
        memcpy( pCameraPos, &g_CamPos, sizeof( g_CamPos ) );
#if NN_GFX_IS_TARGET_GX
        GX2EndianSwap( pCameraPos, sizeof( g_CamPos ) );
#endif
        g_ShaderSkybox.psUniformBlock.Unmap();

        DrawMesh( &g_MeshQuad, &g_ShaderSkybox );
    }

    // We can use the existing depth buffer only if it matches the surface size.
    if( g_SSAOWidth != SURFACE_WIDTH || g_SSAOHeight != SURFACE_HEIGHT )
    {
        // Sizes differ: clear the demo depth buffer and render the scene with it.
        DEMOCommandBuffer.ClearDepthStencil( &DEMODepthBufferView, 1.0f, 0, nn::gfx::DepthStencilClearMode_Depth, NULL );

        DEMOCommandBuffer.SetRenderTargets( 1, renderTargets, &DEMODepthBufferView );

        // Setup Shaders.
        DEMOCommandBuffer.SetPipeline( &g_ShaderObj.demoPipeline.pipeline );
    }
    else
    {
        // Sizes match: reuse the depth written by the normal/depth pass.
        DEMOCommandBuffer.SetRenderTargets( 1, renderTargets, &g_DepthBuffer );

        // Setup Shaders.
        DEMOCommandBuffer.SetPipeline( &g_ShaderObj.demoPipeline.pipeline );

        // Depth buffer is already written, so just read.
        DEMOCommandBuffer.SetDepthStencilState( &g_DepthReadOnlyState );
    }

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode(GX2_SHADER_MODE_UNIFORM_BLOCK);
#endif

    MTX44Identity( g_Model );

    // Vertex stage: model/view/projection matrices for the object shader.
    SSAOObjVsUniforms* pVsUniforms = g_ShaderObj.vsUniformBlock.Map< SSAOObjVsUniforms >();
    memcpy( pVsUniforms->uModelMtx, g_Model, sizeof( g_Model ) );
    memcpy( pVsUniforms->uViewMtx, g_View, sizeof( g_View ) );
    memcpy( pVsUniforms->uProjMtx, g_Proj, sizeof( g_Proj ) );
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap( pVsUniforms, sizeof(*pVsUniforms));
#endif
    g_ShaderObj.vsUniformBlock.Unmap();

    // Pixel stage: camera position.
    vec4* pCameraPos = g_ShaderObj.psUniformBlock.Map< vec4 >();
    memcpy( pCameraPos, &g_CamPos, sizeof( g_CamPos ) );
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap( pCameraPos, sizeof(*pCameraPos));
#endif
    g_ShaderObj.psUniformBlock.Unmap();

    // Draw the scene.
    g_Times[ ( TIME_OBJECT * 2 ) + 0 ].QueryTimestamp();
    DrawSceneMeshes( &g_ShaderObj );
    g_Times[ ( TIME_OBJECT * 2 ) + 1 ].QueryTimestamp();

    DEMOGfxDebugTagUndent();
}

//--------------------------------------------------------------------------

// Most recent per-pass GPU times in milliseconds, as shown by the debug UI.
static float s_NormalDepthTime = 0.0f;
static float s_SsaoTime = 0.0f;
static float s_BlurTime = 0.0f;
static float s_ObjectTime = 0.0f;

// Converts the difference between the end and begin timestamps of one timed
// pass (slots pass * 2 + 1 and pass * 2 of g_Times) to milliseconds.
static float ElapsedPassTimeMs( int pass )
{
    return GpuTicksToMilliseconds(
        g_Times[ pass * 2 + 1 ].GetTimestampResult()
            - g_Times[ pass * 2 ].GetTimestampResult() );
}

// Refreshes the cached pass times for the passes enabled in the menu.
void UpdateUI()
{
    // Command buffer must be recording for GetTimestampResults
    DEMOCommandBuffer.Begin();

    s_NormalDepthTime = ElapsedPassTimeMs( TIME_NORMAL_DEPTH );

    // SSAO time is refreshed only while SSAO is enabled, and blur time only
    // while both SSAO and blur are enabled.
    const bool ssaoEnabled = ( g_MenuData[ MENU_SSAO_ENABLE ].Value != 0 );
    if ( ssaoEnabled )
    {
        s_SsaoTime = ElapsedPassTimeMs( TIME_SSAO );
    }
    if ( ssaoEnabled && g_MenuData[ MENU_BLUR_ENABLE ].Value != 0 )
    {
        s_BlurTime = ElapsedPassTimeMs( TIME_BLUR );
    }

    // Object time is refreshed unless the SSAO-only view is selected.
    const bool ssaoOnly = ( g_MenuData[ MENU_SSAO_ONLY ].Value != 0 );
    if ( !ssaoOnly )
    {
        s_ObjectTime = ElapsedPassTimeMs( TIME_OBJECT );
    }

    // Command buffer is left recording after GetTimestampResults so stop recording
    DEMOCommandBuffer.End();
}

//--------------------------------------------------------------------------

void DrawUI()
{
    DEMOGfxDebugTagIndent("DrawUI");

    // Setup Viewport/Stencil.
    DEMOGfxSetDefaultViewportScissor();

    // Set Demo Font state
    //DEMOFontSetViewport(0.0f, 0.0f, SURFACE_WIDTH, SURFACE_HEIGHT );
    DEMOFontSetGridSize(120,40);
    DEMOFontSetSpacing(FALSE);      // use proportional fonts
    DEMOFontSetColor(1.0f, 1.0f, 1.0f, 1.0f);

    f32 iRow = 1;
    f32 iCol = 1;

    // Times.
    {
        DEMOFontPrintf( iCol, iRow++, "[Times]" );
        ++iCol;
            DEMOFontPrintf( iCol, iRow++, "%-16s - %.3fms - %.3fms Avg", "Normal/Depth", s_NormalDepthTime, g_AvgTimes[TIME_NORMAL_DEPTH] );
            DEMOFontPrintf( iCol, iRow++, "%-16s - %.3fms - %.3fms Avg", "SSAO"        , s_SsaoTime, g_AvgTimes[TIME_SSAO] );
            DEMOFontPrintf( iCol, iRow++, "%-16s - %.3fms - %.3fms Avg", "Blur"        , s_BlurTime, g_AvgTimes[TIME_BLUR] );
            DEMOFontPrintf( iCol, iRow++, "%-16s - %.3fms - %.3fms Avg", "Object"      , s_ObjectTime, g_AvgTimes[TIME_OBJECT] );
        --iCol;

        // Normal average
        g_FrameCount++;

        g_AvgTimes[ TIME_NORMAL_DEPTH ] = ( g_AvgTimes[ TIME_NORMAL_DEPTH ] * ( g_FrameCount - 1 ) + s_NormalDepthTime ) / g_FrameCount;
        g_AvgTimes[ TIME_SSAO ] = ( g_AvgTimes[ TIME_SSAO ] * ( g_FrameCount - 1 ) + s_SsaoTime ) / g_FrameCount;
        g_AvgTimes[ TIME_BLUR ] = ( g_AvgTimes[ TIME_BLUR ] * ( g_FrameCount - 1 ) + s_BlurTime ) / g_FrameCount;
        g_AvgTimes[ TIME_OBJECT ] = ( g_AvgTimes[ TIME_OBJECT ] * ( g_FrameCount - 1 ) + s_ObjectTime ) / g_FrameCount;
    }

    ++iRow;

    // Menu Items.
    if( g_MenuHeaderActive == -1 )
    {
        DEMOFontPrintf( iCol, iRow++, "[a] - Menu" );
        DEMOFontPrintf( iCol, iRow++, "[b] - Hide UI" );
    }
    else
    {
        for( s32 iHeader = _MENU_HEADER_FIRST; iHeader <= _MENU_HEADER_LAST; ++iHeader )
        {
            MenuItemData* pHeader = &g_MenuData[iHeader];
            if( g_MenuHeaderActive == iHeader )
            {
                DEMOFontSetColor( 1.0f, 1.0f, 1.0f, 1.0f );
                DEMOFontPrintf  ( iCol, iRow++, "[%s]", pHeader->Name );

                if( g_MenuItemActive != -1 )
                {
                    s32 iFirst = static_cast< s32 >( pHeader->Min ) ;
                    s32 iLast  = static_cast< s32 >( pHeader->Max ) ;

                    ++iCol;
                    for( s32 iItem = iFirst; iItem <= iLast; ++iItem )
                    {
                        MenuItemData* pItem = &g_MenuData[iItem];

                        DEMOFontSetColor( 1.0f, 1.0f, 1.0f, (iItem == g_MenuItemActive) ? 1.0f : 0.5f );

                        switch( pItem->Type )
                        {
                            case ITEM_TYPE_INT:
                            {
                                DEMOFontPrintf( iCol, iRow++, "%-20s%d", pItem->Name, ( s32 ) pItem->Value );
                                break;
                            }
                            case ITEM_TYPE_FLOAT:
                            {
                                DEMOFontPrintf( iCol, iRow++, "%-20s%.2f", pItem->Name, pItem->Value );
                                break;
                            }
                            case ITEM_TYPE_BOOL:
                            {
                                DEMOFontPrintf( iCol, iRow++, "%-20s%s", pItem->Name, pItem->Value != 0 ? "TRUE" : "FALSE" );
                                break;
                            }
                            default: break;
                        }
                    }
                    --iCol;
                }
            }
            else
            {
                DEMOFontSetColor( 1.0f, 1.0f, 1.0f, 0.5f );
                DEMOFontPrintf  ( iCol, iRow++, "[%s]", pHeader->Name );
            }
        }

        ++iRow;
        DEMOFontSetColor( 1.0f, 1.0f, 1.0f, 1.0f );
        DEMOFontPrintf( iCol, iRow++, "[a] - Select" );
        DEMOFontPrintf( iCol, iRow++, "[b] - Back" );
    }
    DEMOGfxDebugTagUndent();
} // NOLINT(impl/function_size)

//--------------------------------------------------------------------------

void DrawScene()
{
    // Clear resources.
    MTX44Identity( g_Model );

    DEMOGfxBeforeRender();

    // Render Normals and Depth.
    DrawDepthAndNormals();

    // Render SSAO pass.
    if( g_MenuData[MENU_SSAO_ENABLE].Value != 0 )
    {
        DrawSSAO();
        if( g_MenuData[MENU_BLUR_ENABLE].Value != 0 )
        {
            BlurSSAO();
        }
    }
    else if ( g_MenuData[MENU_SSAO_ONLY].Value == 0 )
    {
        DEMOCommandBuffer.ClearColor( &g_SSAOCb[1], 1.0f, 1.0f, 1.0f, 1.0f, NULL );
    }

    // Render shaded objects.
    if( g_MenuData[MENU_SSAO_ONLY].Value == 0 )
    {
        DrawObjects();
    }
    else
    {
        nn::gfx::TextureSubresource dstSubResource;
        nn::gfx::TextureCopyRegion srcCopyRegion;
        dstSubResource.SetDefault();
        srcCopyRegion.SetDefault();
        srcCopyRegion.SetWidth( g_SSAOWidth );
        srcCopyRegion.SetHeight( g_SSAOHeight );
        DEMOCommandBuffer.CopyImage( DEMOGetColorBuffer(), dstSubResource, 0, 0, 0, &g_SSAOResolveTextures[ 1 ], srcCopyRegion );
    }

    // Render debug UI.
    if ( !g_MenuHidden )
    {
        DrawUI();
    }

    DEMOGfxDoneRender();

    if ( !g_MenuHidden )
    {
        UpdateUI();
    }

}

//--------------------------------------------------------------------------

void ProcessPad()
{
    DEMOPadRead();
    u16 buttonDown = DEMOPadGetButtonDown(0);

    {
        // L Stick translates camera
        f32 sx = (f32)DEMOPadGetStickX(0) / 255.0f;
        f32 sy = (f32)DEMOPadGetStickY(0) / 255.0f;

        Vec vec, tempVec;
        tempVec.x =  sx * 1.0f;
        tempVec.y = 0.0f;
        tempVec.z = -sy * 1.0f;

        Mtx44 inv;
        MTX44Inverse(g_View, inv);
        MTX44MultVecSR(inv, &tempVec, &vec);
        Vec tempCamLoc = g_CamPos;
        Vec tempObjPt = g_CamLookAt;
        VECAdd(&vec, &tempCamLoc, &g_CamPos);
        VECAdd(&vec, &tempObjPt, &g_CamLookAt);

        CameraSetup();
    }
    {
        // R Stick rotates camera
        f32 sx = (f32)DEMOPadGetSubStickX(0) / 255.0f;
        f32 sy = (f32)DEMOPadGetSubStickY(0) / 255.0f;

        Vec eyev, tempEyev;
        VECSubtract(&g_CamLookAt, &g_CamPos, &eyev);
        Vec wupv =  {0.0f, 1.0f, 0.0f};
        VECCrossProduct(&eyev, &g_CamUp, &tempEyev);
        VECNormalize(&tempEyev, &eyev);

        Mtx44 rot,rot0,rot1;
        MTX44RotAxisRad(rot0, &eyev, MTXDegToRad( sy * 5.0f));
        MTX44RotAxisRad(rot1, &wupv, MTXDegToRad(-sx * 5.0f));

        MTX44Concat(rot0, rot1, rot);

        Vec camv, tempCamv;
        VECSubtract(&g_CamLookAt, &g_CamPos, &tempCamv);
        MTX44MultVecSR(rot, &tempCamv, &camv);

        VECAdd(&camv, &g_CamPos, &g_CamLookAt);
        Vec tempUp = g_CamUp;
        MTX44MultVecSR(rot, &tempUp, &g_CamUp);

        CameraSetup();
    }

    // Reload shaders.
    if (DEMO_PAD_BUTTON_X & buttonDown)
    {
        ReloadShader( &g_ShaderSSAO );
        ReloadShader( &g_ShaderEncNormals );
        ReloadShader( &g_ShaderObj );
        ReloadShader( &g_ShaderSkybox );
    }

    //
    // Menu Navigation.
    //

    if( DEMO_PAD_BUTTON_B & buttonDown)
    {
        if( g_MenuItemActive != -1 )
        {
            g_MenuItemActive = -1;
            if( NN_STATIC_CONDITION( _MENU_HEADER_COUNT == 1 ) )
                g_MenuHeaderActive = -1;
        }
        else
        if( g_MenuHeaderActive != -1 )
        {
            g_MenuHeaderActive = -1;
            g_MenuItemActive = -1;
        }
        else
        {
            g_MenuHidden = !g_MenuHidden;
        }
    }
    if( DEMO_PAD_BUTTON_A & buttonDown)
    {
        if( g_MenuHidden )
            g_MenuHidden = false;

        if( g_MenuHeaderActive == -1 )
        {
            g_MenuHeaderActive = _MENU_HEADER_FIRST;
            if( NN_STATIC_CONDITION( _MENU_HEADER_COUNT == 1 ) )
                g_MenuItemActive = (int)g_MenuData[g_MenuHeaderActive].Min;
        }
        else
        if( g_MenuItemActive == -1 )
        {
            g_MenuItemActive = (int)g_MenuData[g_MenuHeaderActive].Min;
        }
    }
    if( DEMO_PAD_BUTTON_UP & buttonDown)
    {
        if( g_MenuItemActive != -1 )
        {
            if( --g_MenuItemActive < (int)g_MenuData[g_MenuHeaderActive].Min )
                g_MenuItemActive = (int)g_MenuData[g_MenuHeaderActive].Max;
        }
        else
        if( g_MenuHeaderActive != -1 )
        {
            if( --g_MenuHeaderActive < _MENU_HEADER_FIRST )
                g_MenuHeaderActive = _MENU_HEADER_LAST;
        }
    }
    if( DEMO_PAD_BUTTON_DOWN & buttonDown )
    {
        if( g_MenuItemActive != -1 )
        {
            if( ++g_MenuItemActive > (int)g_MenuData[g_MenuHeaderActive].Max )
                g_MenuItemActive = (int)g_MenuData[g_MenuHeaderActive].Min;
        }
        else
        if( g_MenuHeaderActive != -1 )
        {
            if( ++g_MenuHeaderActive > _MENU_HEADER_LAST )
                g_MenuHeaderActive = _MENU_HEADER_FIRST;
        }
    }

    if( g_MenuItemActive != -1 )
    {
        MenuItemData* pData = &g_MenuData[g_MenuItemActive];

        f32 NewValue = pData->Value;


        if( pData->Type == ITEM_TYPE_BOOL )
        {
            if( buttonDown & DEMO_PAD_BUTTON_RIGHT || buttonDown & DEMO_PAD_BUTTON_LEFT )
            {
                NewValue = 1 - NewValue;
            }
        }
        else
        {
            if( buttonDown & DEMO_PAD_BUTTON_RIGHT) NewValue += pData->Inc;
            if( buttonDown & DEMO_PAD_BUTTON_LEFT ) NewValue -= pData->Inc;
        }

        NewValue = std::min( pData->Max, std::max( NewValue, pData->Min ) );
        if( NewValue != pData->Value )
        {
            pData->Value = NewValue;

            if( pData->OnChanged != NULL )
                pData->OnChanged( NewValue );
        }
    }
} // NOLINT(impl/function_size)

//--------------------------------------------------------------------------

// Menu callback: resizes the SSAO render targets.
// Value is the new scale factor applied to the color-buffer dimensions.
void OnMenuItemChangeSSAOSize( f32 Value )
{
    // Scale the surface size, truncate to int, then round up to the blur alignment.
    const int scaledWidth = (int)(SURFACE_WIDTH * Value);
    const int scaledHeight = (int)(SURFACE_HEIGHT * Value);
    g_SSAOWidth = nn::util::align_up( scaledWidth, g_BlurAlignWidth );
    g_SSAOHeight = nn::util::align_up( scaledHeight, g_BlurAlignHeight );

    // Recreate the render targets at the new size.
    DestroyRenderTargets();
    CreateRenderTargets();

#if NN_GFX_IS_TARGET_GX
    // GX only: rewrite the three ints in the blur dispatch buffer, one count
    // per aligned tile in X and Y and a fixed 1 for the third entry.
    int* pGroupCounts = g_BlurDispatchBuffer.Map< int >();
    pGroupCounts[0] = g_SSAOWidth / g_BlurAlignWidth;
    pGroupCounts[1] = g_SSAOHeight / g_BlurAlignHeight;
    pGroupCounts[2] = 1;
    g_BlurDispatchBuffer.Unmap();
#endif
}

//--------------------------------------------------------------------------

// Menu callback: regenerates the SSAO sample kernel.
// The new menu value is not used here; GenerateSampleKernel() takes no arguments.
void OnMenuItemChangeSSAOKernel( f32 Value )
{
    (void)Value;
    GenerateSampleKernel();
}

//--------------------------------------------------------------------------

// Menu callback: stores the new noise size (truncated to int) and rebuilds
// the noise resources.
void OnMenuItemChangeNoiseSize( f32 Value )
{
    // The size is updated before the old noise is destroyed, as in the
    // original call order.
    g_NoiseSize = static_cast<int>( Value );

    DestroyNoise();
    GenerateNoise();
}

//==============================================================================
//  main
//==============================================================================

//extern "C" void nnMain()
// Test entry point for the SSAO demo: initializes the DEMO libraries, loads
// meshes, shaders, textures and samplers, builds the SSAO resources, runs the
// ProcessPad()/DrawScene() loop until DEMOIsRunning() returns false, then
// releases everything and reports success.
TEST(GfxSsao, Run)
{
    // Forward the host command line to the DEMO test/graphics layers.
    int argc = nnt::GetHostArgc();
    char** argv = nnt::GetHostArgv();

    DEMOInit();
    DEMOTestInit(argc, argv);
    DEMOGfxInit(argc, argv);
    DEMOFontInit();

    // Fixed seed (presumably so the generated sample kernel and noise are
    // reproducible between runs - TODO confirm against GenerateSampleKernel/GenerateNoise).
    DEMOSRand( 12345 );

    // Initialize Menu Data: every item starts at its default value.
    for( s32 i = 0; i < _MENU_ITEM_COUNT; ++i )
    {
        g_MenuData[i].Value = g_MenuData[i].Default;
    }

    // Load Meshes.
    LoadMesh( MESH_ROOM, &g_MeshRoom );
    LoadMesh( MESH_QUAD, &g_MeshQuad );

    // Setup special read only depth buffer state:
    // depth test on (LessEqual), depth writes off.
    {
        nn::gfx::DepthStencilState::InfoType info;
        info.SetDefault();
        info.SetDepthTestEnabled( true );
        info.SetDepthWriteEnabled( false );
        info.SetDepthComparisonFunction( nn::gfx::ComparisonFunction_LessEqual );
        g_DepthReadOnlyState.Initialize( &DEMODevice, info );
    }

    // Load Shaders.
    // Each graphics shader configures its pipeline description first and is
    // then loaded; DEMOTestIsUseHlslccGlsl() selects the HLSLcc variant of the file.

    // Normal-encoding pass: back-face culling with clockwise front faces,
    // D32 float depth, one RGBA8 sRGB color target.
    // NOTE(review): the uniform sizes (3 * 16 floats / 4 floats) look like
    // three 4x4 matrices and one vec4 - confirm against LoadShader.
    g_ShaderEncNormals.demoPipeline.SetDefaults();
    g_ShaderEncNormals.demoPipeline.rasterizerStateInfo.SetFrontFace( nn::gfx::FrontFace_Cw );
    g_ShaderEncNormals.demoPipeline.rasterizerStateInfo.SetCullMode( nn::gfx::CullMode_Back );
    g_ShaderEncNormals.demoPipeline.renderTargetStateInfo.SetDepthStencilFormat( nn::gfx::ImageFormat_D32_Float );
    g_ShaderEncNormals.demoPipeline.blendTargetStateCount = 1;
    g_ShaderEncNormals.demoPipeline.colorTargetStateCount = 1;
    g_ShaderEncNormals.demoPipeline.blendTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderEncNormals.demoPipeline.colorTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderEncNormals.demoPipeline.colorTargetStateInfoArray[ 0 ].SetFormat( nn::gfx::ImageFormat_R8_G8_B8_A8_UnormSrgb );
    LoadShader( DEMOTestIsUseHlslccGlsl() ? SHADER_ENC_NORMALS_HLSLCC : SHADER_ENC_NORMALS, &g_ShaderEncNormals, 3 * 16 * sizeof( float ), 4 * sizeof( float ) );

    // SSAO pass: no depth test or depth writes, one R32 float color target.
    g_ShaderSSAO.demoPipeline.SetDefaults();
    g_ShaderSSAO.demoPipeline.depthStencilStateInfo.SetDepthTestEnabled( false );
    g_ShaderSSAO.demoPipeline.depthStencilStateInfo.SetDepthWriteEnabled( false );
    g_ShaderSSAO.demoPipeline.depthStencilStateInfo.SetDepthComparisonFunction( nn::gfx::ComparisonFunction_Always );
    g_ShaderSSAO.demoPipeline.blendTargetStateCount = 1;
    g_ShaderSSAO.demoPipeline.colorTargetStateCount = 1;
    g_ShaderSSAO.demoPipeline.blendTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderSSAO.demoPipeline.colorTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderSSAO.demoPipeline.colorTargetStateInfoArray[ 0 ].SetFormat( nn::gfx::ImageFormat_R32_Float );
    LoadShader( DEMOTestIsUseHlslccGlsl() ? SHADER_SSAO_HLSLCC : SHADER_SSAO, &g_ShaderSSAO, sizeof( SSAOVsUniforms ), sizeof( SSAOPsUniforms ) );

    // Blur is a compute shader; it has no pipeline state to configure here.
    LoadComputeShader( DEMOTestIsUseHlslccGlsl() ? SHADER_BLUR_HLSLCC : SHADER_BLUR, &g_ShaderBlur );

    // Object pass: back-face culling with clockwise front faces, D32 float
    // depth with a Less comparison, color target in the display color buffer's format.
    g_ShaderObj.demoPipeline.SetDefaults();
    g_ShaderObj.demoPipeline.rasterizerStateInfo.SetFrontFace( nn::gfx::FrontFace_Cw );
    g_ShaderObj.demoPipeline.rasterizerStateInfo.SetCullMode( nn::gfx::CullMode_Back );
    g_ShaderObj.demoPipeline.renderTargetStateInfo.SetDepthStencilFormat( nn::gfx::ImageFormat_D32_Float );
    g_ShaderObj.demoPipeline.depthStencilStateInfo.SetDepthComparisonFunction( nn::gfx::ComparisonFunction_Less );
    g_ShaderObj.demoPipeline.blendTargetStateCount = 1;
    g_ShaderObj.demoPipeline.colorTargetStateCount = 1;
    g_ShaderObj.demoPipeline.blendTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderObj.demoPipeline.colorTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderObj.demoPipeline.colorTargetStateInfoArray[ 0 ].SetFormat( DEMOColorBufferInfo.GetImageFormat() );
    LoadShader( DEMOTestIsUseHlslccGlsl() ? SHADER_OBJ_HLSLCC : SHADER_OBJ, &g_ShaderObj, 3 * 16 * sizeof( float ), 4 * sizeof( float ) );

    // Skybox pass: no depth test or depth writes, color target in the
    // display color buffer's format.
    g_ShaderSkybox.demoPipeline.SetDefaults();
    g_ShaderSkybox.demoPipeline.depthStencilStateInfo.SetDepthTestEnabled( false );
    g_ShaderSkybox.demoPipeline.depthStencilStateInfo.SetDepthWriteEnabled( false );
    g_ShaderSkybox.demoPipeline.depthStencilStateInfo.SetDepthComparisonFunction( nn::gfx::ComparisonFunction_Always );
    g_ShaderSkybox.demoPipeline.blendTargetStateCount = 1;
    g_ShaderSkybox.demoPipeline.colorTargetStateCount = 1;
    g_ShaderSkybox.demoPipeline.blendTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderSkybox.demoPipeline.colorTargetStateInfoArray[ 0 ].SetDefault();
    g_ShaderSkybox.demoPipeline.colorTargetStateInfoArray[ 0 ].SetFormat( DEMOColorBufferInfo.GetImageFormat() );
    LoadShader( DEMOTestIsUseHlslccGlsl() ? SHADER_SKYBOX_HLSLCC : SHADER_SKYBOX, &g_ShaderSkybox, 16 * sizeof( float ), 4 * sizeof( float ) );

    // Adjust the width and height to match the total 4x4 regions computed by a work group.
    // The alignment is the blur shader's work group size times 4 in each
    // dimension; the initial SSAO target is half the surface size, rounded up
    // to that alignment. This must run after the blur shader is loaded and
    // before CreateRenderTargets().
    int tmpDepth = 0;
    g_ShaderBlur.demoPipeline.shaders.GetShader()->GetWorkGroupSize( &g_BlurAlignWidth, &g_BlurAlignHeight, &tmpDepth );
    g_BlurAlignWidth *= 4;
    g_BlurAlignHeight *= 4;
    g_SSAOWidth = nn::util::align_up( SURFACE_WIDTH / 2, g_BlurAlignWidth );
    g_SSAOHeight = nn::util::align_up( SURFACE_HEIGHT / 2, g_BlurAlignHeight );

    // Load Textures.
    g_TextureSkybox.Initialize( TEXTURE_SKYBOX );
    g_TextureConcrete.Initialize( TEXTURE_CONCRETE );

    // Setup Samplers: point/clamp, point/repeat and linear/clamp.
    DEMOGfxInitSampler( &g_SamplerPointClamp, &g_SamplerPointClampSlot, nn::gfx::TextureAddressMode_ClampToEdge,
        nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint,
        nn::gfx::ComparisonFunction_Always );

    DEMOGfxInitSampler( &g_SamplerPointWrap, &g_SamplerPointWrapSlot, nn::gfx::TextureAddressMode_Repeat,
        nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint,
        nn::gfx::ComparisonFunction_Always );

    DEMOGfxInitSampler( &g_SamplerLinearClamp, &g_SamplerLinearClampSlot, nn::gfx::TextureAddressMode_ClampToEdge,
        nn::gfx::FilterMode_MinLinear_MagLinear_MipPoint,
        nn::gfx::ComparisonFunction_Always );

    // Generate resources for the SSAO.
    CreateRenderTargets();
    CreateComputeBuffers();
    GenerateSampleKernel();
    GenerateNoise();

    // g_Times holds _TIME_COUNT * 2 entries
    // (NOTE(review): presumably two sets of timers, e.g. double-buffered - confirm at the declaration).
    for ( int i = 0; i < _TIME_COUNT * 2; i++ )
    {
        g_Times[ i ].Initialize();
    }

    // Run: handle input, then draw, once per frame.
    while (DEMOIsRunning())
    {
        ProcessPad();
        DrawScene();
    }

    for ( int i = 0; i < _TIME_COUNT * 2; i++ )
    {
        g_Times[ i ].Finalize();
    }

    // Free Shaders.
    FreeShader( &g_ShaderSSAO );
    FreeShader( &g_ShaderEncNormals );
    FreeShader( &g_ShaderBlur );
    FreeShader( &g_ShaderObj );
    FreeShader( &g_ShaderSkybox );

    // Free Resources.
    DestroyRenderTargets();
    DestroyNoise();

    g_TextureSkybox.Finalize();
    g_TextureConcrete.Finalize();

    FreeMesh( &g_MeshRoom );
    FreeMesh( &g_MeshQuad );

    g_SamplerPointWrap.Finalize( &DEMODevice );
    g_SamplerPointClamp.Finalize( &DEMODevice );
    g_SamplerLinearClamp.Finalize( &DEMODevice );
    g_DepthReadOnlyState.Finalize( &DEMODevice );

    // NOTE(review): these buffers are presumably the ones created by
    // CreateComputeBuffers() - confirm there.
    g_BlurUniform.Finalize();
#if NN_GFX_IS_TARGET_GX
    g_BlurDispatchBuffer.Finalize();
#endif

    // Shutdown demo lib
    DEMOFontShutdown();
    DEMOTestShutdown();
    DEMOGfxShutdown();
    DEMOShutdown();

    SUCCEED();
} // NOLINT(impl/function_size)
