﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cmath>

#include <algorithm>

#include <gfx/demo.h>
#include <nnt.h>
#include <nnt/nnt_Argument.h>

#if NN_GFX_IS_TARGET_GL
#include <GL/glew.h>
#endif

#if NN_GFX_IS_TARGET_NVN
#include <nvn/nvn.h>
#include <nvn/nvn_FuncPtrInline.h>
#endif

#if !NN_GFX_IS_TARGET_GX
// Maps snprintf onto _snprintf for every non-GX target; originally a
// workaround for Visual Studio versions that lack snprintf.
// NOTE(review): this also applies to non-MSVC toolchains, and _snprintf
// presumably does not null-terminate on truncation - confirm before relying
// on it.
#define snprintf _snprintf
#endif

// Index used to select an entry from the two-entry shader path tables in this
// file (entry 0: default shader, entry 1: "Hlslcc" variant).
static int g_ShaderFileIdx = 0;

// Shader paths for the texture-copy pass.
static const char* COPY_TEXTURE_SHADERS[] =
{
    "shaders/leaf/copyTexture",
    "shaders/leaf/copyTextureHlslcc"
};
// Shader paths for the Verlet simulation pass.
static const char* LEAF_SIM_VERLET_SHADERS[] =
{
    "shaders/leaf/leafSimVerlet",
    "shaders/leaf/leafSimVerletHlslcc",
};
// Shader paths for the force simulation pass.
static const char* LEAF_SIM_FORCE_SHADERS[] =
{
    "shaders/leaf/leafSimForce",
    "shaders/leaf/leafSimForceHlslcc"
};

// Asset paths for the tree model, the ground model and the leaf position data.
static const char* const TREE_MODEL_FILE = "geometries/leaf/tree.nmod";
static const char* const GROUND_MODEL_FILE = "geometries/leaf/ground.nmod";
static const char* const LEAF_DATA_FILE = "geometries/leaf/leafPosition.dat";

// Viewport/scissor state object and its associated memory pointer.
// NOTE(review): presumably covers the whole color buffer, judging by the
// name - the setup code is outside this section.
static nn::gfx::ViewportScissorState s_fullViewport;
void* s_fullViewportMem;

// CPU-side layout of the leaf simulation constants. sizeof( LeafState ) is
// used as the size of the uniform buffers created in this file.
struct LeafState
{
    float wind[4];       // wind parameters (four floats)
    float leaf_counter;  // NOTE(review): presumably a running leaf/time counter - confirm against the shaders
};

namespace LegacyGPGPULeaf {

////////////////////////////////////////////////////
//
// Perf
//
////////////////////////////////////////////////////


// For Perf
// GPU timestamp queries taken at the start, the intermediate mark and the end
// of the measured section.
static DEMOGfxGpuTimestamp s_startGpuTimestamp;
static DEMOGfxGpuTimestamp s_markGpuTimestamp;
static DEMOGfxGpuTimestamp s_endGpuTimestamp;
// Tick values read back from the queries above by GetTimestampResult().
static uint64_t s_startGpuTick;
static uint64_t s_markGpuTick;
static uint64_t s_endGpuTick;

// Perf starting point: queries the "start" GPU timestamp.
// Does nothing when DEMOGfxIsRunning() reports false.
static void CheckPerfBegin()
{
    if ( !DEMOGfxIsRunning() )
    {
        return;
    }

    s_startGpuTimestamp.QueryTimestamp();
}

// Perf intermediate point: queries the "mark" GPU timestamp.
// Does nothing when DEMOGfxIsRunning() reports false.
static void CheckPerfMark()
{
    if ( !DEMOGfxIsRunning() )
    {
        return;
    }

    s_markGpuTimestamp.QueryTimestamp();
}

// Perf end point: queries the "end" GPU timestamp.
// Does nothing when DEMOGfxIsRunning() reports false.
static void CheckPerfEnd()
{
    if ( !DEMOGfxIsRunning() )
    {
        return;
    }

    s_endGpuTimestamp.QueryTimestamp();
}

static void GetTimestampResult()
{
    s_startGpuTick = s_startGpuTimestamp.GetTimestampResult();
    s_markGpuTick = s_markGpuTimestamp.GetTimestampResult();
    s_endGpuTick = s_endGpuTimestamp.GetTimestampResult();
}

// Returns the time between the start and mark timestamps, converted from
// ticks by DEMOGfxGpuTimestamp::TicksToMicroseconds().
static float GetPerfBeginToMark()
{
    const uint64_t elapsedTicks = s_markGpuTick - s_startGpuTick;
    return DEMOGfxGpuTimestamp::TicksToMicroseconds( elapsedTicks );
}

// Returns the time between the mark and end timestamps, converted from
// ticks by DEMOGfxGpuTimestamp::TicksToMicroseconds().
static float GetPerfMarkToEnd()
{
    const uint64_t elapsedTicks = s_endGpuTick - s_markGpuTick;
    return DEMOGfxGpuTimestamp::TicksToMicroseconds( elapsedTicks );
}

////////////////////////////////////////////////////
//
// Common Quad VB/IB, Sampler, ContextState
//
////////////////////////////////////////////////////

// -----
// Byte stride of one quad vertex: two position floats plus two texcoord floats.
static const int QUAD_VTX_STRIDE = (sizeof(float) * 2 + sizeof(float) * 2);
// Attribute offsets inside a vertex, expressed in floats (multiplied by
// sizeof(float) where the attributes are bound).
static const int QUAD_POS_OFFSET = 0;
static const int QUAD_TEXCOORD_OFFSET = 2;
// Vertex buffer slot used for the quad.
static const int BUFFER_IDX = 0;
// Number of indices drawn for the quad (two triangles).
static const int QUAD_TRIANGLES_IDX_COUNT = 6;
// ----- Model Vertex Data
// Four vertices, each laid out as: position x, position y, texcoord u, texcoord v.
static float modelVtxs[] =
{
    -1.0f, -1.0f,  0.0f,  1.0f,
    1.0f, -1.0f,  1.0f,  1.0f,
    1.0f,  1.0f,  1.0f,  0.0f,
    -1.0f,  1.0f,  0.0f,  0.0f
};
// ----- Quad Triangles
// Index list for the two triangles; drawn with a 32-bit index format.
static int quadTrianglesIdxs[] = {0, 1, 2, 3, 0, 2};

// Selects one of the two shared samplers; the values are used directly as
// indices into Common::mySampler / Common::mySamplerSlot.
enum Sampler
{
    POINT,   // point filtering for min/mag/mip
    LINEAR   // linear filtering for min/mag/mip
};

// Resources shared by all passes: the full-screen quad geometry and the two
// samplers (indexed by the Sampler enum).
struct Common
{
    // GPU buffers
    DEMOGfxBuffer vtxModelBuffer;   // quad vertices (modelVtxs)
    DEMOGfxBuffer idxModelBuffer;   // quad indices (quadTrianglesIdxs)

    nn::gfx::Sampler mySampler[2];            // [POINT], [LINEAR]
    nn::gfx::DescriptorSlot mySamplerSlot[2]; // descriptor slots matching mySampler
} g_Common;

static void Init()
{
    // Allocate memory
    g_Common.vtxModelBuffer.Initialize( sizeof( modelVtxs ), modelVtxs, nn::gfx::GpuAccess_VertexBuffer | nn::gfx::GpuAccess_Read, 0 );

    g_Common.idxModelBuffer.Initialize( sizeof( quadTrianglesIdxs ), quadTrianglesIdxs, nn::gfx::GpuAccess_IndexBuffer | nn::gfx::GpuAccess_Read, 0 );


    // Sampler setting
    DEMOGfxInitSampler(&g_Common.mySampler[POINT], &g_Common.mySamplerSlot[POINT], nn::gfx::TextureAddressMode_ClampToEdge, nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint, nn::gfx::ComparisonFunction_Always);
    DEMOGfxInitSampler(&g_Common.mySampler[LINEAR], &g_Common.mySamplerSlot[LINEAR], nn::gfx::TextureAddressMode_ClampToEdge, nn::gfx::FilterMode_MinLinear_MagLinear_MipLinear, nn::gfx::ComparisonFunction_Always);

    s_startGpuTimestamp.Initialize();
    s_markGpuTimestamp.Initialize();
    s_endGpuTimestamp.Initialize();
}

static void Free()
{
    g_Common.idxModelBuffer.Finalize();
    g_Common.vtxModelBuffer.Finalize();

    g_Common.mySampler[POINT].Finalize(&DEMODevice);
    g_Common.mySampler[LINEAR].Finalize(&DEMODevice);

    s_startGpuTimestamp.Finalize();
    s_markGpuTimestamp.Finalize();
    s_endGpuTimestamp.Finalize();
}


////////////////////////////////////////////////////
//
// Texture2D
//
////////////////////////////////////////////////////

//
// Buffer for input data
//
// A 2D texture with its own memory pool; filled in by InitTexture2D() and
// released by FreeTexture2D().
struct Texture2D
{
    nn::gfx::Texture textureBuffer;         // the texture object
    nn::gfx::TextureView textureBufferView; // view used for sampling
    nn::gfx::TextureInfo textureBufferInfo; // creation parameters
    nn::gfx::DescriptorSlot textureSlot;    // descriptor slot of the view

    nn::gfx::Sampler* sampler;              // points into g_Common.mySampler
    nn::gfx::DescriptorSlot* samplerSlot;   // points into g_Common.mySamplerSlot
    int width;
    int height;

    void* imagePtr;                         // backing memory for the pool
    size_t imageSize;                       // mip data size in bytes
    nn::gfx::MemoryPool pool;               // pool created over imagePtr
};

// Creates a w x h 2D texture of the given format backed by its own memory
// pool, optionally uploads initial contents, and registers a texture view.
//
// texture     : receives all created objects; release with FreeTexture2D().
// w, h        : texture dimensions in texels.
// format      : image format for both the texture and its view.
// sampler     : which shared sampler (g_Common) the texture refers to.
// pinitvalues : optional initial data of texture.imageSize bytes; may be NULL.
//               On GX targets the data is endian-swapped in place.
static void InitTexture2D(Texture2D& texture, int w, int h,
                   nn::gfx::ImageFormat format, Sampler sampler, void* pinitvalues)
{
    size_t alignment;

    // Describe a single-mip 2D texture that is read by shaders.
    texture.textureBufferInfo.SetDefault();
    texture.textureBufferInfo.SetWidth( w );
    texture.textureBufferInfo.SetHeight( h );
    texture.textureBufferInfo.SetDepth( 1 );
    texture.textureBufferInfo.SetMipCount( 1 );
    texture.textureBufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_Read );
    texture.textureBufferInfo.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
    texture.textureBufferInfo.SetImageFormat( format );

    texture.imageSize = nn::gfx::Texture::CalculateMipDataSize( &DEMODevice, texture.textureBufferInfo );
    alignment = nn::gfx::Texture::CalculateMipDataAlignment( &DEMODevice, texture.textureBufferInfo );

    // Setup the memory pool info to guarantee we're properly aligned
    nn::gfx::MemoryPool::InfoType memoryPoolInfo;
    memoryPoolInfo.SetDefault();
    memoryPoolInfo.SetMemoryPoolProperty( nn::gfx::MemoryPoolProperty_CpuInvisible | nn::gfx::MemoryPoolProperty_GpuCached );
    // Use the stricter of the texture alignment and the pool alignment.
    alignment = std::max( alignment, nn::gfx::MemoryPool::GetPoolMemoryAlignment( &DEMODevice, memoryPoolInfo ) );

    // NOTE(review): the pool size is the raw mip data size; confirm whether the
    // pool size must also be rounded up to a pool size granularity.
    texture.imagePtr = DEMOGfxAllocMEM2(texture.imageSize, alignment);
    ASSERT(texture.imagePtr != NULL);

    // Setup the memory pool
    memoryPoolInfo.SetPoolMemory( texture.imagePtr, texture.imageSize );
    texture.pool.Initialize( &DEMODevice, memoryPoolInfo );

    texture.sampler = &g_Common.mySampler[sampler];
    texture.samplerSlot = &g_Common.mySamplerSlot[sampler];
    texture.width = w;
    texture.height = h;

    // The texture occupies the whole pool, starting at offset 0.
    texture.textureBuffer.Initialize(&DEMODevice, texture.textureBufferInfo, &texture.pool, 0, texture.imageSize);

    // Defining OLD selects the TextureCopyRegion form of CopyBufferToImage.
//#define OLD
    if(pinitvalues)
    {
        // Upload path: stage the data in a temporary buffer and copy it into
        // the texture with a command buffer that is executed synchronously.
        DEMOGfxBuffer tmpBuffer;
#ifdef OLD
        nn::gfx::TextureCopyRegion dstRegion;
        dstRegion.SetDefault();
        dstRegion.SetWidth( w );
        dstRegion.SetHeight( h );
#else
        nn::gfx::BufferTextureCopyRegion dstRegion;
        dstRegion.SetDefault();
        dstRegion.EditTextureCopyRegion().SetWidth(w);
        dstRegion.EditTextureCopyRegion().SetHeight(h);
        dstRegion.SetBufferImageHeight(h);
#endif

#if NN_GFX_IS_TARGET_GX
        // Modifies the caller's data in place.
        GX2EndianSwap( pinitvalues, texture.imageSize );
#endif
        tmpBuffer.Initialize( texture.imageSize, pinitvalues, nn::gfx::GpuAccess_Read, 0 );
        DEMOCommandBuffer.Begin();
#ifdef OLD
        DEMOCommandBuffer.CopyBufferToImage(&texture.textureBuffer, dstRegion, &tmpBuffer.buffer, 0);
#else
        DEMOCommandBuffer.CopyBufferToImage( &texture.textureBuffer, &tmpBuffer.buffer, dstRegion);
#endif
        DEMOCommandBuffer.End();
        DEMOQueue.ExecuteCommand( &DEMOCommandBuffer, NULL );
        // Wait for the copy so the staging buffer can be released right away.
        DEMOQueue.Sync();
        tmpBuffer.Finalize();
    }

    // Create the view and register it to obtain a descriptor slot.
    nn::gfx::TextureViewInfo vinfo;
    vinfo.SetDefault();
    vinfo.SetImageDimension(nn::gfx::ImageDimension_2d);
    vinfo.SetImageFormat(format);
    vinfo.SetTexturePtr(&texture.textureBuffer);
    texture.textureBufferView.Initialize(&DEMODevice, vinfo);

    int slot = DEMOGfxRegisterTextureView( &texture.textureBufferView );
    DEMOTextureDescriptorPool.GetDescriptorSlot( &texture.textureSlot, slot );
}

// Releases a texture created by InitTexture2D(). Objects are torn down in the
// order view -> texture -> memory pool -> backing memory.
static void FreeTexture2D(Texture2D& texture)
{
    void* const backingMemory = texture.imagePtr;

    texture.textureBufferView.Finalize( &DEMODevice );
    texture.textureBuffer.Finalize( &DEMODevice );
    texture.pool.Finalize( &DEMODevice );

    DEMOGfxFreeMEM2( backingMemory );
}


////////////////////////////////////////////////////
//
// Render Target Texture
//
////////////////////////////////////////////////////

//
// Buffer for I/O data
//
// A texture that can be bound both as a color target (output of a pass) and
// as a sampled texture (input of a later pass). Filled in by
// InitRenderTarget() and released by FreeRenderTarget().
struct RenderTarget
{
    nn::gfx::ColorTargetView colorBuffer;   // view used when rendering into the texture
    nn::gfx::TextureView textureView;       // view used when sampling the texture
    nn::gfx::Texture textureBuffer;         // the texture object
    nn::gfx::DescriptorSlot textureSlot;    // descriptor slot of textureView
    nn::gfx::TextureInfo textureBufferInfo; // creation parameters
    nn::gfx::Sampler* sampler;              // points into g_Common.mySampler
    nn::gfx::DescriptorSlot* samplerSlot;   // points into g_Common.mySamplerSlot
    int width;
    int height;

    size_t imageSize;                       // mip data size in bytes
    DEMOGfxMemPool* pPool;                  // sub-pool allocated from DEMOGfxGpuPool
};

// Creates a w x h render target of the given format in a sub-pool of
// DEMOGfxGpuPool, together with its color-target view and texture view.
//
// renderTarget : receives all created objects; release with FreeRenderTarget().
// w, h         : dimensions in texels.
// format       : image format for the texture and both views.
// sampler      : which shared sampler (g_Common) the target refers to.
static void InitRenderTarget(RenderTarget& renderTarget, int w, int h,
                      nn::gfx::ImageFormat format, Sampler sampler)
{
    // Plain bookkeeping fields
    renderTarget.width = w;
    renderTarget.height = h;
    renderTarget.sampler = &g_Common.mySampler[sampler];
    renderTarget.samplerSlot = &g_Common.mySamplerSlot[sampler];

    // Texture description
    nn::gfx::TextureInfo& info = renderTarget.textureBufferInfo;
    info.SetDefault();
    info.SetWidth( w );
    info.SetHeight( h );
#if NN_GFX_IS_TARGET_D3D
    info.SetGpuAccessFlags( nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_Read | nn::gfx::GpuAccess_ColorBuffer );
#else
    info.SetGpuAccessFlags( nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_Read );
#endif
    info.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
    info.SetImageFormat( format );

    // Storage requirements
    renderTarget.imageSize = nn::gfx::Texture::CalculateMipDataSize( &DEMODevice, info );
    const size_t alignment = nn::gfx::Texture::CalculateMipDataAlignment( &DEMODevice, info );

    // Carve the storage out of the shared GPU pool and create the texture there.
    renderTarget.pPool = DEMOGfxGpuPool->AllocSubPool( renderTarget.imageSize, alignment );
    DEMOGfxMemPool* const subPool = renderTarget.pPool;
    renderTarget.textureBuffer.Initialize( &DEMODevice, info,
        subPool->GetPool(), subPool->GetBaseOffset(), renderTarget.imageSize );

    // Views: one for rendering into the texture, one for sampling from it.
    DEMOGfxSetupColorView( &renderTarget.textureBuffer, &renderTarget.colorBuffer,
        nn::gfx::ImageDimension_2d, format );
    DEMOGfxSetupTextureView( &renderTarget.textureBuffer, &renderTarget.textureView,
        &renderTarget.textureSlot, nn::gfx::ImageDimension_2d, format,
        nn::gfx::DepthStencilFetchMode_DepthComponent );
}

// Releases a render target created by InitRenderTarget(): both views first,
// then the texture, then the sub-pool that backed it.
static void FreeRenderTarget(RenderTarget& renderTarget)
{
    DEMOGfxMemPool* const subPool = renderTarget.pPool;

    renderTarget.colorBuffer.Finalize( &DEMODevice );
    renderTarget.textureView.Finalize( &DEMODevice );
    renderTarget.textureBuffer.Finalize( &DEMODevice );

    subPool->Finalize();
}


////////////////////////////////////////////////////
//
// Legacy GPGPU pixel shader operations pass
//
////////////////////////////////////////////////////
//
// Wrapper to facilitate execution of Legacy GPGPU (pixel shader operations)
//
// Initialize pass in InitPass()
// Free the pass in FreePass()
//
// Processing for each frame
// SetInputUniform();               // Set input constant
// SetInputRenderTarget();          // Set input RenderTarget
// SetInputTexture2D();             // Set input Texture2D
// SetInputTextureView();           // Set input texture view (descriptor slots)
// ComputePass(output RenderTargets);  //Run pixel shader operations
//
//

// State for one pixel-shader "compute" pass: pipeline, depth buffer, a pair of
// uniform buffers and the viewport. Set up by InitPass(), released by FreePass().
struct Pass
{
    DEMOGfxPipeline pipeline;

    // -- Texture Setting -- //
    int myColorBufferWidth;                  // domain width passed to InitPass()
    int myColorBufferHeight;                 // domain height passed to InitPass()
    nn::gfx::Texture          myDepthTexture;
    nn::gfx::DepthStencilView myDepthView;
    DEMOGfxMemPool*           pPool;         // pool backing the depth texture

    // Uniform buffer object
    DEMOGfxBuffer uniformBuffer[ 2 ];        // selected by currentBuffer % 2
    int ubLoc;                               // pixel-stage slot of "ub_LeafInp"
    int currentBuffer;                       // advanced by each SetInputUniform() call

    // scissor and viewport state
    nn::gfx::ViewportScissorState viewport;
    void*                         viewportPtr; // memory associated with viewport; freed in FreePass()
};

// Copies `size` bytes from `values` into the pass's current uniform buffer,
// binds that buffer to the pixel stage at pass.ubLoc, and switches to the
// other buffer of the pair for the next call.
//
// pass   : pass whose uniform buffers are updated.
// values : source data; must point to at least `size` readable bytes.
// size   : number of bytes to copy; must not exceed the uniform buffer size.
static void SetInputUniform(Pass& pass, const void* values, size_t size)
{
    nn::gfx::CommandBuffer* cmdbuf = &DEMOCommandBuffer;

    // Only the lowest bit selects the buffer, so any previously stored counter
    // value maps onto index 0 or 1.
    const int bufferIndex = pass.currentBuffer & 1;
    DEMOGfxBuffer& buffer = pass.uniformBuffer[ bufferIndex ];

    // Guard against writing past the end of the mapped buffer.
    ASSERT( size <= static_cast< size_t >( buffer.size ) );

    float* ptr = buffer.Map< float >();
    memcpy(ptr, values, size);
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap( ptr, size );
#endif
    buffer.Unmap();

    cmdbuf->SetConstantBuffer(pass.ubLoc, nn::gfx::ShaderStage_Pixel, buffer.gpuAddress, buffer.size);

    // Toggle between 0 and 1 instead of counting up forever: an ever-growing
    // int would eventually overflow and yield a negative "% 2" index.
    pass.currentBuffer = ( bufferIndex + 1 ) % 2;
}

// Binds a render target's texture view and sampler to pixel-shader slot `loc`.
// When `loc` is negative nothing is bound and a warning naming `texture_name`
// is printed instead.
static void SetInputRenderTarget(Pass& pass, RenderTarget& texture, const char* texture_name, int loc)
{
    NN_UNUSED( pass );

    if ( loc < 0 )
    {
        printf("Warning: SetInputRenderTarget(%s) Failure. loc(%d)\n",
            texture_name, loc);
        return;
    }

    nn::gfx::CommandBuffer* commands = &DEMOCommandBuffer;
    commands->SetTextureAndSampler( loc, nn::gfx::ShaderStage_Pixel, texture.textureSlot, *texture.samplerSlot );
}

// Binds a Texture2D's texture view and sampler to pixel-shader slot `loc`.
// When `loc` is negative nothing is bound and a warning naming `texture_name`
// is printed instead.
static void SetInputTexture2D(Pass& pass, Texture2D& texture, const char* texture_name, int loc)
{
    NN_UNUSED(pass);

    if ( loc < 0 )
    {
        printf("Warning: SetInputTexture2D(%s) Failure. loc(%d)\n",
            texture_name, loc);
        return;
    }

    nn::gfx::CommandBuffer* commands = &DEMOCommandBuffer;
    commands->SetTextureAndSampler( loc, nn::gfx::ShaderStage_Pixel, texture.textureSlot, *texture.samplerSlot );
}

// Binds an arbitrary texture/sampler descriptor-slot pair to pixel-shader
// slot `loc`. When `loc` is negative nothing is bound and a warning naming
// `texture_name` is printed instead.
static void SetInputTextureView(Pass& pass, nn::gfx::DescriptorSlot& textureSlot, nn::gfx::DescriptorSlot& samplerSlot,
                        const char* texture_name, int loc)
{
    NN_UNUSED(pass);

    nn::gfx::CommandBuffer* cmdbuf = &DEMOCommandBuffer;

    // Texture Sampler lookup
    if(loc >= 0)
    {
        // Set Texture
        cmdbuf->SetTextureAndSampler(loc, nn::gfx::ShaderStage_Pixel, textureSlot, samplerSlot);
    }
    else
    {
        // The warning now names this function (it used to say "SetInputGX2Texture").
        printf("Warning: SetInputTextureView(%s) Failure. loc(%d)\n",
            texture_name, loc);
    }
}

// Upper bound on the number of color targets SetRenderTargets() can bind at once.
static const int MAX_RENDER_TARGETS = 16;

// Binds the color-target views of rt[0..rtNum-1] together with the given
// depth-stencil view on `cmdbuf`.
//
// cmdbuf      : command buffer to record into.
// rt          : array of at least rtNum render target pointers.
// rtNum       : number of targets; must be within [0, MAX_RENDER_TARGETS].
// nnDepthView : depth-stencil view passed through to the command buffer.
static void SetRenderTargets(nn::gfx::CommandBuffer* cmdbuf, RenderTarget* rt[], int rtNum, nn::gfx::DepthStencilView* nnDepthView)
{
    // The local array holds MAX_RENDER_TARGETS entries, so a count equal to
    // MAX_RENDER_TARGETS is valid; negative counts are not.
    ASSERT(rtNum >= 0 && rtNum <= MAX_RENDER_TARGETS);

    nn::gfx::ColorTargetView* nnRenderTargets[MAX_RENDER_TARGETS];

    for (int i = 0; i < rtNum; i++)
    {
        nnRenderTargets[i] = &rt[i]->colorBuffer;
    }
    cmdbuf->SetRenderTargets(rtNum, nnRenderTargets, nnDepthView);
}

// Binds the given render targets with the pass's depth buffer, clears every
// color target to (0, 0, 0, 0) and clears depth/stencil to 1.0 / 0.
static void ClearPass(Pass& pass, RenderTarget* renderTarget[], int renderTargetNum)
{
    nn::gfx::CommandBuffer* const commands = &DEMOCommandBuffer;
    nn::gfx::DepthStencilView* const depthView = &pass.myDepthView;

    SetRenderTargets( commands, renderTarget, renderTargetNum, depthView );

    for ( int target = 0; target < renderTargetNum; target++ )
    {
        nn::gfx::ColorTargetView* const colorView = &renderTarget[target]->colorBuffer;
        commands->ClearColor( colorView, 0.0f, 0.0f, 0.0f, 0.0f, NULL );
    }

    commands->ClearDepthStencil( depthView, 1.0f, 0, nn::gfx::DepthStencilClearMode_DepthStencil, NULL );

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode( GX2_SHADER_MODE_UNIFORM_BLOCK );
#endif
}

// Runs the pass: binds the output render targets, the pass pipeline and
// viewport, draws the shared quad, then flushes the color/depth writes and
// invalidates texture memory so the outputs can be read as textures.
static void ComputePass(Pass& pass, RenderTarget* renderTarget[], int renderTargetNum)
{
    nn::gfx::CommandBuffer* const commands = &DEMOCommandBuffer;

    // Outputs
    SetRenderTargets( commands, renderTarget, renderTargetNum, &pass.myDepthView );

    // Pipeline state
    commands->SetPipeline( &pass.pipeline.pipeline );
    commands->SetViewportScissorState( &pass.viewport );

    // Shared quad geometry
    commands->SetVertexBuffer( BUFFER_IDX, g_Common.vtxModelBuffer.gpuAddress,
        QUAD_VTX_STRIDE, g_Common.vtxModelBuffer.size );
    commands->DrawIndexed( nn::gfx::PrimitiveTopology_TriangleList, nn::gfx::IndexFormat_Uint32,
        g_Common.idxModelBuffer.gpuAddress, QUAD_TRIANGLES_IDX_COUNT, 0 );

    // Make the results visible to subsequent texture reads.
    commands->FlushMemory( nn::gfx::GpuAccess_ColorBuffer | nn::gfx::GpuAccess_DepthStencil );
    commands->InvalidateMemory( nn::gfx::GpuAccess_Texture );
}

// Prepares a pass: configures blend/color target state for each output,
// loads the shader, binds the quad vertex layout, creates a D32 depth buffer
// and viewport of the domain size, builds the pipeline and allocates the two
// uniform buffers.
//
// pass             : pass to initialize; release with FreePass().
// shaderFileName   : shader file passed to DEMOGfxLoadShadersFromFile().
// domain_width/
// domain_height    : size of the depth buffer and viewport.
// numRenderTargets : number of color outputs of the shader.
// pFormats         : image format of each output (numRenderTargets entries).
static void InitPass(Pass& pass, const char* shaderFileName,
              const int domain_width, const int domain_height, int numRenderTargets, nn::gfx::ImageFormat* pFormats )
{
    DEMOGfxPipeline& pipeline = pass.pipeline;
    DEMOGfxShader* const shaders = &pipeline.shaders;

    // Pipeline defaults, then per-target blend/color state
    pipeline.SetDefaults();
    pipeline.blendTargetStateCount = numRenderTargets;
    pipeline.colorTargetStateCount = numRenderTargets;

    for ( int target = 0; target < numRenderTargets; target++ )
    {
        pipeline.blendTargetStateInfoArray[ target ].SetDefault();
        pipeline.colorTargetStateInfoArray[ target ].SetDefault();
        pipeline.colorTargetStateInfoArray[ target ].SetFormat( pFormats[ target ] );
    }

    DEMOGfxLoadShadersFromFile( shaders, 0, shaderFileName );

    // Slot of the constant buffer used by SetInputUniform()
    pass.ubLoc = shaders->GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "ub_LeafInp");

    // Quad vertex layout: two position floats followed by two texcoord floats
    DEMOGfxInitShaderAttribute( shaders, "a_position", BUFFER_IDX, QUAD_POS_OFFSET * sizeof(float), nn::gfx::AttributeFormat_32_32_Float );
    DEMOGfxInitShaderAttribute( shaders, "a_texCoord", BUFFER_IDX, QUAD_TEXCOORD_OFFSET * sizeof(float), nn::gfx::AttributeFormat_32_32_Float );
    DEMOGfxInitShaderVertexBuffer( shaders, BUFFER_IDX, QUAD_VTX_STRIDE, 0 );

    // Depth buffer matching the domain size
    DEMOGfxSetupTextureBuffer( &pass.myDepthTexture, NULL, NULL, NULL, &pass.myDepthView, &pass.pPool,
        domain_width, domain_height, 1, 1,
        nn::gfx::ImageDimension_2d, nn::gfx::ImageFormat_D32_Float, nn::gfx::DepthStencilFetchMode_DepthComponent, 0 );

    pass.myColorBufferWidth = domain_width;
    pass.myColorBufferHeight = domain_height;

    // Viewport/scissor covering the whole domain
    const float viewWidth = static_cast< float >( pass.myColorBufferWidth );
    const float viewHeight = static_cast< float >( pass.myColorBufferHeight );
    DEMOGfxSetViewportScissorState( &pass.viewport, &pass.viewportPtr,
        0.0f, 0.0f, viewWidth, viewHeight,
        0.0f, 1.0f,
        viewHeight, true );

    pipeline.Initialize( &DEMODevice );

    // Two uniform buffers, each sized for one LeafState
    for ( int buffer = 0; buffer < 2; buffer++ )
    {
        pass.uniformBuffer[ buffer ].Initialize( sizeof( LeafState ), NULL, nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Read, 0 );
    }

    pass.currentBuffer = 0;
}

// Releases everything created by InitPass().
static void FreePass(Pass& pass)
{
    // Finalize the depth view before the texture it refers to, matching the
    // view-then-texture order used by FreeTexture2D() and FreeRenderTarget().
    pass.myDepthView.Finalize(&DEMODevice);
    pass.myDepthTexture.Finalize(&DEMODevice);
    pass.pipeline.Finalize(&DEMODevice);
    pass.viewport.Finalize(&DEMODevice);

    pass.uniformBuffer[ 0 ].Finalize();
    pass.uniformBuffer[ 1 ].Finalize();

    // Release the backing storage last, after every object that used it.
    pass.pPool->Finalize();
    DEMOGfxFreeMEM2(pass.viewportPtr);
}

} //namespace LegacyGPGPULeaf

namespace SimpleModelLeaf {


// Blend state applied for opaque materials, and the memory that backs it
// (both set up in InitBlendStates()).
static nn::gfx::BlendState opaqueBlend;
static void* opaqueBlendMem;

// Simple model: loaded model data plus the pipeline, slots, sampler and
// uniform buffer used to draw it.
typedef struct _SimpleModelLeaf
{
    // Model data
    DemoModelData g_ModelData;
    // pipeline simple shader
    DEMOGfxPipeline g_pipeline;
    // Constant Buffers
    int ub_inpLoc;                          // vertex-stage slot of "ub_inp"
    int samplerLoc;                         // pixel-stage slot of "s_texture"
    nn::gfx::Sampler mySampler;             // sampler used for the model textures
    nn::gfx::DescriptorSlot mySamplerSlot;  // descriptor slot of mySampler

    DEMOGfxBuffer uniformBuffer;            // constant buffer created in InitShaderPipeline()
}SimpleModelLeaf;

// Shader files (vertex and pixel shader data) used to draw the simple models.
// The entry is selected with g_ShaderFileIdx.
// NOTE(review): the previous comment described a pass-through vertex-color
// shader; the file names suggest a lighting shader - confirm.
static const char * const SIMPLE_SHADER_FILE[] =
{
    "shaders/leaf/simpleLighting",
    "shaders/leaf/simpleLightingHlslcc",
};

// Texture directory
static const char * const TEXTURE_DIRECTORY = "textures/leaf";

////////////////////////////////////////////////////
//
// Prototypes
//
////////////////////////////////////////////////////

// Loads a model and creates the rendering resources for it.
static void Init(SimpleModelLeaf& model, const char* MODEL_FILE);
// Releases the resources created by Init().
static void Free(SimpleModelLeaf& model);

// Loads the shader and builds the pipeline for a model.
static void InitShaderPipeline(SimpleModelLeaf* model);

////////////////////////////////////////////////////
//
// Functions
//
////////////////////////////////////////////////////


// Sets up rendering for a simple model: loads the shader selected by
// g_ShaderFileIdx, binds the position / normal / texcoord attributes found in
// the model data, looks up the constant-buffer and sampler slots, configures a
// single non-blended color target, builds the pipeline and creates the
// model's uniform buffer.
static void InitShaderPipeline(SimpleModelLeaf* model)
{
    DEMOGfxPipeline& pipeline = model->g_pipeline;
    DEMOGfxShader* shader = &pipeline.shaders;
    const DemoAttributeData* attributeTable = model->g_ModelData.attributes;
    const int attributeTotal = model->g_ModelData.header.attributeCount;

    DEMOGfxLoadShadersFromFile( shader, 0, SIMPLE_SHADER_FILE[g_ShaderFileIdx] );

    // Bind every attribute the shader consumes and accumulate the vertex stride.
    int vertexStride = 0;
    for ( int index = 0; index < attributeTotal; index++ )
    {
        const DemoAttributeData& attribute = attributeTable[index];

        const char* shaderInputName = NULL;
        nn::gfx::AttributeFormat format;
        int byteSize;

        if ( attribute.type == DEMO_ATTRIBUTE_POSITION )
        {
            // Local coordinates
            shaderInputName = "a_position";
            format = nn::gfx::AttributeFormat_32_32_32_Float;
            byteSize = static_cast< int >( 3 * sizeof(float) );
        }
        else if ( attribute.type == DEMO_ATTRIBUTE_NORMAL )
        {
            // Normals
            shaderInputName = "a_normal";
            format = nn::gfx::AttributeFormat_32_32_32_Float;
            byteSize = static_cast< int >( 3 * sizeof(float) );
        }
        else if ( attribute.type == DEMO_ATTRIBUTE_TEXCOORD )
        {
            // Texture coordinates
            shaderInputName = "a_texCoord";
            format = nn::gfx::AttributeFormat_32_32_Float;
            byteSize = static_cast< int >( 2 * sizeof(float) );
        }
        else
        {
            // Attribute is not used by the shader.
            continue;
        }

        DEMOGfxInitShaderAttribute( shader, shaderInputName, 0, attribute.offset, format );
        vertexStride += byteSize;
    }
    DEMOGfxInitShaderVertexBuffer( shader, 0, vertexStride, 0 );

    // Slot lookups
    model->ub_inpLoc = shader->GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_ConstantBuffer, "ub_inp");
    model->samplerLoc  = shader->GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_Sampler, "s_texture");

    // One color target, blending disabled
    pipeline.blendTargetStateCount = 1;
    pipeline.blendTargetStateInfoArray[0].SetDefault();
    pipeline.blendTargetStateInfoArray[0].SetBlendEnabled( false );

    pipeline.colorTargetStateCount = 1;
    pipeline.colorTargetStateInfoArray[0].SetDefault();
    pipeline.colorTargetStateInfoArray[0].SetFormat( DEMOColorBufferInfo.GetImageFormat() );

    pipeline.Initialize( &DEMODevice );

    // Uniform buffer sized for one LeafState
    model->uniformBuffer.Initialize( sizeof( LeafState ), NULL, nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Read, 0 );
}

static void InitBlendStates()
{
    // set up blend states
    {
        nn::gfx::BlendStateInfo info;
        nn::gfx::BlendTargetStateInfo blendTargets[1];

        blendTargets[0].SetDefault();
        blendTargets[0].SetBlendEnabled(false);
        blendTargets[0].SetChannelMask(nn::gfx::ChannelMask_All);
        blendTargets[0].SetColorBlendFunction(nn::gfx::BlendFunction_Add);
        blendTargets[0].SetSourceColorBlendFactor(nn::gfx::BlendFactor_One);
        blendTargets[0].SetDestinationColorBlendFactor(nn::gfx::BlendFactor_OneMinusSourceAlpha);
        blendTargets[0].SetAlphaBlendFunction(nn::gfx::BlendFunction_Add);
        blendTargets[0].SetSourceAlphaBlendFactor(nn::gfx::BlendFactor_One);
        blendTargets[0].SetDestinationAlphaBlendFactor(nn::gfx::BlendFactor_OneMinusSourceAlpha);

        info.SetDefault();
        info.SetAlphaToCoverageEnabled(false);
        info.SetBlendTargetStateInfoArray(blendTargets, 1);
        size_t size = nn::gfx::BlendState::GetRequiredMemorySize(info);
        opaqueBlendMem = DEMOGfxAllocMEM2(size, nn::gfx::BlendState::RequiredMemoryInfo_Alignment);
        opaqueBlend.SetMemory(opaqueBlendMem, size);
        opaqueBlend.Initialize(&DEMODevice, info);

    }
}
// Initializes a simple model for rendering: loads the model data (textures
// come from TEXTURE_DIRECTORY), builds its shader pipeline, and creates a
// repeat-addressed, linearly filtered sampler for it.
static void Init(SimpleModelLeaf& model, const char* MODEL_FILE)
{
    // Model data first: the pipeline setup reads its attribute table.
    DEMOLoadModelData( &model.g_ModelData, MODEL_FILE, TEXTURE_DIRECTORY );

    InitShaderPipeline( &model );

    DEMOGfxInitSampler( &model.mySampler, &model.mySamplerSlot,
        nn::gfx::TextureAddressMode_Repeat,
        nn::gfx::FilterMode_MinLinear_MagLinear_MipLinear,
        nn::gfx::ComparisonFunction_Always );
}

// Releases what Init() created for a simple model: the model data, the
// pipeline, the uniform buffer and the sampler (in that order).
static void Free(SimpleModelLeaf& model)
{
    DEMOFreeModelData( &model.g_ModelData );

    model.g_pipeline.Finalize( &DEMODevice );

    model.uniformBuffer.Finalize();
    model.mySampler.Finalize( &DEMODevice );
}

//
// Applies a material: binds its texture with the model sampler at the model's
// sampler slot, then sets the blend state for the material type. Only opaque
// materials are supported; any other type triggers an assertion.
//
static void SetMaterial(nn::gfx::CommandBuffer* cmdbuf, SimpleModelLeaf& model, const DemoMaterialData* pMaterial)
{
    DEMOGfxTexture* textureTable = model.g_ModelData.pTextures;

    // Texture binding
    cmdbuf->SetTextureAndSampler( model.samplerLoc, nn::gfx::ShaderStage_Pixel,
        textureTable[pMaterial->textureIndex].GetDescriptorSlot( 0 ), model.mySamplerSlot );

    // Rendering state
    if ( pMaterial->type == DEMO_MATERIAL_OPAQUE )
    {
        // Opaque material
        cmdbuf->SetBlendState( &opaqueBlend );
    }
    else
    {
        DEMOAssert( false && "Unexpected material type!" );
    }
}

//
// Draws every mesh of the model as an indexed triangle list (16-bit indices).
// The material is re-applied only when it differs from the previous mesh's.
//
static void DrawModel(nn::gfx::CommandBuffer* cmdbuf, SimpleModelLeaf& model)
{
    DEMOGfxDebugTagIndent( "DrawModel" );

    DemoModelData& data = model.g_ModelData;
    int activeMaterial = -1;  // no material applied yet

    for ( int meshIdx = 0; meshIdx < data.header.meshCount; meshIdx++ )
    {
        const DemoMeshData& mesh = data.meshes[meshIdx];

        // Switch material only when it changes.
        if ( activeMaterial != mesh.materialIndex )
        {
            activeMaterial = mesh.materialIndex;
            SetMaterial( cmdbuf, model, &data.materials[mesh.materialIndex] );
        }

        // Vertex data for this mesh starts at its offset inside the shared buffer.
        nn::gfx::GpuAddress vertexStart = data.attributeBuffer.buffer.gpuAddress;
        vertexStart.Offset( mesh.attributeOffset );
        cmdbuf->SetVertexBuffer( 0, vertexStart, mesh.attributeStride,
            data.attributeBuffer.size - mesh.attributeOffset );

        // Index data likewise.
        nn::gfx::GpuAddress indexStart = data.indexBuffer.buffer.gpuAddress;
        indexStart.Offset( mesh.indexOffset );
        cmdbuf->DrawIndexed( nn::gfx::PrimitiveTopology_TriangleList, nn::gfx::IndexFormat_Uint16,
            indexStart, mesh.indexCount, 0 );
    }

    DEMOGfxDebugTagUndent();
}


} // namespace SimpleModelLeaf

// ----- Surface Information

// Width and height of the demo color buffer, queried on each use.
#define SURFACE_WIDTH  ( DEMOColorBufferInfo.GetWidth() )
#define SURFACE_HEIGHT  ( DEMOColorBufferInfo.GetHeight() )

// Texture size for calculations
static const int TEX_SIZE = 256;

// ----- Shader information for leaf rendering (default and "Hlslcc" variant)
static const char* const LEAF_SHADER_FILE[] =
{
    "shaders/leaf/leafInstance",
    "shaders/leaf/leafInstanceHlslcc",
};

// Texture asset paths: leaf color texture and ground height map.
static const char* const COLOR_FILE = "textures/leaf/leaf";
static const char* const HEIGHTMAP_FILE = "textures/leaf/groundHeightMap";

// Pipeline for rendering leaves
static DEMOGfxPipeline s_leafPipeline;

// ----- Texture
// NOTE(review): presumably loaded from COLOR_FILE and HEIGHTMAP_FILE - the
// loading code is outside this section.
static DEMOGfxTexture s_colorTex;
static DEMOGfxTexture s_groundTex;

// Locations
// Slots of the leaf shader inputs.
// NOTE(review): by name these are the position / texcoord / instance
// attributes and the simulation-position and base-map samplers - confirm
// against the lookup code.
static int s_posLoc;
static int s_texLoc;
static int s_insLoc;
static int s_instancingTetraVtxXLoc;
static int s_instancingTetraVtxYLoc;
static int s_instancingTetraVtxZLoc;
static int s_baseMapLoc;

// Constant Buffer Objects
static int s_ubInpLoc;

// Layout of a transform constant block: model, view and projection matrices.
struct UbInp {
    Mtx44   modelMtx44;
    Mtx44   viewMtx44;
    Mtx44   projMtx44;
};

// Pair of uniform buffers for leaf rendering.
// NOTE(review): the [ 2 ] dimension here and below presumably implements
// double buffering across frames - confirm.
DEMOGfxBuffer s_leafUniformBuffer[ 2 ];

// Number of tree instances, and one pair of uniform buffers per tree.
static const int NUM_TREE = 16;
static DEMOGfxBuffer s_ubBufferTree[ NUM_TREE ][ 2 ];

// Pair of uniform buffers for the terrain.
static DEMOGfxBuffer s_ubBufferTerrain[ 2 ];

// Matrices
static Mtx44 s_modelMtx44;
static Mtx44 s_viewMtx44;
static Mtx44 s_projMtx44;

//---------------------------------------------------------------------------*
//  Model Data
//---------------------------------------------------------------------------*/

// Number of indices in the leaf model (not const in this file).
static int MODEL_INDEX_COUNT = 6;

// Byte strides of the leaf vertex streams: position (3 floats),
// texcoord (2 floats) and per-instance data (2 floats).
static const int MODEL_VTX_STRIDE = (sizeof(float) * 3);
static const int MODEL_TEX_STRIDE = (sizeof(float) * 2);
static const int MODEL_INS_STRIDE = (sizeof(float) * 2);

static const int MODEL_OFFSET = 0;

// Number of leaf instances.
static int s_numInstance;

// Pointers to GPU buffers
static DEMOGfxBuffer s_posBuffer;
static DEMOGfxBuffer s_texBuffer;
static DEMOGfxBuffer s_insBuffer;
static DEMOGfxBuffer s_idxBuffer;

// Simple Model
static SimpleModelLeaf::SimpleModelLeaf s_modelTree;
static SimpleModelLeaf::SimpleModelLeaf s_modelTerrain;

// Model Position Offset
// One offset per tree, plus a single scale per axis.
static float s_offsetX[NUM_TREE];
static float s_offsetY[NUM_TREE];
static float s_offsetZ[NUM_TREE];
static float s_scaleX;
static float s_scaleY;
static float s_scaleZ;

// LegacyGPGPULeaf Buffers
// Input textures for the simulation.
// NOTE(review): by name these hold initial values per axis and per-leaf start
// times - confirm against the code that fills them.
static LegacyGPGPULeaf::Texture2D s_texInit0;
static LegacyGPGPULeaf::Texture2D s_texInitG;
static LegacyGPGPULeaf::Texture2D s_texInitX;
static LegacyGPGPULeaf::Texture2D s_texInitY;
static LegacyGPGPULeaf::Texture2D s_texInitZ;
static LegacyGPGPULeaf::Texture2D s_texStartTime;

// Simulation outputs: three render targets per axis for vertex positions
// (presumably selected with s_Curr / s_Prev / s_Next - confirm) and one
// force target per axis.
static LegacyGPGPULeaf::RenderTarget s_texVtxX[3];
static LegacyGPGPULeaf::RenderTarget s_texVtxY[3];
static LegacyGPGPULeaf::RenderTarget s_texVtxZ[3];
static LegacyGPGPULeaf::RenderTarget s_texForceX;
static LegacyGPGPULeaf::RenderTarget s_texForceY;
static LegacyGPGPULeaf::RenderTarget s_texForceZ;

// The three pixel-shader passes of the simulation.
static LegacyGPGPULeaf::Pass s_verletPass;
static LegacyGPGPULeaf::Pass s_forcePass;
static LegacyGPGPULeaf::Pass s_initPass;

// Shader Resource locations
// Sampler slots of the simulation shaders; -1 until looked up. The
// SetInput*() helpers treat a negative slot as "not found" and print a
// warning instead of binding.
static int s_initPassSrcTexLoc = -1;
static int s_leafCurrVtxXLoc = -1;
static int s_leafCurrVtxYLoc = -1;
static int s_leafCurrVtxZLoc = -1;
static int s_leafPrevVtxXLoc = -1;
static int s_leafPrevVtxYLoc = -1;
static int s_leafPrevVtxZLoc = -1;
static int s_leafForceXLoc = -1;
static int s_leafForceYLoc = -1;
static int s_leafForceZLoc = -1;
static int s_leafStartTimeLoc = -1;
static int s_leafDepthMapLoc = -1;
static int s_leafForcePassCurrVtxXLoc = -1;
static int s_leafForcePassCurrVtxYLoc = -1;
static int s_leafForcePassCurrVtxZLoc = -1;
static int s_leafForcePassNextVtxXLoc = -1;
static int s_leafForcePassNextVtxYLoc = -1;
static int s_leafForcePassNextVtxZLoc = -1;
static int s_leafForcePassStartTimeLoc = -1;

// Program States
// Indices into s_texVtxX/Y/Z[3]; rotated at the end of RunSimulationShader.
static int s_Curr = 0;
static int s_Prev = 1;
static int s_Next = 2;
// Set by DrawScene once InitSimulationShader has run; later frames run the simulation instead.
static bool s_doneInit = false;

// While false, DrawPerformanceMetrics prints the overlay.
// NOTE(review): no write to this flag is visible in this part of the file.
static bool s_firstFrame;
// Incremented at the end of DrawScene; (s_frameCount % 2) selects which uniform buffer copy is written.
static int s_frameCount = 0;
// Time sampled by the previous DrawScene call; used to compute the frame delta.
static OSTime s_lastTime = 0;

// Uniform data handed to the verlet and force passes.
static LeafState g_State;

// Camera definition passed to MTXLookAt: up vector, look-at target and camera position.
static Vec     s_up = {0.0f,  1.0f, 0.0f};
static Vec  s_objPt = {0.0f, 900.0f, 200.0f};
static Vec s_camLoc = {-2000.0f, 200.0f, -400.0f};

// Prototype
// Forward declarations of file-local functions defined further down in this file.
static void InitCamera(Mtx44 resultProjMtx44, Mtx44 resultViewMtx44);
static void InitScene();
static void DrawScene();
static void RunSimulationShader();
static void InitSimulationShader();
static void ProcessPad();
static void CreateLeafSimulationDataAndShaders();

// Builds the camera matrices.
//   resultProjMtx44 : receives the perspective projection for the SURFACE_WIDTH x SURFACE_HEIGHT surface
//   resultViewMtx44 : receives the look-at view built from s_camLoc, s_up and s_objPt
static void InitCamera(Mtx44 resultProjMtx44, Mtx44 resultViewMtx44)
{
    // Projection parameters
    const float fov         = 60.0f;
    const float nearPlane   = 10.0f;
    const float farPlane    = 100000.0f;
    const float aspectRatio = static_cast< float >( SURFACE_WIDTH ) / static_cast< float >( SURFACE_HEIGHT );

    MTXPerspective(resultProjMtx44, fov, aspectRatio, nearPlane, farPlane);

    // The look-at matrix is produced as a row major 3x4 matrix and then widened to 4x4
    Mtx viewMtx34;
    MTXLookAt(viewMtx34, &s_camLoc, &s_up, &s_objPt);
    MTX34To44(viewMtx34, resultViewMtx44);
}

static void SetupLeafSimulationInputLocations()
{
    s_initPassSrcTexLoc = s_initPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_sourceTexture" );
    s_leafCurrVtxXLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafCurrVtxX" );
    s_leafCurrVtxYLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafCurrVtxY");
    s_leafCurrVtxZLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafCurrVtxZ");
    s_leafPrevVtxXLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafPrevVtxX");
    s_leafPrevVtxYLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafPrevVtxY");
    s_leafPrevVtxZLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafPrevVtxZ");
    s_leafForceXLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafForceX");
    s_leafForceYLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafForceY");
    s_leafForceZLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafForceZ");
    s_leafStartTimeLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafStartTime");
    s_leafDepthMapLoc = s_verletPass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafDepthMap");
    s_leafForcePassCurrVtxXLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafCurrVtxX" );
    s_leafForcePassCurrVtxYLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafCurrVtxY" );
    s_leafForcePassCurrVtxZLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafCurrVtxZ" );
    s_leafForcePassNextVtxXLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafNextVtxX" );
    s_leafForcePassNextVtxYLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafNextVtxY" );
    s_leafForcePassNextVtxZLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafNextVtxZ" );
    s_leafForcePassStartTimeLoc = s_forcePass.pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel,  nn::gfx::ShaderInterfaceType_Sampler, "s_leafStartTime" );
}

static void SetupLeafSimulationPasses()
{
    // RenderTarget Setup
    for ( int i = 0; i < 3; ++i )
    {
        LegacyGPGPULeaf::InitRenderTarget(s_texVtxX[i], TEX_SIZE, TEX_SIZE,
            nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT);
        LegacyGPGPULeaf::InitRenderTarget(s_texVtxY[i], TEX_SIZE, TEX_SIZE,
            nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT);
        LegacyGPGPULeaf::InitRenderTarget(s_texVtxZ[i], TEX_SIZE, TEX_SIZE,
            nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT);
    }
    LegacyGPGPULeaf::InitRenderTarget(s_texForceX, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT);
    LegacyGPGPULeaf::InitRenderTarget(s_texForceY, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT);
    LegacyGPGPULeaf::InitRenderTarget(s_texForceZ, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT);

    nn::gfx::ImageFormat initFormats[] = { nn::gfx::ImageFormat_R32_G32_B32_A32_Float };
    nn::gfx::ImageFormat simulationFormats[] = { nn::gfx::ImageFormat_R32_G32_B32_A32_Float,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float };

    // Legacy GPGPU Simulation Shader Setup
    LegacyGPGPULeaf::InitPass(s_initPass, COPY_TEXTURE_SHADERS[g_ShaderFileIdx], TEX_SIZE, TEX_SIZE, 1, initFormats);
    LegacyGPGPULeaf::InitPass(s_verletPass, LEAF_SIM_VERLET_SHADERS[g_ShaderFileIdx], TEX_SIZE, TEX_SIZE, 3, simulationFormats);
    LegacyGPGPULeaf::InitPass(s_forcePass, LEAF_SIM_FORCE_SHADERS[g_ShaderFileIdx], TEX_SIZE, TEX_SIZE, 3, simulationFormats);
}

static void InitLeafStartTime(int size)
{
    // Falling Leaf start timer
    int startTime = 0;
    float* initStartTime = reinterpret_cast< float* >( DEMOAlloc( TEX_SIZE * TEX_SIZE * 2 * sizeof( float ) ) );
    for ( int i = 0; i < size / 4; ++i )
    {
        if ( 4 * i + 3 >= size - 1 )
        {
            break;
        }

        for ( int j = 0; j < NUM_TREE; ++j )
        {
            initStartTime[ NUM_TREE * i + j ] = static_cast< float >( ++startTime );
        }
    }
    LegacyGPGPULeaf::InitTexture2D(s_texStartTime, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_Float, LegacyGPGPULeaf::POINT, initStartTime);
    DEMOFree(initStartTime);

}

static void CreateLeafSimulationDataAndShaders()
{
    void * modelBuf;
    u32 modelLen[2];

    // Initial layout data for each Leaf (enough for one tree)
    modelBuf = DEMOGfxLoadAssetFile(LEAF_DATA_FILE, &modelLen[0]);
    int size = modelLen[0] / sizeof(DEMO_F32x3);
    DEMO_F32x3* initPos = reinterpret_cast< DEMO_F32x3* >( DEMOAlloc(modelLen[0]) );
    memcpy(initPos, modelBuf, modelLen[0]);
#if !NN_GFX_IS_TARGET_GX
    // swap data
    DEMOSwapBuffer32(initPos, modelLen[0]);
#endif
    DEMOFree(modelBuf);

    // Total Leaf count (number of instances)
    s_numInstance = size / 4 * NUM_TREE;
    if ( s_numInstance > TEX_SIZE * TEX_SIZE )
    {
        s_numInstance = TEX_SIZE * TEX_SIZE;
    }

    // Tree layout
    const int split = 4;
    for ( int i = 0; i < split; ++i )
    {
        for ( int j = 0; j < split; ++j )
        {
            if ( split * i + j >= NUM_TREE )
            {
                continue;
            }

            float x = -1000.0f + 750.0f * i;
            float z = -1000.0f + 750.0f * j;
            s_offsetX[ split * i + j ] = x;
            s_offsetY[ split * i + j ] = 0.0f;
            s_offsetZ[ split * i + j ] = z;
        }
    }
    s_scaleX = 2.0f;
    s_scaleY = 2.0f;
    s_scaleZ = 2.0f;

    // Convert initial coordinates for simulation vertices (4 vertices per Leaf) from SOA format to textures
    DEMO_F32x4* init = reinterpret_cast< DEMO_F32x4* >( DEMOAlloc( TEX_SIZE * TEX_SIZE * 2 * sizeof( DEMO_F32x4 ) ) );
    // 0
    for ( int i = 0; i < TEX_SIZE * TEX_SIZE; ++i )
    {
        init[i].u.v.x = 0.0f;
        init[i].u.v.y = 0.0f;
        init[i].u.v.z = 0.0f;
        init[i].u.v.w = 0.0f;
    }
    LegacyGPGPULeaf::InitTexture2D(s_texInit0, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT, init);

    // Gravity
    for ( int i = 0; i < size / 4; ++i )
    {
        if ( 4 * i + 3 >= size - 1 )
        {
            break;
        }
        for ( int j = 0; j < NUM_TREE; ++j )
        {
            init[ NUM_TREE * i + j ].u.v.x = -9.8f;
            init[ NUM_TREE * i + j ].u.v.y = -9.8f;
            init[ NUM_TREE * i + j ].u.v.z = -9.8f;
            init[ NUM_TREE * i + j ].u.v.w = -9.8f;
        }
    }
    LegacyGPGPULeaf::InitTexture2D(s_texInitG, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT, init);

    // Position X
    for ( int i = 0; i < size / 4; ++i )
    {
        if ( 4 * i + 3 >= size - 1 )
        {
            break;
        }
        for ( int j = 0; j < NUM_TREE; ++j )
        {
            init[ NUM_TREE * i + j ].u.v.x = initPos[ 4 * i + 0 ].u.v.x + s_offsetX[ j ];
            init[ NUM_TREE * i + j ].u.v.y = initPos[ 4 * i + 1 ].u.v.x + s_offsetX[ j ];
            init[ NUM_TREE * i + j ].u.v.z = initPos[ 4 * i + 2 ].u.v.x + s_offsetX[ j ];
            init[ NUM_TREE * i + j ].u.v.w = initPos[ 4 * i + 3 ].u.v.x + s_offsetX[ j ];
        }

    }
    LegacyGPGPULeaf::InitTexture2D(s_texInitX, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float , LegacyGPGPULeaf::POINT, init);

    // Position Y
    for ( int i = 0; i < size / 4; ++i )
    {
        if ( 4 * i + 3 >= size - 1 )
        {
            break;
        }
        for ( int j = 0; j < NUM_TREE; ++j )
        {
            init[ NUM_TREE * i + j ].u.v.x = initPos[ 4 * i + 0 ].u.v.y + s_offsetY[ j ];
            init[ NUM_TREE * i + j ].u.v.y = initPos[ 4 * i + 1 ].u.v.y + s_offsetY[ j ];
            init[ NUM_TREE * i + j ].u.v.z = initPos[ 4 * i + 2 ].u.v.y + s_offsetY[ j ];
            init[ NUM_TREE * i + j ].u.v.w = initPos[ 4 * i + 3 ].u.v.y + s_offsetY[ j ];
        }

    }
    LegacyGPGPULeaf::InitTexture2D(s_texInitY, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float, LegacyGPGPULeaf::POINT, init);

    // Position Z
    for ( int i = 0; i < size / 4; ++i )
    {
        if ( 4 * i + 3 >= size - 1 )
        {
            break;
        }
        for ( int j = 0; j < NUM_TREE; ++j )
        {
            init[ NUM_TREE * i + j ].u.v.x = initPos[ 4 * i + 0 ].u.v.z + s_offsetZ[ j ];
            init[ NUM_TREE * i + j ].u.v.y = initPos[ 4 * i + 1 ].u.v.z + s_offsetZ[ j ];
            init[ NUM_TREE * i + j ].u.v.z = initPos[ 4 * i + 2 ].u.v.z + s_offsetZ[ j ];
            init[ NUM_TREE * i + j ].u.v.w = initPos[ 4 * i + 3 ].u.v.z + s_offsetZ[ j ];
        }

    }
    LegacyGPGPULeaf::InitTexture2D(s_texInitZ, TEX_SIZE, TEX_SIZE,
        nn::gfx::ImageFormat_R32_G32_B32_A32_Float , LegacyGPGPULeaf::POINT, init);

    DEMOFree(init);
    DEMOFree(initPos);

    InitLeafStartTime(size);

    SetupLeafSimulationPasses();

    SetupLeafSimulationInputLocations();

} // NOLINT(impl/function_size)

// Loads the two file-based textures: s_colorTex from COLOR_FILE and
// s_groundTex from HEIGHTMAP_FILE. Each load is checked with ASSERT.
static void InitTextures()
{
    // Color texture (bound as the leaf base map when drawing)
    bool fOK = s_colorTex.Initialize( COLOR_FILE );
    ASSERT(fOK && "Unable to load color texture file");

    // Height map (bound as the depth map of the verlet pass)
    fOK = s_groundTex.Initialize( HEIGHTMAP_FILE );
    ASSERT(fOK && "Unable to load heightmap file");
}

// Initialize a pipeline object.
//   pipeline    : pipeline to configure and create on DEMODevice
//   blendEnable : enables blending on the single color target
// The pipeline uses scissoring, depth test and depth write with a "less" comparison,
// one color target in the format of the demo color buffer, and an
// (One, OneMinusSourceAlpha) additive blend equation for both color and alpha.
static void InitPipeline(DEMOGfxPipeline* pipeline, bool blendEnable)
{
    pipeline->SetDefaults();

    pipeline->rasterizerStateInfo.SetScissorEnabled(true);

    // Depth state
    pipeline->depthStencilStateInfo.SetDepthTestEnabled(true);
    pipeline->depthStencilStateInfo.SetDepthWriteEnabled(true);
    pipeline->depthStencilStateInfo.SetDepthComparisonFunction( nn::gfx::ComparisonFunction_Less );

    // A single color target with a matching blend target
    pipeline->blendTargetStateCount = 1;
    pipeline->colorTargetStateCount = 1;

    pipeline->colorTargetStateInfoArray[ 0 ].SetDefault();
    pipeline->colorTargetStateInfoArray[ 0 ].SetFormat( DEMOColorBufferInfo.GetImageFormat() );

    // Blend equation: result = source * One + destination * (1 - source alpha)
    pipeline->blendTargetStateInfoArray[0].SetDefault();
    pipeline->blendTargetStateInfoArray[0].SetBlendEnabled(blendEnable);
    pipeline->blendTargetStateInfoArray[0].SetColorBlendFunction(nn::gfx::BlendFunction_Add);
    pipeline->blendTargetStateInfoArray[0].SetSourceColorBlendFactor(nn::gfx::BlendFactor_One);
    pipeline->blendTargetStateInfoArray[0].SetDestinationColorBlendFactor(nn::gfx::BlendFactor_OneMinusSourceAlpha);
    pipeline->blendTargetStateInfoArray[0].SetAlphaBlendFunction(nn::gfx::BlendFunction_Add);
    pipeline->blendTargetStateInfoArray[0].SetSourceAlphaBlendFactor(nn::gfx::BlendFactor_One);
    pipeline->blendTargetStateInfoArray[0].SetDestinationAlphaBlendFactor(nn::gfx::BlendFactor_OneMinusSourceAlpha);

    pipeline->Initialize( &DEMODevice );
}

static void SetupLeafInstancingData()
{
    //
    // Leaf Instancing Setup
    //
    DEMOGfxLoadShadersFromFile(&s_leafPipeline.shaders, 0, LEAF_SHADER_FILE[g_ShaderFileIdx]);
    // Uniform Location Lookup
    s_ubInpLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_ConstantBuffer, "ub_inp");

    // Attribute Location Lookup
    s_posLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Input, "a_position");
    s_texLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Input, "a_texCoord");
    s_insLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Input, "a_instanceCoord");

    s_instancingTetraVtxXLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Sampler, "s_instancingTetraVtxX");
    s_instancingTetraVtxYLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Sampler, "s_instancingTetraVtxY");
    s_instancingTetraVtxZLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Sampler, "s_instancingTetraVtxZ");
    s_baseMapLoc = s_leafPipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_Sampler, "s_basemap");

    // position setup
    DEMOGfxInitShaderAttribute(&s_leafPipeline.shaders, "a_position", s_posLoc, MODEL_OFFSET * sizeof(float), nn::gfx::AttributeFormat_32_32_32_Float);
    DEMOGfxInitShaderAttribute(&s_leafPipeline.shaders, "a_texCoord", s_texLoc, MODEL_OFFSET * sizeof(float), nn::gfx::AttributeFormat_32_32_Float);
    DEMOGfxInitShaderAttribute(&s_leafPipeline.shaders, "a_instanceCoord", s_insLoc, MODEL_OFFSET * sizeof(float), nn::gfx::AttributeFormat_32_32_Float);
    DEMOGfxInitShaderVertexBuffer(&s_leafPipeline.shaders, s_posLoc, sizeof(float) * 3, 0);
    DEMOGfxInitShaderVertexBuffer(&s_leafPipeline.shaders, s_texLoc, sizeof(float) * 2, 0);
    DEMOGfxInitShaderVertexBuffer(&s_leafPipeline.shaders, s_insLoc, sizeof(float) * 2, 1); // instanced

    // position buffer
    // 4 vertices quad
    float posBuf[ 3 * 4 ];
    float scale = 30.0f;
    posBuf[0] = 0.5f * scale;
    posBuf[1] = 0.1f * scale;
    posBuf[2] = 0.5f * scale;
    posBuf[3] = 0.5f * scale;
    posBuf[4] = 0.1f * scale;
    posBuf[5] = -0.5f * scale;
    posBuf[6] = -0.5f * scale;
    posBuf[7] = 0.0f * scale;
    posBuf[8] = -0.5f * scale;
    posBuf[9] = -0.5f * scale;
    posBuf[10] = 0.0f * scale;
    posBuf[11] = 0.5f * scale;
    s_posBuffer.Initialize( MODEL_VTX_STRIDE * 4, posBuf, nn::gfx::GpuAccess_VertexBuffer, 0);

    // tex coord buffer
    // 4 vertices quad
    float texBuf[ 4 * 2 ];
    texBuf[0] = 1.0f;
    texBuf[1] = 0.0f;
    texBuf[2] = 1.0f;
    texBuf[3] = 1.0f;
    texBuf[4] = 0.0f;
    texBuf[5] = 1.0f;
    texBuf[6] = 0.0f;
    texBuf[7] = 0.0f;
    s_texBuffer.Initialize(MODEL_TEX_STRIDE * 4, texBuf, nn::gfx::GpuAccess_VertexBuffer | nn::gfx::GpuAccess_Read, 0 );

    // instance matrix texture coord
    float* pInsBuf = new float[ MODEL_INS_STRIDE * s_numInstance / sizeof( float ) ];
    for (int i = 0; i < s_numInstance; ++i)
    {
        float w = TEX_SIZE;
        float h = TEX_SIZE;
        float idx = (float)i;
        float v = floor(idx / w);
        float u = idx - (w*v);
        pInsBuf[2 * i + 0] = (u + 0.25f) / w;
        pInsBuf[2 * i + 1] = (v + 0.25f) / h;
    }
    s_insBuffer.Initialize( MODEL_INS_STRIDE * s_numInstance, pInsBuf, nn::gfx::GpuAccess_VertexBuffer | nn::gfx::GpuAccess_Read, 0 );
    delete[] pInsBuf;

    // index buffer setup
    // 2 triangles

    // For performance reasons we create the index buffer so that it is only visible by the GPU.
    size_t indexSize = sizeof(u16) * MODEL_INDEX_COUNT;
    size_t indexAlign;
    {
        nn::gfx::Buffer::InfoType bufferInfo;

        bufferInfo.SetDefault();
        bufferInfo.SetGpuAccessFlags(nn::gfx::GpuAccess_IndexBuffer);
        bufferInfo.SetSize(indexSize);
        indexAlign = nn::gfx::Buffer::GetBufferAlignment(&DEMODevice, bufferInfo);
    }
    {
        // Align up the memory for any memory pool restrictions
        nn::gfx::MemoryPool::InfoType info;
        info.SetDefault();
        info.SetMemoryPoolProperty( nn::gfx::MemoryPoolProperty_CpuInvisible | nn::gfx::MemoryPoolProperty_GpuCached );
        indexAlign = std::max( indexAlign, nn::gfx::MemoryPool::GetPoolMemoryAlignment( &DEMODevice, info ) );
        indexSize = std::max( indexSize, nn::gfx::MemoryPool::GetPoolMemorySizeGranularity( &DEMODevice, info ) );
    }
    u16* indexMem = static_cast<u16*>(DEMOGfxAllocMEM2(indexSize, indexAlign));
    indexMem[0] = 0;
    indexMem[1] = 3;
    indexMem[2] = 1;
    indexMem[3] = 1;
    indexMem[4] = 3;
    indexMem[5] = 2;

    DEMOGfxMemPool* pPool = DEMOGfxMemPool::CreateFromMemory(indexMem, indexSize, nn::gfx::MemoryPoolProperty_CpuInvisible | nn::gfx::MemoryPoolProperty_GpuCached);
    s_idxBuffer.CreateFromPool(pPool, indexSize, nn::gfx::GpuAccess_IndexBuffer);
}

// The init function for the rendering portions of this app.
// Loads the models, creates the simulation data and passes, the full-surface
// viewport, the leaf instancing buffers, the textures, the double-buffered
// uniform buffers, the initial wind state and finally the leaf pipeline.
static void InitScene()
{
    // Models and their blend states
    SimpleModelLeaf::Init(s_modelTree, TREE_MODEL_FILE);
    SimpleModelLeaf::Init(s_modelTerrain, GROUND_MODEL_FILE);
    SimpleModelLeaf::InitBlendStates();

    // Simulation framework and data
    LegacyGPGPULeaf::Init();
    CreateLeafSimulationDataAndShaders();

    // Viewport / scissor covering the whole surface
    const float surfaceWidth = static_cast< float >( SURFACE_WIDTH );
    const float surfaceHeight = static_cast< float >( SURFACE_HEIGHT );
    DEMOGfxSetViewportScissorState( &s_fullViewport, &s_fullViewportMem,
        0.0f, 0.0f,
        surfaceWidth, surfaceHeight,
        0.0f, 1.0f,
        surfaceHeight, false);

    SetupLeafInstancingData();

    InitTextures();

    // Uniform buffers: two copies of each, selected per frame by (s_frameCount % 2)
    const int uniformAccess = nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Read;

    // Leaves
    for ( int copy = 0; copy < 2; ++copy )
    {
        s_leafUniformBuffer[ copy ].Initialize( sizeof( UbInp ), NULL, uniformAccess, 0 );
    }

    // One pair per tree
    for ( int tree = 0; tree < NUM_TREE; ++tree )
    {
        for ( int copy = 0; copy < 2; ++copy )
        {
            s_ubBufferTree[ tree ][ copy ].Initialize( sizeof( UbInp ), NULL, uniformAccess, 0 );
        }
    }

    // Terrain
    for ( int copy = 0; copy < 2; ++copy )
    {
        s_ubBufferTerrain[ copy ].Initialize( sizeof( UbInp ), NULL, uniformAccess, 0 );
    }

    // Init State: wind is (0, 1, 0, 0) and the leaf counter starts at zero
    for ( int component = 0; component < 4; ++component )
    {
        g_State.wind[ component ] = ( component == 1 ) ? 1.0f : 0.0f;
    }
    g_State.leaf_counter = 0.0f;

    // Set up pipeline here
    InitPipeline(&s_leafPipeline, false);
}

static void RunSimulationShader()
{
    {
        LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texVtxX[s_Next], &s_texVtxY[s_Next], &s_texVtxZ[s_Next] };
        LegacyGPGPULeaf::ClearPass(s_verletPass, rt, 3);
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texVtxX[s_Curr], "s_leafCurrVtxX", s_leafCurrVtxXLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texVtxY[s_Curr], "s_leafCurrVtxY", s_leafCurrVtxYLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texVtxZ[s_Curr], "s_leafCurrVtxZ", s_leafCurrVtxZLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texVtxX[s_Prev], "s_leafPrevVtxX", s_leafPrevVtxXLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texVtxY[s_Prev], "s_leafPrevVtxY", s_leafPrevVtxYLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texVtxZ[s_Prev], "s_leafPrevVtxZ", s_leafPrevVtxZLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texForceX, "s_leafForceX", s_leafForceXLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texForceY, "s_leafForceY", s_leafForceYLoc );
        LegacyGPGPULeaf::SetInputRenderTarget(s_verletPass, s_texForceZ, "s_leafForceZ", s_leafForceZLoc );
        LegacyGPGPULeaf::SetInputTexture2D(s_verletPass, s_texStartTime, "s_leafStartTime", s_leafStartTimeLoc );
        LegacyGPGPULeaf::SetInputTextureView(s_verletPass, s_groundTex.GetDescriptorSlot( 0 ),
            (LegacyGPGPULeaf::g_Common.mySamplerSlot[LegacyGPGPULeaf::LINEAR]), "s_leafDepthMap", s_leafDepthMapLoc);
        LegacyGPGPULeaf::SetInputUniform(s_verletPass, &g_State, sizeof(g_State));
        LegacyGPGPULeaf::ComputePass(s_verletPass, rt, 3);
    }
    {
        LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texForceX, &s_texForceY, &s_texForceZ };
        LegacyGPGPULeaf::ClearPass(s_forcePass, rt, 3);
        LegacyGPGPULeaf::SetInputRenderTarget(s_forcePass, s_texVtxX[s_Curr], "s_leafCurrVtxX", s_leafForcePassCurrVtxXLoc);
        LegacyGPGPULeaf::SetInputRenderTarget(s_forcePass, s_texVtxY[s_Curr], "s_leafCurrVtxY", s_leafForcePassCurrVtxYLoc);
        LegacyGPGPULeaf::SetInputRenderTarget(s_forcePass, s_texVtxZ[s_Curr], "s_leafCurrVtxZ", s_leafForcePassCurrVtxZLoc);
        LegacyGPGPULeaf::SetInputRenderTarget(s_forcePass, s_texVtxX[s_Next], "s_leafNextVtxX", s_leafForcePassNextVtxXLoc);
        LegacyGPGPULeaf::SetInputRenderTarget(s_forcePass, s_texVtxY[s_Next], "s_leafNextVtxY", s_leafForcePassNextVtxYLoc);
        LegacyGPGPULeaf::SetInputRenderTarget(s_forcePass, s_texVtxZ[s_Next], "s_leafNextVtxZ", s_leafForcePassNextVtxZLoc);
        LegacyGPGPULeaf::SetInputTexture2D(s_forcePass, s_texStartTime, "s_leafStartTime", s_leafForcePassStartTimeLoc );
        LegacyGPGPULeaf::SetInputUniform(s_forcePass, &g_State, sizeof(g_State));
        LegacyGPGPULeaf::ComputePass(s_forcePass, rt, 3);
    }

#if NN_GFX_IS_TARGET_D3D
    DEMOCommandBuffer.End();
    DEMOQueue.ExecuteCommand(&DEMOCommandBuffer, NULL);
    DEMOCommandBuffer.Begin();
#endif

    // Swap Buffers
    int temp = s_Curr;
    s_Curr = s_Next;
    s_Next = s_Prev;
    s_Prev = temp;
}

static void InitSimulationShader()
{
    for ( int i = 0; i < 3; ++i )
    {
        {
            LegacyGPGPULeaf::SetInputTexture2D(s_initPass, s_texInitX, "s_sourceTexture", s_initPassSrcTexLoc);
            LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texVtxX[i] };
            LegacyGPGPULeaf::ClearPass(s_initPass, rt, 1);
            LegacyGPGPULeaf::ComputePass(s_initPass, rt, 1);
        }
        {
            LegacyGPGPULeaf::SetInputTexture2D(s_initPass, s_texInitY, "s_sourceTexture", s_initPassSrcTexLoc);
            LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texVtxY[i] };
            LegacyGPGPULeaf::ClearPass(s_initPass, rt, 1);
            LegacyGPGPULeaf::ComputePass(s_initPass, rt, 1);
        }
        {
            LegacyGPGPULeaf::SetInputTexture2D(s_initPass, s_texInitZ, "s_sourceTexture", s_initPassSrcTexLoc);
            LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texVtxZ[i] };
            LegacyGPGPULeaf::ClearPass(s_initPass, rt, 1);
            LegacyGPGPULeaf::ComputePass(s_initPass, rt, 1);
        }
    }
    {
        LegacyGPGPULeaf::SetInputTexture2D(s_initPass, s_texInit0, "s_sourceTexture", s_initPassSrcTexLoc);
        LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texForceX };
        LegacyGPGPULeaf::ClearPass(s_initPass, rt, 1);
        LegacyGPGPULeaf::ComputePass(s_initPass, rt, 1);
    }
    {
        LegacyGPGPULeaf::SetInputTexture2D(s_initPass, s_texInitG, "s_sourceTexture", s_initPassSrcTexLoc);
        LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texForceY };
        LegacyGPGPULeaf::ClearPass(s_initPass, rt, 1);
        LegacyGPGPULeaf::ComputePass(s_initPass, rt, 1);
    }
    {
        LegacyGPGPULeaf::SetInputTexture2D(s_initPass, s_texInit0, "s_sourceTexture", s_initPassSrcTexLoc);
        LegacyGPGPULeaf::RenderTarget* rt[] = { &s_texForceZ };
        LegacyGPGPULeaf::ClearPass(s_initPass, rt, 1);
        LegacyGPGPULeaf::ComputePass(s_initPass, rt, 1);
    }
}

static void DrawLeaves()
{
    nn::gfx::CommandBuffer *cmdbuf = &DEMOCommandBuffer;

    DEMOGfxDebugTagIndent( "DrawLeaves" );

    cmdbuf->SetPipeline( &s_leafPipeline.pipeline );
    cmdbuf->InvalidateMemory( nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_VertexBuffer | nn::gfx::GpuAccess_IndexBuffer );

    // Bind vertex & color & instance buffer
    cmdbuf->SetVertexBuffer( s_posLoc, s_posBuffer.gpuAddress, MODEL_VTX_STRIDE, s_posBuffer.size );
    cmdbuf->SetVertexBuffer( s_texLoc, s_texBuffer.gpuAddress, MODEL_TEX_STRIDE, s_texBuffer.size );
    cmdbuf->SetVertexBuffer( s_insLoc, s_insBuffer.gpuAddress, MODEL_INS_STRIDE, s_insBuffer.size );

    // Set Leaf Position Textures
    cmdbuf->SetTextureAndSampler( s_instancingTetraVtxXLoc, nn::gfx::ShaderStage_Vertex, s_texVtxX[ s_Curr ].textureSlot, *s_texVtxX[ s_Curr ].samplerSlot );
    cmdbuf->SetTextureAndSampler( s_instancingTetraVtxYLoc, nn::gfx::ShaderStage_Vertex, s_texVtxY[ s_Curr ].textureSlot, *s_texVtxY[ s_Curr ].samplerSlot );
    cmdbuf->SetTextureAndSampler( s_instancingTetraVtxZLoc, nn::gfx::ShaderStage_Vertex, s_texVtxZ[ s_Curr ].textureSlot, *s_texVtxZ[ s_Curr ].samplerSlot );

    // Set Leaf Basemap Texture
    cmdbuf->SetTextureAndSampler( s_baseMapLoc, nn::gfx::ShaderStage_Pixel, s_colorTex.GetDescriptorSlot( 0 ), ( LegacyGPGPULeaf::g_Common.mySamplerSlot[ LegacyGPGPULeaf::LINEAR ] ) );

    // Update Model Matrix Uniform
    MTX44Identity( s_modelMtx44 );

    UbInp* ptr;
    ptr = s_leafUniformBuffer[ s_frameCount % 2 ].Map< UbInp >();

    // Update View Projection Matrix Uniforms
    memcpy( ptr->modelMtx44, s_modelMtx44, sizeof( s_modelMtx44 ) );
    memcpy( ptr->projMtx44, s_projMtx44, sizeof( s_projMtx44 ) );
    memcpy( ptr->viewMtx44, s_viewMtx44, sizeof( s_viewMtx44 ) );
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(ptr, sizeof(*ptr));
#endif
    s_leafUniformBuffer[ s_frameCount % 2 ].Unmap();
    cmdbuf->SetConstantBuffer( s_ubInpLoc, nn::gfx::ShaderStage_Vertex, s_leafUniformBuffer[ s_frameCount % 2 ].gpuAddress, s_leafUniformBuffer[ s_frameCount % 2 ].size );

    // Draw Leaves Using Instancing
    cmdbuf->DrawIndexed( nn::gfx::PrimitiveTopology_TriangleList, nn::gfx::IndexFormat_Uint16,
        s_idxBuffer.gpuAddress, MODEL_INDEX_COUNT, 0, s_numInstance, 0 );

    DEMOGfxDebugTagUndent();
}

static void DrawTrees()
{
    nn::gfx::CommandBuffer *cmdbuf = &DEMOCommandBuffer;

    DEMOGfxDebugTagIndent( "DrawTrees" );

    // Disable Alpha Test
    cmdbuf->SetBlendState( &SimpleModelLeaf::opaqueBlend );

    // Set shader
    cmdbuf->SetPipeline( &s_modelTree.g_pipeline.pipeline );
    cmdbuf->InvalidateMemory( nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_IndexBuffer | nn::gfx::GpuAccess_VertexBuffer );

    // Draw Trees
    Mtx44 treeModelMtx;
    for ( int tree = 0; tree < NUM_TREE; ++tree )
    {
        UbInp* ptr = s_ubBufferTree[ tree ][ s_frameCount % 2 ].Map< UbInp >();

        MTX44Copy( s_modelMtx44, treeModelMtx );
        treeModelMtx[ 0 ][ 3 ] += s_offsetX[ tree ];
        treeModelMtx[ 1 ][ 3 ] += s_offsetY[ tree ];
        treeModelMtx[ 2 ][ 3 ] += s_offsetZ[ tree ];
        treeModelMtx[ 0 ][ 0 ] *= s_scaleX;
        treeModelMtx[ 1 ][ 1 ] *= s_scaleY;
        treeModelMtx[ 2 ][ 2 ] *= s_scaleZ;

        // Update View Projection Matrix Uniforms
        memcpy( ptr->modelMtx44, treeModelMtx, sizeof( treeModelMtx ) );
        memcpy( ptr->projMtx44, s_projMtx44, sizeof( s_projMtx44 ) );
        memcpy( ptr->viewMtx44, s_viewMtx44, sizeof( s_viewMtx44 ) );
#if NN_GFX_IS_TARGET_GX
        GX2EndianSwap(ptr, sizeof(*ptr));
#endif
        s_ubBufferTree[ tree ][ s_frameCount % 2 ].Unmap();
        cmdbuf->SetConstantBuffer( s_ubInpLoc, nn::gfx::ShaderStage_Vertex, s_ubBufferTree[ tree ][ s_frameCount % 2 ].gpuAddress, s_ubBufferTree[ tree ][ s_frameCount % 2 ].size );

        SimpleModelLeaf::DrawModel( cmdbuf, s_modelTree );
    }

    DEMOGfxDebugTagUndent();
}

static void DrawTerrain()
{
    nn::gfx::CommandBuffer *cmdbuf = &DEMOCommandBuffer;

    DEMOGfxDebugTagIndent( "DrawTerrain" );

    // Draw Terrain
    Mtx44 treeModelMtx;
    MTX44Copy( s_modelMtx44, treeModelMtx );
    treeModelMtx[ 0 ][ 0 ] *= 2.0f;
    treeModelMtx[ 2 ][ 2 ] *= 2.0f;
    treeModelMtx[ 1 ][ 3 ] += -1.0f;

    UbInp* ptr = s_ubBufferTerrain[ s_frameCount % 2 ].Map< UbInp >();
    // Update View Projection Matrix Uniforms
    memcpy(ptr->modelMtx44, treeModelMtx, sizeof(treeModelMtx));
    memcpy(ptr->projMtx44, s_projMtx44, sizeof(s_projMtx44));
    memcpy(ptr->viewMtx44, s_viewMtx44, sizeof(s_viewMtx44));
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(ptr, sizeof(*ptr));
#endif
    s_ubBufferTerrain[ s_frameCount % 2 ].Unmap();
    cmdbuf->SetConstantBuffer( s_ubInpLoc, nn::gfx::ShaderStage_Vertex, s_ubBufferTerrain[ s_frameCount % 2 ].gpuAddress, s_ubBufferTerrain[ s_frameCount % 2 ].size );

    cmdbuf->SetPipeline(&s_modelTerrain.g_pipeline.pipeline);
    SimpleModelLeaf::DrawModel(cmdbuf, s_modelTerrain);

    DEMOGfxDebugTagUndent();
}

// Prints the on-screen overlay: instance count, frame rate, active leaf count,
// GPU timings of the simulation and rendering sections, and the control help.
//   dtime : duration of the previous frame in seconds; clamped to at least 1.0e-7
//           before it is inverted into a frame rate
// Nothing is printed while s_firstFrame is set.
static void DrawPerformanceMetrics( float dtime )
{
    DEMOGfxDebugTagIndent( "PrintInfo" );
    if ( !s_firstFrame )
    {
        LegacyGPGPULeaf::GetTimestampResult();

        // %u expects an unsigned argument, so the signed instance count is converted explicitly
        DEMOFontPrintf( 5, 2, "Leaf Simulation : %u leaves(instances) : %.1f fps\n",
            static_cast< unsigned int >( s_numInstance ), 1.0f / std::max( 1.0e-7f, dtime ) );

        // The leaf counter keeps growing every frame; the display is capped at the texel count
        DEMOFontPrintf( 5, 3, "Active Leaves : %u\n",
            static_cast< unsigned int >( std::min( static_cast< float >( TEX_SIZE * TEX_SIZE ), g_State.leaf_counter ) ) );

        DEMOFontPrintf(5,4, "Simulation GPU Time : %0.f usec\n", LegacyGPGPULeaf::GetPerfBeginToMark());
        DEMOFontPrintf(5,5, "Rendering  GPU Time : %0.f usec\n", LegacyGPGPULeaf::GetPerfMarkToEnd());

        // Control help
        DEMOFontPrintf(5,7, "Reset (y)");
        DEMOFontPrintf(5,8, "Activate All (x)");
        DEMOFontPrintf(5,9, "Move Camera (L Stick/R Stick)");
        DEMOFontPrintf(5,10, "Wind (up/down/left/right)");
    }

    DEMOGfxDebugTagUndent();
}

// The draw function for the rendering portions of this app
static void DrawScene()
{
    nn::gfx::CommandBuffer *cmdbuf = &DEMOCommandBuffer;
    float dtime = OSTicksToMilliseconds( OSGetTime() - s_lastTime ) / 1000.0f;
    s_lastTime = OSGetTime();

    InitCamera( s_projMtx44, s_viewMtx44 );

    DEMOGfxBeforeRender();

    LegacyGPGPULeaf::CheckPerfBegin();

    if ( s_doneInit )
    {
        RunSimulationShader();
        g_State.leaf_counter += 5.0f;
    }
    else
    {
        InitSimulationShader();
        s_doneInit = true;
    }

    LegacyGPGPULeaf::CheckPerfMark();

    // Clear buffers
    nn::gfx::ColorTargetView* pCurrentScanBuffer = DEMOGetColorBufferView();
    cmdbuf->ClearColor( pCurrentScanBuffer, 0.2f, 0.65f, 0.9f, 1.0f, NULL );
    cmdbuf->ClearDepthStencil( &DEMODepthBufferView, 1.0f, 0, nn::gfx::DepthStencilClearMode_DepthStencil, NULL );

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode( GX2_SHADER_MODE_UNIFORM_BLOCK );
#endif
    nn::gfx::ColorTargetView *rt[ 1 ];
    rt[ 0 ] = pCurrentScanBuffer;
    cmdbuf->SetRenderTargets( 1, rt, &DEMODepthBufferView );

    cmdbuf->SetViewportScissorState( &s_fullViewport );

    DrawLeaves();

    DrawTrees();

    DrawTerrain();

    // Draw Information after first frame
    LegacyGPGPULeaf::CheckPerfEnd();


    DrawPerformanceMetrics(dtime);

    DEMOGfxDoneRender();

    s_frameCount++;
}


static void ProcessPad()
{
    DEMOPadRead();
    u16 button = DEMOPadGetButton(0);

    {
        // L Stick translates the camera
        float sx = static_cast< float >( DEMOPadGetStickX( 0 ) ) / 255.0f;
        float sy = static_cast< float >( DEMOPadGetStickY( 0 ) ) / 255.0f;

        Vec vec, tempVec;
        tempVec.x =  sx * 100.0f;
        tempVec.y = 0.0f;
        tempVec.z = -sy * 100.0f;

        Mtx44 inv;
        MTX44Inverse(s_viewMtx44, inv);
        MTX44MultVecSR(inv, &tempVec, &vec);
        Vec tempCamLoc = s_camLoc;
        Vec tempObjPt = s_objPt;
        VECAdd(&vec, &tempCamLoc, &s_camLoc);
        VECAdd(&vec, &tempObjPt, &s_objPt);

        InitCamera(s_projMtx44, s_viewMtx44);
    }

    {
        // R Stick rotates the camera
        float sx = static_cast< float >( DEMOPadGetSubStickX( 0 ) ) / 255.0f;
        float sy = static_cast< float >( DEMOPadGetSubStickY( 0 ) ) / 255.0f;

        Vec eyev, tempEyev;
        VECSubtract(&s_objPt, &s_camLoc, &eyev);
        Vec wupv =  {0.0f, 1.0f, 0.0f};
        VECCrossProduct(&eyev, &s_up, &tempEyev);
        VECNormalize(&tempEyev, &eyev);

        Mtx44 rot,rot0,rot1;
        MTX44RotAxisRad(rot0, &eyev, MTXDegToRad( sy * 5.0f));
        MTX44RotAxisRad(rot1, &wupv, MTXDegToRad(-sx * 5.0f));

        MTX44Concat(rot0, rot1, rot);

        Vec camv, tempCamv;
        VECSubtract(&s_objPt, &s_camLoc, &tempCamv);
        MTX44MultVecSR(rot, &tempCamv, &camv);

        VECAdd(&camv, &s_camLoc, &s_objPt);
        Vec tempUp = s_up;
        MTX44MultVecSR(rot, &tempUp, &s_up);

        InitCamera(s_projMtx44, s_viewMtx44);
    }

    g_State.wind[3] = 0.0f;

    if(DEMO_PAD_BUTTON_X & button)
    {
        // Make them all fall
        g_State.leaf_counter = 10000000.0f;
    }

    if(DEMO_PAD_BUTTON_Y & button)
    {
        // Reset
        g_State.leaf_counter = 0.0f;
        s_doneInit = false;
    }

    if(DEMO_PAD_BUTTON_UP & button)
    {
        // Up Wind
        g_State.wind[0] = 0.0f;
        g_State.wind[1] = 1.0f;
        g_State.wind[2] = 0.0f;
        g_State.wind[3] = 3.0f;
    }

    if(DEMO_PAD_BUTTON_DOWN & button)
    {
        // Down Wind
        g_State.wind[0] = 0.0f;
        g_State.wind[1] = -1.0f;
        g_State.wind[2] = 0.0f;
        g_State.wind[3] = 3.0f;
    }

    if(DEMO_PAD_BUTTON_RIGHT & button)
    {
        // Right Wind
        Vec dir = {1,0,0};
        Vec tempDir;
        Mtx44 inv;
        MTX44Inverse(s_viewMtx44, inv);
        MTX44MultVecSR(inv, &dir, &tempDir);
        VECNormalize(&tempDir, &dir);
        g_State.wind[0] = dir.x;
        g_State.wind[1] = dir.y;
        g_State.wind[2] = dir.z;
        g_State.wind[3] = 3.0f;
    }

    if(DEMO_PAD_BUTTON_LEFT & button)
    {
        // Left Wind
        Vec dir = {-1,0,0};
        Vec tempDir;
        Mtx44 inv;
        MTX44Inverse(s_viewMtx44, inv);
        MTX44MultVecSR(inv, &dir, &tempDir);
        VECNormalize(&tempDir, &dir);
        g_State.wind[0] = dir.x;
        g_State.wind[1] = dir.y;
        g_State.wind[2] = dir.z;
        g_State.wind[3] = 3.0f;
    }
}

//extern "C" void nnMain()
// Test entry point: initializes the DEMO framework and the scene, runs the
// pad/draw loop until DEMOIsRunning() returns false, then finalizes every
// resource and shuts the framework down.
TEST(GfxLeaf, Run)
{
    int argc = nnt::GetHostArgc();
    char** argv = nnt::GetHostArgv();

    // Framework bring-up.
    DEMOInit();
    DEMOTestInit(argc, argv);
    DEMOGfxInit(argc, argv);
    DEMOFontInit();

    // 1 selects the Hlslcc shader variants, 0 the default ones (presumably
    // used as the index into the *_SHADERS path tables -- confirm at the
    // load sites).
    if (DEMOTestIsUseHlslccGlsl())
    {
        g_ShaderFileIdx = 1;
    }
    else
    {
        g_ShaderFileIdx = 0;
    }

    InitScene();

    // Main loop. s_firstFrame is true only during the first iteration.
    s_firstFrame = true;
    while (DEMOIsRunning())
    {
        ProcessPad();
        DrawScene();

        s_firstFrame = false;
    }

    // ---- Leaf rendering resources ----
    s_leafPipeline.Finalize(&DEMODevice);

    s_colorTex.Finalize();
    s_groundTex.Finalize();

    s_posBuffer.Finalize(&DEMODevice);
    s_texBuffer.Finalize(&DEMODevice);
    s_insBuffer.Finalize(&DEMODevice);
    s_idxBuffer.Finalize(&DEMODevice);

    // ---- Uniform buffers (two per owner) ----
    for (int slot = 0; slot < 2; ++slot)
    {
        s_leafUniformBuffer[slot].Finalize();
    }
    for (int slot = 0; slot < 2; ++slot)
    {
        s_ubBufferTerrain[slot].Finalize();
    }
    for (int tree = 0; tree < NUM_TREE; ++tree)
    {
        for (int slot = 0; slot < 2; ++slot)
        {
            s_ubBufferTree[ tree ][ slot ].Finalize();
        }
    }

    // ---- Simulation passes, textures and render targets ----
    LegacyGPGPULeaf::FreePass(s_verletPass);
    LegacyGPGPULeaf::FreePass(s_forcePass);
    LegacyGPGPULeaf::FreePass(s_initPass);

    LegacyGPGPULeaf::FreeTexture2D(s_texInit0);
    LegacyGPGPULeaf::FreeTexture2D(s_texInitG);
    LegacyGPGPULeaf::FreeTexture2D(s_texInitX);
    LegacyGPGPULeaf::FreeTexture2D(s_texInitY);
    LegacyGPGPULeaf::FreeTexture2D(s_texInitZ);
    LegacyGPGPULeaf::FreeTexture2D(s_texStartTime);

    for (int idx = 0; idx < 3; ++idx)
    {
        LegacyGPGPULeaf::FreeRenderTarget(s_texVtxX[idx]);
        LegacyGPGPULeaf::FreeRenderTarget(s_texVtxY[idx]);
        LegacyGPGPULeaf::FreeRenderTarget(s_texVtxZ[idx]);
    }

    LegacyGPGPULeaf::FreeRenderTarget(s_texForceX);
    LegacyGPGPULeaf::FreeRenderTarget(s_texForceY);
    LegacyGPGPULeaf::FreeRenderTarget(s_texForceZ);
    LegacyGPGPULeaf::Free();

    // ---- Models and the blend state they use ----
    SimpleModelLeaf::Free(s_modelTree);
    SimpleModelLeaf::Free(s_modelTerrain);
    SimpleModelLeaf::opaqueBlend.Finalize(&DEMODevice);
    DEMOGfxFreeMEM2(SimpleModelLeaf::opaqueBlendMem);

    // ---- Viewport/scissor state and its backing memory ----
    s_fullViewport.Finalize(&DEMODevice);
    DEMOFree(s_fullViewportMem);

    // ---- Framework shutdown ----
    DEMOFontShutdown();
    DEMOTestShutdown();
    DEMOGfxShutdown();
    DEMOShutdown();

    SUCCEED();
}
