﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>

#include <gfx/demo.h>

#include <nnt.h>
#include <nnt/nnt_Argument.h>

#if NN_GFX_IS_TARGET_GX
#include <cafe/gx2ut.h>
#endif

#if NN_GFX_IS_TARGET_NVN
#include <nvn/nvn.h>
#include <nvn/nvn_FuncPtrInline.h>
#endif

#ifdef WIN32
#define snprintf _snprintf
#endif

////////////////////////////////////////////////////
//
// Assets data, types and interface for demos
//
////////////////////////////////////////////////////

// Dimensions of the demo color buffer. Each use expands to a call on
// DEMOColorBufferInfo, so the value is queried at the point of use.
#define SURFACE_WIDTH  (DEMOColorBufferInfo.GetWidth())
#define SURFACE_HEIGHT  (DEMOColorBufferInfo.GetHeight())

// A demo GPU buffer together with the layout information used when binding it.
typedef struct _BufferData
{
    // Underlying buffer object (mapped, bound and finalized through this member)
    DEMOGfxBuffer buffer;
    // Size in bytes of one element; DrawGrass passes it as the vertex buffer stride
    size_t stride;
    // Number of elements stored in the buffer
    size_t elementCount;
} BufferData;


// CPU-side layout of the grass geometry shader constant block ("ub_gsBlock").
typedef struct _UniformBlockGeometryShader
{
    // View-projection matrix concatenated with the model matrix (filled in UpdateGrass)
    Mtx44  wvpMtx;
} UniformBlockGeometryShader;

// CPU-side layout of the grass pixel shader constant block ("ub_psBlock").
typedef struct _UniformBlockPixelShader
{
    // Only the x component is written by this file: 0.5f for normal rendering,
    // -1.0f in wireframe mode (where the alpha test is meant to always pass).
    Qtrn  alphaTestValue;
} UniformBlockPixelShader;

// CPU-side layout of the grass vertex shader constant block ("ub_vsBlock").
typedef struct _UniformBlockVertexShader
{
    // Camera location (copied from g_camLoc)
    Vec     CameraPosition;
    // Filler after CameraPosition; UpdateGrass stores the wind phase here as well
    f32     __padding;
    // Current wind phase, kept in [0, 2*pi) by UpdateGrass
    f32     WindPhase;
    // 1 / g_grassLodFadeOutRange
    f32     FadeScale;
    // -g_grassLodBeginDistance / g_grassLodFadeOutRange
    f32     FadeOffset;
    // Trailing filler; UpdateGrass stores the wind phase here as well
    f32     __padding2;
} UniformBlockVertexShader;

// Shader interface slots and constant buffers used by the grass pipeline.
typedef struct _GrassUniforms
{
    // Interface slots looked up from the grass shader in InitGrassPipeline
    int                         SamplerLocation;    // pixel stage sampler "s_texture"
    int                         UBPSLocation;       // pixel stage block "ub_psBlock"
    int                         UBGSLocation;       // geometry stage block "ub_gsBlock"
    int                         UBVSLocation;       // vertex stage block "ub_vsBlock"
    // Pixel shader constants (single copy, rewritten only when the fill mode changes)
    BufferData  UBPS;
    // Geometry/vertex shader constants, two copies each: UpdateGrass writes index
    // (g_FrameCount + 1) % 2 and DrawGrass binds index g_FrameCount % 2.
    BufferData  UBGS[2];
    BufferData  UBVS[2];
} GrassUniforms;


// Index into the two shader path tables below; the test entry point sets it to 1
// when DEMOTestIsUseHlslccGlsl() is true and to 0 otherwise.
static int g_ShaderFileIdx = 0;
// Grass shader paths (index 0: default build, index 1: Hlslcc variant)
static const char *GRASS_SHADER_FILE[] =
{
    "shaders/gsGrass/grass",
    "shaders/gsGrass/grassHlslcc",
};
// Ground model ("transform") shader paths, indexed the same way
static const char *SIMPLE_SHADER_FILE[] =
{
    "shaders/gsGrass/transform",
    "shaders/gsGrass/transformHlslcc",
};

// Asset locations handed to the DEMO loaders
static const char *MODEL_FILE              = "geometries/gsGrass/ground.nmod";
static const char *GRASS_TEXTURE_FILENAME  = "textures/gsGrass/grass";
static const char *MODEL_TEXTURE_DIRECTORY = "textures/gsGrass";

// Ground model pipeline and the two blend states selected per material in SetMaterial:
// index 0 has alpha-to-coverage disabled, index 1 has it enabled (see InitModelPipeline).
static DEMOGfxPipeline g_ModelPipeline;
static nn::gfx::BlendState g_ModelBlendStates[ 2 ];
// Memory backing each blend state; allocated with DEMOGfxAllocMEM2 and freed at shutdown
static void* g_ModelBlendStateData[ 2 ];

// CPU-side layout of the ground model vertex shader constant block ("u_matrices").
typedef struct _TransformUniformData
{
    Mtx44 u_modelMtx;   // model matrix (a uniform 0.01 scale, see UpdateModel)
    Mtx44 u_viewMtx;    // copy of g_viewMtx
    Mtx44 u_projMtx;    // copy of g_projMtx
} TransformUniformData;

// Full-surface viewport/scissor state and the memory pointer filled in by
// DEMOGfxSetViewportScissorState
static nn::gfx::ViewportScissorState g_viewportState;
static void* g_pViewportStateData;

// Ground model, its two transform constant buffers (written at index
// (g_FrameCount + 1) % 2, bound at index g_FrameCount % 2) and its shader slots
static DemoModelData   g_ModelData;
static BufferData  g_TransformUniformData[2];
static int         g_transformMatrixLoc;
static int         g_samplerLoc;

// Samplers and their descriptor slots for the ground and grass textures
static nn::gfx::Sampler  g_groundSampler;
static nn::gfx::Sampler  g_grassSampler;
static nn::gfx::DescriptorSlot  g_groundSamplerSlot;
static nn::gfx::DescriptorSlot  g_grassSamplerSlot;

// Grass pipeline and texture
static DEMOGfxPipeline   g_GrassPipeline;
static DEMOGfxTexture    g_GrassTexture;

static GrassUniforms    g_GrassUniforms;

// Incremented once per DrawScene call; its parity selects which copy of the
// paired constant buffers is bound
static u32         g_FrameCount = 0;

// Camera: look-at target, up vector and location, plus the derived matrices
// (g_vpMtx is g_projMtx concatenated with g_viewMtx, rebuilt in ProcessPad)
static Vec         g_objPt = {0.0f, 0.0f, 0.0f};
static Vec         g_camUp = {0.0f, 1.0f, 0.0f};
static Vec         g_camLoc = {0.0f, 3.0f, -5.0f};
static Mtx44       g_vpMtx;
static Mtx44       g_projMtx;
static Mtx44       g_viewMtx;

// Pad-adjustable settings (ranges are clamped in ProcessPad)
static f32         g_grassLodBeginDistance = 5.f;
static f32         g_grassLodFadeOutRange = 100.f;
// Fraction of the generated grass points that DrawGrass submits
static f32         g_grassDensity = 0.5f;
static bool        g_viewWireframe = false;

// Vertex buffer of generated grass points, one DEMO_F32x4 per point
static BufferData  g_GrassAttribData;

#if NN_GFX_IS_TARGET_GX
// Ring Buffer used for Geometry Shader
static GX2RBuffer  g_ringInBuffer, g_ringOutBuffer;
#endif

// True until the first DrawScene call has completed; the on-screen text is
// skipped on that first frame
static bool s_firstFrame;
// Wind phase, advanced by 0.01 per UpdateGrass call and wrapped at 2*pi
static f32 s_wind = 0.0f;

////////////////////////////////////////////////////
//
// Prototypes
//
////////////////////////////////////////////////////
// Grass pipeline lifetime: rebuild after a fill-mode change, create, destroy
static void UpdateGrassPipeline();
static void InitGrassPipeline( DEMOGfxPipeline *pPipeline, GrassUniforms *pGrassUniforms, const char* pFileNames );
static void FreeGrassShader( DEMOGfxPipeline *pPipeline, GrassUniforms *pGrassUniforms );

// Per-frame grass work: record the draw, refresh the constant buffers
static void DrawGrass( DEMOGfxPipeline *pPipeline, BufferData *pAttribData, GrassUniforms *pGrassUniform );
static void UpdateGrass( GrassUniforms *pGrassUniforms );

// Renders one frame; dt is the frame time in seconds and is only used for the fps readout
static void DrawScene(f32 dt);


// Ground model pipeline setup and per-material state selection
static void InitModelPipeline( DEMOGfxPipeline *pPipeline, nn::gfx::BlendState* pBlendStates,
    void** pMemory, const DemoAttributeData* attributes, s32 attributeCount );
static void SetMaterial( const DemoMaterialData* pMaterial, DEMOGfxTexture* pTextures );

////////////////////////////////////////////////////
//
// Functions
//
////////////////////////////////////////////////////

static void SetWireframeGrass()
{
    //Render wireframe
    g_GrassPipeline.rasterizerStateInfo.SetFillMode( nn::gfx::FillMode_Wireframe );
    g_GrassPipeline.blendTargetStateInfoArray[ 0 ].SetBlendEnabled( false );

    UniformBlockPixelShader *pUBPS =
        g_GrassUniforms.UBPS.buffer.Map< UniformBlockPixelShader >();

    // Turn off the alpha test (always passes)
    pUBPS->alphaTestValue.x = -1.0f;

#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(pUBPS, sizeof(UniformBlockPixelShader));
#endif

    g_GrassUniforms.UBPS.buffer.Unmap();
}

static void SetNormalGrass()
{
    //Standard rendering
    g_GrassPipeline.rasterizerStateInfo.SetFillMode( nn::gfx::FillMode_Solid );
    g_GrassPipeline.blendTargetStateInfoArray[ 0 ].SetBlendEnabled( true );

    UniformBlockPixelShader *pUBPS =
        g_GrassUniforms.UBPS.buffer.Map< UniformBlockPixelShader >();

    pUBPS->alphaTestValue.x = 0.5f;

#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(pUBPS, sizeof(UniformBlockPixelShader));
#endif

    g_GrassUniforms.UBPS.buffer.Unmap();
}

// Rebuild the grass pipeline object so that it matches g_viewWireframe.
static void UpdateGrassPipeline()
{
    // The old pipeline may still be in use: wait for the queue before destroying it
    DEMOQueue.Sync();
    g_GrassPipeline.pipeline.Finalize( &DEMODevice );

    // Update the state infos for the requested fill mode
    void (*applyFillMode)() = g_viewWireframe ? SetWireframeGrass : SetNormalGrass;
    applyFillMode();

    // Create the pipeline again from the updated state infos
    g_GrassPipeline.Initialize( &DEMODevice );
}

// Initialize grass pipeline
//
// Loads the grass shaders, declares the single float4 vertex attribute, looks up
// the shader interface slots, creates the constant buffers and builds the pipeline.
// All pipeline and uniform state is set through the parameters, so the function
// does not depend on which pipeline/uniform objects the caller passes in.
//
//   pPipeline      : pipeline wrapper to set up and initialize
//   pGrassUniforms : receives the interface slots; its constant buffers are created here
//   pFileName      : shader path handed to DEMOGfxLoadShadersFromFile
static void InitGrassPipeline(DEMOGfxPipeline *pPipeline,
                            GrassUniforms *pGrassUniforms,
                            const char* pFileName)
{
    DEMOGfxShader* pShader = &pPipeline->shaders;
    u32 attribBuffer = 0;

    // Initialize the pipeline
    pPipeline->SetDefaults();

    DEMOGfxLoadShadersFromFile(pShader, 0, pFileName);

    // One float4 per grass point (filled by GenerateGrass)
    DEMOGfxInitShaderAttribute(pShader,
                               "a_position",
                               attribBuffer,
                               0,
                               nn::gfx::AttributeFormat_32_32_32_32_Float);

    DEMOGfxInitShaderVertexBuffer( pShader, 0, sizeof( float ) * 4, 0 );

    //Get the uniform's position
    pGrassUniforms->UBVSLocation =
        pShader->GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_ConstantBuffer, "ub_vsBlock");

    pGrassUniforms->UBGSLocation =
        pShader->GetInterfaceSlot( nn::gfx::ShaderStage_Geometry, nn::gfx::ShaderInterfaceType_ConstantBuffer, "ub_gsBlock");

    pGrassUniforms->UBPSLocation =
        pShader->GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_ConstantBuffer, "ub_psBlock");

    pGrassUniforms->SamplerLocation  =
        pShader->GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_Sampler, "s_texture");

    // Two copies of the per-frame blocks; they are filled every frame by UpdateGrass
    for (u32 i = 0;i < 2; ++i)
    {
        pGrassUniforms->UBGS[ i ].buffer.Initialize( sizeof( UniformBlockGeometryShader ), NULL, nn::gfx::GpuAccess_ConstantBuffer, 0 );
        pGrassUniforms->UBVS[ i ].buffer.Initialize( sizeof( UniformBlockVertexShader ), NULL, nn::gfx::GpuAccess_ConstantBuffer, 0 );
    }

    // Setup the constant uniform for alpha testing.
    // No source data is given to Initialize(), so clear the whole block first:
    // only the x component is assigned a value afterwards.
    pGrassUniforms->UBPS.buffer.Initialize( sizeof( UniformBlockPixelShader ), NULL, nn::gfx::GpuAccess_ConstantBuffer, 0 );
    UniformBlockPixelShader* pUniformBlockPixelShader = pGrassUniforms->UBPS.buffer.Map< UniformBlockPixelShader >();
    memset( pUniformBlockPixelShader, 0, sizeof( UniformBlockPixelShader ) );
    pUniformBlockPixelShader->alphaTestValue.x = 0.5f;
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap( pUniformBlockPixelShader, sizeof( UniformBlockPixelShader ) );
#endif
    pGrassUniforms->UBPS.buffer.Unmap();

    // Setup the blend targets (source-alpha / one-minus-source-alpha blending)
    pPipeline->blendTargetStateInfoArray[ 0 ].SetDefault();
    pPipeline->blendTargetStateInfoArray[ 0 ].SetBlendEnabled( true );
    pPipeline->blendTargetStateInfoArray[ 0 ].SetSourceColorBlendFactor( nn::gfx::BlendFactor_SourceAlpha );
    pPipeline->blendTargetStateInfoArray[ 0 ].SetSourceAlphaBlendFactor( nn::gfx::BlendFactor_SourceAlpha );
    pPipeline->blendTargetStateInfoArray[ 0 ].SetDestinationColorBlendFactor( nn::gfx::BlendFactor_OneMinusSourceAlpha );
    pPipeline->blendTargetStateInfoArray[ 0 ].SetDestinationAlphaBlendFactor( nn::gfx::BlendFactor_OneMinusSourceAlpha );
    pPipeline->blendTargetStateCount = 1;

    // Setup the color targets
    pPipeline->colorTargetStateInfoArray[ 0 ].SetDefault();
    pPipeline->colorTargetStateInfoArray[ 0 ].SetFormat( DEMOColorBufferInfo.GetImageFormat() );
    pPipeline->colorTargetStateCount = 1;

    // Standard rendering: solid fill (blending and the alpha threshold are already set above)
    pPipeline->rasterizerStateInfo.SetFillMode( nn::gfx::FillMode_Solid );
    pPipeline->Initialize( &DEMODevice );

#if NN_GFX_IS_TARGET_GX
    // Geometry shader ring buffers are sized from the loaded vertex/geometry shaders
    GX2UTCreateGeometryShaderInputRingBuffer(&g_ringInBuffer, reinterpret_cast< GX2VertexShader* >( pShader->vertexShaderData ) );
    GX2RSetBufferName(&g_ringInBuffer, "GS ring in");
    GX2UTCreateGeometryShaderOutputRingBuffer(&g_ringOutBuffer, reinterpret_cast< GX2GeometryShader* >( pShader->geomShaderData ) );
    GX2RSetBufferName(&g_ringOutBuffer, "GS ring out");
    GX2UTSetGeometryShaderRingBuffers(&g_ringInBuffer, &g_ringOutBuffer);
#endif
}

// Release the GPU buffer owned by a BufferData (stride/elementCount are left untouched).
static void FreeBufferData( BufferData* pBufferData )
{
    DEMOGfxBuffer& gpuBuffer = pBufferData->buffer;
    gpuBuffer.Finalize();
}

// Free the grass shader
// Destroys the grass pipeline, every constant buffer held by pGrassUniforms and,
// on GX, the geometry shader ring buffers.
static void FreeGrassShader(DEMOGfxPipeline *pPipeline,
                     GrassUniforms *pGrassUniforms)
{
    pPipeline->Finalize( &DEMODevice );

    // Pixel shader constants first, then both copies of the per-frame blocks
    FreeBufferData( &pGrassUniforms->UBPS );

    u32 copy = 0;
    while ( copy < 2 )
    {
        FreeBufferData( &pGrassUniforms->UBGS[ copy ] );
        FreeBufferData( &pGrassUniforms->UBVS[ copy ] );
        ++copy;
    }

#if NN_GFX_IS_TARGET_GX
    GX2RDestroyBufferEx(&g_ringInBuffer, GX2R_OPTION_NO_TOUCH_DESTROY);  // mem1
    GX2RDestroyBufferEx(&g_ringOutBuffer, GX2R_OPTION_NO_TOUCH_DESTROY); // mem1
#endif
}

// Get camera matrix
// Builds the perspective projection for the current surface size and the view
// matrix from the global camera location, up vector and look-at target.
//   resultProjMtx44 : receives the projection matrix
//   resultViewMtx44 : receives the look-at matrix expanded to 4x4
static void CameraInit(Mtx44 resultProjMtx44, Mtx44 resultViewMtx44)
{
    const f32 fovY      = 50.0f;
    const f32 nearPlane = 0.1f;
    const f32 farPlane  = 10000.0f;
    const f32 aspectRatio = static_cast< f32 >( SURFACE_WIDTH ) / static_cast< f32 >( SURFACE_HEIGHT );

    MTXPerspective(resultProjMtx44, fovY, aspectRatio, nearPlane, farPlane);

    // The look-at helper produces a 3x4 matrix; widen it for the shaders
    Mtx viewMtx34;
    MTXLookAt(viewMtx34, &g_camLoc, &g_camUp, &g_objPt);
    MTX34To44(viewMtx34, resultViewMtx44);
}

// Update grass state
// Writes this frame's geometry and vertex shader constants into the buffer copy
// at index (g_FrameCount + 1) % 2, then advances the wind phase.
static void UpdateGrass(GrassUniforms *pGrassUniforms)
{
    const u32 writeIndex = ( g_FrameCount + 1 ) % 2;
    DEMOGfxBuffer& gsBuffer = pGrassUniforms->UBGS[ writeIndex ].buffer;
    DEMOGfxBuffer& vsBuffer = pGrassUniforms->UBVS[ writeIndex ].buffer;

    // Geometry shader block: view-projection times an identity model matrix
    Mtx44 identityMtx;
    MTX44Identity( identityMtx );

    UniformBlockGeometryShader *pGsBlock = gsBuffer.Map<UniformBlockGeometryShader>();
    MTX44Concat( g_vpMtx, identityMtx, pGsBlock->wvpMtx );
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(pGsBlock, sizeof(UniformBlockGeometryShader));
#endif
    gsBuffer.Unmap();

    // Vertex shader block: camera, wind and LOD fade parameters
    UniformBlockVertexShader *pVsBlock = vsBuffer.Map<UniformBlockVertexShader>();
    pVsBlock->CameraPosition = g_camLoc;
    pVsBlock->__padding  = s_wind;
    pVsBlock->WindPhase  = s_wind;
    pVsBlock->FadeScale  = 1.f / (g_grassLodFadeOutRange);
    pVsBlock->FadeOffset = -g_grassLodBeginDistance / (g_grassLodFadeOutRange);
    pVsBlock->__padding2 = s_wind;
#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap(pVsBlock, sizeof(UniformBlockVertexShader));
#endif
    vsBuffer.Unmap();

    // Advance the wind and keep the phase below one full turn
    s_wind += 0.01f;
    s_wind = fmodf(s_wind, 3.1415926535f * 2.f);
}

// Render grass
// Binds the grass pipeline, vertex buffer, texture and constant buffers, then
// draws the leading g_grassDensity fraction of the grass points as a point list.
//   pPipeline      : grass pipeline to bind
//   pAttribData    : vertex buffer of grass points
//   pGrassUniforms : interface slots and constant buffers
static void DrawGrass(DEMOGfxPipeline *pPipeline,
                      BufferData *pAttribData,
                      GrassUniforms *pGrassUniforms)
{
    // Spark Instrumentation
    DEMOGfxDebugTagIndent( "DrawGrass" );

    // Constant buffer copy that is bound for this frame
    const u32 readIndex = g_FrameCount % 2;
    DEMOGfxBuffer& vsBuffer = pGrassUniforms->UBVS[ readIndex ].buffer;
    DEMOGfxBuffer& gsBuffer = pGrassUniforms->UBGS[ readIndex ].buffer;
    DEMOGfxBuffer& psBuffer = pGrassUniforms->UBPS.buffer;

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode( GX2_SHADER_MODE_GEOMETRY_SHADER );
#endif
    DEMOCommandBuffer.SetPipeline( &pPipeline->pipeline );

    DEMOCommandBuffer.SetVertexBuffer( 0, pAttribData->buffer.gpuAddress, pAttribData->stride, pAttribData->buffer.size );
    DEMOCommandBuffer.SetTextureAndSampler( pGrassUniforms->SamplerLocation, nn::gfx::ShaderStage_Pixel,
        g_GrassTexture.GetDescriptorSlot( 0 ), g_grassSamplerSlot );

    DEMOCommandBuffer.SetConstantBuffer( pGrassUniforms->UBVSLocation, nn::gfx::ShaderStage_Vertex,
        vsBuffer.gpuAddress, vsBuffer.size );
    DEMOCommandBuffer.SetConstantBuffer( pGrassUniforms->UBGSLocation, nn::gfx::ShaderStage_Geometry,
        gsBuffer.gpuAddress, gsBuffer.size );
    DEMOCommandBuffer.SetConstantBuffer( pGrassUniforms->UBPSLocation, nn::gfx::ShaderStage_Pixel,
        psBuffer.gpuAddress, psBuffer.size );

    // Scale the number of submitted points by the density setting; skip empty draws
    const u32 pointCount =
        static_cast< u32 >( static_cast< f32 >( pAttribData->elementCount ) * g_grassDensity );
    if ( pointCount != 0 )
    {
        DEMOCommandBuffer.Draw( nn::gfx::PrimitiveTopology_PointList, pointCount, 0 );
    }

    // Spark Instrumentation
    DEMOGfxDebugTagUndent();
}

// Generate location where grass grows
//
// Walks every triangle of the model, accumulates its area (Heron's formula on
// positions scaled by 0.01, the same scale UpdateModel applies to the ground)
// and, when an output array is given, scatters random points over each triangle
// in proportion to its area.
//
//   pModelData : model whose index and attribute buffers are mapped and read
//   Density    : points generated per unit of triangle area
//   pOutput    : receives one entry per point (xyz = position, w = random value
//                in [0, pi]); pass NULL to only measure the total area
//   pOutputNum : receives the number of points written; may be NULL
//   returns    : summed area of all non-degenerate triangles
//
// The caller must size pOutput for at least (total area * Density) entries.
// NOTE(review): every mesh reads its indices from the start of the mapped index
// buffer; confirm whether meshes need a per-mesh index offset.
static f32 GenerateGrass(DemoModelData* pModelData,
                  f32 Density,
                  DEMO_F32x4* pOutput,
                  u32* pOutputNum)
{
    f32 totalTriangleSize = 0.f;
    // Fractional point budget carried over from one triangle to the next
    f32 grassCount = 0.f;

    // The counter is optional: guard the pointer that is actually dereferenced
    if ( pOutput && pOutputNum )
    {
        *pOutputNum = 0;
    }

    for (s32 i = 0; i < pModelData->header.meshCount; i++)
    {
        DemoMeshData* pMesh = &pModelData->meshes[ i];

        const u16* indices = pModelData->indexBuffer.buffer.Map< u16 >();
        const u8* vertices = pModelData->attributeBuffer.buffer.Map< u8 >();

        // Only complete triangles are processed; a trailing partial triple is ignored
        for (u32 idx = 0;idx + 2 < pMesh->indexCount;idx +=3)
        {
            //Calculate grass distribution ratio from the triangle's surface area

            u16 index0 = indices[ idx];
            u16 index1 = indices[ idx + 1];
            u16 index2 = indices[ idx + 2];

            // A Vec is read from the start of each vertex and scaled to world size
            Vec v0,v1,v2;
            VECScale( (Vec*)( vertices + index0 * pMesh->attributeStride), &v0, 0.01f);
            VECScale( (Vec*)( vertices + index1 * pMesh->attributeStride), &v1, 0.01f);
            VECScale( (Vec*)( vertices + index2 * pMesh->attributeStride), &v2, 0.01f);

            Vec e0,e1,e2;

            VECSubtract( &v1, &v0, &e0);
            VECSubtract( &v2, &v0, &e1);
            VECSubtract( &v2, &v1, &e2);

            f32         length0 = sqrtf( VECDotProduct( &e0, &e0));
            f32         length1 = sqrtf( VECDotProduct( &e1, &e1));
            f32         length2 = sqrtf( VECDotProduct( &e2, &e2));

            const f32   EPSILON = 0.000001f;

            // Skip triangles with a collapsed edge
            if (length0 < EPSILON || length1 < EPSILON || length2 < EPSILON)
            {
                continue;
            }

            // Heron's formula: area^2 = s (s-a) (s-b) (s-c)
            f32 s = (length0 + length1 + length2) * 0.5f;
            f32 sz_sq = s * (s - length0) * (s - length1) * (s - length2);

            // Skip (near) zero-area triangles; this also rejects negative rounding results
            if (sz_sq < EPSILON)
            {
                continue;
            }

            f32 triangleSize = sqrtf( sz_sq);
            totalTriangleSize += triangleSize;

            if (pOutput)
            {
                grassCount += triangleSize * Density;

                // Emit the whole part of the budget now, keep the fraction for later
                u32 gen_count = static_cast< u32 >( floorf( grassCount) );
                for (u32 j = 0;j < gen_count;++j)
                {
                    // Rejection-sample (u, v) so that the point lies inside the triangle
                    f32 u, v;
                    do
                    {
                        u = static_cast< float >( DEMORand() ) / static_cast< float >( DEMO_RAND_MAX );
                        v = static_cast< float >( DEMORand() ) / static_cast< float >( DEMO_RAND_MAX );
                    } while ( u + v > 1.f );

                    // pt = v0 + u * e0 + v * e1
                    Vec up,vp;
                    VECScale( &e0, &up, u);
                    VECScale( &e1, &vp, v);

                    Vec pt,tmp;
                    VECAdd( &up, &vp, &tmp);
                    VECAdd( &tmp, &v0, &pt);


                    pOutput->u.v.x = pt.x;
                    pOutput->u.v.y = pt.y;
                    pOutput->u.v.z = pt.z;
                    pOutput->u.v.w = DEMORand() / static_cast< f32 >( DEMO_RAND_MAX ) * 3.1415926535f;

                    pOutput++;
                    if ( pOutputNum )
                    {
                        (*pOutputNum)++;
                    }
                }
                grassCount -= gen_count;
            }
        }

        pModelData->attributeBuffer.buffer.Unmap();
        pModelData->indexBuffer.buffer.Unmap();
    }
    return totalTriangleSize;
}

// Overall initialization
static int SceneInit()
{

    // Load the model data
    DEMOLoadModelData(&g_ModelData,MODEL_FILE,MODEL_TEXTURE_DIRECTORY);


    // Initialize model shader
    InitModelPipeline( &g_ModelPipeline, g_ModelBlendStates, g_ModelBlendStateData,
        g_ModelData.attributes, g_ModelData.header.attributeCount );

    // Initialize sampler
    DEMOGfxInitSampler( &g_groundSampler, &g_groundSamplerSlot, nn::gfx::TextureAddressMode_Repeat,
        nn::gfx::FilterMode_MinLinear_MagLinear_MipLinear, nn::gfx::ComparisonFunction_Always );

    DEMOGfxInitSampler( &g_grassSampler, &g_grassSamplerSlot, nn::gfx::TextureAddressMode_Repeat,
        nn::gfx::FilterMode_MinLinear_MagLinear_MipLinear, nn::gfx::ComparisonFunction_Always );

    DEMOSRand(1);
    {
        // Calculate total grass surface area
        f32 triangleSize = GenerateGrass(&g_ModelData,0,NULL,NULL);

        const f32   MAX_DENSITY = 16.f;
        u32 numGrass = (u32)ceilf(triangleSize * MAX_DENSITY);

        // Just make a generic untyped buffer as we don't render from this, however it's useful still
        // to get guard bands etc.
        g_GrassAttribData.buffer.Initialize( sizeof( DEMO_F32x4 ) * numGrass, NULL, nn::gfx::GpuAccess_VertexBuffer, 0 );
        DEMO_F32x4* vertices = g_GrassAttribData.buffer.Map< DEMO_F32x4 >();

        u32 num = 0;
        // Write the locations where grass occurs
        GenerateGrass(&g_ModelData, MAX_DENSITY, vertices, &num);

        // Randomly shuffle grass locations
        for (u32 i = 0;i < num;++i)
        {
            DEMO_F32x4 tmp;
            u32 swapTarget = DEMORand() % num;

            if (swapTarget != i)
            {
                memcpy( &tmp, &vertices[i], sizeof(tmp));
                memcpy( &vertices[i], &vertices[swapTarget], sizeof(tmp));
                memcpy( &vertices[swapTarget], &tmp, sizeof(tmp));
            }
        }

        g_GrassAttribData.buffer.Unmap();
        g_GrassAttribData.elementCount = num;
        g_GrassAttribData.stride = sizeof( DEMO_F32x4 );
    }

    {
        // Load textures
        BOOL fOK = g_GrassTexture.Initialize( GRASS_TEXTURE_FILENAME );
        DEMOAssert( fOK && "Unable to load texture file" );
        NN_UNUSED( fOK );
    }

    InitGrassPipeline(&g_GrassPipeline,
                    &g_GrassUniforms,
                    GRASS_SHADER_FILE[g_ShaderFileIdx]
                    );


    CameraInit(g_projMtx, g_viewMtx);

    // Setup Viewport
    DEMOGfxSetViewportScissorState( &g_viewportState, &g_pViewportStateData, 0.0f, 0.0f,
        static_cast< float >( DEMOColorBufferInfo.GetWidth() ),
        static_cast< float >( DEMOColorBufferInfo.GetHeight() ), 0.0f, 1.0f,
        static_cast< float >( DEMOColorBufferInfo.GetHeight() ), false );

    return 1;
}


// Create model shader
// Sets up the ground model pipeline from the model's attribute list, creates
// two blend states (alpha-to-coverage off / on) and the two transform buffers.
//   pPipeline      : pipeline wrapper to set up and initialize
//   pBlendStates   : array of two blend states to initialize
//   ppMemory       : receives the two allocations backing the blend states
//   attributes     : attribute descriptions of the model
//   attributeCount : number of entries in attributes
static void InitModelPipeline(DEMOGfxPipeline *pPipeline,
    nn::gfx::BlendState* pBlendStates,
    void** ppMemory,
    const DemoAttributeData* attributes,
    s32 attributeCount)
{
    DEMOGfxShader* pShader = &pPipeline->shaders;

    pPipeline->SetDefaults();

    DEMOGfxLoadShadersFromFile(pShader, 0, SIMPLE_SHADER_FILE[g_ShaderFileIdx]);

    // Declare every attribute the shader consumes and sum up the vertex stride
    int vertexStride = 0;
    for (s32 attrIdx = 0; attrIdx < attributeCount; ++attrIdx)
    {
        const DemoAttributeData& attribute = attributes[ attrIdx ];

        const char* shaderName = NULL;
        nn::gfx::AttributeFormat format = nn::gfx::AttributeFormat_Undefined;
        int floatCount = 0;

        switch ( attribute.type )
        {
            case DEMO_ATTRIBUTE_POSITION:
                //Local coordinate attributes
                shaderName = "a_position";
                format = nn::gfx::AttributeFormat_32_32_32_Float;
                floatCount = 3;
                break;
            case DEMO_ATTRIBUTE_NORMAL:
                //Normal attributes
                shaderName = "a_normal";
                format = nn::gfx::AttributeFormat_32_32_32_Float;
                floatCount = 3;
                break;
            case DEMO_ATTRIBUTE_TEXCOORD:
                //Texture coordinate attributes
                shaderName = "a_texCoord";
                format = nn::gfx::AttributeFormat_32_32_Float;
                floatCount = 2;
                break;
            default:
                //Attributes not used in shader
                break;
        }

        if ( shaderName == NULL )
        {
            continue;
        }

        vertexStride += static_cast< int >( sizeof( float ) ) * floatCount;
        DEMOGfxInitShaderAttribute( pShader, shaderName, 0, attribute.offset, format );
    }

    DEMOGfxInitShaderVertexBuffer( pShader, 0, vertexStride, 0 );

    // Shader interface slots
    g_transformMatrixLoc = pShader->GetInterfaceSlot( nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_ConstantBuffer, "u_matrices" );
    g_samplerLoc = pShader->GetInterfaceSlot( nn::gfx::ShaderStage_Pixel, nn::gfx::ShaderInterfaceType_Sampler, "s_texture" );

    DEMOAssert( g_transformMatrixLoc != -1 && "u_modelMtx location is invalid.");
    DEMOAssert( g_samplerLoc  != -1 && "samplerLoc location is invalid.");

    // One opaque blend target, one color target in the color buffer format
    pPipeline->blendTargetStateCount = 1;
    pPipeline->blendTargetStateInfoArray[ 0 ].SetDefault();
    pPipeline->blendTargetStateInfoArray[ 0 ].SetBlendEnabled( false );

    pPipeline->colorTargetStateCount = 1;
    pPipeline->colorTargetStateInfoArray[ 0 ].SetDefault();
    pPipeline->colorTargetStateInfoArray[ 0 ].SetFormat( DEMOColorBufferInfo.GetImageFormat() );

    pPipeline->Initialize( &DEMODevice );

    // Separate blend states, bound per material in SetMaterial.
    // Both use the same non-blending target; only alpha-to-coverage differs.
    nn::gfx::BlendStateInfo stateInfo;
    nn::gfx::BlendTargetStateInfo targetInfo;
    stateInfo.SetDefault();
    stateInfo.SetAlphaToCoverageEnabled( false );
    stateInfo.SetBlendTargetStateInfoArray( &targetInfo, 1 );

    targetInfo.SetDefault();
    targetInfo.SetBlendEnabled( false );

    // State 0: alpha-to-coverage disabled
    size_t memorySize = nn::gfx::BlendState::GetRequiredMemorySize( stateInfo );
    ppMemory[ 0 ] = DEMOGfxAllocMEM2( memorySize, nn::gfx::BlendState::RequiredMemoryInfo_Alignment );
    pBlendStates[ 0 ].SetMemory( ppMemory[ 0 ], memorySize );
    pBlendStates[ 0 ].Initialize( &DEMODevice, stateInfo );

    // State 1: alpha-to-coverage enabled
    stateInfo.SetAlphaToCoverageEnabled( true );
    memorySize = nn::gfx::BlendState::GetRequiredMemorySize( stateInfo );
    ppMemory[ 1 ] = DEMOGfxAllocMEM2( memorySize, nn::gfx::BlendState::RequiredMemoryInfo_Alignment );
    pBlendStates[ 1 ].SetMemory( ppMemory[ 1 ], memorySize );
    pBlendStates[ 1 ].Initialize( &DEMODevice, stateInfo );

    // Two transform constant buffers, alternated by frame parity
    for ( int copy = 0; copy < 2; ++copy )
    {
        BufferData& transform = g_TransformUniformData[ copy ];
        transform.stride = sizeof( TransformUniformData );
        transform.elementCount = 1;
        transform.buffer.Initialize( sizeof( TransformUniformData ), NULL,
            nn::gfx::GpuAccess_ConstantBuffer, 0 );
    }
}


//
// Set material
//
//
static void SetMaterial(const DemoMaterialData* pMaterial,
                 DEMOGfxTexture* pTextures)
{
    // Set Texture
    DEMOCommandBuffer.SetTextureAndSampler( g_samplerLoc, nn::gfx::ShaderStage_Pixel,
        pTextures[ pMaterial->textureIndex ].GetDescriptorSlot( 0 ), g_groundSamplerSlot );

    // Rendering state settings
    switch(pMaterial->type)
    {
        case DEMO_MATERIAL_OPAQUE:
            {
                //Translucent material
                DEMOCommandBuffer.SetBlendState( &g_ModelBlendStates[ 0 ] );
            }
            break;
        case DEMO_MATERIAL_OVERLAP:
            {
                //Translucent material (no Z-write)
                DEMOCommandBuffer.SetBlendState( &g_ModelBlendStates[ 1 ] );
            }
            break;
        case DEMO_MATERIAL_TRANSPARENT:
            {
                //Translucent material
                DEMOCommandBuffer.SetBlendState( &g_ModelBlendStates[ 1 ] );
            }
            break;
        case DEMO_MATERIAL_ALPHA_TO_COVERAGE:
            {
                //Coverage mask material
                DEMOCommandBuffer.SetBlendState( &g_ModelBlendStates[ 1 ] );
            }
            break;

        default: break;
    }
}

//
// Render the model data
//
//
static void DrawModel(const DemoModelData* pModelData)
{
    // Spark Instrumentation
    DEMOGfxDebugTagIndent( "DrawModel" );

    s32 currentMaterialIndex = -1;

    DEMOCommandBuffer.SetPipeline( &g_ModelPipeline.pipeline );
    DEMOCommandBuffer.SetConstantBuffer( g_transformMatrixLoc, nn::gfx::ShaderStage_Vertex,
        g_TransformUniformData[ g_FrameCount % 2].buffer.gpuAddress, g_TransformUniformData[ g_FrameCount % 2].buffer.size );

    for (s32 i=0; i<pModelData->header.meshCount; i++)
    {
        const DemoMeshData* pMesh = &pModelData->meshes[i];

        // Set material
        if (currentMaterialIndex != pMesh->materialIndex)
        {
            currentMaterialIndex  = pMesh->materialIndex;

            SetMaterial(&pModelData->materials[pMesh->materialIndex],pModelData->pTextures);
        }

        // Set attribute buffer
        nn::gfx::GpuAddress attributeOffset = pModelData->attributeBuffer.buffer.gpuAddress;
        attributeOffset.Offset( pModelData->attributes[ i ].offset );
        DEMOCommandBuffer.SetVertexBuffer( 0,  attributeOffset, pModelData->attributeBuffer.stride, pModelData->attributeBuffer.size - pModelData->attributes[i].offset );

        // Draw Triangle.
        DEMOCommandBuffer.DrawIndexed( nn::gfx::PrimitiveTopology_TriangleList, pModelData->indexBuffer.stride == 2 ? nn::gfx::IndexFormat_Uint16 : nn::gfx::IndexFormat_Uint32,
            pModelData->indexBuffer.buffer.gpuAddress, pMesh->indexCount, 0 );
    }

    // Spark Instrumentation
    DEMOGfxDebugTagUndent();
}

//Update states with pad information
//
// Reads pad 0 and applies it to the demo settings and the camera:
//   X / Y          : raise / lower the LOD begin distance (clamped to [-100, 1000])
//   A / B          : raise / lower the LOD fade-out range (clamped to [1, 1000])
//   Up / Down      : raise / lower the grass density (clamped to [0, 1])
//   R trigger (up) : toggle wireframe and rebuild the grass pipeline
//   L trigger (up) : reset the camera
//   L stick        : translate the camera, R stick : rotate the camera
// Finally rebuilds g_projMtx, g_viewMtx and g_vpMtx.
//
//   dt      : elapsed frame time in seconds, used to scale all rates
//   returns : always 1
static BOOL ProcessPad(f32 dt)
{
    DEMOPadRead();
    u16 button = DEMOPadGetButton(0);
    u16 buttonUp = DEMOPadGetButtonUp(0);

    if (DEMO_PAD_BUTTON_X & button)
    {
        g_grassLodBeginDistance += 60.f * dt;
        g_grassLodBeginDistance = std::min( 1000.f, g_grassLodBeginDistance);
    }
    if (DEMO_PAD_BUTTON_Y & button)
    {
        g_grassLodBeginDistance -= 60.f * dt;
        g_grassLodBeginDistance = std::max( -100.f, g_grassLodBeginDistance);
    }

    if (DEMO_PAD_BUTTON_A & button)
    {
        g_grassLodFadeOutRange += 60.f * dt;
        g_grassLodFadeOutRange = std::min( 1000.f, g_grassLodFadeOutRange);
    }
    if (DEMO_PAD_BUTTON_B & button)
    {
        // The lower bound of 1 keeps the divisions in UpdateGrass well defined
        g_grassLodFadeOutRange -= 60.f * dt;
        g_grassLodFadeOutRange = std::max( 1.f, g_grassLodFadeOutRange);
    }

    if (DEMO_PAD_BUTTON_UP & button)
    {
        g_grassDensity+= 0.125f * dt;
        g_grassDensity = std::min( 1.f, g_grassDensity);
    }

    if (DEMO_PAD_BUTTON_DOWN & button)
    {
        g_grassDensity-= 0.125f * dt;
        g_grassDensity = std::max( 0.f, g_grassDensity);
    }

    if (DEMO_PAD_TRIGGER_R & buttonUp)
    {
        g_viewWireframe = !g_viewWireframe;
        UpdateGrassPipeline();
    }

    if (DEMO_PAD_TRIGGER_L & buttonUp)
    {
        // Same values as the initial camera setup
        g_objPt.x = g_objPt.y = g_objPt.z = 0;
        g_camUp.x = 0.f;
        g_camUp.y = 1.f;
        g_camUp.z = 0.f;

        g_camLoc.x = 0.f;
        g_camLoc.y = 3.f;
        g_camLoc.z =-5.f;
    }

    {
        // L Stick translates the camera
        f32 sx = (f32)DEMOPadGetStickX(0) / 255.0f;
        f32 sy = (f32)DEMOPadGetStickY(0) / 255.0f;

        Vec vec, tempVec;
        tempVec.x =  sx * 60.f * dt;
        tempVec.y = 0.0f;
        tempVec.z = -sy * 60.f * dt;

        // Turn the view-space step into world space using the view matrix of the
        // previous update, then move the camera and its target together
        Mtx44 inv;
        MTX44Inverse(g_viewMtx, inv);
        MTX44MultVecSR(inv, &tempVec, &vec);
        Vec tempCamLoc = g_camLoc;
        Vec tempObjPt = g_objPt;
        VECAdd(&vec, &tempCamLoc, &g_camLoc);
        VECAdd(&vec, &tempObjPt, &g_objPt);

        // The matrices are not rebuilt here: the rotation below works on the
        // camera vectors only, and CameraInit runs once after it.
    }

    {
        // R Stick rotates the camera
        f32 sx = (f32)DEMOPadGetSubStickX(0) / 255.0f;
        f32 sy = (f32)DEMOPadGetSubStickY(0) / 255.0f;

        // Pitch axis: cross product of the view direction and the camera up vector
        Vec eyev, tempEyev;
        VECSubtract(&g_objPt, &g_camLoc, &eyev);
        Vec wupv =  {0.0f, 1.0f, 0.0f};
        VECCrossProduct(&eyev, &g_camUp, &tempEyev);
        VECNormalize(&tempEyev, &eyev);

        // Pitch around that axis, yaw around the world up axis
        Mtx44 rot,rot0,rot1;
        MTX44RotAxisRad(rot0, &eyev, MTXDegToRad( sy * 300.f * dt));
        MTX44RotAxisRad(rot1, &wupv, MTXDegToRad(-sx * 300.f * dt));

        MTX44Concat(rot0, rot1, rot);

        // Rotate the look direction and the up vector; the camera location stays fixed
        Vec camv, tempCamv;
        VECSubtract(&g_objPt, &g_camLoc, &tempCamv);
        MTX44MultVecSR(rot, &tempCamv, &camv);

        VECAdd(&camv, &g_camLoc, &g_objPt);
        Vec tempUp = g_camUp;
        MTX44MultVecSR(rot, &tempUp, &g_camUp);
    }

    // Rebuild the matrices once from the final camera vectors
    CameraInit( g_projMtx, g_viewMtx);
    MTX44Concat( g_projMtx, g_viewMtx, g_vpMtx);

    return 1;
}

static void UpdateModel()
{
    // Update the MVP for the transform shader
    Mtx44   modelMtx;
    MTX44Scale( modelMtx,0.01f,0.01f,0.01f);

    TransformUniformData* pTransformUniform =
        g_TransformUniformData[ ( g_FrameCount + 1 ) % 2 ].buffer.Map< TransformUniformData >();
    memcpy( &pTransformUniform->u_viewMtx, g_viewMtx, sizeof( g_viewMtx ) );
    memcpy( &pTransformUniform->u_projMtx, g_projMtx, sizeof( g_projMtx ) );
    memcpy( &pTransformUniform->u_modelMtx, modelMtx, sizeof( modelMtx ) );

#if NN_GFX_IS_TARGET_GX
    GX2EndianSwap( pTransformUniform, sizeof( TransformUniformData ) );
#endif
    g_TransformUniformData[ ( g_FrameCount + 1 ) % 2 ].buffer.Unmap();

}

//Render all
// Updates the per-frame constant buffers, then records one frame: clear, ground
// model, grass and (from the second frame on) the on-screen help text.
//   dt : elapsed frame time in seconds, used only for the fps readout
static void DrawScene(f32 dt)
{
    // Fill the buffer copies that become current once g_FrameCount is incremented
    UpdateGrass(&g_GrassUniforms);
    UpdateModel();

    DEMOGfxBeforeRender();

    ++g_FrameCount;

    // Clear color and depth
    nn::gfx::ColorTargetView* pColorView = DEMOGetColorBufferView();
    DEMOCommandBuffer.ClearColor( pColorView, 0.2f, 0.2f, 0.2f, 1.0f, NULL );
    DEMOCommandBuffer.ClearDepthStencil( &DEMODepthBufferView, 1.0f, 0, nn::gfx::DepthStencilClearMode_Depth, NULL );

#if NN_GFX_IS_TARGET_GX
    GX2SetShaderMode( GX2_SHADER_MODE_UNIFORM_BLOCK );
#endif
    DEMOGfxSetDefaultRenderTarget();
    DEMOCommandBuffer.SetViewportScissorState( &g_viewportState );

    // Ground first, grass on top
    DrawModel(&g_ModelData);
    DrawGrass(&g_GrassPipeline, &g_GrassAttribData, &g_GrassUniforms);

    DEMOGfxDebugTagIndent( "PrintInfo" );

    // The help text is skipped on the very first frame
    if ( s_firstFrame )
    {
        s_firstFrame = false;
    }
    else
    {
        // dt is clamped away from zero before computing the frame rate
        DEMOFontPrintf( 5, 2, "GeometryShader Grass : %.1f fps", 1.0f / std::max( 1.0e-7f, dt ) );
        DEMOFontPrintf( 5, 3, "LOD Begin Distance (x/y) : %.02fm", g_grassLodBeginDistance );
        DEMOFontPrintf( 5, 4, "LOD Fade Out Range (a/b) : %.02fm", g_grassLodFadeOutRange );
        DEMOFontPrintf( 5, 5, "Density (up/down) : %.02f", g_grassDensity );
        DEMOFontPrintf( 5, 6, "Toggle Fillmode (R Trigger)" );
        DEMOFontPrintf( 5, 7, "Move Camera (L Stick/R Stick)" );
        DEMOFontPrintf( 5, 8, "Reset Camera (L Trigger)" );
    }

    // Spark Instrumentation
    DEMOGfxDebugTagUndent();

    DEMOGfxDoneRender();
}

// Test entry point: brings up the demo framework, runs the grass scene until
// DEMOIsRunning() reports false, then releases everything created during setup.
//extern "C" void nnMain()
TEST(GfxGsGrass, Run)
{
    int argc = nnt::GetHostArgc();
    char** argv = nnt::GetHostArgv();

    DEMOInit();
    DEMOTestInit(argc, argv);
    DEMOGfxInit(argc, argv);

    DEMOFontInit();

    // Pick the shader variant and reset the per-run state
    g_ShaderFileIdx = DEMOTestIsUseHlslccGlsl() ? 1 : 0;
    s_firstFrame = true;
    s_wind = 0.0f;

    // Initialize scene
    SceneInit();

    OSTime  lastTime = OSGetTime();
    while (DEMOIsRunning())
    {
        // Sample the clock once per frame so that no time is lost between
        // measuring the delta and restarting the interval
        const OSTime now = OSGetTime();
        f32 dtime = OSTicksToMilliseconds(now - lastTime) / 1000.0f;
        lastTime = now;

        ProcessPad(dtime);
        DrawScene(dtime);
    }

    // Grass resources
    FreeGrassShader(&g_GrassPipeline, &g_GrassUniforms);

    g_GrassTexture.Finalize();

    DEMOFreeModelData(&g_ModelData);

    FreeBufferData(&g_GrassAttribData);

    // Ground model resources
    g_ModelPipeline.Finalize( &DEMODevice );

    g_groundSampler.Finalize( &DEMODevice );
    g_grassSampler.Finalize( &DEMODevice );


    FreeBufferData( &g_TransformUniformData[ 0 ] );
    FreeBufferData( &g_TransformUniformData[ 1 ] );
    g_ModelBlendStates[ 0 ].Finalize( &DEMODevice );
    g_ModelBlendStates[ 1 ].Finalize( &DEMODevice );
    DEMOGfxFreeMEM2( g_ModelBlendStateData[ 0 ] );
    DEMOGfxFreeMEM2( g_ModelBlendStateData[ 1 ] );

    g_viewportState.Finalize( &DEMODevice );

    // Framework shutdown
    DEMOFontShutdown();
    DEMOTestShutdown();
    DEMOGfxShutdown();
    DEMOShutdown();

    SUCCEED();
}
