﻿//
// File:       grass_simulator.cpp
//
// Abstract:   This example shows how OpenCL can be used to create a procedural field of
//             grass on a generated terrain model which is then rendered with OpenGL.
//             Because OpenGL buffers are shared with OpenCL, the data can remain on the
//             graphics card, thus eliminating the API overhead of creating and submitting
//             the vertices from the host.
//
//             All geometry is generated on the compute device, and outputted into
//             a shared OpenGL buffer.  The terrain gets generated only within the
//             visible arc covering the camera's view frustum to avoid the need for
//             culling.  A page of grass is computed on the surface of the terrain as
//             bezier patches, and flow noise is applied to the angle of the blades
//             to simulate wind.  Multiple instances of grass are rendered at jittered
//             offsets to add more grass coverage without having to compute new pages.
//             Finally, a physically based sky shader (via OpenGL) is applied to
//             the background to provide an environment for the grass.
//
// Version:    <1.0>
//
// Disclaimer: IMPORTANT:  This Apple software is supplied to you by Apple Inc. ("Apple")
//             in consideration of your agreement to the following terms, and your use,
//             installation, modification or redistribution of this Apple software
//             constitutes acceptance of these terms.  If you do not agree with these
//             terms, please do not use, install, modify or redistribute this Apple
//             software.
//
//             In consideration of your agreement to abide by the following terms, and
//             subject to these terms, Apple grants you a personal, non - exclusive
//             license, under Apple's copyrights in this original Apple software ( the
//             "Apple Software" ), to use, reproduce, modify and redistribute the Apple
//             Software, with or without modifications, in source and / or binary forms;
//             provided that if you redistribute the Apple Software in its entirety and
//             without modifications, you must retain this notice and the following text
//             and disclaimers in all such redistributions of the Apple Software. Neither
//             the name, trademarks, service marks or logos of Apple Inc. may be used to
//             endorse or promote products derived from the Apple Software without specific
//             prior written permission from Apple.  Except as expressly stated in this
//             notice, no other rights or licenses, express or implied, are granted by
//             Apple herein, including but not limited to any patent rights that may be
//             infringed by your derivative works or by other works in which the Apple
//             Software may be incorporated.
//
//             The Apple Software is provided by Apple on an "AS IS" basis.  APPLE MAKES NO
//             WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED
//             WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A
//             PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION
//             ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
//
//             IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
//             CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
//             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
//             INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
//             AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
//             UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
//             OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
//
////////////////////////////////////////////////////////////////////////////////////////////////////

#include <cmath>
#include <cassert>
#include <ctime>

#include <gfx/demo.h>

#if NN_GFX_IS_TARGET_GX
#include <cafe/gx2.h>
#define MTX_USE_PS
#include <cafe/mat.h>
#endif

#include "compute_types.h"
#include "compute_math.h"
#include "grass_simulator.h"

/////////////////////////////////////////////////////////////////////////////

// Constructs an idle simulator.
//
// Every count, size and tuning parameter starts at zero; the vertex and color
// streams are both declared as four components wide.  No GPU resources are
// created here -- that happens in allocate() / setup().
GrassSimulator::GrassSimulator() :
    m_uiWorkItemCount(0),
    m_uiRowCount(0),
    m_uiColumnCount(0),
    m_uiBladeCount(0),
    m_uiMaxSegmentCount(0),
    m_uiMaxElementCount(0),
    m_fJitterAmount(0),
    m_fCameraFov(0),
    m_fNoiseAmplitude(0),
    m_fBladeIntensity(0),
    m_fBladeOpacity(0),
    m_fFlowScale(0),
    m_fFlowSpeed(0),
    m_fFlowAmount(0),
    m_uiVertexBytes(0),
    m_uiVertexComponents(4),
    m_uiColorBytes(0),
    m_uiColorCount(0),
    m_uiColorComponents(4),
    m_fFalloff(0)
{
    // These three values are uploaded to the shader by computeGrassOnTerrain()
    // but were previously left indeterminate until some other call assigned
    // them.  They are zeroed in the body (not the initializer list) because the
    // member declaration order is not visible from this file, and a misplaced
    // initializer would trigger reorder warnings.
    m_uiVertexCount = 0;
    m_uiVertexOffset = 0;
    m_uiColorOffset = 0;

    // Work group / grid dimensions are filled in by setup().
    m_auiLocalDim[0] = m_auiLocalDim[1] = m_auiLocalDim[2] = 0;
    m_auiGlobalDim[0] = m_auiGlobalDim[1] = m_auiGlobalDim[2] = 0;
}

// Destructor.  Only clears the cached buffer sizes; it does not call
// finalize(), so GPU objects are not released here.
GrassSimulator::~GrassSimulator()
{
    m_uiVertexBytes = 0;
    m_uiColorBytes  = 0;
}

// Finalizes the objects this simulator initialized: both uniform buffers,
// the indirect dispatch buffer (GX targets only) and the compute pipeline
// wrapper.
void GrassSimulator::finalize()
{
    // Uniform buffers initialized by allocate().
    m_VariableGrassUniforms.Finalize();
    m_FixedGrassUniforms.Finalize();

#if NN_GFX_IS_TARGET_GX
    // The dispatch buffer only exists on GX builds.
    m_DispatchBuf.Finalize();
#endif

    // Shader / pipeline state set up by setup().
    m_Pipeline.Finalize();
}

// Records the blade count, derives the vertex/color counts and the aligned
// byte sizes of their buffers, and initializes the uniform buffers (plus the
// indirect dispatch buffer on GX targets).
//
//   uiCount - number of grass blades to simulate.
//
// Always returns true.
bool GrassSimulator::allocate(uint uiCount)
{
    // Byte sizes depend only on the requested count and the per-blade limits.
    m_uiVertexBytes = getRequiredVertexBufferSize(uiCount);
    m_uiColorBytes  = getRequiredColorBufferSize(uiCount);

    // One color is produced per vertex.
    m_uiBladeCount  = uiCount;
    m_uiVertexCount = m_uiBladeCount * m_uiMaxElementCount * m_uiMaxSegmentCount;
    m_uiColorCount  = m_uiVertexCount;

    // Constant buffers consumed by the compute shader.
    m_FixedGrassUniforms.Initialize(
        sizeof( FixedGrassUniforms ),
        NULL,
        nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Read,
        0 );
    m_VariableGrassUniforms.Initialize(
        sizeof( VariableGrassUniforms ),
        NULL,
        nn::gfx::GpuAccess_ConstantBuffer | nn::gfx::GpuAccess_Read,
        0 );

#if NN_GFX_IS_TARGET_GX
    // Four 32-bit words for the indirect dispatch arguments.
    m_DispatchBuf.Initialize(
        sizeof( uint32_t ) * 4,
        NULL,
        nn::gfx::GpuAccess_IndirectBuffer | nn::gfx::GpuAccess_Read,
        0 );
#endif

    return true;
}

// Shared implementation for the vertex and color buffer size queries.
//
// Computes the byte size of uiElementCount attributes made of uiComponents
// floats each, then rounds it up to the alignment the device reports for a
// readable vertex buffer of that size.
//
//   uiElementCount - number of attributes (vertices or colors).
//   uiComponents   - floats per attribute.
//
// Returns the aligned size in bytes, truncated to uint.
static uint
alignedAttributeBufferBytes(
    uint uiElementCount,
    size_t uiComponents)
{
    size_t uiBytes = uiElementCount * sizeof(float) * uiComponents;

    nn::gfx::Buffer::InfoType bufferInfo;
    bufferInfo.SetDefault();
    bufferInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_VertexBuffer | nn::gfx::GpuAccess_Read );
    bufferInfo.SetSize( uiBytes );

    size_t align = nn::gfx::Buffer::GetBufferAlignment( &DEMODevice, bufferInfo );

    // The mask-based round-up assumes a power-of-two alignment.  An alignment
    // of 0 would collapse every size to 0, and 1 is a no-op, so only round up
    // when there is a real alignment requirement.
    if ( align > 1 )
    {
        uiBytes = ( uiBytes + align - 1 ) & ~( align - 1 );
    }

    return static_cast< uint >( uiBytes );
}

// Returns the aligned byte size of the vertex buffer needed for uiBladeCount
// blades, using the current per-blade element/segment limits and the vertex
// component count.
uint
GrassSimulator::getRequiredVertexBufferSize(
    uint uiBladeCount)
{
    uint uiVertexCount = uiBladeCount * m_uiMaxElementCount * m_uiMaxSegmentCount;
    return alignedAttributeBufferBytes( uiVertexCount, m_uiVertexComponents );
}

// Returns the aligned byte size of the color buffer needed for uiBladeCount
// blades, using the current per-blade element/segment limits and the color
// component count.
uint
GrassSimulator::getRequiredColorBufferSize(
    uint uiBladeCount)
{
    uint uiColorCount = uiBladeCount * m_uiMaxElementCount * m_uiMaxSegmentCount;
    return alignedAttributeBufferBytes( uiColorCount, m_uiColorComponents );
}


// Prepares the simulator for dispatch.
//
// Allocates the buffers, loads the compute shader, queries its work group
// size and interface slots, uploads the fixed lookup tables, fills the
// indirect dispatch arguments (GX targets) and initializes the pipeline.
//
//   uiBladeCount - number of grass blades to simulate.
//   uiRows       - dispatch extent in X, in work items.
//   uiColumns    - dispatch extent in Y, in work items.
//   pFilename    - shader file passed to DEMOGfxLoadShadersFromFile.
//
// The group counts are computed with integer division, so any remainder of
// uiRows / uiColumns over the work group size is not dispatched.
//
// Returns false if allocate() fails, true otherwise.
bool
GrassSimulator::setup( uint uiBladeCount, uint uiRows, uint uiColumns, const char* pFilename )
{
    if (!allocate(uiBladeCount))
    {
        return false;
    }

    m_uiRowCount = uiRows;
    m_uiColumnCount = uiColumns;

    // All three values are unsigned, so print them with %u rather than %d.
    OSReport("Grass Simulator: BladeCount[%u] RowCount[%u] ColumnCount[%u]\n",
        uiBladeCount, uiRows, uiColumns);

    m_Pipeline.SetDefaults();
    DEMOGfxLoadShadersFromFile( &m_Pipeline.shaders, 0, pFilename );

    // Setup Local Work Group Size
    m_Pipeline.shaders.GetShader()->GetWorkGroupSize( &m_auiLocalDim[ 0 ], &m_auiLocalDim[ 1 ], &m_auiLocalDim[ 2 ] );
    m_uiWorkItemCount = m_auiLocalDim[ 0 ] * m_auiLocalDim[ 1 ] * m_auiLocalDim[ 2 ];

    // The X/Y work group sizes are used as divisors when computing the number
    // of groups to dispatch, so they must not be zero.
    DEMOAssert( m_auiLocalDim[ 0 ] != 0 && m_auiLocalDim[ 1 ] != 0 && "compute work group size is invalid." );

    // Uniform Location lookup
    m_iVUniformsLoc = m_Pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Compute, nn::gfx::ShaderInterfaceType_ConstantBuffer, "variable_grass" );
    m_iFUniformsLoc = m_Pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Compute, nn::gfx::ShaderInterfaceType_ConstantBuffer, "fixed_grass" );
    m_iExpBufLoc = m_Pipeline.shaders.GetInterfaceSlot( nn::gfx::ShaderStage_Compute, nn::gfx::ShaderInterfaceType_UnorderedAccessBuffer, "attribute_output" );

    DEMOAssert( m_iVUniformsLoc != 0xffffffff && "variable_grass location is invalid." );
    DEMOAssert( m_iFUniformsLoc != 0xffffffff && "fixed grass location is invalid." );

    // Setup the fixed uniform buffer: copy the P (512 entries) and G (64
    // entries) tables into the .x component of each uniform element.
    FixedGrassUniforms* pFUniform = m_FixedGrassUniforms.Map< FixedGrassUniforms >( );
    for (u32 i = 0; i < 512; i++)
    {
        // bug work around caused by gshCompile 9397
        pFUniform->P[i].x = P[i];
    }
    for (u32 i = 0; i < 64; i++)
    {
        // bug work around caused by gshCompile 9397
        pFUniform->G[i].x = G[i];
    }

#ifdef CAFE
    // Byte-swap the whole block before the GPU reads it.
    GX2EndianSwap(pFUniform, sizeof(FixedGrassUniforms));
#endif
    m_FixedGrassUniforms.Unmap( );

#if NN_GFX_IS_TARGET_GX
    // Indirect dispatch arguments: number of work groups in X, Y and Z.
    m_auiDispatch = m_DispatchBuf.Map< uint32_t >();
    m_auiDispatch[ 0 ] = uiRows / m_auiLocalDim[ 0 ];
    m_auiDispatch[ 1 ] = uiColumns / m_auiLocalDim[ 1 ];
    m_auiDispatch[ 2 ] = 1;
    m_DispatchBuf.Unmap();
#endif

    // Initialize the pipeline
    nn::gfx::ComputePipelineInfo info;
    info.SetDefault();
    info.SetShaderPtr( m_Pipeline.shaders.GetShader() );
    m_Pipeline.pipeline.Initialize( &DEMODevice, info );

    return true;
}

void
GrassSimulator::computeGrassOnTerrain(uint uiIteration)
{
    float fSqrtElements = ceil(sqrtf((float)m_uiBladeCount));

    int2 aiGridResolution((int)fSqrtElements, (int)fSqrtElements);

    float fDT = 0.01f * uiIteration;

    float2 fBladeLuminanceAlpha(m_fBladeIntensity, m_fBladeOpacity);

    float4 fFlowScaleSpeedAmount(
        m_fFlowScale,
        m_fFlowScale,
        m_fFlowSpeed,
        m_fFlowAmount);

    float4 afNoiseBiasScale(
        m_kNoiseBias.x, m_kNoiseBias.y,
        m_kNoiseScale.x, m_kNoiseScale.y);

    uint4 auiBladeCurveSegmentCounts(
        m_uiBladeCount,
        m_uiMaxElementCount,
        m_uiMaxSegmentCount,
        m_uiVertexCount);

    VariableGrassUniforms* pVUniform = m_VariableGrassUniforms.Map< VariableGrassUniforms >( );
    pVUniform->blade_curve_segment_counts = auiBladeCurveSegmentCounts;
    pVUniform->camera_position = m_kCameraPosition;
    pVUniform->camera_rotation = m_kCameraRotation;
    pVUniform->camera_view = m_kCameraView;
    pVUniform->camera_left = m_kCameraLeft;
    pVUniform->camera_up = m_kCameraUp;
    pVUniform->flow_scale_speed_amount = fFlowScaleSpeedAmount;
    pVUniform->noise_bias_scale = afNoiseBiasScale;
    pVUniform->grid_resolution = aiGridResolution;
    pVUniform->clip_range = m_kClipRange;
    pVUniform->blade_length_range = m_kBladeLengthRange;
    pVUniform->blade_thickness_range = m_kBladeThicknessRange;
    pVUniform->blade_luminance_alpha = fBladeLuminanceAlpha;
    pVUniform->output_offset_vertex = m_uiVertexOffset;
    pVUniform->output_offset_color = m_uiColorOffset;
    pVUniform->output_stride = 2; // each output_data is 1 vec4
    pVUniform->jitter_amount = m_fJitterAmount;
    pVUniform->time_delta = fDT;
    pVUniform->falloff_distance = m_fFalloff;
    pVUniform->camera_fov = m_fCameraFov;
    pVUniform->noise_amplitude = m_fNoiseAmplitude;

#ifdef CAFE
    GX2EndianSwap(pVUniform, sizeof(VariableGrassUniforms));
#endif
    m_VariableGrassUniforms.Unmap( );

    // Generate commands
    DEMOCommandBuffer.SetConstantBuffer( m_iVUniformsLoc, nn::gfx::ShaderStage_Compute, m_VariableGrassUniforms.gpuAddress, m_VariableGrassUniforms.size );
    DEMOCommandBuffer.SetConstantBuffer( m_iFUniformsLoc, nn::gfx::ShaderStage_Compute, m_FixedGrassUniforms.gpuAddress, m_FixedGrassUniforms.size );
    DEMOCommandBuffer.SetUnorderedAccessBuffer( m_iExpBufLoc, nn::gfx::ShaderStage_Compute, m_ExportBufferAddress, m_ExportBufferSize );
    DEMOCommandBuffer.SetPipeline( &m_Pipeline.pipeline );
    DEMOCommandBuffer.InvalidateMemory( nn::gfx::GpuAccess_VertexBuffer | nn::gfx::GpuAccess_ConstantBuffer );
#if NN_GFX_IS_TARGET_GX
    DEMOCommandBuffer.DispatchIndirect( m_DispatchBuf.gpuAddress );
#else
    DEMOCommandBuffer.Dispatch(
        m_uiRowCount / m_auiLocalDim[ 0 ],
        m_uiColumnCount / m_auiLocalDim[ 1 ],
        1 );
#endif
    // Flush output from GPU
    DEMOCommandBuffer.FlushMemory(nn::gfx::GpuAccess_UnorderedAccessBuffer);

#if NN_GFX_IS_TARGET_D3D
    nn::gfx::GpuAddress nullAddress;
    DEMOCommandBuffer.SetUnorderedAccessBuffer( m_iExpBufLoc, nn::gfx::ShaderStage_Compute, nullAddress, 0 );
#endif
}
