﻿// --------------------------------------------------------------------------------
// <copyright>
// Copyright (C)Nintendo. All rights reserved.
//
// These coded instructions, statements, and computer programs contain proprietary
// information of Nintendo and/or its licensed developers and are protected by
// national and international copyright laws. They may not be disclosed to third
// parties or copied or duplicated in any form, in whole or in part, without the
// prior written consent of Nintendo.
//
// The content herein is highly confidential and should be handled accordingly.
// </copyright>
// --------------------------------------------------------------------------------

//
// This example shows how Compute can be used to create a procedural field of
// grass on a generated terrain model which is then rendered with GX2.
// Because we share buffers, the data we produce is consumed in-place by
// rendering, thus eliminating the API overhead of creating and submitting the
// vertices from the host.
//
// All geometry is generated on the GPU and written into a shared buffer.
// A page of grass is computed on the surface of the terrain as bezier patches,
// and flow noise is applied to the angle of the blades to simulate wind.
// Multiple instances of grass are rendered at jittered offsets to add more
// grass coverage without having to compute new pages.
//
// Finally, a physically based sky shader (via GX2) is applied to the
// background to provide an environment for the grass.
//
////////////////////////////////////////////////////////////////////////////////////////////////////

#version 330

// Work-group size: an 8x8x1 tile of invocations. main() flattens the 2D global
// invocation id into a single linear blade index.
// NOTE(review): compute layout qualifiers and `buffer` blocks are core GLSL only
// from 4.30 on -- confirm the target compiler accepts them under #version 330.
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

#ifndef M_PI
#define M_PI        3.14159265358979323846264338327950288   /* pi */
#endif
// Degrees-to-radians factor. GLSL has no C-style casts, so the conversion is
// spelled with the float() constructor instead of (float)(...).
#define DEG_TO_RAD             (float(M_PI / 180.0))
// Thin wrapper over the built-in radians() (degrees -> radians).
#define RADIANS(x)             (radians((x)))

// Scalar float constants.
#define ONE_F1                 (1.0f)
#define ZERO_F1                (0.0f)

// Constant noise tables. Each entry is padded to a full 4-component vector;
// the code in this file only ever reads the .x component of an entry.
layout(std140) uniform fixed_grass {
    ivec4 P[512];  // Lattice table (integer); Bug 9397, should just be int
    vec4  G[64];   // Gradient components, read as float via .x; Bug 9397 workaround (presumably should just be float -- TODO confirm)
};

// Per-dispatch parameters. Member order defines the std140 layout and must
// match whatever the host writes, so do not reorder.
layout(std140) uniform variable_grass {
    // x = blade count, y = max elements, z = max segments (w is not read in this file).
    uvec4 blade_curve_segment_counts;
    // Camera position; main() zeroes y and snaps it before use.
    vec4 camera_position;
    // Not referenced in this shader source.
    vec4 camera_rotation;
    // Copied in main() and forwarded to ComputeRadialGridPosition, which does not read it.
    vec4 camera_view;
    // Not referenced in this shader source.
    vec4 camera_left;
    // Not referenced in this shader source.
    vec4 camera_up;
    // xy = flow scale (copied into flow_bias_scale.zw), z = flow speed
    // (multiplies time_delta), w = flow amount (scales the blade tilt).
    vec4 flow_scale_speed_amount;
    // xy = bias, zw = scale applied to noise sample coordinates.
    vec4 noise_bias_scale;
    // Grid width/height used to turn the linear blade index into grid coordinates.
    ivec2 grid_resolution;
    // (y - x) scales the radial distance in ComputeRadialGridPosition.
    vec2 clip_range;
    // x = minimum, y = maximum blade length (interpolated by the noise sample).
    vec2 blade_length_range;
    // x = minimum, y = maximum blade thickness (interpolated by the noise sample).
    vec2 blade_thickness_range;
    // x multiplies the blade color; y is currently unused (alpha is forced to 1.0).
    vec2 blade_luminance_alpha;
    // Base index in output_data for vertex positions.
    uint output_offset_vertex;
    // Base index in output_data for vertex colors.
    uint output_offset_color;
    // Scales the simplex-noise jitter added to each blade position.
    float jitter_amount;
    // Time value; multiplied by the flow speed and fed to the flow noise.
    float time_delta;
    // Multiplies the maximum distance used by ComputeFalloff.
    float falloff_distance;
    // Field of view; passed through radians(), so treated as degrees.
    float camera_fov;
    // Amplitude multiplier applied to noise samples.
    float noise_amplitude;
};

// Output buffer written by CreateBezierPatch: positions are stored at
// output_offset_vertex + i and colors at output_offset_color + i.
// NOTE(review): the array is a fixed 4096 entries; nothing in this file checks
// the offsets against that size -- confirm the host sizes the dispatch accordingly.
buffer attribute_output {
    vec4 output_data[4096];
};

////////////////////////////////////////////////////////////////////////////////////////////////////

// Four-component constants with every lane set to zero / one.
const vec4 ZERO_F4 = vec4(0.0f);
const vec4 ONE_F4  = vec4(1.0f);

////////////////////////////////////////////////////////////////////////////////////////////////////

// Lattice table: P_SIZE is the wrap period, P_MASK wraps an index into it.
const int P_SIZE = 256;
const int P_MASK = P_SIZE - 1;

// Gradient table: G_SIZE gradients selected with G_MASK, each occupying
// G_VECSIZE consecutive entries of G.
const int G_SIZE = 16;
const int G_MASK = G_SIZE - 1;
const int G_VECSIZE = 4;

////////////////////////////////////////////////////////////////////////////////////////////////////

// Integer modulo built from truncating division: a negative remainder is
// shifted up by `a` once before being returned.
int mod(int x, int a)
{
    int remainder = x - (x / a) * a;
    return (remainder < 0) ? (remainder + a) : remainder;
}

// Quintic fade curve 6t^5 - 15t^4 + 10t^3, used as the interpolation weight
// for the gradient noise. Named "smoothie" because smooth is a GLSL built-in.
float smoothie(float t)
{
    float cubed = t * t * t;
    float poly = t * (t * 6.0f - 15.0f) + 10.0f;
    return cubed * poly;
}

// Scales the xyz part of v by the inverse of its length and sets w to 1.
// A length that is not greater than zero is replaced by 1, so xyz passes
// through unscaled in that case.
vec4 normalized(vec4 v)
{
    float len = sqrt(v.x * v.x + v.y * v.y + v.z * v.z);
    if (!(len > 0.0f))
        len = 1.0f;

    return vec4(v.x / len, v.y / len, v.z / len, 1.0f);
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Looks up lattice entry i (only the .x lane of P carries data).
int lattice1d(int i)
{
    ivec4 entry = P[i];   // Bug 9397, should just be int
    return entry.x;
}

// Two-level lattice lookup: the entry for i.y offsets the lookup for i.x.
int lattice2d(ivec2 i)
{
    int row = P[i.y].x;       // Bug 9397, should just be int
    return P[i.x + row].x;
}

// Three-level lattice lookup chained z -> y -> x; i.w is ignored.
int lattice3d(ivec4 i)
{
    int slice = P[i.z].x;           // Bug 9397, should just be int
    int row = P[i.y + slice].x;
    return P[i.x + row].x;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Picks the gradient selected by lattice cell i and returns it scaled by
// the offset v.
float gradient1d(int i, float v)
{
    int slot = (lattice1d(i) & G_MASK) * G_VECSIZE;
    return v * G[slot].x;
}

// Dot product of the offset v with the 2D gradient selected by lattice cell i.
float gradient2d(ivec2 i, vec2 v)
{
    int slot = (lattice2d(i) & G_MASK) * G_VECSIZE;
    float gx = G[slot].x;
    float gy = G[slot + 1].x;
    return dot(v, vec2(gx, gy));
}

// Dot product of the xyz offset of v with the 3D gradient selected by lattice
// cell i. Only three components take part: the previous version built the
// gradient with w = 1.0, which added v.w (the fractional part of the caller's
// position.w) straight into the noise value.
float gradient3d(ivec4 i, vec4 v)
{
    int index = (lattice3d(i) & G_MASK) * G_VECSIZE;
    vec3 g = vec3(G[index + 0].x, G[index + 1].x, G[index + 2].x);
    return dot(v.xyz, g);
}

// Returns the 2D gradient selected by lattice cell i, rotated by r.
//   i : lattice cell coordinates
//   v : offset inside the cell (unused here; kept for signature compatibility)
//   r : rotation coefficients; callers in this file pass (sin(a), cos(a))
// Both output components are computed from the ORIGINAL gradient. The
// previous code overwrote g.x first and then used the new g.x to compute
// g.y, so the result was not a pure rotation.
vec2 rotated_gradient2d(ivec2 i, vec2 v, vec2 r)
{
    int index = (lattice2d(i) & G_MASK) * G_VECSIZE;
    vec2 g = vec2(G[index + 0].x, G[index + 1].x);
    return vec2(r.x * g.x - r.y * g.y,
                r.y * g.x + r.x * g.y);
}

// Dot product of the offset v with the 2D gradient selected by lattice cell i
// after rotating that gradient by r (callers pass (sin(a), cos(a))).
// The rotated components are both derived from the original gradient; the
// previous code reused the already-rotated g.x when computing g.y.
float dot_rotated_gradient2d(ivec2 i, vec2 v, vec2 r)
{
    int index = (lattice2d(i) & G_MASK) * G_VECSIZE;
    vec2 g = vec2(G[index + 0].x, G[index + 1].x);
    vec2 rotated = vec2(r.x * g.x - r.y * g.y,
                        r.y * g.x + r.x * g.y);
    return dot(v, rotated);
}

////////////////////////////////////////////////////////////////////////////////////////////////////


// Unsigned 1D cell noise: one lattice value per integer cell, scaled by
// 1 / (P_SIZE - 1).
float CellNoise1dfu(float position)
{
    int cell = int(floor(position)) & P_MASK;
    float scale = 1.0f / float(P_SIZE - 1);
    return float(lattice1d(cell)) * scale;
}

// Signed 1D cell noise: the unsigned value remapped with 2u - 1.
float CellNoise1dfs(float position)
{
    float unsigned_value = CellNoise1dfu(position);
    return 2.0f * unsigned_value - 1.0f;
}

// Unsigned 2D cell noise: one lattice value per integer cell, scaled by
// 1 / (P_SIZE - 1).
float CellNoise2dfu(vec2 position)
{
    vec2 cell_floor = floor(position);
    ivec2 cell = ivec2(int(cell_floor.x), int(cell_floor.y));
    cell &= P_MASK;

    float scale = 1.0f / float(P_SIZE - 1);
    return float(lattice2d(cell)) * scale;
}

// Signed 2D cell noise: the unsigned value remapped with 2u - 1.
float CellNoise2dfs(vec2 position)
{
    float unsigned_value = CellNoise2dfu(position);
    return 2.0f * unsigned_value - 1.0f;
}

// Unsigned 3D cell noise: one lattice value per integer cell (position.w is
// ignored), scaled by 1 / (P_SIZE - 1).
float CellNoise3dfu(vec4 position)
{
    vec4 cell_floor = floor(position);
    ivec4 cell = ivec4(int(cell_floor.x), int(cell_floor.y), int(cell_floor.z), 0);
    cell &= P_MASK;

    float scale = 1.0f / float(P_SIZE - 1);
    return float(lattice3d(cell)) * scale;
}

// Signed 3D cell noise: the unsigned value remapped with 2u - 1.
float CellNoise3dfs(vec4 position)
{
    float unsigned_value = CellNoise3dfu(position);
    return 2.0f * unsigned_value - 1.0f;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Signed 1D gradient noise: blends the gradient contributions of the two
// neighbouring lattice points with the quintic fade, then scales by 1 / 0.7.
float GradientNoise1dfs(float position)
{
    float cell_floor = floor(position);
    float frac = position - cell_floor;
    int cell = int(cell_floor) & P_MASK;

    float left = gradient1d(cell, frac);
    float right = gradient1d(cell + 1, frac - 1.0f);

    return mix(left, right, smoothie(frac)) * (1.0f / 0.7f);
}

// Unsigned 1D gradient noise: remaps the signed value s to 0.5 - 0.5 * s.
float GradientNoise1dfu(float position)
{
    float signed_value = GradientNoise1dfs(position);
    return 0.5f - 0.5f * signed_value;
}

// Signed 2D gradient noise: evaluates the gradient contribution at the four
// corners of the containing cell, blends along x and then y with the quintic
// fade, and scales by 1 / 0.7.
float GradientNoise2dfs(vec2 position)
{
    vec2 cell_floor = floor(position);
    vec2 frac = position - cell_floor;
    ivec2 cell = ivec2(int(cell_floor.x), int(cell_floor.y));
    cell &= P_MASK;

    // Corner contributions, named n<x><y>.
    float n00 = gradient2d(cell, frac);
    float n10 = gradient2d(cell + ivec2(1, 0), frac - vec2(1.0f, 0.0f));
    float n01 = gradient2d(cell + ivec2(0, 1), frac - vec2(0.0f, 1.0f));
    float n11 = gradient2d(cell + ivec2(1, 1), frac - vec2(1.0f, 1.0f));

    float wx = smoothie(frac.x);
    float wy = smoothie(frac.y);

    float low_row = mix(n00, n10, wx);
    float high_row = mix(n01, n11, wx);
    return mix(low_row, high_row, wy) * (1.0f / 0.7f);
}

// Unsigned 2D gradient noise: remaps the signed value s to 0.5 - 0.5 * s.
float GradientNoise2dfu(vec2 position)
{
    float signed_value = GradientNoise2dfs(position);
    return 0.5f - 0.5f * signed_value;
}

// Signed 3D gradient noise: evaluates the gradient contribution at the eight
// corners of the containing cell, blends along x, y and z with the quintic
// fade, and scales by 1 / 0.7. The integer w coordinate is forced to zero.
float GradientNoise3dfs(vec4 position)
{
    vec4 cell_floor = floor(position);
    vec4 frac = position - cell_floor;
    ivec4 cell = ivec4(int(cell_floor.x), int(cell_floor.y), int(cell_floor.z), 0);
    cell &= P_MASK;

    // corner[c] holds the contribution for offset (x, y, z) with c = 4x + 2y + z.
    float corner[8];
    for (int c = 0; c < 8; ++c)
    {
        ivec4 offs = ivec4((c >> 2) & 1, (c >> 1) & 1, c & 1, 0);
        corner[c] = gradient3d(cell + offs, frac - vec4(offs));
    }

    float wx = smoothie(frac.x);
    float wy = smoothie(frac.y);
    float wz = smoothie(frac.z);

    // Collapse x first: lanes become (y0z0, y0z1, y1z0, y1z1).
    vec4 along_x = mix(vec4(corner[0], corner[1], corner[2], corner[3]),
                       vec4(corner[4], corner[5], corner[6], corner[7]), wx);
    // Then y: lanes become (z0, z1).
    vec2 along_y = mix(along_x.xy, along_x.zw, wy);
    // Finally z.
    return mix(along_y.x, along_y.y, wz) * (1.0f / 0.7f);
}

// Unsigned 3D gradient noise: remaps the signed value s to 0.5 - 0.5 * s.
float GradientNoise3dfu(vec4 position)
{
    float signed_value = GradientNoise3dfs(position);
    return 0.5f - 0.5f * signed_value;
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Signed 2D gradient noise whose lattice gradients are rotated before use.
//   position : sample coordinates
//   angle    : rotation in degrees (converted with radians())
// The four corner contributions are blended along x and then y with the
// quintic fade and the result is scaled by 1 / 0.7.
float RotatedGradientNoise2dfs(vec2 position, float angle)
{
    vec2 cell_floor = floor(position);
    vec2 frac = position - cell_floor;
    ivec2 cell = ivec2(int(cell_floor.x), int(cell_floor.y));
    cell &= P_MASK;

    // Rotation coefficients, passed on as (sin, cos).
    float theta = radians(angle);
    vec2 rot = vec2(sin(theta), cos(theta));

    // Corner contributions, named n<x><y>.
    float n00 = dot_rotated_gradient2d(cell, frac, rot);
    float n10 = dot_rotated_gradient2d(cell + ivec2(1, 0), frac - vec2(1.0f, 0.0f), rot);
    float n01 = dot_rotated_gradient2d(cell + ivec2(0, 1), frac - vec2(0.0f, 1.0f), rot);
    float n11 = dot_rotated_gradient2d(cell + ivec2(1, 1), frac - vec2(1.0f, 1.0f), rot);

    float wx = smoothie(frac.x);
    float wy = smoothie(frac.y);

    float low_row = mix(n00, n10, wx);
    float high_row = mix(n01, n11, wx);
    return mix(low_row, high_row, wy) * (1.0f / 0.7f);
}

// 2D simplex noise with rotated gradients, plus its partial derivatives.
//   position : sample coordinates
//   angle    : gradient rotation in degrees (converted with radians())
// Returns vec4(noise, d(noise)/dx, d(noise)/dy, 1.0).
vec4
RotatedSimplexNoise2dfs(vec2 position, float angle)
{
    vec2 p = position;
    // Rotation coefficients handed to rotated_gradient2d as (sin, cos).
    float r = radians(angle);
    vec2 rg = vec2(sin(r), cos(r));

    // Skew (F2) and unskew (G2) factors for the 2D simplex grid.
    const float F2 = 0.366025403f; // 0.5*(sqrt(3.0)-1.0)
    const float G2 = 0.211324865f; // (3.0-Math.sqrt(3.0))/6.0
    const float G22 = 2.0f * G2;

    const vec2 FF = vec2(F2, F2);
    const vec2 GG = vec2(G2, G2);
    const vec2 GG2 = vec2(G22, G22);

   	const vec2 F00 = vec2(0.0f, 0.0f);
   	const vec2 F01 = vec2(0.0f, 1.0f);
   	const vec2 F10 = vec2(1.0f, 0.0f);
   	const vec2 F11 = vec2(1.0f, 1.0f);

    const ivec2 I00 = ivec2(0, 0);
    const ivec2 I01 = ivec2(0, 1);
    const ivec2 I10 = ivec2(1, 0);
    const ivec2 I11 = ivec2(1, 1);

    // Skew the input to find the containing cell, then wrap the cell index.
    float s = (p.x + p.y) * F2;
    vec2 ps = vec2(p.x + s, p.y + s);
    vec2 pf = floor(ps);
    ivec2 ip = ivec2(int(pf.x), int(pf.y));
    ip &= ivec2(P_MASK, P_MASK);

    // Unskew the cell origin; fp is the offset from that origin.
    float t = (pf.x + pf.y) * G2;
    vec2 tt = vec2(t, t);
    vec2 tf = pf - tt;
    vec2 fp = p - tf;

    // Choose the middle corner depending on which triangle of the cell we are in.
    vec2 p0 = fp;
    ivec2 i1 = (p0.x > p0.y) ? (I10) : (I01);
    vec2 f1 = (p0.x > p0.y) ? (F10) : (F01);

    // Offsets from the middle and the last corner.
    vec2 p1 = p0 - f1 + GG;
    vec2 p2 = p0 - F11 + GG2;

    // Radial falloff term per corner; a negative value means no contribution.
    float t0 = 0.5f - p0.x * p0.x - p0.y * p0.y;
    float t1 = 0.5f - p1.x * p1.x - p1.y * p1.y;
    float t2 = 0.5f - p2.x * p2.x - p2.y * p2.y;

    // Gradients, contributions and falloff powers default to zero so that a
    // skipped corner adds nothing to the value or to the derivative below.
    vec2 g0 = F00;
    vec2 g1 = F00;
    vec2 g2 = F00;

    float n0 = 0.0f;
    float n1 = 0.0f;
    float n2 = 0.0f;

    float t20 = 0.0f;
    float t40 = 0.0f;
    float t21 = 0.0f;
    float t41 = 0.0f;
    float t22 = 0.0f;
    float t42 = 0.0f;

    if (t0 >= 0.0f)
    {
        g0 = rotated_gradient2d(ip + I00, p0, rg);
        t20 = t0 * t0;
        t40 = t20 * t20;
        n0 = t40 * dot(p0, g0);
    }

    if (t1 >= 0.0f)
    {
        g1 = rotated_gradient2d(ip + i1, p1, rg);
        t21 = t1 * t1;
        t41 = t21 * t21;
        n1 = t41 * dot(p1, g1);
    }

    if (t2 >= 0.0f)
    {
        g2 = rotated_gradient2d(ip + I11, p2, rg);
        t22 = t2 * t2;
        t42 = t22 * t22;
        n2 = t42 * dot(p2, g2);
    }

    // Sum of the corner contributions, scaled by 40.
    float noise = 40.0f * (n0 + n1 + n2);

    // Derivative: -8 * sum(p_i * t_i^3 * dot(p_i, g_i)) + sum(t_i^4 * g_i),
    // scaled by the same factor of 40 as the value.
    vec2 dn = p0 * t20 * t0 * dot(p0, g0);
    dn += p1 * t21 * t1 * dot(p1, g1);
    dn += p2 * t22 * t2 * dot(p2, g2);
    dn *= -8.0f;
    dn += t40 * g0 + t41 * g1 + t42 * g2;
    dn *= 40.0f;

    return vec4(noise, dn.x, dn.y, 1.0f);
}

////////////////////////////////////////////////////////////////////////////////////////////////////

// Converts a position into integer-valued grid cell coordinates relative to
// origin, using a per-axis cell size. The w component of the result is zero.
vec4
GetGridPosition(
    vec4 position,
    vec4 origin,
    vec4 cell_size)
{
    float cell_x = floor((position.x - origin.x) / cell_size.x);
    float cell_y = floor((position.y - origin.y) / cell_size.y);
    float cell_z = floor((position.z - origin.z) / cell_size.z);
    return vec4(cell_x, cell_y, cell_z, 0.0f);
}

// Splits a linear index into (column, row) for a grid of the given size:
// column = index % width, row = (index / width) % height.
vec2
GetGridCoordinates(uint index, ivec2 size)
{
    uint width = uint(size.x);
    uint height = uint(size.y);

    uint column = index % width;
    uint row = (index / width) % height;
    return vec2(float(column), float(row));
}

// Splits a 2D position into its integer cell (xy of the result) and the
// fractional offset inside that cell (zw of the result).
vec4
GetPageCoordinates(vec2 position)
{
    vec2 cell_floor = floor(position);
    ivec2 cell = ivec2(int(cell_floor.x), int(cell_floor.y));
    vec2 frac = position - cell_floor;
    return vec4(float(cell.x), float(cell.y), frac.x, frac.y);
}

// Evaluates a cubic Bezier-style blend of four 2D control terms:
//   ba * b^3 + bb * p * b^2 + bc * p^2 * b + bd * p^3
// Callers pass b = 1 - p and pre-multiply bb/bc by 3.
vec2
Bezier2d(
    float p, float b, vec2 ba, vec2 bb, vec2 bc, vec2 bd)
{
    float p_sq = p * p;
    float b_sq = b * b;

    float w_a = b_sq * b;
    float w_b = p * b_sq;
    float w_c = p_sq * b;
    float w_d = p_sq * p;

    return ((ba * w_a + bb * w_b) + bc * w_c) + bd * w_d;
}

// Places a curve sample in world space: bezier.x displaces the blade root
// along dir, bezier.y replaces the height, and w is set to 1.
vec4
ComputeBladeVertex(
    vec2 bezier, vec4 pos, vec4 dir)
{
    vec4 displaced = pos + dir * bezier.x;
    return vec4(displaced.x, bezier.y, displaced.z, 1.0f);
}

// Blade color: (value * mult + offset + base) scaled by luminance_alpha.x.
// Alpha is forced to 1.0; luminance_alpha.y is currently not applied.
vec4
ComputeBladeColor(
    vec4 base, vec4 value, vec4 mult, vec4 offset,
    vec2 luminance_alpha)
{
    vec3 rgb = value.xyz * mult.xyz + offset.xyz + base.xyz;
    rgb *= luminance_alpha.x;
    return vec4(rgb, 1.0f); // alpha would be luminance_alpha.y
}

// Samples signed 2D gradient noise at uv * scale + bias and multiplies the
// result by noise_amplitude. noise_bias_scale packs bias in xy, scale in zw.
float
ComputeNoiseSample(
    vec2 uv,
    vec4 noise_bias_scale,
    float noise_amplitude)
{
    vec2 sample_point = (uv * noise_bias_scale.zw) + noise_bias_scale.xy;
    return GradientNoise2dfs(sample_point) * noise_amplitude;
}

// Samples rotated gradient noise for the wind/flow effect.
// The sample point is (uv + field_range.xy + time_delta) * 0.010 and the
// gradients are rotated by time_delta. noise_bias_scale is accepted but the
// scale is currently replaced by the fixed 0.010 factor.
float
ComputeFlowNoiseSample(
    vec2 uv,
    float time_delta,
    vec4 field_range,
    vec4 noise_bias_scale,
    float noise_amplitude)
{
    vec2 drift = field_range.xy + vec2(time_delta, time_delta);
    vec2 sample_point = (uv + drift) * 0.010f; // * noise_scale;
    return RotatedGradientNoise2dfs(sample_point, time_delta) * noise_amplitude;
}

// Interpolates between the minimum (x) and maximum (y) blade length using
// the noise sample as the blend weight.
float
ComputeBladeLength(
    float noise_sample,
    vec2 blade_length_range)
{
    return mix(blade_length_range.x, blade_length_range.y, noise_sample);
}

// Interpolates between the minimum (x) and maximum (y) blade thickness using
// the noise sample as the blend weight.
float
ComputeBladeThickness(
    float noise_sample,
    vec2 blade_thickness_range)
{
    return mix(blade_thickness_range.x, blade_thickness_range.y, noise_sample);
}

// Returns (blade_angle, blade_tilt) where the tilt is
// blade_angle * blade_length * flow amount (w of flow_scale_speed_amount) / 5.
// uv and time_delta are accepted but do not influence the result.
vec2
ComputeBladeAngleTilt(
    vec2 uv,
    float time_delta,
    float blade_angle,
    float blade_length,
    vec4 flow_scale_speed_amount)
{
    float tilt = blade_angle * blade_length * flow_scale_speed_amount.w * (1.0f / 5.0f);
    return vec2(blade_angle, tilt);
}

// Returns a unit direction (w = 0) perpendicular to both the blade-to-camera
// vector and the world up axis, i.e. the axis along which the blade is
// widened so that it faces the camera.
vec4
ComputeBladeOrientation(
    vec4 camera_position,
    vec4 blade_position,
    float eye_distance)
{
    const vec3 world_up = vec3(0.0f, 1.0f, 0.0f);

    vec4 to_camera = (camera_position - blade_position) * (1.0f / eye_distance);
    vec3 side = cross(to_camera.xyz, world_up);
    return normalize(vec4(side, 0.0f));
}

// Euclidean distance between the eye and a blade, measured over xyz only.
// Both arguments are homogeneous points; their w components are not spatial
// coordinates, so they are excluded (the previous 4-component dot product
// folded the w difference into the distance). The result is also no longer
// stored in a local named `distance`, which hid the GLSL built-in.
float
ComputeEyeDistance(
    vec4 eye_position,
    vec4 blade_position)
{
    vec3 view_delta = eye_position.xyz - blade_position.xyz;
    return length(view_delta);
}


// Distance falloff in [0, 1]: 1 at the eye, 0 at or beyond the scaled
// maximum distance, shaped by a 6th-power curve.
//   eye_distance     : distance from the eye to the blade
//   max_distance     : unscaled maximum distance
//   falloff_distance : multiplier applied to max_distance
// A non-positive scaled maximum returns 0: dividing by zero would yield NaN,
// and clamp() is undefined when its upper bound is below its lower bound.
float
ComputeFalloff(
    float eye_distance,
    float max_distance,
    float falloff_distance)
{
    max_distance *= falloff_distance;
    if (max_distance <= 0.0f)
        return 0.0f;

    float falloff = clamp(max_distance - eye_distance, 0.0f, max_distance) * (1.0f / max_distance);
    float f3 = falloff * falloff * falloff;
    return f3 * f3;
}

// Writes the quads of one grass blade into output_data and returns the
// number of vertices written.
//   vertex_index      : first output slot for this blade
//   curve_detail      : 0..1 blend between 2 and max_segments curve segments
//   uv                : unused; kept for signature compatibility
//   blade_position    : blade root position
//   blade_orientation : direction along which the curve's x offset is applied
//   blade_length      : blade height above the root
//   blade_thickness   : blade thickness
//   blade_angle_tilt  : y is the horizontal offset of the blade tip
// Each loop step emits four vertices: edge A at p and p + delta, then edge B
// at p + delta and p. Positions go to output_offset_vertex + i and colors to
// output_offset_color + i.
// NOTE(review): the loop runs while p <= 1.0, so the last step samples the
// curve at p + delta > 1 -- confirm whether that extra quad is intended.
// NOTE(review): the bound below is the total vertex count over all blades,
// not this blade's own range -- confirm neighbouring blades cannot overlap.
uint
CreateBezierPatch(
    uint vertex_index,
    float curve_detail,
    vec2 uv,
    vec4 blade_position,
    vec4 blade_orientation,
    float blade_length,
    float blade_thickness,
    vec2 blade_angle_tilt)
{
    uint blade_count = blade_curve_segment_counts.x;
    float max_elements = blade_curve_segment_counts.y;
    float max_segments = blade_curve_segment_counts.z;
    uint max_vertex_count = uint(max_elements * max_segments * blade_count);

    const vec4 gbase = vec4((10.0f / 256.0f), (20.0f / 256.0f), 0.0f, 0.0f);
    const vec4 gvalue = vec4((110.0f / 256.0f), (120.0f / 256.0f), (50.0f / 256.0f), 0.0f);
    const float thickness_scale = 0.1f;

    float goffset = blade_angle_tilt.y / 100.0f;

    float curve_segments = mix(2.0f, max_segments, curve_detail);
    float curve_delta = (1.0f / curve_segments);
    float thickness_delta = blade_thickness / blade_length;

    float tip_x = blade_angle_tilt.y;
    float root_y = blade_position.y;
    float half_thickness = blade_thickness * 0.5f;

    // The control points do not depend on the loop variable, so both edges
    // are set up once. Edge A sits at thickness offset 0, edge B at
    // thickness_delta; they share the tip control point.
    float ck_a = 0.0f;
    float ck_b = thickness_delta;
    float gt_a = (ck_a < half_thickness) ? (blade_thickness - ck_a) : ck_a;
    float gt_b = (ck_b < half_thickness) ? (blade_thickness - ck_b) : ck_b;

    vec2 ba = 1.0f * vec2(tip_x, root_y + blade_length);

    vec2 bb_a = 3.0f * vec2(ck_a * 0.040f, root_y + blade_length * 0.75f);
    vec2 bc_a = 3.0f * vec2(ck_a * 0.045f, root_y + blade_length * 0.25f);
    vec2 bd_a = 1.0f * vec2(ck_a * 0.030f, root_y);

    vec2 bb_b = 3.0f * vec2(ck_b * 0.040f, root_y + blade_length * 0.75f);
    vec2 bc_b = 3.0f * vec2(ck_b * 0.045f, root_y + blade_length * 0.25f);
    vec2 bd_b = 1.0f * vec2(ck_b * 0.030f, root_y);

    uint element_vertices = 0u;

    // The bound is written as an addition: `max_vertex_count - 4u` wraps to a
    // huge value when the count is below 4, which disabled the check.
    for (float p = 0.0f; p <= 1.0f && (vertex_index + 4u) < max_vertex_count; p += curve_delta)
    {
        float cp_here = p;
        float cb_here = 1.0f - cp_here;
        float grad_here = cb_here * thickness_scale;

        float cp_next = p + curve_delta;
        float cb_next = 1.0f - cp_next;
        float grad_next = cb_next * thickness_scale;

        vec2 bezier;

        // Edge A at p.
        bezier = Bezier2d(cp_here, cb_here, ba, bb_a, bc_a, bd_a);
        output_data[output_offset_vertex + vertex_index] =
            ComputeBladeVertex(bezier, blade_position, blade_orientation);
        output_data[output_offset_color + vertex_index] =
            ComputeBladeColor(gbase, gvalue, vec4(gt_a * grad_here), vec4(goffset * cb_here), blade_luminance_alpha);
        ++vertex_index;

        // Edge A at p + delta.
        bezier = Bezier2d(cp_next, cb_next, ba, bb_a, bc_a, bd_a);
        output_data[output_offset_vertex + vertex_index] =
            ComputeBladeVertex(bezier, blade_position, blade_orientation);
        output_data[output_offset_color + vertex_index] =
            ComputeBladeColor(gbase, gvalue, vec4(gt_a * grad_next), vec4(goffset * cb_next), blade_luminance_alpha);
        ++vertex_index;

        // Edge B at p + delta.
        bezier = Bezier2d(cp_next, cb_next, ba, bb_b, bc_b, bd_b);
        output_data[output_offset_vertex + vertex_index] =
            ComputeBladeVertex(bezier, blade_position, blade_orientation);
        output_data[output_offset_color + vertex_index] =
            ComputeBladeColor(gbase, gvalue, vec4(gt_b * grad_next), vec4(goffset * cb_next), blade_luminance_alpha);
        ++vertex_index;

        // Edge B at p.
        bezier = Bezier2d(cp_here, cb_here, ba, bb_b, bc_b, bd_b);
        output_data[output_offset_vertex + vertex_index] =
            ComputeBladeVertex(bezier, blade_position, blade_orientation);
        output_data[output_offset_color + vertex_index] =
            ComputeBladeColor(gbase, gvalue, vec4(gt_b * grad_here), vec4(goffset * cb_here), blade_luminance_alpha);
        ++vertex_index;

        element_vertices += 4u;
    }

    return element_vertices;
}

//////////////////////////////////////////////////////////////////////////////

// Maps grid coordinates onto a radial layout in the XZ plane.
//   uv : normalized grid coordinates; uv.y selects the angle
//   vt : raw grid coordinates; vt.x squared scales the radius
//   ve, camera_position, camera_view : accepted but not read
// The angle sweeps camera_fov + (360 - camera_fov) degrees times uv.y, and the
// radius is (clip_range.y - clip_range.x) * vt.x^2. Returns (x, 0, z, 1).
// NOTE(review): the radius uses the raw grid column vt.x rather than the
// normalized uv.x -- confirm that is intended.
vec4
ComputeRadialGridPosition(
    vec2 uv,
    vec2 vt,
    vec2 ve,
    vec4 camera_position,
    vec4 camera_view)
{
    float extend = 360.0 - camera_fov;
    float sweep = radians(camera_fov + extend);
    float angle = sweep * uv.y;

    float radial_span = clip_range.y - clip_range.x; // near offset (clip_range.x) is not added
    float radius = radial_span * (vt.x * vt.x);

    float along_x = radius * cos(angle);
    float along_z = radius * sin(angle);

    return vec4(along_x, 0.0f, along_z, 1.0f);
}

// Entry point: one invocation generates one grass blade.
// Steps: flatten the invocation id into a blade index, place the blade on a
// radial grid, displace it vertically with simplex noise (terrain), jitter
// it, derive length/thickness/angle from noise, and emit the blade geometry.
void main()
{
    // Flatten the 2D global invocation id into a linear blade index.
    uint row_stride = gl_NumWorkGroups.x * gl_WorkGroupSize.x;
    uint index = gl_GlobalInvocationID.y * row_stride + gl_GlobalInvocationID.x;

    uint blade_count = blade_curve_segment_counts.x;
    float max_elements = blade_curve_segment_counts.y;
    float max_segments = blade_curve_segment_counts.z;
    if (index >= blade_count)
        return;

    // Each blade owns max_elements * max_segments consecutive output slots.
    uint vertex_index = uint(index * max_elements * max_segments);

    vec2 vt = GetGridCoordinates(index, grid_resolution);
    vec2 vs = vec2(1.0f / float(grid_resolution.x), 1.0f / float(grid_resolution.y));
    vec2 ve = vec2(float(grid_resolution.x), float(grid_resolution.y));
    vec2 uv = vt * vs;

    // Terrain noise parameters.
    float frequency = 0.0025f;
    float amplitude = 70.00f;
    float phase = 1.0f;

    // These operations could be done on the CPU as it writes the uniforms...
    // The two subtractions reduce to floor(position * 0.1) and floor(view).
    // NOTE(review): presumably meant to snap the camera to a coarse grid -- confirm.
    vec4 lcl_camera_position = camera_position;
    vec4 lcl_camera_view = camera_view;
    lcl_camera_position.y = 0.0f;
    lcl_camera_position -= (lcl_camera_position - floor(lcl_camera_position * 0.1f));
    lcl_camera_view -= (lcl_camera_view - floor(lcl_camera_view));

    vec4 position = ComputeRadialGridPosition(uv, vt, ve, lcl_camera_position, lcl_camera_view);

    // Vertical terrain displacement from simplex noise.
    vec4 bias = vec4(phase, 0.0f, phase, 0.0f);
    vec4 sampleVar = position + bias;
    vec4 noise = RotatedSimplexNoise2dfs(sampleVar.xz * frequency, 35.0f);
    float displacement = noise.x;

    vec4 blade_position = sampleVar;
    blade_position.y += amplitude * displacement;
    blade_position.w = 1.0f;

    // Jitter: the noise derivatives move the blade in x/z and the noise value
    // lowers it in y. Only xyz is touched; the previous code added basis
    // vectors with w = 1, which corrupted blade_position.w.
    vec4 rdnoise = RotatedSimplexNoise2dfs(sampleVar.xz, uv.x * uv.y) * jitter_amount;
    blade_position.xyz += vec3(rdnoise.y, -abs(rdnoise.x), rdnoise.z);

    vec2 np = blade_position.xz;
    vec4 field_range = noise_bias_scale;
    float field_length = length(field_range.zw);

    float noise_sample = ComputeNoiseSample(np, noise_bias_scale, noise_amplitude);

    // Blade angle: static noise plus time-varying flow noise.
    vec4 flow_bias_scale = noise_bias_scale;
    flow_bias_scale.zw = flow_scale_speed_amount.xy;
    float flow_speed = flow_scale_speed_amount.z;
    float blade_angle = ComputeNoiseSample(np, noise_bias_scale, 0.1f);
    blade_angle += ComputeFlowNoiseSample(np, time_delta * flow_speed, field_range, flow_bias_scale, noise_amplitude);

    // Level of detail falls off with distance from the eye.
    float eye_distance = ComputeEyeDistance(lcl_camera_position, blade_position);
    float falloff = ComputeFalloff(eye_distance, field_length, falloff_distance);
    float blade_detail = smoothie(falloff);

    float blade_length = ComputeBladeLength(noise_sample, blade_length_range);
    float blade_thickness = ComputeBladeThickness(noise_sample, blade_thickness_range);
    vec2 blade_angle_tilt = ComputeBladeAngleTilt(uv, time_delta, blade_angle, blade_length, flow_scale_speed_amount);
    vec4 blade_orientation = ComputeBladeOrientation(lcl_camera_position, blade_position, eye_distance);

    // The returned vertex count is not needed here.
    CreateBezierPatch(vertex_index,
                      blade_detail, uv,
                      blade_position,
                      blade_orientation,
                      blade_length,
                      blade_thickness,
                      blade_angle_tilt);
}
