﻿#version 440 core
#extension GL_EXT_gpu_shader4 : enable

// ---------------------------------------------------------------------------
// Compile-time configuration.
// Every switch is wrapped in #ifndef, so a definition that appears earlier in
// the translation unit takes precedence over the defaults below.
// ---------------------------------------------------------------------------

// Boolean expression, used directly as an `if` condition in CompressRgb().
// true  : estimate the block's colour line from the principal axis of the
//         texel colours (power iteration on their covariance matrix).
// false : use the per-channel minimum / maximum of the texel colours.
#ifndef ESTIMATE_COLORLINE_WITH_PCA
#define ESTIMATE_COLORLINE_WITH_PCA true  //default: true
#endif

// Number of power-iteration steps applied to the principal-axis estimate.
#ifndef PCA_EIGENVECTOR_LOOP_COUNT
#define PCA_EIGENVECTOR_LOOP_COUNT 8
#endif

// Boolean expression, used directly as an `if` condition in CompressRgb().
// true : re-fit both endpoints to the texels after the initial estimate.
#ifndef REFINE_ESTIMATED_COLORLINE
#define REFINE_ESTIMATED_COLORLINE true
#endif

// Texture coordinate that main() uses to locate the 4x4 texel block to encode.
// NOTE(review): presumably interpolated from the vertex stage with one
// invocation per output block - confirm against the draw setup.
in vec2 v_texCoord;

// Encoded block produced by CompressRgb():
//   .r / .g : the two RGB565 endpoint words
//   .b      : 2-bit palette indices of texels 0..7  (texel 0 in the lowest bits)
//   .a      : 2-bit palette indices of texels 8..15 (texel 8 in the lowest bits)
out uvec4 oColor;

// Source image; main() reads 16 texels from it per invocation.
uniform sampler2D sTexture;

// Encodes one block of 16 RGBA texels as BC1.
//
// texels : the block's texels; index i is packed at bit position 2*(i & 7) of
//          the low (i < 8) or high (i >= 8) index word. A texel whose alpha is
//          below 1.0 is treated as fully transparent.
//
// Returns:
//   .r : first RGB565 endpoint word  (color0)
//   .g : second RGB565 endpoint word (color1)
//   .b : 2-bit indices of texels 0..7
//   .a : 2-bit indices of texels 8..15
//
// A block with at least one transparent texel is written in three-colour mode
// (color0 <= color1, index 3 = transparent); otherwise four-colour mode is
// used (color0 > color1), or a single flat colour when both endpoint words
// quantise to the same value.
uvec4 CompressRgb(vec4 texels[16])
{
    // Alpha values below this are encoded as the transparent palette entry.
    const float clampedTransparentValue = 1.0;

    // ---------------------------------------------------------------------
    // Alpha scan. Done ahead of the endpoint estimation so that BOTH
    // estimation paths know about transparency and ignore transparent texels.
    // ---------------------------------------------------------------------
    bool isTransparent = false;
    int  opaqueCount   = 0;
    vec3 mean          = vec3(0.0);
    for (int i = 0; i < 16; i++)
    {
        if (texels[i].a < clampedTransparentValue)
        {
            isTransparent = true;
            continue;
        }

        mean += texels[i].xyz;
        opaqueCount++;
    }

    if (opaqueCount == 0)
    {
        // Entirely transparent block: color0 (0) < color1 (1) selects
        // three-colour mode, and every 2-bit index is 3 (transparent).
        return uvec4(0u, 1u, 65535u, 65535u);
    }

    mean /= float(opaqueCount);

    // Fallback endpoints; they survive when the opaque texels share one colour.
    vec3 colorMin = mean;
    vec3 colorMax = mean;

    if (ESTIMATE_COLORLINE_WITH_PCA)
    {
        // Unnormalised covariance matrix of the opaque texel colours.
        mat3 mtx;
        {
            vec3 diag  = vec3(0.0);
            vec3 ndiag = vec3(0.0);
            for (int i = 0; i < 16; i++)
            {
                if (texels[i].a < clampedTransparentValue)
                {
                    continue;
                }

                vec3 diff = texels[i].xyz - mean;
                diag  += diff.xyz * diff.xyz;
                ndiag += diff.xyz * diff.yzx;
            }

            mtx[0][0] = diag.x;
            mtx[1][1] = diag.y;
            mtx[2][2] = diag.z;
            mtx[1][0] = mtx[0][1] = ndiag.x;
            mtx[2][1] = mtx[1][2] = ndiag.y;
            mtx[0][2] = mtx[2][0] = ndiag.z;
        }

        // Seed the power iteration with the longest image of a basis vector.
        vec3  vx = mtx * vec3(1.0, 0.0, 0.0);
        vec3  vy = mtx * vec3(0.0, 1.0, 0.0);
        vec3  vz = mtx * vec3(0.0, 0.0, 1.0);
        vec3  eigenVector = vx;
        float maxLen2 = dot(vx, vx);
        float len2 = dot(vy, vy);
        if (isnan(maxLen2) || len2 > maxLen2)
        {
            maxLen2 = len2;
            eigenVector = vy;
        }
        len2 = dot(vz, vz);
        if (isnan(maxLen2) || len2 > maxLen2)
        {
            maxLen2 = len2;
            eigenVector = vz;
        }

        // A zero covariance matrix means every opaque texel has the same
        // colour. normalize() of a zero vector is undefined, so skip the axis
        // search and keep both endpoints at the mean.
        if (maxLen2 > 0.0)
        {
            eigenVector = normalize(eigenVector);
            for (int i = 0; i < PCA_EIGENVECTOR_LOOP_COUNT; i++)
            {
                eigenVector = normalize(mtx * eigenVector);
            }

            // Endpoints are the opaque texels with the extreme projections
            // onto the principal axis.
            float projMax = -100000000000.0;
            float projMin = 1000000000000.0;
            for (int i = 0; i < 16; i++)
            {
                if (texels[i].a < clampedTransparentValue)
                {
                    continue;
                }

                float proj = dot(texels[i].xyz, eigenVector);

                // The two tests are intentionally independent: a texel that
                // raises the maximum may also be the minimum seen so far (the
                // first opaque texel always is). Chaining them with `else`
                // would let such a texel never become colorMin.
                if (proj > projMax)
                {
                    projMax = proj;
                    colorMax = texels[i].xyz;
                }
                if (proj < projMin)
                {
                    projMin = proj;
                    colorMin = texels[i].xyz;
                }
            }
        }
    }
    else
    {
        // Per-channel bounding box of the opaque texels.
        bool seeded = false;
        for (int i = 0; i < 16; i++)
        {
            if (texels[i].a < clampedTransparentValue)
            {
                continue;
            }

            if (!seeded)
            {
                colorMin = colorMax = texels[i].xyz;
                seeded = true;
            }
            else
            {
                colorMin = min(colorMin, texels[i].xyz);
                colorMax = max(colorMax, texels[i].xyz);
            }
        }
    }

    if (REFINE_ESTIMATED_COLORLINE)
    {
        vec3  colorLine = colorMax - colorMin;
        float lineLen2  = dot(colorLine, colorLine);

        // With coincident endpoints there is no line to refine, and dividing
        // by lineLen2 would feed NaN into the bucket selection below.
        if (lineLen2 > 0.0)
        {
            // Scale so that colorMin projects to 0 and colorMax to 3.
            colorLine *= 3.0 / lineLen2;

            // Per-bucket accumulators: rgb = colour sum, a = texel count.
            vec4 sum0 = vec4(0.0);
            vec4 sum1 = vec4(0.0);
            vec4 sum2 = vec4(0.0);
            vec4 sum3 = vec4(0.0);

            for (int i = 0; i < 16; ++i)
            {
                if (texels[i].a < clampedTransparentValue)
                {
                    continue;
                }

                vec4  color = vec4(texels[i].rgb, 1.0);
                float proj  = dot((color.rgb - colorMin), colorLine);

                // Bucket k holds the texels nearest to the point k/3 of the
                // way from colorMin to colorMax.
                switch (int(clamp(round(proj), 0.0, 3.0)))
                {
                    case 0:
                        {
                            sum0 += color;
                            break;
                        }
                    case 1:
                        {
                            sum1 += color;
                            break;
                        }
                    case 2:
                        {
                            sum2 += color;
                            break;
                        }
                    case 3:
                        {
                            sum3 += color;
                            break;
                        }
                }
            }

            // Normal equations of the least-squares fit
            //   texel ~= ((3 - k) * colorMin + k * colorMax) / 3
            // over all bucketed texels. The rgb lanes carry 3 * sum(w * c) and
            // the alpha lanes carry sum(w * w); `a` is sum((3 - k) * k).
            vec4  bd = sum0 * vec4(9,9,9,9) + sum1 * vec4(6,6,6,4) + sum2 * vec4(3,3,3,1);
            vec4  ce = sum1 * vec4(3,3,3,1) + sum2 * vec4(6,6,6,4) + sum3 * vec4(9,9,9,9);
            float a  = 2.0 * (sum1.a + sum2.a);
            float delta = a * a - bd.a * ce.a;

            // delta is zero when every texel fell into a single bucket; the
            // system is then singular and the estimate is kept as it is.
            if (delta != 0.0)
            {
                colorMax = (a * bd.rgb - bd.a * ce.rgb) / delta;
                colorMin = (a * ce.rgb - ce.a * bd.rgb) / delta;
            }
        }
    }

    uvec4 o_Color;

    // Quantise both endpoints to RGB565.
    {
        uvec3 color0 = uvec3(clamp(colorMax, 0.0, 1.0) * vec3(31.99, 63.99, 31.99));
        o_Color.r = (color0.x << 11) | (color0.y << 5) | color0.z;

        uvec3 color1 = uvec3(clamp(colorMin, 0.0, 1.0) * vec3(31.99, 63.99, 31.99));
        o_Color.g = (color1.x << 11) | (color1.y << 5) | color1.z;
    }

    if (isTransparent)
    {
        // Three-colour mode needs color0 <= color1. Whichever branch runs,
        // afterwards colorMin is the float endpoint behind o_Color.r (color0)
        // and colorMax the one behind o_Color.g (color1).
        if (o_Color.r < o_Color.g)
        {
            vec3 temp = colorMax;
            colorMax = colorMin;
            colorMin = temp;
        }
        else
        {
            uint temp = o_Color.r;
            o_Color.r = o_Color.g;
            o_Color.g = temp;
        }

        vec3 colorLine = colorMax - colorMin;
        colorLine /= dot(colorLine, colorLine);   // only used when the endpoint words differ

        // BC1 three-colour palette along the line (t = 0 at colorMin):
        //   t = 0   -> index 0 (color0)
        //   t = 1/2 -> index 2 ((color0 + color1) / 2)
        //   t = 1   -> index 1 (color1)
        //   index 3 -> transparent
        // The nearest entry therefore switches at t = 1/4 and t = 3/4.
        uint indexLo = 0u;
        uint indexHi = 0u;
        for (int i = 0; i < 16; i++)
        {
            uint index = 0u;
            if (texels[i].a < clampedTransparentValue)
            {
                index = 3u;
            }
            else if (o_Color.r != o_Color.g)
            {
                float t = dot((texels[i].xyz - colorMin), colorLine);
                if (t >= 0.25) index = 2u;
                if (t >= 0.75) index = 1u;
            }

            uint bits = index << uint((i & 7) * 2);
            if (i < 8)
            {
                indexLo |= bits;
            }
            else
            {
                indexHi |= bits;
            }
        }
        o_Color.b = indexLo;
        o_Color.a = indexHi;
    }
    else if (o_Color.r == o_Color.g)
    {
        // Both endpoints quantise to the same word: every texel uses index 0.
        o_Color.b = 0u;
        o_Color.a = 0u;
    }
    else
    {
        // Four-colour mode needs color0 > color1; keep the float endpoints in
        // step with the words so that colorMax matches color0.
        if (o_Color.r < o_Color.g)
        {
            {
                uint temp = o_Color.r;
                o_Color.r = o_Color.g;
                o_Color.g = temp;
            }
            {
                vec3 temp = colorMax;
                colorMax = colorMin;
                colorMin = temp;
            }
        }

        vec3 colorLine = colorMax - colorMin;
        colorLine /= dot(colorLine, colorLine);

        // BC1 four-colour palette along the line (t = 0 at colorMin):
        //   t = 0   -> index 1 (color1)
        //   t = 1/3 -> index 3
        //   t = 2/3 -> index 2
        //   t = 1   -> index 0 (color0)
        // The nearest entry therefore switches at t = 1/6, 1/2 and 5/6, the
        // same bucket borders the refinement step uses via round(3 * t).
        uint indexLo = 0u;
        uint indexHi = 0u;
        for (int i = 0; i < 16; i++)
        {
            float t = dot((texels[i].xyz - colorMin), colorLine);

            uint index = 1u;
            if (t >= 1.0 / 6.0) index = 3u;
            if (t >= 0.5)       index = 2u;
            if (t >= 5.0 / 6.0) index = 0u;

            uint bits = index << uint((i & 7) * 2);
            if (i < 8)
            {
                indexLo |= bits;
            }
            else
            {
                indexHi |= bits;
            }
        }
        o_Color.b = indexLo;
        o_Color.a = indexHi;
    }

    return o_Color;
}


// Shader entry point: reads the 16 texels surrounding the current fetch
// position from sTexture and writes their encoded block to oColor.
void main()
{
    // Level-0 size of the source and the same size rounded up to a multiple
    // of 4 in each dimension.
    ivec2 texDims    = ivec2(textureSize(sTexture, 0));
    ivec2 paddedDims = ((texDims + 3) / 4) * 4;

    // Shift by half a texel, then rescale by padded / actual size.
    // NOTE(review): presumably v_texCoord spans a target sized for the padded
    // dimensions - confirm against the caller that issues the draw.
    vec2 halfTexel = .5 / vec2(texDims);
    vec2 padScale  = vec2(paddedDims) / vec2(texDims);
    vec2 blockUv   = (v_texCoord + halfTexel) * padScale;

    // textureOffset() requires a constant-expression offset, so the 4x4
    // neighbourhood is written out instead of being generated by a loop.
    // Layout: index = row * 4 + column, with column and row offsets running
    // from -2 to +1.
    vec4 blockTexels[16];

    // row offset -2
    blockTexels[ 0] = textureOffset(sTexture, blockUv, ivec2(-2, -2));
    blockTexels[ 1] = textureOffset(sTexture, blockUv, ivec2(-1, -2));
    blockTexels[ 2] = textureOffset(sTexture, blockUv, ivec2( 0, -2));
    blockTexels[ 3] = textureOffset(sTexture, blockUv, ivec2( 1, -2));

    // row offset -1
    blockTexels[ 4] = textureOffset(sTexture, blockUv, ivec2(-2, -1));
    blockTexels[ 5] = textureOffset(sTexture, blockUv, ivec2(-1, -1));
    blockTexels[ 6] = textureOffset(sTexture, blockUv, ivec2( 0, -1));
    blockTexels[ 7] = textureOffset(sTexture, blockUv, ivec2( 1, -1));

    // row offset 0
    blockTexels[ 8] = textureOffset(sTexture, blockUv, ivec2(-2,  0));
    blockTexels[ 9] = textureOffset(sTexture, blockUv, ivec2(-1,  0));
    blockTexels[10] = textureOffset(sTexture, blockUv, ivec2( 0,  0));
    blockTexels[11] = textureOffset(sTexture, blockUv, ivec2( 1,  0));

    // row offset +1
    blockTexels[12] = textureOffset(sTexture, blockUv, ivec2(-2,  1));
    blockTexels[13] = textureOffset(sTexture, blockUv, ivec2(-1,  1));
    blockTexels[14] = textureOffset(sTexture, blockUv, ivec2( 0,  1));
    blockTexels[15] = textureOffset(sTexture, blockUv, ivec2( 1,  1));

    oColor = CompressRgb(blockTexels);
}
