﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

// Compresses one block of 16 texels into a 64-bit BC1 block.
//
// texels  : the 16 RGBA texels of the block. A texel whose alpha is below 1.0 is
//           treated as transparent (transparency is only detected on the PCA path).
// returns : x = endpoint color0 packed as RGB565,
//           y = endpoint color1 packed as RGB565,
//           z = 2-bit selectors of texels[0..7]  (texel i in bits 2*i .. 2*i+1),
//           w = 2-bit selectors of texels[8..15] (texel i in bits 2*(i-8) .. 2*(i-8)+1).
//
// Behavior is controlled by values defined outside this function:
// ESTIMATE_COLORLINE_WITH_PCA, PCA_EIGENVECTOR_LOOP_COUNT,
// PCA_PALETTE_OPTIMIZE_LOOP_COUNT and REFINE_ESTIMATED_COLORLINE.
uvec4 CompressRgb(vec4 texels[16])
{
    // Estimate the color line, i.e. the straight line through colorMin and colorMax.
    vec3 colorMin;
    vec3 colorMax;

    bool isTransparent = false;
    float clampedTransparentValue = 1.0;

    if (ESTIMATE_COLORLINE_WITH_PCA)
    {
        // Principal component analysis: find the axis of largest variance.
        vec3 eigenVector;
        {
            // Mean of the opaque texels.
            vec3 mean = vec3(.0);
            {
                int sampleCount = 0;
                for (int i = 0; i < 16; i++)
                {
                    if (texels[i].a < clampedTransparentValue)
                    {
                        isTransparent = true;
                        continue;
                    }

                    mean += texels[i].xyz;
                    sampleCount++;
                }

                // Every texel is transparent: stop early.
                // color0 (0) < color1 (1) and every selector is 3.
                if (sampleCount == 0)
                {
                    return uvec4(0u, 1u, 65535u, 65535u);
                }

                mean /= float(sampleCount);
                colorMin = colorMax = mean;
            }

            // Variance-covariance matrix of the opaque texels (not normalized by the
            // sample count; only its dominant direction is used).
            mat3 mtx;
            {
                vec3 diag = vec3(.0);
                vec3 ndiag = vec3(.0);
                for (int i = 0; i < 16; i++)
                {
                    if (texels[i].a < clampedTransparentValue)
                    {
                        continue;
                    }

                    vec3 diff  = texels[i].xyz - mean;
                    diag  += diff.xyz * diff.xyz;   // xx, yy, zz
                    ndiag += diff.xyz * diff.yzx;   // xy, yz, zx
                }

                mtx[0][0] = diag.x;
                mtx[1][1] = diag.y;
                mtx[2][2] = diag.z;
                mtx[1][0] = mtx[0][1] = ndiag.x;
                mtx[2][1] = mtx[1][2] = ndiag.y;
                mtx[0][2] = mtx[2][0] = ndiag.z;
            }

            // Direction of the first principal component, by power iteration.
            {
                // Start from the matrix column with the largest squared length.
                {
                    vec3 vx = mtx * vec3(1.0, 0.0, 0.0);
                    vec3 vy = mtx * vec3(0.0, 1.0, 0.0);
                    vec3 vz = mtx * vec3(0.0, 0.0, 1.0);
                    float maxLen2 = dot(vx, vx);
                    eigenVector = vx;
                    float len2 = dot(vy, vy);
                    if (isnan(maxLen2) || len2 > maxLen2)
                    {
                        maxLen2 = len2;
                        eigenVector = vy;
                    }
                    len2 = dot(vz, vz);
                    if (isnan(maxLen2) || len2 > maxLen2)
                    {
                        maxLen2 = len2;
                        eigenVector = vz;
                    }
                    eigenVector = normalize(eigenVector);
                }
                for (int i = 0; i < PCA_EIGENVECTOR_LOOP_COUNT; i++)
                {
                    eigenVector = normalize(mtx * eigenVector);
                }
            }
        }

        // The opaque texels with the largest / smallest projection onto the principal
        // axis become the max / min colors.
        {
            float projMax = -100000000000.0;
            float projMin = 1000000000000.0;

            for (int i = 0; i < 16; i++)
            {
                if (texels[i].a < clampedTransparentValue)
                {
                    continue;
                }

                float proj = dot(texels[i].xyz, eigenVector);

                // The two tests are deliberately independent: a texel that raises the
                // maximum may also be the smallest projection seen so far (the first
                // opaque texel always is both). Chaining them with "else" would leave
                // colorMin at a non-extreme texel or at the mean.
                if (proj > projMax)
                {
                    projMax = proj;
                    colorMax = texels[i].xyz;
                }
                if (proj < projMin)
                {
                    projMin = proj;
                    colorMin = texels[i].xyz;
                }
            }
        }

        // Palette optimization: assign every opaque texel to the nearer endpoint and
        // move each endpoint to the average of itself and its assigned texels.
        for (int paletteLoopIndex = 0; paletteLoopIndex < PCA_PALETTE_OPTIMIZE_LOOP_COUNT; ++paletteLoopIndex)
        {
            const float initScale = 1.0;
            vec3  tempMax   = colorMax;
            vec3  tempMin   = colorMin;
            float normalMax = initScale;
            float normalMin = initScale;

            // The current endpoint itself takes part in the average with weight initScale.
            colorMax *= normalMax;
            colorMin *= normalMin;

            for (int i = 0; i < 16; ++i)
            {
                if (texels[i].a < clampedTransparentValue)
                {
                    continue;
                }

                if (length(texels[i].rgb - tempMin) < length(texels[i].rgb - tempMax))
                {
                    colorMin  += texels[i].rgb;
                    normalMin += 1.0;
                }
                else
                {
                    colorMax  += texels[i].rgb;
                    normalMax += 1.0;
                }
            }

            // One endpoint attracted no texel: keep the previous endpoints and stop.
            if ((normalMin == initScale) || (normalMax == initScale))
            {
                colorMin = tempMin;
                colorMax = tempMax;
                break;
            }

            colorMin /= normalMin;
            colorMax /= normalMax;
        }
    }
    else
    {
        // Per-channel bounding box of all 16 texels.
        // NOTE(review): this path neither skips transparent texels nor sets
        // isTransparent, so blocks are always encoded as fully opaque here -
        // confirm that this is intended.
        colorMin = colorMax = texels[0].xyz;
        for (int i = 1; i < 16; i++)
        {
            colorMin = min(colorMin, texels[i].xyz);
            colorMax = max(colorMax, texels[i].xyz);
        }
    }

    // Least-squares refinement of the endpoints.
    if (REFINE_ESTIMATED_COLORLINE)
    {
        vec3  colorLine    = colorMax - colorMin;
        float lineLengthSq = dot(colorLine, colorLine);

        // A zero-length line (all opaque texels share one color) has nothing to refine;
        // skipping it also avoids dividing by zero and feeding NaN into the bucket
        // selection below.
        if (lineLengthSq > 0.0)
        {
            // Scale so that projections run from 0 at colorMin to 3 at colorMax.
            colorLine *= 3.0 / lineLengthSq;

            // sumK accumulates the texels that fall into bucket K (K thirds of the way
            // from colorMin to colorMax); the w component counts them.
            vec4 sum0 = vec4(0.0);
            vec4 sum1 = vec4(0.0);
            vec4 sum2 = vec4(0.0);
            vec4 sum3 = vec4(0.0);

            for (int i = 0; i < 16; ++i)
            {
                if (texels[i].a < clampedTransparentValue)
                {
                    continue;
                }

                vec4  color = vec4(texels[i].rgb, 1.0);
                float proj  = dot((color.rgb - colorMin), colorLine);

                switch (int(clamp(round(proj), 0.0, 3.0)))
                {
                    case 0:
                        {
                            sum0 += color;
                            break;
                        }
                    case 1:
                        {
                            sum1 += color;
                            break;
                        }
                    case 2:
                        {
                            sum2 += color;
                            break;
                        }
                    case 3:
                        {
                            sum3 += color;
                            break;
                        }
                }
            }

            // Normal equations of the fit, scaled by 9, with k the bucket of a texel:
            //   bd.a * colorMin + a    * colorMax = bd.rgb
            //   a    * colorMin + ce.a * colorMax = ce.rgb
            // where bd.a = sum((3-k)^2), ce.a = sum(k^2), a = sum(k*(3-k)),
            // bd.rgb = 3*sum((3-k)*color) and ce.rgb = 3*sum(k*color).
            vec4  bd = sum0 * vec4(9,9,9,9) + sum1 * vec4(6,6,6,4) + sum2 * vec4(3,3,3,1);
            vec4  ce = sum1 * vec4(3,3,3,1) + sum2 * vec4(6,6,6,4) + sum3 * vec4(9,9,9,9);
            float a  = 2.0 * (sum1.a + sum2.a);
            float delta = a*a - bd.a * ce.a;

            // Keep the current endpoints when the system is singular.
            if (delta != 0.0)
            {
                colorMax = (a * bd.rgb - bd.a * ce.rgb) / delta;
                colorMin = (a * ce.rgb - ce.a * bd.rgb) / delta;
            }
        }
    }

    // Build the compressed block.
    uvec4 o_Color;

    // Quantize both endpoints to RGB565.
    {
        const vec3 colorLimit = vec3(31.0, 63.0, 31.0);
        uvec3 color0 = uvec3(round(clamp(colorMax, 0.0, 1.0) * colorLimit));
        o_Color.r = (color0.x << 11) | (color0.y << 5) | color0.z;

        uvec3 color1 = uvec3(round(clamp(colorMin, 0.0, 1.0) * colorLimit));
        o_Color.g = (color1.x << 11) | (color1.y << 5) | color1.z;
    }

    // Both endpoints quantize to the same value.
    if (o_Color.r == o_Color.g)
    {
        if (!isTransparent)
        {
            // Every texel uses selector 0.
            o_Color.ba = uvec2(0u);
            return o_Color;
        }

        // Selector 3 for transparent texels, selector 0 for the others.
        for (int wordIndex = 0; wordIndex < 2; wordIndex++)
        {
            uint bits  = 0u;
            uint shift = 0u;
            for (int slot = 0; slot < 8; slot++)
            {
                uint index = texels[wordIndex * 8 + slot].a < clampedTransparentValue ? 3u : 0u;
                bits |= (index << shift);
                shift += 2u;
            }

            if (wordIndex == 0)
            {
                o_Color.b = bits;
            }
            else
            {
                o_Color.a = bits;
            }
        }
        return o_Color;
    }

    if (isTransparent)
    {
        // With transparency the packed endpoints must satisfy color0 < color1, and
        // colorMin must be the color stored in color0 (selector 0).
        if (o_Color.r < o_Color.g)
        {
            // The packed order is already right; only swap the float endpoints.
            vec3 temp = colorMax;
            colorMax = colorMin;
            colorMin = temp;
        }
        else
        {
            // Swap the packed endpoint colors.
            uint temp = o_Color.r;
            o_Color.r = o_Color.g;
            o_Color.g = temp;
        }

        vec3 colorLine = colorMax - colorMin;
        colorLine /= dot(colorLine, colorLine);

        // BC1 order
        //   min |--0--|--2--|--1--| max
        //   3: Transparent
        for (int wordIndex = 0; wordIndex < 2; wordIndex++)
        {
            uint bits  = 0u;
            uint shift = 0u;
            for (int slot = 0; slot < 8; slot++)
            {
                vec4 texel = texels[wordIndex * 8 + slot];

                uint index;
                if (texel.a < clampedTransparentValue)
                {
                    index = 3u;
                }
                else
                {
                    // Clamp to the segment: a texel projecting outside [0, 1] would
                    // otherwise produce selector 2 or 3 (3 = transparent), a value
                    // wider than 2 bits, or an undefined negative float-to-uint
                    // conversion.
                    float i_val = clamp(dot((texel.xyz - colorMin), colorLine), 0.0, 1.0);
                    if (abs(i_val - 0.5) < (1.0 / 6.0))
                    {
                        index = 2u;
                    }
                    else
                    {
                        index = uint(i_val + 0.5);
                    }
                }

                bits |= (index << shift);
                shift += 2u;
            }

            if (wordIndex == 0)
            {
                o_Color.b = bits;
            }
            else
            {
                o_Color.a = bits;
            }
        }
    }
    else
    {
        // Without transparency the packed endpoints must satisfy color0 > color1, and
        // colorMax must be the color stored in color0 (selector 0).
        if (o_Color.r < o_Color.g)
        {
            {
                uint temp = o_Color.r;
                o_Color.r = o_Color.g;
                o_Color.g = temp;
            }
            {
                vec3 temp = colorMax;
                colorMax = colorMin;
                colorMin = temp;
            }
        }

        vec3 colorLine = colorMax - colorMin;
        colorLine /= dot(colorLine, colorLine);

        // BC1 order
        //   max |--0--|--2--|--3--|--1--| min
        for (int wordIndex = 0; wordIndex < 2; wordIndex++)
        {
            uint bits  = 0u;
            uint shift = 0u;
            for (int slot = 0; slot < 8; slot++)
            {
                // Clamp to the segment so that the selector always fits in 2 bits and
                // no negative value is converted to uint.
                float i_val = clamp(dot((texels[wordIndex * 8 + slot].xyz - colorMin), colorLine), 0.0, 1.0);

                // Measure from colorMax, which is selector 0.
                i_val = 1.0 - i_val;

                uint index;
                if (abs(i_val - 0.5) < 0.25)
                {
                    // Interior of the line: selectors 2 and 3.
                    index = uint(i_val * 3.999999) + 1u;
                }
                else
                {
                    // Near an endpoint: selector 0 (max) or 1 (min).
                    index = uint(i_val + 0.5);
                }

                bits |= (index << shift);
                shift += 2u;
            }

            if (wordIndex == 0)
            {
                o_Color.b = bits;
            }
            else
            {
                o_Color.a = bits;
            }
        }
    }

    return o_Color;
}


// Shader entry point: fetches the 16 texels surrounding the current block position
// from sTexture at mipLevel, compresses them with CompressRgb and writes the packed
// block to oColor.
void main()
{
    // Size of the selected mip level, and the same size rounded up to a multiple of 4.
    ivec2 mipSize    = ivec2(textureSize(sTexture, mipLevel));
    ivec2 paddedSize = ((mipSize + ivec2(3)) / ivec2(4)) * ivec2(4);

    // Ratio of the padded size to the real size, and half a texel in normalized units.
    vec2 padScale  = vec2(paddedSize) / vec2(mipSize);
    vec2 halfTexel = vec2(0.5) / vec2(mipSize);

    vec2 scaledCoord = (vTexCoord + halfTexel) * padScale;

#if TEXTURE_DIMENSION_IS_2D_ARRAY
    vec3 blockCoord = vec3(scaledCoord, layer);
#else
    vec2 blockCoord = scaledCoord;
#endif

    // The texel offsets have to stay literal constants, so the 16 fetches are spelled
    // out one by one instead of being generated by a loop.
#define FETCH_BLOCK_TEXEL(dx, dy) textureLodOffset(sTexture, blockCoord, mipLevel, ivec2(dx, dy))

    vec4 blockTexels[16];

    // Offset row y = -2
    blockTexels[ 0] = FETCH_BLOCK_TEXEL(-2, -2);
    blockTexels[ 1] = FETCH_BLOCK_TEXEL(-1, -2);
    blockTexels[ 2] = FETCH_BLOCK_TEXEL( 0, -2);
    blockTexels[ 3] = FETCH_BLOCK_TEXEL( 1, -2);

    // Offset row y = -1
    blockTexels[ 4] = FETCH_BLOCK_TEXEL(-2, -1);
    blockTexels[ 5] = FETCH_BLOCK_TEXEL(-1, -1);
    blockTexels[ 6] = FETCH_BLOCK_TEXEL( 0, -1);
    blockTexels[ 7] = FETCH_BLOCK_TEXEL( 1, -1);

    // Offset row y = 0
    blockTexels[ 8] = FETCH_BLOCK_TEXEL(-2,  0);
    blockTexels[ 9] = FETCH_BLOCK_TEXEL(-1,  0);
    blockTexels[10] = FETCH_BLOCK_TEXEL( 0,  0);
    blockTexels[11] = FETCH_BLOCK_TEXEL( 1,  0);

    // Offset row y = +1
    blockTexels[12] = FETCH_BLOCK_TEXEL(-2,  1);
    blockTexels[13] = FETCH_BLOCK_TEXEL(-1,  1);
    blockTexels[14] = FETCH_BLOCK_TEXEL( 0,  1);
    blockTexels[15] = FETCH_BLOCK_TEXEL( 1,  1);

#undef FETCH_BLOCK_TEXEL

    // For sRGB formats every texel's RGB goes through RGBToSRGB before compression.
    // NOTE(review): the original comment calls this step "de-gamma"; RGBToSRGB is
    // defined outside this view, so confirm the direction of the conversion.
#if TEXTURE_FORMAT_IS_SRGB
    for (int texelIndex = 0; texelIndex < 16; ++texelIndex)
    {
        blockTexels[texelIndex].rgb = RGBToSRGB(blockTexels[texelIndex].rgb);
    }
#endif

    oColor = CompressRgb(blockTexels);
}

