﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/
#extension GL_EXT_gpu_shader4 : enable

//-----------------------------------------------
// Settings for RGB channel compression
//-----------------------------------------------

// Whether to estimate the direction of the ColorLine with principal component analysis (PCA).
// When false, the ColorLine direction is the line joining the vector made of the per-channel
// RGB minimums and the vector made of the per-channel RGB maximums.
#ifndef ESTIMATE_COLORLINE_WITH_PCA
#define ESTIMATE_COLORLINE_WITH_PCA true
#endif
// When the ColorLine direction is estimated with PCA: number of iterations used to compute the axis direction.
#ifndef PCA_EIGENVECTOR_LOOP_COUNT
#define PCA_EIGENVECTOR_LOOP_COUNT 8
#endif
// When the ColorLine direction is estimated with PCA: number of iterations used to optimize the color palette.
#ifndef PCA_PALETTE_OPTIMIZE_LOOP_COUNT
#define PCA_PALETTE_OPTIMIZE_LOOP_COUNT 1
#endif

// Whether to refine the estimated ColorLine (least-squares fit of the two endpoints).
#ifndef REFINE_ESTIMATED_COLORLINE
#define REFINE_ESTIMATED_COLORLINE true
#endif

//-----------------------------------------------
// Inputs and outputs
//-----------------------------------------------

// Explicit layout qualifiers are emitted only when NN_GFX_VULKAN is defined;
// otherwise both macros expand to nothing.
#ifdef NN_GFX_VULKAN
#define GFXUTIL_LAYOUT_LOCATION( x ) layout( location = x )
#define GFXUTIL_LAYOUT_BINDING( x ) layout( binding = x )
#else
#define GFXUTIL_LAYOUT_LOCATION( x )
#define GFXUTIL_LAYOUT_BINDING( x )
#endif

// Stage inputs. They are not referenced in this file; presumably they are consumed by the
// format-specific shader included at the bottom -- confirm there.
GFXUTIL_LAYOUT_LOCATION( 0 ) in vec2 vTexCoord;
GFXUTIL_LAYOUT_LOCATION( 1 ) in flat int layer;
GFXUTIL_LAYOUT_LOCATION( 2 ) in flat int mipLevel;

// Unsigned-integer output that receives the packed compressed block data.
GFXUTIL_LAYOUT_LOCATION( 0 ) out uvec4 oColor;

// Source texture; the sampler type is chosen by TEXTURE_DIMENSION_IS_2D_ARRAY.
#if TEXTURE_DIMENSION_IS_2D_ARRAY
GFXUTIL_LAYOUT_BINDING( 0 ) uniform sampler2DArray sTexture;
#else
GFXUTIL_LAYOUT_BINDING( 0 ) uniform sampler2D sTexture;
#endif

//-----------------------------------------------
// 圧縮処理用関数
//-----------------------------------------------

// Converts a color from linear RGB space to sRGB space using an approximation
// (a single power curve with exponent 1/2.4, clamped below at zero).
vec3 RGBToSRGB(vec3 color)
{
    vec3 encoded = 1.055 * pow(color, vec3(0.416666667)) - vec3(0.055);
    return max(vec3(0), encoded);
}

// Compresses the RGB channels of a 4x4 texel block into a BC1-style color block.
// Per the original note, this is used for BC2 and BC3.
//
// texels : the 16 texels of the block; only .rgb is read.
//
// Returns a uvec4 whose components each carry 16 bits of the 64-bit color block:
//   .r : endpoint color0 as RGB565 (always > color1 unless both endpoints are equal)
//   .g : endpoint color1 as RGB565
//   .b : 2-bit palette indices of texels 0..7  (texel 0 in the lowest bits)
//   .a : 2-bit palette indices of texels 8..15 (texel 8 in the lowest bits)
uvec4 CompressionTextureRGBChannel(vec4 texels[16])
{
    // Estimate the ColorLine: the straight line through colorMin and colorMax.
    vec3 colorMin;
    vec3 colorMax;
    if (ESTIMATE_COLORLINE_WITH_PCA)
    {
        // Find the axis of maximum variance with principal component analysis.
        vec3 eigenVector;
        {
            // mean = E(texel)
            vec3 mean = vec3(0.0);
            for (int i = 0; i < 16; i++)
            {
                mean += texels[i].xyz;
            }
            mean /= 16.0;
            // Fallback endpoints in case no texel is selected below (e.g. NaN input).
            colorMin = colorMax = mean;

            // Covariance matrix (unnormalized; only its direction matters).
            mat3 mtx;
            {
                vec3 diag  = vec3(0.0);
                vec3 ndiag = vec3(0.0);
                for (int i = 0; i < 16; i++)
                {
                    vec3 diff = texels[i].xyz - mean;
                    diag  += diff.xyz * diff.xyz;   // xx, yy, zz
                    ndiag += diff.xyz * diff.yzx;   // xy, yz, zx
                }

                mtx[0][0] = diag.x;
                mtx[1][1] = diag.y;
                mtx[2][2] = diag.z;
                mtx[1][0] = mtx[0][1] = ndiag.x;
                mtx[2][1] = mtx[1][2] = ndiag.y;
                mtx[0][2] = mtx[2][0] = ndiag.z;
            }

            // Direction of the first principal component, by power iteration.
            {
                // Start from the longest column of the matrix.
                vec3 vx = mtx * vec3(1.0, 0.0, 0.0);
                vec3 vy = mtx * vec3(0.0, 1.0, 0.0);
                vec3 vz = mtx * vec3(0.0, 0.0, 1.0);
                float maxLen2 = dot(vx, vx);
                eigenVector = vx;
                float len2 = dot(vy, vy);
                if (isnan(maxLen2) || len2 > maxLen2)
                {
                    maxLen2 = len2;
                    eigenVector = vy;
                }
                len2 = dot(vz, vz);
                if (isnan(maxLen2) || len2 > maxLen2)
                {
                    maxLen2 = len2;
                    eigenVector = vz;
                }

                if (maxLen2 > 0.0)
                {
                    eigenVector = normalize(eigenVector);
                    for (int i = 0; i < PCA_EIGENVECTOR_LOOP_COUNT; i++)
                    {
                        eigenVector = normalize(mtx * eigenVector);
                    }
                }
                else
                {
                    // Degenerate block (all texels identical): normalizing a zero
                    // vector would produce NaN, so use a zero axis instead.
                    eigenVector = vec3(0.0);
                }
            }
        }

        // The texels with the largest / smallest projection onto the principal axis
        // become the max / min colors.
        {
            float projMax = -100000000000.0;
            float projMin = 1000000000000.0;

            for (int i = 0; i < 16; i++)
            {
                float proj = dot(texels[i].xyz, eigenVector);
                if (proj > projMax)
                {
                    projMax = proj;
                    colorMax = texels[i].xyz;
                }
                // Tested independently of the max test: a texel that raises projMax
                // (always true for the first texel) can also be the minimum.
                if (proj < projMin)
                {
                    projMin = proj;
                    colorMin = texels[i].xyz;
                }
            }
        }

        // Palette optimization: move each endpoint to the average of the texels that
        // are closer to it than to the other endpoint. The previous endpoint itself
        // takes part in the average with weight 1.
        for (int paletteLoopIndex = 0; paletteLoopIndex < PCA_PALETTE_OPTIMIZE_LOOP_COUNT; ++paletteLoopIndex)
        {
            vec3  seedMax  = colorMax;
            vec3  seedMin  = colorMin;
            vec3  sumMax   = seedMax;
            vec3  sumMin   = seedMin;
            float countMax = 1.0;
            float countMin = 1.0;

            for (int i = 0; i < 16; ++i)
            {
                vec3 diffMin = texels[i].rgb - seedMin;
                vec3 diffMax = texels[i].rgb - seedMax;
                if (dot(diffMin, diffMin) < dot(diffMax, diffMax))
                {
                    sumMin   += texels[i].rgb;
                    countMin += 1.0;
                }
                else
                {
                    sumMax   += texels[i].rgb;
                    countMax += 1.0;
                }
            }

            // If one endpoint attracted no texel, keep the current endpoints and stop.
            if ((countMin == 1.0) || (countMax == 1.0))
            {
                break;
            }

            colorMin = sumMin / countMin;
            colorMax = sumMax / countMax;
        }
    }
    else
    {
        // Per-channel bounding box of the block.
        colorMin = colorMax = texels[0].xyz;
        for (int i = 1; i < 16; i++)
        {
            colorMin = min(colorMin, texels[i].xyz);
            colorMax = max(colorMax, texels[i].xyz);
        }
    }

    // Least-squares fit of the endpoints.
    if (REFINE_ESTIMATED_COLORLINE)
    {
        vec3  fitLine   = colorMax - colorMin;
        float fitLenSq  = dot(fitLine, fitLine);

        // Skip when the endpoints coincide: the scale below would divide by zero and
        // feed NaN into round()/int(), whose results are undefined.
        if (fitLenSq > 0.0)
        {
            // Scale so that the projection is 0 at colorMin and 3 at colorMax.
            fitLine *= 3.0 / fitLenSq;

            // Per palette entry k (0 = colorMin ... 3 = colorMax):
            // .rgb = sum of the texel colors assigned to k, .a = number of such texels.
            vec4 sum0 = vec4(0.0);
            vec4 sum1 = vec4(0.0);
            vec4 sum2 = vec4(0.0);
            vec4 sum3 = vec4(0.0);

            for (int i = 0; i < 16; ++i)
            {
                vec4  color = vec4(texels[i].rgb, 1.0);
                float proj  = dot((color.rgb - colorMin), fitLine);

                switch (int(clamp(round(proj), 0.0, 3.0)))
                {
                    case 0:
                        {
                            sum0 += color;
                            break;
                        }
                    case 1:
                        {
                            sum1 += color;
                            break;
                        }
                    case 2:
                        {
                            sum2 += color;
                            break;
                        }
                    case 3:
                        {
                            sum3 += color;
                            break;
                        }
                }
            }

            // Normal equations of  min sum |c_i - ((3-k_i)*colorMin + k_i*colorMax)/3|^2 :
            //   bd.a = sum (3-k)^2, ce.a = sum k^2, a = sum k(3-k),
            //   bd.rgb = 3 * sum (3-k) c, ce.rgb = 3 * sum k c.
            vec4  bd = sum0 * vec4(9,9,9,9) + sum1 * vec4(6,6,6,4) + sum2 * vec4(3,3,3,1);
            vec4  ce = sum1 * vec4(3,3,3,1) + sum2 * vec4(6,6,6,4) + sum3 * vec4(9,9,9,9);
            float a  = 2.0 * (sum1.a + sum2.a);
            float delta = a * a - bd.a * ce.a;

            // A singular system (e.g. every texel in the same bucket) keeps the estimate.
            if (delta != 0.0)
            {
                colorMax = (a * bd.rgb - bd.a * ce.rgb) / delta;
                colorMin = (a * ce.rgb - ce.a * bd.rgb) / delta;
            }
        }
    }

    // Build the compressed result.
    uvec4 o_Color;
    {
        // Quantize both endpoints to RGB565.
        {
            const vec3 colorLimit = vec3(31.0, 63.0, 31.0);
            uvec3 color0 = uvec3(round(clamp(colorMax, 0.0, 1.0) * colorLimit));
            o_Color.r = (color0.x << 11) | (color0.y << 5) | color0.z;

            uvec3 color1 = uvec3(round(clamp(colorMin, 0.0, 1.0) * colorLimit));
            o_Color.g = (color1.x << 11) | (color1.y << 5) | color1.z;
        }

        // Identical endpoints: every texel uses index 0.
        if (o_Color.r == o_Color.g)
        {
            o_Color.b = o_Color.a = 0u;
            return o_Color;
        }

        // The first endpoint must be the numerically larger one; swap if necessary.
        if (o_Color.r < o_Color.g)
        {
            {
                uint temp = o_Color.r;
                o_Color.r = o_Color.g;
                o_Color.g = temp;
            }
            {
                vec3 temp = colorMax;
                colorMax = colorMin;
                colorMin = temp;
            }
        }

        // Non-zero here: the quantized endpoints differ, so the endpoints differ.
        vec3 colorLine = colorMax - colorMin;
        colorLine /= dot(colorLine, colorLine);

        // BC1 order
        //   max |--0--|--2--|--3--|--1--| min
        uint lowBits  = 0u;   // texels 0..7
        uint highBits = 0u;   // texels 8..15
        for (int i = 0; i < 16; i++)
        {
            // 0 at colorMax, 1 at colorMin.
            float t = 1.0 - dot((texels[i].xyz - colorMin), colorLine);
            // Texels can project outside the segment once the endpoints have been
            // averaged / refitted. Clamp so the conversions below never see a
            // negative value (undefined for uint()) or produce an index above 1.
            t = clamp(t, 0.0, 1.0);

            uint index;
            if (abs(t - 0.5) < 0.25)
            {
                index = uint(t * 3.999999) + 1u;   // interior entries 2 and 3
            }
            else
            {
                index = uint(t + 0.5);             // endpoints 0 (max) and 1 (min)
            }

            if (i < 8)
            {
                lowBits |= index << uint(2 * i);
            }
            else
            {
                highBits |= index << uint(2 * (i - 8));
            }
        }
        o_Color.b = lowBits;
        o_Color.a = highBits;
    }
    return o_Color;
}

// Compresses one channel of a 4x4 texel block into a BC4-style block.
// Per the original note, this is used for BC3, BC4 and BC5.
//
// texels       : the 16 texels of the block.
// channelIndex : component of each texel to compress (0 = x ... 3 = w).
//
// Returns a uvec4 whose components each carry 16 bits of the 64-bit block:
//   .r : the two 8-bit endpoints (first endpoint in the low byte)
//   .g, .b, .a : the 48 bits of 3-bit indices, texel 0 in the lowest bits of .g
//
// TEXTURE_FORMAT_IS_SNORM selects signed (-1..1, scale 127) or unsigned
// (0..1, scale 255) quantization.
uvec4 CompresionTextureSingleChannel(vec4 texels[16], int channelIndex)
{
    uvec4 outColor;

    // Find the minimum and maximum value of the block.
    float minValue, maxValue;
    {
        minValue = maxValue = texels[0][channelIndex];
        for (int i = 1; i < 16; i++)
        {
            minValue = min(minValue, texels[i][channelIndex]);
            maxValue = max(maxValue, texels[i][channelIndex]);
        }
    }

#if TEXTURE_FORMAT_IS_SNORM
    const float clampedMin = -1.0;
    const float quantScale = 127.0;
#else // UNORM
    const float clampedMin = 0.0;
    const float quantScale = 255.0;
#endif

    // Keep the endpoints inside the representable range. Without this an
    // out-of-range input would wrap around when masked to 8 bits (or hit the
    // undefined negative float -> uint conversion).
    minValue = clamp(minValue, clampedMin, 1.0);
    maxValue = clamp(maxValue, clampedMin, 1.0);

    // When the block touches either end of the range, use the 6-value mode whose
    // indices 6 and 7 decode to exactly clampedMin and 1.0.
    bool isClampModeEnabled = (minValue <= clampedMin || maxValue >= 1.0);

    // Quantized endpoints as two's-complement bytes. Going through int keeps the
    // signed (SNORM) case well defined and is a no-op for UNORM.
    uint endpointMin = uint(int(round(minValue * quantScale))) & 0xFFu;
    uint endpointMax = uint(int(round(maxValue * quantScale))) & 0xFFu;

    if (isClampModeEnabled)
    {
        // first endpoint <= second endpoint selects the 6-value mode.
        outColor.r = (endpointMax << 8u) | endpointMin;
    }
    else
    {
        // first endpoint > second endpoint selects the 8-value mode.
        outColor.r = (endpointMin << 8u) | endpointMax;
    }

    // Early out when the block is a single value. The quantized endpoints are
    // compared as well: if they are equal the decoder always uses the 6-value
    // mode, so 8-value indices 6/7 would decode to clampedMin / 1.0 instead of
    // the endpoint value. Index 0 decodes to the (shared) endpoint in both modes.
    if ((minValue >= maxValue) || (endpointMin == endpointMax))
    {
        outColor.g = outColor.b = outColor.a = 0u;
        return outColor;
    }

    // Maps [minValue, maxValue] onto [0, 1].
    float invRange = 1.0 / (maxValue - minValue);

    uint firstHalfBits  = 0u;   // 3-bit indices of texels 0..7
    uint secondHalfBits = 0u;   // 3-bit indices of texels 8..15
    for (int i = 0; i < 16; ++i)
    {
        float value = texels[i][channelIndex];
        // Clamped so that rounding noise or out-of-range texels cannot produce a
        // negative value or an index outside the palette.
        float t = clamp((value - minValue) * invRange, 0.0, 1.0);

        uint index;
        if (isClampModeEnabled)
        {
            //  index order
            //  min |--0--|--2--|--3--|--4--|--5--|--1--| max
            //  6: 0 or -1 (clampedMin)
            //  7: 1
            if (value <= clampedMin)
            {
                index = 6u;
            }
            else if (value >= 1.0)
            {
                index = 7u;
            }
            else if (abs(t - 0.5) > 0.333)
            {
                index = uint(t * 1.999);           // endpoints 0 (min) and 1 (max)
            }
            else
            {
                index = uint(t * 5.9999) + 1u;     // interior entries 2..5
            }
        }
        else
        {
            //  index order
            //  max |--0--|--2--|--3--|--4--|--5--|--6--|--7--|--1--| min
            t = 1.0 - t;
            if (abs(t - 0.5) > 0.375)
            {
                index = uint(t * 1.999);           // endpoints 0 (max) and 1 (min)
            }
            else
            {
                index = uint(t * 7.99999) + 1u;    // interior entries 2..7
            }
        }

        if (i < 8)
        {
            firstHalfBits |= index << uint(3 * i);
        }
        else
        {
            secondHalfBits |= index << uint(3 * (i - 8));
        }
    }

    // Spread the two 24-bit index words over three 16-bit outputs.
    outColor.g = firstHalfBits & 0xFFFFu;
    outColor.b = (firstHalfBits >> 16u) | ((secondHalfBits << 8u) & 0xFF00u);
    outColor.a = ((secondHalfBits >> 8u) & 0xFFFFu);
    return outColor;
}

// Values that TEXTURE_COMPRESSION_TYPE (supplied from outside this file) is compared against.
#define TEXTURE_COMPRESSION_BC1 (0)
#define TEXTURE_COMPRESSION_BC2 (1)
#define TEXTURE_COMPRESSION_BC3 (2)
#define TEXTURE_COMPRESSION_BC4 (3)
#define TEXTURE_COMPRESSION_BC5 (4)

// The file that gets included depends on the shader variation.
// No file is included when TEXTURE_COMPRESSION_TYPE matches none of the values above.
#if TEXTURE_COMPRESSION_TYPE == TEXTURE_COMPRESSION_BC1
    #include "TextureCompression_Bc1_PixelShader.glsl"

#elif TEXTURE_COMPRESSION_TYPE == TEXTURE_COMPRESSION_BC2
    #include "TextureCompression_Bc2_PixelShader.glsl"

#elif TEXTURE_COMPRESSION_TYPE == TEXTURE_COMPRESSION_BC3
    #include "TextureCompression_Bc3_PixelShader.glsl"

#elif TEXTURE_COMPRESSION_TYPE == TEXTURE_COMPRESSION_BC4
    #include "TextureCompression_Bc4_PixelShader.glsl"

#elif TEXTURE_COMPRESSION_TYPE == TEXTURE_COMPRESSION_BC5
    #include "TextureCompression_Bc5_PixelShader.glsl"
#endif
