﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include "stdafx.h"
#include "DecodeInternal.h"

#include <cstring>

namespace TexUtils
{
namespace
{

// Lookup table that widens a 4-bit channel value (0..15) to 8 bits.
// Entry i is 255 * i / 15, evaluated in float and truncated toward zero by the
// conversion to unsigned char.
const unsigned char Table0F[16] =
{
    static_cast<unsigned char>(255.0f *  0.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  1.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  2.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  3.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  4.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  5.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  6.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  7.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  8.0f / 15.0f),
    static_cast<unsigned char>(255.0f *  9.0f / 15.0f),
    static_cast<unsigned char>(255.0f * 10.0f / 15.0f),
    static_cast<unsigned char>(255.0f * 11.0f / 15.0f),
    static_cast<unsigned char>(255.0f * 12.0f / 15.0f),
    static_cast<unsigned char>(255.0f * 13.0f / 15.0f),
    static_cast<unsigned char>(255.0f * 14.0f / 15.0f),
    static_cast<unsigned char>(255.0f * 15.0f / 15.0f),
};

// Lookup table that widens a 5-bit channel value (0..31) to 8 bits.
// Entry i is 255 * i / 31, evaluated in float and truncated toward zero by the
// conversion to unsigned char.
const unsigned char Table1F[32] =
{
    static_cast<unsigned char>(255.0f *  0.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  1.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  2.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  3.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  4.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  5.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  6.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  7.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  8.0f / 31.0f),
    static_cast<unsigned char>(255.0f *  9.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 10.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 11.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 12.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 13.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 14.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 15.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 16.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 17.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 18.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 19.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 20.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 21.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 22.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 23.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 24.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 25.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 26.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 27.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 28.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 29.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 30.0f / 31.0f),
    static_cast<unsigned char>(255.0f * 31.0f / 31.0f),
};

// Lookup table that widens a 6-bit channel value (0..63) to 8 bits.
// Entry i is 255 * i / 63, evaluated in float and truncated toward zero by the
// conversion to unsigned char.
const unsigned char Table3F[64] =
{
    static_cast<unsigned char>(255.0f *  0.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  1.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  2.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  3.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  4.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  5.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  6.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  7.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  8.0f / 63.0f),
    static_cast<unsigned char>(255.0f *  9.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 10.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 11.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 12.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 13.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 14.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 15.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 16.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 17.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 18.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 19.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 20.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 21.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 22.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 23.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 24.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 25.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 26.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 27.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 28.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 29.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 30.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 31.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 32.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 33.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 34.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 35.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 36.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 37.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 38.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 39.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 40.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 41.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 42.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 43.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 44.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 45.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 46.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 47.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 48.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 49.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 50.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 51.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 52.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 53.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 54.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 55.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 56.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 57.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 58.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 59.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 60.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 61.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 62.0f / 63.0f),
    static_cast<unsigned char>(255.0f * 63.0f / 63.0f),
};

// Compile-time selector: Select<true, T>(a, b) yields a and Select<false, T>(a, b)
// yields b. The primary template is only declared; definitions exist solely for
// T = unsigned int.
template<bool Condition, typename T>
inline T Select(T trueValue, T falseValue);

template<>
inline unsigned int Select<true, unsigned int>(unsigned int trueValue, unsigned int /*falseValue*/)
{
    return trueValue;
}

template<>
inline unsigned int Select<false, unsigned int>(unsigned int /*trueValue*/, unsigned int falseValue)
{
    return falseValue;
}

// Converts a small packed floating-point value to an IEEE 754 single-precision float.
//
// Template parameters:
//   HasSign - true when the source format has a sign bit directly above the exponent.
//             When false the sign bit position is ignored and the result is non-negative.
//   ExpSize - number of exponent bits in the source format.
//   FraSize - number of fraction (mantissa) bits in the source format.
//
// src carries the packed value in its low (1 + ExpSize + FraSize) bits.
//
// Behavior:
//   - Exponent 0 and fraction 0 returns +0.0f, whatever the sign bit is.
//   - Exponent 0 and fraction != 0 (subnormal) is renormalized so that the result equals
//     fraction * 2^(1 - bias - FraSize).
//   - NaN and infinity are NOT handled: an all-ones exponent is decoded like any other
//     exponent value.
//
// References:
//   Floating-point numbers:
//     http://ja.wikipedia.org/wiki/%E6%B5%AE%E5%8B%95%E5%B0%8F%E6%95%B0%E7%82%B9%E6%95%B0
//   Rules for 11-bit and 10-bit floating point:
//     http://msdn.microsoft.com/ja-jp/library/cc308050(v=vs.85)
template<bool HasSign, unsigned int ExpSize, unsigned int FraSize>
inline float ToFloat32(unsigned int src)
{
    enum
    {
        // IEEE 754 single precision
        IeeeExpSize = 8,
        IeeeFlaSize = 23,
        IeeeBias    = (1 << (IeeeExpSize - 1)) - 1,

        // Source format
        SrcSgnMask = 1 << (ExpSize + FraSize),
        SrcExpMask = ((1 << ExpSize) - 1) << FraSize,
        SrcFraMask = (1 << FraSize) - 1,
        SrcBias    = (1 << (ExpSize - 1)) - 1
    };

    const unsigned int srcSgn = (src & SrcSgnMask) >> (ExpSize + FraSize);
    const unsigned int srcExp = (src & SrcExpMask) >> FraSize;
    unsigned int srcFra = src & SrcFraMask;

    if ((srcExp == 0) && (srcFra == 0))
    {
        return 0.0f;
    }

    // Unbiased exponent of the value being decoded.
    int exponent = static_cast<int>(srcExp) - static_cast<int>(SrcBias);

    if (srcExp == 0)
    {
        // Subnormal: there is no implicit leading 1 and the exponent is fixed at
        // (1 - bias). Shift the fraction up until its leading 1 reaches the implicit-bit
        // position, lowering the exponent once per shift, then drop that bit.
        // srcFra is non-zero here, so the loop runs at most FraSize times.
        exponent = 1 - static_cast<int>(SrcBias);
        while ((srcFra & (1u << FraSize)) == 0)
        {
            srcFra <<= 1;
            --exponent;
        }
        srcFra &= static_cast<unsigned int>(SrcFraMask);
    }

    // Multiplying by the flag (0 or 1) drops the sign for unsigned formats without
    // branching on a compile-time constant.
    const unsigned int ansSgn = srcSgn * static_cast<unsigned int>(HasSign);
    const unsigned int ansExp =
        static_cast<unsigned int>(exponent + IeeeBias) & ((1u << IeeeExpSize) - 1u);

    // Align the source fraction with the 23-bit IEEE fraction field.
    unsigned int ansFra = srcFra;
    {
        int fraDiff = static_cast<int>(FraSize) - static_cast<int>(IeeeFlaSize);
        if (fraDiff > 0)
        {
            ansFra >>= fraDiff;
        }
        else if (fraDiff < 0)
        {
            ansFra <<= -fraDiff;
        }
        ansFra &= (1u << IeeeFlaSize) - 1u;
    }

    const unsigned int ans =
        (ansSgn << (IeeeExpSize + IeeeFlaSize)) |
        (ansExp << (IeeeFlaSize              )) |
        (ansFra << (0                        ));

    // Copy the bit pattern instead of casting the reference, which would read an
    // unsigned int object through a float lvalue.
    float result;
    std::memcpy(&result, &ans, sizeof(result));
    return result;
}

// Decodes a 16-bit float (sign bit, 5 exponent bits, 10 fraction bits) held in the
// low bits of src.
inline float Float16ToFloat32(unsigned int src)
{
    const float decoded = ToFloat32<true, 5, 10>(src);
    return decoded;
}

// Decodes an unsigned 11-bit float (5 exponent bits, 6 fraction bits) held in the
// low bits of src.
inline float Float11ToFloat32(unsigned int src)
{
    const float decoded = ToFloat32<false, 5, 6>(src);
    return decoded;
}

// Decodes an unsigned 10-bit float (5 exponent bits, 5 fraction bits) held in the
// low bits of src.
inline float Float10ToFloat32(unsigned int src)
{
    const float decoded = ToFloat32<false, 5, 5>(src);
    return decoded;
}

}	// namespace

// Expands one-byte texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel (4 * size bytes are written)
//   src          - input, 1 byte per texel
//   size         - number of source bytes
// For each texel, output bytes 0 and 1 are zero, byte 2 is the source value and
// byte 3 is 0xFF.
// NOTE(review): the output looks like B,G,R,A order with the value in red - confirm
// against the consumer of this buffer.
void Decode_unorm_8(Byte *dstByteColor, const Byte *src, int size)
{
    Byte *pixel = dstByteColor;

    for (int texel = 0; texel != size; ++texel, pixel += 4)
    {
        pixel[0] = 0x00;
        pixel[1] = 0x00;
        pixel[2] = src[texel];
        pixel[3] = 0xFF;    // always opaque
    }
}

// Expands one-byte signed texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel (4 * size bytes are written)
//   src          - input, 1 byte per texel, read as signed char
//   size         - number of source bytes
// Negative source values are clamped to 0 and the result is doubled (0..254) before
// being stored in output byte 2. Bytes 0 and 1 are zero and byte 3 is 0xFF.
void Decode_snorm_8(Byte *dstByteColor, const Byte *src, int size)
{
    const signed char *texels = reinterpret_cast<const signed char *>(src);
    Byte *pixel = dstByteColor;

    for (int texel = 0; texel != size; ++texel, pixel += 4)
    {
        const signed char clamped = std::max<signed char>(texels[texel], 0);

        pixel[0] = 0x00;
        pixel[1] = 0x00;
        pixel[2] = static_cast<unsigned char>(clamped) * 2;
        pixel[3] = 0xFF;    // always opaque
    }
}

// Expands texels made of two packed 4-bit channels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel (4 * size bytes are written)
//   src          - input, 1 byte per texel
//   size         - number of source bytes
// The high nibble goes to output byte 1 and the low nibble to output byte 2, both
// widened through Table0F. Byte 0 is zero and byte 3 is 0xFF.
void Decode_unorm_4_4(Byte *dstByteColor, const Byte *src, int size)
{
    Byte *pixel = dstByteColor;

    for (int texel = 0; texel != size; ++texel, pixel += 4)
    {
        const Byte packed = src[texel];

        pixel[0] = 0x00;
        pixel[1] = Table0F[(packed >> 4) & 0x0F];
        pixel[2] = Table0F[(packed >> 0) & 0x0F];
        pixel[3] = 0xFF;    // always opaque
    }
}

// Expands two-byte texels (two 8-bit channels) into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 2 bytes per texel
//   size         - number of source bytes; a trailing odd byte is ignored
// Source byte 0 goes to output byte 2 and source byte 1 to output byte 1. Output
// byte 0 is zero and byte 3 is 0xFF.
void Decode_unorm_8_8(Byte *dstByteColor, const Byte *src, int size)
{
    // "i + 1 < size" rather than "i != size": with a stride of 2 the latter never
    // becomes false for an odd size and would run past both buffers.
    for(int i = 0, dstByteColorPtr = 0;i + 1 < size;i += 2, dstByteColorPtr += 4)
    {
        dstByteColor[dstByteColorPtr + 0] = 0x00;
        dstByteColor[dstByteColorPtr + 1] = src[i + 1];
        dstByteColor[dstByteColorPtr + 2] = src[i + 0];
        dstByteColor[dstByteColorPtr + 3] = 0xFF;   // always opaque
    }
}

// Expands two-byte signed texels (two 8-bit channels) into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 2 bytes per texel, read as signed char
//   size         - number of source bytes; a trailing odd byte is ignored
//   isHintNormal - when true, a third component is derived from the two stored ones
//
// isHintNormal == false:
//   Each channel is clamped to >= 0 and doubled (0..254). Source byte 0 goes to output
//   byte 2, source byte 1 to output byte 1, output byte 0 is zero, byte 3 is 0xFF.
//
// isHintNormal == true:
//   Each channel is re-biased by +0x80 and divided by 255 to give r and g in [0, 1];
//   b = sqrt(1 - min(r*r + g*g, 1)); each of b, g, r is then written as 128 + int(127 * v)
//   to output bytes 0, 1, 2, and byte 3 is 0xFF.
//   NOTE(review): r and g are mapped to [0, 1] rather than [-1, 1] before b is derived,
//   so a source value of 0 does not decode to a zero component. This is kept as-is;
//   confirm whether it is intended.
void Decode_snorm_8_8(Byte *dstByteColor, const Byte *src, int size, bool isHintNormal)
{
    const signed char *signedSrc = reinterpret_cast<const signed char *>(src);

    // "i + 1 < size" rather than "i != size": with a stride of 2 the latter never
    // becomes false for an odd size and would run past both buffers.
    for(int i = 0, dstByteColorPtr = 0;i + 1 < size;i += 2, dstByteColorPtr += 4)
    {
        Byte *pixel = dstByteColor + dstByteColorPtr;

        if (isHintNormal)
        {
            const int sr = signedSrc[i + 0] + 0x80;
            const int sg = signedSrc[i + 1] + 0x80;

            const float r = sr / 255.0f;
            const float g = sg / 255.0f;
            // std::sqrt's float overload is used instead of std::sqrtf, which some
            // standard libraries do not declare in namespace std.
            const float b = std::sqrt(1.0f - std::min(r * r + g * g, 1.0f));

            pixel[0] = 128 + int(127.0f * b);
            pixel[1] = 128 + int(127.0f * g);
            pixel[2] = 128 + int(127.0f * r);
            pixel[3] = 0xFF;    // always opaque
        }
        else
        {
            pixel[0] = 0x00;
            pixel[1] = (unsigned char)std::max<signed char>(signedSrc[i + 1], 0) * 2;
            pixel[2] = (unsigned char)std::max<signed char>(signedSrc[i + 0], 0) * 2;
            pixel[3] = 0xFF;    // always opaque
        }
    }
}

// Expands 16-bit little-endian 5:6:5 texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 2 bytes per texel (low byte first)
//   size         - number of source bytes; a trailing odd byte is ignored
// Bits 11..15 go to output byte 0 (via Table1F), bits 5..10 to byte 1 (via Table3F)
// and bits 0..4 to byte 2 (via Table1F). Byte 3 is 0xFF.
void Decode_unorm_5_6_5(Byte *dstByteColor, const Byte *src, int size)
{
    // "i + 1 < size" rather than "i != size": with a stride of 2 the latter never
    // becomes false for an odd size and would run past both buffers.
    for(int i = 0, dstByteColorPtr = 0;i + 1 < size;i += 2, dstByteColorPtr += 4)
    {
        const unsigned int srcColor = (src[i + 1] << 8) | (src[i + 0] << 0);

        dstByteColor[dstByteColorPtr + 0] = Table1F[(srcColor >> (5+6)) & 0x1F];
        dstByteColor[dstByteColorPtr + 1] = Table3F[(srcColor >> (5  )) & 0x3F];
        dstByteColor[dstByteColorPtr + 2] = Table1F[(srcColor >> (0  )) & 0x1F];
        dstByteColor[dstByteColorPtr + 3] = 0xFF;   // always opaque
    }
}

// Expands 16-bit little-endian 5:5:5:1 texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 2 bytes per texel (low byte first)
//   size         - number of source bytes; a trailing odd byte is ignored
// Bits 10..14 go to output byte 0, bits 5..9 to byte 1 and bits 0..4 to byte 2, each
// widened through Table1F. Bit 15 selects 0x00 or 0xFF for byte 3.
void Decode_unorm_5_5_5_1(Byte *dstByteColor, const Byte *src, int size)
{
    // "i + 1 < size" rather than "i != size": with a stride of 2 the latter never
    // becomes false for an odd size and would run past both buffers.
    for(int i = 0, dstByteColorPtr = 0;i + 1 < size;i += 2, dstByteColorPtr += 4)
    {
        const unsigned int srcColor = (src[i + 1] << 8) | (src[i + 0] << 0);

        dstByteColor[dstByteColorPtr + 0] = Table1F[(srcColor >> (5+5)) & 0x1F];
        dstByteColor[dstByteColorPtr + 1] = Table1F[(srcColor >> (5  )) & 0x1F];
        dstByteColor[dstByteColorPtr + 2] = Table1F[(srcColor >> (0  )) & 0x1F];
        dstByteColor[dstByteColorPtr + 3] = (unsigned char)(0xFF * ((srcColor >> (5+5+5)) & 0x01));
    }
}

// Expands 16-bit little-endian 4:4:4:4 texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 2 bytes per texel (low byte first)
//   size         - number of source bytes; a trailing odd byte is ignored
// Bits 8..11 go to output byte 0, bits 4..7 to byte 1, bits 0..3 to byte 2 and
// bits 12..15 to byte 3, each widened through Table0F.
void Decode_unorm_4_4_4_4(Byte *dstByteColor, const Byte *src, int size)
{
    // "i + 1 < size" rather than "i != size": with a stride of 2 the latter never
    // becomes false for an odd size and would run past both buffers.
    for(int i = 0, dstByteColorPtr = 0;i + 1 < size;i += 2, dstByteColorPtr += 4)
    {
        const unsigned int srcColor = (src[i + 1] << 8) | (src[i + 0] << 0);

        dstByteColor[dstByteColorPtr + 0] = Table0F[(srcColor >> (4+4  )) & 0x0F];
        dstByteColor[dstByteColorPtr + 1] = Table0F[(srcColor >> (4    )) & 0x0F];
        dstByteColor[dstByteColorPtr + 2] = Table0F[(srcColor >> (0    )) & 0x0F];
        dstByteColor[dstByteColorPtr + 3] = Table0F[(srcColor >> (4+4+4)) & 0x0F];
    }
}

// Reorders four-byte texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 4 bytes per texel
//   size         - number of source bytes; up to 3 trailing bytes are ignored
// Source bytes 0 and 2 are swapped; bytes 1 and 3 keep their position.
void Decode_unorm_8_8_8_8(Byte *dstByteColor, const Byte *src, int size)
{
    // "i + 3 < size" rather than "i != size": with a stride of 4 the latter never
    // becomes false when size is not a multiple of 4 and would run past both buffers.
    for(int i = 0;i + 3 < size;i += 4)
    {
        dstByteColor[i + 0] = src[i + 2];
        dstByteColor[i + 1] = src[i + 1];
        dstByteColor[i + 2] = src[i + 0];
        dstByteColor[i + 3] = src[i + 3];
    }
}

// Converts four-byte signed texels into four-byte pixels.
//   dstByteColor - output, 4 bytes per texel
//   src          - input, 4 bytes per texel, read as signed char
//   size         - number of source bytes; up to 3 trailing bytes are ignored
// Every channel is clamped to >= 0 and doubled (0..254). Source bytes 0 and 2 are
// swapped on output; bytes 1 and 3 keep their position.
void Decode_snorm_8_8_8_8(Byte *dstByteColor, const Byte *src, int size)
{
    const signed char *signedSrc = reinterpret_cast<const signed char *>(src);

    // "i + 3 < size" rather than "i != size": with a stride of 4 the latter never
    // becomes false when size is not a multiple of 4 and would run past both buffers.
    for(int i = 0;i + 3 < size;i += 4)
    {
        dstByteColor[i + 0] = (unsigned char)std::max<signed char>(signedSrc[i + 2], 0) * 2;
        dstByteColor[i + 1] = (unsigned char)std::max<signed char>(signedSrc[i + 1], 0) * 2;
        dstByteColor[i + 2] = (unsigned char)std::max<signed char>(signedSrc[i + 0], 0) * 2;
        dstByteColor[i + 3] = (unsigned char)std::max<signed char>(signedSrc[i + 3], 0) * 2;
    }
}

// Expands single-channel 16-bit float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 2 bytes per texel, read as unsigned short
//   size          - number of source bytes
// The decoded value (r) is stored at output index 2; indices 0 and 1 are 0.0f and
// index 3 is 1.0f.
void Decode_float_16(float *dstFloatColor, const Byte *src, int size)
{
    const unsigned short *halves = reinterpret_cast<const unsigned short *>(src);
    const int texelCount = size / 2;
    float *pixel = dstFloatColor;

    for (int texel = 0; texel != texelCount; ++texel, pixel += 4)
    {
        const float r = Float16ToFloat32(halves[texel]);

        pixel[0] = 0.0f;
        pixel[1] = 0.0f;
        pixel[2] = r;
        pixel[3] = 1.0f;    // always opaque
    }
}

// Expands single-channel 32-bit float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 4 bytes per texel, read as float
//   size          - number of source bytes
// The source value (r) is stored at output index 2; indices 0 and 1 are 0.0f and
// index 3 is 1.0f.
void Decode_float_32(float *dstFloatColor, const Byte *src, int size)
{
    const float *values = reinterpret_cast<const float *>(src);
    const int texelCount = size / 4;
    float *pixel = dstFloatColor;

    for (int texel = 0; texel != texelCount; ++texel, pixel += 4)
    {
        pixel[0] = 0.0f;
        pixel[1] = 0.0f;
        pixel[2] = values[texel];
        pixel[3] = 1.0f;    // always opaque
    }
}

// Expands two-channel 16-bit float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 4 bytes per texel, read as two unsigned shorts (r, then g)
//   size          - number of source bytes; an incomplete trailing texel is ignored
// Output order per texel is 0.0f, g, r, 1.0f.
void Decode_float_16_16(float *dstFloatColor, const Byte *src, int size)
{
    const unsigned short *shortSrc = reinterpret_cast<const unsigned short *>(src);
    const int shortCount = size / 2;

    // "i + 1 < shortCount" rather than "i != shortCount": with a stride of 2 the latter
    // never becomes false for an odd count and would run past both buffers.
    for(int i = 0, dstFloatColorPtr = 0;i + 1 < shortCount;i += 2, dstFloatColorPtr += 4)
    {
        const float r = Float16ToFloat32(shortSrc[i + 0]);
        const float g = Float16ToFloat32(shortSrc[i + 1]);

        dstFloatColor[dstFloatColorPtr + 0] = 0.0f;
        dstFloatColor[dstFloatColorPtr + 1] = g;
        dstFloatColor[dstFloatColorPtr + 2] = r;
        dstFloatColor[dstFloatColorPtr + 3] = 1.0f; // always opaque
    }
}

// Expands two-channel 32-bit float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 8 bytes per texel, read as two floats (r, then g)
//   size          - number of source bytes; an incomplete trailing texel is ignored
// Output order per texel is 0.0f, g, r, 1.0f.
void Decode_float_32_32(float *dstFloatColor, const Byte *src, int size)
{
    const float *float32Src = reinterpret_cast<const float *>(src);
    const int floatCount = size / 4;

    // "i + 1 < floatCount" rather than "i != floatCount": with a stride of 2 the latter
    // never becomes false for an odd count and would run past both buffers.
    for(int i = 0, dstFloatColorPtr = 0;i + 1 < floatCount;i += 2, dstFloatColorPtr += 4)
    {
        const float r = float32Src[i + 0];
        const float g = float32Src[i + 1];

        dstFloatColor[dstFloatColorPtr + 0] = 0.0f;
        dstFloatColor[dstFloatColorPtr + 1] = g;
        dstFloatColor[dstFloatColorPtr + 2] = r;
        dstFloatColor[dstFloatColorPtr + 3] = 1.0f; // always opaque
    }
}

// Expands packed 11:11:10 float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 4 bytes per texel, read as unsigned int
//   size          - number of source bytes
// Bits 0..10 decode to r and bits 11..21 to g (11-bit floats); bits 22..31 decode to b
// (10-bit float). Output order per texel is b, g, r, 1.0f.
void Decode_float_11_11_10(float *dstFloatColor, const Byte *src, int size)
{
    const unsigned int *words = reinterpret_cast<const unsigned int *>(src);
    const int texelCount = size / 4;
    float *pixel = dstFloatColor;

    for (int texel = 0; texel != texelCount; ++texel, pixel += 4)
    {
        const unsigned int packed = words[texel];

        const float r = Float11ToFloat32((packed >>  0) & 0x7FF);
        const float g = Float11ToFloat32((packed >> 11) & 0x7FF);
        const float b = Float10ToFloat32((packed >> 22) & 0x3FF);

        pixel[0] = b;
        pixel[1] = g;
        pixel[2] = r;
        pixel[3] = 1.0f;    // always opaque
    }
}

// Converts four-channel 16-bit float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 8 bytes per texel, read as four unsigned shorts (r, g, b, a)
//   size          - number of source bytes; an incomplete trailing texel is ignored
// Output order per texel is b, g, r, a.
void Decode_float_16_16_16_16(float *dstFloatColor, const Byte *src, int size)
{
    const unsigned short *shortSrc = reinterpret_cast<const unsigned short *>(src);

    // One texel is 8 bytes. Counting whole texels replaces the previous
    // "i != size / 4; i += 2" bound, which yields the same count when size is a multiple
    // of 8 but never terminates when size / 4 is odd.
    const int texelCount = size / 8;

    for(int texel = 0;texel < texelCount;++texel, shortSrc += 4, dstFloatColor += 4)
    {
        const float r = Float16ToFloat32(shortSrc[0]);
        const float g = Float16ToFloat32(shortSrc[1]);
        const float b = Float16ToFloat32(shortSrc[2]);
        const float a = Float16ToFloat32(shortSrc[3]);

        dstFloatColor[0] = b;
        dstFloatColor[1] = g;
        dstFloatColor[2] = r;
        dstFloatColor[3] = a;
    }
}

// Reorders four-channel 32-bit float texels into four-float pixels.
//   dstFloatColor - output, 4 floats per texel
//   src           - input, 16 bytes per texel, read as four floats (r, g, b, a)
//   size          - number of source bytes; an incomplete trailing texel is ignored
// Output order per texel is b, g, r, a.
void Decode_float_32_32_32_32(float *dstFloatColor, const Byte *src, int size)
{
    const float *float32Src = reinterpret_cast<const float *>(src);
    const int floatCount = size / 4;

    // "i + 3 < floatCount" rather than "i != floatCount": with a stride of 4 the latter
    // never becomes false when the count is not a multiple of 4 and would run past both
    // buffers.
    for(int i = 0;i + 3 < floatCount;i += 4)
    {
        const float r = float32Src[i + 0];
        const float g = float32Src[i + 1];
        const float b = float32Src[i + 2];
        const float a = float32Src[i + 3];

        dstFloatColor[i + 0] = b;
        dstFloatColor[i + 1] = g;
        dstFloatColor[i + 2] = r;
        dstFloatColor[i + 3] = a;
    }
}

namespace
{

// One four-byte pixel. The BC block decoders reinterpret the destination byte buffer
// as an array of this struct, so the member order below is the byte order in memory.
struct RGBA
{
    unsigned char r_;   // byte 0
    unsigned char g_;   // byte 1
    unsigned char b_;   // byte 2
    unsigned char a_;   // byte 3
};

// Four-component color with int members.
// NOTE(review): not referenced in the visible part of this file; presumably used by a
// decoder further down that needs more range than unsigned char - confirm.
struct IntRGBA
{
    int r_;
    int g_;
    int b_;
    int a_;
};

}	// namespace

void Decode_unorm_bc1(unsigned char *dst, const Byte *src, int x, int y, int dstStride)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    RGBA srcRgba[4];
    {
        unsigned int color0 = (src[1] << 8) | (src[0] << 0);
        unsigned int color1 = (src[3] << 8) | (src[2] << 0);

        srcRgba[0].r_ = Table1F[(color0 >> (5+6)) & 0x1F];
        srcRgba[0].g_ = Table3F[(color0 >> (5  )) & 0x3F];
        srcRgba[0].b_ = Table1F[(color0 >> (0  )) & 0x1F];
        srcRgba[0].a_ = 0xFF;

        srcRgba[1].r_ = Table1F[(color1 >> (5+6)) & 0x1F];
        srcRgba[1].g_ = Table3F[(color1 >> (5  )) & 0x3F];
        srcRgba[1].b_ = Table1F[(color1 >> (0  )) & 0x1F];
        srcRgba[1].a_ = 0xFF;

        if (color0 > color1)
        {
            srcRgba[2].r_ = (unsigned char)((srcRgba[0].r_ * 2 + srcRgba[1].r_) / 3);
            srcRgba[2].g_ = (unsigned char)((srcRgba[0].g_ * 2 + srcRgba[1].g_) / 3);
            srcRgba[2].b_ = (unsigned char)((srcRgba[0].b_ * 2 + srcRgba[1].b_) / 3);
            srcRgba[2].a_ = 0xFF;

            srcRgba[3].r_ = (unsigned char)((srcRgba[0].r_ + srcRgba[1].r_ * 2) / 3);
            srcRgba[3].g_ = (unsigned char)((srcRgba[0].g_ + srcRgba[1].g_ * 2) / 3);
            srcRgba[3].b_ = (unsigned char)((srcRgba[0].b_ + srcRgba[1].b_ * 2) / 3);
            srcRgba[3].a_ = 0xFF;
        }
        else
        {
            srcRgba[2].r_ = (unsigned char)((srcRgba[0].r_ + srcRgba[1].r_) / 2);
            srcRgba[2].g_ = (unsigned char)((srcRgba[0].g_ + srcRgba[1].g_) / 2);
            srcRgba[2].b_ = (unsigned char)((srcRgba[0].b_ + srcRgba[1].b_) / 2);
            srcRgba[2].a_ = 0xFF;

            srcRgba[3].r_ = 0x00;
            srcRgba[3].g_ = 0x00;
            srcRgba[3].b_ = 0x00;
            srcRgba[3].a_ = 0x00;
        }
    }

    {
        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);

        int colorRgbaStride = dstStride / sizeof(RGBA);

        dstRgba[colorRgbaStride * 0 + 0] = srcRgba[(src[4] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 0 + 1] = srcRgba[(src[4] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 0 + 2] = srcRgba[(src[4] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 0 + 3] = srcRgba[(src[4] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 1 + 0] = srcRgba[(src[5] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 1 + 1] = srcRgba[(src[5] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 1 + 2] = srcRgba[(src[5] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 1 + 3] = srcRgba[(src[5] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 2 + 0] = srcRgba[(src[6] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 2 + 1] = srcRgba[(src[6] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 2 + 2] = srcRgba[(src[6] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 2 + 3] = srcRgba[(src[6] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 3 + 0] = srcRgba[(src[7] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 3 + 1] = srcRgba[(src[7] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 3 + 2] = srcRgba[(src[7] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 3 + 3] = srcRgba[(src[7] >> (2*3)) & 3];
    }
}

void Decode_unorm_bc2(unsigned char *dst, const Byte *src, int x, int y, int dstStride)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    RGBA srcRgba[4];
    unsigned char srcAlpha[4*4];
    {
        unsigned int alpha0 = (src[ 1] << 8) | (src[ 0] << 0);
        unsigned int alpha1 = (src[ 3] << 8) | (src[ 2] << 0);
        unsigned int alpha2 = (src[ 5] << 8) | (src[ 4] << 0);
        unsigned int alpha3 = (src[ 7] << 8) | (src[ 6] << 0);
        unsigned int color0 = (src[ 9] << 8) | (src[ 8] << 0);
        unsigned int color1 = (src[11] << 8) | (src[10] << 0);

        srcAlpha[ 0] = Table0F[(alpha0 >> (0*4)) & 0x0F];
        srcAlpha[ 1] = Table0F[(alpha0 >> (1*4)) & 0x0F];
        srcAlpha[ 2] = Table0F[(alpha0 >> (2*4)) & 0x0F];
        srcAlpha[ 3] = Table0F[(alpha0 >> (3*4)) & 0x0F];
        srcAlpha[ 4] = Table0F[(alpha1 >> (0*4)) & 0x0F];
        srcAlpha[ 5] = Table0F[(alpha1 >> (1*4)) & 0x0F];
        srcAlpha[ 6] = Table0F[(alpha1 >> (2*4)) & 0x0F];
        srcAlpha[ 7] = Table0F[(alpha1 >> (3*4)) & 0x0F];
        srcAlpha[ 8] = Table0F[(alpha2 >> (0*4)) & 0x0F];
        srcAlpha[ 9] = Table0F[(alpha2 >> (1*4)) & 0x0F];
        srcAlpha[10] = Table0F[(alpha2 >> (2*4)) & 0x0F];
        srcAlpha[11] = Table0F[(alpha2 >> (3*4)) & 0x0F];
        srcAlpha[12] = Table0F[(alpha3 >> (0*4)) & 0x0F];
        srcAlpha[13] = Table0F[(alpha3 >> (1*4)) & 0x0F];
        srcAlpha[14] = Table0F[(alpha3 >> (2*4)) & 0x0F];
        srcAlpha[15] = Table0F[(alpha3 >> (3*4)) & 0x0F];

        srcRgba[0].r_ = Table1F[(color0 >> (5+6)) & 0x1F];
        srcRgba[0].g_ = Table3F[(color0 >> (5  )) & 0x3F];
        srcRgba[0].b_ = Table1F[(color0 >> (0  )) & 0x1F];

        srcRgba[1].r_ = Table1F[(color1 >> (5+6)) & 0x1F];
        srcRgba[1].g_ = Table3F[(color1 >> (5  )) & 0x3F];
        srcRgba[1].b_ = Table1F[(color1 >> (0  )) & 0x1F];

        srcRgba[2].r_ = (unsigned char)((srcRgba[0].r_ * 2 + srcRgba[1].r_) / 3);
        srcRgba[2].g_ = (unsigned char)((srcRgba[0].g_ * 2 + srcRgba[1].g_) / 3);
        srcRgba[2].b_ = (unsigned char)((srcRgba[0].b_ * 2 + srcRgba[1].b_) / 3);

        srcRgba[3].r_ = (unsigned char)((srcRgba[0].r_ + srcRgba[1].r_ * 2) / 3);
        srcRgba[3].g_ = (unsigned char)((srcRgba[0].g_ + srcRgba[1].g_ * 2) / 3);
        srcRgba[3].b_ = (unsigned char)((srcRgba[0].b_ + srcRgba[1].b_ * 2) / 3);
    }

    {
        srcRgba[0].r_ = srcRgba[0].r_;
        srcRgba[0].g_ = srcRgba[0].g_;
        srcRgba[0].b_ = srcRgba[0].b_;
        srcRgba[1].r_ = srcRgba[1].r_;
        srcRgba[1].g_ = srcRgba[1].g_;
        srcRgba[1].b_ = srcRgba[1].b_;
        srcRgba[2].r_ = srcRgba[2].r_;
        srcRgba[2].g_ = srcRgba[2].g_;
        srcRgba[2].b_ = srcRgba[2].b_;
        srcRgba[3].r_ = srcRgba[3].r_;
        srcRgba[3].g_ = srcRgba[3].g_;
        srcRgba[3].b_ = srcRgba[3].b_;

        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);
        int colorRgbaStride = dstStride / sizeof(RGBA);

        dstRgba[colorRgbaStride * 0 + 0] = srcRgba[(src[12] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 0 + 1] = srcRgba[(src[12] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 0 + 2] = srcRgba[(src[12] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 0 + 3] = srcRgba[(src[12] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 1 + 0] = srcRgba[(src[13] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 1 + 1] = srcRgba[(src[13] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 1 + 2] = srcRgba[(src[13] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 1 + 3] = srcRgba[(src[13] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 2 + 0] = srcRgba[(src[14] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 2 + 1] = srcRgba[(src[14] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 2 + 2] = srcRgba[(src[14] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 2 + 3] = srcRgba[(src[14] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 3 + 0] = srcRgba[(src[15] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 3 + 1] = srcRgba[(src[15] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 3 + 2] = srcRgba[(src[15] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 3 + 3] = srcRgba[(src[15] >> (2*3)) & 3];

        dstRgba[colorRgbaStride * 0 + 0].a_ = srcAlpha[ 0];
        dstRgba[colorRgbaStride * 0 + 1].a_ = srcAlpha[ 1];
        dstRgba[colorRgbaStride * 0 + 2].a_ = srcAlpha[ 2];
        dstRgba[colorRgbaStride * 0 + 3].a_ = srcAlpha[ 3];
        dstRgba[colorRgbaStride * 1 + 0].a_ = srcAlpha[ 4];
        dstRgba[colorRgbaStride * 1 + 1].a_ = srcAlpha[ 5];
        dstRgba[colorRgbaStride * 1 + 2].a_ = srcAlpha[ 6];
        dstRgba[colorRgbaStride * 1 + 3].a_ = srcAlpha[ 7];
        dstRgba[colorRgbaStride * 2 + 0].a_ = srcAlpha[ 8];
        dstRgba[colorRgbaStride * 2 + 1].a_ = srcAlpha[ 9];
        dstRgba[colorRgbaStride * 2 + 2].a_ = srcAlpha[10];
        dstRgba[colorRgbaStride * 2 + 3].a_ = srcAlpha[11];
        dstRgba[colorRgbaStride * 3 + 0].a_ = srcAlpha[12];
        dstRgba[colorRgbaStride * 3 + 1].a_ = srcAlpha[13];
        dstRgba[colorRgbaStride * 3 + 2].a_ = srcAlpha[14];
        dstRgba[colorRgbaStride * 3 + 3].a_ = srcAlpha[15];
    }
}

void Decode_unorm_bc3(unsigned char *dst, const Byte *src, int x, int y, int dstStride)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    RGBA srcRgba[4];
    unsigned char srcAlpha[4*4];
    {
        srcAlpha[0] = src[ 0];
        srcAlpha[1] = src[ 1];

        if(srcAlpha[0] > srcAlpha[1])
        {
            srcAlpha[2] = (srcAlpha[0] * 6 + srcAlpha[1] * 1) / 7;
            srcAlpha[3] = (srcAlpha[0] * 5 + srcAlpha[1] * 2) / 7;
            srcAlpha[4] = (srcAlpha[0] * 4 + srcAlpha[1] * 3) / 7;
            srcAlpha[5] = (srcAlpha[0] * 3 + srcAlpha[1] * 4) / 7;
            srcAlpha[6] = (srcAlpha[0] * 2 + srcAlpha[1] * 5) / 7;
            srcAlpha[7] = (srcAlpha[0] * 1 + srcAlpha[1] * 6) / 7;
        }
        else
        {
            srcAlpha[2] = (srcAlpha[0] * 4 + srcAlpha[1] * 1) / 5;
            srcAlpha[3] = (srcAlpha[0] * 3 + srcAlpha[1] * 2) / 5;
            srcAlpha[4] = (srcAlpha[0] * 2 + srcAlpha[1] * 3) / 5;
            srcAlpha[5] = (srcAlpha[0] * 1 + srcAlpha[1] * 4) / 5;
            srcAlpha[6] = 0;
            srcAlpha[7] = 255;
        }

        unsigned int color0 = (src[ 9] << 8) | (src[ 8] << 0);
        unsigned int color1 = (src[11] << 8) | (src[10] << 0);

        srcRgba[0].r_ = Table1F[(color0 >> (5+6)) & 0x1F];
        srcRgba[0].g_ = Table3F[(color0 >> (5  )) & 0x3F];
        srcRgba[0].b_ = Table1F[(color0 >> (0  )) & 0x1F];

        srcRgba[1].r_ = Table1F[(color1 >> (5+6)) & 0x1F];
        srcRgba[1].g_ = Table3F[(color1 >> (5  )) & 0x3F];
        srcRgba[1].b_ = Table1F[(color1 >> (0  )) & 0x1F];

        srcRgba[2].r_ = (unsigned char)((srcRgba[0].r_ * 2 + srcRgba[1].r_) / 3);
        srcRgba[2].g_ = (unsigned char)((srcRgba[0].g_ * 2 + srcRgba[1].g_) / 3);
        srcRgba[2].b_ = (unsigned char)((srcRgba[0].b_ * 2 + srcRgba[1].b_) / 3);

        srcRgba[3].r_ = (unsigned char)((srcRgba[0].r_ + srcRgba[1].r_ * 2) / 3);
        srcRgba[3].g_ = (unsigned char)((srcRgba[0].g_ + srcRgba[1].g_ * 2) / 3);
        srcRgba[3].b_ = (unsigned char)((srcRgba[0].b_ + srcRgba[1].b_ * 2) / 3);
    }

    {
        srcRgba[0].r_ = srcRgba[0].r_;
        srcRgba[0].g_ = srcRgba[0].g_;
        srcRgba[0].b_ = srcRgba[0].b_;
        srcRgba[1].r_ = srcRgba[1].r_;
        srcRgba[1].g_ = srcRgba[1].g_;
        srcRgba[1].b_ = srcRgba[1].b_;
        srcRgba[2].r_ = srcRgba[2].r_;
        srcRgba[2].g_ = srcRgba[2].g_;
        srcRgba[2].b_ = srcRgba[2].b_;
        srcRgba[3].r_ = srcRgba[3].r_;
        srcRgba[3].g_ = srcRgba[3].g_;
        srcRgba[3].b_ = srcRgba[3].b_;

        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);
        int colorRgbaStride = dstStride / sizeof(RGBA);

        dstRgba[colorRgbaStride * 0 + 0] = srcRgba[(src[12] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 0 + 1] = srcRgba[(src[12] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 0 + 2] = srcRgba[(src[12] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 0 + 3] = srcRgba[(src[12] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 1 + 0] = srcRgba[(src[13] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 1 + 1] = srcRgba[(src[13] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 1 + 2] = srcRgba[(src[13] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 1 + 3] = srcRgba[(src[13] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 2 + 0] = srcRgba[(src[14] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 2 + 1] = srcRgba[(src[14] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 2 + 2] = srcRgba[(src[14] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 2 + 3] = srcRgba[(src[14] >> (2*3)) & 3];
        dstRgba[colorRgbaStride * 3 + 0] = srcRgba[(src[15] >> (2*0)) & 3];
        dstRgba[colorRgbaStride * 3 + 1] = srcRgba[(src[15] >> (2*1)) & 3];
        dstRgba[colorRgbaStride * 3 + 2] = srcRgba[(src[15] >> (2*2)) & 3];
        dstRgba[colorRgbaStride * 3 + 3] = srcRgba[(src[15] >> (2*3)) & 3];

        int alphaIndex0 = (src[2] << 0*8) | (src[3] << 1*8) | (src[4] << 2*8);
        int alphaIndex1 = (src[5] << 0*8) | (src[6] << 1*8) | (src[7] << 2*8);

        dstRgba[colorRgbaStride * 0 + 0].a_ = srcAlpha[(alphaIndex0 >> (3*0)) & 7];
        dstRgba[colorRgbaStride * 0 + 1].a_ = srcAlpha[(alphaIndex0 >> (3*1)) & 7];
        dstRgba[colorRgbaStride * 0 + 2].a_ = srcAlpha[(alphaIndex0 >> (3*2)) & 7];
        dstRgba[colorRgbaStride * 0 + 3].a_ = srcAlpha[(alphaIndex0 >> (3*3)) & 7];
        dstRgba[colorRgbaStride * 1 + 0].a_ = srcAlpha[(alphaIndex0 >> (3*4)) & 7];
        dstRgba[colorRgbaStride * 1 + 1].a_ = srcAlpha[(alphaIndex0 >> (3*5)) & 7];
        dstRgba[colorRgbaStride * 1 + 2].a_ = srcAlpha[(alphaIndex0 >> (3*6)) & 7];
        dstRgba[colorRgbaStride * 1 + 3].a_ = srcAlpha[(alphaIndex0 >> (3*7)) & 7];
        dstRgba[colorRgbaStride * 2 + 0].a_ = srcAlpha[(alphaIndex1 >> (3*0)) & 7];
        dstRgba[colorRgbaStride * 2 + 1].a_ = srcAlpha[(alphaIndex1 >> (3*1)) & 7];
        dstRgba[colorRgbaStride * 2 + 2].a_ = srcAlpha[(alphaIndex1 >> (3*2)) & 7];
        dstRgba[colorRgbaStride * 2 + 3].a_ = srcAlpha[(alphaIndex1 >> (3*3)) & 7];
        dstRgba[colorRgbaStride * 3 + 0].a_ = srcAlpha[(alphaIndex1 >> (3*4)) & 7];
        dstRgba[colorRgbaStride * 3 + 1].a_ = srcAlpha[(alphaIndex1 >> (3*5)) & 7];
        dstRgba[colorRgbaStride * 3 + 2].a_ = srcAlpha[(alphaIndex1 >> (3*6)) & 7];
        dstRgba[colorRgbaStride * 3 + 3].a_ = srcAlpha[(alphaIndex1 >> (3*7)) & 7];
    }
}

void Decode_unorm_bc4(unsigned char *dst, const Byte *src, int x, int y, int dstStride)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    RGBA srcRgba[8];
    {
        unsigned int srcRed[8];
        {
            srcRed[0] = src[0];
            srcRed[1] = src[1];

            if(srcRed[0] > srcRed[1])
            {
                srcRed[2] = (srcRed[0] * 6 + srcRed[1] * 1) / 7;
                srcRed[3] = (srcRed[0] * 5 + srcRed[1] * 2) / 7;
                srcRed[4] = (srcRed[0] * 4 + srcRed[1] * 3) / 7;
                srcRed[5] = (srcRed[0] * 3 + srcRed[1] * 4) / 7;
                srcRed[6] = (srcRed[0] * 2 + srcRed[1] * 5) / 7;
                srcRed[7] = (srcRed[0] * 1 + srcRed[1] * 6) / 7;
            }
            else
            {
                srcRed[2] = (srcRed[0] * 4 + srcRed[1] * 1) / 5;
                srcRed[3] = (srcRed[0] * 3 + srcRed[1] * 2) / 5;
                srcRed[4] = (srcRed[0] * 2 + srcRed[1] * 3) / 5;
                srcRed[5] = (srcRed[0] * 1 + srcRed[1] * 4) / 5;
                srcRed[6] = 0;
                srcRed[7] = 255;
            }
        }

        for(int i = 0;i != 8;++ i)
        {
            srcRgba[i].r_ = srcRed[i];
            srcRgba[i].g_ = 0x00;
            srcRgba[i].b_ = 0x00;
//			srcRgba[i].a_ = 0x00;
            srcRgba[i].a_ = 0xFF;
        }
    }

    {
        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);

        int colorRgbaStride = dstStride / sizeof(RGBA);

        int redIndex0 = (src[2] << 0*8) | (src[3] << 1*8) | (src[4] << 2*8);
        int redIndex1 = (src[5] << 0*8) | (src[6] << 1*8) | (src[7] << 2*8);

        dstRgba[colorRgbaStride * 0 + 0] = srcRgba[(redIndex0 >> (3*0)) & 7];
        dstRgba[colorRgbaStride * 0 + 1] = srcRgba[(redIndex0 >> (3*1)) & 7];
        dstRgba[colorRgbaStride * 0 + 2] = srcRgba[(redIndex0 >> (3*2)) & 7];
        dstRgba[colorRgbaStride * 0 + 3] = srcRgba[(redIndex0 >> (3*3)) & 7];
        dstRgba[colorRgbaStride * 1 + 0] = srcRgba[(redIndex0 >> (3*4)) & 7];
        dstRgba[colorRgbaStride * 1 + 1] = srcRgba[(redIndex0 >> (3*5)) & 7];
        dstRgba[colorRgbaStride * 1 + 2] = srcRgba[(redIndex0 >> (3*6)) & 7];
        dstRgba[colorRgbaStride * 1 + 3] = srcRgba[(redIndex0 >> (3*7)) & 7];
        dstRgba[colorRgbaStride * 2 + 0] = srcRgba[(redIndex1 >> (3*0)) & 7];
        dstRgba[colorRgbaStride * 2 + 1] = srcRgba[(redIndex1 >> (3*1)) & 7];
        dstRgba[colorRgbaStride * 2 + 2] = srcRgba[(redIndex1 >> (3*2)) & 7];
        dstRgba[colorRgbaStride * 2 + 3] = srcRgba[(redIndex1 >> (3*3)) & 7];
        dstRgba[colorRgbaStride * 3 + 0] = srcRgba[(redIndex1 >> (3*4)) & 7];
        dstRgba[colorRgbaStride * 3 + 1] = srcRgba[(redIndex1 >> (3*5)) & 7];
        dstRgba[colorRgbaStride * 3 + 2] = srcRgba[(redIndex1 >> (3*6)) & 7];
        dstRgba[colorRgbaStride * 3 + 3] = srcRgba[(redIndex1 >> (3*7)) & 7];
    }
}

void Decode_snorm_bc4(unsigned char *dst, const Byte *src, int x, int y, int dstStride)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    RGBA srcRgba[8];
    {
        signed int srcRed[8];
        {
            srcRed[0] = reinterpret_cast<const signed char *>(src)[0];
            srcRed[1] = reinterpret_cast<const signed char *>(src)[1];

            if(srcRed[0] > srcRed[1])
            {
                srcRed[2] = (srcRed[0] * 6 + srcRed[1] * 1) / 7;
                srcRed[3] = (srcRed[0] * 5 + srcRed[1] * 2) / 7;
                srcRed[4] = (srcRed[0] * 4 + srcRed[1] * 3) / 7;
                srcRed[5] = (srcRed[0] * 3 + srcRed[1] * 4) / 7;
                srcRed[6] = (srcRed[0] * 2 + srcRed[1] * 5) / 7;
                srcRed[7] = (srcRed[0] * 1 + srcRed[1] * 6) / 7;
            }
            else
            {
                srcRed[2] = (srcRed[0] * 4 + srcRed[1] * 1) / 5;
                srcRed[3] = (srcRed[0] * 3 + srcRed[1] * 2) / 5;
                srcRed[4] = (srcRed[0] * 2 + srcRed[1] * 3) / 5;
                srcRed[5] = (srcRed[0] * 1 + srcRed[1] * 4) / 5;
                srcRed[6] = -128;
                srcRed[7] = +127;
            }
        }

        for(int i = 0;i != 8;++ i)
        {
            srcRed[i] = std::min(std::max(srcRed[i], 0), 255);

            srcRgba[i].r_ = std::max(srcRed[i], 0) * 2;
            srcRgba[i].g_ = 0x00;
            srcRgba[i].b_ = 0x00;
//			srcRgba[i].a_ = 0x00;
            srcRgba[i].a_ = 0xFF;
        }
    }

    {
        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);

        int colorRgbaStride = dstStride / sizeof(RGBA);

        int redIndex0 = (src[2] << 0*8) | (src[3] << 1*8) | (src[4] << 2*8);
        int redIndex1 = (src[5] << 0*8) | (src[6] << 1*8) | (src[7] << 2*8);

        dstRgba[colorRgbaStride * 0 + 0] = srcRgba[(redIndex0 >> (3*0)) & 7];
        dstRgba[colorRgbaStride * 0 + 1] = srcRgba[(redIndex0 >> (3*1)) & 7];
        dstRgba[colorRgbaStride * 0 + 2] = srcRgba[(redIndex0 >> (3*2)) & 7];
        dstRgba[colorRgbaStride * 0 + 3] = srcRgba[(redIndex0 >> (3*3)) & 7];
        dstRgba[colorRgbaStride * 1 + 0] = srcRgba[(redIndex0 >> (3*4)) & 7];
        dstRgba[colorRgbaStride * 1 + 1] = srcRgba[(redIndex0 >> (3*5)) & 7];
        dstRgba[colorRgbaStride * 1 + 2] = srcRgba[(redIndex0 >> (3*6)) & 7];
        dstRgba[colorRgbaStride * 1 + 3] = srcRgba[(redIndex0 >> (3*7)) & 7];
        dstRgba[colorRgbaStride * 2 + 0] = srcRgba[(redIndex1 >> (3*0)) & 7];
        dstRgba[colorRgbaStride * 2 + 1] = srcRgba[(redIndex1 >> (3*1)) & 7];
        dstRgba[colorRgbaStride * 2 + 2] = srcRgba[(redIndex1 >> (3*2)) & 7];
        dstRgba[colorRgbaStride * 2 + 3] = srcRgba[(redIndex1 >> (3*3)) & 7];
        dstRgba[colorRgbaStride * 3 + 0] = srcRgba[(redIndex1 >> (3*4)) & 7];
        dstRgba[colorRgbaStride * 3 + 1] = srcRgba[(redIndex1 >> (3*5)) & 7];
        dstRgba[colorRgbaStride * 3 + 2] = srcRgba[(redIndex1 >> (3*6)) & 7];
        dstRgba[colorRgbaStride * 3 + 3] = srcRgba[(redIndex1 >> (3*7)) & 7];
    }
}

void Decode_unorm_bc5(unsigned char *dst, const Byte *src, int x, int y, int dstStride)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    RGBA srcRgba[8];
    {
        unsigned int srcRed[8];
        {
            srcRed[0] = src[0];
            srcRed[1] = src[1];

            if(srcRed[0] > srcRed[1])
            {
                srcRed[2] = (srcRed[0] * 6 + srcRed[1] * 1) / 7;
                srcRed[3] = (srcRed[0] * 5 + srcRed[1] * 2) / 7;
                srcRed[4] = (srcRed[0] * 4 + srcRed[1] * 3) / 7;
                srcRed[5] = (srcRed[0] * 3 + srcRed[1] * 4) / 7;
                srcRed[6] = (srcRed[0] * 2 + srcRed[1] * 5) / 7;
                srcRed[7] = (srcRed[0] * 1 + srcRed[1] * 6) / 7;
            }
            else
            {
                srcRed[2] = (srcRed[0] * 4 + srcRed[1] * 1) / 5;
                srcRed[3] = (srcRed[0] * 3 + srcRed[1] * 2) / 5;
                srcRed[4] = (srcRed[0] * 2 + srcRed[1] * 3) / 5;
                srcRed[5] = (srcRed[0] * 1 + srcRed[1] * 4) / 5;
                srcRed[6] = 0;
                srcRed[7] = 255;
            }

            for(int i = 0;i != 8;++ i)
            {
                srcRgba[i].r_ = srcRed[i];
            }
        }

        unsigned int srcGreen[8];
        {
            srcGreen[0] = src[8];
            srcGreen[1] = src[9];

            if(srcGreen[0] > srcGreen[1])
            {
                srcGreen[2] = (srcGreen[0] * 6 + srcGreen[1] * 1) / 7;
                srcGreen[3] = (srcGreen[0] * 5 + srcGreen[1] * 2) / 7;
                srcGreen[4] = (srcGreen[0] * 4 + srcGreen[1] * 3) / 7;
                srcGreen[5] = (srcGreen[0] * 3 + srcGreen[1] * 4) / 7;
                srcGreen[6] = (srcGreen[0] * 2 + srcGreen[1] * 5) / 7;
                srcGreen[7] = (srcGreen[0] * 1 + srcGreen[1] * 6) / 7;
            }
            else
            {
                srcGreen[2] = (srcGreen[0] * 4 + srcGreen[1] * 1) / 5;
                srcGreen[3] = (srcGreen[0] * 3 + srcGreen[1] * 2) / 5;
                srcGreen[4] = (srcGreen[0] * 2 + srcGreen[1] * 3) / 5;
                srcGreen[5] = (srcGreen[0] * 1 + srcGreen[1] * 4) / 5;
                srcGreen[6] = 0;
                srcGreen[7] = 255;
            }

            for(int i = 0;i != 8;++ i)
            {
                srcRgba[i].g_ = srcGreen[i];
            }
        }
    }

    {
        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);

        int colorRgbaStride = dstStride / sizeof(RGBA);

        int redIndex0   = (src[ 2] << 0*8) | (src[ 3] << 1*8) | (src[ 4] << 2*8);
        int redIndex1   = (src[ 5] << 0*8) | (src[ 6] << 1*8) | (src[ 7] << 2*8);
        int greenIndex0 = (src[10] << 0*8) | (src[11] << 1*8) | (src[12] << 2*8);
        int greenIndex1 = (src[13] << 0*8) | (src[14] << 1*8) | (src[15] << 2*8);

        dstRgba[colorRgbaStride * 0 + 0].r_ = srcRgba[(redIndex0 >> (3*0)) & 7].r_;
        dstRgba[colorRgbaStride * 0 + 1].r_ = srcRgba[(redIndex0 >> (3*1)) & 7].r_;
        dstRgba[colorRgbaStride * 0 + 2].r_ = srcRgba[(redIndex0 >> (3*2)) & 7].r_;
        dstRgba[colorRgbaStride * 0 + 3].r_ = srcRgba[(redIndex0 >> (3*3)) & 7].r_;
        dstRgba[colorRgbaStride * 1 + 0].r_ = srcRgba[(redIndex0 >> (3*4)) & 7].r_;
        dstRgba[colorRgbaStride * 1 + 1].r_ = srcRgba[(redIndex0 >> (3*5)) & 7].r_;
        dstRgba[colorRgbaStride * 1 + 2].r_ = srcRgba[(redIndex0 >> (3*6)) & 7].r_;
        dstRgba[colorRgbaStride * 1 + 3].r_ = srcRgba[(redIndex0 >> (3*7)) & 7].r_;
        dstRgba[colorRgbaStride * 2 + 0].r_ = srcRgba[(redIndex1 >> (3*0)) & 7].r_;
        dstRgba[colorRgbaStride * 2 + 1].r_ = srcRgba[(redIndex1 >> (3*1)) & 7].r_;
        dstRgba[colorRgbaStride * 2 + 2].r_ = srcRgba[(redIndex1 >> (3*2)) & 7].r_;
        dstRgba[colorRgbaStride * 2 + 3].r_ = srcRgba[(redIndex1 >> (3*3)) & 7].r_;
        dstRgba[colorRgbaStride * 3 + 0].r_ = srcRgba[(redIndex1 >> (3*4)) & 7].r_;
        dstRgba[colorRgbaStride * 3 + 1].r_ = srcRgba[(redIndex1 >> (3*5)) & 7].r_;
        dstRgba[colorRgbaStride * 3 + 2].r_ = srcRgba[(redIndex1 >> (3*6)) & 7].r_;
        dstRgba[colorRgbaStride * 3 + 3].r_ = srcRgba[(redIndex1 >> (3*7)) & 7].r_;

        dstRgba[colorRgbaStride * 0 + 0].g_ = srcRgba[(greenIndex0 >> (3*0)) & 7].g_;
        dstRgba[colorRgbaStride * 0 + 1].g_ = srcRgba[(greenIndex0 >> (3*1)) & 7].g_;
        dstRgba[colorRgbaStride * 0 + 2].g_ = srcRgba[(greenIndex0 >> (3*2)) & 7].g_;
        dstRgba[colorRgbaStride * 0 + 3].g_ = srcRgba[(greenIndex0 >> (3*3)) & 7].g_;
        dstRgba[colorRgbaStride * 1 + 0].g_ = srcRgba[(greenIndex0 >> (3*4)) & 7].g_;
        dstRgba[colorRgbaStride * 1 + 1].g_ = srcRgba[(greenIndex0 >> (3*5)) & 7].g_;
        dstRgba[colorRgbaStride * 1 + 2].g_ = srcRgba[(greenIndex0 >> (3*6)) & 7].g_;
        dstRgba[colorRgbaStride * 1 + 3].g_ = srcRgba[(greenIndex0 >> (3*7)) & 7].g_;
        dstRgba[colorRgbaStride * 2 + 0].g_ = srcRgba[(greenIndex1 >> (3*0)) & 7].g_;
        dstRgba[colorRgbaStride * 2 + 1].g_ = srcRgba[(greenIndex1 >> (3*1)) & 7].g_;
        dstRgba[colorRgbaStride * 2 + 2].g_ = srcRgba[(greenIndex1 >> (3*2)) & 7].g_;
        dstRgba[colorRgbaStride * 2 + 3].g_ = srcRgba[(greenIndex1 >> (3*3)) & 7].g_;
        dstRgba[colorRgbaStride * 3 + 0].g_ = srcRgba[(greenIndex1 >> (3*4)) & 7].g_;
        dstRgba[colorRgbaStride * 3 + 1].g_ = srcRgba[(greenIndex1 >> (3*5)) & 7].g_;
        dstRgba[colorRgbaStride * 3 + 2].g_ = srcRgba[(greenIndex1 >> (3*6)) & 7].g_;
        dstRgba[colorRgbaStride * 3 + 3].g_ = srcRgba[(greenIndex1 >> (3*7)) & 7].g_;

        for(int y = 0;y != 4;++ y)
        {
            for(int x = 0;x != 4;++ x)
            {
                dstRgba[colorRgbaStride * y + x].b_ = 0x00;
//				dstRgba[colorRgbaStride * y + x].a_ = 0x00;
                dstRgba[colorRgbaStride * y + x].a_ = 0xFF;
            }
        }
    }
}

void Decode_snorm_bc5(unsigned char *dst, const Byte *src, int x, int y, int dstStride, bool isHintNormal)
{
    assert(((x & 3) == 0) && ((y & 3) == 0));

    IntRGBA srcRgba[8];
    {
        signed int srcRed[8];
        {
            srcRed[0] = reinterpret_cast<const signed char *>(src)[0];
            srcRed[1] = reinterpret_cast<const signed char *>(src)[1];

            if(srcRed[0] > srcRed[1])
            {
                srcRed[2] = (srcRed[0] * 6 + srcRed[1] * 1) / 7;
                srcRed[3] = (srcRed[0] * 5 + srcRed[1] * 2) / 7;
                srcRed[4] = (srcRed[0] * 4 + srcRed[1] * 3) / 7;
                srcRed[5] = (srcRed[0] * 3 + srcRed[1] * 4) / 7;
                srcRed[6] = (srcRed[0] * 2 + srcRed[1] * 5) / 7;
                srcRed[7] = (srcRed[0] * 1 + srcRed[1] * 6) / 7;
            }
            else
            {
                srcRed[2] = (srcRed[0] * 4 + srcRed[1] * 1) / 5;
                srcRed[3] = (srcRed[0] * 3 + srcRed[1] * 2) / 5;
                srcRed[4] = (srcRed[0] * 2 + srcRed[1] * 3) / 5;
                srcRed[5] = (srcRed[0] * 1 + srcRed[1] * 4) / 5;
                srcRed[6] = -128;
                srcRed[7] = +127;
            }

            for(int i = 0;i != 8;++ i)
            {
                srcRgba[i].r_ = srcRed[i];
            }
        }

        signed int srcGreen[8];
        {
            srcGreen[0] = reinterpret_cast<const signed char *>(src)[8];
            srcGreen[1] = reinterpret_cast<const signed char *>(src)[9];

            if(srcGreen[0] > srcGreen[1])
            {
                srcGreen[2] = (srcGreen[0] * 6 + srcGreen[1] * 1) / 7;
                srcGreen[3] = (srcGreen[0] * 5 + srcGreen[1] * 2) / 7;
                srcGreen[4] = (srcGreen[0] * 4 + srcGreen[1] * 3) / 7;
                srcGreen[5] = (srcGreen[0] * 3 + srcGreen[1] * 4) / 7;
                srcGreen[6] = (srcGreen[0] * 2 + srcGreen[1] * 5) / 7;
                srcGreen[7] = (srcGreen[0] * 1 + srcGreen[1] * 6) / 7;
            }
            else
            {
                srcGreen[2] = (srcGreen[0] * 4 + srcGreen[1] * 1) / 5;
                srcGreen[3] = (srcGreen[0] * 3 + srcGreen[1] * 2) / 5;
                srcGreen[4] = (srcGreen[0] * 2 + srcGreen[1] * 3) / 5;
                srcGreen[5] = (srcGreen[0] * 1 + srcGreen[1] * 4) / 5;
                srcGreen[6] = -128;
                srcGreen[7] = +127;
            }

            for(int i = 0;i != 8;++ i)
            {
                srcRgba[i].g_ = srcGreen[i];
            }
        }
    }

    {
        IntRGBA	tempRgba[4][4];
        {
            int redIndex0   = (src[ 2] << 0*8) | (src[ 3] << 1*8) | (src[ 4] << 2*8);
            int redIndex1   = (src[ 5] << 0*8) | (src[ 6] << 1*8) | (src[ 7] << 2*8);
            int greenIndex0 = (src[10] << 0*8) | (src[11] << 1*8) | (src[12] << 2*8);
            int greenIndex1 = (src[13] << 0*8) | (src[14] << 1*8) | (src[15] << 2*8);

            tempRgba[0][0].r_ = srcRgba[(redIndex0 >> (3*0)) & 7].r_;
            tempRgba[0][1].r_ = srcRgba[(redIndex0 >> (3*1)) & 7].r_;
            tempRgba[0][2].r_ = srcRgba[(redIndex0 >> (3*2)) & 7].r_;
            tempRgba[0][3].r_ = srcRgba[(redIndex0 >> (3*3)) & 7].r_;
            tempRgba[1][0].r_ = srcRgba[(redIndex0 >> (3*4)) & 7].r_;
            tempRgba[1][1].r_ = srcRgba[(redIndex0 >> (3*5)) & 7].r_;
            tempRgba[1][2].r_ = srcRgba[(redIndex0 >> (3*6)) & 7].r_;
            tempRgba[1][3].r_ = srcRgba[(redIndex0 >> (3*7)) & 7].r_;
            tempRgba[2][0].r_ = srcRgba[(redIndex1 >> (3*0)) & 7].r_;
            tempRgba[2][1].r_ = srcRgba[(redIndex1 >> (3*1)) & 7].r_;
            tempRgba[2][2].r_ = srcRgba[(redIndex1 >> (3*2)) & 7].r_;
            tempRgba[2][3].r_ = srcRgba[(redIndex1 >> (3*3)) & 7].r_;
            tempRgba[3][0].r_ = srcRgba[(redIndex1 >> (3*4)) & 7].r_;
            tempRgba[3][1].r_ = srcRgba[(redIndex1 >> (3*5)) & 7].r_;
            tempRgba[3][2].r_ = srcRgba[(redIndex1 >> (3*6)) & 7].r_;
            tempRgba[3][3].r_ = srcRgba[(redIndex1 >> (3*7)) & 7].r_;

            tempRgba[0][0].g_ = srcRgba[(greenIndex0 >> (3*0)) & 7].g_;
            tempRgba[0][1].g_ = srcRgba[(greenIndex0 >> (3*1)) & 7].g_;
            tempRgba[0][2].g_ = srcRgba[(greenIndex0 >> (3*2)) & 7].g_;
            tempRgba[0][3].g_ = srcRgba[(greenIndex0 >> (3*3)) & 7].g_;
            tempRgba[1][0].g_ = srcRgba[(greenIndex0 >> (3*4)) & 7].g_;
            tempRgba[1][1].g_ = srcRgba[(greenIndex0 >> (3*5)) & 7].g_;
            tempRgba[1][2].g_ = srcRgba[(greenIndex0 >> (3*6)) & 7].g_;
            tempRgba[1][3].g_ = srcRgba[(greenIndex0 >> (3*7)) & 7].g_;
            tempRgba[2][0].g_ = srcRgba[(greenIndex1 >> (3*0)) & 7].g_;
            tempRgba[2][1].g_ = srcRgba[(greenIndex1 >> (3*1)) & 7].g_;
            tempRgba[2][2].g_ = srcRgba[(greenIndex1 >> (3*2)) & 7].g_;
            tempRgba[2][3].g_ = srcRgba[(greenIndex1 >> (3*3)) & 7].g_;
            tempRgba[3][0].g_ = srcRgba[(greenIndex1 >> (3*4)) & 7].g_;
            tempRgba[3][1].g_ = srcRgba[(greenIndex1 >> (3*5)) & 7].g_;
            tempRgba[3][2].g_ = srcRgba[(greenIndex1 >> (3*6)) & 7].g_;
            tempRgba[3][3].g_ = srcRgba[(greenIndex1 >> (3*7)) & 7].g_;
        }

        int colorRgbaStride = dstStride / sizeof(RGBA);
        RGBA *dstRgba = reinterpret_cast<RGBA *>(dst + x * sizeof(RGBA) + y * dstStride);

        if (isHintNormal)
        {
            for(int y = 0;y != 4;++ y)
            {
                for(int x = 0;x != 4;++ x)
                {
                    int index = colorRgbaStride * y + x;

                    float r = std::min(std::max(tempRgba[y][x].r_ / 127.0f, -1.0f), +1.0f);
                    float g = std::min(std::max(tempRgba[y][x].g_ / 127.0f, -1.0f), +1.0f);
                    float b = std::sqrtf(1.0f - std::min(r * r + g * g, +1.0f));

                    dstRgba[index].r_ = std::min(std::max(128 + tempRgba[y][x].r_, 0), 255);
                    dstRgba[index].g_ = std::min(std::max(128 + tempRgba[y][x].g_, 0), 255);
                    dstRgba[index].b_ = std::min(std::max(128 + int(127.0f * b),   0), 255);
                    dstRgba[index].a_ = 0xFF;
                }
            }
        }
        else
        {
            for(int y = 0;y != 4;++ y)
            {
                for(int x = 0;x != 4;++ x)
                {
                    int index = colorRgbaStride * y + x;

                    dstRgba[index].r_ = std::min(std::max(128 + tempRgba[y][x].r_, 0), 255);
                    dstRgba[index].g_ = std::min(std::max(128 + tempRgba[y][x].g_, 0), 255);
                    dstRgba[index].b_ = 0x00;
                    dstRgba[index].a_ = 0xFF;
                }
            }
        }
    }
}

}	// namespace TexUtils
