﻿// --------------------------------------------------------------------------------
// <copyright>
// Copyright (C)Nintendo. All rights reserved.
//
// These coded instructions, statements, and computer programs contain proprietary
// information of Nintendo and/or its licensed developers and are protected by
// national and international copyright laws. They may not be disclosed to third
// parties or copied or duplicated in any form, in whole or in part, without the
// prior written consent of Nintendo.
//
// The content herein is highly confidential and should be handled accordingly.
// </copyright>
// --------------------------------------------------------------------------------

using System;
using System.IO;
using System.Text;

namespace EffectMaker.Foundation.Utility
{
    /// <summary>
    /// Character-encoding utilities: heuristic detection of the encoding of a byte
    /// sequence and conversion of a file's content to a requested encoding.
    /// </summary>
    public class CharsetUtility
    {
        /// <summary>
        /// Reads the entire content of the stream, detects its encoding and returns
        /// the content as a byte sequence in the requested encoding.
        /// </summary>
        /// <remarks>
        /// The whole file is read starting at offset 0 regardless of the stream's
        /// current position; the position is restored before returning.
        /// The raw bytes are returned unconverted when the detected encoding has the
        /// same code page as <paramref name="destEncoding"/>, when it is ASCII, or
        /// when no encoding could be determined.
        /// </remarks>
        /// <param name="srcStream">Input stream (must be seekable).</param>
        /// <param name="destEncoding">Encoding of the returned byte sequence.</param>
        /// <returns>The file content, converted to <paramref name="destEncoding"/> when needed.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="srcStream"/> is null, or <paramref name="destEncoding"/> is null
        /// and a conversion is required.
        /// </exception>
        public static byte[] ConvertToByte(FileStream srcStream, Encoding destEncoding)
        {
            if (srcStream == null)
            {
                throw new ArgumentNullException("srcStream");
            }

            byte[] srcData = ReadWholeStream(srcStream);

            // Detect the encoding of the source data.
            Encoding srcEncoding = GetEncoding(srcData);

            // Unknown/binary content or plain ASCII: nothing to convert.
            if (srcEncoding == null || srcEncoding.CodePage == Encoding.ASCII.CodePage)
            {
                return srcData;
            }

            // Compare by code page rather than by reference so that an equivalent but
            // distinct Encoding instance does not trigger a pointless re-encoding.
            if (destEncoding != null && srcEncoding.CodePage == destEncoding.CodePage)
            {
                return srcData;
            }

            // Otherwise convert (Encoding.Convert rejects a null destination encoding).
            return Encoding.Convert(srcEncoding, destEncoding, srcData);
        }

        /// <summary>
        /// Heuristically determines the character encoding of a byte sequence.
        /// Based on http://dobon.net/vb/dotnet/string/detectcode.html
        /// </summary>
        /// <remarks>
        /// The checks are applied in this order:
        /// control bytes (binary / raw UTF-16), pure 7-bit data (ASCII),
        /// ISO-2022-JP escape sequences (JIS), and finally a score comparison
        /// between Shift_JIS, EUC-JP and UTF-8 byte patterns.
        /// </remarks>
        /// <param name="data">Data to inspect.</param>
        /// <returns>
        /// The most plausible encoding, or null when the data looks binary or when
        /// no candidate scores strictly higher than the others.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        public static Encoding GetEncoding(byte[] data)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            const byte ByteEscape = 0x1B;
            const byte ByteAt = 0x40;
            const byte ByteDollar = 0x24;
            const byte ByteAnd = 0x26;
            const byte ByteOpen = 0x28;    // '('
            const byte ByteB = 0x42;
            const byte ByteD = 0x44;
            const byte ByteJ = 0x4A;
            const byte ByteI = 0x49;

            int len = data.Length;
            byte b1, b2, b3, b4;

            //// Encode::is_utf8 from the reference implementation is intentionally ignored.

            // Pass 1: control bytes mark the data as binary. The scan continues after the
            // first hit because a later NUL followed by a 7-bit byte means raw UTF-16.
            bool isBinary = false;
            for (int i = 0; i < len; i++)
            {
                b1 = data[i];
                if (b1 <= 0x06 || b1 == 0x7F || b1 == 0xFF)
                {
                    // 'binary'
                    isBinary = true;
                    if (b1 == 0x00 && i < len - 1 && data[i + 1] <= 0x7F)
                    {
                        // smells like raw unicode
                        return System.Text.Encoding.Unicode;
                    }
                }
            }

            if (isBinary)
            {
                return null;
            }

            // Pass 2: without any ESC or 8-bit byte the data is not Japanese text.
            // (Empty input also ends up here and is reported as ASCII.)
            bool notJapanese = true;
            for (int i = 0; i < len; i++)
            {
                b1 = data[i];
                if (b1 == ByteEscape || 0x80 <= b1)
                {
                    notJapanese = false;
                    break;
                }
            }

            if (notJapanese)
            {
                return System.Text.Encoding.ASCII;
            }

            // Pass 3: look for ISO-2022-JP (JIS) escape sequences.
            for (int i = 0; i < len - 2; i++)
            {
                b1 = data[i];
                if (b1 != ByteEscape)
                {
                    continue;
                }

                b2 = data[i + 1];
                b3 = data[i + 2];

                // Three-byte sequences:
                //   ESC $ @          JIS_0208 1978
                //   ESC $ B          JIS_0208 1983
                //   ESC ( B / ESC ( J  JIS_ASC
                //   ESC ( I          JIS_KANA
                bool isThreeByteJis =
                    (b2 == ByteDollar && (b3 == ByteAt || b3 == ByteB)) ||
                    (b2 == ByteOpen && (b3 == ByteB || b3 == ByteJ || b3 == ByteI));
                if (isThreeByteJis)
                {
                    // JIS
                    return System.Text.Encoding.GetEncoding(50220);
                }

                if (i < len - 3)
                {
                    b4 = data[i + 3];
                    if (b2 == ByteDollar && b3 == ByteOpen && b4 == ByteD)
                    {
                        // ESC $ ( D : JIS_0212
                        // JIS
                        return System.Text.Encoding.GetEncoding(50220);
                    }

                    if (i < len - 5 &&
                        b2 == ByteAnd && b3 == ByteAt && b4 == ByteEscape &&
                        data[i + 4] == ByteDollar && data[i + 5] == ByteB)
                    {
                        // ESC & @ ESC $ B : JIS_0208 1990
                        // JIS
                        return System.Text.Encoding.GetEncoding(50220);
                    }
                }
            }

            // Pass 4: should be euc|sjis|utf8. Count the bytes that form valid
            // multi-byte sequences under each candidate encoding.
            // use of (?:) by Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp>
            int sjis = 0;
            int euc = 0;
            int utf8 = 0;

            for (int i = 0; i < len - 1; i++)
            {
                b1 = data[i];
                b2 = data[i + 1];
                if (((0x81 <= b1 && b1 <= 0x9F) || (0xE0 <= b1 && b1 <= 0xFC)) &&
                    ((0x40 <= b2 && b2 <= 0x7E) || (0x80 <= b2 && b2 <= 0xFC)))
                {
                    // SJIS_C
                    sjis += 2;
                    i++;    // skip the trail byte
                }
            }

            for (int i = 0; i < len - 1; i++)
            {
                b1 = data[i];
                b2 = data[i + 1];
                if (((0xA1 <= b1 && b1 <= 0xFE) && (0xA1 <= b2 && b2 <= 0xFE)) ||
                    (b1 == 0x8E && (0xA1 <= b2 && b2 <= 0xDF)))
                {
                    // EUC_C
                    // EUC_KANA
                    euc += 2;
                    i++;    // skip the trail byte
                }
                else if (i < len - 2)
                {
                    b3 = data[i + 2];
                    if (b1 == 0x8F && (0xA1 <= b2 && b2 <= 0xFE) &&
                        (0xA1 <= b3 && b3 <= 0xFE))
                    {
                        // EUC_0212
                        euc += 3;
                        i += 2;    // skip both trail bytes
                    }
                }
            }

            for (int i = 0; i < len - 1; i++)
            {
                b1 = data[i];
                b2 = data[i + 1];
                if ((0xC0 <= b1 && b1 <= 0xDF) && (0x80 <= b2 && b2 <= 0xBF))
                {
                    // UTF8 (two-byte sequence)
                    utf8 += 2;
                    i++;    // skip the continuation byte
                }
                else if (i < len - 2)
                {
                    b3 = data[i + 2];
                    if ((0xE0 <= b1 && b1 <= 0xEF) && (0x80 <= b2 && b2 <= 0xBF) &&
                        (0x80 <= b3 && b3 <= 0xBF))
                    {
                        // UTF8 (three-byte sequence)
                        utf8 += 3;
                        i += 2;    // skip both continuation bytes
                    }
                }
            }

            //// M. Takahashi's suggestion
            //// utf8 += utf8 / 2;

            System.Diagnostics.Debug.WriteLine(
                string.Format("sjis = {0}, euc = {1}, utf8 = {2}", sjis, euc, utf8));

            // The winner must score strictly higher than both others; a tie yields null.
            if (euc > sjis && euc > utf8)
            {
                // EUC
                return System.Text.Encoding.GetEncoding(51932);
            }
            else if (sjis > euc && sjis > utf8)
            {
                // SJIS
                return System.Text.Encoding.GetEncoding(932);
            }
            else if (utf8 > euc && utf8 > sjis)
            {
                // UTF8
                return System.Text.Encoding.UTF8;
            }

            return null;
        }

        /// <summary>
        /// Reads the stream from offset 0 to its end and restores the original position.
        /// </summary>
        /// <param name="stream">Seekable stream to read.</param>
        /// <returns>All bytes of the stream.</returns>
        private static byte[] ReadWholeStream(FileStream stream)
        {
            // Remember the caller's position so it can be restored afterwards.
            long savedPosition = stream.Position;
            try
            {
                stream.Position = 0;

                byte[] buffer = new byte[stream.Length];
                int total = 0;

                // Stream.Read may return fewer bytes than requested, so loop until
                // the buffer is full or the end of the stream is reached.
                while (total < buffer.Length)
                {
                    int read = stream.Read(buffer, total, buffer.Length - total);
                    if (read == 0)
                    {
                        break;
                    }

                    total += read;
                }

                if (total < buffer.Length)
                {
                    // The file was shorter than reported; drop the unread tail so that
                    // padding zeros are not mistaken for binary/UTF-16 content.
                    Array.Resize(ref buffer, total);
                }

                return buffer;
            }
            finally
            {
                // Restore the position even when reading fails.
                stream.Position = savedPosition;
            }
        }
    }
}
