﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/
#include <cstdlib>
#include <cstring>  // std::strcpy, std::strlen etc.
#include <cctype>   // std::isspace, std::iscntrl
#include <nn/os.h>
#include <nn/nn_Log.h>
#include <nn/nn_Assert.h>
#include "testNs_XmlParser.h"

// Logging helper for this file: forwards its arguments to NN_LOG with the literal
// tag "[NsSystemUpdate] " placed in front of the format string.
#define LOG_OUT( ... )  NN_LOG( "[NsSystemUpdate] " __VA_ARGS__ )

namespace detail {

//!---------------------------------------------------------------------------------
//! @brief Extracts one character at a time from an encoded byte stream.
//!
//! Only UTF-8 is actually decoded. The decoder accepts the historical 5- and 6-byte
//! forms in addition to the 1- to 4-byte forms, and does not reject overlong
//! encodings. Malformed input never makes the decoder run away: it yields U+FFFD
//! and stops in front of the first byte that does not belong to the sequence.
//!---------------------------------------------------------------------------------
class CharacterExtractor
{
private:
    //!---------------------------------------------------------------------------------
    //! @brief Decodes a single UTF-8 sequence.
    //! @param[in]  pText   Address of the first byte of the sequence.
    //! @param[out] outUcs  Decoded code point. Always assigned; U+FFFD when the bytes
    //!                     at @a pText do not form a complete sequence.
    //! @return Address of the first byte that was not consumed. At least one byte is
    //!         always consumed.
    //! @note The function has no end-of-buffer parameter. It inspects at most the
    //!       bytes announced by the lead byte and stops at the first byte that is
    //!       not a continuation byte (which includes a terminating NUL).
    //!---------------------------------------------------------------------------------
    const char* DecodeUtf8( const char* pText, char32_t& outUcs ) const NN_NOEXCEPT
    {
        const char32_t replacement = 0xFFFD;   // U+FFFD REPLACEMENT CHARACTER

        const unsigned char lead = static_cast< unsigned char >( *pText++ );
        if ( lead < 0x80 )
        {
            // Single byte (ASCII).
            outUcs = lead;
            return pText;
        }

        // Derive the number of continuation bytes and the payload bits from the lead byte.
        int trailCount = 0;
        char32_t ucs = 0;
        if ( 0xC0 == ( lead & 0xE0 ) )
        {
            trailCount = 1;
            ucs = lead & 0x1F;
        }
        else if ( 0xE0 == ( lead & 0xF0 ) )
        {
            trailCount = 2;
            ucs = lead & 0x0F;
        }
        else if ( 0xF0 == ( lead & 0xF8 ) )
        {
            trailCount = 3;
            ucs = lead & 0x07;
        }
        else if ( 0xF8 == ( lead & 0xFC ) )
        {
            trailCount = 4;
            ucs = lead & 0x03;
        }
        else if ( 0xFC == ( lead & 0xFE ) )
        {
            trailCount = 5;
            ucs = lead & 0x01;
        }
        else
        {
            // A continuation byte without a lead byte, or 0xFE / 0xFF.
            outUcs = replacement;
            return pText;
        }

        for ( ; trailCount > 0; --trailCount )
        {
            const unsigned char trail = static_cast< unsigned char >( *pText );
            if ( 0x80 != ( trail & 0xC0 ) )
            {
                // The sequence is cut short. Leave the offending byte unconsumed so that
                // the caller decodes it as the start of the next character.
                outUcs = replacement;
                return pText;
            }
            ucs = ( ucs << 6 ) | ( trail & 0x3F );
            ++pText;
        }

        outUcs = ucs;
        return pText;
    }

public:
    typedef enum Encoding
    {
        Utf8,
        Sjis,
    } Encoding;

    //!---------------------------------------------------------------------------------
    //! @brief Constructor.
    //! @param[in] initialEncode  Encoding assumed by Decode(). Defaults to UTF-8.
    //!---------------------------------------------------------------------------------
    explicit CharacterExtractor( Encoding initialEncode = Encoding::Utf8 ) NN_NOEXCEPT : encoding( initialEncode ) {}

    //!---------------------------------------------------------------------------------
    //! @brief Decodes one character at @a pText according to the current encoding.
    //! @param[in]  pText   Address of the first byte of the character.
    //! @param[out] outUcs  Decoded code point. Always assigned.
    //! @return Address of the first byte that was not consumed (always past @a pText).
    //!---------------------------------------------------------------------------------
    const char* Decode( const char* pText, char32_t& outUcs ) const NN_NOEXCEPT
    {
        switch ( encoding )
        {
        case Encoding::Sjis:
            // TODO: Shift_JIS decoding is not implemented.
            // Until it is, hand back the raw byte so that outUcs is defined and callers
            // that loop on the returned address always make progress.
            outUcs = static_cast< unsigned char >( *pText++ );
            break;
        default:
            pText = DecodeUtf8( pText, outUcs );
            break;
        }
        return pText;
    }

    //! Encoding used by Decode().
    Encoding encoding;
};

//!---------------------------------------------------------------------------------
//! @brief SAX-style XML parser. The document is walked one decoded character at a
//!        time and elements, text and comments are reported through a
//!        CallbackInterface.
//!---------------------------------------------------------------------------------
class XmlParserImpl : public XmlParser
{
    //!---------------------------------------------------------------------------------
    //! @brief States of the character-level state machine run by ParseCharacter().
    //!---------------------------------------------------------------------------------
    enum ParseState
    {
        ParseState_Normal,                  //!< Outside of any tag; text content is collected.
        ParseState_TagNameFirst,            //!< Right after '<'; the kind of tag is decided here.
        ParseState_TagNameTrail,            //!< Collecting the element name.
        ParseState_Exclamation,             //!< After "<!"; waiting for "--" or "[CDATA[".
        ParseState_EndTag,                  //!< After a closing '/' or '?'; waiting for '>'.
        ParseState_AttributeKeyFirst,       //!< Between attributes; waiting for a name or the tag end.
        ParseState_AttributeKeyTrail,       //!< Collecting an attribute name up to '='.
        ParseState_AttributeValue,          //!< After '='; waiting for the opening quote.
        ParseState_AttributeValueQuoted,    //!< Inside a quoted attribute value.
        ParseState_Comment,                 //!< Inside a comment; ends with "-->".
        ParseState_CDATA,                   //!< Inside a CDATA section; ends with "]]>".
    };

    //!---------------------------------------------------------------------------------
    //! @brief Kind of tag currently being parsed.
    //!---------------------------------------------------------------------------------
    enum TagType
    {
        TagType_Question,       //!< Tag opened with "<?".
        TagType_Exclamation,    //!< Tag opened with "<!".
        TagType_StartEnd,       //!< Start tag closed with "/>".
        TagType_Start,          //!< Start tag.
        TagType_End,            //!< Tag opened with "</".
    };

    //!---------------------------------------------------------------------------------
    //! @brief Skips a UTF-8 byte order mark (EF BB BF) if @a pText starts with one.
    //! @param[in] pText  Start of the document.
    //! @return @a pText advanced by three bytes when a BOM is present, otherwise
    //!         @a pText itself.
    //! @note Bytes are inspected in order and the check stops at the first mismatch;
    //!       no length is passed in, so up to three bytes may be read.
    //!---------------------------------------------------------------------------------
    static const char* SkipByteOrderMark( const char* pText ) NN_NOEXCEPT
    {
        const unsigned char* const pBytes = reinterpret_cast< const unsigned char* >( pText );
        if ( 0xEFU != pBytes[ 0 ] )
        {
            return pText;
        }
        if ( 0xBBU != pBytes[ 1 ] )
        {
            return pText;
        }
        if ( 0xBFU != pBytes[ 2 ] )
        {
            return pText;
        }
        return pText + 3;
    }

    //!---------------------------------------------------------------------------------
    //! @brief Feeds one decoded character into the state machine.
    //! @param[in] callback  Receiver of the detected elements, text and comments.
    //! @param[in] c         Decoded code point.
    //! @param[in] pBegin    First encoded byte of the character.
    //! @param[in] pEnd      One past the last encoded byte of the character.
    //! @return @a pEnd.
    //!---------------------------------------------------------------------------------
    const char* ParseCharacter( CallbackInterface& callback, const char32_t c, const char* pBegin, const char* pEnd ) NN_NOEXCEPT;

    //!---------------------------------------------------------------------------------
    //! @brief Handles one character of text content or of a quoted attribute value,
    //!        including entity references.
    //! @param[in] c       Decoded code point.
    //! @param[in] pBegin  First encoded byte of the character.
    //! @param[in] pEnd    One past the last encoded byte of the character.
    //! @return @a pEnd.
    //!---------------------------------------------------------------------------------
    const char* ParseEntity( const char32_t c, const char* pBegin, const char* pEnd ) NN_NOEXCEPT;

    //!---------------------------------------------------------------------------------
    //! @brief Reports the tag that has just been completed to @a callback.
    //!---------------------------------------------------------------------------------
    void DispatchElementDetection( CallbackInterface& callback ) NN_NOEXCEPT;

    //!---------------------------------------------------------------------------------
    //! @brief Puts every piece of parsing state back to its initial value.
    //!---------------------------------------------------------------------------------
    void CleanState() NN_NOEXCEPT;

public:
    //!---------------------------------------------------------------------------------
    //! @brief Constructor. Starts in the initial state and pre-reserves the work
    //!        strings.
    //!---------------------------------------------------------------------------------
    explicit XmlParserImpl() NN_NOEXCEPT
        : m_CharacterExtractor()
        , m_CurrentParseState( ParseState::ParseState_Normal )
        , m_CurrentTagType( TagType::TagType_Question )
        , m_StringBuilder()
        , m_ElementName()
        , m_EntityCode()
        , m_AttributeName()
        , m_AttributeCollection()
        , m_RequiredQuote( '\0' )
        , m_EscapeScope( false )
        , m_QuoteScope( false )
    {
        m_EntityCode.reserve( 64 );
        m_ElementName.reserve( 512 );
        m_AttributeName.reserve( 512 );
        m_StringBuilder.reserve( 1024 );
    }

    //!---------------------------------------------------------------------------------
    //! @brief Destructor.
    //!---------------------------------------------------------------------------------
    virtual ~XmlParserImpl() NN_NOEXCEPT NN_OVERRIDE
    {
    }

    //!---------------------------------------------------------------------------------
    //! @copydoc XmlParser::Parse( CallbackInterface&, const char*, const size_t )
    //!---------------------------------------------------------------------------------
    virtual bool Parse( CallbackInterface& callback, const char* pDocument, const size_t byteSize ) NN_NOEXCEPT NN_OVERRIDE;

private:
    CharacterExtractor                  m_CharacterExtractor;   //!< Decoder that turns bytes into code points.
    ParseState                          m_CurrentParseState;    //!< Current state of the state machine.
    TagType                             m_CurrentTagType;       //!< Kind of the tag being parsed.
    StringType                          m_StringBuilder;        //!< Work buffer for the token being collected.
    StringType                          m_ElementName;          //!< Name of the tag being parsed.
    StringType                          m_EntityCode;           //!< Entity reference being collected ("&..." without ';').
    StringType                          m_AttributeName;        //!< Name of the attribute whose value is pending.
    Attribute::CollectionType           m_AttributeCollection;  //!< Attributes gathered for the current tag.
    char32_t                            m_RequiredQuote;        //!< Quote character that opened the attribute value.
    bool                                m_EscapeScope;          //!< True right after an unpaired backslash.
    bool                                m_QuoteScope;           //!< Toggled by each unescaped double quote in text.
};

//!---------------------------------------------------------------------------------
//! @brief Encodes a Unicode code point as a NUL-terminated UTF-8 string.
//! @param[in]  code        Code point to encode.
//! @param[out] pOutBuffer  Destination; must have room for 5 bytes
//!                         (up to 4 encoded bytes plus the terminator).
//! @return @a pOutBuffer. The result is an empty string, and a message is logged,
//!         when @a code is a surrogate (0xD800-0xDFFF) or exceeds 0x10FFFF, because
//!         neither can be represented in UTF-8.
//!---------------------------------------------------------------------------------
static char* CreateUtf8FromUcs( char32_t code, char* pOutBuffer ) NN_NOEXCEPT
{
    char* text = pOutBuffer;
    if ( code >= 0xD800 && code <= 0xDFFF )
    {
        // Surrogates are reserved for UTF-16 and have no valid UTF-8 form.
        text[ 0 ] = '\0';
        LOG_OUT( "Invalid numeric character entity, surrogate code points (0xD800 to 0xDFFF) are not allowed in UTF-8." );
    }
    else if ( code < 0x80 )    // 1 byte sequence
    {
        text[ 0 ] = static_cast< char >( code );
        text[ 1 ] = '\0';
    }
    else if ( code < 0x800 )  // 2 byte sequence
    {
        text[ 0 ] = static_cast< char >( 0xC0 | ( code >> 6 ) );
        text[ 1 ] = static_cast< char >( 0x80 | ( code & 0x3F ) );
        text[ 2 ] = '\0';
    }
    else if ( code < 0x10000 )    // 3 byte sequence
    {
        text[ 0 ] = static_cast< char >( 0xE0 | ( code >> 12 ) );
        text[ 1 ] = static_cast< char >( 0x80 | ( ( code >> 6 ) & 0x3F ) );
        text[ 2 ] = static_cast< char >( 0x80 | ( code & 0x3F ) );
        text[ 3 ] = '\0';
    }
    else if ( code < 0x110000 )   // 4 byte sequence
    {
        text[ 0 ] = static_cast< char >( 0xF0 | ( code >> 18 ) );
        text[ 1 ] = static_cast< char >( 0x80 | ( ( code >> 12 ) & 0x3F ) );
        text[ 2 ] = static_cast< char >( 0x80 | ( ( code >> 6 ) & 0x3F ) );
        text[ 3 ] = static_cast< char >( 0x80 | ( code & 0x3F ) );
        text[ 4 ] = '\0';
    }
    else    // Invalid, only codes up to 0x10FFFF are allowed in Unicode
    {
        text[ 0 ] = '\0';
        LOG_OUT( "Invalid numeric character entity, only codes up to 0x10FFFF are allowed in Unicode." );
    }
    return text;
}

//!---------------------------------------------------------------------------------
//! @brief Resolves an entity reference and appends its UTF-8 text to @a out.
//! @param[in]     entity  Reference text starting with '&' and without the closing
//!                        ';' (for example "&amp", "&#65" or "&#x41").
//! @param[in,out] out     String that receives the replacement text.
//!
//! Supported are the five predefined entities (amp, apos, quot, gt, lt) and decimal
//! or hexadecimal numeric character references. Names must match exactly; anything
//! else, including a numeric value that is not a valid code point, appends nothing.
//!---------------------------------------------------------------------------------
static void ConvertEntityToUtf8( const XmlParser::StringType& entity, XmlParser::StringType& out ) NN_NOEXCEPT
{
    const char* e = entity.data();
    if ( '&' != e[ 0 ] )
    {
        return;
    }

    if ( '#' == e[ 1 ] )
    {
        // Numeric character reference: "&#<decimal>" or "&#x<hex>".
        const bool isHex = ( 'x' == e[ 2 ] || 'X' == e[ 2 ] );
        const unsigned long value = isHex
            ? std::strtoul( &e[ 3 ], nullptr, 16 )
            : std::strtoul( &e[ 2 ], nullptr, 10 );

        // Range-check before narrowing. unsigned long may be wider than char32_t, and a
        // plain cast would silently wrap an oversized value onto a valid code point.
        // 0x110000 is the first value CreateUtf8FromUcs() rejects.
        const char32_t code = ( value > 0x10FFFFUL )
            ? static_cast< char32_t >( 0x110000 )
            : static_cast< char32_t >( value );

        char text[ 5 ] = { 0, 0, 0, 0, 0 };
        out.append( CreateUtf8FromUcs( code, text ) );
    }
    else if ( 0 == entity.compare( "&amp" ) )
    {
        out.push_back( '&' );
    }
    else if ( 0 == entity.compare( "&apos" ) )
    {
        out.push_back( '\'' );
    }
    else if ( 0 == entity.compare( "&quot" ) )
    {
        out.push_back( '\"' );
    }
    else if ( 0 == entity.compare( "&gt" ) )
    {
        out.push_back( '>' );
    }
    else if ( 0 == entity.compare( "&lt" ) )
    {
        out.push_back( '<' );
    }
    // Unknown entity names are dropped.
}

//!---------------------------------------------------------------------------------
//! @brief Handles one character of text content or of a quoted attribute value.
//! @param[in] c       Decoded code point.
//! @param[in] pBegin  First encoded byte of the character.
//! @param[in] pEnd    One past the last encoded byte of the character.
//! @return @a pEnd.
//!
//! Behaviour, in order of precedence:
//! - While an entity reference is pending, characters are collected until ';',
//!   at which point the reference is resolved into the string builder.
//! - A backslash opens an escape scope; a second backslash emits one backslash.
//! - An unescaped double quote toggles the quote scope and is not stored; an
//!   escaped one is stored.
//! - An unescaped '&' starts an entity reference; an escaped one is dropped.
//! - ASCII whitespace and control characters in front of any stored text are
//!   skipped unless the quote scope is active.
//! - Every other character is stored unless it directly follows a backslash.
//!---------------------------------------------------------------------------------
const char* XmlParserImpl::ParseEntity( const char32_t c, const char* pBegin, const char* pEnd ) NN_NOEXCEPT
{
    const size_t byteCount = pEnd - pBegin;
    StringType& pending = m_EntityCode;

    // An entity reference is being collected.
    if ( false == pending.empty() )
    {
        if ( ';' == c )
        {
            ConvertEntityToUtf8( pending, m_StringBuilder );
            pending.clear();
        }
        else
        {
            pending.append( pBegin, byteCount );
        }
        return pEnd;
    }

    const bool wasEscaped = m_EscapeScope;

    if ( '\\' == c )
    {
        // The first backslash opens the escape scope, the second one emits a backslash.
        m_EscapeScope = !wasEscaped;
        if ( wasEscaped )
        {
            m_StringBuilder.append( pBegin, byteCount );
        }
        return pEnd;
    }

    // Any character other than a backslash closes the escape scope.
    m_EscapeScope = false;

    if ( '\"' == c )
    {
        if ( wasEscaped )
        {
            // escaped double-quote.
            m_StringBuilder.append( pBegin, byteCount );
        }
        else
        {
            // non-escaped double-quote.
            m_QuoteScope = !m_QuoteScope;
        }
        return pEnd;
    }

    if ( '&' == c )
    {
        if ( false == wasEscaped )
        {
            // detect entity code style beginning.
            pending.assign( pBegin, byteCount );
        }
        return pEnd;
    }

    // std::isspace / std::iscntrl are only defined for values representable as
    // unsigned char, so the code point is range-checked before they are called.
    // A byte length of 1 alone is not enough: a lone non-ASCII byte also has length 1.
    const bool isLeadingBlank = ( false == m_QuoteScope )
        && ( 1 == byteCount )
        && ( c < 0x80 )
        && m_StringBuilder.empty()
        && ( 0 != std::isspace( static_cast< int >( c ) ) || 0 != std::iscntrl( static_cast< int >( c ) ) );

    if ( false == isLeadingBlank && false == wasEscaped )
    {
        // dispatchable characters saving.
        m_StringBuilder.append( pBegin, byteCount );
    }
    return pEnd;
} // NOLINT

//!---------------------------------------------------------------------------------
//! @brief
//!---------------------------------------------------------------------------------
const char* XmlParserImpl::ParseCharacter( CallbackInterface& callback, const char32_t c, const char* pBegin, const char* pEnd ) NN_NOEXCEPT
{
    const size_t length = pEnd - pBegin;
    StringType& b = m_StringBuilder;
    switch ( m_CurrentParseState )
    {
    case ParseState::ParseState_Normal:
        if ( '<' == c )
        {
            if ( false == b.empty() )
            {
                // タグコンテンツ値 <>...</>
                callback.OnText( b );
            }
            b.clear();
            m_QuoteScope = false;
            m_AttributeCollection.Clear();
            m_CurrentParseState = ParseState::ParseState_TagNameFirst;
        }
        else
        {
            ParseEntity( c, pBegin, pEnd );
        }
        break;
    case ParseState::ParseState_TagNameFirst:
        {
            if ( '/' == c )
            {
                m_CurrentTagType = TagType::TagType_End;
                m_CurrentParseState = ParseState::ParseState_TagNameTrail;
            }
            else if ( '?' == c )
            {
                m_CurrentTagType = TagType::TagType_Question;
                m_CurrentParseState = ParseState::ParseState_TagNameTrail;
            }
            else if ( '!' == c )
            {
                m_CurrentTagType = TagType::TagType_Exclamation;
                m_CurrentParseState = ParseState::ParseState_Exclamation;
            }
            else if ( 0 != std::isspace( c ) )
            {
                // whitespace
            }
            else
            {
                b.append( pBegin, length );
                m_CurrentTagType = TagType::TagType_Start;
                m_CurrentParseState = ParseState::ParseState_TagNameTrail;
            }
        }
        break;
    case ParseState::ParseState_TagNameTrail:
        {
            if ( '>' == c )
            {
                m_ElementName.assign( b );
                b.clear();
                DispatchElementDetection( callback );
                m_CurrentParseState = ParseState::ParseState_Normal;
            }
            else if ( '/' == c )
            {
                m_ElementName.assign( b );
                b.clear();
                if ( TagType::TagType_Start != m_CurrentTagType ) {
                    NN_ABORT( "Invalid statements [ detect keyword of the '/', but that not expected. ]." );
                }
                m_CurrentTagType = TagType::TagType_StartEnd;
                m_CurrentParseState = ParseState::ParseState_EndTag;

            }
            else if ( 0 != std::isspace( c ) )
            {
                m_ElementName.assign( b );
                b.clear();
                m_CurrentParseState = ParseState::ParseState_AttributeKeyFirst;
            }
            else
            {
                b.append( pBegin, length );
            }
        }
        break;
    case ParseState::ParseState_AttributeKeyFirst:
        {
            if ( '>' == c )
            {
                DispatchElementDetection( callback );
                m_CurrentParseState = ParseState::ParseState_Normal;
            }
            else if ( '?' == c )
            {
                if ( TagType::TagType_Question != m_CurrentTagType )
                {
                    NN_ABORT( "Invalid statements [ detect keyword of the '?', but that not expected. ]." );
                }
                m_CurrentParseState = ParseState::ParseState_EndTag;
            }
            else if ( '/' == c )
            {
                if ( TagType::TagType_Start != m_CurrentTagType ) {
                    NN_ABORT( "Invalid statements [ detect keyword of the '/', but that not expected. ]." );
                }
                m_CurrentParseState = ParseState::ParseState_EndTag;
                m_CurrentTagType = TagType::TagType_StartEnd;
            }
            else if ( 0 != std::isspace( c ) )
            {
                // whitespace
            }
            else
            {
                b.append( pBegin, length );
                m_CurrentParseState = ParseState::ParseState_AttributeKeyTrail;
            }
        }
        break;
    case ParseState::ParseState_AttributeKeyTrail:
        {
            if ( '=' == c )
            {
                m_AttributeName.assign( b );
                b.clear();
                m_CurrentParseState = ParseState::ParseState_AttributeValue;
            }
            else if ( 0 != std::isspace( c ) )
            {
                // whitespace
            }
            else
            {
                b.append( pBegin, length );
            }
        }
        break;
    case ParseState::ParseState_AttributeValue:
        {
            if ( '\'' == c || '\"' == c )
            {
                m_RequiredQuote = c; // 開始時のquote codeを記憶
                m_CurrentParseState = ParseState::ParseState_AttributeValueQuoted;
            }
            else if ( 0 != std::isspace( c ) )
            {
                // whitespace
            }
        }
        break;
    case ParseState::ParseState_AttributeValueQuoted:
        {
            if ( c == m_RequiredQuote )
            {
                m_AttributeCollection.CreateNewAttribute( m_AttributeName, b );
                m_AttributeName.clear();
                b.clear();
                m_CurrentParseState = ParseState::ParseState_AttributeKeyFirst;
            }
            else
            {
                ParseEntity( c, pBegin, pEnd );
            }
        }
        break;
    case ParseState::ParseState_EndTag:
        {
            if ( '>' == c )
            {
                DispatchElementDetection( callback );
                m_CurrentParseState = ParseState::ParseState_Normal;
            }
            else if ( 0 != std::isspace( c ) )
            {
                // whitespace
            }
        }
        break;
    case ParseState::ParseState_Exclamation:
        {
            b.append( pBegin, length );
            size_t n;
            const char* text = b.data();
            if ( 2 == ( n = b.length() ) && '-' == text[ 0 ] && '-' == text[ 1 ] )
            {
                m_CurrentParseState = ParseState::ParseState_Comment;
                b.clear();
            }
            else if ( 7 == n && 0 == b.compare( "[CDATA[" ) )
            {
                m_CurrentParseState = ParseState::ParseState_CDATA;
                b.clear();
            }
            else if ( 7 < n )
            {
                NN_ABORT( "Invalid statements [ detect keyword of the '!', but that not expected. ]." );
            }
        }
        break;
    case ParseState::ParseState_Comment:
        {
            b.append( pBegin, length );
            size_t n;
            const char* text = b.data();
            if ( 3 <= ( n = b.length() ) && '-' == text[ n - 3 ] && '-' == text[ n - 2 ] && '>' == text[ n - 1 ] )
            {
                callback.OnComment( b.erase( n - 3 ) );
                b.clear();
                m_CurrentParseState = ParseState::ParseState_Normal;
            }
        }
        break;
    case ParseState::ParseState_CDATA:
        {
            b.append( pBegin, length );
            size_t n;
            const char* text = b.data();
            if ( 3 <= ( n = b.length() ) && ']' == text[ n - 3 ] && ']' == text[ n - 2 ] && '>' == text[ n - 1 ] )
            {
                callback.OnText( b.erase( n - 3 ) );
                b.clear();
                m_CurrentParseState = ParseState::ParseState_Normal;
            }
        }
        break;
    default:
        NN_UNEXPECTED_DEFAULT;
    }
    return pEnd;
} // NOLINT(impl/function_size)

//!---------------------------------------------------------------------------------
//! @brief Reports the tag that has just been completed to @a callback.
//! @param[in] callback  Receiver of the element notifications.
//!
//! A start tag raises OnElementBegin, an end tag raises OnElementEnd and a
//! self-closing tag raises both, in that order. Tags opened with "<?" or "<!"
//! raise nothing.
//!---------------------------------------------------------------------------------
void XmlParserImpl::DispatchElementDetection( CallbackInterface& callback ) NN_NOEXCEPT
{
    StringType& name = m_ElementName;
    switch ( m_CurrentTagType )
    {
    case TagType::TagType_Start:
        callback.OnElementBegin( name, m_AttributeCollection );
        break;

    case TagType::TagType_End:
        callback.OnElementEnd( name );
        break;

    case TagType::TagType_StartEnd:
        callback.OnElementBegin( name, m_AttributeCollection );
        callback.OnElementEnd( name );
        break;

    case TagType::TagType_Question:
        // Document declaration ("xml") or another "<?" tag: nothing is reported.
        // TODO: obtain encoding.
        break;

    case TagType::TagType_Exclamation:
    default:
        break;
    }
}

//!---------------------------------------------------------------------------------
//! @brief Puts every piece of parsing state back to its initial value: state machine,
//!        flags, work strings and the attribute collection.
//!---------------------------------------------------------------------------------
void XmlParserImpl::CleanState() NN_NOEXCEPT
{
    // Work buffers.
    m_StringBuilder.clear();
    m_ElementName.clear();
    m_AttributeName.clear();
    m_EntityCode.clear();
    m_AttributeCollection.Clear();

    // Flags.
    m_QuoteScope = false;
    m_EscapeScope = false;
    m_RequiredQuote = '\0';

    // State machine.
    m_CurrentTagType = TagType::TagType_Question;
    m_CurrentParseState = ParseState::ParseState_Normal;
}

//!---------------------------------------------------------------------------------
//! @brief Parses an in-memory XML document and reports its content to @a callback.
//! @param[in] callback   Receiver of the detected elements, text and comments.
//! @param[in] pDocument  Start of the document.
//! @param[in] byteSize   Size of the document in bytes.
//! @return true when the whole document was consumed. false when @a pDocument is
//!         null, @a byteSize is 0, or decoding stopped early because a character
//!         could not be decoded inside the document (for example a multi-byte
//!         sequence cut off at the end). Callbacks raised before that point are
//!         not undone.
//!---------------------------------------------------------------------------------
bool XmlParserImpl::Parse( CallbackInterface& callback, const char* pDocument, const size_t byteSize ) NN_NOEXCEPT
{
    if ( nullptr == pDocument || 0U == byteSize )
    {
        return false;
    }

    const char* pStream = pDocument;
    const char* const pEnd = pDocument + byteSize;

    // skip byte order mark(BOM).
    // The probe looks at up to three bytes, so it is only made when they exist.
    if ( byteSize >= 3U )
    {
        pStream = SkipByteOrderMark( pStream );
    }

    // initialize.
    CleanState();

    // parse contents.
    bool isComplete = true;
    while ( pStream < pEnd )
    {
        char32_t ucs = 0;
        const char* const pNext = m_CharacterExtractor.Decode( pStream, ucs );

        // The decoder is not told where the document ends. Refuse a result that makes
        // no progress (this loop would never finish) or that reaches past the end
        // (bytes outside the document would be stored).
        if ( pNext <= pStream || pNext > pEnd )
        {
            LOG_OUT( "Malformed or truncated character sequence detected, parsing was aborted." );
            isComplete = false;
            break;
        }

        ParseCharacter( callback, ucs, pStream, pNext );
        pStream = pNext;
    }

    // post cleaning.
    CleanState();
    return isComplete;
}


} // ::detail

//!---------------------------------------------------------------------------------
//! @brief Creates a parser instance.
//! @return Newly allocated parser (a detail::XmlParserImpl). Hand it back to
//!         XmlParser::Finalize() to destroy it.
//!---------------------------------------------------------------------------------
XmlParser* XmlParser::CreateNewParser() NN_NOEXCEPT
{
    detail::XmlParserImpl* const pImpl = new detail::XmlParserImpl();
    return pImpl;
}

//!---------------------------------------------------------------------------------
//! @brief Destroys a parser.
//! @param[in] pParser  Parser to destroy. A null pointer is ignored.
//! @note The object is deleted through the XmlParser pointer.
//!---------------------------------------------------------------------------------
void XmlParser::Finalize( XmlParser* pParser ) NN_NOEXCEPT
{
    if ( nullptr == pParser )
    {
        return;
    }
    delete pParser;
}
