﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#pragma once

/**
    @file
    @brief Header defining the nn::perflog::PerfLogger class, which provides
           in-memory, low-footprint logging. The log can be dumped after the fact.
           Design goal: keep this class usable as header-only.

*/

#include "OSDependencies.h"


namespace nn
{

namespace perflog
{


/**
    @brief Fixed-capacity, double-buffered, in-memory event log.

    Entries are written into a circular buffer of MaxEntries slots. Once the
    buffer is full, the oldest entries are overwritten. Reading the log
    (Dump / GetEntries) swaps the active buffer with a freshly reset spare one
    and then reads the retired buffer.

    @tparam UserContext  Payload stored with every entry. Dump() copies entries
                         with memcpy, so this is presumably expected to be a
                         trivially copyable type - TODO confirm with callers.
    @tparam MaxEntries   Number of slots per buffer. Must be non-zero: it is used
                         as an array size and as a modulo divisor.

    NOTE(review): slot reservation uses an atomic increment, but the wrap-around
    flag is a plain bool and the reader-side swap assumes a single reader at a
    time. Treat concurrent Dump()/GetEntries() calls as unsupported until
    confirmed otherwise.
*/
template<typename UserContext, unsigned long MaxEntries> class PerfLogger
{
private:
    class PerfLoggerData;

    // Scale factor used when converting raw timestamps to nanoseconds.
    static const uint64_t NanosecondsPerSecond = ( (uint64_t)1000000000 );

    #if defined(USE_ATOMIC)
        typedef std::atomic<uint32_t>         AtomicIndexType;
        typedef std::atomic<PerfLoggerData *> AtomicLogPtrType;
    #else
        // Plain types; atomicity is provided by the __sync_* builtins at the use sites.
        typedef uint32_t                      AtomicIndexType;
        typedef PerfLoggerData *              AtomicLogPtrType;
    #endif

public:
    /// One recorded event.
    struct LogEntry
    {
        OSRawTimestamp  timestamp;  ///< Raw platform timestamp; convert with RawTsToNanos().
        OSThreadId      thread;     ///< Thread that called Put().
        uint32_t        id;         ///< Caller-supplied event id.
        UserContext     context;    ///< Caller-supplied payload.
    };

public:
    /// Points the front buffer at m_Storage[0], the back buffer at m_Storage[1],
    /// and caches the platform tick frequency where the platform has one.
    explicit PerfLogger() NN_NOEXCEPT :
        m_CurrEntries( &m_Storage[0] ),
        m_NewEntries ( &m_Storage[1] )
    {
        #if defined(PERFLOG_PLATFORM_HORIZON)
            m_tickFrequency = nn::os::GetSystemTickFrequency();
        #elif defined(PERFLOG_PLATFORM_WINDOWS)
            if( !QueryPerformanceFrequency( &m_tickFrequency ) )
            {
                m_tickFrequency.QuadPart = 1; // avoid division by zero, still gives us raw timings - better than nothing
            }
        #else
            // none for linux
        #endif
    }

    virtual ~PerfLogger() NN_NOEXCEPT
    {
    }

    /**
        @brief Records one entry in the currently active buffer.
        @param id       Caller-defined event id.
        @param context  Payload copied into the entry.
    */
    void Put(uint32_t id, const UserContext& context ) NN_NOEXCEPT
    {
        static_cast<PerfLoggerData *>(m_CurrEntries)->Put( id, context );
    }

    /**
        @brief Retires the active buffer and copies its entries, oldest first, into @p list.
        @param list      Destination array; must hold at least MaxEntries entries.
        @param listsize  Capacity of @p list, in entries.
        @return Number of entries copied, or 0 if @p list is NULL or too small
                (in which case the buffers are not swapped).
    */
    uint32_t Dump(LogEntry* list, uint32_t listsize) NN_NOEXCEPT
    {
        if( ( list == NULL ) || ( listsize < MaxEntries ) )
        {
            return 0; // failure
        }

        PerfLoggerData * const pRetired = SwapBuffers();
        return pRetired->Dump( list );
    }


    /// @return Capacity of one log buffer, in entries.
    uint32_t GetMaxNumEntries() const NN_NOEXCEPT
    {
        return static_cast<uint32_t>( MaxEntries );
    }

    /**
        @brief Converts a raw timestamp, as stored in LogEntry::timestamp, to nanoseconds.
        @param rawTSValue  Value previously produced by OSGetRawTimestamp().
        @return The timestamp expressed in nanoseconds.
    */
    uint64_t RawTsToNanos( const OSRawTimestamp &rawTSValue ) const NN_NOEXCEPT
    {
        #if defined(PERFLOG_PLATFORM_HORIZON)
            return static_cast<uint64_t>( nn::os::ConvertToTimeSpan( nn::os::Tick(rawTSValue) ).GetNanoSeconds() );
        #elif defined(PERFLOG_PLATFORM_WINDOWS)
            // Convert whole seconds and the sub-second remainder separately.
            // Multiplying the full tick count by 1e9 overflows 64 bits once the
            // counter exceeds roughly 1.8e10 ticks.
            const uint64_t ticks        = static_cast<uint64_t>( rawTSValue.QuadPart );
            const uint64_t ticksPerSec  = static_cast<uint64_t>( m_tickFrequency.QuadPart );
            const uint64_t wholeSeconds = ticks / ticksPerSec;
            const uint64_t remainder    = ticks % ticksPerSec;
            return ( wholeSeconds * NanosecondsPerSecond )
                 + ( ( remainder * NanosecondsPerSecond ) / ticksPerSec );
        #else
            return static_cast<uint64_t>( ( rawTSValue.tv_sec * NanosecondsPerSecond ) + rawTSValue.tv_nsec );
        #endif
    }

    // keeping this here so class works as header-only
    /// @return Platform identifier of the calling thread.
    static OSThreadId OSGetCurrentThreadId() NN_NOEXCEPT
    {
        #if defined(PERFLOG_PLATFORM_HORIZON)
            return nn::os::GetCurrentThread();
        #elif defined(PERFLOG_PLATFORM_WINDOWS)
            return GetCurrentThreadId();
        #else
            return pthread_self();
        #endif
    }

    // keeping this here so class works as header-only
    /// Stores the current raw platform timestamp into @p now.
    static void OSGetRawTimestamp( OSRawTimestamp &now ) NN_NOEXCEPT
    {
        #if defined(PERFLOG_PLATFORM_HORIZON)
            now = nn::os::GetSystemTick().GetInt64Value();
        #elif defined(PERFLOG_PLATFORM_WINDOWS)
            QueryPerformanceCounter( &now );
        #else
            // NOTE(review): CLOCK_REALTIME can jump when the wall clock is adjusted;
            // confirm whether a monotonic clock would suit consumers better.
            clock_gettime( CLOCK_REALTIME, &( now ) );
        #endif
    }

private:
    static const uint32_t NumBuffers = 2; // Number of the entries buffer
    HWTickFrequency       m_tickFrequency;


private:
    /// One circular buffer of log entries plus its write cursor.
    class PerfLoggerData
    {
    private:
        static const uint32_t InitialIdxValue = 0;

    public:
        explicit PerfLoggerData() NN_NOEXCEPT
        {
            Initialize();
        }

        /// Resets the buffer to empty. Entry storage is not cleared.
        void Initialize() NN_NOEXCEPT
        {
            #if defined(USE_ATOMIC)
                m_Index.store( InitialIdxValue );
            #else
                m_Index = InitialIdxValue;
            #endif
            m_WrapAroundFlag = false;
        }

        /// @return Number of Put() calls since the last Initialize() (not reduced modulo capacity).
        uint32_t GetCurrIndex() const NN_NOEXCEPT
        {
        #if defined(USE_ATOMIC)
            return m_Index.load();
        #else
            return m_Index;
        #endif
        }

        /// @return Slot index of the oldest stored entry.
        uint32_t GetFirstIndex() const NN_NOEXCEPT
        {
            if( HasWrapped() )
            {
                // the next slot to be written is also the oldest surviving entry
                return static_cast<uint32_t>( GetCurrIndex() % MaxEntries );
            }
            return 0;
        }

        /// @return Number of valid entries; never more than MaxEntries.
        uint32_t GetNumEntries() const NN_NOEXCEPT
        {
            if( HasWrapped() )
            {
                return static_cast<uint32_t>( MaxEntries );
            }
            return GetCurrIndex();
        }

        // WARNING: don't use this while this is
        // the official buffer, as new logs from
        // other threads will change it from under you
        /**
            @brief Copies all valid entries, oldest first, into @p list.
            @param list  Destination with room for at least MaxEntries entries.
            @return Number of entries copied.
        */
        uint32_t Dump(LogEntry* list) const NN_NOEXCEPT
        {
            // Take one snapshot of the cursor so the count and the start
            // position are derived from the same value.
            const uint32_t cursor   = GetCurrIndex();
            const uint32_t capacity = static_cast<uint32_t>( MaxEntries );
            const bool     wrapped  = m_WrapAroundFlag || ( cursor > capacity );

            if( !wrapped )
            {
                // single segment: slots [0, cursor)
                memcpy( list, m_Entries, cursor * sizeof(list[0]) );
                return cursor;
            }

            // two segments, head and tail
            const uint32_t firstIdx = cursor % capacity;
            const uint32_t tailSize = capacity - firstIdx;

            // tail comes first, as firstIdx points to what would be the next element
            memcpy( list, m_Entries + firstIdx, tailSize * sizeof(list[0]) );

            // head comes after that (empty when firstIdx == 0)
            memcpy( list + tailSize, m_Entries, firstIdx * sizeof(list[0]) );

            return capacity;
        }

        /**
            @brief Reserves the next slot and fills it with a timestamped entry.
            @param id       Caller-defined event id.
            @param context  Payload copied into the entry.
        */
        void Put( uint32_t id, const UserContext& context ) NN_NOEXCEPT
        {
            uint32_t slot;

            // Both variants return the "value preceding the effects", so the
            // first call yields 0 and entries start at slot 0.

            // Use atomic operation to increment the index counter
            #if defined(USE_ATOMIC)
                slot = m_Index.fetch_add(1);
            #else
                slot = __sync_fetch_and_add( &m_Index, 1 );
            #endif

            if( slot == MaxEntries )
            {
                // first write past the end of the buffer: the oldest entry is about to be overwritten
                m_WrapAroundFlag = true;
            }

            // Circle back to the top of the buffer when the index has reached the tail of the circular buffer
            // (Note: MaxEntries => size of the buffer)
            slot = static_cast<uint32_t>( slot % MaxEntries );

            LogEntry* log = &m_Entries[slot];
            OSGetRawTimestamp( log->timestamp );
            log->id = id;
            log->thread = OSGetCurrentThreadId();
            log->context = context;
        }

    private:
        /// True once more than MaxEntries entries have been written. Also derived
        /// from the cursor so the result does not depend solely on the flag
        /// having been stored by the writer that crossed the boundary.
        bool HasWrapped() const NN_NOEXCEPT
        {
            return m_WrapAroundFlag || ( GetCurrIndex() > MaxEntries );
        }

    private:
        bool            m_WrapAroundFlag;
        AtomicIndexType m_Index;  // Log entries index - atomically incremented
        LogEntry        m_Entries[MaxEntries];


    public:
        /// @return Pointer to the raw slot array (in slot order, not chronological order).
        const LogEntry *GetEntries() const NN_NOEXCEPT
        {
            return m_Entries;
        }
    };

public:
    /**
        @brief Forward cursor over the entries of a retired buffer, oldest first.

        Typical use: for( it.Reset(); it.Get() != NULL; it.Next() ) { ... }
    */
    class Iterator
    {
    public:
        /// Creates an empty iterator; Get() returns NULL.
        explicit Iterator() NN_NOEXCEPT :
            m_pData( NULL ),
            m_Index( 0 ),
            m_NumItems( 0 ),
            m_NumTimesAdvanced( 0 )
        {
        }

        /// Creates an iterator over @p pUserData, positioned at its oldest entry.
        explicit Iterator( PerfLoggerData * pUserData ) NN_NOEXCEPT :
            m_pData( pUserData ),
            m_Index( 0 ),
            m_NumItems( 0 ),
            m_NumTimesAdvanced( 0 )
        {
            Reset();
        }

        /// Repositions at the oldest entry and re-reads the entry count from the buffer.
        void Reset() NN_NOEXCEPT
        {
            if( m_pData != NULL )
            {
                m_Index = m_pData->GetFirstIndex();
                m_NumItems = m_pData->GetNumEntries();
            }
            m_NumTimesAdvanced = 0;
        }

        /// Advances to the next entry.
        /// @return The new current entry, or NULL once the end has been passed.
        const LogEntry* Next() NN_NOEXCEPT
        {
            if( (m_NumTimesAdvanced + 1) < m_NumItems )
            {
                m_Index  = static_cast<uint32_t>( (m_Index + 1 ) % MaxEntries );
                ++m_NumTimesAdvanced;
                return &( m_pData->GetEntries()[ m_Index ] );
            }

            // mark as exhausted so that Get() returns NULL from now on
            m_NumTimesAdvanced = m_NumItems;
            return NULL;
        }

        /// @return The current entry, or NULL if the iterator is empty or exhausted.
        const LogEntry* Get() const NN_NOEXCEPT
        {
            if( m_NumTimesAdvanced < m_NumItems )
            {
                return &( m_pData->GetEntries()[ m_Index ] );
            }
            return NULL;
        }

    private:
        PerfLoggerData * m_pData;
        uint32_t         m_Index;
        uint32_t         m_NumItems;
        uint32_t         m_NumTimesAdvanced;
    };



public:
    /**
        @brief Retires the active buffer and returns an iterator over its entries.
        @param pNentries  Optional; receives the number of entries in the retired buffer.
        @return Iterator positioned at the oldest entry. It refers to the retired
                buffer, which is reset by the next Dump()/GetEntries() call.
    */
    Iterator GetEntries( uint32_t* pNentries = NULL ) NN_NOEXCEPT
    {
        PerfLoggerData * const pRetired = SwapBuffers();

        if( pNentries != NULL )
        {
            (*pNentries) = pRetired->GetNumEntries();
        }

        return Iterator( pRetired );
    }

    /**
        @brief Walks every entry of @p pPerfLogger's retired buffer.

        The per-entry output call is currently commented out, so this only
        swaps the logger's buffers and iterates. A NULL logger is ignored.
    */
    inline void Dump(PerfLogger* pPerfLogger) NN_NOEXCEPT
    {
        if( pPerfLogger == NULL )
        {
            return;
        }

        const LogEntry *e;
        Iterator it = pPerfLogger->GetEntries();
        for( it.Reset(); (e = it.Get()) != NULL; it.Next() )
        {
            (void)e; // unused until the output call below is enabled
            // TICS_RtlUT("%d,%llu,%d,%llu\n", e->id, e->context, e->timestamp );
        }
    }

private: // cheap singleton interface to make it easy to use without passing context around
    static PerfLogger g_singletonLog;

public:
    /// @return The shared logger instance for this template instantiation.
    static PerfLogger * GetLogger()
    {
        return &g_singletonLog;
    }


private:
    /**
        @brief Resets the back buffer, makes it the active one, and returns the
               buffer that was active until now.

        After the call m_NewEntries points at the returned (retired) buffer.
    */
    PerfLoggerData * SwapBuffers() NN_NOEXCEPT
    {
        // get this ready to be our new storage, reinitialized
        m_NewEntries->Initialize();

        // Use atomic operation to swap double-buffer
        #if defined(USE_ATOMIC)
            m_NewEntries = m_CurrEntries.exchange( m_NewEntries );
        #else
            m_NewEntries = __sync_val_compare_and_swap(&m_CurrEntries, m_CurrEntries, m_NewEntries);
        #endif

        // note that this is what m_CurrEntries used to be,
        // due to the swap that happened above
        return m_NewEntries;
    }

private:

    AtomicLogPtrType   m_CurrEntries;  // Points to front-buffer for current logging
    PerfLoggerData     m_Storage[NumBuffers];  // Log entries double buffer
    PerfLoggerData *   m_NewEntries;  // Points to back-buffer for swapping
};


// Out-of-class definition of the static singleton instance returned by GetLogger();
// one instance exists per distinct template instantiation.
template <typename UserContext, unsigned long MaxEntries>
PerfLogger<UserContext, MaxEntries>
    PerfLogger<UserContext, MaxEntries>::g_singletonLog;


} /* namespace perflog */


} /* namespace nn */




