﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/
/* Copyright (c) 2015-16, NVIDIA CORPORATION.  All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto.  Any use, reproduction, disclosure or
* distribution of this software and related documentation without an
* express license agreement from NVIDIA Corporation is strictly prohibited.
*/


#include <cmath>
//#include <stdio.h>
#include <cstdio>
#include <nn/nn_Assert.h>
#include <nn/nn_Common.h>
#include <nn/os.h>
#include <nn/init.h>
#include <nn/fs.h>
#include <nn/lmem/lmem_ExpHeap.h>
#include <nnt/nntest.h>
#include <nnt/nnt_Argument.h>
#include <nn/mem/mem_StandardAllocator.h>
#include <nv/nv_MemoryManagement.h>
#include <nv/nv_ServiceName.h>
#include <map>

#include "jpeglib.h"
#include "jpegint.h"
#include "nvos.h"
#include "Utilities.h"
#include "mm_MemoryManagement.h"
#include <nn/fs/fs_SdCardForDebug.h>

#include <string>

// Number of scanline row pointers in the `lines` array that the encode test
// sets up inside its work buffer and passes to jpeg_write_scanlines().
const int BUFF_HEIGHT = 8;

namespace nn { namespace nvjpg {

namespace
{

// Minimal profiling helpers built on NvOsGetTimeUS().
// PROF_START(t) stores the current timestamp in `t`; PROF_END(t) then
// overwrites `t` with the time elapsed since that timestamp.
// (Presumably microseconds, going by the "US" suffix - confirm against nvos.h.)
// Re-running the pair on the same variable keeps only the latest measurement.
#define PROF_START(t)   (t = NvOsGetTimeUS())
#define PROF_END(t)     (t = NvOsGetTimeUS() - t)

// Chroma subsampling layouts. The encode test translates the selected value
// into libjpeg per-component h/v sampling factors (chroma is always 1x1).
enum JpegSamplingRatio
{
    JpegSamplingRatio_444 = 0,  // luma sampling factors 1x1
    JpegSamplingRatio_422 = 1,  // luma sampling factors 2x1 (h x v)
    JpegSamplingRatio_420 = 2   // luma sampling factors 2x2
};

// Size passed to nn::os::SetMemoryHeapSize() in nninitStartup().
const size_t heapSize = 600 * 1024 * 1024;

// Polynomial XORed into the running value by BuildCRCTable() whenever the low
// bit is set before a right shift.
const unsigned long int CRC32_POLYNOMIAL = 0xEDB88320L;

// Encode test body; defined further down and invoked from the TEST() case.
void ImageJpegEncodingNvjpeg_test();

static NvU32 CRCTable[256];
static void BuildCRCTable()
{
    NvU16 i;
    NvU16 j;
    NvU32 crc;
    for (i = 0; i <= 255; i++)
    {
       crc = i;
        for (j = 8; j > 0; j--)
        {
            if (crc & 1)
            {
                crc = (crc >> 1) ^ CRC32_POLYNOMIAL;
            }
            else
            {
                crc >>= 1;
            }
        }
        CRCTable[i] = crc;
    }
}
// Folds `count` bytes from `buffer` into the running checksum `crc` using
// CRCTable and returns the updated value (converted to int32_t).
// BuildCRCTable() must have been called first. No initial or final inversion
// is applied here; the caller supplies the seed.
static int32_t CalculateBufferCRC(uint32_t count, uint32_t crc, uint8_t *buffer)
{
    for (uint32_t offset = 0; offset < count; ++offset)
    {
        const uint32_t shifted = (crc >> 8) & 0x00FFFFFFL;
        const uint32_t lookup = CRCTable[(crc ^ buffer[offset]) & 0xFF];
        crc = shifted ^ lookup;
    }
    return crc;
}

//---------------------------------------------------------------------------
// This unnamed namespace contains the file system heap allocator/deallocator
// and the multimedia (graphics) allocation callbacks.
//---------------------------------------------------------------------------

namespace{

    // Size in bytes of the buffer backing the file-system heap.
    const int FsHeapSize = 512 * 1024;
    // Size in bytes of the pool managed by g_MultimediaAllocator.
    const int mmHeapSize = 200 * 1024 * 1024;
    // Size in bytes of the region handed to nv::InitializeGraphics().
    const int mmFirmwareMemorySize = 8 * 1024 * 1024;

    // Backing storage and handle of the expanded heap created by FsInitHeap().
    uint8_t              g_FsHeapBuffer[FsHeapSize];
    nn::lmem::HeapHandle g_FsHeap;

    // Static pool plus the allocator constructed over it; used by the
    // MultimediaAllocate/MultimediaFree/MultimediaReallocate callbacks.
    char                        g_mmHeapBuffer[mmHeapSize];
    nn::mem::StandardAllocator  g_MultimediaAllocator(g_mmHeapBuffer, sizeof(g_mmHeapBuffer));

    // 4096-byte-aligned region passed to nv::InitializeGraphics() by the Encoder test.
    char                        g_mmFirmwareMemory[mmFirmwareMemorySize] __attribute__((aligned(4096)));

    // Creates the expanded heap over g_FsHeapBuffer (debug-fill option) and
    // stores its handle in g_FsHeap for FsAllocate()/FsDeallocate().
    void FsInitHeap()
    {
        g_FsHeap = nn::lmem::CreateExpHeap(
            g_FsHeapBuffer,
            FsHeapSize,
            nn::lmem::CreationOption_DebugFill);
    }

    // Allocation callback registered with nn::fs::SetAllocator(): takes
    // `size` bytes from the g_FsHeap expanded heap and returns the block.
    void* FsAllocate(size_t size)
    {
        void* const block = nn::lmem::AllocateFromExpHeap(g_FsHeap, size);
        return block;
    }

    // Deallocation callback registered with nn::fs::SetAllocator(): returns
    // block `p` to the g_FsHeap expanded heap. `size` is not needed.
    void FsDeallocate(void* p, size_t size)
    {
        nn::lmem::FreeToExpHeap(g_FsHeap, p);
        NN_UNUSED(size);
    }

    // Live multimedia allocations (address -> requested size). Maintained by
    // the Multimedia* callbacks and dumped by the Encoder test after
    // nv::FinalizeGraphics() to report blocks that were never freed.
    static std::map<void*, size_t> g_Allocs;
    // Serialises access to g_MultimediaAllocator and g_Allocs.
    // NOTE(review): the `true` argument presumably requests a recursive mutex - confirm.
    nn::os::Mutex g_AllocMutex(true);

    void* MultimediaAllocate(size_t size, size_t alignment, void *userPtr)
    {
        g_AllocMutex.Lock();
        void *ptr = g_MultimediaAllocator.Allocate(size, alignment);
        g_Allocs.insert(std::pair<void*, size_t>(ptr, size));
        g_AllocMutex.Unlock();

        return ptr;
    }

    void MultimediaFree(void *addr, void *userPtr)
    {
        if(!addr)
            return;

        g_AllocMutex.Lock();
        std::map<void*, size_t>::iterator it = g_Allocs.find(addr);
        NN_ASSERT(it != g_Allocs.end());
        g_Allocs.erase(it);
        g_MultimediaAllocator.Free(addr);
        g_AllocMutex.Unlock();
    }

    void *MultimediaReallocate(void* addr, size_t newSize, void *userPtr)
    {
        g_AllocMutex.Lock();

        if(addr)
        {
            std::map<void*, size_t>::iterator it = g_Allocs.find(addr);
            NN_ASSERT(it != g_Allocs.end());
            g_Allocs.erase(it);
        }

        void *ptr = g_MultimediaAllocator.Reallocate(addr, newSize);
        g_Allocs[ptr] = newSize;
        g_AllocMutex.Unlock();

        return ptr;
    }
}

//----------------------------------------------------------
// nninitStartup() is invoked before calling nnMain().
//
// Sets the memory heap size to `heapSize`, allocates a block of
// MallocMemorySize bytes and hands it to nn::init::InitializeAllocator(),
// then creates the file-system heap and registers FsAllocate/FsDeallocate
// with nn::fs. If either memory call fails it logs and returns early,
// leaving the remaining steps undone.
//
// NOTE(review): this definition sits inside an unnamed namespace; confirm
// the toolchain still emits it with external C linkage so the SDK startup
// code can find it.
//----------------------------------------------------------
extern "C" void nninitStartup()
{
    const size_t MallocMemorySize = 84 * 1024 * 1024;
    uintptr_t address = 0;
    nn::Result result;

    /* set heap size */
    result = nn::os::SetMemoryHeapSize(heapSize);
    if (!result.IsSuccess()) {
        NN_SDK_LOG("Failed SetMemoryHeapSize\n");
        return;
    }
    result = nn::os::AllocateMemoryBlock(&address, MallocMemorySize);
    NN_ASSERT(result.IsSuccess());
    if (!result.IsSuccess()) {
        // Reached only if NN_ASSERT is compiled out: never hand an invalid
        // address to the allocator.
        NN_SDK_LOG("Failed AllocateMemoryBlock\n");
        return;
    }
    nn::init::InitializeAllocator(reinterpret_cast<void*>(address), MallocMemorySize);

    // Set file system allocator and deallocator
    FsInitHeap();
    nn::fs::SetAllocator(FsAllocate, FsDeallocate);
}

// Prints the command-line synopsis accepted by ImageJpegEncodingNvjpeg_test().
// --in, -width and -height are mandatory (the test bails out without them);
// everything else is optional. Every option takes exactly one value.
// The CRC reference values are parsed as hexadecimal.
static void displayUsage()
{
    const char *programName = nnt::GetHostArgv()[0];

    NN_LOG("usage: %s --in <file> -width <width> -height <height> [--out <file>]\n", programName);
    NN_LOG("usage for CRC: %s --in <file> -width <width> -height <height> --outcrc <Reference Value>\n", programName);
    NN_LOG("optional: [--incrc <Reference Value>] [--loop <count>] [--rgb <0|1>] [--quality <value>] [--perf <0|1>]\n");
}

//extern "C" void nnMain()
// Encodes one raw image file to JPEG through libjpeg with tegra_acceleration
// enabled, driven entirely by the host command line.
//
// Options (each takes exactly one value):
//   -width <n> / -height <n>   image dimensions (mandatory)
//   --in <file>                raw input; a path starting with "sdcard" mounts
//                              the SD card, anything else mounts the host root
//   --out <file>               where to save the encoded JPEG
//   --incrc / --outcrc <hex>   reference CRCs for the input / encoded data
//   --loop <n>                 number of encode passes
//   --rgb <0|1>                non-zero: input is 4 bytes per pixel, otherwise
//                              planar Y plane followed by two chroma planes
//   --quality <n>              value passed to jpeg_set_quality()
//   --perf <0|1>               non-zero: print the timing table at the end
//
// CRC results are only logged (PASSED/FAILED); the function returns nothing.
void ImageJpegEncodingNvjpeg_test()
{
    jpeg_compress_struct cinfo = {};
    struct jpeg_error_mgr jerr;
    uint32_t Width = 0, Height = 0;
    unsigned long codeSize;
    JSAMPROW lines[BUFF_HEIGHT] = {}; // This is actually not used by the HW path
    JpegSamplingRatio kSample;
    uint8_t *pJpegData;
    uint32_t alignedWidth;
    float BytesPerPixel;
    uint32_t kWorkBufSize;
    uint8_t *pInputYuv;
    uint8_t OutCrcEnabled = 0, InCrcEnabled = 0;
    uint32_t OutRefCrc = 0, InRefCrc = 0;
    int argc = nn::os::GetHostArgc();
    char** argv = nn::os::GetHostArgv();
    const char *inFile = NULL;
    const char *outFile = NULL;
    uint32_t Rgb = 0;
    int32_t Crc = 0;
    int32_t Quality = 100;
    uint32_t Loop, LoopCount = 1;
    uint32_t JpegBufSize;
    uint32_t Perf = 0;
    uint64_t SetAllocatorTime = 0;
    uint64_t ArgParsingTime = 0;
    uint64_t MountTime = 0;
    uint64_t yuvDataReadTime = 0;
    uint64_t WorkbufAllocTime = 0;
    uint64_t CreateCompressTime = 0;
    uint64_t MemDstTime = 0;
    uint64_t SetDefaultsColorspaceTime = 0;
    uint64_t SetQualityTime = 0;
    uint64_t StartCompressTime = 0;
    uint64_t FillYuvBufTime = 0;
    uint64_t FillRgbBufTime = 0;
    uint64_t WriteScanlinesTime = 0;
    uint64_t FinishCompressTime = 0;
    uint64_t DestroyCompressTime = 0;
    uint64_t UnMountTime = 0;
    uint64_t StartTime = 0;
    uint64_t EndTime = 0;

    StartTime = NvOsGetTimeUS();

    PROF_START(ArgParsingTime);
    for (int i = 1; i < argc; i++)
    {
        // Every option consumes a value; stop when no value is left.
        if(argc - i < 2)
            break;
        if (!strcmp(argv[i], "-width"))
            Width = atoi(argv[++i]);
        else if(!strcmp(argv[i], "-height"))
            Height = atoi(argv[++i]);
        else if(!strcmp(argv[i], "--in"))
            inFile = argv[++i];
        else if(!strcmp(argv[i], "--out"))
            outFile = argv[++i];
        else if(!strcmp(argv[i], "--outcrc"))
        {
            OutCrcEnabled = 1;
            sscanf(argv[++i], "%8x\n", &OutRefCrc);
        }
        else if(!strcmp(argv[i], "--incrc"))
        {
            InCrcEnabled = 1;
            sscanf(argv[++i], "%8x\n", &InRefCrc);
        }
        else if(!strcmp(argv[i], "--loop"))
        {
            // %u: the destination is unsigned (was %d into a uint32_t).
            sscanf(argv[++i], "%u\n", &LoopCount);
        }
        else if (!strcmp(argv[i], "--rgb"))
        {
            sscanf(argv[++i], "%u\n", &Rgb);
        }
        else if (!strcmp(argv[i], "--quality"))
        {
            sscanf(argv[++i], "%d\n", &Quality);
        }
        else if (!strcmp(argv[i], "--perf"))
        {
            sscanf(argv[++i], "%u\n", &Perf);
        }
        else
        {
            displayUsage();
            return;
        }
    }
    if(!Width || !Height || !inFile)
    {
        displayUsage();
        return;
    }
    PROF_END(ArgParsingTime);

    if (InCrcEnabled || OutCrcEnabled)
        BuildCRCTable();

    NN_SDK_LOG("File path is :%s\n",inFile);

    PROF_START(MountTime);
    bool sdcardMounted = false;
    const char* token = "sdcard";
    int compare = strncmp(inFile, token, strlen(token));

    if (compare == 0)
    {
        NN_SDK_LOG("in sd card  :%s\n",inFile);
        nn::Result resultSdcardMount = nn::fs::MountSdCardForDebug("sdcard");
        if( resultSdcardMount.IsFailure() )
        {
            NN_SDK_LOG( "\n nn::fs::SD card mount failure. Module:%d, Description:%d\n" ,
                    resultSdcardMount.GetModule(),
                    resultSdcardMount.GetDescription());
            return;
        }
        sdcardMounted = true;
    }
    else {
        nn::Result resultHostMount = nn::fs::MountHostRoot();
        if (resultHostMount.IsFailure())
        {
            // The format string previously had no specifiers for these two values.
            NN_SDK_LOG("nn::fs::Host root mount failure. Module:%d, Description:%d\n",
                    resultHostMount.GetModule(),
                    resultHostMount.GetDescription());
            return;
        }

    }
    NN_SDK_LOG("mount done work on it :%s\n",inFile);
    PROF_END(MountTime);

    alignedWidth = (Width + 3) & (~3);
    BytesPerPixel = Rgb ? 4 : 1.5;

    PROF_START(yuvDataReadTime);
    FileData yuvData(inFile);
    PROF_END(yuvDataReadTime);

    if (InCrcEnabled)
    {
        Crc = CalculateBufferCRC(yuvData.GetDataSize(), Crc, (uint8_t *)(yuvData.GetDataPtr()));
        if (InRefCrc == Crc)
            NN_SDK_LOG("%s:%d Input CRC PASSED\n", __func__, __LINE__);
        else
            NN_SDK_LOG("%s:%d Input CRC FAILED\n", __func__, __LINE__);
    }

    // Refuse inputs that are too small for the requested dimensions; the fill
    // loops below would otherwise read past the end of the file buffer.
    // The sizes mirror exactly what those loops consume.
    {
        const uint64_t lumaBytes = static_cast<uint64_t>(Width) * Height;
        uint64_t requiredInputBytes = 0;
        if (Rgb)
        {
            requiredInputBytes = lumaBytes * 4;
        }
        else
        {
            const uint64_t chromaWidth = ((static_cast<uint64_t>(Width) + 1) & ~static_cast<uint64_t>(1)) / 2;
            requiredInputBytes = lumaBytes + (chromaWidth * Height) / 2 + chromaWidth * (Height / 2);
        }

        if (yuvData.GetDataPtr() == NULL ||
            static_cast<uint64_t>(yuvData.GetDataSize()) < requiredInputBytes)
        {
            NN_SDK_LOG("%s:%d Input %s is too small, %lld bytes required\n",
                    __func__, __LINE__, inFile, static_cast<long long>(requiredInputBytes));
            if (sdcardMounted)
            {
                nn::fs::Unmount("sdcard");
            }
            else
            {
                nn::fs::UnmountHostRoot();
            }
            return;
        }
    }

    pInputYuv  = reinterpret_cast<uint8_t*>(yuvData.GetDataPtr());

    // Allocate worst case size same as of input yuv size
    JpegBufSize = alignedWidth * Height * BytesPerPixel + 1024; // Add margin of 1K to accomodate JPEG header

    Buffer jpegData(JpegBufSize);

    const uint32_t kRowStride = Width * 3;

    // Work buffer
    kWorkBufSize = alignedWidth * 30 +  // Large memory allocation used by libjpeg
                   alignedWidth * 16 +  // For strip buffer used by libjpeg
                   1600 +               // For first PERMANENT pool
                   16000 +              // For first IMAGE pool
                   5000 +               // additional IMAGE pools
                   1024;                // For padding

    PROF_START(WorkbufAllocTime);
    Buffer workBuf(kWorkBufSize);
    PROF_END(WorkbufAllocTime);

    // The scanline row pointers alias the tail of the work buffer.
    auto pLineHead = reinterpret_cast<uintptr_t>(workBuf.GetDataPtr()) + (int)((kWorkBufSize - sizeof(JSAMPLE) * BUFF_HEIGHT * kRowStride));
    for (int i = 0; i < BUFF_HEIGHT; i++)
    {
        lines[i] = reinterpret_cast<JSAMPLE*>(pLineHead) + i * kRowStride;
    }
    jpeg_workbuf wbMgr = {};
    wbMgr.ptr = reinterpret_cast<JSAMPLE*>(workBuf.GetDataPtr());
    wbMgr.total = workBuf.GetDataSize();
    cinfo.workbuf = &wbMgr;

    PROF_START(CreateCompressTime);
    cinfo.err = jpeg_std_error(&jerr);
    jpeg_create_compress(&cinfo);
    PROF_END(CreateCompressTime);
    pJpegData = (uint8_t *)jpegData.GetDataPtr();
    codeSize = jpegData.GetDataSize();

    PROF_START(MemDstTime);
    jpeg_mem_dest(&cinfo, reinterpret_cast<uint8_t**>(&(pJpegData)), &codeSize);
    PROF_END(MemDstTime);

    // Initialize input/output parameters
    if (Rgb)
        cinfo.in_color_space = JCS_RGB;
    else
        cinfo.in_color_space = JCS_YCbCr;

    cinfo.tegra_acceleration = true;


    PROF_START(SetDefaultsColorspaceTime);
    jpeg_set_defaults(&cinfo);

    jpeg_set_colorspace(&cinfo, JCS_YCbCr);
    PROF_END(SetDefaultsColorspaceTime);

    kSample = JpegSamplingRatio_420;
    switch (kSample)
    {
    case JpegSamplingRatio_444:
        cinfo.comp_info[0].h_samp_factor = 1;
        cinfo.comp_info[0].v_samp_factor = 1;
        cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = cinfo.comp_info[2].v_samp_factor = 1;
        break;
    case JpegSamplingRatio_422:
        cinfo.comp_info[0].h_samp_factor = 2;
        cinfo.comp_info[0].v_samp_factor = 1;
        cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = cinfo.comp_info[2].v_samp_factor = 1;
        break;
    case JpegSamplingRatio_420:
        cinfo.comp_info[0].h_samp_factor = 2;
        cinfo.comp_info[0].v_samp_factor = 2;
        cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = cinfo.comp_info[2].v_samp_factor = 1;
        break;
    default:
        NN_UNEXPECTED_DEFAULT;
    }

    PROF_START(SetQualityTime);
    jpeg_set_quality(&cinfo, Quality, true);
    PROF_END(SetQualityTime);
    cinfo.input_components = 3;

    cinfo.image_width = Width;
    cinfo.image_height = Height;
    cinfo.outputBuffSize = JpegBufSize;
    cinfo.dct_method = JDCT_ISLOW;
    cinfo.optimize_coding = false;
    cinfo.do_fancy_downsampling = false;

    // Begin encoding
    PROF_START(StartCompressTime);
    jpeg_start_compress(&cinfo, TRUE);
    PROF_END(StartCompressTime);

    for (Loop = 0; Loop < LoopCount; Loop++)
    {
        // Rewind the compressor so each pass encodes the whole image again.
        // NOTE(review): 100 matches CSTATE_START in stock libjpeg's jpegint.h - confirm for this port.
        cinfo.global_state = 100;
        cinfo.next_scanline = 0;
        while (cinfo.next_scanline < cinfo.image_height) {
            JDIMENSION currentLine = cinfo.next_scanline;
            uint32_t linesToEncode = Height - currentLine;

            if (cinfo.tegra_acceleration)
            {
                if (cinfo.in_color_space == JCS_YCbCr)
                {
                    const uint8_t *yData, *uData, *vData;
                    uint32_t WidthUv;
                    cinfo.jpegTegraMgr->mcu_type = 0;
                    PROF_START(FillYuvBufTime);
                    // Y offset
                    yData = pInputYuv;
                    for (uint16_t y = 0; y < Height; y++)
                    {
                        uint8_t *pPixel = (uint8_t *)cinfo.jpegTegraMgr->buff[0] + cinfo.jpegTegraMgr->pitch[0] * y;
                        for (uint16_t x = 0; x < Width; x++)
                        {
                            *pPixel++ = *yData++;
                        }
                    }

                    uData = pInputYuv + (Width * Height);
                    WidthUv = (Width + 1) & ~1;
                    vData = uData + (WidthUv / 2 * Height / 2);

                    // V offset
                    vData = vData + (WidthUv / 2 * currentLine / 2);
                    for (uint16_t y = 0; y < Height / 2; y++)
                    {
                        uint8_t *pPixel = (uint8_t *)cinfo.jpegTegraMgr->buff[1] + cinfo.jpegTegraMgr->pitch[1] * y;
                        for (uint16_t x = 0; x < WidthUv / 2; x++)
                        {
                            *pPixel++ = *vData++;
                        }
                    }
                    // U offset
                    uData = uData + (WidthUv / 2 * currentLine / 2);
                    for (uint16_t y = 0; y < Height / 2; y++)
                    {
                        uint8_t *pPixel = (uint8_t *)cinfo.jpegTegraMgr->buff[2] + cinfo.jpegTegraMgr->pitch[2] * y;
                        for (uint16_t x = 0; x < WidthUv / 2; x++)
                        {
                            *pPixel++ = *uData++;
                        }
                    }
                    PROF_END(FillYuvBufTime);
                }
                else if (cinfo.in_color_space == JCS_RGB)
                {
                    uint8_t *pRgbBuf;
                    // Walk a local cursor: advancing pInputYuv itself made
                    // every pass after the first read beyond the input buffer.
                    const uint8_t *pRgbSrc = pInputYuv;

                    PROF_START(FillRgbBufTime);
                    for(int y = 0; y < Height; y++)
                    {
                        pRgbBuf = cinfo.pOutputSurface + cinfo.pitch * y;
                        memcpy(pRgbBuf, pRgbSrc, Width * 4);
                        pRgbSrc += Width * 4;
                    }
                    PROF_END(FillRgbBufTime);
                }

                PROF_START(WriteScanlinesTime);
                (void) jpeg_write_scanlines(&cinfo, lines, linesToEncode);
                PROF_END(WriteScanlinesTime);
            }
        }

        PROF_START(FinishCompressTime);
        jpeg_finish_compress(&cinfo);
        PROF_END(FinishCompressTime);

        // NOTE(review): the CRC and the save below read jpegData's buffer, not
        // pJpegData. Stock libjpeg's jpeg_mem_dest() may switch to a buffer of
        // its own when the supplied one overflows - confirm this port's
        // hardware path never does.
        if (OutCrcEnabled)
        {
            Crc = CalculateBufferCRC(codeSize, 0, (uint8_t *)(jpegData.GetDataPtr()));
            if (OutRefCrc == Crc)
                NN_SDK_LOG("%s:%d Test %s PASSED\n", __func__, __LINE__, inFile);
            else
                NN_SDK_LOG("%s:%d Test %s FAILED\n", __func__, __LINE__, inFile);
        }

        if (outFile != NULL)
            FileData::Save(outFile, jpegData.GetDataPtr(), codeSize);

        // Clear the previous result so a later pass cannot pass on stale data.
        if (LoopCount > 1)
            memset(jpegData.GetDataPtr(), 0, codeSize);
    }

    PROF_START(DestroyCompressTime);
    jpeg_destroy_compress(&cinfo);
    PROF_END(DestroyCompressTime);

    PROF_START(UnMountTime);
    if( sdcardMounted == true )
    {
        nn::fs::Unmount("sdcard");
    }
    else
    {
        nn::fs::UnmountHostRoot();
    }

    PROF_END(UnMountTime);
    EndTime = NvOsGetTimeUS();
    if (Perf)
    {
        // Header columns are listed in exactly the order the values are printed.
        NN_LOG("YuvFilename, Width, Height, SetAllocatorTime, ArgParsingTime, MountTime, yuvDataReadTime, WorkBufAllocTime, CreateCompressTime, MemDstTime, SetDefaultsColorspaceTime, SetQualityTime, StartCompressTime, FillYuvBufTime, FillRgbBufTime, WriteScanlinesTime, FinishCompressTime, DestroyCompressTime, UnMountTime, TotalEncodeTime\n");
        NN_LOG("%s, %d, %d, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld, %lld\n",
               inFile, Width, Height, SetAllocatorTime, ArgParsingTime, MountTime, yuvDataReadTime, WorkbufAllocTime, CreateCompressTime, MemDstTime, SetDefaultsColorspaceTime, SetQualityTime, StartCompressTime, FillYuvBufTime, FillRgbBufTime, WriteScanlinesTime, FinishCompressTime, DestroyCompressTime, UnMountTime, (EndTime - StartTime));
    }
}//NOLINT(impl/function_size)


// Test case: registers the Multimedia* callbacks with the nv graphics and
// multimedia libraries, initialises graphics with g_mmFirmwareMemory, runs the
// encode test, finalises graphics and then reports every allocation still
// present in g_Allocs.
//
// The case always ends with SUCCEED(): CRC mismatches and leftover
// allocations are only logged, never turned into a test failure.
TEST(ImageJpegEncodingNvjpeg, Encoder)
{
    /* Set allocator callback functions */
    nv::SetGraphicsAllocator(MultimediaAllocate, MultimediaFree, MultimediaReallocate, NULL);
    nv::SetGraphicsServiceName("nvdrv:t");
    nv::SetGraphicsDevtoolsAllocator(MultimediaAllocate, MultimediaFree, MultimediaReallocate, NULL);
    nv::InitializeGraphics(g_mmFirmwareMemory, sizeof(g_mmFirmwareMemory));
    nv::mm::SetAllocator(MultimediaAllocate, MultimediaFree, MultimediaReallocate, NULL);

    ImageJpegEncodingNvjpeg_test();

    nv::FinalizeGraphics();

    // size_t values are cast to long long and printed with %lld so the
    // argument type always matches the conversion specifier.
    NN_LOG("Memory left: %lld allocations\n", static_cast<long long>(g_Allocs.size()));
    long long memory = 0;
    for(std::map<void*, size_t>::iterator it = g_Allocs.begin(); it != g_Allocs.end(); ++it)
    {
        NN_SDK_LOG("address: %p size: %lld\n", it->first, static_cast<long long>(it->second));
        memory += static_cast<long long>(it->second);
    }
    NN_LOG("Total not deallocated: %lld\n", memory);
    SUCCEED();
}
}}}

