﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

////===========================================================================
///  demoCapture.c
///
///     This is capture code for the demo library.
///
////===========================================================================

#include <gfx/demo.h>

#if NN_GFX_IS_TARGET_GX
#include <cafe/gx2.h>
#endif
#if NN_GFX_IS_TARGET_NVN
#include <nvn/nvn.h>
#include <nvn/nvn_FuncPtrInline.h>
#endif

#if NN_GFX_IS_TARGET_GL
#include <GL/glew.h>
#endif

// Global capture state used by the functions in this file: the copy-destination
// color buffer (with its width/height/pitch/format), the requested capture
// format, and the encoded output image (TGAData / TGALength).
DEMOCaptureDataStore DEMOCaptureData; // global

// One four-channel, 8-bit-per-channel pixel. The encoders in this file treat
// x/y/z/w as the R/G/B/A bytes of the captured image, in memory order.
typedef struct _DEMOColor
{
    u8 x;
    u8 y;
    u8 z;
    u8 w;
} DEMOColor;

// Forward declarations of the file-internal helpers defined below.

// Encodes the captured pixels at inData into a BMP file image at outData and
// stores the resulting file size in *outByteSize.
void _DEMOEncodeBmp(const u8* inData, u8* outData, u32* outByteSize,
                    u32 width, u32 height, u32 pitch, nn::gfx::ImageFormat format);
// Encodes the captured pixels at inData into a run-length-encoded TGA file
// image at outData and stores the resulting file size in *outByteSize.
void _DEMOEncodeTGA(const u8* inData, u8* outData, u32* outByteSize,
                    u32 width, u32 height, u32 pitch, nn::gfx::ImageFormat format);
// Computes the packet lengths for one row of pixels: positive entries start a
// run of identical pixels, negative entries start a span of literal pixels.
void _DEMOComputeRunlengths(s32 cols, DEMOColor *pixelrow, s32 *runlength);
// Prints `byte` bytes of data through DEMOPrintf as uuencoded text.
void _DEMODumpSerial(const u8* data, const u32 byte);

// _DEMO_UUENC maps one 6-bit value to its printable uuencode character:
// non-zero values become (value & 077) + ' ', and zero becomes '`'.
// The argument is evaluated more than once, so pass a side-effect-free expression.
#define _DEMO_UUENC(c) ((c) ? ((c) & 077) + ' ': '`')

// Computes the RLE packet layout for one row of pixels.
//
//   cols      - number of pixels in the row
//   pixelrow  - the row's pixels (read only)
//   runlength - output array of `cols` entries. After the call, the entry at
//               the first pixel of each packet holds the packet length:
//               positive (2..128) for a run of identical pixels, negative
//               (-1..-128) for a span of literal pixels. All other entries are 0.
void _DEMOComputeRunlengths(s32 cols, DEMOColor *pixelrow, s32 *runlength)
{
    // A single packet can describe at most this many pixels.
    const s32 maxPacket = 128;
    s32 pos;

    // Start from all zeros so that an unvisited entry is detectable by the caller.
    for (pos = 0; pos < cols; ++pos)
    {
        runlength[pos] = 0;
    }

    // Pass 1: measure every run of identical pixels (a lone pixel is a run of 1).
    pos = 0;
    while (pos < cols)
    {
        const s32 first = pos;
        const DEMOColor& ref = pixelrow[first];

        for (++pos; pos < cols && pos - first < maxPacket; ++pos)
        {
            const DEMOColor& cur = pixelrow[pos];
            if (cur.x != ref.x || cur.y != ref.y || cur.z != ref.z || cur.w != ref.w)
            {
                break;
            }
        }
        runlength[first] = pos - first;
    }

    // Pass 2: merge consecutive length-1 runs into one literal packet, which is
    // recorded as a negative length at the packet's first pixel.
    pos = 0;
    while (pos < cols)
    {
        if (runlength[pos] != 1)
        {
            // A real run: skip over it untouched.
            pos += runlength[pos];
            continue;
        }

        const s32 first = pos;
        while (pos < cols && pos - first < maxPacket && runlength[pos] == 1)
        {
            runlength[pos] = 0;
            ++pos;
        }
        runlength[first] = -(pos - first);
    }
}

void _DEMODumpSerial(const u8* data, const u32 byte)
{
    u32 i, j, idx, leftByte, numLoop;
    s8 ch;
    // uuencode allows 6 bits (=63)
    const u32 maxOneLine = 63;
    // 63/3*4 = 84 characters
    char line[90];
    char *pc;

    idx = 0;
    numLoop = (u32)(byte / maxOneLine);

    DEMOPrintf("begin 644 GX2Capture.tga\n");

    for(i = 0; i < numLoop; i++)
    {
        pc = &line[0];
        *pc++ = '_'; // '_' is 63+32 = 95 in ascii
        for(j = 0; j < maxOneLine / 3; j++)
        {
            ch = data[3 * idx] >> 2;
            ch = _DEMO_UUENC(ch);
            *pc++ = ch;
            ch = ((data[3 * idx] << 4) & 060) | ((data[3 * idx + 1] >> 4) & 017);
            ch = _DEMO_UUENC(ch);
            *pc++ = ch;
            ch = ((data[3 * idx + 1] << 2) & 074) | ((data[3 * idx + 2] >> 6) & 03);
            ch = _DEMO_UUENC(ch);
            *pc++ = ch;
            ch = data[3 * idx + 2] & 077;
            ch = _DEMO_UUENC(ch);
            *pc++ = ch;
            idx++;
        }
        *pc++ = '\n';
        *pc++ = 0;
        DEMOPrintf("%s", line);
    }

    leftByte = byte % maxOneLine;
    if (leftByte > 0)
    {
        pc = &line[0];
        *pc++ = static_cast< char >( leftByte + 32 );

        if (leftByte >= 3)
        {
            for(j = 0; j < leftByte / 3; j++)
            {
                ch = data[3 * idx] >> 2;
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = ((data[3 * idx] << 4) & 060) | ((data[3 * idx + 1] >> 4) & 017);
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = ((data[3 * idx + 1] << 2) & 074) | ((data[3 * idx + 2] >> 6) & 03);
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = data[3 * idx + 2] & 077;
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                idx++;
            }
        }

        if(leftByte % 3 != 0)
        {
            ch = data[3 * idx] >> 2;
            ch = _DEMO_UUENC(ch);
            *pc++ = ch;
            if(leftByte % 3 == 2)
            {
                ch = ((data[3 * idx] << 4) & 060) | ((data[3 * idx + 1] >> 4) & 017);
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = ((data[3 * idx + 1] << 2) & 074);
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = 0;
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
            }else{
                ch = ((data[3 * idx] << 4) & 060);
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = 0;
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
                ch = data[3 * idx + 2] & 077;
                ch = _DEMO_UUENC(ch);
                *pc++ = ch;
            }
        }
        *pc++ = '\n';
        *pc++ = 0;
        DEMOPrintf("%s", line);
    }
    DEMOPrintf("`\nend\n");
}

void _DEMOEncodeBmp(const u8* inData, u8* outData, u32* outByteSize,
                    u32 width, u32 height, u32 pitch, nn::gfx::ImageFormat format)
{
    NN_UNUSED( format );
    NN_UNUSED( pitch );
    uint32_t imageSize = width * height * 4; // RGBA8

    DEMOAssert( nn::gfx::ImageFormat_R8_G8_B8_A8_UnormSrgb == format ||
                nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm == format );

    u8* pBuffer = outData;
    DEMOAssert( pBuffer );

    memset( pBuffer, 0, 54 );
    // BITMAPFILEHEADER
    *pBuffer++ = 'B';
    *pBuffer++ = 'M';
    //ihdr.bfSize
    uint32_t bfSize = imageSize + 54;
    memcpy( pBuffer, &bfSize, sizeof( uint32_t ) );
    pBuffer += 4;
    //ihdr.bfReserved1
    //ihdr.bfReserved2
    uint32_t reservedVal = 0;
    memcpy( pBuffer, &reservedVal, sizeof( uint32_t ) );
    pBuffer += 4;
    //ihdr.bfOffBits
    uint32_t bfOffBits = 54; // sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER) = 54
    memcpy( pBuffer, &bfOffBits, sizeof( uint32_t ) );
    pBuffer += 4;

    // BITMAPINFOHEADER
    //ihdr.biSize
    uint32_t binfoSize = 40;
    memcpy( pBuffer, &binfoSize, sizeof( uint32_t ) );
    pBuffer += 4;
    //ihdr.biWidth
    memcpy( pBuffer, &DEMOCaptureData.colorBuffer.width, sizeof( uint32_t ) );
    pBuffer += 4;
    //ihdr.biHeight
    memcpy( pBuffer, &DEMOCaptureData.colorBuffer.height, sizeof( uint32_t ) );
    pBuffer += 4;
    //ihdr.biPlanes
    uint16_t planeCount = 1; // always 1.
    memcpy( pBuffer, &planeCount, sizeof( uint16_t ) );
    pBuffer += 2;
    //ihdr.biBitCount
    uint16_t bpp = 32; // limit to 32bpp
    memcpy( pBuffer, &bpp, sizeof( uint16_t ) );
    pBuffer += 2;
    //ihdr.biCompression
    uint32_t compression = 0;
    memcpy( pBuffer, &compression, sizeof( uint32_t ) );
    pBuffer += 4;
    //biSizeImage
    uint32_t biSizeImage = imageSize;
    memcpy( pBuffer, &biSizeImage, sizeof( uint32_t ) );
    pBuffer += 4;

    // RGBA to BGRA and flip vertical to top-left origin.
    uint32_t rowBytes;
    rowBytes = ( ( width * static_cast<uint32_t>( bpp ) + 7 ) >> 3 );
    const uint8_t* src = inData;
    uint8_t* dst = outData + bfOffBits + imageSize - rowBytes;
    int chCount = bpp >> 3;
    int idxLine = height;
    while( 0 < idxLine )
    {
        uint8_t* pLine = dst;
        for( int px = width - 1; 0 <= px; --px )
        {
            *( pLine++ ) = *( src + 2 ); // src:B
            *( pLine++ ) = *( src + 1 ); // src:G
            *( pLine++  ) = *( src );    // src:R
            if( chCount == 4 )
            {
                *( pLine++ ) = *( src + 3 ); // src:A
            }
            src += chCount;
        }
        --idxLine;
        dst -= rowBytes;
    }

    *outByteSize = imageSize + 54; // bmp file header + raw image size
}

void _DEMOEncodeTGA(const u8* inData, u8* outData, u32* outByteSize,
                    u32 width, u32 height, u32 pitch, nn::gfx::ImageFormat format)
{
    NN_UNUSED( format );
    u8 *pbuffer;
    u32 rows = DEMOCaptureData.colorBuffer.height;
    u32 cols = DEMOCaptureData.colorBuffer.width;
    s32* runlength;
    u32 row, col, realrow, byte = 0;
    s32 i;
    DEMOColor* buffer;

    DEMOAssert( nn::gfx::ImageFormat_R8_G8_B8_A8_UnormSrgb == format ||
                nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm == format );

    pbuffer = outData;
    DEMOAssert(pbuffer);

    // 18-byte TGA header
    // (16-bit values are written in little-endian order)
    // TGA Image format codes:
    //  1: uncompressed, color-mapped      9: RLE-compressed, color-mapped
    //  2: uncompressed, true-color       10: RLE-compressed, true-color
    //  3: uncompressed, BW or gray       11: RLE-compressed, BW or gray
    //
    *pbuffer++ = 0;                  // Image ID length (0: no ID)
    *pbuffer++ = 0;                  // Color map type  (0: no color map)
    *pbuffer++ = 10;                 // Image type (10: RLE true-color image)
    *pbuffer++ = 0; *pbuffer++ = 0;  // Color map first index offset (16 bit)
    *pbuffer++ = 0; *pbuffer++ = 0;  // Color map length (16 bit)
    *pbuffer++ = 0;                  // Color map bpp
    *pbuffer++ = 0; *pbuffer++ = 0;  // Image X origin (16 bit)
    *pbuffer++ = 0; *pbuffer++ = 0;  // Image Y origin (16 bit)
    *pbuffer++ = (u8)(width % 256);  // Image width lo
    *pbuffer++ = (u8)(width / 256);  // Image width hi
    *pbuffer++ = (u8)(height % 256); // Image height lo
    *pbuffer++ = (u8)(height / 256); // Image height hi
    *pbuffer++ = (u8) 32;            // Image pixel bpp (32 bits)
    *pbuffer++ = (u8) 8;             // Image alpha bpp (8 bits), direction code

    buffer = (DEMOColor*)inData;

    // do run length encoding here
    runlength = (s32*) DEMOAlloc(sizeof(s32) * cols);
    DEMOAssert(runlength && "DEMOAlloc failed");

    // write out pixels
    for( row = 0; row < rows; ++row )
    {
        realrow = rows - row - 1;
        _DEMOComputeRunlengths( cols, &buffer[realrow * pitch], runlength );
        for( col = 0; col < cols; )
        {
            if( runlength[col] > 0 )
            {
                // set runlength
                pbuffer[byte] = (u8)(0x80 + runlength[col] - 1); byte++;
                // write pixel
                // TGA expects BGRA order
                pbuffer[byte] = buffer[realrow * pitch + col].z; byte++;
                pbuffer[byte] = buffer[realrow * pitch + col].y; byte++;
                pbuffer[byte] = buffer[realrow * pitch + col].x; byte++;
                pbuffer[byte] = buffer[realrow * pitch + col].w; byte++;
                col += runlength[col];
            }else if( runlength[col] < 0 ){
                // set runlength
                pbuffer[byte] = (u8)(- runlength[col] - 1); byte++;
                // write pixels
                // TGA expects BGRA order
                for ( i = 0; i < - runlength[col]; ++i )
                {
                    pbuffer[byte] = buffer[realrow * pitch + col + i].z; byte++;
                    pbuffer[byte] = buffer[realrow * pitch + col + i].y; byte++;
                    pbuffer[byte] = buffer[realrow * pitch + col + i].x; byte++;
                    pbuffer[byte] = buffer[realrow * pitch + col + i].w; byte++;
                }
                col += -runlength[col];
            }else
                DEMOAssert(!"Internal error: zero run length");
        }
    }

    DEMOFree(runlength);

    *outByteSize = 18 + byte;
}

// Prepares the global capture state for images of the given size and format.
//
//   width  - capture width in pixels
//   height - capture height in pixels
//   format - SRGB8, RGBA8, DEPTH16 or DEPTH32; any other value asserts and
//            falls back to the RGBA8 image format
//
// Sets up the buffer that captured texels are copied into and allocates the
// output buffer that receives the encoded TGA/BMP file image. Calling this
// again releases the previous buffers first.
void DEMOCaptureInit(u32 width, u32 height, DEMOCaptureFormat format)
{
    DEMOCaptureData.captureFormat = format;

    // Setup render buffer
    {
        nn::gfx::ImageFormat imageFormat = nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm;

        if( SRGB8 == format )
        {
            imageFormat = nn::gfx::ImageFormat_R8_G8_B8_A8_UnormSrgb;
        } else if( RGBA8 == format ){
            imageFormat = nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm;
        } else if( DEPTH16 == format ){
            imageFormat = nn::gfx::ImageFormat_D16_Unorm;
        } else if( DEPTH32 == format ){
            imageFormat = nn::gfx::ImageFormat_D32_Float;
        } else {
            // The '!' is required: a bare string literal is always true and
            // would never trigger the assertion.
            DEMOAssert(!"Only RGBA8/SRGB8 formats supported at the moment.");
        }

        // Release the buffer from a previous DEMOCaptureInit call before
        // setting up a new one, mirroring what DEMOCaptureShutdown does.
        if ( DEMOCaptureData.colorBuffer.initialized )
        {
            DEMOCaptureData.colorBuffer.buffer.Finalize();
            DEMOCaptureData.colorBuffer.initialized = false;
        }

        DEMOCaptureData.colorBuffer.width = width;
        DEMOCaptureData.colorBuffer.height = height;
        DEMOCaptureData.colorBuffer.format = imageFormat;

        // Allocate buffer to copy the data into
#if NN_GFX_IS_TARGET_GX
        nn::gfx::MemoryPool::InfoType poolInfo;
        nn::gfx::Texture::InfoType textureInfo;
        poolInfo.SetDefault();
        poolInfo.SetMemoryPoolProperty( nn::gfx::MemoryPoolProperty_CpuCached | nn::gfx::MemoryPoolProperty_GpuUncached );
        size_t alignment = nn::gfx::MemoryPool::GetPoolMemoryAlignment( &DEMODevice, poolInfo );

        textureInfo.SetDefault();
        textureInfo.SetWidth( width );
        textureInfo.SetHeight( height );
        textureInfo.SetGpuAccessFlags( nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_Write );
        textureInfo.SetImageFormat( imageFormat );
        textureInfo.SetImageStorageDimension( nn::gfx::ImageStorageDimension_2d );
        textureInfo.SetTileMode( nn::gfx::TileMode_Linear );

        alignment = std::max( alignment, nn::gfx::Texture::CalculateMipDataAlignment( &DEMODevice, textureInfo ) );
        size_t size = nn::gfx::Texture::CalculateMipDataSize( &DEMODevice, textureInfo );
        DEMOGfxMemPool* pPool = DEMOGfxSharedPool->AllocSubPool( size, alignment );
        DEMOCaptureData.colorBuffer.buffer.AllocFromPool( pPool, size, NULL, nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_Write, 0 );
#else
        // Always reserve 4 bytes per pixel, even for DEPTH16: the row flip in
        // DEMOCaptureCopyHelper and both encoders address this buffer as
        // width * height 32-bit pixels, so a 2-byte-per-pixel buffer would be
        // read and written past its end.
        DEMOCaptureData.colorBuffer.buffer.Initialize( width * height * 4,
            NULL, nn::gfx::GpuAccess_Texture | nn::gfx::GpuAccess_Write, 0 );
#endif

        // Pitch is expressed in pixels; rows are tightly packed.
        DEMOCaptureData.colorBuffer.pitch = width;

        DEMOCaptureData.colorBuffer.initialized = true;
    }

    if (DEMOCaptureData.TGAData)
    {
        DEMOFree(DEMOCaptureData.TGAData);
    }

    // The output buffer is shared by both encoders, so size it for the larger
    // of the two worst cases:
    //  - TGA: 18-byte header plus up to 5 bytes per pixel, because run length
    //    encoding adds up to 1 byte per 4 byte pixel.
    //  - BMP: 54-byte header plus exactly 4 bytes per pixel, which exceeds the
    //    TGA bound for very small images (fewer than 36 pixels).
    const u32 tgaWorstCase = (u32)(18 + 5 * width * height);
    const u32 bmpSize = (u32)(54 + 4 * width * height);
    const u32 outputSize = ( tgaWorstCase > bmpSize ) ? tgaWorstCase : bmpSize;

    DEMOCaptureData.TGAData = (u8*) DEMOAlloc(outputSize);
    DEMOAssert( DEMOCaptureData.TGAData );
}

void DEMOCaptureShutdown()
{
    if ( DEMOCaptureData.colorBuffer.initialized )
    {
        DEMOCaptureData.colorBuffer.buffer.Finalize();
        DEMOCaptureData.colorBuffer.initialized = false;
    }

    if(DEMOCaptureData.TGAData)
    {
        DEMOGfxFreeMEM2(DEMOCaptureData.TGAData);
        DEMOCaptureData.TGAData = NULL;
    }
}

void DEMOCaptureCopyHelper(
    nn::gfx::Texture *srcSurface, u32 srcMip, u32 srcSlice,
    DEMOGfxBuffer* dstBuffer )
{
    nn::gfx::CommandBuffer* pCB = &DEMOCommandBuffer;
    u32* pTmpData;
    nn::gfx::TextureCopyRegion srcCopyRegion;

    srcCopyRegion.SetDefault();
    srcCopyRegion.SetWidth( DEMOCaptureData.colorBuffer.width );
    srcCopyRegion.SetHeight( DEMOCaptureData.colorBuffer.height );
    srcCopyRegion.EditSubresource().SetDefault();
    srcCopyRegion.EditSubresource().SetMipLevel( srcMip );
    srcCopyRegion.EditSubresource().SetArrayIndex( srcSlice );

    pCB->Begin();
#if NN_GFX_IS_TARGET_GX
    if ( DEMOCaptureData.captureFormat == DEPTH16 || DEMOCaptureData.captureFormat == DEPTH32 )
    {
        GX2ExpandDepthBuffer( reinterpret_cast< GX2DepthBuffer* >( &srcSurface->ToData()->gx2Surface ) );
    }
#endif
    pCB->CopyImageToBuffer( &dstBuffer->buffer, 0, srcSurface, srcCopyRegion );
    pCB->FlushMemory( nn::gfx::GpuAccess_ColorBuffer | nn::gfx::GpuAccess_DepthStencil );
    pCB->End();
    DEMOQueue.ExecuteCommand( pCB, NULL );

    // Wait for GPU to finish
    DEMOQueue.Sync();

#if !( NN_GFX_IS_TARGET_GX || NN_GFX_IS_TARGET_D3D )
    // For GL/NVN textures the bottom left corner is offset (0, 0) so we need to flip
    pTmpData = DEMOCaptureData.colorBuffer.buffer.Map< u32 >();
    for ( u32 y = 0; y < DEMOCaptureData.colorBuffer.height / 2; y++ )
    {
        u32 flipRow = DEMOCaptureData.colorBuffer.height - y - 1;
        for ( u32 x = 0; x < DEMOCaptureData.colorBuffer.width; x++ )
        {
            // Image is always stored as RGBA
            u32 tmp;
            tmp = pTmpData[ y * DEMOCaptureData.colorBuffer.width + x ];
            pTmpData[ y * DEMOCaptureData.colorBuffer.width + x ] = pTmpData[ flipRow * DEMOCaptureData.colorBuffer.width + x ];
            pTmpData[ flipRow * DEMOCaptureData.colorBuffer.width + x ] = tmp;
        }
    }
    DEMOCaptureData.colorBuffer.buffer.Unmap();
#endif

    if ( DEMOCaptureData.captureFormat == DEPTH32 )
    {
        pTmpData = DEMOCaptureData.colorBuffer.buffer.Map< u32 >();
        for ( u32 y = 0; y < DEMOCaptureData.colorBuffer.height; y++ )
        {
            for ( u32 x = 0; x < DEMOCaptureData.colorBuffer.width; x++ )
            {
                f32* value = reinterpret_cast< f32* >( pTmpData + y * DEMOCaptureData.colorBuffer.width + x );
                u8 scaledValue = (u8)(*value * 255);
                u32 finalValue = ( 0xff << 24 ) | ( scaledValue << 16 ) | ( scaledValue << 8 ) | scaledValue;
                pTmpData[ y * DEMOCaptureData.colorBuffer.width + x ] = finalValue;
            }
        }
        DEMOCaptureData.colorBuffer.buffer.Unmap();
    }
}

// Captures srcBuffer, encodes it, and delivers the encoded image.
//
//   srcBuffer   - texture to capture; when NULL, no new capture is made and the
//                 previously encoded image is delivered again
//   dstFilePath - where to send the encoded image:
//                   NULL      : nowhere (the image is only encoded)
//                   ""        : printed as uuencoded text via DEMOPrintf
//                   otherwise : written to that file
//                 A path ending in ".bmp" selects BMP encoding; everything
//                 else is encoded as TGA.
//
// DEMOCaptureInit must have been called first.
void DEMOCaptureCopy(nn::gfx::Texture *srcBuffer, const char *dstFilePath)
{
    DEMOAssert(DEMOCaptureData.TGAData != NULL && "DEMOCaptureInit must be called first");

    if (srcBuffer != NULL)
    {
        // Grab a copy
        DEMOCaptureCopyHelper( srcBuffer, 0, 0,
            &DEMOCaptureData.colorBuffer.buffer );

        // Pick the encoder from the file extension. The length check matters:
        // without it a path shorter than the extension (including the ""
        // serial-dump path) would be compared starting before its first character.
        const char ext[] = { '.', 'b', 'm', 'p' };
        bool encodeAsBmp = false;
        if ( dstFilePath != nullptr )
        {
            const size_t pathLength = strnlen( dstFilePath, 1024 );
            encodeAsBmp = ( pathLength >= sizeof( ext ) ) &&
                          ( memcmp( dstFilePath + pathLength - sizeof( ext ), ext, sizeof( ext ) ) == 0 );
        }

        // For Depth formats pretend it is RGBA8 since we've converted it in the previous step
        const bool isDepthCapture = ( DEMOCaptureData.captureFormat == DEPTH16 ||
                                      DEMOCaptureData.captureFormat == DEPTH32 );
        const nn::gfx::ImageFormat encodeFormat =
            isDepthCapture ? nn::gfx::ImageFormat_R8_G8_B8_A8_Unorm : DEMOCaptureData.colorBuffer.format;

        const u8* pPixels = DEMOCaptureData.colorBuffer.buffer.Map< const u8 >();
        if ( encodeAsBmp )
        {
            // Convert to BMP format
            _DEMOEncodeBmp( pPixels,
                            DEMOCaptureData.TGAData,
                            &DEMOCaptureData.TGALength,
                            DEMOCaptureData.colorBuffer.width,
                            DEMOCaptureData.colorBuffer.height,
                            DEMOCaptureData.colorBuffer.pitch,
                            encodeFormat );
        }
        else
        {
            // Convert to TGA format
            _DEMOEncodeTGA( pPixels,
                            DEMOCaptureData.TGAData,
                            &DEMOCaptureData.TGALength,
                            DEMOCaptureData.colorBuffer.width,
                            DEMOCaptureData.colorBuffer.height,
                            DEMOCaptureData.colorBuffer.pitch,
                            encodeFormat );
        }
        DEMOCaptureData.colorBuffer.buffer.Unmap();
    }

    if (NULL == dstFilePath)
    {
        return;
    }

    if (*dstFilePath == 0)
    {
        // Empty path: dump the encoded image as text instead of writing a file.
        _DEMODumpSerial(DEMOCaptureData.TGAData, DEMOCaptureData.TGALength);
        return;
    }

    s32 res;
    DEMOFSFileInfo finfo;
    res = DEMOFSOpenFileMode(dstFilePath, &finfo, "wb");
    if (res != DEMO_FS_RESULT_OK)
    {
        DEMOPrintf("Unable to open file %s for capture output\n", dstFilePath);
        return;
    }

    res = DEMOFSWrite(&finfo, DEMOCaptureData.TGAData, DEMOCaptureData.TGALength);
    if (res != DEMO_FS_RESULT_OK)
    {
        DEMOPrintf("Unable to write to file %s for capture output\n", dstFilePath);
        // Still close the file so the handle is not leaked; the write error
        // has already been reported, so the close result is not checked.
        DEMOFSCloseFile(&finfo);
        return;
    }

    res = DEMOFSCloseFile(&finfo);
    if (res != DEMO_FS_RESULT_OK)
    {
        DEMOPrintf("Error closing file %s for capture output\n", dstFilePath);
        return;
    }
}
