﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

/* Define this symbol if the shaders know how to unpack the color and
 * texture when they are put in the same vec4.
 * If it is defined, the same symbol must also be defined in the shaders
 * and the shaders must be rebuilt. Default is to have it off (undefined).
 */
//#define PACK_COLOR_WITH_TEXTURE
#include <cstdio>
#include <cstring>
#include <cmath>

#include <nn/nn_Assert.h>
#include <nnt.h>
#include <nnt/nnt_Argument.h>
#include <gfx/demo.h>

#ifdef NN_BUILD_CONFIG_OS_SUPPORTS_HORIZON
#define HORIZON_SETCLOCK
#endif

#if NN_GFX_IS_TARGET_GX
#include <cafe/gx2.h>
#define MetricResult GX2MetricResult
//static const char* PERF_RESULT_FILE = "/vol/save/common/perfResult.csv";
#endif
#if NN_GFX_IS_TARGET_NVN
#include <nvn/nvn.h>
#include <nvn/nvn_FuncPtrInline.h>

// Tiled cache action passed to nvnCommandBufferSetTiledCacheAction() at the start of
// every perf capture (see StartPerfMetricCapture). Swap the two lines below to run the
// tests with the tiled cache enabled instead of disabled.
//NVNtiledCacheAction tcOnOff = NVN_TILED_CACHE_ACTION_ENABLE;
NVNtiledCacheAction tcOnOff = NVN_TILED_CACHE_ACTION_DISABLE;

#if defined( NN_BUILD_CONFIG_TOOLCHAIN_SUPPORTS_VC )
#pragma warning( disable : 4127 ) // Disable "C4127: conditional expression is constant"
#endif

// One counter report as read back from the perf buffer on NVN. PrintInfo() maps the
// perf buffer as an array of these, indexed by NVNcounterType, and PerfAddr() uses
// sizeof(MetricResult) as the stride between reports, so the layout must not change.
struct MetricResult
{
    u64 u64Result; // the counter result
    u64 timeStamp; // timestamp for when the counter result is valid
};
//static const char* PERF_RESULT_FILE = "perfResult.csv";

// Returns the GPU address of the report slot number `index` inside a perf buffer that
// starts at `baseAddr`; slots are laid out back to back, one MetricResult each.
static inline u64 PerfAddr(u64 baseAddr, int index)
{
    const u64 slotOffset = index * sizeof(MetricResult);
    return baseAddr + slotOffset;
}
#endif

// NOTE(review): not referenced in the visible part of this file; it is defined right
// before testList.h is included, so presumably that header uses it to size the
// per-test argument list — confirm against testList.h.
static const int MAX_ARGS = 5;

#include "testList.h"

// Capacities (in chars, including the terminating NUL) of the testName and testDesc
// buffers declared further down.
static const int MAX_NAME_LEN = 50;
static const int MAX_DESC_LEN = 1000;

/////////////////////////////////////////////////////////////////////////////
    // Arguments to pass in

// TEST_MODE=TEST_PIXELS,TEST_VERTS,TEST_TEXELS,TEST_FMAD,TEST_BANDWIDTH_READ_ONLY,TEST_BANDWIDTH_WRITE_ONLY,TEST_BANDWIDTH_READ_WRITE,TEST_BANDWIDTH_TEXTURE,TEST_BANDWIDTH_CPU_READ_ONLY,TEST_BANDWIDTH_CPU_WRITE_ONLY,TEST_BANDWIDTH_CPU_READ_WRITE
// TEST_NAME="Test_Name"
// TEST_DESC="Test_Description"
// NO_COLOR,USE_BLEND,USE_DEPTH_READ,USE_DEPTH_WRITE,USE_BC1_TEXTURE,USE_RGBA_TEXTURE,USE_128_TEXTURE,USE_BILINEAR
// USE_GS,USE_ALL,USE_NORMAL,USE_TEXTURE
// USE_ARRAY_VEC#=<0~20>
// USE_ARRAY_COLOR#=<0~20>
// SKIP_FRAMES=30
// TEST_FRAMES=100

// DEMO_AA_MODE=<0~3>
// DEMO_CB_FORMAT=10_10_10_2,2_10_10_10,8_8_8_8_SRGB,8_8_8_8,16_16_16_16F,32_32_32_32F,16,32F
// DEMO_DB_FORMAT=16,8_24F,8_24,X24_8_32F,32F
// DEMO_SCAN_FORMAT=10_10_10_2,2_10_10_10,8_8_8_8_SRGB,8_8_8_8
// DEMO_HIZ_DISABLE
// DEMO_FORCE_MEM2

/////////////////////////////////////////////////////////////////////////////
    // Testing variables

// Test kinds; testMode holds one of these values. The enumerator names match the
// values accepted by the TEST_MODE argument listed above, except TEST_CLEAR, which
// does not appear in that list.
enum Tests
{
    TEST_PIXELS,
    TEST_VERTS,                     // SceneInit() uses smaller triangles for this test
    TEST_TEXELS,
    TEST_FMAD,                      // SceneInit() selects the perfFMAD shader and forces noColor
    TEST_BANDWIDTH_READ_ONLY,
    TEST_BANDWIDTH_WRITE_ONLY,
    TEST_BANDWIDTH_READ_WRITE,
    TEST_BANDWIDTH_TEXTURE,
    TEST_BANDWIDTH_CPU_READ_ONLY,   // SceneDraw() times a CPU read of a 4 MB allocation
    TEST_BANDWIDTH_CPU_WRITE_ONLY,  // SceneDraw() times a CPU memset of a 4 MB allocation
    TEST_BANDWIDTH_CPU_READ_WRITE,
    TEST_CLEAR,
};

// Currently selected test; compared against the Tests enumerators.
static u32 testMode = 0;

// Extent of the triangle-strip grid built by SceneInit(). NUM_REPEATS scales the
// pixel and vertex counts used by PrintInfo().
static u32 SURFACE_WIDTH = 1;
static u32 SURFACE_HEIGHT = 1;
static u32 NUM_REPEATS = 1;

// Render-state options consumed by SceneInit().
static bool noColor = false;        // no "color" attribute, perfNoColor shader, rasterizer disabled
static bool solidColor = false;     // no "color" attribute, perfNoColor shader
static bool useBlend = false;       // enable source-alpha blending on color target 0
static bool useDepthRead = false;   // enable the depth test (LessEqual)
static bool useDepthWrite = false;  // enable depth writes
static bool useBC1Texture = false;  // pick the BC1_UNORM butterfly texture
static bool useRGBATexture = false; // pick the R8_G8_B8_A8_SRGB butterfly texture
static bool use128Texture = false;  // pick the R32_G32_B32_A32_FLOAT butterfly texture
static bool useBilinear = false;    // linear instead of point filtering for the sampler

// Vertex-layout / shader options consumed by SceneInit().
static bool useGS = false;          // perfGS shader (perfAllGS together with useAll)
static bool useAll = false;         // perfAll shader
static bool useNormal = false;      // add a "normal" attribute, perfNorm shader
static bool useTexture = false;     // add a texture-coordinate attribute, load texture and sampler
// Number of extra per-vertex attributes of each kind ("vec4_<i>", "color2_<i>", ...).
static u32 useArrayVec4 = 0;
static u32 useArrayVec3 = 0;
static u32 useArrayVec2 = 0;
static u32 useArrayColor4 = 0;
static u32 useArrayColor3 = 0;
static u32 useArrayColor2 = 0;

static char testName[MAX_NAME_LEN] = ""; // printed on screen by PrintInfo()
static char testDesc[MAX_DESC_LEN] = "";
static u32 skipFrames = 0;  // frames PrintInfo() still excludes from the accumulated totals
static s32 testFrames = 0;  // when negative, PrintInfo() reports statistics about once per second

// Color/depth traffic per pixel, computed in SceneInit() as 4 * (number of enabled
// read paths) and 4 * (number of enabled write paths).
// NOTE(review): named "bits" but the factor of 4 looks like bytes per pixel — confirm.
static u32 bitsPerPixelRead = 0;
static u32 bitsPerPixelWrite = 0;
// sclk: number of GPU time units per second; PrintInfo() multiplies counter values by
// it and divides by the measured time to obtain per-second rates.
#if NN_GFX_IS_TARGET_GX
static const float sclk = 1000000.0f; // for now gx2 returns clock normalized to 1MHz
#endif
#if NN_GFX_IS_TARGET_NVN
#ifdef __horizon__
// for the TX1 the return ticks are in 1.625 ns clocks
static const unsigned int sclk = 615384615;
#else
static const unsigned int sclk = 1000000000; // for now nvn returns clock normalized to 1GHz
#endif
#endif
// Running totals over all counted frames; reset in SceneInit(), accumulated in PrintInfo().
static u64 totalTime = 0, totalBusyTime = 0, totalVerts = 0, totalPixels = 0, totalTexels = 0, totalTexelMem = 0, totalPrims = 0, totalDrawTime = 0, totalOps = 0;
// Per-frame extremes of the GPU time and of uDrawTime; the minima start at the largest u64 value.
static u64 minTime = (u64)(-1);
static u64 minDrawTime = (u64)(-1);
static u64 maxTime = 0, maxDrawTime = 0;

// Bytes touched and time spent by the CPU bandwidth tests in SceneDraw().
static u64 cpuBandwidthMem = 0;
static OSTime cpuBandwidthTime = 0;

/////////////////////////////////////////////////////////////////////////////

#if NN_GFX_IS_TARGET_NVN
// Max alignment size for performance pool
static const int PERF_ALIGN = 8192;
#endif

// Shader and texture file paths chosen by SceneInit() from the test options.
static const char* GSH_SHADER_FILE;
static const char* GTX_TEXTURE_FILE;

// Number of entries in the testList array (declared in testList.h).
static const u32 TEST_COUNT = sizeof(testList) / sizeof(*testList);

// Vertex count of the triangle-strip grid; set by SceneInit().
static u32 NUM_VERTICES = 0;

// Texture and sampler state; initialized by SceneInit() only when useTexture is set.
static DEMOGfxTexture s_baseTexture;
static DEMOGfxTexture *pTexture;
// Shader slot passed to SetTextureAndSampler() in SceneDraw().
// NOTE(review): no assignment is visible in this part of the file — confirm where it is set.
static u32 samplerLoc;
static nn::gfx::Sampler mySampler;
static nn::gfx::DescriptorSlot mySamplerSlot;

/////////////////////////////////////////////////////////////////////////////
// ----- Shader information

// Pipeline (shaders, blend, depth and raster state) configured and initialized by SceneInit().
static DEMOGfxPipeline myPipeline;
// Fence initialized by SceneInit().
static nn::gfx::Fence myFence;


// Viewport/scissor state covering the color buffer, plus the memory handed back by
// DEMOGfxSetViewportScissorState() when it was created.
static nn::gfx::ViewportScissorState myViewport;
static void* myViewportMem;

// Vertex attribute bookkeeping maintained by SetAttribute():
// Loc[i] is the shader input slot of the i-th registered attribute, NUM_ATTRIB the
// number registered so far, and vertexSize the running byte size of one vertex.
static const int MAX_NUM_ATTRIB = 100;
static u32 Loc[MAX_NUM_ATTRIB];
static u32 NUM_ATTRIB;
static u32 vertexSize;

/////////////////////////////////////////////////////////////////////////////
    // Misc. variables

// Frame counter; reset to 0 by SceneInit(). PrintInfo() skips its statistics while it is 0.
static u32 s_ticks;
#if NN_GFX_IS_TARGET_GX
// GX2 perf state allocated and initialized in SceneInit().
static GX2PerfData* perfInfo;
static MEMAllocator s_perfAllocator;
#endif
#if NN_GFX_IS_TARGET_NVN
// Size in bytes of the counter report buffer allocated from perfBufferPool in SceneInit().
static const int PERF_MEM_SIZE = 1024;
static DEMOGfxMemPool* perfBufferPool;
static DEMOGfxBuffer perfBuffer;
#endif
// CPU-side draw time of the last frame, in OS ticks (PrintInfo() converts it with
// OSTicksToMicroseconds). NOTE(review): assigned outside the visible part of this file.
static u64 uDrawTime;
// Time of the last periodic statistics report printed by PrintInfo().
static OSTime lastPrintTime = 0;

static const int MAX_WRITEBUFFER_SIZE = 3276800;
static char *writeBuffer;

// Interleaved vertex data built by SceneInit(), and the memory pool / buffer wrapping it.
static u8* pVB = 0; // vertex data
DEMOGfxBuffer attribBuffer;
DEMOGfxMemPool* attribPool;

/////////////////////////////////////////////////////////////////////////////
    // Prototypes

static int SceneInit(void);   // builds all rendering resources for the current test options
static int SceneDraw(void);   // records one frame of the current test
static void PrintInfo(void);  // reads back the perf counters and reports statistics

/////////////////////////////////////////////////////////////////////////////
    // Structures

// Four-component float vector, stored as x, y, z, w in that order.
struct VEC4
{
    float x;
    float y;
    float z;
    float w;
};

// Three-component float vector, stored as x, y, z in that order.
struct VEC3
{
    float x;
    float y;
    float z;
};

// Two-component float vector, stored as x, y in that order.
struct VEC2
{
    float x;
    float y;
};

// Four 8-bit color channels, stored as r, g, b, a in that order.
struct COLOR4
{
    u8 r;
    u8 g;
    u8 b;
    u8 a;
};

// Three 8-bit color channels, stored as r, g, b in that order.
struct COLOR3
{
    u8 r;
    u8 g;
    u8 b;
};

// Two 8-bit channels, stored as u, v in that order.
struct COLOR2
{
    u8 u;
    u8 v;
};

/////////////////////////////////////////////////////////////////////////////
    // Helper functions

// Fills x, y and z of `v` with three successive DEMOFRand() values (drawn in that
// order) and sets w to 1.
static void randvector(VEC4& v)
{
    const float rx = DEMOFRand();
    const float ry = DEMOFRand();
    const float rz = DEMOFRand();

    v.x = rx;
    v.y = ry;
    v.z = rz;
    v.w = 1.0f;
}

// Fills x, y and z of `v` with three successive DEMOFRand() values, drawn in that order.
static void randvector(VEC3& v)
{
    const float rx = DEMOFRand();
    const float ry = DEMOFRand();
    const float rz = DEMOFRand();

    v.x = rx;
    v.y = ry;
    v.z = rz;
}

// Fills x and y of `v` with two successive DEMOFRand() values, drawn in that order.
static void randvector(VEC2& v)
{
    const float rx = DEMOFRand();
    const float ry = DEMOFRand();

    v.x = rx;
    v.y = ry;
}

// Sets r, g and b of `c` to the low 8 bits of three successive DEMORand() values
// (drawn in that order) and makes the color fully opaque.
static void randcolor(COLOR4& c)
{
    c.r = static_cast<u8>(DEMORand() & 0xFF);
    c.g = static_cast<u8>(DEMORand() & 0xFF);
    c.b = static_cast<u8>(DEMORand() & 0xFF);
    c.a = static_cast<u8>(0xFF);
}

// Sets r, g and b of `c` to the low 8 bits of three successive DEMORand() values,
// drawn in that order.
static void randcolor(COLOR3& c)
{
    c.r = static_cast<u8>(DEMORand() & 0xFF);
    c.g = static_cast<u8>(DEMORand() & 0xFF);
    c.b = static_cast<u8>(DEMORand() & 0xFF);
}

// Sets u and v of `c` to the low 8 bits of two successive DEMORand() values, drawn in
// that order.
static void randcolor(COLOR2& c)
{
    c.u = static_cast<u8>(DEMORand() & 0xFF);
    c.v = static_cast<u8>(DEMORand() & 0xFF);
}

static void SetAttribute(const char* name, u32 size, nn::gfx::AttributeFormat attrib)
{
    // Attribute Location Lookup
    Loc[NUM_ATTRIB] = (u32)myPipeline.shaders.GetInterfaceSlot(nn::gfx::ShaderStage_Vertex, nn::gfx::ShaderInterfaceType_Input, name);
    // position setup
    DEMOGfxInitShaderAttribute(&myPipeline.shaders, name, 0, vertexSize, attrib);
    ++NUM_ATTRIB;
    vertexSize += size;
}

/////////////////////////////////////////////////////////////////////////////

// Clamps a clip-space coordinate to the range [-1, 1].
// (A tighter [-0.7, 0.7] clamp was used here at some point for experiments.)
static inline float ClampPos(float x)
{
    if (x < -1.0f)
    {
        return -1.0f;
    }
    if (x > 1.0f)
    {
        return 1.0f;
    }
    return x;
}

// Clamps a texture coordinate to the range [0, 1].
static inline float ClampTex(float x)
{
    if (x < 0.0f)
    {
        return 0.0f;
    }
    if (x > 1.0f)
    {
        return 1.0f;
    }
    return x;
}

// The init function for the rendering portions of this app
static int SceneInit()
{
    char attribName[10];

    DEMOSRand(0);
    s_ticks = 0;
    {
        nn::gfx::FenceInfo fenceInfo;
        fenceInfo.SetDefault();
        myFence.Initialize(&DEMODevice, fenceInfo);
    }

    static float surfaceWidth = (float)DEMOColorBufferInfo.GetWidth();
    static float surfaceHeight = (float)DEMOColorBufferInfo.GetHeight();

    DEMOGfxSetViewportScissorState(&myViewport, &myViewportMem, 0.0f, 0.0f, surfaceWidth,
        surfaceHeight, 0.0f, 1.0f, surfaceHeight, false);

    memset(&myPipeline, 0, sizeof(myPipeline));
    myPipeline.SetDefaults();

        // This is strictly for the color/depth buffer
    bitsPerPixelRead = 4 * (useBlend + useDepthRead);
    bitsPerPixelWrite = 4 * (!noColor + useDepthWrite);
    OSReport("BBP = %i read, %i write\n", bitsPerPixelRead, bitsPerPixelWrite);

/////////////////////////////////////////////////////////////////////////////
        // Later shaders include features from earlier shaders
    GSH_SHADER_FILE = "shaders/perf/testSuite/perf";

    if (noColor || solidColor)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfNoColor";
    }
    if (useNormal)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfNorm";
    }
    if (useTexture)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfTex";
    }
    if (useGS)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfGS";
    }
    if (useArrayVec4)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfVec4";
        DEMOAssert(0 && "shader exceeds attribute limit on NVN");
    }
    if (useArrayVec3)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfVec3";
        DEMOAssert(0 && "shader exceeds attribute limit on NVN");
    }
    if (useArrayVec2)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfVec2";
    }
    if (useArrayColor4)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfColor4";
        DEMOAssert(0 && "shader exceeds attribute limit on NVN");
    }
    if (useArrayColor3)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfColor3";
        DEMOAssert(0 && "shader exceeds attribute limit on NVN");
    }
    if (useArrayColor2)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfColor2";
    }
    if (useAll && useGS)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfAllGS";
        DEMOAssert(0 && "shader exceeds attribute limit on NVN");

    }
    else if (useAll)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfAll";
        DEMOAssert(0 && "shader exceeds attribute limit on NVN");
    }
    if (testMode == TEST_FMAD)
    {
        GSH_SHADER_FILE = "shaders/perf/testSuite/perfFMAD";
        noColor = true;
    }
/////////////////////////////////////////////////////////////////////////////
        // Different texture types
    if (useBC1Texture)
    {
        GTX_TEXTURE_FILE = "textures/butterflies/BC1_UNORM-butterfly";
    }
    if (useRGBATexture)
    {
        GTX_TEXTURE_FILE = "textures/butterflies/R8_G8_B8_A8_SRGB-butterfly";
    }
    if (use128Texture)
    {
        GTX_TEXTURE_FILE = "textures/butterflies/R32_G32_B32_A32_FLOAT-butterfly";
    }
/////////////////////////////////////////////////////////////////////////////
        // Load shader binary to memory
    DEMOGfxLoadShadersFromFile(&myPipeline.shaders, 0, GSH_SHADER_FILE);

/////////////////////////////////////////////////////////////////////////////
    if (useTexture)
    {
        s_baseTexture.Initialize(GTX_TEXTURE_FILE);
        pTexture = &s_baseTexture;

        if (useBilinear)
        {
            // Set up the sampler object
            DEMOGfxInitSampler(&mySampler, &mySamplerSlot, nn::gfx::TextureAddressMode_ClampToBorder, nn::gfx::FilterMode_MinLinear_MagLinear_MipLinear, nn::gfx::ComparisonFunction_Always);
        }
        else
        {
            // Set up the sampler object
            DEMOGfxInitSampler(&mySampler, &mySamplerSlot, nn::gfx::TextureAddressMode_ClampToBorder, nn::gfx::FilterMode_MinPoint_MagPoint_MipPoint, nn::gfx::ComparisonFunction_Always);
        }
    }


/////////////////////////////////////////////////////////////////////////////
        // Set up all the attributes
    NUM_ATTRIB = 0;
    vertexSize = 0;

    SetAttribute("position", sizeof(VEC3), nn::gfx::AttributeFormat_32_32_32_Float);
#ifdef PACK_COLOR_WITH_TEXTURE
    if (!noColor && !solidColor && !useTexture)
    {
        SetAttribute("color", sizeof(COLOR4), nn::gfx::AttributeFormat_8_8_8_8_Unorm);
    }
#else
    if (!noColor && !solidColor)
    {
        SetAttribute("color", sizeof(COLOR4), nn::gfx::AttributeFormat_8_8_8_8_Unorm);
    }
#endif
    if (useNormal)
    {
        SetAttribute("normal", sizeof(VEC3), nn::gfx::AttributeFormat_32_32_32_Float);
    }
    if (useTexture)
    {
#ifdef PACK_COLOR_WITH_TEXTURE
        SetAttribute("utexture", sizeof(VEC3), nn::gfx::AttributeFormat_32_32_32_Uint);
#else
        SetAttribute("texture", sizeof(VEC2), nn::gfx::AttributeFormat_32_32_Float);
#endif
    }
    for (unsigned int i = 0; i < useArrayVec4; ++i)
    {
        sprintf(attribName, "vec4_%i", i);
        SetAttribute(attribName, sizeof(VEC4), nn::gfx::AttributeFormat_32_32_32_32_Float);
    }

    for (unsigned int i = 0; i < useArrayVec3; ++i)
    {
        sprintf(attribName, "vec3_%i", i);
        SetAttribute(attribName, sizeof(VEC3), nn::gfx::AttributeFormat_32_32_32_Float);
    }

    for (unsigned int i = 0; i < useArrayVec2; ++i)
    {
        sprintf(attribName, "vec2_%i", i);
        SetAttribute(attribName, sizeof(VEC2), nn::gfx::AttributeFormat_32_32_Float);
    }

    for (unsigned int i = 0; i < useArrayColor4; ++i)
    {
        sprintf(attribName, "color4_%i", i);
        SetAttribute(attribName, sizeof(COLOR4), nn::gfx::AttributeFormat_8_8_8_8_Unorm);
    }

    for (unsigned int i = 0; i < useArrayColor3; ++i)
    {
        sprintf(attribName, "color3_%i", i);
        DEMOAssert(0 && "nn::gfx::AttributeFormat_8_8_8_UNORM doesn't exist!  Can't use color3...\n");
        //SetAttribute(attribName, sizeof(COLOR3), nn::gfx::AttributeFormat_8_8_8_Unorm);
    }

    for (unsigned int i = 0; i < useArrayColor2; ++i)
    {
        sprintf(attribName, "color2_%i", i);
        SetAttribute(attribName, sizeof(COLOR2), nn::gfx::AttributeFormat_8_8_Unorm);
    }
    DEMOGfxInitShaderVertexBuffer(&myPipeline.shaders, 0, vertexSize /*STRIDE*/, 0);

/////////////////////////////////////////////////////////////////////////////
#if NN_GFX_IS_TARGET_GX
    perfInfo = (GX2PerfData*)DEMOGfxAllocMEM2(sizeof(GX2PerfData), GX2_DEFAULT_BUFFER_ALIGNMENT);
    MEMInitAllocatorForDefaultHeap(&s_perfAllocator);
    GX2PerfInit(perfInfo, 1, &s_perfAllocator);
    GX2PerfSetCollectionMethod(perfInfo, GX2_PERF_COLLECT_TAGS_ACCUMULATE);
    GX2PerfTagEnable(perfInfo, 0, GX2_ENABLE);
    GX2PerfMetricsClear(perfInfo);
#endif
#if NN_GFX_IS_TARGET_NVN
    perfBuffer.AllocFromPool(perfBufferPool, PERF_MEM_SIZE, NULL, nn::gfx::GpuAccess_UnorderedAccessBuffer, 0);
#endif
/////////////////////////////////////////////////////////////////////////////
    float TRIANGLE_SIZE;

    if (testMode == TEST_VERTS) {
      TRIANGLE_SIZE = 0.25;
    } else {
      TRIANGLE_SIZE = 4.0; // FIXME was 2.0;
    }

    //    NUM_VERTICES = (((((SURFACE_WIDTH + 1) / TRIANGLE_SIZE) * 2) + 2 + 2) * ((SURFACE_HEIGHT + 1) / TRIANGLE_SIZE)); // FIXME this calculation is too big; we fix it up later, but it's OK as long as it's an over-estimate
    NUM_VERTICES = static_cast< u32 >( (2 * (SURFACE_WIDTH / TRIANGLE_SIZE) + 4) * ((SURFACE_WIDTH / TRIANGLE_SIZE) + 2) );
    size_t vbsize = (vertexSize * NUM_VERTICES + 0x1ffff) & ~0x1ffff;
    pVB = (u8*)DEMOGfxAllocMEM2(vbsize, 0x20000);

/*
    0---2---4
    |  /|  /|
    | / | / |
    |/  |/  |
    1---3---5=a  move to next row

    to do that, add vertex a, creates degenerated triangle 4,5,a
    then add vertex b, creates degenerated triangle 5,a,b
    // in next horizontal loop
    then add vertex b, creates degenerated triangle a,b,b
    then add vertex c, creates degenerated triangle b,b,c
    then we have base for the next strip

    b--- ---
    |  /|  /|
    | / | / |
    |/  |/  |
    c--- ---
*/
        // Create the triangle strip
    float x, y;
    unsigned v = 0, c = 0;
    for (y = 0; y < SURFACE_HEIGHT; y += TRIANGLE_SIZE)
    {
        VEC3* pos;
        // vertex strip
        for (x = 0; x < SURFACE_WIDTH; x += TRIANGLE_SIZE)
        {
            pos = (VEC3*)&pVB[v];
            pos->x = (float)x;
            pos->y = (float)y;
            v += vertexSize;
            ++c;

            pos = (VEC3*)&pVB[v];
            pos->x = (float)x;
            pos->y = (float)(y + TRIANGLE_SIZE);
            v += vertexSize;
            ++c;
        }
        // last two vertices in the strip
        pos = (VEC3*)&pVB[v];
        pos->x = (float)x;
        pos->y = (float)y;
        v += vertexSize;
        ++c;

        pos = (VEC3*)&pVB[v];
        pos->x = (float)x;
        pos->y = (float)(y + TRIANGLE_SIZE);
        v += vertexSize;
        ++c;

        // repeat last one
        pos = (VEC3*)&pVB[v];
        pos->x = (float)x;
        pos->y = (float)(y + TRIANGLE_SIZE);
        v += vertexSize;
        ++c;

        // repeat vertex 1
        pos = (VEC3*)&pVB[v];
        pos->x = 0;
        pos->y = (float)(y + TRIANGLE_SIZE);
        v += vertexSize;
        ++c;
    }

    if (c > NUM_VERTICES) {
      OSReport("need %d vertices, have space for %d\n", c, NUM_VERTICES);
      ASSERT(c <= NUM_VERTICES);
    }
    NUM_VERTICES = c; // update with the real number of vertices
        // Fill the vertices with data
    for (u32 i = 0; i < c; i++)
    {
        u32 off = i * vertexSize;
        VEC3* pos = (VEC3*)&pVB[off];
        pos->x = ClampPos(-1.0f + (pos->x * 2 / SURFACE_WIDTH));
        pos->y = ClampPos(-1.0f + (pos->y * 2 / SURFACE_HEIGHT));
        pos->z = 0.25;

        off += sizeof(VEC3);
#ifdef PACK_COLOR_WITH_TEXTURE
        if (!noColor && !solidColor && !useTexture)
        {
            randcolor(*((COLOR4*)&pVB[off]));
            off += sizeof(COLOR4);
        }
#else
        if (!noColor && !solidColor)
        {
            randcolor(*((COLOR4*)&pVB[off]));
            off += sizeof(COLOR4);
        }
#endif
        if (useNormal)
        {
            randvector(*((VEC3*)&pVB[off]));
            off += sizeof(VEC3);
        }
#ifdef PACK_COLOR_WITH_TEXTURE
        if (useTexture)
        {
            VEC3* tex = (VEC3*)&pVB[off];
            tex->x = ClampTex(pos->x / 2 + 0.5f);
            tex->y = ClampTex(pos->y / 2 + 0.5f);
        COLOR4 *cptr = reinterpret_cast<COLOR4*>(&tex->z);
        randcolor(*cptr);
            off += sizeof(VEC3);
        }
#else
        if (useTexture)
        {
            VEC2* tex = (VEC2*)&pVB[off];
            tex->x = ClampTex(pos->x / 2 + 0.5f);
            tex->y = ClampTex(pos->y / 2 + 0.5f);
            off += sizeof(VEC2);
        }
#endif
        for (unsigned int j = 0; j < useArrayVec4; ++j)
        {
            VEC4* arr = (VEC4*)&pVB[off];
            randvector(*arr);
            arr->x /= useArrayVec4;
            arr->y /= useArrayVec4;
            arr->z /= useArrayVec4;
            arr->w /= useArrayVec4;
            off += sizeof(VEC4);
        }

        for (unsigned int j = 0; j < useArrayVec3; ++j)
        {
            VEC3* arr = (VEC3*)&pVB[off];
            randvector(*arr);
            arr->x /= useArrayVec3;
            arr->y /= useArrayVec3;
            arr->z /= useArrayVec3;
            off += sizeof(VEC3);
        }

        for (unsigned int j = 0; j < useArrayVec2; ++j)
        {
            VEC2* arr = (VEC2*)&pVB[off];
            randvector(*arr);
            arr->x /= useArrayVec2;
            arr->y /= useArrayVec2;
            off += sizeof(VEC2);
        }

        for (unsigned int j = 0; j < useArrayColor4; ++j)
        {
            COLOR4* arr = (COLOR4*)&pVB[off];
            randcolor(*arr);
            arr->r /= (u8)useArrayColor4;
            arr->g /= (u8)useArrayColor4;
            arr->b /= (u8)useArrayColor4;
            arr->a /= (u8)useArrayColor4;
            off += sizeof(COLOR4);
        }

        for (unsigned int j = 0; j < useArrayColor3; ++j)
        {
            COLOR3* arr = (COLOR3*)&pVB[off];
            randcolor(*arr);
            arr->r /= (u8)useArrayColor3;
            arr->g /= (u8)useArrayColor3;
            arr->b /= (u8)useArrayColor3;
            off += sizeof(COLOR3);
        }

        for (unsigned int j = 0; j < useArrayColor2; ++j)
        {
            COLOR2* arr = (COLOR2*)&pVB[off];
            randcolor(*arr);
            arr->u /= (u8)useArrayColor2;
            arr->v /= (u8)useArrayColor2;
            off += sizeof(COLOR2);
        }
    }

/////////////////////////////////////////////////////////////////////////////
#ifdef FIXME
    attribBuffer.Initialize(vertexSize*NUM_VERTICES, pVB, nn::gfx::GpuAccess_VertexBuffer, 0);
#else
    attribPool = DEMOGfxMemPool::CreateFromMemory(pVB, vbsize, nn::gfx::MemoryPoolProperty_CpuInvisible | nn::gfx::MemoryPoolProperty_GpuCached);
    attribBuffer.CreateFromPool(attribPool, vertexSize*NUM_VERTICES, nn::gfx::GpuAccess_VertexBuffer);
#endif
        // Reset all perf counters
    totalTime = 0;
    totalBusyTime = 0;
    totalVerts = 0;
    totalPixels = 0;
    totalTexels = 0;
    totalTexelMem = 0;
    totalPrims = 0;
    totalDrawTime = 0;
    totalOps = 0;

    minTime = (u64)(-1);
    minDrawTime = (u64)(-1);

    maxTime = 0;
    maxDrawTime = 0;

    cpuBandwidthMem = 0;
    cpuBandwidthTime = 0;

    myPipeline.depthStencilStateInfo.SetDepthTestEnabled(useDepthRead);
    myPipeline.depthStencilStateInfo.SetDepthWriteEnabled(useDepthWrite);
    myPipeline.depthStencilStateInfo.SetDepthComparisonFunction(useDepthRead ? nn::gfx::ComparisonFunction_LessEqual : nn::gfx::ComparisonFunction_Always);
    myPipeline.blendTargetStateCount = 1;
    myPipeline.blendTargetStateInfoArray[0].SetDefault();
    if (useBlend)
    {
        myPipeline.blendTargetStateInfoArray[0].SetBlendEnabled(true);
        myPipeline.blendTargetStateInfoArray[0].SetColorBlendFunction(nn::gfx::BlendFunction_Add);
        myPipeline.blendTargetStateInfoArray[0].SetSourceColorBlendFactor(nn::gfx::BlendFactor_SourceAlpha);
        myPipeline.blendTargetStateInfoArray[0].SetDestinationColorBlendFactor(nn::gfx::BlendFactor_OneMinusSourceAlpha);
        myPipeline.blendTargetStateInfoArray[0].SetSourceAlphaBlendFactor(nn::gfx::BlendFactor_SourceAlpha);
        myPipeline.blendTargetStateInfoArray[0].SetDestinationAlphaBlendFactor(nn::gfx::BlendFactor_OneMinusSourceAlpha);
        myPipeline.blendTargetStateInfoArray[0].SetAlphaBlendFunction(nn::gfx::BlendFunction_Add);
    }
    if (noColor)
    {
        myPipeline.rasterizerStateInfo.SetRasterEnabled(false);
    }
    myPipeline.colorTargetStateCount = 1;
    myPipeline.colorTargetStateInfoArray[0].SetDefault();
    myPipeline.colorTargetStateInfoArray[0].SetFormat(DEMOColorBufferInfo.GetImageFormat());

    myPipeline.Initialize(&DEMODevice);
    return 1;
} // NOLINT(impl/function_size)

static void PrintInfo()
{
    MetricResult time, busytime, verts, pixels, texels, texelMem, prims;
    memset(&time, 0, sizeof(MetricResult));
    memset(&busytime, 0, sizeof(MetricResult));
    memset(&verts, 0, sizeof(MetricResult));
    memset(&pixels, 0, sizeof(MetricResult));
    memset(&texels, 0, sizeof(MetricResult));
    memset(&texelMem, 0, sizeof(MetricResult));
    memset(&prims, 0, sizeof(MetricResult));
#if NN_GFX_IS_TARGET_GX
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_TIME, &time);
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_GPU_TIME, &busytime);
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_VS_VERTICES_IN, &verts);
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_PS_PIXELS_IN, &pixels);
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_TEXEL_FETCH_COUNT, &texels);
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_TEX_MEM_BYTES_READ, &texelMem);
    GX2PerfGetResultByFrame(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_PRIMITIVES_IN, &prims);
#endif
#if NN_GFX_IS_TARGET_NVN
    MetricResult* pResults;
    static const int OS_MICROSECONDS = 1000000;

    pResults = perfBuffer.Map<MetricResult>();
    verts = pResults[NVN_COUNTER_TYPE_VERTEX_SHADER_INVOCATIONS];
    texels = pResults[NVN_COUNTER_TYPE_SAMPLES_PASSED];
    prims = pResults[NVN_COUNTER_TYPE_INPUT_PRIMITIVES];
    pixels = pResults[NVN_COUNTER_TYPE_FRAGMENT_SHADER_INVOCATIONS];
    time.u64Result = pResults[NVN_COUNTER_TYPE_SAMPLES_PASSED].timeStamp - pResults[NVN_COUNTER_TYPE_TIMESTAMP].timeStamp;
    perfBuffer.Unmap();
#endif
    if (noColor)
    {
        pixels.u64Result = 1280 * 720 * NUM_REPEATS;
    }
    u64 ops = 0;

    if (testMode == TEST_FMAD)
    {
            // muladd * 5ways * loop * pixels
        ops = 2 * 5 * 1000 * pixels.u64Result;
    }

    if (s_ticks) // dont count things on first frame
    {
        // Set Demo Font String
        DEMOFontPrintf(2, 1, "%s %u", testName, s_ticks);

        DEMOFontPrintf(2, 7, "time %u busy time %u verts %u pixels %u texels %u", (u32)time.u64Result / 1000, (u32)busytime.u64Result / 1000, (u32)verts.u64Result, (u32)pixels.u64Result, (u32)texels.u64Result);
        DEMOFontPrintf(2, 8, "gpu verts/s %u", (u32)(((float)verts.u64Result * sclk) / (float)(time.u64Result)));
        DEMOFontPrintf(2, 9, "gpu pixels/s %llu", (u64)(((float)pixels.u64Result * sclk) / (float)(time.u64Result)));
        DEMOFontPrintf(2, 10, "gpu texels/s %llu", (u64)(((float)texels.u64Result * sclk) / (float)(time.u64Result)));
        DEMOFontPrintf(2, 11, "gpu texel B/s %llu", (u64)(((float)texelMem.u64Result * sclk) / (float)(time.u64Result)));
        DEMOFontPrintf(2, 12, "gpu prims/s %u", (u32)(((float)prims.u64Result * sclk) / (float)(time.u64Result)));
        DEMOFontPrintf(2, 13, "cpu verts/s %u", (u32)(((float)verts.u64Result * OS_MICROSECONDS) / (float)OSTicksToMicroseconds(uDrawTime)));
        DEMOFontPrintf(2, 14, "gpu flops %llu", (u64)(((float)ops * sclk) / (float)(time.u64Result)));

            // Since the first few frames might be more volitale, skip them for perf
        if (skipFrames <= 0)
        {
            totalTime += time.u64Result;
            totalBusyTime += busytime.u64Result;
            totalVerts += verts.u64Result;
            totalPixels += pixels.u64Result;
            totalTexels += texels.u64Result;
            totalTexelMem += texelMem.u64Result;
            totalPrims += prims.u64Result;
            totalDrawTime += uDrawTime;
            totalOps += ops;

            if (minTime > time.u64Result) minTime = time.u64Result;
            if (minDrawTime > uDrawTime) minDrawTime = uDrawTime;

            if (maxTime < time.u64Result) maxTime = time.u64Result;
            if (maxDrawTime < uDrawTime) maxDrawTime = uDrawTime;
        }
        else
        {
            --skipFrames;
        }
    }
    else
    {
        //Set lastPrintTime to now
        lastPrintTime = OSGetTime();
    }

        // If we're not in the test suite mode, print statistics periodically
    if (testFrames < 0 && ((f32)OSTicksToMilliseconds(OSGetTime() - lastPrintTime) / 1000.0f ) >= 1.0f)
    {
        //Set lastPrintTime to now
        lastPrintTime = OSGetTime();

        OSReport("vertexRate %u\n", s_ticks);
        OSReport("time:      %llu ms\nbusy time: %llu ms\nverts:     %llu\npixels:    %llu\ntexels:    %llu\nmin:       %u ms\navg:       %u ms\nmax:       %u ms\n", totalTime / 1000, totalBusyTime / 1000, totalVerts, totalPixels, totalTexels, (u32)minTime / 1000, (u32)(totalTime / s_ticks / 1000), (u32)maxTime / 1000);
        OSReport("gpu verts/s  %u < %u < %u\n", (u32)(((float)verts.u64Result * sclk) / maxTime), (u32)(((float)totalVerts * sclk) / totalTime), (u32)(((float)verts.u64Result * sclk) / minTime));
        OSReport("gpu pixels/s %llu < %llu < %llu\n", (u64)(((float)pixels.u64Result * sclk) / maxTime), (u64)(((float)totalPixels * sclk) / totalTime), (u64)(((float)pixels.u64Result * sclk) / minTime));
        OSReport("gpu texels/s  %llu < %llu < %llu\n", (u64)(((float)texels.u64Result * sclk) / maxTime), (u64)(((float)totalTexels * sclk) / totalTime), (u64)(((float)texels.u64Result * sclk) / minTime));
        OSReport("gpu texel B/s  %llu < %llu < %llu\n", (u64)(((float)texelMem.u64Result * sclk) / maxTime), (u64)(((float)totalTexelMem * sclk) / totalTime), (u64)(((float)texelMem.u64Result * sclk) / minTime));
        OSReport("gpu prims/s  %u < %u < %u\n", (u32)(((float)prims.u64Result * sclk) / maxTime), (u32)(((float)totalPrims * sclk) / totalTime), (u32)(((float)prims.u64Result * sclk) / minTime));
        OSReport("cpu verts/s  %u < %u < %u\n", (u32)(((float)NUM_REPEATS * NUM_VERTICES * OS_MICROSECONDS) / maxDrawTime), (u32)(((float)NUM_REPEATS * NUM_VERTICES * OS_MICROSECONDS * s_ticks) / totalDrawTime), (u32)(((float)NUM_REPEATS * NUM_VERTICES * OS_MICROSECONDS) / minDrawTime));
        OSReport("gpu flops  %llu < %llu < %llu\n", (u64)(((float)ops * sclk) / maxTime), (u64)(((float)totalOps * sclk) / totalTime), (u64)(((float)ops * sclk) / minTime));

        OSReport("\n");
    }
}

// Begins a perf capture for the frame about to be drawn.
// GX2: clears and re-enables the GPU metrics, then opens the frame, pass and tag 0.
// NVN: applies the tiled cache action, resets every counter from SAMPLES_PASSED through
//      TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN and reports the start timestamp into the
//      perf buffer.
static void StartPerfMetricCapture()
{
#if NN_GFX_IS_TARGET_GX
    GX2Flush();
    GX2PerfMetricsClear(perfInfo);

    // Metrics read back later by PrintInfo()
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_TIME);
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_GPU_TIME);
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_VS_VERTICES_IN);
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_PS_PIXELS_IN);
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_TEXEL_FETCH_COUNT);
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_TEX_MEM_BYTES_READ);
    GX2PerfMetricEnable(perfInfo, GX2_PERF_TYPE_GPU_METRIC, GX2_PERF_U64_PRIMITIVES_IN);

    GX2PerfFrameStart(perfInfo);
    ASSERT(GX2PerfGetNumPasses(perfInfo) == 1 && "Perf requires multiple passes!");
    GX2PerfPassStart(perfInfo);
    GX2PerfTagStart(perfInfo, 0);
#endif
#if NN_GFX_IS_TARGET_NVN
    const NVNbufferAddress resultBase = perfBuffer.gpuAddress.ToData()->value;
    NVNcommandBuffer* const pCmd = DEMOCommandBuffer.ToData()->pNvnCommandBuffer;

    nvnCommandBufferSetTiledCacheAction(pCmd, tcOnOff);

    // Zero every counter so the end-of-frame reports cover this frame only
    const int firstCounter = (int)NVN_COUNTER_TYPE_SAMPLES_PASSED;
    const int lastCounter = (int)NVN_COUNTER_TYPE_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
    for (int counter = firstCounter; counter <= lastCounter; ++counter)
    {
        nvnCommandBufferResetCounter(pCmd, (NVNcounterType)counter);
    }

    // Start-of-frame timestamp
    nvnCommandBufferReportCounter(pCmd, NVN_COUNTER_TYPE_TIMESTAMP, PerfAddr(resultBase, NVN_COUNTER_TYPE_TIMESTAMP));
#endif
}

// Ends the perf capture started by StartPerfMetricCapture().
// GX2: closes tag 0, the pass and the frame.
// NVN: reports every counter from SAMPLES_PASSED through
//      TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN into its slot of the perf buffer, then
//      flushes GPU memory for unordered-access buffers.
static void EndPerfMetricCapture()
{
#if NN_GFX_IS_TARGET_GX
    GX2PerfTagEnd(perfInfo, 0);
    GX2PerfPassEnd(perfInfo);
    GX2PerfFrameEnd(perfInfo);
#endif
#if NN_GFX_IS_TARGET_NVN
    NVNcommandBuffer* const pCmd = DEMOCommandBuffer.ToData()->pNvnCommandBuffer;
    const NVNbufferAddress resultBase = perfBuffer.gpuAddress.ToData()->value;

    // One report per counter type, each written to the slot indexed by its type
    const int firstCounter = (int)NVN_COUNTER_TYPE_SAMPLES_PASSED;
    const int lastCounter = (int)NVN_COUNTER_TYPE_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN;
    for (int counter = firstCounter; counter <= lastCounter; ++counter)
    {
        nvnCommandBufferReportCounter(pCmd, (NVNcounterType)counter, PerfAddr(resultBase, counter));
    }

    DEMOCommandBuffer.FlushMemory(nn::gfx::GpuAccess_UnorderedAccessBuffer);
#endif
}

// The draw function for the rendering portions of this app
static int SceneDraw()
{
    DEMOGfxBeforeRender();

    nn::gfx::ColorTargetView* pCurrentScanBuffer = DEMOGetColorBufferView();

    // Clear buffers
    DEMOCommandBuffer.ClearColor( pCurrentScanBuffer, 0.4f, 0.1f, 0.1f, 1.0f, NULL);
    DEMOCommandBuffer.ClearDepthStencil(&DEMODepthBufferView, 1.0, 0, nn::gfx::DepthStencilClearMode_DepthStencil, NULL);

    // Restore context
    DEMOCommandBuffer.SetPipeline(&myPipeline.pipeline);
    DEMOCommandBuffer.SetViewportScissorState(&myViewport);

    // set up the color/depth buffers
    const nn::gfx::ColorTargetView* renderTargets[1];
    renderTargets[0] = pCurrentScanBuffer;
    DEMOCommandBuffer.SetRenderTargets(1, renderTargets, &DEMODepthBufferView);


    // Set Texture
    DEMOCommandBuffer.SetVertexBuffer(0, attribBuffer.gpuAddress, vertexSize, attribBuffer.size);

    if (useTexture)
    {
        DEMOCommandBuffer.SetDescriptorPool(&DEMOTextureDescriptorPool);
        DEMOCommandBuffer.SetDescriptorPool(&DEMOSamplerDescriptorPool);
        DEMOCommandBuffer.SetTextureAndSampler(samplerLoc, nn::gfx::ShaderStage_Pixel, pTexture->GetDescriptorSlot(0), mySamplerSlot);
    }

    if (testMode == TEST_BANDWIDTH_CPU_READ_ONLY)
    {
            // Allocate 4 MB of memory and read it
        const int MEM_SIZE = 4 * 1024 * 1024;
        u32* mem = (u32*)DEMOGfxAllocMEM1(MEM_SIZE, 1024);
        u32 total = 0;
        cpuBandwidthMem += MEM_SIZE;

        OSTime start = OSGetTime();

        for (int i = 0; i < MEM_SIZE / sizeof(u32); ++i)
            total += mem[i];

        cpuBandwidthTime += OSGetTime() - start;

        DEMOGfxFreeMEM1(mem);
    }
    else if (testMode == TEST_BANDWIDTH_CPU_WRITE_ONLY)
    {
            // Allocate 4 MB of memory and write it
        const int MEM_SIZE = 4 * 1024 * 1024;
        u8* mem = (u8*)DEMOGfxAllocMEM1(MEM_SIZE, 1024);
        cpuBandwidthMem += MEM_SIZE;

        OSTime start = OSGetTime();

        memset(mem, 0, MEM_SIZE);

        cpuBandwidthTime += OSGetTime() - start;

        DEMOGfxFreeMEM1(mem);
    }
    else if (testMode == TEST_BANDWIDTH_CPU_READ_WRITE)
    {
            // Allocate 2 MB of memory and copy it to another 2 MB of memory
        const int MEM_SIZE = 2 * 1024 * 1024;
        u8* memA = (u8*)DEMOGfxAllocMEM1(MEM_SIZE, 1024);
        u8* memB = (u8*)DEMOGfxAllocMEM1(MEM_SIZE, 1024);
        cpuBandwidthMem += MEM_SIZE * 2;

        OSTime start = OSGetTime();

        memcpy(memA, memB, MEM_SIZE);

        cpuBandwidthTime += OSGetTime() - start;

        DEMOGfxFreeMEM1(memA);
        DEMOGfxFreeMEM1(memB);
    }

    // Get the start count
    StartPerfMetricCapture();
    uDrawTime = OSGetTime();
    unsigned int i;

    if (testMode == TEST_CLEAR)
    {
        for (i = 0; i < NUM_REPEATS; i++)
        {
            float x = (float)(i + 1) / (float)NUM_REPEATS;
            if (!noColor)
            {
                DEMOCommandBuffer.ClearColor(pCurrentScanBuffer, 0.4f, x, 0.1f, 1.0f, NULL);
            }
            if (useDepthWrite)
            {
                DEMOCommandBuffer.ClearDepthStencil(&DEMODepthBufferView, x, 0, nn::gfx::DepthStencilClearMode_DepthStencil, NULL);
            }
        }
    }
    else
    {
        for (i = 0; i < NUM_REPEATS; i++)
        {
            DEMOCommandBuffer.Draw(nn::gfx::PrimitiveTopology_TriangleStrip, NUM_VERTICES, 0);
        }
    }
    EndPerfMetricCapture();
    DEMOCommandBuffer.End();
    DEMOQueue.ExecuteCommand(&DEMOCommandBuffer, &myFence);
    DEMOQueue.Flush();
    myFence.Sync(nn::TimeSpan::FromSeconds(1));
    uDrawTime = OSGetTime() - uDrawTime;

    // Draw Information
    DEMOCommandBuffer.Begin();
    PrintInfo();
    DEMOGfxDoneRender();

    s_ticks++;

    return 1; // 0 makes it exit
}

// Looks for the key `str` anywhere inside argv[i]. When found, the text that
// follows the key is parsed with atoi() into `var` and the result is logged.
// Expands in a scope that provides `char* p`, `argv` and the loop index `i`.
// Wrapped in do/while(0) so the macro is a single statement and is safe in
// unbraced if/else bodies.
#define GET_INT_ARG(str, var)                       \
    do {                                            \
        p = strstr(argv[i], (str));                 \
        if (p != 0)                                 \
        {                                           \
            (var) = atoi(p + strlen(str));          \
            OSReport("%s%i\n", (str), (var));       \
        }                                           \
    } while (0)

// Looks for the flag `str` anywhere inside argv[i]. When found, `var` is set
// to true and the variable's name is logged.
// Expands in a scope that provides `char* p`, `argv` and the loop index `i`.
// Wrapped in do/while(0) so the macro is a single statement and is safe in
// unbraced if/else bodies.
#define GET_BOOL_ARG(str, var)                      \
    do {                                            \
        p = strstr(argv[i], (str));                 \
        if (p != 0)                                 \
        {                                           \
            (var) = true;                           \
            OSReport("%s is true\n", #var);         \
        }                                           \
    } while (0)

// Looks for the key `str` anywhere inside argv[i]. When found, up to
// (max_len - 2) characters following the key are copied into the char buffer
// `var`, which is always left NUL-terminated. The copy ends at the first
// '\n', '\r' or ','. On the way, '[' -> '(', ']' -> ')', '*' -> '+' and
// '_' -> ' ' are substituted. The result is logged.
// Expands in a scope that provides `char* p`, `argv` and the loop index `i`.
// Wrapped in do/while(0) with parenthesized arguments so the macro is a single
// statement and `max_len` may be an expression.
#define GET_STR_ARG(str, var, max_len)              \
    do {                                            \
        p = strstr(argv[i], (str));                 \
        if (p != 0)                                 \
        {                                           \
            (var)[(max_len) - 2] = 0;               \
            strncpy((var), p + strlen(str), (max_len) - 2); \
            char* p2 = (var);                       \
            for( ; *p2 != '\n' && *p2 != '\r' && *p2 != ',' && *p2 != 0; p2++ ) \
            {                                       \
                if (*p2 == '[') { *p2 = '('; }      \
                if (*p2 == ']') { *p2 = ')'; }      \
                if (*p2 == '*') { *p2 = '+'; }      \
                if (*p2 == '_') { *p2 = ' '; }      \
            }                                       \
            *p2 = 0;                                \
            OSReport("%s%s\n", (str), (var));       \
        }                                           \
    } while (0)

// GET_LIST_ARG / ADD_LIST_ARG / END_LIST_ARG form one construct and must be
// used together, in that order:
//
//     GET_LIST_ARG("KEY=", variable);
//     ADD_LIST_ARG(VALUE_A);
//     ADD_LIST_ARG(VALUE_B);
//     END_LIST_ARG();
//
// The braces opened by one macro are closed by the next, so none of them is
// valid on its own. All three expand in a scope that provides `char* p`,
// `argv` and the loop index `i`.
//
// GET_LIST_ARG: looks for the key `str` inside argv[i]. When found, binds
// `var` (which must be a u32 lvalue) to the reference `temp`, remembers the
// key in `tempStr` and opens an if/else-if chain with a never-taken `if (0)`
// so every ADD_LIST_ARG can start with `} else if`.
#define GET_LIST_ARG(str, var)                      \
    {                                               \
        p = strstr(argv[i], str);                   \
        if (p != 0)                                 \
        {                                           \
            u32& temp = var;                        \
            const char* tempStr = str;              \
            if (0) {

// ADD_LIST_ARG: adds one accepted value. The text after the key is compared
// against the spelling of `val` over strlen(#val) characters only (a prefix
// match), and the first matching entry wins. On a match `var` receives the
// value of `val` and the choice is logged.
#define ADD_LIST_ARG(val)                           \
        } else if (strncmp(p + strlen(tempStr), #val, strlen(#val)) == 0) {   \
                temp = val;                         \
                OSReport("%s%s\n", tempStr, #val);

// END_LIST_ARG: closes the chain. If the key was present but no entry
// matched, the offending argument text is reported through DEMOPrintf and
// `var` is left unchanged.
#define END_LIST_ARG()                              \
            } else DEMOPrintf("Unrecognized argument: %s\n", p);    \
        }                                           \
    }

// Puts every test-related setting back to its default so that a new test run
// does not inherit anything from the previous one. Called first thing by
// ProcessArgs().
static void ResetVariables()
{
    // Render surface and workload size
    SURFACE_HEIGHT = 1;
    SURFACE_WIDTH = 1;
    NUM_REPEATS = 1;

    // Test identification and frame budget
    testDesc[0] = '\0';
    testName[0] = '\0';
    testFrames = 100;
    skipFrames = 30;

    // Output / blend / depth options
    useDepthWrite = false;
    useDepthRead = false;
    useBlend = false;
    solidColor = false;
    noColor = false;

    // Texture options
    useBilinear = false;
    use128Texture = false;
    useRGBATexture = false;
    useBC1Texture = false;

    // Shader stage and vertex attribute options
    useTexture = false;
    useNormal = false;
    useAll = false;
    useGS = false;

    // Extra array counts passed through the shaders
    useArrayColor2 = 0;
    useArrayColor3 = 0;
    useArrayColor4 = 0;
    useArrayVec2 = 0;
    useArrayVec3 = 0;
    useArrayVec4 = 0;
}

// Configures one test run from its argument list.
//
// All settings are first reset to their defaults, then every argument string
// is scanned for each known key (one string may carry several of them).
// Afterwards the surface size and repeat count are chosen from testMode and
// the options that imply each other are reconciled.
//
//   argc - number of entries in argv
//   argv - argument strings
static void ProcessArgs(int argc, char** argv)
{
    // Begin from the defaults for everything test related
    ResetVariables();

    // Scan every argument for every key.
    // Note that all arguments might be in a single string!
    for (int i = 0; i < argc; ++i)
    {
        OSReport("Got Argument: \"%s\"\n", argv[i]);

        // Scratch pointer written by each GET_* macro below
        char* p;

        // Which test to run
        GET_LIST_ARG("TEST_MODE=", testMode);
        ADD_LIST_ARG(TEST_VERTS);
        ADD_LIST_ARG(TEST_PIXELS);
        ADD_LIST_ARG(TEST_TEXELS);
        ADD_LIST_ARG(TEST_FMAD);
        ADD_LIST_ARG(TEST_BANDWIDTH_READ_ONLY);
        ADD_LIST_ARG(TEST_BANDWIDTH_WRITE_ONLY);
        ADD_LIST_ARG(TEST_BANDWIDTH_READ_WRITE);
        ADD_LIST_ARG(TEST_BANDWIDTH_TEXTURE);
        ADD_LIST_ARG(TEST_BANDWIDTH_CPU_READ_ONLY);
        ADD_LIST_ARG(TEST_BANDWIDTH_CPU_WRITE_ONLY);
        ADD_LIST_ARG(TEST_BANDWIDTH_CPU_READ_WRITE);
        ADD_LIST_ARG(TEST_CLEAR);
        END_LIST_ARG();

        // Output, depth and texture switches
        GET_BOOL_ARG("NO_COLOR", noColor);
        GET_BOOL_ARG("SOLID_COLOR", solidColor);
        GET_BOOL_ARG("USE_BLEND", useBlend);
        GET_BOOL_ARG("USE_DEPTH_READ", useDepthRead);
        GET_BOOL_ARG("USE_DEPTH_WRITE", useDepthWrite);
        GET_BOOL_ARG("USE_BC1_TEXTURE", useBC1Texture);
        GET_BOOL_ARG("USE_RGBA_TEXTURE", useRGBATexture);
        GET_BOOL_ARG("USE_128_TEXTURE", use128Texture);
        GET_BOOL_ARG("USE_BILINEAR", useBilinear);

        // Shader and attribute switches
        GET_BOOL_ARG("USE_GS", useGS);
        GET_BOOL_ARG("USE_ALL", useAll);
        GET_BOOL_ARG("USE_NORMAL", useNormal);
        GET_BOOL_ARG("USE_TEXTURE", useTexture);
        GET_INT_ARG("USE_ARRAY_VEC4=", useArrayVec4);
        GET_INT_ARG("USE_ARRAY_VEC3=", useArrayVec3);
        GET_INT_ARG("USE_ARRAY_VEC2=", useArrayVec2);
        GET_INT_ARG("USE_ARRAY_COLOR4=", useArrayColor4);
        GET_INT_ARG("USE_ARRAY_COLOR3=", useArrayColor3);
        GET_INT_ARG("USE_ARRAY_COLOR2=", useArrayColor2);

        // Reporting information
        GET_STR_ARG("TEST_NAME=", testName, MAX_NAME_LEN);
        GET_STR_ARG("TEST_DESC=", testDesc, MAX_DESC_LEN);
        GET_INT_ARG("TEST_FRAMES=", testFrames);
    }

#undef GET_INT_ARG
#undef GET_BOOL_ARG
#undef GET_STR_ARG
#undef GET_LIST_ARG
#undef ADD_LIST_ARG
#undef END_LIST_ARG

    // Surface size and number of repeats per frame for the selected mode
    switch (testMode)
    {
    case TEST_VERTS:
        // (1280 x 720 is kept here as a commented-out alternative)
        //SURFACE_WIDTH = 1280;
        //SURFACE_HEIGHT = 720;
        SURFACE_WIDTH = 256;
        SURFACE_HEIGHT = 256;
        NUM_REPEATS = 10;
        break;

    case TEST_PIXELS:
    case TEST_TEXELS:
        SURFACE_WIDTH = 1;
        SURFACE_HEIGHT = 1;
        NUM_REPEATS = 100;
        break;

    case TEST_BANDWIDTH_READ_ONLY:
    case TEST_BANDWIDTH_WRITE_ONLY:
    case TEST_BANDWIDTH_READ_WRITE:
    case TEST_BANDWIDTH_TEXTURE:
    case TEST_CLEAR:
        SURFACE_WIDTH = 1;
        SURFACE_HEIGHT = 1;
        NUM_REPEATS = 50;
        break;

    case TEST_FMAD:
        SURFACE_WIDTH = 1;
        SURFACE_HEIGHT = 1;
        NUM_REPEATS = 1;
        break;

    case TEST_BANDWIDTH_CPU_READ_ONLY:
    case TEST_BANDWIDTH_CPU_WRITE_ONLY:
    case TEST_BANDWIDTH_CPU_READ_WRITE:
        // The CPU tests issue no repeated GPU work
        SURFACE_WIDTH = 1;
        SURFACE_HEIGHT = 1;
        NUM_REPEATS = 0;
        break;

    default:
        DEMOAssert(0 && "Invalid test mode!\n");
    }

    // A plain USE_TEXTURE request selects the RGBA texture
    if (useTexture)
    {
        useRGBATexture = true;
    }

    // Any texture format turns texturing on, and texturing needs normals.
    // (useTexture alone already forced useRGBATexture above, so this single
    // test covers every way texturing can be enabled.)
    if (useBC1Texture || useRGBATexture || use128Texture)
    {
        useTexture = true;
        useNormal = true;
    }

    // Without a test name there is no frame limit
    if (testName[0] == 0)
    {
        testFrames = -1;
    }
}

// DEMO file-system handle for the results file; declared on WIN32 builds only.
// In this part of the file it is referenced only by the disabled
// (#ifdef NEVER) file-output code in FinalizeFile().
#ifdef WIN32
static DEMOFSFileInfo s_demoFSInfo;
#endif

// Placeholder for result-file setup. Intentionally empty: results are
// currently written to the console (see SaveResults), not to a file.
static void InitializeFile()
{
}

// Counterpart of InitializeFile(). Does nothing unless NEVER is defined; the
// disabled code would write the accumulated writeBuffer text to
// PERF_RESULT_FILE.
// NOTE(review): re-enabling it needs more than defining NEVER. Both
// PERF_RESULT_FILE definitions at the top of the file are commented out, and
// s_demoFSInfo is declared for WIN32 only.
static void FinalizeFile()
{
#ifdef NEVER
    // Dump resulting data to file
    // This is broken now, so we rely on the console output
    // the code is left in case we ever need to go back to file output
    if (DEMOFSOpenFileMode(PERF_RESULT_FILE, &s_demoFSInfo, "w") != DEMO_FS_RESULT_OK)
    {
        DEMOAssert(0 && "Failed to initialize file");
    }
    // Only the text written so far is saved, not the whole buffer
    u32 size = strlen(writeBuffer);
    DEMOFSWrite(&s_demoFSInfo, (void*)writeBuffer, size);
    DEMOFSCloseFile(&s_demoFSInfo);
#endif
}

// Converts the totals accumulated during the test into a single throughput
// figure for the current testMode, logs it, appends it as one CSV line
// ("name,description,value,unit\r\n") to writeBuffer and prints the whole
// buffer collected so far.
//
// All text formatting is bounded by the destination size, so an unexpectedly
// large value or long name/description is truncated and cannot overrun the
// local buffers.
static void SaveResults()
{
    const char* unit = "";
    float value = 0;

    // Pick the unit and scale the matching total by clock rate and elapsed time
    switch (testMode)
    {
    case TEST_VERTS:
        unit = "Mprims/sec";
        value = ((float)totalPrims * sclk) / totalTime / 1000 / 1000;
        break;
    case TEST_PIXELS:
    case TEST_CLEAR:
    default:
        unit = "Gpix/sec";
        value = ((float)totalPixels * sclk) / totalTime / 1000 / 1000 / 1000;
        break;
    case TEST_TEXELS:
        unit = "Gtex/sec";
        value = ((float)totalTexels * sclk) / totalTime / 1000 / 1000 / 1000;
        break;
    case TEST_FMAD:
        unit = "GFMADs";
        value = ((float)totalOps * sclk) / totalTime / 1000 / 1000 / 1000 / 2;
        break;
    case TEST_BANDWIDTH_READ_ONLY:
        unit = "GB/sec";
        value = ((float)totalPixels * sclk * bitsPerPixelRead) / totalTime / 1024 / 1024 / 1024;
        break;
    case TEST_BANDWIDTH_WRITE_ONLY:
        unit = "GB/sec";
        value = ((float)totalPixels * sclk * bitsPerPixelWrite) / totalTime / 1024 / 1024 / 1024;
        break;
    case TEST_BANDWIDTH_READ_WRITE:
        unit = "GB/sec";
        value = ((float)totalPixels * sclk * (bitsPerPixelRead + bitsPerPixelWrite)) / totalTime / 1024 / 1024 / 1024;
        break;
    case TEST_BANDWIDTH_TEXTURE:
        unit = "GB/sec";
        value = ((float)totalTexelMem * sclk) / totalTime / 1024 / 1024 / 1024;
        break;
    case TEST_BANDWIDTH_CPU_READ_ONLY:
    case TEST_BANDWIDTH_CPU_WRITE_ONLY:
    case TEST_BANDWIDTH_CPU_READ_WRITE:
        // The CPU tests are timed in OS ticks, not GPU clocks
        unit = "GB/sec";
        value = ((float)cpuBandwidthMem * 1000000) / OSTicksToMicroseconds(cpuBandwidthTime) / 1024 / 1024 / 1024;
        break;
    }

    // Assuming the number is from 0~999, we want 3 significant digits.
    // snprintf keeps a value outside that range from overrunning strValue.
    char strValue[10];
    if (value >= 100.0f)
        snprintf(strValue, sizeof(strValue), "%.0f", value);
    else if (value >= 10.0f)
        snprintf(strValue, sizeof(strValue), "%.1f", value);
    else
        snprintf(strValue, sizeof(strValue), "%.2f", value);

    OSReport("  Saving |%s|%s| %s|%s|\n", testName, testDesc, strValue, unit);

    // Dump resulting data to file
    {
        char tempBuffer[1024];
        (void)snprintf(tempBuffer, sizeof(tempBuffer), "%s,%s,%s,%s\r\n", testName, testDesc, strValue, unit);
        // append to our output data
        // NOTE(review): this append is not checked against the capacity of
        // writeBuffer. Confirm the allocation is large enough for every
        // result line of a full suite run.
        strcat(writeBuffer, tempBuffer);
        // The whole buffer (all results so far) is printed each time
#if defined(WIN32) || defined(CAFE)
        printf("%s", writeBuffer);
#else
        NN_LOG("%s", writeBuffer);
#endif
    }
}

// Runs one complete test: configures it from the arguments, initializes the
// DEMO test framework and the scene, renders frames until the frame budget is
// used up (or the demo stops running), then releases everything the scene
// created.
//
//   argc - number of entries in argv
//   argv - argument strings describing the test
static void RunTest(int argc, char** argv)
{
    ProcessArgs(argc, argv);

    DEMOTestInit(argc, argv);
    SceneInit();

    for (;;)
    {
        if (!DEMOIsRunning())
        {
            break;
        }

        SceneDraw();

        // Frames only count against the budget once the warm-up frames are over
        const bool warmUpDone = (skipFrames == 0);
        if (warmUpDone && testFrames > 0)
        {
            testFrames--;
        }

        if (testFrames != 0)
        {
            continue;
        }

        // Budget used up: record the result and stop
        SaveResults();
        break;
    }

    // Release the perf capture resources
#if NN_GFX_IS_TARGET_GX
    GX2PerfFree(perfInfo);
    DEMOGfxFreeMEM2(perfInfo);
#endif
#if NN_GFX_IS_TARGET_NVN
    perfBuffer.Finalize();
    perfBufferPool->Reset();
#endif

    // Release pipeline (shaders), fence and viewport state
    myPipeline.Finalize(&DEMODevice);
    myFence.Finalize(&DEMODevice);
    myViewport.Finalize(&DEMODevice);
    if (myViewportMem != NULL)
    {
        DEMOFree(myViewportMem);
        myViewportMem = NULL;
    }

    // Texture and sampler exist only for textured tests
    if (useTexture)
    {
        s_baseTexture.Finalize();
        mySampler.Finalize(&DEMODevice);
    }

    attribBuffer.Finalize(&DEMODevice);
    DEMOTestShutdown();
}

// Returns the number of argument strings in argv: the count of leading
// non-null entries, capped at MAX_ARGS.
static int GetArgc(char** argv)
{
    int count = 0;
    while (count < MAX_ARGS && argv[count])
    {
        ++count;
    }
    return count;
}

// Defined outside this file. The test body sets it to TRUE before DEMOInit().
// NOTE(review): presumably makes the DEMO graphics layer allocate from MEM1 - confirm.
extern BOOL gDemoGfxForceMEM1;

//extern "C" void TEST_MAIN()
// Test entry point.
//
// With extra host arguments (argc > 1), runs the single test they describe.
// With none, runs every entry of the built-in testList and logs how long each
// one took.
//
// On Horizon builds "DEMO_PERF_CONFIG=1" is appended to the argument list
// before the DEMO framework is initialized. The augmented list is kept alive
// until the tests have finished, because RunTest() reads argv again.
TEST(GfxPerfTestSuite, Run)
{
    int argc = nnt::GetHostArgc();
    char** argv = nnt::GetHostArgv();

    // Decided on the host's own argument count, before anything is appended
    const bool hasUserArgs = argc > 1;

#ifdef HORIZON_SETCLOCK
    // Select Console Mode
    char perfString[] = "DEMO_PERF_CONFIG=1";
    char** newArgv = reinterpret_cast< char** >( malloc( sizeof(char*) * ( argc + 1 ) ) );
    if ( newArgv )
    {
        for ( int i = 0; i < argc; i++ )
        {
            newArgv[ i ] = argv[ i ];
        }
        newArgv[ argc ] = perfString;
        argv = newArgv;
        argc++;
    }
    // If the allocation failed the host arguments are used unchanged
#endif

    gDemoGfxForceMEM1 = TRUE;

    DEMOInit();

    // Results of all tests are accumulated as text in writeBuffer
    writeBuffer = static_cast<char *>(DEMOAllocEx(MAX_WRITEBUFFER_SIZE, PPC_IO_BUFFER_ALIGN));
    memset(writeBuffer, 0, MAX_WRITEBUFFER_SIZE);

    DEMOGfxInit(argc, argv);
    DEMOFontInit();

#if NN_GFX_IS_TARGET_NVN
    perfBufferPool = DEMOGfxMemPool::AllocNewPool(PERF_MEM_SIZE, PERF_ALIGN, nn::gfx::MemoryPoolProperty_CpuUncached | nn::gfx::MemoryPoolProperty_GpuCached);
#endif
    InitializeFile();

    // Default runs the suite, otherwise do whatever the arguments say
    if ( hasUserArgs )
    {
        RunTest(argc, argv);
    }
    else
    {
        for (int i = 0; i < TEST_COUNT; ++i)
        {
            int newArgc = GetArgc(const_cast< char** >( testList[i] ));

            OSTime start = OSGetTime();
            RunTest(newArgc, const_cast< char** >( testList[i] ));
            OSReport("Test took %i ms\n", (int)OSTicksToMilliseconds(OSGetTime() - start));
        }
    }

#ifdef HORIZON_SETCLOCK
    // Only now is the augmented argument list no longer referenced.
    // Freeing it before RunTest() left argv dangling. free() accepts null.
    free( newArgv );
#endif

    FinalizeFile();

    DEMOFontShutdown();
    DEMOGfxShutdown();
    DEMOShutdown();

    SUCCEED();
}
