﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <g3ddemo_GfxUtility.h>

#include <cstdarg>
#include <cctype>
#include <algorithm>
#include <nw/g3d/ut/g3d_Inlines.h>
#include <nw/g3d/fnd/g3d_GfxManage.h>
#include <nw/g3d/fnd/g3d_GLUtility.h>
#include <nw/g3d/res/g3d_ResFile.h>
#include <nw/g3d/res/g3d_ResShader.h>

#include <stdio.h>

#ifdef _WIN32
#include <intrin.h>
#endif

namespace nw { namespace g3d { namespace demo {

//--------------------------------------------------------------------------------------------------
// Graphics

// Calculates the texture layout, sets up the texture and binds it to the render
// buffer, then calculates the aux buffer size. The computed sizes and alignments are
// logged through DebugPrint. Image and aux memory are attached later by Alloc().
void ColorBufferTexture::Setup()
{
    // Texture side: layout first, then setup.
    texture.CalcSize();
    texture.Setup();
    nw::g3d::DebugPrint("(ColorBuffer) base size: %d, mip size: %d, alignment: %d\n",
        texture.GetBaseSize(), texture.GetMipSize(), texture.GetAlignment());

    // Render buffer side: the register update variant depends on the isFTV flag.
    renderBuffer.SetTexture(&texture);
    if (!isFTV)
    {
        renderBuffer.UpdateRegs();
    }
    else
    {
        renderBuffer.UpdateRegsFTV();
    }

    renderBuffer.CalcAuxSize();
    nw::g3d::DebugPrint("(AuxBuffer) aux size: %d, alignment: %d\n",
        renderBuffer.GetAuxSize(), renderBuffer.GetAuxAlignment());
}

// Cleans up the texture object. The image/aux memory handed out by Alloc() is not
// released here; that is done by Free().
void ColorBufferTexture::Cleanup()
{
    texture.Cleanup();
}

// Allocates backing memory for the buffers selected by bufferType.
//   funcAlloc  : allocator, called as funcAlloc(size, alignment); must not be NULL.
//   bufferType : bit mask; COLOR_BUFFER selects the color image, AUX_BUFFER the aux
//                buffer (only allocated when its size is non-zero).
// NOTE(review): presumably Setup() must have run first so the queried sizes are
// valid -- confirm against callers.
void ColorBufferTexture::Alloc(FuncAlloc funcAlloc, bit32 bufferType /*= ALL_BUFFER*/)
{
    NW_G3D_ASSERT_NOT_NULL(funcAlloc);

    const bool wantsColor = (bufferType & COLOR_BUFFER) != 0;
    if (wantsColor)
    {
        // Base level and mip chain are covered by one allocation.
        size_t imageSize = texture.GetBaseSize() + texture.GetMipSize();
        void* imagePtr = funcAlloc(imageSize, texture.GetAlignment());
        NW_G3D_ASSERT_NOT_NULL(imagePtr);
        texture.SetImagePtrs(imagePtr, NULL);
        renderBuffer.SetImagePtrs(&texture);
#ifndef _WIN32
        //DCInvalidateRange(imagePtr, imageSize);
#endif
    }

    if (bufferType & AUX_BUFFER)
    {
        if (renderBuffer.GetAuxSize() > 0)
        {
            void* auxPtr = funcAlloc(renderBuffer.GetAuxSize(), renderBuffer.GetAuxAlignment());
            NW_G3D_ASSERT_NOT_NULL(auxPtr);
            renderBuffer.SetAuxPtr(auxPtr);
#ifndef _WIN32
            //DCInvalidateRange(auxPtr, renderBuffer.GetAuxSize());
#endif
        }
    }
}

// Releases the buffers selected by bufferType through funcFree.
//   funcFree   : deallocator; must not be NULL.
//   bufferType : bit mask; COLOR_BUFFER selects the color image, AUX_BUFFER the aux
//                buffer. A buffer whose pointer is NULL is skipped.
// The stored pointers are not reset by this function.
void ColorBufferTexture::Free(FuncFree funcFree, bit32 bufferType /*= ALL_BUFFER*/)
{
    NW_G3D_ASSERT_NOT_NULL(funcFree);

    if (bufferType & COLOR_BUFFER)
    {
        if (texture.GetBasePtr())
        {
            funcFree(texture.GetBasePtr());
        }
    }

    if (bufferType & AUX_BUFFER)
    {
        if (renderBuffer.GetAuxPtr())
        {
            funcFree(renderBuffer.GetAuxPtr());
        }
    }
}

//--------------------------------------------------------------------------------------------------

// Calculates the texture layout, sets up the texture and binds it to the depth render
// buffer. The clear values are set to depth 1.0 / stencil 0 and the HiZ buffer size is
// calculated. Sizes and alignments are logged through DebugPrint; image and HiZ memory
// are attached later by Alloc().
void DepthBufferTexture::Setup()
{
    texture.CalcSize();
    texture.Setup();
    nw::g3d::DebugPrint("(DepthBuffer) base size: %d, mip size: %d, alignment: %d\n",
        texture.GetBaseSize(), texture.GetMipSize(), texture.GetAlignment());

    renderBuffer.SetTexture(&texture);
    renderBuffer.UpdateRegs();
    // Default clear values: depth = 1.0f, stencil = 0.
    renderBuffer.SetClearDepthStencil(1.0f, 0);
    renderBuffer.CalcHiZSize();
    nw::g3d::DebugPrint("(HiZBuffer) hiz size: %d, alignment: %d\n",
        renderBuffer.GetHiZSize(), renderBuffer.GetHiZAlignment());
}

// Cleans up the texture object. The image/HiZ memory handed out by Alloc() is not
// released here; that is done by Free().
void DepthBufferTexture::Cleanup()
{
    texture.Cleanup();
}

// Allocates backing memory for the buffers selected by bufferType.
//   funcAlloc  : allocator, called as funcAlloc(size, alignment); must not be NULL.
//   bufferType : bit mask; DEPTH_BUFFER selects the depth image, HIZ_BUFFER the HiZ
//                buffer (only allocated when its size is non-zero). HiZ is enabled on
//                the render buffer once its memory has been attached.
// NOTE(review): presumably Setup() must have run first so the queried sizes are
// valid -- confirm against callers.
void DepthBufferTexture::Alloc(FuncAlloc funcAlloc, bit32 bufferType /*= ALL_BUFFER*/)
{
    NW_G3D_ASSERT_NOT_NULL(funcAlloc);

    const bool wantsDepth = (bufferType & DEPTH_BUFFER) != 0;
    if (wantsDepth)
    {
        // Base level and mip chain are covered by one allocation.
        size_t imageSize = texture.GetBaseSize() + texture.GetMipSize();
        void* imagePtr = funcAlloc(imageSize, texture.GetAlignment());
        NW_G3D_ASSERT_NOT_NULL(imagePtr);
        texture.SetImagePtrs(imagePtr, NULL);
        renderBuffer.SetImagePtrs(&texture);
#ifndef _WIN32
        //DCInvalidateRange(imagePtr, imageSize);
#endif
    }

    if (bufferType & HIZ_BUFFER)
    {
        if (renderBuffer.GetHiZSize() > 0)
        {
            void* hiZPtr = funcAlloc(renderBuffer.GetHiZSize(), renderBuffer.GetHiZAlignment());
            NW_G3D_ASSERT_NOT_NULL(hiZPtr);
            renderBuffer.SetHiZPtr(hiZPtr);
            renderBuffer.SetHiZEnable(GX2_TRUE);
#ifndef _WIN32
            //DCInvalidateRange(hiZPtr, renderBuffer.GetHiZSize());
#endif
        }
    }
}

// Releases the buffers selected by bufferType through funcFree.
//   funcFree   : deallocator; must not be NULL.
//   bufferType : bit mask; DEPTH_BUFFER selects the depth image, HIZ_BUFFER the HiZ
//                buffer. A buffer whose pointer is NULL is skipped.
// The stored pointers are not reset by this function.
void DepthBufferTexture::Free(FuncFree funcFree, bit32 bufferType /*= ALL_BUFFER*/)
{
    NW_G3D_ASSERT_NOT_NULL(funcFree);

    if (bufferType & DEPTH_BUFFER)
    {
        if (texture.GetBasePtr())
        {
            funcFree(texture.GetBasePtr());
        }
    }

    if (bufferType & HIZ_BUFFER)
    {
        if (renderBuffer.GetHiZPtr())
        {
            funcFree(renderBuffer.GetHiZPtr());
        }
    }
}

//--------------------------------------------------------------------------------------------------

// Returns the number of bytes of working memory Init() needs for the given arguments:
// one ColorBufferTexture per color buffer plus one DepthBufferTexture when a depth
// buffer is requested. Asserts that the buffer count and dimensions are within limits.
size_t FrameBuffer::CalcSize(const InitArg& arg)
{
    NW_G3D_ASSERT(0 <= arg.colorBufferCount && arg.colorBufferCount <= MAX_RENDER_TARGET);
    NW_G3D_ASSERT(arg.width <= MAX_WIDTH);
    NW_G3D_ASSERT(arg.height <= MAX_HEIGHT);

    const size_t colorBytes = sizeof(ColorBufferTexture) * arg.colorBufferCount;
    const size_t depthBytes = arg.useDepthBuffer ? sizeof(DepthBufferTexture) : 0;
    return colorBytes + depthBytes;
}

bool FrameBuffer::Init(const InitArg& arg, void* pBuffer, size_t bufferSize)
{
    if (bufferSize > CalcSize(arg))
    {
        return false;
    }

    m_Handle = 0;
    m_Width = arg.width;
    m_Height = arg.height;
    m_NumRenderTarget = arg.colorBufferCount;
    m_pColorBuffer = arg.colorBufferCount > 0 ? static_cast<ColorBufferTexture*>(pBuffer) : NULL;
    m_pDepthBuffer = arg.useDepthBuffer ? AddOffset<DepthBufferTexture>(
        pBuffer, sizeof(ColorBufferTexture) * arg.colorBufferCount) : NULL;

    for (int renderTarget = 0; renderTarget < m_NumRenderTarget; ++renderTarget)
    {
        ColorBufferTexture* pColorBufferTexture = new(&m_pColorBuffer[renderTarget]) ColorBufferTexture();
        pColorBufferTexture->isFTV = arg.colorBufferFTV;
        GX2Surface& surface = pColorBufferTexture->texture.GetGX2Texture()->surface;
        surface.width = m_Width;
        surface.height = m_Height;
        surface.format = (arg.colorBufferFormat != GX2_SURFACE_FORMAT_INVALID) ? arg.colorBufferFormat : surface.format;
    }
    if (m_pDepthBuffer)
    {
        DepthBufferTexture* pDepthBufferTexture = new(m_pDepthBuffer) DepthBufferTexture();
        GX2Surface& surface = pDepthBufferTexture->texture.GetGX2Texture()->surface;
        surface.width = m_Width;
        surface.height = m_Height;
        surface.format = GX2_SURFACE_FORMAT_TCD_R32_FLOAT;
        surface.use = GX2_SURFACE_USE_DEPTH_BUFFER_TEXTURE;
    }

    SetMipLevel(0);

    return true;
}

void FrameBuffer::Setup()
{
#ifdef _WIN32
    if (m_Handle == 0)
    {
        glGenFramebuffers(1, &m_Handle);
        static const GLenum s_TblDrawBuffer[] = {
            GL_COLOR_ATTACHMENT0,
            GL_COLOR_ATTACHMENT1,
            GL_COLOR_ATTACHMENT2,
            GL_COLOR_ATTACHMENT3,
            GL_COLOR_ATTACHMENT4,
            GL_COLOR_ATTACHMENT5,
            GL_COLOR_ATTACHMENT6,
            GL_COLOR_ATTACHMENT7,
        };
#if NW_G3D_GL_PORTABILITY < NW_G3D_GL_LEVEL2
        glFramebufferDrawBuffersEXT(m_Handle, m_NumRenderTarget, s_TblDrawBuffer);
#else
        glBindFramebuffer(GL_FRAMEBUFFER, m_Handle);
        glDrawBuffers(m_NumRenderTarget, s_TblDrawBuffer);
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
#endif
        NW_G3D_GL_ASSERT();
    }
#endif // _WIN32
    for (int renderTarget = 0; renderTarget < m_NumRenderTarget; ++renderTarget)
    {
        m_pColorBuffer[renderTarget].Setup();
    }
    if (m_pDepthBuffer)
    {
        m_pDepthBuffer->Setup();
    }
}

void FrameBuffer::Cleanup()
{
    for (int renderTarget = 0; renderTarget < m_NumRenderTarget; ++renderTarget)
    {
        m_pColorBuffer[renderTarget].Cleanup();
    }
    if (m_pDepthBuffer)
    {
        m_pDepthBuffer->Cleanup();
    }
#ifdef _WIN32
    if (m_Handle != 0)
    {
        glDeleteFramebuffers(1, &m_Handle);
        m_Handle = 0;
        NW_G3D_GL_ASSERT();
    }
#endif // _WIN32
}

// Allocates memory for the color and depth buffers selected by bufferType.
//   funcAlloc  : allocator forwarded to the individual buffers; must not be NULL.
//   bufferType : bit mask; the color buffers are visited when any COLOR_AUX_BUFFER bit
//                is set, the depth buffer when any DEPTH_HIZ_BUFFER bit is set. The
//                full mask is forwarded so each buffer picks its own sub-buffers.
void FrameBuffer::Alloc(FuncAlloc funcAlloc, bit32 bufferType /*= ALL_BUFFER*/)
{
    NW_G3D_ASSERT_NOT_NULL(funcAlloc);

    if (bufferType & COLOR_AUX_BUFFER)
    {
        for (int idxTarget = 0; idxTarget < m_NumRenderTarget; ++idxTarget)
        {
            m_pColorBuffer[idxTarget].Alloc(funcAlloc, bufferType);
        }
    }

    if (bufferType & DEPTH_HIZ_BUFFER)
    {
        if (m_pDepthBuffer)
        {
            m_pDepthBuffer->Alloc(funcAlloc, bufferType);
        }
    }
}

// Releases memory of the color and depth buffers selected by bufferType.
//   funcFree   : deallocator forwarded to the individual buffers; must not be NULL.
//   bufferType : bit mask; the color buffers are visited when any COLOR_AUX_BUFFER bit
//                is set, the depth buffer when any DEPTH_HIZ_BUFFER bit is set. The
//                full mask is forwarded so each buffer picks its own sub-buffers.
void FrameBuffer::Free(FuncFree funcFree, bit32 bufferType /*= ALL_BUFFER*/)
{
    NW_G3D_ASSERT_NOT_NULL(funcFree);

    if (bufferType & COLOR_AUX_BUFFER)
    {
        for (int idxTarget = 0; idxTarget < m_NumRenderTarget; ++idxTarget)
        {
            m_pColorBuffer[idxTarget].Free(funcFree, bufferType);
        }
    }

    if (bufferType & DEPTH_HIZ_BUFFER)
    {
        if (m_pDepthBuffer)
        {
            m_pDepthBuffer->Free(funcFree, bufferType);
        }
    }
}

void FrameBuffer::Load()
{
    for (int renderTarget = 0; renderTarget < m_NumRenderTarget; ++renderTarget)
    {
        m_pColorBuffer[renderTarget].renderBuffer.Load(
            static_cast<GX2RenderTarget>(renderTarget), m_Handle);
    }
    if (m_pDepthBuffer)
    {
        m_pDepthBuffer->renderBuffer.Load(m_Handle);
    }
    m_Viewport.Load();
    m_Scissor.Load();
#ifdef _WIN32
    glBindFramebuffer(GL_FRAMEBUFFER, m_Handle);
    glEnable(GL_FRAMEBUFFER_SRGB);
    NW_G3D_GL_ASSERT();
#endif
}

// Selects the mip level rendered to by all color buffers and the depth buffer, and
// resizes the viewport and scissor to that level's dimensions (never below 1x1).
void FrameBuffer::SetMipLevel(u32 mipLevel)
{
    for (int idxTarget = 0; idxTarget < m_NumRenderTarget; ++idxTarget)
    {
        m_pColorBuffer[idxTarget].renderBuffer.GetGX2ColorBuffer()->viewMip = mipLevel;
    }

    if (m_pDepthBuffer != NULL)
    {
        m_pDepthBuffer->renderBuffer.GetGX2DepthBuffer()->viewMip = mipLevel;
    }

    // Each mip level halves the dimensions; clamp so a level never collapses to zero.
    const u32 mipWidth = std::max(1u, m_Width >> mipLevel);
    const u32 mipHeight = std::max(1u, m_Height >> mipLevel);

    m_Viewport.SetViewport(
        0, 0, static_cast<float>(mipWidth), static_cast<float>(mipHeight), 0.0f, 1.0f);
    m_Scissor.SetScissor(0, 0, mipWidth, mipHeight);
}

//--------------------------------------------------------------------------------------------------

// Loads the shader described by path and sets up the underlying GfxShader.
//
// Windows (GL) builds: the vertex / geometry / fragment / compute shader source files
// named in path are loaded as text. When path.streamOut is given, that file is parsed
// as a whitespace-separated list of transform feedback varying names. All temporary
// buffers are released after GfxShader::Setup() returns.
//
// Other builds: the .gsh binary named by path.gsh is loaded into pGsh and the shader
// pointers are taken from it. pGsh is kept and released by Cleanup().
void SimpleShader::Setup(const Path& path)
{
#ifdef _WIN32
#if NW_G3D_GFXSHADER_SETUPARG_CONSTRUCTOR
    nw::g3d::GfxShader::SetupArg arg;
#else
    nw::g3d::GfxShader::SetupArg arg = { 0 };
#endif
    // Loaded file buffers: [0] vertex, [1] geometry, [2] fragment, [3] compute,
    // [4] stream-out varying list. Freed at the end of this function.
    void* src[] = { NULL, NULL, NULL, NULL, NULL };
    if (path.vertexShader)
    {
        src[0] = LoadFile(path.vertexShader);
        arg.pVertexShader = static_cast<char*>(src[0]);
    }
    if (path.geometryShader)
    {
        src[1] = LoadFile(path.geometryShader);
        arg.pGeometryShader = static_cast<char*>(src[1]);
    }
    if (path.fragmentShader)
    {
        src[2] = LoadFile(path.fragmentShader);
        arg.pFragmentShader = static_cast<char*>(src[2]);
    }
    if (path.computeShader)
    {
        src[3] = LoadFile(path.computeShader);
        arg.pComputeShader = static_cast<char*>(src[3]);
    }
    if (path.streamOut)
    {
        // Parse the *.so file and pack the varyings into arg.
        size_t size = 0;
        src[4] = LoadFile(path.streamOut, &size);
        char* so = static_cast<char*>(src[4]);

        // Pass 1: replace every whitespace character with a terminator so each name
        // becomes its own C string, and count the names.
        bool varying = false;
        for (int pos = 0, end = static_cast<int>(size); pos < end && so[pos]; ++pos)
        {
            // isspace() is only defined for values representable as unsigned char (or
            // EOF), so a plain char must be converted before the call.
            if (isspace(static_cast<unsigned char>(so[pos])))
            {
                so[pos] = '\0';
                if (varying)
                {
                    varying = false;
                }
            }
            else
            {
                if (!varying)
                {
                    ++arg.numVaryings;
                    varying = true;
                }
            }
        }

        // Pass 2: record a pointer to the first character of each name. The loop ends
        // once as many names as were counted in pass 1 have been collected.
        arg.ppTransformFeedbackVaryings = AllocMem2<const char*>(sizeof(char*) * arg.numVaryings);
        varying = false;
        for (int pos = 0, idxVarying = 0; idxVarying < arg.numVaryings; ++pos)
        {
            if (!so[pos] && varying)
            {
                varying = false;
            }
            else if (so[pos] && !varying)
            {
                arg.ppTransformFeedbackVaryings[idxVarying++] = &so[pos];
                varying = true;
            }
        }
    }

    nw::g3d::GfxShader::Setup(arg);

    // The sources and the varying table are only needed during Setup().
    for (size_t i = 0; i < sizeof(src) / sizeof(src[0]); ++i)
    {
        if (src[i])
        {
            FreeMem2(src[i]);
        }
    }
    if (arg.ppTransformFeedbackVaryings)
    {
        FreeMem2(arg.ppTransformFeedbackVaryings);
    }
#else
    nw::g3d::GfxShader::SetupArg arg;
    if (path.gsh)
    {
        pGsh = LoadFile(path.gsh, NULL, GX2_SHADER_ALIGNMENT);
        NW_G3D_ASSERTMSG(GFDGetAlignMode(pGsh) == GFD_ALIGN_MODE_ENABLE,
            ".gsh must be compiled with '-align'.\n    Path: %s\n", path.gsh);
        // Assumes the .gsh was compiled with the "-align" option.
        arg.pVertexShader = GFDGetVertexShaderPointer(0, pGsh);
        arg.pGeometryShader = GFDGetGeometryShaderPointer(0, pGsh);
        arg.pFragmentShader = GFDGetPixelShaderPointer(0, pGsh);
#if NW_G3D_COMPUTE_SHADER_ENABLE
        arg.pComputeShader = GFDGetComputeShaderPointer(0, pGsh);
#endif
    }
    nw::g3d::GfxShader::Setup(arg);
#endif
}

// Releases what Setup() created. Windows (GL) builds clean up the GfxShader; other
// builds free the .gsh binary held in pGsh.
// NOTE(review): GfxShader::Cleanup() is not called on non-Windows builds -- confirm
// this is intended.
void SimpleShader::Cleanup()
{
#ifdef _WIN32
    nw::g3d::GfxShader::Cleanup();
#else
    FreeMem2(pGsh);
#endif
}

//--------------------------------------------------------------------------------------------------

// Resources shared by all ScreenInfo instances. They are created by the first
// ScreenInfo::Setup() call and released by the last ScreenInfo::Cleanup() call,
// tracked through s_RefCount.
int ScreenInfo::s_RefCount = 0;
SimpleShader ScreenInfo::s_Shader;
nw::g3d::GfxPolygonCtrl ScreenInfo::s_PolygonCtrl;
nw::g3d::GfxDepthCtrl ScreenInfo::s_DepthCtrl;
nw::g3d::GfxAlphaTest ScreenInfo::s_AlphaTest;
nw::g3d::GfxColorCtrl ScreenInfo::s_ColorCtrl;
nw::g3d::GfxBlendCtrl ScreenInfo::s_BlendCtrl;
nw::g3d::GfxChannelMasks ScreenInfo::s_ChannelMask;
nw::g3d::GfxSampler ScreenInfo::s_Sampler;
// Font texture resource file ("font.bfres"); NULL until the first Setup().
nw::g3d::ResFile* ScreenInfo::s_pTexture = NULL;

// Allocates the packed vertex buffer for up to quadCount quads, applies the default
// font size and colors and, for the first instance only, creates the shared shader,
// font texture, sampler and render state objects. Every call increments s_RefCount.
void ScreenInfo::Setup(u32 quadCount)
{
    // quad count * vertices per quad * sizeof(u32): one packed u32 per vertex.
    const size_t bufferSize = quadCount * 4 * sizeof(u32);
    m_Buffer.SetData(AllocMem2(bufferSize), bufferSize);
    m_Buffer.Setup();

    // Per-instance defaults.
    Reset();
    SetFontSize(2.5f);
    SetFontColor(0xFF, 0xFF, 0xFF);
    SetShapeColor(0xFF, 0xFF, 0xFF);

    const bool isFirstInstance = (s_RefCount == 0);
    if (isFirstInstance)
    {
        // Shader.
        SimpleShader::Path path = {
            "shader/font.gsh",
            "shader/font_vertex.glsl",
            NULL,
            "shader/font_fragment.glsl",
            NULL
        };
        s_Shader.Setup(path);

        // Font texture.
        void* pTex = LoadFile("font.bfres", NULL, 8192);
        NW_G3D_ASSERT(nw::g3d::ResFile::IsValid(pTex));
        s_pTexture = nw::g3d::ResFile::ResCast(pTex);
        s_pTexture->Setup();

        // Sampler: bilinear filtering, clamped to a black border.
        s_Sampler.SetDefault();
        s_Sampler.SetClampX(GX2_TEX_CLAMP_CLAMP_BORDER);
        s_Sampler.SetClampY(GX2_TEX_CLAMP_CLAMP_BORDER);
        s_Sampler.SetMagFilter(GX2_TEX_XY_FILTER_BILINEAR);
        s_Sampler.SetMinFilter(GX2_TEX_XY_FILTER_BILINEAR);
        s_Sampler.SetMipFilter(GX2_TEX_MIP_FILTER_LINEAR);
        s_Sampler.SetBorderType(GX2_TEX_BORDER_CLEAR_BLACK);
        s_Sampler.Setup();

        // Render state: no culling, no depth test, blending enabled on target 0.
        s_PolygonCtrl.SetDefault();
        s_PolygonCtrl.SetCullFront(GX2_FALSE);
        s_PolygonCtrl.SetCullBack(GX2_FALSE);
        s_DepthCtrl.SetDefault();
        s_DepthCtrl.SetDepthTestEnable(GX2_DISABLE);
        s_AlphaTest.SetDefault();
        s_ColorCtrl.SetDefault();
        s_ColorCtrl.SetBlendEnableMask(0x1);
        s_BlendCtrl.SetDefault();
        s_ChannelMask.SetDefault();
    }

    ++s_RefCount;
}

// Releases this instance's vertex buffer and, when this is the last live instance,
// the shared shader, font texture and sampler. Every call decrements s_RefCount.
void ScreenInfo::Cleanup()
{
    // Per-instance buffer.
    m_Buffer.Cleanup();
    FreeMem2(m_Buffer.GetData());
    m_Buffer.SetData(NULL, 0);

    // Shared resources go away together with the last instance.
    const bool isLastInstance = (s_RefCount == 1);
    if (isLastInstance)
    {
        s_Shader.Cleanup();
        s_pTexture->Cleanup();
        FreeMem2(s_pTexture);
        s_Sampler.Cleanup();
    }

    --s_RefCount;
    NW_G3D_ASSERT(s_RefCount >= 0);
}

// Sets the glyph size in screen percentages.
//   pctH              : glyph height in percent.
//   screenAspectRatio : screen width / height; the glyph width is derived from the
//                       height, this ratio and the font's 8:16 aspect ratio.
void ScreenInfo::SetFontSize(float pctH, float screenAspectRatio /*= 16.0f / 9.0f*/)
{
    // Glyph width : height.
    static const float s_GlyphAspect = 8.0f / 16.0f;

    m_FontSize[1] = pctH;
    m_FontSize[0] = pctH / screenAspectRatio * s_GlyphAspect;
}

// Sets the color used by the shape emitters (PutRect, PutTriangle, ...). The color is
// quantized: the top 2 bits of red, green and blue are turned into a palette uv, and
// the top 3 bits of alpha are kept.
void ScreenInfo::SetShapeColor(u8 r, u8 g, u8 b, u8 a /*= 255*/)
{
    // Compute the uv that corresponds to the palette position.
    // u: second-highest blue bit.
    const int paletteU = (b & 0x40) >> 6;
    // v: red top 2 bits -> bits 4..3, green top 2 bits -> bits 2..1, blue top bit -> bit 0.
    const int redBits = (r & 0xC0) >> 3;
    const int greenBits = (g & 0xC0) >> 5;
    const int blueBit = (b & 0x80) >> 7;

    m_ShapeColor[0] = paletteU;
    m_ShapeColor[1] = redBits | greenBits | blueBit;
    // Pack the upper 3 bits of alpha.
    m_ShapeColor[2] = (a & 0xE0) >> 5;
    //m_ShapeColor[3] = 0; // padding
}

// Sets the font color. Only the most significant bit of each channel is kept, giving
// a 3-bit color (red = bit 2, green = bit 1, blue = bit 0) that is applied to all four
// glyph corners.
void ScreenInfo::SetFontColor(u8 r, u8 g, u8 b)
{
    // Pack the most significant bit of each u8 into 3 bits in total.
    const u8 packedColor = (r & 0x80) >> 5 | (g & 0x80) >> 6 | (b & 0x80) >> 7;
    for (int idxCorner = 0; idxCorner < 4; ++idxCorner)
    {
        m_FontColor[idxCorner] = packedColor;
    }
}

// Converts a horizontal position in percent to the packed integer x coordinate:
// the value is clamped to [0, PERCENT], scaled to [0, MASK_X] and rounded to nearest.
u32 ScreenInfo::QuantizeX(float pctX)
{
    const float normalized = Math::Clamp(pctX / PERCENT, 0.0f, 1.0f);
    return static_cast<u32>(normalized * MASK_X + 0.5f);
}

// Converts a vertical position in percent to the packed integer y coordinate:
// the value is clamped to [0, PERCENT], scaled to [0, MASK_Y] and rounded to nearest.
u32 ScreenInfo::QuantizeY(float pctY)
{
    const float normalized = Math::Clamp(pctY / PERCENT, 0.0f, 1.0f);
    return static_cast<u32>(normalized * MASK_Y + 0.5f);
}

void ScreenInfo::PutRect(float pctX, float pctY, float pctW, float pctH)
{
    NW_G3D_ASSERT((m_Count + 4) * sizeof(u32) <= m_Buffer.GetSize());
    u32 x0 = QuantizeX(pctX);
    u32 y0 = QuantizeY(pctY);
    u32 x1 = QuantizeX(pctX + pctW);
    u32 y1 = QuantizeY(pctY + pctH);
    Pack(x0, y0, m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
    Pack(x1, y0, m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
    Pack(x1, y1, m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
    Pack(x0, y1, m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
}

// Appends a horizontal line starting at (pctX, pctY) with length pctW. The line is a
// thin rectangle centered on pctY, extending 1.5 vertical quantization steps to each
// side.
void ScreenInfo::PutLineH(float pctX, float pctY, float pctW)
{
    const float halfThickness = 1.5f * PERCENT / MASK_Y;
    PutRect(pctX, pctY - halfThickness, pctW, halfThickness * 2.0f);
}

// Appends a vertical line starting at (pctX, pctY) with length pctH. The line is a
// thin rectangle centered on pctX, extending 1.5 horizontal quantization steps to each
// side.
void ScreenInfo::PutLineV(float pctX, float pctY, float pctH)
{
    const float halfThickness = 1.5f * PERCENT / MASK_X;
    PutRect(pctX - halfThickness, pctY, halfThickness * 2.0f, pctH);
}

void ScreenInfo::PutTriangle(
    float pctX0, float pctY0, float pctX1, float pctY1, float pctX2, float pctY2)
{
    Pack(QuantizeX(pctX0), QuantizeY(pctY0), m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
    Pack(QuantizeX(pctX1), QuantizeY(pctY1), m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
    Pack(QuantizeX(pctX2), QuantizeY(pctY2), m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
    Pack(QuantizeX(pctX0), QuantizeY(pctY0), m_ShapeColor[0], m_ShapeColor[1], m_ShapeColor[2]);
}

// Appends one glyph quad. The rectangle is given in already-quantized coordinates.
// The glyph is selected through the texture coordinates: u runs from 0 to 1 and v
// from charCode to charCode + 1. Asserts that the buffer has room for four vertices.
void ScreenInfo::PutChar(u32 left, u32 top, u32 right, u32 bottom, u32 charCode)
{
    NW_G3D_ASSERT((m_Count + 4) * sizeof(u32) <= m_Buffer.GetSize());

    const u32 vTop = charCode;
    const u32 vBottom = charCode + 1;

    // Corner order: top-left, top-right, bottom-right, bottom-left.
    Pack(left, top, 0, vTop, m_FontColor[0]);
    Pack(right, top, 1, vTop, m_FontColor[1]);
    Pack(right, bottom, 1, vBottom, m_FontColor[3]);
    Pack(left, bottom, 0, vBottom, m_FontColor[2]);
}

void ScreenInfo::PutString(float pctX, float pctY, const char* str)
{
    struct Position
    {
        Position(float originX, float originY, float fontW, float fontH, float deltaX, float deltaY)
            : originX(originX), originY(originY)
            , fontW(fontW), fontH(fontH)
            , deltaX(deltaX), deltaY(deltaY)
        {
            UpdateCol(0);
            UpdateRow(0);
        }

        void UpdateCol(int newCol)
        {
            col = newCol;
            curX0 = QuantizeX(originX + deltaX * col);
            curX1 = QuantizeX(originX + deltaX * col + fontW);
        }

        void UpdateRow(int newRow)
        {
            row = newRow;
            curY0 = QuantizeY(originY + deltaY * row);
            curY1 = QuantizeY(originY + deltaY * row + fontH);
        }

        void Next() { UpdateCol(col + 1); }

        void NewLine()
        {
            UpdateCol(0);
            UpdateRow(row + 1);
        }

        void operator=(const Position&) {} // 警告対策

        const float originX, originY, fontW, fontH, deltaX, deltaY;
        int row, col;
        u32 curX0, curY0, curX1, curY1;
    } pos(pctX, pctY, m_FontSize[0], m_FontSize[1], m_FontSize[0], m_FontSize[1]);

    while (char c = *str++)
    {
        if (0x20 < c && c < 0x7F)
        {
            PutChar(pos.curX0, pos.curY0, pos.curX1, pos.curY1, c);
            pos.Next();
        }
        else if (c == ' ')
        {
            pos.Next();
        }
        else if (c == '\n')
        {
            pos.NewLine();
        }
    }
}

// printf-style variant of PutString(). The text is formatted into a 512-byte local
// buffer, so longer output is truncated by vsnprintf.
void ScreenInfo::PutStringFmt( float pctX, float pctY, const char* format, ... )
{
    char text[512];

    va_list args;
    va_start(args, format);
    vsnprintf(text, sizeof(text), format, args);
    va_end(args);

    PutString(pctX, pctY, text);
}

void ScreenInfo::DumpTexture()
{
    int numCol = 16;
    for (int col = 0; col < numCol; ++ col)
    {
        u32 x0 = MASK_X * col / numCol;
        u32 x1 = MASK_X * (col + 1) / numCol;
        u32 v0 = 0x80 * col / numCol;
        u32 v1 = 0x80 * (col + 1) / numCol;
        u32 y1 = MASK_Y;
        if (col == numCol - 1)
        {
            y1 -= MASK_Y / (0x80 / numCol);
            v1 = 0x80 - 1;
        }
        Pack(x0, 0, 0, v0, 0x7);
        Pack(x1, 0, 1, v0, 0x7);
        Pack(x1, y1, 1, v1, 0x7);
        Pack(x0, y1, 0, v1, 0x7);
    }
}
// Flushes the CPU cache for the part of the vertex buffer that is currently in use
// (m_Count packed values).
void ScreenInfo::DCFlush() const
{
    nw::g3d::CPUCache::Flush(m_Buffer.GetData(), m_Count * sizeof(bit32));
}


// Draws everything that has been appended since the last Reset() as quads. Does
// nothing when no vertices are queued. The render state is not loaded here; see
// LoadState().
void ScreenInfo::Draw() const
{
    if (m_Count <= 0)
    {
        return;
    }

    m_Buffer.DCFlush();
    m_Buffer.LoadIndices();
    s_Shader.Load();

    // The packed values themselves are submitted as 32-bit indices.
#ifdef _WIN32
    glDrawElements(GL_QUADS, m_Count, GL_UNSIGNED_INT, 0);
#else
    GX2DrawIndexed(GX2_PRIMITIVE_QUADS, m_Count, GX2_INDEX_FORMAT_U32, m_Buffer.GetData());
#endif
}

// Loads the shared font texture, sampler and render state objects used by Draw().
// Requires the shared resources created by Setup() (s_pTexture is dereferenced).
void ScreenInfo::LoadState()
{
    // Font texture and its sampler go to fragment slot 0.
    s_pTexture->GetTexture(0)->GetGfxTexture()->LoadFragmentTexture(0);
    s_Sampler.LoadFragmentSampler(0);

    s_PolygonCtrl.Load();
    s_DepthCtrl.Load();
    s_AlphaTest.Load();
    s_ColorCtrl.Load();
    s_BlendCtrl.Load(GX2_RENDER_TARGET_0);
    s_ChannelMask.Load();
}

// Packs one vertex (position x/y, texture coordinate u/v and color c) into a single
// 32-bit value and appends it to the buffer. No bounds checking is done here; callers
// are expected to verify the remaining capacity.
void ScreenInfo::Pack(u32 x, u32 y, u32 u, u32 v, u32 c)
{
    // Each field is shifted to its own position in the word.
    const bit32 packed =
        (x << SHIFT_X) |
        (y << SHIFT_Y) |
        (u << SHIFT_U) |
        (v << SHIFT_V) |
        (c << SHIFT_COLOR);

    bit32* const pVertices = static_cast<bit32*>(m_Buffer.GetData());
    pVertices[m_Count] = packed;
    ++m_Count;
}

//--------------------------------------------------------------------------------------------------

// Initialises the meter: layout parameters, default colors, all time span logs, the
// GPU clock objects and the ScreenInfo used for drawing.
void ProcessMeter::Setup()
{
    m_CurrentLog = 0;
    // Layout of the meter on screen.
    m_ScaleX = m_FrameScaleX = 0.97f;
    m_OffsetX = 1.5f;
    m_OffsetY = 85.0f;
    m_RowHeight = 2.0f;
    m_SysAlpha = 0x7F;
    // Default RGBA color per group; rows are indexed by the Group value.
    // NOTE(review): the first five rows presumably correspond to the GPU/CPU user
    // groups -- confirm against the Group enum.
    u8 defaultColor[][4] = {
        { 0xFF, 0x00, 0xFF, 0xFF },
        { 0xFF, 0x00, 0x00, 0xFF },
        { 0xFF, 0x00, 0x00, 0xFF },
        { 0xFF, 0x00, 0x00, 0xFF },
        { 0xFF, 0x00, 0x00, 0xFF },
        // frame
        { 0x7F, 0x7F, 0x7F, m_SysAlpha },
        // sys
        { 0xFF, 0x00, 0xFF, m_SysAlpha },
        { 0xFF, 0x00, 0x00, m_SysAlpha },
        { 0xFF, 0x00, 0x00, m_SysAlpha },
        { 0xFF, 0x00, 0x00, m_SysAlpha },
        { 0xFF, 0x00, 0x00, m_SysAlpha }
    };
    memcpy(m_DefaultColor, defaultColor, sizeof(defaultColor));
    memset(m_FontColor, 0xFF, 4);

    // Reset every log: no recorded spans, default group and color for each slot.
    for (int idxLog = 0; idxLog < sizeof(m_Log) / sizeof(Log); ++idxLog)
    {
        Log& log = m_Log[idxLog];
#ifdef _WIN32
        log.numGPUTimeSpan = log.numCPUTimeSpan = 0;
#else
        log.numGPUTimeSpan.u.s32 = log.numCPUTimeSpan.u.s32 = 0;
#endif
        for (int idxTimeSpan = 0; idxTimeSpan < MAX_GPU_TIMESPAN; ++idxTimeSpan)
        {
            Log::GPUTimeSpan& timespan = log.gpu[idxTimeSpan];
            timespan.group = GROUP_GPU;
            memcpy(timespan.color, m_DefaultColor[timespan.group], sizeof(timespan.color));
        }
        // CPU slot 0 is handled separately below, so this loop starts at 1.
        for (int idxTimeSpan = 1; idxTimeSpan < MAX_CPU_TIMESPAN; ++idxTimeSpan)
        {
            Log::CPUTimeSpan& timespan = log.cpu[idxTimeSpan];
            timespan.group = GROUP_CPU;
            memcpy(timespan.color, m_DefaultColor[timespan.group], sizeof(timespan.color));
        }
        // CPU slot 0 holds the frame span.
        {
            Log::CPUTimeSpan& timespan = log.cpu[0];
            timespan.group = GROUP_FRAME;
            memcpy(timespan.color, m_DefaultColor[timespan.group], sizeof(timespan.color));
        }
    }

    // One contiguous, 32-byte aligned block holds the begin and end GPUClock of every
    // GPU time span of every log.
    const int BUFFER_COUNT = sizeof(m_Log) / sizeof(Log);
    const int BEGIN_END = 2;
    m_pClockBuffer = AllocMem2<GPUClock>(sizeof(GPUClock) * MAX_GPU_TIMESPAN * BUFFER_COUNT * BEGIN_END, 32);
    int idxClock = 0;
    for (int idxLog = 0; idxLog < BUFFER_COUNT; ++idxLog)
    {
        Log& log = m_Log[idxLog];
        for (int idxTimeSpan = 0; idxTimeSpan < MAX_GPU_TIMESPAN; ++idxTimeSpan)
        {
            Log::GPUTimeSpan& timespan = log.gpu[idxTimeSpan];
            // Construct the clocks in place; begin and end are adjacent in the block.
            timespan.begin = new(&m_pClockBuffer[idxClock++]) GPUClock();
            timespan.end = new(&m_pClockBuffer[idxClock++]) GPUClock();
            timespan.begin->Setup();
            timespan.end->Setup();
        }
    }
    m_ScreenInfo.Setup(MAX_QUAD);
}

void ProcessMeter::Cleanup()
{
    for (int idxLog = 0; idxLog < sizeof(m_Log) / sizeof(Log); ++idxLog)
    {
        Log& log = m_Log[idxLog];
        for (int idxTimeSpan = 0; idxTimeSpan < MAX_GPU_TIMESPAN; ++idxTimeSpan)
        {
            Log::GPUTimeSpan& timespan = log.gpu[idxTimeSpan];
            timespan.begin->Cleanup();
            timespan.end->Cleanup();
        }
    }
    m_ScreenInfo.Cleanup();
    FreeMem2(m_pClockBuffer);
}

// Rebuilds the meter's draw data from the log preceding the current one: frame
// background, one bar per recorded GPU/CPU time span, frame divider lines and the
// per-group percentage text. The consumed log is cleared afterwards. Nothing is
// emitted when that log holds no CPU time span.
void ProcessMeter::Calc()
{
    m_ScreenInfo.Reset();
    const int BUFFER_COUNT = sizeof(m_Log) / sizeof(Log);
    const int BEGIN_END = 2;
    // Refer to the log that is one older than the current one.
    int logIndex = (m_CurrentLog <= 0) ? BUFFER_COUNT - 1 : m_CurrentLog - 1;

    Log& log = m_Log[logIndex];
    if (log.GetNumCPUTimeSpanNonAtomic() > 0)
    {
        // Tick count of one frame at 60 frames per second.
        static const CPUClock::Tick s_Frame = CPUClock::GetFreq() / 60;
        // CPU span 0 is the frame span; the other CPU spans are placed relative to it.
        CPUClock::Tick begin = log.cpu[0].begin;
        CPUClock::Tick frame = log.cpu[0].end - begin;
        float pctFrame = CPUClock::ToPercents(frame);
        // Number of 60 Hz frames the measured frame spans; the bar is scaled to fit.
        int numFrame = static_cast<int>(frame / s_Frame) + 1;
        m_FrameScaleX = m_ScaleX / numFrame;

        // background
        PutFrameBG(pctFrame, log.cpu[0].color);

        // Accumulated percentage per group.
        float total[NUM_GROUP];
        std::fill_n(total, static_cast<int>(NUM_GROUP), 0.0f);

        // Invalidate the whole buffer at once; otherwise the range would not be a
        // multiple of 32 bytes.
        CPUCache::Invalidate(m_pClockBuffer, sizeof(GPUClock) * MAX_GPU_TIMESPAN * BUFFER_COUNT * BEGIN_END);

        // GPU
        for (int idxTimeSpan = 0, numTimeSpan = log.GetNumGPUTimeSpanNonAtomic();
            idxTimeSpan < numTimeSpan; ++idxTimeSpan)
        {
            Log::GPUTimeSpan& timespan = log.gpu[idxTimeSpan];

            // Skip spans whose begin or end clock is not ready.
            if (!timespan.begin->IsReady() || !timespan.end->IsReady())
            {
                continue;
            }
            GPUClock::Tick beginGPU = timespan.begin->GetTimeStamp();
            // GPU spans are placed relative to the GPU time taken at frame begin.
            float pctBegin = GPUClock::ToPercents(beginGPU - log.beginGPU);
            float pctTimeSpan = GPUClock::ToPercents(timespan.end->GetTimeStamp() - beginGPU);
            total[timespan.group] += pctTimeSpan;
            PutTimeSpan(pctBegin, pctTimeSpan, timespan.group, timespan.color);
        }

        // CPU (index 0 is the frame span itself and is skipped)
        for (int idxTimeSpan = 1, numTimeSpan = log.GetNumCPUTimeSpanNonAtomic();
            idxTimeSpan < numTimeSpan; ++idxTimeSpan)
        {
            Log::CPUTimeSpan& timespan = log.cpu[idxTimeSpan];
            float pctBegin = CPUClock::ToPercents(timespan.begin - begin);
            float pctTimeSpan = CPUClock::ToPercents(timespan.end - timespan.begin);
            total[timespan.group] += pctTimeSpan;
            PutTimeSpan(pctBegin, pctTimeSpan, timespan.group, timespan.color);
        }

        // line
        PutFrameFG(numFrame, m_DefaultColor[GROUP_FRAME]);

        // text
        m_ScreenInfo.SetFontColor(m_FontColor[0], m_FontColor[1], m_FontColor[2]);
        m_ScreenInfo.SetFontSize(m_RowHeight);
        const char* title[] = { "GPU ", "CPU ", "CPU0", "CPU1", "CPU2" };
        for (int idxGroup = 0; idxGroup < sizeof(title) / sizeof(*title); ++idxGroup)
        {
            m_ScreenInfo.PutStringFmt(m_OffsetX, m_OffsetY + m_RowHeight * idxGroup,
                "%s: %.2f%%", title[idxGroup], total[idxGroup]);
        }
        // The frame total goes on the row below the group rows.
        m_ScreenInfo.PutStringFmt(m_OffsetX, m_OffsetY + m_RowHeight * NUM_GROUP_METER,
            "Total: %.2f%%", pctFrame);

        m_ScreenInfo.DCFlush();

        // Discard the log that has been consumed.
#ifdef _WIN32
        log.numGPUTimeSpan = log.numCPUTimeSpan = 0;
#else
        log.numGPUTimeSpan.u.s32 = log.numCPUTimeSpan.u.s32 = 0;
#endif
    }
}

// Draws the meter by forwarding to the ScreenInfo that Calc() filled.
void ProcessMeter::Draw() const
{
    m_ScreenInfo.Draw();
}

// Starts a time span in the given group with an explicit color.
//   group        : group of the span. GROUP_FRAME additionally advances the current
//                  log and records the GPU time at frame begin.
//   r, g, b, a   : color of the span's bar.
//   addnextFrame : when true, the span is recorded in the log following the current
//                  one instead of the current log.
// Returns a handle to pass to EndTimeSpan(): the slot index in the low bits (CPU slots
// are offset by MAX_GPU_TIMESPAN) combined with the log index shifted by
// LOG_BIT_SHIFT.
int ProcessMeter::BeginTimeSpan(Group group, u8 r, u8 g, u8 b, u8 a /*=0xFF*/, bool addnextFrame /*=false*/)
{
    NW_G3D_ASSERT_INDEX_BOUNDS(group, NUM_GROUP);
    const int BUFFER_COUNT = sizeof(m_Log) / sizeof(Log);
    // A new frame moves on to the next log, wrapping around at the end.
    if (group == GROUP_FRAME)
    {
        m_CurrentLog = (m_CurrentLog >= BUFFER_COUNT - 1) ? 0 : m_CurrentLog + 1;
    }

    int logIndex = m_CurrentLog;
    if (addnextFrame)
    {
        logIndex = (logIndex >= BUFFER_COUNT - 1) ? 0 : logIndex + 1;
    }
    // GPU
    if (group == GROUP_GPU || group == GROUP_SYS_GPU)
    {
        Log& log = m_Log[logIndex];
        // Reserve a slot atomically; both branches yield the count before the
        // increment (hence the "- 1" on the Windows path).
#ifdef _WIN32
        int idxTimeSpan = _InterlockedIncrement(&log.numGPUTimeSpan) - 1;
#else
        int idxTimeSpan = OSIncAtomic(&log.numGPUTimeSpan);
#endif
        NW_G3D_ASSERT_INDEX_BOUNDS(idxTimeSpan, MAX_GPU_TIMESPAN);
        Log::GPUTimeSpan& timespan = log.gpu[idxTimeSpan];
        timespan.group = group;
        timespan.color[0] = r;
        timespan.color[1] = g;
        timespan.color[2] = b;
        timespan.color[3] = a;
        timespan.begin->Query();

        return idxTimeSpan | (logIndex << LOG_BIT_SHIFT);
    }

    // CPU
    if (group == GROUP_FRAME)
    {
        // Reference point for placing this frame's GPU spans.
        Log& log = m_Log[logIndex];
        log.beginGPU = GPUClock::Now();
    }

    Log& log = m_Log[logIndex];
    // Reserve a CPU slot atomically, as for the GPU slots above.
#ifdef _WIN32
    int idxTimeSpan = _InterlockedIncrement(&log.numCPUTimeSpan) - 1;
#else
    int idxTimeSpan = OSIncAtomic(&log.numCPUTimeSpan);
#endif
    NW_G3D_ASSERT_INDEX_BOUNDS(idxTimeSpan, MAX_CPU_TIMESPAN);
    Log::CPUTimeSpan& timespan = log.cpu[idxTimeSpan];
    timespan.group = group;
    timespan.color[0] = r;
    timespan.color[1] = g;
    timespan.color[2] = b;
    timespan.color[3] = a;
    timespan.begin = CPUClock::Now();
    // CPU handles are offset so EndTimeSpan() can tell them apart from GPU handles.
    return (MAX_GPU_TIMESPAN + idxTimeSpan) | (logIndex << LOG_BIT_SHIFT);
}

// Starts a time span in the given group using that group's default color.
// See the color-taking overload for the meaning of addnextFrame and the return value.
int ProcessMeter::BeginTimeSpan(Group group, bool addnextFrame /*=false*/)
{
    NW_G3D_ASSERT_INDEX_BOUNDS(group, NUM_GROUP);

    const u8* const rgba = m_DefaultColor[group];
    const u8 r = rgba[0];
    const u8 g = rgba[1];
    const u8 b = rgba[2];
    const u8 a = rgba[3];
    return BeginTimeSpan(group, r, g, b, a, addnextFrame);
}

// Ends the time span identified by a handle returned from BeginTimeSpan().
// GPU spans issue an end query; CPU spans store the current CPU time. Ending CPU
// slot 0 (the frame span) also records the GPU time at frame end.
void ProcessMeter::EndTimeSpan(int index)
{
    // Split the handle into slot index (low bits) and log index (LOG_BIT_MASK bits).
    const int idxTimeSpan = (index & ~LOG_BIT_MASK);
    const int logIndex = (index & LOG_BIT_MASK) >> LOG_BIT_SHIFT;

    NW_G3D_ASSERT_INDEX_BOUNDS(logIndex, sizeof(m_Log) / sizeof(Log));
    NW_G3D_ASSERT_INDEX_BOUNDS(idxTimeSpan, MAX_TIMESPAN);
    Log& log = m_Log[logIndex];

    if (idxTimeSpan >= MAX_GPU_TIMESPAN)
    {
        // CPU slots are stored behind the GPU slot range in the handle.
        const int idxCPU = idxTimeSpan - MAX_GPU_TIMESPAN;
        log.cpu[idxCPU].end = CPUClock::Now();
        if (idxCPU == 0)
        {
            log.endGPU = GPUClock::Now();
        }
    }
    else
    {
        log.gpu[idxTimeSpan].end->Query();
    }
}

// Draws the meter's foreground lines in the given color (always fully opaque): one
// vertical divider per frame boundary inside the bar, at most four, and a horizontal
// line below the group rows.
void ProcessMeter::PutFrameFG(int numFrame, u8 color[4])
{
    m_ScreenInfo.SetShapeColor(color[0], color[1], color[2], 0xFF);

    const float meterHeight = m_RowHeight * NUM_GROUP_METER;
    // Width of one frame inside the bar, in screen percent.
    const float frameWidth = 100.0f * m_FrameScaleX;

    int maxLine = 5;
    const int endFrame = std::min(maxLine, numFrame);
    for (int idxFrame = 1; idxFrame < endFrame; ++idxFrame)
    {
        m_ScreenInfo.PutLineV(m_OffsetX + frameWidth * idxFrame, m_OffsetY, meterHeight);
    }

    m_ScreenInfo.PutLineH(m_OffsetX, m_OffsetY + meterHeight, 100.0f * m_ScaleX);
}

// Draws the background rectangles of the meter in the given RGBA color.
//   pctFrame - width of the first rectangle before scaling by m_FrameScaleX.
//   color    - RGBA, all four elements are used.
void ProcessMeter::PutFrameBG(float pctFrame, u8 color[4])
{
    m_ScreenInfo.SetShapeColor(color[0], color[1], color[2], color[3]);

    const float meterHeight = m_RowHeight * NUM_GROUP_METER;

    // Rectangle sized by pctFrame over the meter rows.
    m_ScreenInfo.PutRect(m_OffsetX, m_OffsetY,
        pctFrame * m_FrameScaleX, meterHeight);

    // Full-width rectangle covering the meter rows plus one extra row.
    m_ScreenInfo.PutRect(m_OffsetX, m_OffsetY,
        100.0f * m_ScaleX, meterHeight + m_RowHeight);

    // Full-width rectangle covering only the extra row below the meter rows.
    m_ScreenInfo.PutRect(m_OffsetX, m_OffsetY + meterHeight,
        100.0f * m_ScaleX, m_RowHeight);
}

// Draws one time-span rectangle.
//   pctBegin    - horizontal start, scaled by m_FrameScaleX.
//   pctTimeSpan - horizontal extent, scaled by m_FrameScaleX.
//   group       - selects the row: group modulo NUM_GROUP_USER.
//   color       - RGBA fill color.
void ProcessMeter::PutTimeSpan(float pctBegin, float pctTimeSpan, Group group, u8 color[4])
{
    m_ScreenInfo.SetShapeColor(color[0], color[1], color[2], color[3]);

    const int row = group % NUM_GROUP_USER;

    m_ScreenInfo.PutRect(
        m_OffsetX + pctBegin * m_FrameScaleX,
        m_OffsetY + row * m_RowHeight,
        pctTimeSpan * m_FrameScaleX,
        m_RowHeight);
}

//--------------------------------------------------------------------------------------------------

// Returns the number of bytes needed to hold a TGA file for the given texture:
// header + footer + image data (width x height x bytes per pixel).
//   gfxTexture - source texture; only its width and height are read. Must not be NULL.
//   tgaFormat  - output pixel format; unknown values fall back to 4 bytes per pixel.
// Width and height are truncated to 16 bits, matching the u16 locals used here.
size_t TextureDataConverter::CalcSize(const nw::g3d::GfxTexture* gfxTexture, TGAFormat tgaFormat)
{
    NW_G3D_ASSERT_NOT_NULL(gfxTexture);

    size_t bytesPerPixel = 4;
    switch (tgaFormat)
    {
        case TGAFormat_R8          : bytesPerPixel = 1; break;
        case TGAFormat_R8_G8_B8    : bytesPerPixel = 3; break;
        case TGAFormat_R8_G8_B8_A8 : bytesPerPixel = 4; break;
        default                    : break;
    }

    const u16 width  = static_cast<u16>(gfxTexture->GetWidth());
    const u16 height = static_cast<u16>(gfxTexture->GetHeight());

    // Compute in size_t and multiply by bytes (not bits) per pixel. The previous
    // int expression width * height * pixelDepth / 8 overflowed for large
    // textures (e.g. 8192 x 8192 at 32 bits reaches 2^31 before the division).
    const size_t imageSize = static_cast<size_t>(width) * height * bytesPerPixel;

    // Total size is header + footer + image.
    return sizeof(TgaHeader) + sizeof(TgaFooter) + imageSize;
}

// Maps a row index to its vertically mirrored counterpart when `flip` is set.
//   height      - row index to convert.
//   imageHeight - total number of rows in the image.
//   flip        - when false, `height` is returned unchanged.
// Returns imageHeight - height - 1 when flipping, otherwise height.
int FlipHeight(int height, int imageHeight, bool flip)
{
    if (!flip)
    {
        return height;
    }
    return imageHeight - height - 1;
}

// Writes the contents of `gfxTexture` into `pBuffer` as a TGA file image
// (header, pixel data, footer).
//   pBuffer        - destination; must hold at least
//                    sizeof(TgaHeader) + sizeof(TgaFooter) + width * height * bytesPerPixel bytes.
//   size           - size of pBuffer in bytes; checked by assertion only.
//   gfxTexture     - source texture. Must not be NULL.
//   tgaFormat      - output pixel format (1, 3 or 4 bytes per pixel).
//   funcDebugAlloc - allocator for the temporary readback buffer. Must not be NULL.
//   funcDebugFree  - releases the temporary readback buffer. Must not be NULL.
// The temporary buffer is always indexed with a 4-byte stride per pixel.
// NOTE(review): that assumes the readback data is 4 bytes per pixel; the temp
// buffer is sized from the source surface format, so confirm for narrower formats.
void TextureDataConverter::Convert(void* pBuffer, size_t size, const nw::g3d::GfxTexture* gfxTexture, TGAFormat tgaFormat, FuncAlloc funcDebugAlloc, FuncFree funcDebugFree)
{
    (void)size; // Only referenced by the assertion below.

    NW_G3D_ASSERT_NOT_NULL(pBuffer);
    NW_G3D_ASSERT_NOT_NULL(gfxTexture);
    NW_G3D_ASSERT_NOT_NULL(funcDebugAlloc);
    NW_G3D_ASSERT_NOT_NULL(funcDebugFree);

    u8 pixelDepth = 32;
    switch (tgaFormat)
    {
    case TGAFormat_R8          : pixelDepth = 8;  break;
    case TGAFormat_R8_G8_B8    : pixelDepth = 24; break;
    case TGAFormat_R8_G8_B8_A8 : pixelDepth = 32; break;
    }
    const int bytesPerPixel = pixelDepth / 8;

    u16 width  = static_cast<u16>(gfxTexture->GetWidth());
    u16 height = static_cast<u16>(gfxTexture->GetHeight());

    // Multiply by bytes per pixel rather than bits per pixel: the former
    // width * height * pixelDepth / 8 overflowed int for large textures.
    int imageSize = width * height * bytesPerPixel;

    // The caller's buffer must have room for header, image and footer.
    NW_G3D_ASSERT(size >= sizeof(TgaHeader) + sizeof(TgaFooter) + imageSize);

    // The header lives at the start of the caller-provided buffer.
    TgaHeader* pHeader = reinterpret_cast<TgaHeader*>(pBuffer);
    memset(pHeader, 0, sizeof(TgaHeader));

    switch (tgaFormat)
    {
    case TGAFormat_R8          : pHeader->imageType = 3; break;
    case TGAFormat_R8_G8_B8    : pHeader->imageType = 2; break;
    case TGAFormat_R8_G8_B8_A8 : pHeader->imageType = 2; break;
    }

    // The non-Windows build stores the 16-bit dimensions byte-reversed.
#ifdef _WIN32
    pHeader->imageWidth = width;
    pHeader->imageHeight = height;
#else
    StoreRevU16(&pHeader->imageWidth, width);
    StoreRevU16(&pHeader->imageHeight, height);
#endif
    pHeader->pixelDepth      = pixelDepth;
    // NOTE(review): written as 8 for every format, including those without an
    // alpha channel - confirm against the TGA image descriptor definition.
    pHeader->imageDescriptor = 8;

    // Pixel data follows the header; the footer follows the pixel data.
    u8* pImage = AddOffset<u8>(pHeader, sizeof(TgaHeader));

    TgaFooter* pFooter = AddOffset<TgaFooter>(pImage, imageSize);
    pFooter->ext = 0;
    pFooter->dev = 0;
    memcpy(pFooter->signeture, "TRUEVISION-XFILE.\0", 18);

    // Describe a temporary texture with the same dimensions and surface format
    // as the source so that its required size and alignment can be queried.
    GfxTexture tempTexture;
    tempTexture.SetDefault();
    GX2Surface& tempSurface = tempTexture.GetGX2Texture()->surface;
    tempSurface.width = width;
    tempSurface.height = height;
    tempSurface.format = gfxTexture->GetGX2Texture()->surface.format;

    tempTexture.CalcSize();

    // Round size and alignment up to a multiple of 32.
    u32 imagesize = (tempTexture.GetBaseSize()  + (32 - 1)) & ~(32 - 1);
    u32 alignment = (tempTexture.GetAlignment() + (32 - 1)) & ~(32 - 1);

    u8* pTempImage = static_cast<u8*>(funcDebugAlloc(imagesize, alignment));
    NW_G3D_ASSERT_NOT_NULL(pTempImage);
    tempTexture.SetImagePtrs(pTempImage, NULL);

    CPUCache::Invalidate(pTempImage, imagesize);

#ifdef _WIN32
    // Windows build: read the texture back as RGBA bytes through OpenGL.
    const bool yFlip = false;
#if NW_G3D_GL_PORTABILITY < NW_G3D_GL_LEVEL2
    glGetTextureImageEXT(gfxTexture->handle, GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, pTempImage);
#else
    glBindTexture(GL_TEXTURE_2D, gfxTexture->handle);
    glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, pTempImage);
    glBindTexture(GL_TEXTURE_2D, 0);
#endif
    glFinish();
    NW_G3D_GL_ASSERT();
#else // _WIN32
    // Other builds: copy the source surface into the temporary surface; rows
    // are read back vertically flipped.
    const bool yFlip = true;
    tempSurface.tileMode = GX2_TILE_MODE_LINEAR_SPECIAL;
    GX2CopySurface(&gfxTexture->GetGX2Texture()->surface, 0, 0, &tempSurface, 0, 0);
    GPUCache::InvalidateAll();
#endif // _WIN32

    // Repack the 4-byte source pixels into the output layout. For 3- and
    // 4-byte output, source channels 0 and 2 are swapped.
    switch ( bytesPerPixel )
    {
    case 1 :
        for ( int h = 0; h < height; ++h )
        {
            int tH = FlipHeight(h, height, yFlip);
            for ( int w = 0; w < width; ++w )
            {
                // Single channel output: take source channel 0.
                pImage[h * width + w] = pTempImage[(tH * width + w) * 4];
            }
        }
        break;
    case 3 :
        for (int h = 0; h < height; ++h)
        {
            int tH = FlipHeight(h, height, yFlip);
            for (int w = 0; w < width; ++w)
            {
                pImage[(h * width + w) * 3 + 0] = pTempImage[(tH * width + w) * 4 + 2];
                pImage[(h * width + w) * 3 + 1] = pTempImage[(tH * width + w) * 4 + 1];
                pImage[(h * width + w) * 3 + 2] = pTempImage[(tH * width + w) * 4 + 0];
            }
        }
        break;
    case 4 :
        for (int h = 0; h < height; ++h)
        {
            int tH = FlipHeight(h, height, yFlip);
            for (int w = 0; w < width; ++w)
            {
                pImage[(h * width + w) * 4 + 0] = pTempImage[(tH * width + w) * 4 + 2];
                pImage[(h * width + w) * 4 + 1] = pTempImage[(tH * width + w) * 4 + 1];
                pImage[(h * width + w) * 4 + 2] = pTempImage[(tH * width + w) * 4 + 0];
                pImage[(h * width + w) * 4 + 3] = pTempImage[(tH * width + w) * 4 + 3];
            }
        }
        break;
    }

    funcDebugFree(pTempImage);
}

//--------------------------------------------------------------------------------------------------
// Allocates one contiguous block for `bufferCount` display-list buffers of
// `bufferSize` bytes each and records a pointer to every buffer.
//   bufferCount - number of buffers; must be in 1..MAX_BUFFER_COUNT (asserted).
//   bufferSize  - bytes per buffer; must satisfy GX2_DISPLAY_LIST_ALIGNMENT (asserted).
void DisplayList::Setup(int bufferCount, int bufferSize/*= MAX_DISPLAY_LIST_SIZE*/)
{
    NW_G3D_ASSERT(bufferCount > 0 && bufferCount <= MAX_BUFFER_COUNT);
    NW_G3D_ASSERT_ALIGNMENT(bufferSize, GX2_DISPLAY_LIST_ALIGNMENT);

    // Start from a clean table; unused slots stay NULL with size 0.
    for (int slot = 0; slot < MAX_BUFFER_COUNT; ++slot)
    {
        m_pBuffer[slot] = NULL;
        m_Size[slot] = 0;
    }

    // Display-list storage: a single allocation shared by all buffers.
    const int totalSize = bufferSize * bufferCount;
    void* pBase = AllocMem2(totalSize, GX2_DISPLAY_LIST_ALIGNMENT);
    nw::g3d::CPUCache::Invalidate(pBase, totalSize);

    // Buffer i begins bufferSize bytes after buffer i - 1.
    m_pBuffer[0] = pBase;
    int slot = 1;
    while (slot < bufferCount)
    {
        m_pBuffer[slot] = nw::g3d::AddOffset(m_pBuffer[slot - 1], bufferSize);
        ++slot;
    }

    m_CurrentBuffer = 0;
    m_BufferCount = bufferCount;
    m_BufferSize = bufferSize;
}

void DisplayList::Cleanup()
{
    FreeMem2(m_pBuffer[0]);
}

// Clears the recorded size of every display-list buffer slot.
// The buffer pointers themselves are left untouched.
void DisplayList::Reset()
{
    int slot = 0;
    while (slot < MAX_BUFFER_COUNT)
    {
        m_Size[slot] = 0;
        ++slot;
    }
}

// Advances to the next buffer (wrapping to 0 after the last one), clears its
// recorded size and, on non-Windows builds, begins display-list recording
// into it.
void DisplayList::Begin()
{
    // Rotate through the buffers in order.
    if (m_CurrentBuffer >= m_BufferCount - 1)
    {
        m_CurrentBuffer = 0;
    }
    else
    {
        m_CurrentBuffer = m_CurrentBuffer + 1;
    }

    m_Size[m_CurrentBuffer] = 0;
#ifdef _WIN32
    // Nothing to do on Windows builds.
#else
    GX2BeginDisplayListEx(m_pBuffer[m_CurrentBuffer], m_BufferSize, GX2_FALSE);
#endif
}

// Ends display-list recording for the current buffer and stores the value
// returned by GX2EndDisplayList as that buffer's size.
// Does nothing on Windows builds.
void DisplayList::End()
{
#ifdef _WIN32
    // Nothing to do on Windows builds.
#else
    m_Size[m_CurrentBuffer] = GX2EndDisplayList(m_pBuffer[m_CurrentBuffer]);
#endif
}

// Issues the current buffer through GX2DirectCallDisplayList, unless its
// recorded size is 0. Does nothing on Windows builds.
void DisplayList::DirectCall()
{
#ifdef _WIN32
    // Nothing to do on Windows builds.
#else
    if (!m_Size[m_CurrentBuffer])
    {
        return; // Nothing was recorded into this buffer.
    }
    GX2DirectCallDisplayList(m_pBuffer[m_CurrentBuffer], m_Size[m_CurrentBuffer]);
#endif
}

// Issues the current buffer through GX2CallDisplayList, unless its recorded
// size is 0. Does nothing on Windows builds.
void DisplayList::Call()
{
#ifdef _WIN32
    // Nothing to do on Windows builds.
#else
    if (!m_Size[m_CurrentBuffer])
    {
        return; // Nothing was recorded into this buffer.
    }
    GX2CallDisplayList(m_pBuffer[m_CurrentBuffer], m_Size[m_CurrentBuffer]);
#endif
}

//--------------------------------------------------------------------------------------------------

// Attaches this export buffer to caller-provided GPU memory and allocates a
// CPU-side buffer of the same total size.
//   pGPUBuffer   - export block memory; must not be NULL (asserted). Not owned here.
//   size         - bytes per buffer; must satisfy GX2_EXPORT_BUFFER_ALIGNMENT (asserted).
//   numBuffering - number of consecutive buffers of `size` bytes.
void ExportBuffer::Setup(void* pGPUBuffer, size_t size, int numBuffering /*= 1*/)
{
    NW_G3D_ASSERT_NOT_NULL(pGPUBuffer);
    NW_G3D_ASSERT_ALIGNMENT(size, GX2_EXPORT_BUFFER_ALIGNMENT);

    // Remember the GPU-side block and the per-buffer size.
    m_Size = size;
    m_pExpBlock = pGPUBuffer;

    // CPU-side copy: one region of m_Size bytes per buffer.
    m_pBufCPU = AllocMem2(m_Size * numBuffering);

    // Describe the GPU block to the wrapped buffer object, then set it up.
    m_Buffer.SetData(m_pExpBlock, m_Size, numBuffering);
    m_Buffer.Setup();
}

// Cleans up the wrapped buffer object and frees the CPU-side buffer if one is
// held. The pointer is reset to NULL so a repeated call does not free again.
void ExportBuffer::Cleanup()
{
    m_Buffer.Cleanup();

    if (!m_pBufCPU)
    {
        return; // No CPU-side buffer to release.
    }

    FreeMem2(m_pBufCPU);
    m_pBufCPU = NULL;
}

// Calls GX2Invalidate with GX2_INVALIDATE_CPU on the selected region of the
// export block. Only compiled in when both NW_G3D_IS_HOST_CAFE and
// NW_G3D_COMPUTE_SHADER_ENABLE are set; otherwise this function does nothing.
//   bufferIndex - which m_Size-byte region of the export block to target.
void ExportBuffer::CPUFlush(int bufferIndex /*= 0*/)
{
    NW_G3D_UNUSED(bufferIndex);
#if NW_G3D_IS_HOST_CAFE
#if NW_G3D_COMPUTE_SHADER_ENABLE
    void* pRegion = nw::g3d::ut::AddOffset(m_pExpBlock, m_Size * bufferIndex);
    GX2Invalidate(GX2_INVALIDATE_CPU, pRegion, m_Size);
#endif
#endif
}

// Invalidates the CPU cache over the selected region of the export block.
//   bufferIndex - which m_Size-byte region of the export block to target.
void ExportBuffer::CPUInvalidate(int bufferIndex /*= 0*/)
{
    void* pRegion = nw::g3d::ut::AddOffset(m_pExpBlock, m_Size * bufferIndex);
    CPUCache::Invalidate(pRegion, m_Size);
}

// Returns the start of the selected region inside the CPU-side buffer.
//   bufferIndex - region index; the region starts m_Size * bufferIndex bytes in.
void* ExportBuffer::GetCPUBuf(int bufferIndex /*= 0*/)
{
    void* pRegion = nw::g3d::ut::AddOffset(m_pBufCPU, m_Size * bufferIndex);
    return pRegion;
}
// Read-only variant: returns the start of the selected region inside the
// CPU-side buffer.
//   bufferIndex - region index; the region starts m_Size * bufferIndex bytes in.
const void* ExportBuffer::GetCPUBuf(int bufferIndex /*= 0*/) const
{
    const void* pRegion = nw::g3d::ut::AddOffset(m_pBufCPU, m_Size * bufferIndex);
    return pRegion;
}

// Copies the selected region from the export block into the matching region
// of the CPU-side buffer using Copy32<true>, as m_Size / 4 32-bit words.
//   bufferIndex - region index, applied to both source and destination.
void ExportBuffer::CopyToCPU(int bufferIndex /*= 0*/)
{
    void* pDst = nw::g3d::ut::AddOffset(m_pBufCPU, m_Size * bufferIndex);
    void* pSrc = nw::g3d::ut::AddOffset(m_pExpBlock, m_Size * bufferIndex);
    nw::g3d::Copy32<true>(pDst, pSrc, m_Size >> 2);
}
// Copies the selected region from the CPU-side buffer into the matching region
// of the export block using Copy32<true>, as m_Size / 4 32-bit words.
//   bufferIndex - region index, applied to both source and destination.
void ExportBuffer::CopyToGPU(int bufferIndex /*= 0*/)
{
    void* pDst = nw::g3d::ut::AddOffset(m_pExpBlock, m_Size * bufferIndex);
    void* pSrc = nw::g3d::ut::AddOffset(m_pBufCPU, m_Size * bufferIndex);
    nw::g3d::Copy32<true>(pDst, pSrc, m_Size >> 2);
}

//--------------------------------------------------------------------------------------------------

// Loads a uniform block for every shader stage that uses it.
// For each of the vertex, geometry, fragment and compute stages, the block's
// location is looked up in the shader program; stages that report
// SHADER_LOCATION_NONE are skipped.
//   pBlock         - buffer holding the uniform data. Must not be NULL.
//   pShaderProgram - program queried for the per-stage locations. Must not be NULL.
//   blockIndex     - index of the uniform block within the program.
//   bufferIndex    - forwarded to the buffer's Load*Uniforms calls.
void LoadUniformBlock(const nw::g3d::GfxBuffer* pBlock,
    const nw::g3d::ResShaderProgram* pShaderProgram, int blockIndex, int bufferIndex /*= 0*/)
{
    NW_G3D_ASSERT_NOT_NULL(pBlock);
    NW_G3D_ASSERT_NOT_NULL(pShaderProgram);

    typedef nw::g3d::ResShaderProgram Program;

    // Vertex stage
    {
        u32 loc = pShaderProgram->GetUniformBlockLocation(blockIndex, Program::STAGE_VERTEX);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pBlock->LoadVertexUniforms(loc, bufferIndex);
        }
    }

    // Geometry stage
    {
        u32 loc = pShaderProgram->GetUniformBlockLocation(blockIndex, Program::STAGE_GEOMETRY);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pBlock->LoadGeometryUniforms(loc, bufferIndex);
        }
    }

    // Fragment stage
    {
        u32 loc = pShaderProgram->GetUniformBlockLocation(blockIndex, Program::STAGE_FRAGMENT);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pBlock->LoadFragmentUniforms(loc, bufferIndex);
        }
    }

    // Compute stage
    {
        u32 loc = pShaderProgram->GetUniformBlockLocation(blockIndex, Program::STAGE_COMPUTE);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pBlock->LoadComputeUniforms(loc, bufferIndex);
        }
    }
}

// Loads a sampler and its texture for every shader stage that uses them.
// For each of the vertex, geometry, fragment and compute stages, the sampler's
// location is looked up in the shader program; stages that report
// SHADER_LOCATION_NONE are skipped. Within a stage the sampler is loaded
// first, then the texture, both at the same location.
//   pTexture       - texture to load. Must not be NULL.
//   pSampler       - sampler to load. Must not be NULL.
//   pShaderProgram - program queried for the per-stage locations. Must not be NULL.
//   samplerIndex   - index of the sampler within the program.
void LoadTextureSampler(const nw::g3d::GfxTexture* pTexture, const nw::g3d::GfxSampler* pSampler,
    const nw::g3d::ResShaderProgram* pShaderProgram, int samplerIndex)
{
    NW_G3D_ASSERT_NOT_NULL(pTexture);
    NW_G3D_ASSERT_NOT_NULL(pSampler);
    NW_G3D_ASSERT_NOT_NULL(pShaderProgram);

    typedef nw::g3d::ResShaderProgram Program;

    // Vertex stage
    {
        u32 loc = pShaderProgram->GetSamplerLocation(samplerIndex, Program::STAGE_VERTEX);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pSampler->LoadVertexSampler(loc);
            pTexture->LoadVertexTexture(loc);
        }
    }

    // Geometry stage
    {
        u32 loc = pShaderProgram->GetSamplerLocation(samplerIndex, Program::STAGE_GEOMETRY);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pSampler->LoadGeometrySampler(loc);
            pTexture->LoadGeometryTexture(loc);
        }
    }

    // Fragment stage
    {
        u32 loc = pShaderProgram->GetSamplerLocation(samplerIndex, Program::STAGE_FRAGMENT);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pSampler->LoadFragmentSampler(loc);
            pTexture->LoadFragmentTexture(loc);
        }
    }

    // Compute stage
    {
        u32 loc = pShaderProgram->GetSamplerLocation(samplerIndex, Program::STAGE_COMPUTE);
        if (loc != nw::g3d::SHADER_LOCATION_NONE)
        {
            pSampler->LoadComputeSampler(loc);
            pTexture->LoadComputeTexture(loc);
        }
    }
}

}}} // namespace nw::g3d::demo
