﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include "stdafx.h"
#include "types.h"
#include <assert.h>
#include <stdio.h>
#include <vector>
#include <string>

#pragma warning ( push )
#pragma warning ( disable: 4302 4311 4312 )
#include "cafe/gx2/gx2Enum.h"
#include "cafe/gx2/gx2Constant.h"
#include "cafe/gx2/gx2Misc.h"
#include "cafe/gx2/gx2Shaders.h"
#include "cafe/gx2/gx2Surface.h"
#include "cafe/gx2/gx2Texture.h"
#pragma warning ( pop )

#include <sdk_ver.h>
#include "cafe/gfd.h"
#include "gfdMem.h"

// Data-table / string-table helpers used by the _GFDCopyBlock* functions below.
// They are only declared here; their definitions are not in this part of the file.
extern GFDDataTable *GFDCreateDataTable( u32 max );
extern u32 GFDAddDataTable( GFDDataTable *pTable, void *data, u32 nBytes );
extern GFDStringTable *GFDCreateStringTable( u32 max );
extern u32 GFDAddStringTable( GFDStringTable *pTable,  const char *str );
extern void GFDDestroyDataTable( GFDDataTable *pTable );

// Forward declarations of the "_GFDEx" helpers that inspect a GFD file image
// (file header, block headers, block sizes, relocation).
BOOL _GFDExCheckHeaderVersions( const void *pData );
BOOL _GFDExCheckBlockHeaderMagicVersions( const GFDBlockHeader *pBlockHeader );
u32  _GFDExGetBlockCount( GFDBlockType blockType, const void *pData );
BOOL _GFDExGetHeaderVersions( u32 *pVerMajor, u32 *pVerMinor, u32 *pVerGPU, const void *pData );
u32  _GFDExGetBlockDataSize( GFDBlockType blockType, u32 index, const void *pData );
BOOL _GFDExRelocateBlock( u32 nBytesBlock, char *pData);
BOOL _GFDExRelocateBlockEx( GFDBlockRelocationHeader *pTrailer, u32 fromOffset, u32 toOffset, char *pData );

// Shader extraction entry points called by GDFGetVertexShader32To64 / GDFGetPixelShader32To64.
BOOL GFDGetVertexShader32( GX2VertexShader **pHeader, void *pProgram, u32 index, const void *pData );
BOOL GFDGetPixelShader32(  GX2PixelShader **pHeader, void *pProgram, u32 index, const void *pData );

// Reverses the byte order of a 32-bit value (byte 0 <-> byte 3, byte 1 <-> byte 2).
// The argument is expanded four times, so it must not have side effects.
#define _GFDEX_SWAP_BYTES(x) ( (((x) >> 24) & 0xff) | (((x) >> 8) & 0xff00) | (((x) << 8) & 0xff0000) | (((x) << 24) & 0xff000000) )

// Returns Offset with every bit covered by GFD_TAG_MASK cleared,
// i.e. the offset without the tag bits attached to it.
u32 GFDExCleanTag(u32 Offset)
{
    return Offset & ~GFD_TAG_MASK;
}

// Reports whether the tag bits (GFD_TAG_MASK) of a stored offset equal GFD_TAG_DAT.
BOOL GFDExCheckTagDAT(u32 Offset)
{
    return GFD_TAG_DAT == (Offset & GFD_TAG_MASK);
}

// Reports whether the tag bits (GFD_TAG_MASK) of a stored offset equal GFD_TAG_STR.
BOOL GFDExCheckTagSTR(u32 Offset)
{
    return GFD_TAG_STR == (Offset & GFD_TAG_MASK);
}

// Register block of a vertex shader as it appears in the 32-bit layout:
// GX2_NUM_VERTEX_SHADER_REGISTERS raw 32-bit values (embedded as GFDVertexShader::_regs).
typedef struct _GFDVertexShaderRegs
{
    u32 reg[GX2_NUM_VERTEX_SHADER_REGISTERS];
} GFDVertexShaderRegs;

// Register block of a pixel shader as it appears in the 32-bit layout:
// GX2_NUM_PIXEL_SHADER_REGISTERS raw 32-bit values (embedded as GFDPixelShader::_regs).
typedef struct _GFDPixelShaderRegs
{
    u32 reg[GX2_NUM_PIXEL_SHADER_REGISTERS];
} GFDPixelShaderRegs;

// 32-bit layout of a vertex shader header. It is the source type that
// _GFDExSetVertexShaderData converts into a GX2VertexShader: scalar members are
// byte-swapped there, and members that are pointers in GX2VertexShader
// (uniformBlocks, uniformVars, ...) are held here as plain u32 values.
typedef struct _GFDVertexShader
{
    GFDVertexShaderRegs _regs;
    u32                 shaderSize;
    u32                 shaderPtr;          // u32 stand-in for the program pointer
    GX2ShaderMode       shaderMode;
    u32                 numUniformBlocks;
    u32                 uniformBlocks;      // u32 stand-in for the uniform block array pointer
    u32                 numUniforms;
    u32                 uniformVars;        // u32 stand-in for the uniform array pointer
    u32                 numInitialValues;
    u32                 initialValues;      // u32 stand-in for the initial value array pointer
    u32                 _numLoops;
    u32                 _loopVars;          // u32 stand-in for the loop var array pointer
    u32                 numSamplers;
    u32                 samplerVars;        // u32 stand-in for the sampler array pointer
    u32                 numAttribs;
    u32                 attribVars;         // u32 stand-in for the attribute array pointer
    u32                 ringItemsize;
    u32                 hasStreamOut;
    u32                 streamOutVertexStride[GX2_MAX_STREAMOUT_BUFFERS];
    GX2RBuffer          shaderProgram;
} GFDVertexShader;
// 32-bit layout of a pixel shader header. Same member set as GFDVertexShader
// minus the attribute / ring / stream-out members.
// NOTE(review): presumably consumed by a pixel-shader counterpart of
// _GFDExSetVertexShaderData that is not in this part of the file - confirm.
typedef struct _GFDPixelShader
{
    GFDPixelShaderRegs  _regs;
    u32                 shaderSize;
    u32                 shaderPtr;
    GX2ShaderMode       shaderMode;
    u32                 numUniformBlocks;
    u32                 uniformBlocks;
    u32                 numUniforms;
    u32                 uniformVars;
    u32                 numInitialValues;
    u32                 initialValues;
    u32                 _numLoops;
    u32                 _loopVars;
    u32                 numSamplers;
    u32                 samplerVars;
    GX2RBuffer          shaderProgram;
} GFDPixelShader;


// 32-bit layout of one uniform block record: three u32 fields.
// GFDRepackUniformBlockArray32To64Bit declares its own local struct with the
// same three members rather than using this typedef.
typedef struct _GFDUniformBlock
{
    u32          name;
    u32          location;
    u32          size;
} GFDUniformBlock;

#pragma warning ( push )
#pragma warning ( disable: 4302 4311 4312 )

//---------------------------------------------------------------------------
/// Copy n GX2UniformBlock entries member by member (name, location, size)
/// from pUBin64 to pUBoutDst. The name pointer is copied, not the string.
///
/// \return Number of bytes covered by the copy: sizeof(GX2UniformBlock) * n.
//---------------------------------------------------------------------------
u32 GFDRepackUniformBlockArrayFor64Bit(GX2UniformBlock *pUBin64, GX2UniformBlock *pUBoutDst, u32 n)
{
    const GX2UniformBlock *src = pUBin64;
    GX2UniformBlock       *dst = pUBoutDst;

    for (u32 remaining = n; remaining != 0; --remaining, ++src, ++dst)
    {
        dst->name     = src->name;
        dst->location = src->location;
        dst->size     = src->size;
    }

    return sizeof(GX2UniformBlock) * n;
}

//---------------------------------------------------------------------------
/// Copy n GX2UniformVar entries member by member (name, type, arrayCount,
/// offset, blockIndex) from pUVin64 to pUVoutDst. The name pointer is
/// copied, not the string.
///
/// \return Number of bytes covered by the copy: sizeof(GX2UniformVar) * n.
//---------------------------------------------------------------------------
u32 GFDRepackUniformVarArrayFor64Bit(GX2UniformVar *pUVin64, GX2UniformVar *pUVoutDst, u32 n)
{
    const GX2UniformVar *src = pUVin64;
    GX2UniformVar       *dst = pUVoutDst;

    for (u32 remaining = n; remaining != 0; --remaining, ++src, ++dst)
    {
        dst->name       = src->name;
        dst->type       = src->type;
        dst->arrayCount = src->arrayCount;
        dst->offset     = src->offset;
        dst->blockIndex = src->blockIndex;
    }

    return sizeof(GX2UniformVar) * n;
}

//---------------------------------------------------------------------------
/// Copy n GX2AttribVar entries member by member (name, type, arrayCount,
/// location) from pAVin64 to pAVoutDst. The name pointer is copied, not
/// the string.
///
/// \return Number of bytes covered by the copy: sizeof(GX2AttribVar) * n.
//---------------------------------------------------------------------------
u32 GFDRepackAttribVarArrayFor64Bit(GX2AttribVar *pAVin64, GX2AttribVar *pAVoutDst, u32 n)
{
    const GX2AttribVar *src = pAVin64;
    GX2AttribVar       *dst = pAVoutDst;

    for (u32 remaining = n; remaining != 0; --remaining, ++src, ++dst)
    {
        dst->name       = src->name;
        dst->type       = src->type;
        dst->arrayCount = src->arrayCount;
        dst->location   = src->location;
    }

    return sizeof(GX2AttribVar) * n;
}

//---------------------------------------------------------------------------
/// Copy n GX2SamplerVar entries member by member (name, type, location)
/// from pSVin64 to pSVoutDst. The name pointer is copied, not the string.
///
/// \return Number of bytes covered by the copy: sizeof(GX2SamplerVar) * n.
//---------------------------------------------------------------------------
u32 GFDRepackSamplerVarArrayFor64Bit(GX2SamplerVar *pSVin64, GX2SamplerVar *pSVoutDst, u32 n)
{
    const GX2SamplerVar *src = pSVin64;
    GX2SamplerVar       *dst = pSVoutDst;

    for (u32 remaining = n; remaining != 0; --remaining, ++src, ++dst)
    {
        dst->name     = src->name;
        dst->type     = src->type;
        dst->location = src->location;
    }

    return sizeof(GX2SamplerVar) * n;
}

//---------------------------------------------------------------------------
/// Convert a uniform block array from the 32-bit layout to GX2UniformBlock.
///
/// For each of the n entries the name string is duplicated into a freshly
/// malloc'ed, NUL-terminated buffer, and location / size are byte-swapped
/// with _GFDEX_SWAP_BYTES. The 32-bit name field is used as-is (it is not
/// byte-swapped) as the address of the source string.
///
/// If a name buffer cannot be allocated, an error is printed and that
/// entry's name is set to NULL instead of writing through a NULL pointer.
///
/// \param pUBin32    Array of n records in the 32-bit layout (three u32 fields).
/// \param pUBoutDst  Destination array with room for n entries. Each non-NULL
///                   name it receives was allocated with malloc().
/// \param n          Number of entries to convert.
//---------------------------------------------------------------------------
void GFDRepackUniformBlockArray32To64Bit( void* pUBin32, GX2UniformBlock *pUBoutDst, u32 n )
{
    struct GDFExUniformBlock
    {
        u32          name;
        u32          location;
        u32          size;
    };

    GDFExUniformBlock* uniformBlock32 = (GDFExUniformBlock*)pUBin32;
    for ( u32 i = 0; i < n; i++ )
    {
        const char *srcName  = (const char*)uniformBlock32[i].name;
        u32         nameSize = (u32)strlen( srcName );
        char       *dstName  = (char *)malloc( nameSize + 1 );

        if ( dstName != NULL )
        {
            memcpy( dstName, srcName, nameSize );
            dstName[nameSize] = '\0';
        }
        else
        {
            printf("Error! Failed to allocate uniform block name!\n");
        }
        pUBoutDst[i].name     = dstName;

        pUBoutDst[i].location = _GFDEX_SWAP_BYTES( uniformBlock32[i].location );
        pUBoutDst[i].size     = _GFDEX_SWAP_BYTES( uniformBlock32[i].size );
    }
}

//---------------------------------------------------------------------------
/// Convert a uniform array from the 32-bit layout to GX2UniformVar.
///
/// For each of the n entries the name string is duplicated into a freshly
/// malloc'ed, NUL-terminated buffer, and type / arrayCount / offset /
/// blockIndex are byte-swapped with _GFDEX_SWAP_BYTES. The 32-bit name field
/// is used as-is (it is not byte-swapped) as the address of the source string.
///
/// If a name buffer cannot be allocated, an error is printed and that
/// entry's name is set to NULL instead of writing through a NULL pointer.
///
/// \param pUVin32   Array of n records in the 32-bit layout.
/// \param pUVinDst  Destination array with room for n entries. Each non-NULL
///                  name it receives was allocated with malloc().
/// \param n         Number of entries to convert.
//---------------------------------------------------------------------------
void GFDRepackUniformArray32To64Bit( void* pUVin32, GX2UniformVar *pUVinDst, u32 n )
{
    struct GFDExUniformVar
    {
        u32              name;
        GX2VarType       type;
        u32              arrayCount;
        u32              offset;
        u32              blockIndex;
    };

    GFDExUniformVar* uniformVar32 = (GFDExUniformVar *)pUVin32;
    for ( u32 i = 0; i < n; i++ )
    {
        const char *srcName  = (const char*)uniformVar32[i].name;
        u32         nameSize = (u32)strlen( srcName );
        char       *dstName  = (char *)malloc( nameSize + 1 );

        if ( dstName != NULL )
        {
            memcpy( dstName, srcName, nameSize );
            dstName[nameSize] = '\0';
        }
        else
        {
            printf("Error! Failed to allocate uniform name!\n");
        }
        pUVinDst[i].name            = dstName;

        pUVinDst[i].type            = (GX2VarType)_GFDEX_SWAP_BYTES( (u32)uniformVar32[i].type );
        pUVinDst[i].arrayCount      = _GFDEX_SWAP_BYTES( uniformVar32[i].arrayCount );
        pUVinDst[i].offset          = _GFDEX_SWAP_BYTES( uniformVar32[i].offset );
        pUVinDst[i].blockIndex      = _GFDEX_SWAP_BYTES( uniformVar32[i].blockIndex );
    }
}

//---------------------------------------------------------------------------
/// Convert a sampler var array from the 32-bit layout to GX2SamplerVar.
///
/// For each of the n entries the name string is duplicated into a freshly
/// malloc'ed, NUL-terminated buffer, and type / location are byte-swapped
/// with _GFDEX_SWAP_BYTES. The 32-bit name field is used as-is (it is not
/// byte-swapped) as the address of the source string.
///
/// If a name buffer cannot be allocated, an error is printed and that
/// entry's name is set to NULL instead of writing through a NULL pointer.
///
/// \param pSVin32   Array of n records in the 32-bit layout.
/// \param pUVinDst  Destination array with room for n entries. Each non-NULL
///                  name it receives was allocated with malloc().
/// \param n         Number of entries to convert.
//---------------------------------------------------------------------------
void GFDRepackSamplerVarArray32To64Bit( void* pSVin32, GX2SamplerVar *pUVinDst, u32 n )
{
    struct GFDExSamplerVar
    {
        u32                name;
        GX2SamplerType     type;
        u32                location;
    };

    GFDExSamplerVar* samplerVar32 = (GFDExSamplerVar*)pSVin32;
    for ( u32 i = 0; i < n; i++ )
    {
        const char *srcName  = (const char*)samplerVar32[i].name;
        u32         nameSize = (u32)strlen( srcName );
        char       *dstName  = (char *)malloc( nameSize + 1 );

        if ( dstName != NULL )
        {
            memcpy( dstName, srcName, nameSize );
            dstName[nameSize] = '\0';
        }
        else
        {
            printf("Error! Failed to allocate sampler name!\n");
        }
        pUVinDst[i].name     = dstName;

        pUVinDst[i].type     = (GX2SamplerType)_GFDEX_SWAP_BYTES( samplerVar32[i].type );
        pUVinDst[i].location = _GFDEX_SWAP_BYTES( samplerVar32[i].location );
    }
}

//---------------------------------------------------------------------------
/// Convert an attrib var array from the 32-bit layout to GX2AttribVar.
///
/// For each of the n entries the name string is duplicated into a freshly
/// malloc'ed, NUL-terminated buffer, and type / arrayCount / location are
/// byte-swapped with _GFDEX_SWAP_BYTES. The 32-bit name field is used as-is
/// (it is not byte-swapped) as the address of the source string.
///
/// If a name buffer cannot be allocated, an error is printed and that
/// entry's name is set to NULL instead of writing through a NULL pointer.
///
/// \param pSVin32   Array of n records in the 32-bit layout.
/// \param pUVinDst  Destination array with room for n entries. Each non-NULL
///                  name it receives was allocated with malloc().
/// \param n         Number of entries to convert.
//---------------------------------------------------------------------------
void GFDRepackAttribVarArray32To64Bit( void* pSVin32, GX2AttribVar *pUVinDst, u32 n )
{
    struct GFDExAttribVar
    {
        u32          name;
        GX2VarType   type;
        u32          arrayCount;
        u32          location;
    };

    GFDExAttribVar* attribVar32 = (GFDExAttribVar*)pSVin32;
    for ( u32 i = 0; i < n; i++ )
    {
        const char *srcName  = (const char*)attribVar32[i].name;
        u32         nameSize = (u32)strlen( srcName );
        char       *dstName  = (char *)malloc( nameSize + 1 );

        if ( dstName != NULL )
        {
            memcpy( dstName, srcName, nameSize );
            dstName[nameSize] = '\0';
        }
        else
        {
            printf("Error! Failed to allocate attribute name!\n");
        }
        pUVinDst[i].name         = dstName;

        pUVinDst[i].type         = (GX2VarType)_GFDEX_SWAP_BYTES( attribVar32[i].type );
        // NOTE(review): the (GX2VarType) casts on arrayCount and location look like
        // copy-paste leftovers from the line above; kept because the member types of
        // GX2AttribVar are not visible here - confirm and drop if they are plain u32.
        pUVinDst[i].arrayCount   = (GX2VarType)_GFDEX_SWAP_BYTES( attribVar32[i].arrayCount );
        pUVinDst[i].location     = (GX2VarType)_GFDEX_SWAP_BYTES( attribVar32[i].location );
    }
}

// ------------------------------------------------------------

//---------------------------------------------------------------------------
/// Create the flat data-table representation of a GX2VertexShader structure.
///
/// The shader struct, its uniform block / uniform / initial value / loop /
/// sampler / attribute arrays, every name string and finally the shader
/// program are added, in that order, to one GFDDataTable. Before each piece
/// is added, the matching pointer member of the struct copy stored in the
/// table is set to (m_pDB + m_nDB), or to 0 when the corresponding count is 0.
///
/// \param pVS       Shader to flatten; it is only read.
/// \param dataSize  Size passed to GFDCreateDataTable. 0 selects an estimate
///                  derived from sizeof(GX2VertexShader) and numUniforms.
/// \return The new data table, or NULL if an allocation failed (the table is
///         destroyed in that case). Call GFDDestroyDataTable() on the
///         returned object once done with it.
//---------------------------------------------------------------------------
GFDDataTable* _GFDCopyBlockVSH(GX2VertexShader *pVS, u32 dataSize )
{
    // Work from a local snapshot so the source pointers stay available while
    // the copy inside the table is being rewritten.
    GX2VertexShader vsCopy;
    memcpy(&vsCopy,  pVS,  sizeof(GX2VertexShader));

    // Create data table to hold the structure elements
    u32 allocSize = ( dataSize != 0 ) ? dataSize : sizeof(GX2VertexShader) + vsCopy.numUniforms * 8 + 512;

    GFDDataTable *pDT = GFDCreateDataTable( allocSize );      // todo - pick better number
    if ( !pDT )
    {
        // NOTE(review): assumes GFDCreateDataTable reports failure with NULL - confirm.
        return NULL;
    }

    u32 size;

    // 0: Store main structure itself.
    // For allocation and alignment purposes, this needs to be the first hunk in the data table
    // NOTE(review): vsh points into the table buffer; if GFDAddDataTable can
    // reallocate m_pDB, this pointer would dangle - confirm against its implementation.
    GX2VertexShader* vsh = (GX2VertexShader*)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, &vsCopy, sizeof(GX2VertexShader));

    // 1: Store uniform block/buffer array
    GX2UniformBlock *pUB = (GX2UniformBlock *) malloc(sizeof(GX2UniformBlock)*vsCopy.numUniformBlocks);
    if ( !pUB )
    {
        printf("Error! Failed to allocate Uniform block Variable structure!\n");
        GFDDestroyDataTable(pDT);
        return NULL;
    }

    vsh->uniformBlocks = ( vsCopy.numUniformBlocks == 0 ) ? (GX2UniformBlock *)0: (GX2UniformBlock *)( pDT->m_pDB + pDT->m_nDB );
    size = GFDRepackUniformBlockArrayFor64Bit(vsCopy.uniformBlocks, pUB, vsCopy.numUniformBlocks);
    GFDAddDataTable(pDT, pUB, size);
    free(pUB);

    // 2: Store uniform array
    GX2UniformVar *pUV = (GX2UniformVar *) malloc(sizeof(GX2UniformVar)*vsCopy.numUniforms);
    if ( !pUV )
    {
        printf("Error! Failed to allocate Uniform array Variable structure!\n");
        GFDDestroyDataTable(pDT);   // do not leak the partially built table
        return NULL;
    }
    vsh->uniformVars = ( vsCopy.numUniforms == 0) ? (GX2UniformVar *)0 : (GX2UniformVar *)( pDT->m_pDB + pDT->m_nDB );
    size = GFDRepackUniformVarArrayFor64Bit(vsCopy.uniformVars, pUV, vsCopy.numUniforms);
    GFDAddDataTable(pDT, pUV, size);
    free(pUV);

    // 3: Store uniform initial values
    // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
    vsh->initialValues = ( vsCopy.numInitialValues == 0 ) ? (GX2UniformInitialValue *)0 : (GX2UniformInitialValue *)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, vsCopy.initialValues, vsCopy.numInitialValues * sizeof(GX2UniformInitialValue));

    // 4: Store loop
    vsh->_loopVars = ( vsCopy._numLoops == 0 ) ? (void*)0 : (void *)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, vsCopy._loopVars, vsCopy._numLoops * sizeof(GFDLoopVar));

    // 5: Store sampler descriptors
    GX2SamplerVar *pSV = (GX2SamplerVar *) malloc(sizeof(GX2SamplerVar)*vsCopy.numSamplers);
    if ( !pSV )
    {
        printf("Error! Failed to allocate Sampler Variable structure!\n");
        GFDDestroyDataTable(pDT);   // do not leak the partially built table
        return NULL;
    }
    vsh->samplerVars = ( vsCopy.numSamplers == 0 ) ? (GX2SamplerVar *)0 : (GX2SamplerVar *)( pDT->m_pDB + pDT->m_nDB );
    size = GFDRepackSamplerVarArrayFor64Bit(vsCopy.samplerVars, pSV, vsCopy.numSamplers);
    GFDAddDataTable(pDT, pSV, size);
    free(pSV);

    // 6: Store Attributes
    GX2AttribVar *pAV = (GX2AttribVar *) malloc(sizeof(GX2AttribVar)*vsCopy.numAttribs);
    if ( !pAV )
    {
        printf("Error! Failed to allocate Attribute Variable structure!\n");
        GFDDestroyDataTable(pDT);   // do not leak the partially built table
        return NULL;
    }
    vsh->attribVars = ( vsCopy.numAttribs == 0 ) ? (GX2AttribVar *)0 : (GX2AttribVar *)( pDT->m_pDB + pDT->m_nDB );
    size = GFDRepackAttribVarArrayFor64Bit(vsCopy.attribVars, pAV, vsCopy.numAttribs);
    GFDAddDataTable(pDT, pAV, size);
    free(pAV);

    // s1: Store each uniform block name and point the table copy's entry at it
    for(u32 i = 0; i < vsCopy.numUniformBlocks; i++)
    {
        vsh->uniformBlocks[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, vsCopy.uniformBlocks[i].name );
    }

    // s2: Store each uniform name
    for(u32 i = 0; i < vsCopy.numUniforms; i++)
    {
        vsh->uniformVars[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, vsCopy.uniformVars[i].name );
    }

    // s3: Store each sampler name
    for(u32 i = 0; i < vsCopy.numSamplers; i++)
    {
        vsh->samplerVars[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, vsCopy.samplerVars[i].name );
    }

    // s4: Store each attrib name
    for(u32 i = 0; i < vsCopy.numAttribs; i++)
    {
        vsh->attribVars[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, vsCopy.attribVars[i].name );
    }

    // Store program data
    vsh->shaderPtr = (void *)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, vsCopy.shaderPtr, vsCopy.shaderSize);

    return pDT;
}

//---------------------------------------------------------------------------
/// Build a flattened copy of a vertex shader and return its data buffer.
///
/// _GFDCopyBlockVSH is run twice: once with dataSize 0 to find out how large
/// the table gets (m_maxDB), then again with that size. The first table is
/// destroyed; from the second one only the GFDDataTable object is freed and
/// its m_pDB buffer is returned.
/// NOTE(review): this relies on m_pDB being an allocation separate from the
/// GFDDataTable object, which the caller then owns - confirm.
///
/// \param pVS  Shader to copy.
/// \return Pointer to the flattened GX2VertexShader, or NULL if either pass failed.
//---------------------------------------------------------------------------
GX2VertexShader* GFDCopyBlockVSH(GX2VertexShader *pVS)
{
    // First pass: compute the data size.
    GFDDataTable* gd =_GFDCopyBlockVSH( pVS, 0 );
    if ( gd == NULL )
    {
        return NULL;
    }

    // Second pass: build the table that is actually handed out.
    GFDDataTable* vsgd =_GFDCopyBlockVSH( pVS, gd->m_maxDB );

    GFDDestroyDataTable( gd );

    if ( vsgd == NULL )
    {
        return NULL;
    }

    GX2VertexShader* vshader = (GX2VertexShader*)vsgd->m_pDB;
    free( vsgd );
    return vshader;
}

//---------------------------------------------------------------------------
/// Create the flat data-table representation of a GX2PixelShader structure.
///
/// The shader struct, its uniform block / uniform / initial value / loop /
/// sampler arrays, every name string and finally the shader program are
/// added, in that order, to one GFDDataTable. Before each piece is added, the
/// matching pointer member of the struct copy stored in the table is set to
/// (m_pDB + m_nDB), or to 0 when the corresponding count is 0.
///
/// \param pPS       Shader to flatten; it is only read.
/// \param dataSize  Size passed to GFDCreateDataTable. 0 selects an estimate
///                  derived from sizeof(GX2PixelShader) and numUniforms.
/// \return The new data table, or NULL if an allocation failed (the table is
///         destroyed in that case). Call GFDDestroyDataTable() on the
///         returned object once done with it.
//---------------------------------------------------------------------------
GFDDataTable* _GFDCopyBlockPSH(GX2PixelShader *pPS, u32 dataSize )
{
    // Work from a local snapshot so the source pointers stay available while
    // the copy inside the table is being rewritten.
    GX2PixelShader psCopy;
    memcpy(&psCopy,  pPS,  sizeof(GX2PixelShader));

    // Create data table to hold the structure elements
    u32 allocSize = ( dataSize != 0 ) ? dataSize : sizeof(GX2PixelShader) + psCopy.numUniforms * 8 + 1024;

    GFDDataTable *pDT = GFDCreateDataTable( allocSize );      // todo - pick better number
    if ( !pDT )
    {
        // NOTE(review): assumes GFDCreateDataTable reports failure with NULL - confirm.
        return NULL;
    }

    u32 size;

    // 0: Store main structure itself.
    // For allocation and alignment purposes, this needs to be the first hunk in the data table
    // NOTE(review): psh points into the table buffer; if GFDAddDataTable can
    // reallocate m_pDB, this pointer would dangle - confirm against its implementation.
    GX2PixelShader* psh = (GX2PixelShader*)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, &psCopy, sizeof(GX2PixelShader));

    // 1: Store uniform block array
    // A NULL result only counts as a failure when entries are requested,
    // since malloc(0) may legitimately return NULL.
    GX2UniformBlock *pUB = (GX2UniformBlock *) malloc(sizeof(GX2UniformBlock)*psCopy.numUniformBlocks);
    if ( !pUB && psCopy.numUniformBlocks != 0 )
    {
        printf("Error! Failed to allocate Uniform block Variable structure!\n");
        GFDDestroyDataTable(pDT);
        return NULL;
    }
    psh->uniformBlocks = ( psCopy.numUniformBlocks == 0 ) ?  (GX2UniformBlock *)0: (GX2UniformBlock *)( pDT->m_pDB + pDT->m_nDB );
    size = GFDRepackUniformBlockArrayFor64Bit(psCopy.uniformBlocks, pUB, psCopy.numUniformBlocks);
    GFDAddDataTable(pDT, pUB, size);
    free(pUB);

    // 2: Store uniform array
    GX2UniformVar *pUV = (GX2UniformVar *) malloc(sizeof(GX2UniformVar)*psCopy.numUniforms);
    if ( !pUV && psCopy.numUniforms != 0 )
    {
        printf("Error! Failed to allocate Uniform array Variable structure!\n");
        GFDDestroyDataTable(pDT);
        return NULL;
    }
    psh->uniformVars = ( psCopy.numUniforms == 0 ) ? (GX2UniformVar *)0 : ( (GX2UniformVar *)( pDT->m_pDB + pDT->m_nDB ) );
    size = GFDRepackUniformVarArrayFor64Bit(psCopy.uniformVars, pUV, psCopy.numUniforms);
    GFDAddDataTable(pDT, pUV, size);
    free(pUV);

    // 3: Store uniform initial values
    // Note initial values points to uniform block, a contiguous section of GX2UniformInitialValues
    psh->initialValues = ( psCopy.numInitialValues == 0 ) ? (GX2UniformInitialValue*)0: (GX2UniformInitialValue *)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, psCopy.initialValues, psCopy.numInitialValues * sizeof(GX2UniformInitialValue));

    // 4: Store loop array
    psh->_loopVars = ( psCopy._numLoops == 0 ) ? (void*)0 : (void *)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, psCopy._loopVars, psCopy._numLoops * sizeof(GFDLoopVar));

    // 5: Store sampler descriptors
    GX2SamplerVar *pSV = (GX2SamplerVar *) malloc(sizeof(GX2SamplerVar)*psCopy.numSamplers);
    if ( !pSV && psCopy.numSamplers != 0 )
    {
        printf("Error! Failed to allocate Sampler Variable structure!\n");
        GFDDestroyDataTable(pDT);
        return NULL;
    }
    psh->samplerVars = ( psCopy.numSamplers == 0) ? (GX2SamplerVar*)0: (GX2SamplerVar *)( pDT->m_pDB + pDT->m_nDB );
    size = GFDRepackSamplerVarArrayFor64Bit(psCopy.samplerVars, pSV, psCopy.numSamplers);
    GFDAddDataTable(pDT, pSV, size);
    free(pSV);

    // s1: Store each uniform block name and point the table copy's entry at it
    for(u32 i = 0; i < psCopy.numUniformBlocks; i++)
    {
        psh->uniformBlocks[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, psCopy.uniformBlocks[i].name );
    }

    // s2: Store each uniform name
    for(u32 i = 0; i < psCopy.numUniforms; i++)
    {
        psh->uniformVars[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, psCopy.uniformVars[i].name );
    }

    // s3: Store each sampler name
    for(u32 i = 0; i < psCopy.numSamplers; i++)
    {
        psh->samplerVars[i].name = (char *)( pDT->m_pDB + pDT->m_nDB );
        GFDAddStringTable((GFDStringTable *)pDT, psCopy.samplerVars[i].name );
    }

    // Store program data
    psh->shaderPtr = (void *)( pDT->m_pDB + pDT->m_nDB );
    GFDAddDataTable(pDT, psCopy.shaderPtr, psCopy.shaderSize);

    return pDT;
}

//---------------------------------------------------------------------------
/// Build a flattened copy of a pixel shader and return its data buffer.
///
/// _GFDCopyBlockPSH is run twice: once with dataSize 0 to find out how large
/// the table gets (m_maxDB), then again with that size. The first table is
/// destroyed; from the second one only the GFDDataTable object is freed and
/// its m_pDB buffer is returned.
/// NOTE(review): this relies on m_pDB being an allocation separate from the
/// GFDDataTable object, which the caller then owns - confirm.
///
/// \param pPS  Shader to copy.
/// \return Pointer to the flattened GX2PixelShader, or NULL if either pass failed.
//---------------------------------------------------------------------------
GX2PixelShader* GFDCopyBlockPSH(GX2PixelShader *pPS)
{
    // First pass: compute the data size.
    GFDDataTable* gd =_GFDCopyBlockPSH( pPS, 0 );
    if ( gd == NULL )
    {
        return NULL;
    }

    // Second pass: build the table that is actually handed out.
    GFDDataTable* psgd =_GFDCopyBlockPSH( pPS, gd->m_maxDB );

    GFDDestroyDataTable( gd );

    if ( psgd == NULL )
    {
        return NULL;
    }

    GX2PixelShader* pshader =(GX2PixelShader *)psgd->m_pDB;
    free( psgd );

    return pshader;
}

//---------------------------------------------------------------------------
// Returns the number of vertex shaders in pData, a file image in the 32-bit
// big-endian format (counts blocks of type GFD_BLOCK_TYPE_GX2_VSH_HEADER).
//---------------------------------------------------------------------------
u32 GDFGetVertexShaderNum32( void* pData )
{
    const u32 shaderCount = _GFDExGetBlockCount(GFD_BLOCK_TYPE_GX2_VSH_HEADER, pData);
    return shaderCount;
}

//---------------------------------------------------------------------------
// Returns the number of pixel shaders in pData, a file image in the 32-bit
// big-endian format (counts blocks of type GFD_BLOCK_TYPE_GX2_PSH_HEADER).
//---------------------------------------------------------------------------
u32 GDFGetPixelShaderNum32( void* pData )
{
    const u32 shaderCount = _GFDExGetBlockCount(GFD_BLOCK_TYPE_GX2_PSH_HEADER, pData);
    return shaderCount;
}

// Data size of the index-th GFD_BLOCK_TYPE_GX2_VSH_HEADER block in pData,
// as reported by _GFDExGetBlockDataSize (0 when it is not found).
u32 GFDGetVertexShaderHeaderSize32( u32 index, const void *pData )
{
    const u32 headerBytes = _GFDExGetBlockDataSize(GFD_BLOCK_TYPE_GX2_VSH_HEADER, index, pData);
    return headerBytes;
}

// Data size of the index-th GFD_BLOCK_TYPE_GX2_PSH_HEADER block in pData,
// as reported by _GFDExGetBlockDataSize (0 when it is not found).
u32 GFDGetPixelShaderHeaderSize32( u32 index, const void *pData )
{
    const u32 headerBytes = _GFDExGetBlockDataSize(GFD_BLOCK_TYPE_GX2_PSH_HEADER, index, pData);
    return headerBytes;
}

// Data size of the index-th GFD_BLOCK_TYPE_GX2_VSH_PROGRAM block in pData,
// as reported by _GFDExGetBlockDataSize (0 when it is not found).
u32 GFDGetVertexShaderProgramSize32(u32 index, const void *pData)
{
    const u32 programBytes = _GFDExGetBlockDataSize(GFD_BLOCK_TYPE_GX2_VSH_PROGRAM, index, pData);
    return programBytes;
}

// Data size of the index-th GFD_BLOCK_TYPE_GX2_PSH_PROGRAM block in pData,
// as reported by _GFDExGetBlockDataSize (0 when it is not found).
u32 GFDGetPixelShaderProgramSize32(u32 index, const void *pData)
{
    const u32 programBytes = _GFDExGetBlockDataSize(GFD_BLOCK_TYPE_GX2_PSH_PROGRAM, index, pData);
    return programBytes;
}

//---------------------------------------------------------------------------
// Rounds x up to a multiple of base.
// The result is computed with a bit mask (base - 1), so it is only a true
// multiple of base when base is a power of two.
//---------------------------------------------------------------------------
u32 RoundUp(u32 x, u32 base)
{
    const u32 lowBits = base - 1;
    const u32 bumped  = x + lowBits;
    return bumped & ~lowBits;
}


//---------------------------------------------------------------------------
// Extracts the index-th vertex shader from pData (a 32-bit format file image).
//
// A program buffer of the program block size, rounded up to a multiple of 32,
// is malloc'ed and handed to GFDGetVertexShader32 together with the address
// of the result pointer.
//
// Returns the shader produced by GFDGetVertexShader32, or NULL when index is
// out of range, the program size is 0, the program buffer cannot be
// allocated, or GFDGetVertexShader32 fails (the shader and the program
// buffer are freed in that last case).
//---------------------------------------------------------------------------
GX2VertexShader* GDFGetVertexShader32To64( u32 index , void* pData )
{
    GX2VertexShader *vertexShader = NULL;
    void *pProgram;
    u32 programSize;

    // Check index number of shaders
    if(index >= GDFGetVertexShaderNum32(pData))
    {
        return NULL;
    }

    // Get the size
    programSize = GFDGetVertexShaderProgramSize32(index, pData);
    programSize = RoundUp( programSize, 32 );

    if( !programSize )
        return NULL;

    pProgram = malloc( programSize );
    if( pProgram == NULL )
    {
        // Do not hand a NULL program buffer to GFDGetVertexShader32.
        return NULL;
    }

    // Get the shader structure and program
    // from file buffer into user aligned buffer which created above
    BOOL ret = GFDGetVertexShader32(&vertexShader, pProgram, index, pData);

    if(!ret)
    {
        // free(NULL) is a no-op, so no guard is needed for vertexShader.
        free(vertexShader);
        vertexShader = NULL;

        free(pProgram);
        pProgram = NULL;
    }

    return vertexShader;
}

//---------------------------------------------------------------------------
// Extracts the index-th pixel shader from pData (a 32-bit format file image).
//
// A program buffer of the program block size, rounded up to a multiple of 32,
// is malloc'ed and handed to GFDGetPixelShader32 together with the address
// of the result pointer.
//
// Returns the shader produced by GFDGetPixelShader32, or NULL when index is
// out of range, the program size is 0, the program buffer cannot be
// allocated, or GFDGetPixelShader32 fails (the shader and the program
// buffer are freed in that last case).
//---------------------------------------------------------------------------
GX2PixelShader* GDFGetPixelShader32To64( u32 index , void* pData )
{
    GX2PixelShader *pixelShader = NULL;
    void *pProgram;
    u32 programSize;

    // Check index number of shaders
    if(index >= GDFGetPixelShaderNum32(pData))
    {
        return NULL;
    }

    // Get the size
    programSize = GFDGetPixelShaderProgramSize32(index, pData);
    programSize = RoundUp( programSize, 32 );

    if( !programSize )
        return NULL;

    pProgram = malloc( programSize );
    if( pProgram == NULL )
    {
        // Do not hand a NULL program buffer to GFDGetPixelShader32.
        return NULL;
    }

    // Get the shader structure and program
    // from file buffer into user aligned buffer which created above
    BOOL ret = GFDGetPixelShader32(&pixelShader, pProgram, index, pData);

    if(!ret)
    {
        // free(NULL) is a no-op, so no guard is needed for pixelShader.
        free(pixelShader);
        pixelShader = NULL;

        free(pProgram);
        pProgram = NULL;
    }

    return pixelShader;
}

//---------------------------------------------------------------------------
// Counts the blocks of type blockType in the file image pData.
//
// After the file header has been validated, the block headers that follow it
// are walked one by one (each block is sizeof(GFDBlockHeader) plus its
// byte-swapped dataSize long). The walk stops at the first header that fails
// the magic / version check or after a GFD_BLOCK_TYPE_END block.
//
// Returns the number of matching blocks; 0 if pData is NULL or the file
// header is rejected.
//---------------------------------------------------------------------------
u32 _GFDExGetBlockCount(GFDBlockType blockType, const void *pData)
{
    if(pData == NULL)
        return 0;

    if(!_GFDExCheckHeaderVersions(pData))
        return 0;

    u32 counts = 0;

    const char *pDataStruct = (const char*) pData + sizeof(GFDHeader); // jump over the header
    const GFDBlockHeader *pBlockHeader = (const GFDBlockHeader *) pDataStruct;

    // Every header is validated by the loop condition before it is read.
    while(_GFDExCheckBlockHeaderMagicVersions(pBlockHeader))
    {
        if (blockType == _GFDEX_SWAP_BYTES( pBlockHeader->type ))
        {
            counts++;
        }

        if(GFD_BLOCK_TYPE_END == _GFDEX_SWAP_BYTES( pBlockHeader->type ))
            // terminate read, we have an end block
            break;

        // Advance to the next block header so the loop condition checks it.
        pDataStruct  = pDataStruct + sizeof(GFDBlockHeader) + _GFDEX_SWAP_BYTES( pBlockHeader->dataSize );
        pBlockHeader = (const GFDBlockHeader *) pDataStruct;
    }

    return counts;
}

//---------------------------------------------------------------------------
// Validates the file header at pData.
// Returns TRUE only when _GFDExGetHeaderVersions accepts the header, the
// major version equals GFD_HEADER_MAJOR and the minor version is not greater
// than GFD_HEADER_MINOR. A NULL pData yields FALSE.
//---------------------------------------------------------------------------
BOOL _GFDExCheckHeaderVersions(const void *pData)
{
    if (pData == NULL)
    {
        return FALSE;
    }

    u32 fileMajor;
    u32 fileMinor;
    u32 fileGPU;    // fetched but not used for the decision

    // fails if magic numbers not match
    if (!_GFDExGetHeaderVersions(&fileMajor, &fileMinor, &fileGPU, pData))
    {
        return FALSE;
    }

    // major versions must match exactly; a newer minor version is rejected
    if (GFD_HEADER_MAJOR != fileMajor || GFD_HEADER_MINOR < fileMinor)
    {
        return FALSE;
    }

    return TRUE;
}

//---------------------------------------------------------------------------
// Validates one block header: its magic must equal GFD_BLOCK_HEADER_MAGIC
// (compared without byte swapping) and its byte-swapped majorVersion must
// equal GFD_BLOCK_HEADER_MAJOR. Returns TRUE when both hold.
//---------------------------------------------------------------------------
BOOL _GFDExCheckBlockHeaderMagicVersions(const GFDBlockHeader *pBlockHeader)
{
    if (GFD_BLOCK_HEADER_MAGIC != pBlockHeader->magic)
    {
        return FALSE;
    }

    if (GFD_BLOCK_HEADER_MAJOR != _GFDEX_SWAP_BYTES(pBlockHeader->majorVersion))
    {
        return FALSE;
    }

    return TRUE;
}

//---------------------------------------------------------------------------
// Reads the version fields of the file header at pData.
//
// The header is copied into a local GFDHeader with every field except magic
// byte-swapped. The outputs are first set to 0. If the magic equals
// GFD_HEADER_MAGIC and the swapped size equals GFD_HEADER_SIZE, the swapped
// major / minor / GPU versions are stored and TRUE is returned; otherwise
// FALSE is returned. When the magic matches but the size does not, the
// "Swap Byte Failed" assertion fires before returning FALSE.
//---------------------------------------------------------------------------
BOOL _GFDExGetHeaderVersions(u32 *pVerMajor, u32 *pVerMinor, u32 *pVerGPU, const void *pData)
{
    GFDHeader *pFileHeader = (GFDHeader *)pData;

    // Host-order copy of the header (magic is taken over unchanged).
    GFDHeader hostHeader;
    hostHeader.magic        = pFileHeader->magic;
    hostHeader.size         = _GFDEX_SWAP_BYTES( pFileHeader->size );
    hostHeader.majorVersion = _GFDEX_SWAP_BYTES( pFileHeader->majorVersion );
    hostHeader.minorVersion = _GFDEX_SWAP_BYTES( pFileHeader->minorVersion );
    hostHeader.gpuVersion   = (GFDGPUVersion)_GFDEX_SWAP_BYTES( pFileHeader->gpuVersion );
    hostHeader.reserved1    = _GFDEX_SWAP_BYTES( pFileHeader->reserved1 );
    hostHeader.reserved2    = _GFDEX_SWAP_BYTES( pFileHeader->reserved2 );

    *pVerMajor = 0;
    *pVerMinor = 0;
    *pVerGPU   = 0;

    const u32 magic = hostHeader.magic;

    if (GFD_HEADER_MAGIC == magic && GFD_HEADER_SIZE == hostHeader.size)
    {
        *pVerMajor = hostHeader.majorVersion;
        *pVerMinor = hostHeader.minorVersion;
        *pVerGPU   = hostHeader.gpuVersion;
        return TRUE;
    }

    // Magic is right but the size is not: flag it in debug builds.
    if (GFD_HEADER_MAGIC == hostHeader.magic)
    {
        ASSERT(!"Swap Byte Failed");
    }
    return FALSE;
}

//---------------------------------------------------------------------------
// Returns the (byte-swapped) dataSize of the index-th block of type
// blockType in the file image pData.
//
// After the file header has been validated, the block headers that follow it
// are walked one by one (each block is sizeof(GFDBlockHeader) plus its
// byte-swapped dataSize long). The walk stops at the first header that fails
// the magic / version check or after a GFD_BLOCK_TYPE_END block.
//
// Returns 0 if pData is NULL, the file header is rejected, or no such block
// is found.
//---------------------------------------------------------------------------
u32 _GFDExGetBlockDataSize(GFDBlockType blockType, u32 index, const void *pData)
{
    u32 nIndexs    = 0;

    if(pData == NULL)
        return 0;

    // Check Header Version
    if(!_GFDExCheckHeaderVersions(pData))
        return 0;

    const char *pDataStruct = (const char*) pData + sizeof(GFDHeader); // jump over the header
    const GFDBlockHeader *pBlockHeader = (const GFDBlockHeader *) pDataStruct;

    // Every header is validated by the loop condition before it is read.
    while(_GFDExCheckBlockHeaderMagicVersions(pBlockHeader))
    {
        if (blockType == _GFDEX_SWAP_BYTES( pBlockHeader->type ))
        {
            if(index == nIndexs)
            {
                return _GFDEX_SWAP_BYTES( pBlockHeader->dataSize );
            }
            nIndexs++;
        }

        if(GFD_BLOCK_TYPE_END == _GFDEX_SWAP_BYTES( pBlockHeader->type ))
            // terminate read, we have an end block
            break;

        // Advance to the next block header so the loop condition checks it.
        pDataStruct  = pDataStruct + sizeof(GFDBlockHeader) + _GFDEX_SWAP_BYTES( pBlockHeader->dataSize );
        pBlockHeader = (const GFDBlockHeader *) pDataStruct;
    }

    return 0; // 0 if didn't find this index
}

//---------------------------------------------------------------------------
// Frees the separately allocated members of a GX2VertexShader whose memory
// is not one linear block: the uniform block, uniform, initial value, loop,
// sampler and attribute arrays, plus every name string they hold.
// The struct itself is not freed and its pointer members are not reset.
//---------------------------------------------------------------------------
void _ClearInternalVertexShader( GX2VertexShader* vertexShader )
{
    // Free the uniform block array and its names
    if ( vertexShader->numUniformBlocks > 0 && vertexShader->uniformBlocks != NULL )
    {
        for ( u32 i = 0; i < vertexShader->numUniformBlocks; i++ )
        {
            if ( vertexShader->uniformBlocks[i].name != NULL ) free( (void *)vertexShader->uniformBlocks[i].name );
        }
        free( vertexShader->uniformBlocks );
    }

    // Free the uniform array and its names
    if ( vertexShader->numUniforms > 0 && vertexShader->uniformVars != NULL )
    {
        for ( u32 i = 0; i < vertexShader->numUniforms; i++ )
        {
            if ( vertexShader->uniformVars[i].name != NULL ) free ( (void *)vertexShader->uniformVars[i].name );
        }
        free( vertexShader->uniformVars );
    }

    // Free the uniform initial values
    if ( vertexShader->numInitialValues > 0 && vertexShader->initialValues != NULL )
    {
        free( vertexShader->initialValues );
    }

    // Free the loop vars
    if ( vertexShader->_numLoops > 0 && vertexShader->_loopVars != NULL )
    {
        free( vertexShader->_loopVars );
    }

    // Free the sampler array and its names
    if ( vertexShader->numSamplers > 0 && vertexShader->samplerVars != NULL )
    {
       for ( u32 i = 0; i < vertexShader->numSamplers; i++ )
       {
           // Test the name pointer itself (the old code tested character i of the string).
           if ( vertexShader->samplerVars[i].name != NULL ) free ( (void *)vertexShader->samplerVars[i].name );
       }
       free( vertexShader->samplerVars );
    }

    // Free the attribute array and its names
    if ( vertexShader->numAttribs > 0 && vertexShader->attribVars != NULL )
    {
        for ( u32 i = 0; i < vertexShader->numAttribs; i++ )
        {
            if ( vertexShader->attribVars[i].name != NULL ) free( (void *)vertexShader->attribVars[i].name );
        }
        free( vertexShader->attribVars );
    }
}

//---------------------------------------------------------------------------
// Frees the separately allocated members of a GX2PixelShader whose memory
// is not one linear block: the uniform block, uniform, initial value, loop
// and sampler arrays, plus every name string they hold.
// The struct itself is not freed and its pointer members are not reset.
//---------------------------------------------------------------------------
void _ClearInternalPixelShader( GX2PixelShader* pixelShader )
{
    // Free the uniform block array and its names
    if ( pixelShader->numUniformBlocks > 0 && pixelShader->uniformBlocks != NULL )
    {
        for ( u32 i = 0; i < pixelShader->numUniformBlocks; i++ )
        {
            if ( pixelShader->uniformBlocks[i].name != NULL ) free( (void *)pixelShader->uniformBlocks[i].name );
        }
        free( pixelShader->uniformBlocks );
    }

    // Free the uniform array and its names
    if ( pixelShader->numUniforms > 0 && pixelShader->uniformVars != NULL )
    {
        for ( u32 i = 0; i < pixelShader->numUniforms; i++ )
        {
            if ( pixelShader->uniformVars[i].name != NULL ) free ( (void *)pixelShader->uniformVars[i].name );
        }
        free( pixelShader->uniformVars );
    }

    // Free the uniform initial values
    if ( pixelShader->numInitialValues > 0 && pixelShader->initialValues != NULL )
    {
        free( pixelShader->initialValues );
    }

    // Free the loop vars
    if ( pixelShader->_numLoops > 0 && pixelShader->_loopVars != NULL )
    {
        free( pixelShader->_loopVars );
    }

    // Free the sampler array and its names
    if ( pixelShader->numSamplers > 0 && pixelShader->samplerVars != NULL )
    {
       for ( u32 i = 0; i < pixelShader->numSamplers; i++ )
       {
           // Test the name pointer itself (the old code tested character i of the string).
           if ( pixelShader->samplerVars[i].name != NULL ) free ( (void *)pixelShader->samplerVars[i].name );
       }
       free( pixelShader->samplerVars );
    }
}

//---------------------------------------------------------------------------
// Rebuilds a GX2VertexShader from the 32-bit GFDVertexShader layout.
//
//   vshader  : destination; every field written below is overwritten.
//   pVSout32 : source block in the 32-bit layout.
//
// Scalar fields are passed through _GFDEX_SWAP_BYTES. Each array member is
// copied or repacked into its own malloc'ed buffer, or set to NULL when its
// element count is 0. GFDGetVertexShader32 releases those buffers again with
// _ClearInternalVertexShader.
//---------------------------------------------------------------------------
void _GFDExSetVertexShaderData( GX2VertexShader* vshader, GFDVertexShader*  pVSout32 )
{
    // Fill in the scalar part of the GX2VertexShader struct.
    for ( u32 i = 0; i < GX2_NUM_VERTEX_SHADER_REGISTERS; i++ )
    {
        vshader->_regs[i]     = _GFDEX_SWAP_BYTES(pVSout32->_regs.reg[i]);
    }
    vshader->shaderSize       = _GFDEX_SWAP_BYTES( pVSout32->shaderSize );
    vshader->shaderMode       = (GX2ShaderMode)_GFDEX_SWAP_BYTES( pVSout32->shaderMode );
    vshader->numUniformBlocks = _GFDEX_SWAP_BYTES( pVSout32->numUniformBlocks );
    vshader->numUniforms      = _GFDEX_SWAP_BYTES( pVSout32->numUniforms );
    vshader->numInitialValues = _GFDEX_SWAP_BYTES( pVSout32->numInitialValues );
    vshader->_numLoops        = _GFDEX_SWAP_BYTES( pVSout32->_numLoops );
    vshader->numSamplers      = _GFDEX_SWAP_BYTES( pVSout32->numSamplers );
    vshader->numAttribs       = _GFDEX_SWAP_BYTES( pVSout32->numAttribs );
    vshader->ringItemsize     = _GFDEX_SWAP_BYTES( pVSout32->ringItemsize );
    vshader->hasStreamOut     = (GX2Boolean)_GFDEX_SWAP_BYTES( (GX2Boolean) pVSout32->hasStreamOut );

    for ( u32 i = 0; i < GX2_MAX_STREAMOUT_BUFFERS; i++ )
    {
        vshader->streamOutVertexStride[i] = _GFDEX_SWAP_BYTES( pVSout32->streamOutVertexStride[i] );
    }

    vshader->shaderProgram =    pVSout32->shaderProgram;

    // Uniform block array.
    if ( vshader->numUniformBlocks > 0 )
    {
        vshader->uniformBlocks    = (GX2UniformBlock*) malloc(sizeof(GX2UniformBlock) * vshader->numUniformBlocks);
        ASSERT(vshader->uniformBlocks != NULL);
        GFDRepackUniformBlockArray32To64Bit( (void*)pVSout32->uniformBlocks, vshader->uniformBlocks, vshader->numUniformBlocks );
    }
    else
    {
        vshader->uniformBlocks = NULL;
    }

    // Uniform array.
    if ( vshader->numUniforms > 0 )
    {
        vshader->uniformVars    = (GX2UniformVar*) malloc(sizeof(GX2UniformVar) * vshader->numUniforms);
        ASSERT(vshader->uniformVars != NULL);
        GFDRepackUniformArray32To64Bit( (void*)pVSout32->uniformVars, vshader->uniformVars, vshader->numUniforms );
    }
    else
    {
        vshader->uniformVars = NULL;
    }

    // Uniform initial values.
    // NOTE(review): these are copied verbatim, without _GFDEX_SWAP_BYTES -
    // confirm that is intended.
    if ( vshader->numInitialValues > 0 )
    {
        u32 initValuesSize     = vshader->numInitialValues * sizeof( GX2UniformInitialValue );
        vshader->initialValues = (GX2UniformInitialValue *)malloc( initValuesSize );
        // Same allocation check as the other arrays; never memcpy into NULL.
        ASSERT(vshader->initialValues != NULL);
        if ( vshader->initialValues != NULL )
        {
            memcpy( (void*)vshader->initialValues, (void *)pVSout32->initialValues, initValuesSize );
        }
    }
    else
    {
        vshader->initialValues = NULL;
    }

    // Loop variables (also copied verbatim).
    if ( vshader->_numLoops > 0 )
    {
        u32 loopVarSize = vshader->_numLoops * sizeof(GFDLoopVar);
        vshader->_loopVars = malloc( loopVarSize );
        ASSERT(vshader->_loopVars != NULL);
        if ( vshader->_loopVars != NULL )
        {
            memcpy( vshader->_loopVars, (void*)pVSout32->_loopVars, loopVarSize );
        }
    }
    else
    {
        vshader->_loopVars = NULL;
    }

    // Sampler array.
    if ( vshader->numSamplers > 0 )
    {
        vshader->samplerVars = (GX2SamplerVar*)malloc( vshader->numSamplers * sizeof(GX2SamplerVar) );
        ASSERT(vshader->samplerVars != NULL);
        GFDRepackSamplerVarArray32To64Bit( (void*)pVSout32->samplerVars, vshader->samplerVars, vshader->numSamplers );
    }
    else
    {
        vshader->samplerVars = NULL;
    }

    // Attribute array.
    if ( vshader->numAttribs > 0 )
    {
        vshader->attribVars = (GX2AttribVar*)malloc( vshader->numAttribs * sizeof(GX2AttribVar) );
        ASSERT(vshader->attribVars != NULL);
        GFDRepackAttribVarArray32To64Bit( (void*)pVSout32->attribVars, vshader->attribVars, vshader->numAttribs );
    }
    else
    {
        vshader->attribVars = NULL;
    }
}

//---------------------------------------------------------------------------
// Rebuilds a GX2PixelShader from the 32-bit GFDPixelShader layout.
//
//   pshader  : destination; every field written below is overwritten.
//   pPSout32 : source block in the 32-bit layout.
//
// Scalar fields are passed through _GFDEX_SWAP_BYTES. Each array member is
// copied or repacked into its own malloc'ed buffer, or set to NULL when its
// element count is 0. GFDGetPixelShader32 releases those buffers again with
// _ClearInternalPixelShader.
//---------------------------------------------------------------------------
void _GFDExSetPixelShaderData( GX2PixelShader* pshader, GFDPixelShader*  pPSout32 )
{
    // Fill in the scalar part of the GX2PixelShader struct.
    for ( u32 i = 0; i < GX2_NUM_PIXEL_SHADER_REGISTERS; i++ )
    {
        pshader->_regs[i]     = _GFDEX_SWAP_BYTES(pPSout32->_regs.reg[i]);
    }
    pshader->shaderSize       = _GFDEX_SWAP_BYTES( pPSout32->shaderSize );
    pshader->shaderMode       = (GX2ShaderMode)_GFDEX_SWAP_BYTES( pPSout32->shaderMode );
    pshader->numUniformBlocks = _GFDEX_SWAP_BYTES( pPSout32->numUniformBlocks );
    pshader->numUniforms      = _GFDEX_SWAP_BYTES( pPSout32->numUniforms );
    pshader->numInitialValues = _GFDEX_SWAP_BYTES( pPSout32->numInitialValues );
    pshader->_numLoops        = _GFDEX_SWAP_BYTES( pPSout32->_numLoops );
    pshader->numSamplers      = _GFDEX_SWAP_BYTES( pPSout32->numSamplers );
    pshader->shaderProgram    =  pPSout32->shaderProgram;

    // Uniform block array.
    if ( pshader->numUniformBlocks > 0 )
    {
        pshader->uniformBlocks    = (GX2UniformBlock*) malloc(sizeof(GX2UniformBlock) * pshader->numUniformBlocks);
        ASSERT(pshader->uniformBlocks != NULL);
        GFDRepackUniformBlockArray32To64Bit( (void*)pPSout32->uniformBlocks, pshader->uniformBlocks, pshader->numUniformBlocks );
    }
    else
    {
        pshader->uniformBlocks = NULL;
    }

    // Uniform array.
    if ( pshader->numUniforms > 0 )
    {
        pshader->uniformVars    = (GX2UniformVar*) malloc(sizeof(GX2UniformVar) * pshader->numUniforms);
        ASSERT(pshader->uniformVars != NULL);
        GFDRepackUniformArray32To64Bit( (void*)pPSout32->uniformVars, pshader->uniformVars, pshader->numUniforms );
    }
    else
    {
        pshader->uniformVars = NULL;
    }

    // Uniform initial values.
    // NOTE(review): these are copied verbatim, without _GFDEX_SWAP_BYTES -
    // confirm that is intended.
    if ( pshader->numInitialValues > 0 )
    {
        u32 initValuesSize     = pshader->numInitialValues * sizeof( GX2UniformInitialValue );
        pshader->initialValues = (GX2UniformInitialValue *)malloc( initValuesSize );
        // Same allocation check as the other arrays; never memcpy into NULL.
        ASSERT(pshader->initialValues != NULL);
        if ( pshader->initialValues != NULL )
        {
            memcpy( (void*)pshader->initialValues, (void *)pPSout32->initialValues, initValuesSize );
        }
    }
    else
    {
        pshader->initialValues = NULL;
    }

    // Loop variables (also copied verbatim).
    if ( pshader->_numLoops > 0 )
    {
        u32 loopVarSize = pshader->_numLoops * sizeof(GFDLoopVar);
        pshader->_loopVars = malloc( loopVarSize );
        ASSERT(pshader->_loopVars != NULL);
        if ( pshader->_loopVars != NULL )
        {
            memcpy( pshader->_loopVars, (void*)pPSout32->_loopVars, loopVarSize );
        }
    }
    else
    {
        pshader->_loopVars = NULL;
    }

    // Sampler array.
    if ( pshader->numSamplers > 0 )
    {
        pshader->samplerVars = (GX2SamplerVar*)malloc( pshader->numSamplers * sizeof(GX2SamplerVar) );
        ASSERT(pshader->samplerVars != NULL);
        GFDRepackSamplerVarArray32To64Bit( (void*)pPSout32->samplerVars, pshader->samplerVars, pshader->numSamplers );
    }
    else
    {
        pshader->samplerVars = NULL;
    }
}

//---------------------------------------------------------------------------
//  Retrieves the index-th vertex shader (GX2VertexShader) from gsh file data.
//
//    pHeader  : receives the shader returned by GFDCopyBlockVSH.
//    pProgram : caller-supplied buffer; the index-th vertex shader program
//               block is memcpy'ed into it. No size is passed in, so the
//               caller must provide a buffer large enough for that block.
//    index    : zero-based index of the vertex shader to fetch.
//    pData    : start of the gsh file data (GFDHeader followed by blocks).
//
//  Returns TRUE on success. Returns FALSE, leaving *pHeader untouched, when
//  an argument is NULL, the file header check fails, an allocation fails,
//  the header block cannot be relocated, or no vertex shader header block
//  with the requested index exists.
//---------------------------------------------------------------------------
BOOL GFDGetVertexShader32(GX2VertexShader **pHeader, void *pProgram, u32 index, const void *pData)
{
    u32  nHeaders    = 0;
    u32  nPrograms   = 0;
    BOOL headerFound = FALSE;

    if( pHeader == NULL || pProgram == NULL || pData == NULL )
        return FALSE;

    if(!_GFDExCheckHeaderVersions(pData))
        return FALSE;

    // Zero-initialised so that every count is 0 and every pointer is NULL
    // until a header block has actually been decoded into it.
    GX2VertexShader* tempVerShader = (GX2VertexShader*)calloc(1, sizeof(GX2VertexShader));
    if( tempVerShader == NULL )
        return FALSE;

    char           *pDataStruct  = (char*)pData + sizeof(GFDHeader); // jump over the header
    GFDBlockHeader *pBlockHeader = (GFDBlockHeader *)pDataStruct;

    // Each block header is validated by the loop condition BEFORE it is parsed.
    while(_GFDExCheckBlockHeaderMagicVersions(pBlockHeader))
    {
        const u32 dataSize = _GFDEX_SWAP_BYTES( pBlockHeader->dataSize );

        pDataStruct += sizeof(GFDBlockHeader);

        switch(_GFDEX_SWAP_BYTES( pBlockHeader->type ))
        {
        case GFD_BLOCK_TYPE_GX2_VSH_HEADER:
            if(index == nHeaders)
            {
                // Relocation patches the block in place, so work on a private
                // copy instead of the caller's const file data.
                GFDVertexShader*  pVSout32 = (GFDVertexShader*)malloc( dataSize );
                if( pVSout32 == NULL )
                {
                    free( tempVerShader );
                    return FALSE;
                }
                memcpy(pVSout32, (void *)pDataStruct, dataSize);

                if (!_GFDExRelocateBlock( dataSize, (char *)pVSout32))
                {
                    ASSERT(!"Internal offset/pointers corrupted.");
                    free( pVSout32 );
                    free( tempVerShader );
                    return FALSE;
                }

                _GFDExSetVertexShaderData( tempVerShader, pVSout32 );
                free( pVSout32 );
                headerFound = TRUE;
            }
            nHeaders++;
            break;
        case GFD_BLOCK_TYPE_GX2_VSH_PROGRAM:
            if(index == nPrograms)
            {
                // Set shader program
                tempVerShader->shaderPtr = pProgram;
                memcpy(tempVerShader->shaderPtr, (char *)pDataStruct, dataSize);
            }
            nPrograms++;
            break;
        default:
            break;
        }
        pDataStruct += dataSize;

        if(GFD_BLOCK_TYPE_END == _GFDEX_SWAP_BYTES( pBlockHeader->type ))
            // terminate read, we have an end block
            break;

        // Advance to the next block; the loop condition validates it.
        pBlockHeader = (GFDBlockHeader *)pDataStruct;
    }

    if( !headerFound )
    {
        // Nothing was decoded into tempVerShader, so there is nothing to copy.
        free( tempVerShader );
        return FALSE;
    }

    // Rebuild the shader in a linear memory region.
    *pHeader = GFDCopyBlockVSH( tempVerShader );

    // Release the temporary GX2VertexShader and the buffers it owns.
    _ClearInternalVertexShader( tempVerShader );
    free( tempVerShader );
    return TRUE;
}

//---------------------------------------------------------------------------
//  Retrieves the index-th pixel shader (GX2PixelShader) from gsh file data.
//
//    pHeader  : receives the shader returned by GFDCopyBlockPSH.
//    pProgram : caller-supplied buffer; the index-th pixel shader program
//               block is memcpy'ed into it. No size is passed in, so the
//               caller must provide a buffer large enough for that block.
//    index    : zero-based index of the pixel shader to fetch.
//    pData    : start of the gsh file data (GFDHeader followed by blocks).
//
//  Returns TRUE on success. Returns FALSE, leaving *pHeader untouched, when
//  an argument is NULL, the file header check fails, an allocation fails,
//  the header block cannot be relocated, or no pixel shader header block
//  with the requested index exists.
//---------------------------------------------------------------------------
BOOL GFDGetPixelShader32(GX2PixelShader **pHeader, void *pProgram, u32 index, const void *pData)
{
    u32  nHeaders    = 0;
    u32  nPrograms   = 0;
    BOOL headerFound = FALSE;

    if( pHeader == NULL || pProgram == NULL || pData == NULL )
        return FALSE;

    if(!_GFDExCheckHeaderVersions(pData))
        return FALSE;

    // Zero-initialised so that every count is 0 and every pointer is NULL
    // until a header block has actually been decoded into it.
    GX2PixelShader* tempPixelShader = (GX2PixelShader*)calloc(1, sizeof(GX2PixelShader));
    if( tempPixelShader == NULL )
        return FALSE;

    char           *pDataStruct  = (char*)pData + sizeof(GFDHeader); // jump over the header
    GFDBlockHeader *pBlockHeader = (GFDBlockHeader *)pDataStruct;

    // Each block header is validated by the loop condition BEFORE it is parsed.
    while(_GFDExCheckBlockHeaderMagicVersions(pBlockHeader))
    {
        const u32 dataSize = _GFDEX_SWAP_BYTES( pBlockHeader->dataSize );

        pDataStruct += sizeof(GFDBlockHeader);

        switch(_GFDEX_SWAP_BYTES( pBlockHeader->type ))
        {
        case GFD_BLOCK_TYPE_GX2_PSH_HEADER:
            if(index == nHeaders)
            {
                // Relocation patches the block in place, so work on a private
                // copy instead of the caller's const file data.
                GFDPixelShader*  pPSout32 = (GFDPixelShader*)malloc( dataSize );
                if( pPSout32 == NULL )
                {
                    free( tempPixelShader );
                    return FALSE;
                }
                memcpy(pPSout32, (void *)pDataStruct, dataSize);

                if (!_GFDExRelocateBlock( dataSize, (char *)pPSout32))
                {
                    ASSERT(!"Internal offset/pointers corrupted.");
                    free( pPSout32 );
                    free( tempPixelShader );
                    return FALSE;
                }

                _GFDExSetPixelShaderData( tempPixelShader, pPSout32 );
                // The private copy is no longer needed once it has been
                // decoded (GFDGetVertexShader32 frees its copy at the same
                // point); it was previously leaked on every call.
                free( pPSout32 );
                headerFound = TRUE;
            }
            nHeaders++;
            break;
        case GFD_BLOCK_TYPE_GX2_PSH_PROGRAM:
            if(index == nPrograms)
            {
                // Set shader program
                tempPixelShader->shaderPtr = pProgram;
                memcpy(tempPixelShader->shaderPtr, (char *)pDataStruct, dataSize);
            }
            nPrograms++;
            break;
        default:
            break;
        }
        pDataStruct += dataSize;

        if(GFD_BLOCK_TYPE_END == _GFDEX_SWAP_BYTES( pBlockHeader->type ))
            // terminate read, we have an end block
            break;

        // Advance to the next block; the loop condition validates it.
        pBlockHeader = (GFDBlockHeader *)pDataStruct;
    }

    if( !headerFound )
    {
        // Nothing was decoded into tempPixelShader, so there is nothing to copy.
        free( tempPixelShader );
        return FALSE;
    }

    // Rebuild the shader in a linear memory region.
    *pHeader = GFDCopyBlockPSH( tempPixelShader );

    // Release the temporary GX2PixelShader and the buffers it owns.
    _ClearInternalPixelShader( tempPixelShader );
    free( tempPixelShader );
    return TRUE;
}

//---------------------------------------------------------------------------
// Relocates the block at pData (nBytesBlock bytes long) in place, using the
// GFDBlockRelocationHeader stored in the last bytes of the block.
//
// Returns FALSE when pData is NULL or the block is too small to contain the
// relocation header. Returns TRUE without touching the block when its
// basePatchAddress is already non-zero (i.e. it was relocated before).
// Otherwise returns the result of _GFDExRelocateBlockEx.
//---------------------------------------------------------------------------
BOOL _GFDExRelocateBlock(u32 nBytesBlock, char *pData)
{
    u32 size = sizeof(GFDBlockRelocationHeader);

    if(pData == NULL)
        return FALSE;

    // A block shorter than the trailer would make nBytesBlock - size wrap
    // around and place pTrailer outside the buffer.
    if(nBytesBlock < size)
        return FALSE;

    GFDBlockRelocationHeader *pTrailer =  (GFDBlockRelocationHeader *)
        (pData + nBytesBlock - size);

    ASSERT(GFD_BLOCK_RELOCATION_HEADER_MAGIC == pTrailer->magic && _GFDEX_SWAP_BYTES( pTrailer->size ) == size);

    ASSERT(GFDExCheckTagDAT( _GFDEX_SWAP_BYTES(pTrailer->patchTableOffset ) ));

    // if block has already been relocated, don't relocate it again
    if(pTrailer-> basePatchAddress != 0)
        return TRUE;

    // finally, use the patch table to update all the pointers in the structure
    // from relative to the beginning of the structure, to absolute in memory.
    // NOTE(review): (u32)pData keeps only the low 32 bits of the address on a
    // 64-bit host - confirm the callers' buffers are compatible with that.
    return _GFDExRelocateBlockEx(pTrailer, (u32)0, (u32)pData, pData);
}

//---------------------------------------------------------------------------
// Relocates the structure pointed to by pData by offsetting every pointer
// listed in its patch table.
//
//   pTrailer   : relocation header of the block; supplies the patch table
//                offset and entry count, and receives toOffset in
//                basePatchAddress once relocation is done.
//   fromOffset : base the stored pointer values are currently relative to.
//   toOffset   : base the pointer values should be relative to afterwards.
//   pData      : start of the block being patched.
//
// For each non-zero patch table entry, the u32 at that (tag-stripped) offset
// inside the block is replaced by
//     GFDExCleanTag(swapped value) - fromOffset + toOffset.
//
// Returns FALSE when pTrailer or pData is NULL, TRUE otherwise.
//---------------------------------------------------------------------------
BOOL _GFDExRelocateBlockEx(GFDBlockRelocationHeader *pTrailer, u32 fromOffset, u32 toOffset, char *pData)
{
    // Validate both pointers before anything is read through pTrailer.
    if(pTrailer == NULL || pData == NULL)
        return FALSE;

    u32 MainOffset   = GFDExCleanTag( _GFDEX_SWAP_BYTES( pTrailer->dataOffset ) );
    u32 PTableOffset = GFDExCleanTag( _GFDEX_SWAP_BYTES( pTrailer->patchTableOffset ) );
    u32 NPatches     = _GFDEX_SWAP_BYTES( pTrailer->patchTableOffsetNumber );

    ASSERT(0 == MainOffset);

    for(u32 i = 0; i < NPatches; i++)
    {
        // i-th entry of the patch table: offset of a pointer field in the block.
        u32 Offset = _GFDEX_SWAP_BYTES( *((u32*) (pData + PTableOffset + 4*i)) );
        if(Offset == 0)
            continue;

        ASSERT(fromOffset != 0 || GFDExCheckTagDAT(Offset) || GFDExCheckTagSTR(Offset));
        u32 *pPatchLoc = ((u32*) (pData +  GFDExCleanTag(Offset)) );

        // NOTE(review): the result of this call is discarded; it looks like a
        // leftover - confirm GFDExCheckTagSTR has no side effects, then remove.
        GFDExCheckTagSTR(_GFDEX_SWAP_BYTES( *pPatchLoc ));
        ASSERT(fromOffset != 0 || GFDExCheckTagDAT(_GFDEX_SWAP_BYTES( *pPatchLoc )) || GFDExCheckTagSTR(_GFDEX_SWAP_BYTES( *pPatchLoc )));

        // The value is read through _GFDEX_SWAP_BYTES but written back as-is.
        *pPatchLoc = (u32) ( GFDExCleanTag(_GFDEX_SWAP_BYTES( *pPatchLoc )) - fromOffset + toOffset);
    }

    pTrailer->basePatchAddress = toOffset;            // store offset away so we don't step on ourselves.

    return TRUE;
}

#pragma warning ( pop )
