﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#define _CRT_SECURE_NO_WARNINGS

#include "stdafx.h"
#include <stdlib.h>
#include <windows.h>
#include <locale.h>
#include "eftShadergshCompile.h"
#include "omp.h"

typedef DWORD64 uint64;

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus

// ---- State used by the single-threaded path (gshInitialize / gshCompile / gshFinalize) ----
// Module handle of the shader-utility DLL; written by LoadDLLs via gshInitialize.
HMODULE  hShaderUtilDLL;
// Passed to LoadDLLs / FreeDLLs, but never assigned by the code visible in this file.
HMODULE  hGfdDLL;
// GSH2 entry points resolved from hShaderUtilDLL by LoadDLLs.
GSH2Func fpGSH2;
// Passed to LoadDLLs, but not filled in by the code visible in this file.
GFDFunc  fpGFD;

#if 1
// ---- State used by the multi-threaded path (gshInitializMulti / gshCompileMulti) ----
// Number of per-thread compiler instances; set from jobsNumber in gshInitializMulti.
u32            g_NumThreads;
// Arrays below are allocated in gshInitializMulti and indexed by instance number.
HMODULE*       g_ShaderUtilDLL;
GSH2Func*      g_FpGSH2;
GSH2Handle*    g_HShaderUtils;
// File names of the per-instance DLL copies, allocated and filled by CopyDlls.
wchar_t**      g_DllNameArray;



#endif
 // #if CAFE_OS_SDK_VERSION >= 21104
 // GFDShaders2             shaders;
 // #else
 // GFDShaders              shaders;
 // #endif
// Setup structure handed to fpGSH2.Initialize in gshInitialize.
GSH2Setup               libSetup;
// Compiler handle returned by fpGSH2.Initialize; NULL while not initialized.
GSH2Handle              hShaderUtils = NULL;

// Accumulates the compiled shaders and produces the final gsh binary.
CollectShaderBinary     collectShader;

// NOTE(review): not referenced in the visible part of this file.
#define GSH_DUMP_FILENAME_SIZE 256

/**
 * Loads the shader-utility DLL and resolves the GSH2 entry points used by the
 * single-threaded compile path.
 *
 * @param hShaderUtilDLL  Receives the module handle returned by LoadLibrary.
 * @param fpGSH2          Function table filled with the GetProcAddress results;
 *                        an entry the DLL does not export is stored as NULL.
 * @param hGfdDLL         Not touched by this function.
 * @param fpGFD           Not touched by this function.
 * @param exePath         Multi-byte path prefix; LIB_DLL_SHADERUTILS is appended
 *                        to it verbatim to form the DLL path.
 *
 * Exits the process with EC_LOADINGDLLFAILED when the DLL cannot be loaded.
 */
void LoadDLLs(HMODULE *hShaderUtilDLL, GSH2Func *fpGSH2, HMODULE *hGfdDLL, GFDFunc *fpGFD, const char *exePath)
{
    (void)hGfdDLL;
    (void)fpGFD;

    // We have a problem when DLL path is longer than 512 chars; this needs to be fixed.
    wchar_t path[512];
    size_t returnValue;

    setlocale( LC_CTYPE, "" );
    mbstowcs_s(&returnValue, path, 512, exePath, 511);
    wcscat_s(path, 512, LIB_DLL_SHADERUTILS);

    const HMODULE dll = LoadLibrary(path);
    *hShaderUtilDLL = dll;
    if ( !dll )
    {
        // Nothing was loaded, so there is no module handle to release here.
        printf("Failed to load DLL %ws. Exiting.", path);
        exit(EC_LOADINGDLLFAILED);
    }

    // Resolve the entry points. The compile/destroy variants depend on the SDK version.
    fpGSH2->Initialize                    = reinterpret_cast<PGSH2Initialize>(GetProcAddress(dll, "GSH2Initialize"));
    fpGSH2->Destroy                       = reinterpret_cast<PGSH2Destroy>(GetProcAddress(dll, "GSH2Destroy"));
#if CAFE_OS_SDK_VERSION >= 21104
    fpGSH2->CompileProgram3               = reinterpret_cast<PGSH2CompileProgram3>(GetProcAddress(dll, "GSH2CompileProgram3"));
#elif CAFE_OS_SDK_VERSION >= 21002
    fpGSH2->CompileProgram2               = reinterpret_cast<PGSH2CompileProgram2>(GetProcAddress(dll, "GSH2CompileProgram2"));
#else
    fpGSH2->CompileProgram                = reinterpret_cast<PGSH2CompileProgram>(GetProcAddress(dll, "GSH2CompileProgram"));
#endif
#if CAFE_OS_SDK_VERSION >= 21104
    fpGSH2->DestroyGX2Program3            = reinterpret_cast<PGSH2DestroyGX2Program3>(GetProcAddress(dll, "GSH2DestroyGX2Program3"));
#else
    fpGSH2->DestroyGX2Program             = reinterpret_cast<PGSH2DestroyGX2Program>(GetProcAddress(dll, "GSH2DestroyGX2Program"));
#endif
    fpGSH2->CalcFetchShaderSizeEx         = reinterpret_cast<PGSH2CalcFetchShaderSizeEx>(GetProcAddress(dll, "GSH2CalcFetchShaderSizeEx"));
    fpGSH2->InitFetchShaderEx             = reinterpret_cast<PGSH2InitFetchShaderEx>(GetProcAddress(dll, "GSH2InitFetchShaderEx"));
    fpGSH2->GetVertexShaderGPRs           = reinterpret_cast<PGSH2GetVertexShaderGPRs>(GetProcAddress(dll, "GSH2GetVertexShaderGPRs"));
    fpGSH2->GetGeometryShaderGPRs         = reinterpret_cast<PGSH2GetGeometryShaderGPRs>(GetProcAddress(dll, "GSH2GetGeometryShaderGPRs"));
    fpGSH2->GetPixelShaderGPRs            = reinterpret_cast<PGSH2GetPixelShaderGPRs>(GetProcAddress(dll, "GSH2GetPixelShaderGPRs"));
    fpGSH2->GetVertexShaderStackEntries   = reinterpret_cast<PGSH2GetVertexShaderStackEntries>(GetProcAddress(dll, "GSH2GetVertexShaderStackEntries"));
    fpGSH2->GetGeometryShaderStackEntries = reinterpret_cast<PGSH2GetGeometryShaderStackEntries>(GetProcAddress(dll, "GSH2GetGeometryShaderStackEntries"));
    fpGSH2->GetPixelShaderStackEntries    = reinterpret_cast<PGSH2GetPixelShaderStackEntries>(GetProcAddress(dll, "GSH2GetPixelShaderStackEntries"));
}

/**
 * Releases the shader-utility and GFD DLL handles.
 *
 * Handles that are NULL are skipped (the code visible in this file never loads
 * the GFD DLL), and each released handle is reset to NULL so that a second
 * call cannot release the same module twice.
 *
 * @param hShaderUtilDLL  Handle slot of the shader-utility DLL; may be NULL.
 * @param hGfdDLL         Handle slot of the GFD DLL; may be NULL.
 */
void FreeDLLs(HMODULE *hShaderUtilDLL, HMODULE *hGfdDLL)
{
    if ( hShaderUtilDLL && *hShaderUtilDLL )
    {
        FreeLibrary(*hShaderUtilDLL);
        *hShaderUtilDLL = NULL;
    }

    if ( hGfdDLL && *hGfdDLL )
    {
        FreeLibrary(*hGfdDLL);
        *hGfdDLL = NULL;
    }
}

/**
 * Frees a shader source buffer allocated with new[] and clears the caller's pointer.
 *
 * @param ppShaders  Address of the buffer pointer; the pointee may already be NULL.
 * @return Always true.
 */
bool DeleteShaderSource(char** ppShaders)
{
    char* const pBuffer = *ppShaders;

    // Nothing allocated: leave the (already NULL) pointer alone.
    if (pBuffer == NULL)
    {
        return true;
    }

    delete[] pBuffer;
    *ppShaders = NULL;
    return true;
}

bool gshAppendShaderSource(char* pShaderSrc, u32 fileLen, char** ppSource)
{
    u32 totalLen    = 0;
    u32 offset      = 0;

    if (!(*ppSource))
    {
        offset      = 0;
        totalLen    = fileLen + 1;
        *ppSource   = new char[totalLen];

        if (!(*ppSource))
        {
            return false;
        }

        *ppSource[0] = '\0';
    }
    else
    {
        offset      = (u32)(strlen(*ppSource) + 1);     // + 1 for appending "\n"
        totalLen    = offset + fileLen + 1;             // + 1 for appending "\0"
        char* pTemp = new char[totalLen];

        if (!pTemp)
        {
            return false;
        }

        assert(totalLen >= 2);
        strcat_s(pTemp, totalLen, (*ppSource));
        strcpy_s(pTemp, totalLen, "\n");
        delete [] (*ppSource);
        *ppSource = pTemp;
    }

    memcpy(((*ppSource) + offset), pShaderSrc, fileLen );
    (*ppSource)[totalLen - 1] = '\0';
    return true;
}

void DeleteSOVaryings(char** pSOVaryings, u32 numVaryings)
{
    if (pSOVaryings != NULL)
    {
        for (u32 idx = 0; idx < numVaryings; idx++)
        {
            delete[] (pSOVaryings[idx]);
        }
        delete [] (pSOVaryings);
    }
}

// Initial capacity of the varying table, and the step by which it grows.
#define MIN_NUM_VARYINGS 50

/**
 * Parses stream-out varying names from a NUL-terminated text buffer.
 *
 * The first whitespace-delimited token of every non-blank line becomes one
 * entry; the rest of the line is ignored. Parsing stops at the end of the text
 * or at a token that does not fit the 256-byte scratch buffer.
 *
 * @param pSOVaryingsData  NUL-terminated text to parse.
 * @param fileLen          Size of the text as reported by the caller. Not used
 *                         for parsing; the text is read up to its terminating NUL.
 * @param pNumSOVaryings   Receives the number of entries returned (0 on failure).
 * @return Table of new[]-allocated strings (release with DeleteSOVaryings),
 *         or NULL on invalid input or allocation failure.
 */
char** gshGetStreamOutVaryings(char* pSOVaryingsData, u32 fileLen, u32* pNumSOVaryings)
{
    char varying[256];
    u32 varyingsSize = MIN_NUM_VARYINGS;
    u32 numVaryings = 0;
    u32 nData = 0;

    (void)fileLen;

    *pNumSOVaryings = 0;
    if (pSOVaryingsData == NULL)
    {
        return NULL;
    }

    char** pSOVaryings = new char*[varyingsSize];
    if (pSOVaryings == NULL)
    {
        return NULL;
    }

    for (;;)
    {
        // Step over leading whitespace and blank lines ourselves. "%s" skips
        // them too, and without this the line-skip below would stop at the
        // blank line and the same token would be read a second time.
        char c = pSOVaryingsData[nData];
        while (c == ' ' || (c >= '\t' && c <= '\r'))
        {
            c = pSOVaryingsData[++nData];
        }

        // sscanf_s takes the capacity of the destination buffer, not the input length.
        if (sscanf_s(&pSOVaryingsData[nData], "%s", varying, (unsigned)sizeof(varying)) != 1)
        {
            break;
        }

        if (numVaryings >= varyingsSize)
        {
            varyingsSize += MIN_NUM_VARYINGS;
            char** pTemp = new char*[varyingsSize];

            if (pTemp == NULL)
            {
                DeleteSOVaryings(pSOVaryings, numVaryings);
                return NULL;
            }

            memcpy(pTemp, pSOVaryings, (numVaryings * sizeof(char *)));
            delete [] (pSOVaryings);
            pSOVaryings = pTemp;
        }

        const size_t varyingLen = strlen(varying) + 1;
        pSOVaryings[numVaryings] = new char[varyingLen];

        if (pSOVaryings[numVaryings] == NULL)
        {
            DeleteSOVaryings(pSOVaryings, numVaryings);
            return NULL;
        }

        strcpy_s(pSOVaryings[numVaryings], varyingLen, varying);
        numVaryings++;

        // Only the first token of a line is used: skip to the start of the next line.
        while (pSOVaryingsData[nData] != '\0' && pSOVaryingsData[nData] != '\n') ++nData;
        if (pSOVaryingsData[nData] == '\n') ++nData;
    }

    *pNumSOVaryings = numVaryings;

    return pSOVaryings;
}

/**
 * Releases every buffer owned by a ShaderSourceInfo: the shader source strings
 * (including the compute shader source on SDK versions that have one) and the
 * stream-out varying table. The freed members are reset, so calling this twice
 * on the same structure is harmless.
 *
 * @param pConfig  Structure whose buffers are released.
 * @return Always true.
 */
bool Destroy(ShaderSourceInfo* pConfig)
{
    DeleteShaderSource(&pConfig->vs_source);
    DeleteShaderSource(&pConfig->ps_source);
    DeleteShaderSource(&pConfig->gs_source);
#if CAFE_OS_SDK_VERSION >= 21104
    // The compile functions allocate cs_source under the same SDK condition.
    DeleteShaderSource(&pConfig->cs_source);
#endif

    DeleteSOVaryings(pConfig->so_varyings, pConfig->numSOVaryings);
    pConfig->so_varyings   = NULL;
    pConfig->numSOVaryings = 0;

    return true;
}

/**
 * Sets up the single-threaded compile path: loads the DLLs, creates the
 * compiler instance and prepares the shader collector.
 *
 * @param path       Path prefix forwarded to LoadDLLs.
 * @param numShader  Value forwarded to collectShader.Initialize.
 *
 * Exits the process with EC_GSH2INITFAILED when the compiler instance cannot
 * be created.
 */
void gshInitialize(const char *path, u32 numShader )
{
    // Load DLLs
    LoadDLLs(&hShaderUtilDLL, &fpGSH2, &hGfdDLL, &fpGFD, path);

    memset(&libSetup, 0, sizeof(libSetup));
    libSetup.gpu = DEFAULT_GPU;

    // Initialize shaderUtils lib
    hShaderUtils = fpGSH2.Initialize(&libSetup);
    if (!hShaderUtils)
    {
        FreeDLLs(&hShaderUtilDLL, &hGfdDLL);
        exit(EC_GSH2INITFAILED);
    }

    collectShader.Initialize( numShader );
}

/**
 * Tears down the single-threaded compile path: destroys the compiler instance
 * (if one exists), releases the DLL handles and destroys the shader collector.
 */
void gshFinalize()
{
    // Destroy the compiler instance only when gshInitialize created one.
    if ( hShaderUtils != NULL ) {
        fpGSH2.Destroy(hShaderUtils);
        hShaderUtils = NULL;
    }

    FreeDLLs(&hShaderUtilDLL, &hGfdDLL);

    // NOTE(review): collectShader is a global object, so its destructor will
    // presumably run again at program exit. Confirm that ~CollectShaderBinary
    // tolerates being run twice, or replace this with an explicit release method.
    collectShader.~CollectShaderBinary();
}

/**
 * Copies the disassembly part of a shader dump into a freshly malloc'ed string.
 *
 * The copy starts at the first "; --------  Disassembly" marker; when the dump
 * has no such marker the whole dump is copied.
 *
 * @param pOutAsmCode  Receives the copy (release with free()). Set to NULL when
 *                     pShaderDump is NULL or the allocation fails. A NULL
 *                     pOutAsmCode makes the call a no-op.
 * @param pShaderDump  NUL-terminated dump text, or NULL.
 */
void DumpShaderCode( char** pOutAsmCode, const char* pShaderDump )
{
    if (pOutAsmCode == NULL)
    {
        return;
    }

    *pOutAsmCode = NULL;
    if (pShaderDump == NULL)
    {
        return;
    }

    const char* pMarker = strstr( pShaderDump, "; --------  Disassembly" );
    const char* pText   = (pMarker != NULL) ? pMarker : pShaderDump;

    const size_t codeSize = strlen( pText );
    char* pCopy = (char *)malloc( codeSize + 1 );
    if (pCopy == NULL)
    {
        // Out of memory: report "no dump" instead of writing through NULL.
        return;
    }

    memcpy( pCopy, pText, codeSize + 1 );   // + 1 copies the terminating '\0' as well
    *pOutAsmCode = pCopy;
}

/**
 * Compiles one shader set with the single-threaded compiler instance
 * (hShaderUtils / fpGSH2) and registers the result in collectShader.
 *
 * @param vshader, vsize   Vertex shader source and its length in bytes.
 * @param fshader, fsize   Pixel (fragment) shader source and its length.
 * @param gshader, gsize   Optional geometry shader source (NULL or size 0 to skip).
 * @param sodata, sosize   Optional stream-out varying list (NULL or size 0 to skip).
 * @param cshader, csize   Optional compute shader source (SDK >= 21104 only).
 * @param outfile          Optional output file name stored in the source info.
 * @param shaderTbl        Receives the shader IDs and the compiler info log.
 * @param asmCodeTable     When non-NULL, receives malloc'ed disassembly dumps.
 *
 * @return EC_SUCCESS, EC_OUTPUTWARNING (compiled, but the compiler produced an
 *         info log), EC_COMPILEFAILED, or EC_INPUTFILEERROR when an input could
 *         not be prepared. All buffers allocated here are released on every path.
 */
#if CAFE_OS_SDK_VERSION >= 21104
s32 gshCompile(
    char*           vshader,
    u32             vsize,
    char*           fshader,
    u32             fsize,
    char*           gshader,
    u32             gsize,
    char*           sodata,
    u32             sosize,
    char*           cshader,
    u32             csize,
    char*           outfile,
    ShaderTable*    shaderTbl,
    ShaderAsmTable* asmCodeTable )
#else
s32 gshCompile(
    char*           vshader,
    u32             vsize,
    char*           fshader,
    u32             fsize,
    char*           gshader,
    u32             gsize,
    char*           sodata,
    u32             sosize,
    char*           outfile,
    ShaderTable*    shaderTbl,
    ShaderAsmTable* asmCodeTable )

#endif
{
#if CAFE_OS_SDK_VERSION >= 21104
    GFDShaders2             shaders;
#else
    GFDShaders              shaders;
#endif
    memset(&shaders, 0, sizeof(shaders));

    s32         retCode     = EC_SUCCESS;

#if CAFE_OS_SDK_VERSION >= 21104
    GSH2CompileSetup3   compileSetup;
    GSH2CompileOutput3 compileOutput;
#elif CAFE_OS_SDK_VERSION >= 21002
    GSH2CompileSetup2   compileSetup;
    GSH2CompileOutput  compileOutput;
#else
    GSH2CompileSetup   compileSetup;
    GSH2CompileOutput  compileOutput;
#endif

    ShaderSourceInfo   sourceInfo;

    memset(&compileOutput, 0, sizeof(compileOutput));
    memset(&compileSetup, 0, sizeof(compileSetup));
    memset(&sourceInfo, 0, sizeof(sourceInfo));

#if CAFE_OS_SDK_VERSION >= 21104
    compileSetup.abi_version             = GSH2_ABI_VERSION3;
    compileOutput.abi_version            = GSH2_ABI_VERSION3;
    compileOutput.gx2Program.abi_version = GSH2_ABI_VERSION3;
#endif

    // Initialize defaults
    sourceInfo.pOutFilename       = DEFAULT_OUTFILENAME;
    sourceInfo.gpu                = DEFAULT_GPU;
    sourceInfo.lang               = DEFAULT_SHADINGLANGUAGE;
    sourceInfo.forceUniformBlock  = DEFAULT_FORCEUNIFORMBLOCK;
    sourceInfo.outfilealign       = true;
//    sourceInfo.outfilealign       = DEFAULT_ALIGNMODE;
    sourceInfo.append             = DEFAULT_APPENDMODE;
    sourceInfo.ascode             = DEFAULT_ASCODEMODE;
    sourceInfo.endianbugfix       = DEFAULT_ENDIANBUGFIX;
    sourceInfo.dumpShaders        = (asmCodeTable != NULL) ? 1 : 0;
#if CAFE_OS_SDK_VERSION >= 21002
    // Shaders are not identified by GUID, so the spark info is not emitted.
    sourceInfo.noSparkInfo        = true;
    sourceInfo.optimizeFlags      = GSH2_OPTFLAGS_NONE;
#endif

    // Every early return below releases whatever sourceInfo already owns.
    if (!gshAppendShaderSource(vshader, vsize, &sourceInfo.vs_source))
    {
        printf("Can not set vertex shader code\n");
        Destroy(&sourceInfo);
        return (EC_INPUTFILEERROR);
    }

    if (!gshAppendShaderSource(fshader, fsize, &sourceInfo.ps_source))
    {
        printf("Can not set pixel shader code\n");
        Destroy(&sourceInfo);
        return (EC_INPUTFILEERROR);
    }

    if ( gshader && (gsize != 0) )
    {
        if (!gshAppendShaderSource(gshader, gsize, &sourceInfo.gs_source))
        {
            printf("Can not set geometry shader code\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }

    if ( sodata && (sosize != 0) )
    {
        sourceInfo.so_varyings = gshGetStreamOutVaryings(sodata, sosize, &sourceInfo.numSOVaryings);
        if (sourceInfo.so_varyings == NULL)
        {
            printf("Can not set stream out varyings\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }
#if CAFE_OS_SDK_VERSION >= 21104
    if ( cshader && (csize != 0) )
    {
        if (!gshAppendShaderSource(cshader, csize, &sourceInfo.cs_source))
        {
            printf("Can not set compute shader code\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }

    if (sourceInfo.cs_source)
    {
        // NOTE(review): the compileSetup source pointers are still zero at this
        // point (cleared by memset above, assigned further down), so this
        // condition cannot be true as written. Confirm the intended check.
        if (compileSetup.vs_source || compileSetup.ps_source || compileSetup.gs_source)
        {
            fprintf(stderr, "Error: compute shader must be compiled on its own.\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }
#endif
    if (outfile) sourceInfo.pOutFilename = outfile;

    // Compile shaders
    compileSetup.vs_source                  = sourceInfo.vs_source;
    compileSetup.ps_source                  = sourceInfo.ps_source;
    compileSetup.gs_source                  = sourceInfo.gs_source;
    compileSetup.so_varyings                = (const char**)sourceInfo.so_varyings;
#if CAFE_OS_SDK_VERSION >= 21104
    compileSetup.cs_source                  = sourceInfo.cs_source;
#endif
    compileSetup.numSOVaryings              = sourceInfo.numSOVaryings;
    compileSetup.lang                       = sourceInfo.lang;
    compileSetup.options.dumpShaders        = (sourceInfo.dumpShaders) ? 1 : 0;
    compileSetup.options.forceUniformBlock  = (sourceInfo.forceUniformBlock) ? 1 : 0;
#if CAFE_OS_SDK_VERSION >= 21002
    compileSetup.options.optimize           = (sourceInfo.optimizeFlags != 0) ? 1 : 0;
    compileSetup.options.skipSparkDebug     = (sourceInfo.noSparkInfo) ? 1 : 0;
    compileSetup.options.optFlags           = 1;
    compileSetup.optimizeFlags              = sourceInfo.optimizeFlags;
    #if CAFE_OS_SDK_VERSION >= 21104
    GSH2_OPTFLAG_SETBIT(compileSetup.optimizeFlags, GSH2_OPTFLAG_LIMITARRAYSYMS);
    #endif

    compileSetup.vs_source_filename         = (const char*)"";
    compileSetup.ps_source_filename         = (const char*)"";
    compileSetup.gs_source_filename         = (const char*)"";
    compileSetup.spark_output_dir           = (const char*)NULL;
#endif

#if CAFE_OS_SDK_VERSION >= 21104
    if (fpGSH2.CompileProgram3(hShaderUtils, &compileSetup, &compileOutput))
#elif CAFE_OS_SDK_VERSION >= 21002
    if (fpGSH2.CompileProgram2(hShaderUtils, &compileSetup, &compileOutput))
#else
    if (fpGSH2.CompileProgram(hShaderUtils, &compileSetup, &compileOutput))
#endif
    {
        #if CAFE_OS_SDK_VERSION >= 21104
        shaders.abiVersion = GFD_DLL_ABI_VERSION;
        #endif
        if (sourceInfo.vs_source) {
            shaders.pVertexShader = &compileOutput.gx2Program.vs;
        }
        if (sourceInfo.ps_source) {
            shaders.pPixelShader = &compileOutput.gx2Program.ps;
        }
        if (sourceInfo.gs_source) {
            shaders.pGeometryShader = &compileOutput.gx2Program.gs;
        }

        if ( (sourceInfo.dumpShaders) && ( asmCodeTable != NULL ) )
        {
            DumpShaderCode( &asmCodeTable->vertexAsmCode, compileOutput.pVSDump );

            DumpShaderCode( &asmCodeTable->GeometryAsmCode, compileOutput.pGSDump );

            DumpShaderCode( &asmCodeTable->fragmentAsmCode, compileOutput.pPSDump );
        }

        #if CAFE_OS_SDK_VERSION >= 21002
        // Fetch the 16-byte GUID appended to the end of the vertex shader binary.
        if ( !compileSetup.options.skipSparkDebug )
        {
            GX2VertexShader* pCompiledVs = &compileOutput.gx2Program.vs;
            void* pGuid = (void*)((u64)pCompiledVs->shaderPtr+(pCompiledVs->shaderSize-16));
            memcpy( (void *)&shaderTbl->shaderID, pGuid, 16 );
        }
        #endif

        // Copy the vertex / fragment shaders so they can be de-duplicated and given IDs.
        GX2VertexShader* vsh = GFDCopyBlockVSH(shaders.pVertexShader);
        GX2PixelShader*  fsh = GFDCopyBlockPSH(shaders.pPixelShader);

        // Stream-out shaders are always registered at the very end.
        if ( sodata != NULL && sosize > 0 )
        {
            collectShader.SetVertexShader( collectShader.GetVertexShaderCnt(), vsh );
            collectShader.SetPixelShader(  collectShader.GetPixelShaderCnt(), fsh );

            shaderTbl->SetVertexShaderId(  collectShader.GetVertexShaderCnt() );
            shaderTbl->SetPixelShaderId(   collectShader.GetPixelShaderCnt() );
            collectShader.updateShaderCnt();
        }
        else  // Regular shaders: reuse the ID of an identical shader when there is one.
        {
            shaderTbl->SetVertexShaderId( collectShader.CheckDupVertexShader( vsh ) );
            shaderTbl->SetPixelShaderId(  collectShader.CheckDupPixelShader( fsh ) );
        }
    }
    else
    {
        retCode = EC_COMPILEFAILED;
    }


    // we expect to have warning/error message in infoLog
    if ( compileOutput.pInfoLog  )
    {
        shaderTbl->SetLog(compileOutput.pInfoLog);
        if ( retCode == EC_SUCCESS ) retCode = EC_OUTPUTWARNING;
    }

    // Cleanup
#if CAFE_OS_SDK_VERSION >= 21104
    fpGSH2.DestroyGX2Program3(hShaderUtils, &compileOutput.gx2Program);
#else
    fpGSH2.DestroyGX2Program(hShaderUtils, &compileOutput.gx2Program);
#endif
    Destroy(&sourceInfo);

    return retCode;
}


//---------------------------------------------------------------------------
// gshシェーダコンパイルをマルチスレッドで行います。
//---------------------------------------------------------------------------
#if 1
/**
 * Releases one per-thread shader-utility DLL handle.
 *
 * A NULL slot or NULL handle is ignored, and the handle is cleared after the
 * release so that calling this again for the same slot cannot free the module
 * a second time.
 *
 * @param hShaderUtilDLL  Handle slot to release; may be NULL.
 */
void FreeDLLsMulti( HMODULE *hShaderUtilDLL )
{
    if ( hShaderUtilDLL == NULL || *hShaderUtilDLL == NULL )
    {
        return;
    }

    FreeLibrary(*hShaderUtilDLL);
    *hShaderUtilDLL = NULL;
}

/**
 * Creates numDll copies of the shader-utility DLL, one per compiler instance,
 * and records the file name of each copy in g_DllNameArray.
 *
 * The copies are named "<base>_NN.dll", where <base> is the file name part of
 * LIB_DLL_SHADERUTILS without its extension and NN is the instance index.
 *
 * @param numDll     Number of copies to create.
 * @param exePath    Path prefix of the directory holding the original DLL.
 * @param multiPath  Path prefix of the directory receiving the copies.
 *
 * A failed copy is only reported; the name is still recorded.
 */
void CopyDlls( u32 numDll,  const char *exePath, const char *multiPath )
{
    g_DllNameArray = new wchar_t *[numDll];

    wchar_t  wchExePath[512];
    wchar_t  wchMultiPath[512];
    wchar_t  srcPath[512];
    size_t  returnValue;
    setlocale( LC_CTYPE, "" );
    mbstowcs_s(&returnValue, wchExePath, 512, exePath, 511);
    mbstowcs_s(&returnValue, wchMultiPath, 512, multiPath, 511);

    // Extract the file name without its extension. The CRT requires the file
    // name buffer to hold _MAX_FNAME characters; the bounded variant also
    // rejects anything longer instead of overrunning the buffer.
    wchar_t dllName[_MAX_FNAME];
    dllName[0] = L'\0';
    _wsplitpath_s( LIB_DLL_SHADERUTILS, NULL, 0, NULL, 0, dllName, _MAX_FNAME, NULL, 0 );

    swprintf_s(srcPath, 512, L"%s%s.dll", wchExePath, dllName );

    // Room for the base name plus "_", up to 10 index digits, ".dll" and the NUL.
    const size_t dllFileNameLen = _MAX_FNAME + 16;

    for ( u32 i = 0; i < numDll; i++ )
    {
        // Record the name of this instance's DLL copy.
        g_DllNameArray[i] = new wchar_t[dllFileNameLen];
        swprintf_s( g_DllNameArray[i], dllFileNameLen, L"%s_%02u.dll", dllName, i );

        // Prefix the copy's name with the destination directory.
        wchar_t  dstPath[512];
        swprintf_s(dstPath, 512, L"%s%s", wchMultiPath, g_DllNameArray[i]);

        // Copy the shader-utility DLL under its new name.
        if ( CopyFile( srcPath, dstPath, false ) == 0 )
        {
            DWORD err = GetLastError();
            printf("Warnning: 0x%x Can't copy\n", err);
        }
    }
}

/**
 * Loads one of the per-instance shader-utility DLL copies and resolves the
 * GSH2 entry points needed by the multi-threaded compile path.
 *
 * @param hShaderUtilDLL  Receives the module handle returned by LoadLibrary.
 * @param fpGSH2          Function table to fill. It is cleared first, so every
 *                        entry this function does not bind (and every export
 *                        the DLL lacks) is NULL rather than indeterminate.
 * @param exePath         Path prefix of the directory holding the DLL copies.
 * @param loadIdx         Index into g_DllNameArray selecting the copy to load.
 *
 * Exits the process with EC_LOADINGDLLFAILED when the DLL cannot be loaded.
 */
void LoadDLLsMulti(HMODULE* hShaderUtilDLL, GSH2Func *fpGSH2, const char *exePath, u32 loadIdx = 0)
{
    // We have a problem when DLL path is longer than 512 chars; this needs to be fixed.
    wchar_t path[512];
    size_t returnValue;
    setlocale( LC_CTYPE, "" );
    mbstowcs_s(&returnValue, path, 512, exePath, 511);
    wcscat_s(path, 512, g_DllNameArray[loadIdx]);

    const HMODULE dll = LoadLibrary(path);
    *hShaderUtilDLL = dll;

    if ( !dll )
    {
        // Nothing was loaded, so there is no module handle to release here.
        DWORD err = GetLastError();
        printf( "err: 0x%x Failed to load DLL %ws\n", err, path );
        exit(EC_LOADINGDLLFAILED);
    }

    // The caller may hand in an uninitialized table (it is malloc'ed by
    // gshInitializMulti); only a subset of its entries is bound below.
    memset(fpGSH2, 0, sizeof(*fpGSH2));

    // Resolve the entry points. The compile/destroy variants depend on the SDK version.
    fpGSH2->Initialize                    = reinterpret_cast<PGSH2Initialize>(GetProcAddress(dll, "GSH2Initialize"));
    fpGSH2->Destroy                       = reinterpret_cast<PGSH2Destroy>(GetProcAddress(dll, "GSH2Destroy"));
#if CAFE_OS_SDK_VERSION >= 21104
    fpGSH2->CompileProgram3               = reinterpret_cast<PGSH2CompileProgram3>(GetProcAddress(dll, "GSH2CompileProgram3"));
#elif CAFE_OS_SDK_VERSION >= 21002
    fpGSH2->CompileProgram2               = reinterpret_cast<PGSH2CompileProgram2>(GetProcAddress(dll, "GSH2CompileProgram2"));
#else
    fpGSH2->CompileProgram                = reinterpret_cast<PGSH2CompileProgram>(GetProcAddress(dll, "GSH2CompileProgram"));
#endif
#if CAFE_OS_SDK_VERSION >= 21104
    fpGSH2->DestroyGX2Program3            = reinterpret_cast<PGSH2DestroyGX2Program3>(GetProcAddress(dll, "GSH2DestroyGX2Program3"));
#else
    fpGSH2->DestroyGX2Program             = reinterpret_cast<PGSH2DestroyGX2Program>(GetProcAddress(dll, "GSH2DestroyGX2Program"));
#endif
}

void gshInitializMulti( const char *multiPath, const char *exePath, int jobsNumber )
{
    // コピー先ディレクトリを作成
    {
        wchar_t path[512];
        size_t returnValue;
        setlocale( LC_CTYPE, "" );
        mbstowcs_s(&returnValue, path, 512, multiPath, 511);

        CreateDirectory(path, NULL);
    }

    g_NumThreads     = jobsNumber;
   CopyDlls( g_NumThreads, exePath, multiPath );

    g_ShaderUtilDLL = (HMODULE *) malloc ( sizeof(HMODULE) * g_NumThreads );
    g_FpGSH2         = (GSH2Func *) malloc( sizeof(GSH2Func) * g_NumThreads );
    g_HShaderUtils   = (GSH2Handle *)malloc( sizeof(GSH2Handle) * g_NumThreads );

    GSH2Setup    libSetup;
    memset(&libSetup, 0, sizeof(libSetup));
    libSetup.gpu   = DEFAULT_GPU;

    for ( u32 i = 0; i < g_NumThreads; i++ )
    {
        LoadDLLsMulti( &g_ShaderUtilDLL[i], &g_FpGSH2[i], multiPath, i );
    	if ( !( g_HShaderUtils[i] = g_FpGSH2[i].Initialize( &libSetup) ) )
    	{
    		FreeDLLsMulti( &g_ShaderUtilDLL[i] );
    	}
    }
}

/**
 * Compiles one shader set on the compiler instance that belongs to the calling
 * OpenMP thread (selected with omp_get_thread_num()) and stores copies of the
 * compiled vertex and pixel shaders in shaderTbl.
 *
 * @param vshader, vsize   Vertex shader source and its length in bytes.
 * @param fshader, fsize   Pixel (fragment) shader source and its length.
 * @param gshader, gsize   Optional geometry shader source (NULL or size 0 to skip).
 * @param sodata, sosize   Optional stream-out varying list (NULL or size 0 to skip).
 * @param cshader, csize   Optional compute shader source (SDK >= 21104 only).
 * @param outfile          Optional output file name stored in the source info.
 * @param shaderTbl        Receives the shader copies and the compiler info log.
 * @param asmCodeTable     Only switches the dumpShaders compile option on when
 *                         non-NULL; nothing is written to it here.
 *
 * @return EC_SUCCESS, EC_OUTPUTWARNING (compiled, but the compiler produced an
 *         info log), EC_COMPILEFAILED (also when the calling thread has no
 *         usable compiler instance or a shader copy could not be made), or
 *         EC_INPUTFILEERROR when an input could not be prepared. All buffers
 *         allocated here are released on every path.
 */
#if CAFE_OS_SDK_VERSION >= 21104
s32 gshCompileMulti(
    char*           vshader,
    u32             vsize,
    char*           fshader,
    u32             fsize,
    char*           gshader,
    u32             gsize,
    char*           sodata,
    u32             sosize,
    char*           cshader,
    u32             csize,
    char*           outfile,
    ShaderTable*    shaderTbl,
    ShaderAsmTable* asmCodeTable )
#else
s32 gshCompileMulti(
    char*           vshader,
    u32             vsize,
    char*           fshader,
    u32             fsize,
    char*           gshader,
    u32             gsize,
    char*           sodata,
    u32             sosize,
    char*           outfile,
    ShaderTable*    shaderTbl,
    ShaderAsmTable* asmCodeTable )

#endif
{
    s32         retCode     = EC_COMPILEFAILED;

#if CAFE_OS_SDK_VERSION >= 21104
    GSH2CompileSetup3   compileSetup;
    GSH2CompileOutput3 compileOutput;
#elif CAFE_OS_SDK_VERSION >= 21002
    GSH2CompileSetup2   compileSetup;
    GSH2CompileOutput  compileOutput;
#else
    GSH2CompileSetup   compileSetup;
    GSH2CompileOutput  compileOutput;
#endif

    ShaderSourceInfo   sourceInfo;

    memset(&compileOutput, 0, sizeof(compileOutput));
    memset(&compileSetup, 0, sizeof(compileSetup));
    memset(&sourceInfo, 0, sizeof(sourceInfo));

#if CAFE_OS_SDK_VERSION >= 21104
    compileSetup.abi_version             = GSH2_ABI_VERSION3;
    compileOutput.abi_version            = GSH2_ABI_VERSION3;
    compileOutput.gx2Program.abi_version = GSH2_ABI_VERSION3;
#endif

    // Every early return below releases whatever sourceInfo already owns.
    if (!gshAppendShaderSource(vshader, vsize, &sourceInfo.vs_source))
    {
        printf("Can not set vertex shader code\n");
        Destroy(&sourceInfo);
        return (EC_INPUTFILEERROR);
    }

    if (!gshAppendShaderSource(fshader, fsize, &sourceInfo.ps_source))
    {
        printf("Can not set pixel shader code\n");
        Destroy(&sourceInfo);
        return (EC_INPUTFILEERROR);
    }

    if ( gshader && (gsize != 0) )
    {
        if (!gshAppendShaderSource(gshader, gsize, &sourceInfo.gs_source))
        {
            printf("Can not set geometry shader code\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }

    if ( sodata && (sosize != 0) )
    {
        // Keep the count in sourceInfo: Destroy() uses it to free every
        // varying string, not just the table that holds them.
        sourceInfo.so_varyings = gshGetStreamOutVaryings(sodata, sosize, &sourceInfo.numSOVaryings);
        if (sourceInfo.so_varyings == NULL)
        {
            printf("Can not set stream out varyings\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }
#if CAFE_OS_SDK_VERSION >= 21104
    if ( cshader && (csize != 0) )
    {
        if (!gshAppendShaderSource(cshader, csize, &sourceInfo.cs_source))
        {
            printf("Can not set compute shader code\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }

    if (sourceInfo.cs_source)
    {
        // NOTE(review): the compileSetup source pointers are still zero at this
        // point (cleared by memset above, assigned further down), so this
        // condition cannot be true as written. Confirm the intended check.
        if (compileSetup.vs_source || compileSetup.ps_source || compileSetup.gs_source)
        {
            fprintf(stderr, "Error: compute shader must be compiled on its own.\n");
            Destroy(&sourceInfo);
            return (EC_INPUTFILEERROR);
        }
    }
#endif
    if (outfile) sourceInfo.pOutFilename = outfile;

    // Compile shaders
    compileSetup.vs_source                  = sourceInfo.vs_source;
    compileSetup.ps_source                  = sourceInfo.ps_source;
    compileSetup.gs_source                  = sourceInfo.gs_source;
    compileSetup.so_varyings                = (const char**)sourceInfo.so_varyings;
#if CAFE_OS_SDK_VERSION >= 21104
    compileSetup.cs_source                  = sourceInfo.cs_source;
#endif
    compileSetup.numSOVaryings              = sourceInfo.numSOVaryings;
    compileSetup.lang                       = DEFAULT_SHADINGLANGUAGE;
    compileSetup.options.dumpShaders        = (asmCodeTable != NULL) ? 1 : 0;
    compileSetup.options.forceUniformBlock  = DEFAULT_FORCEUNIFORMBLOCK;
#if CAFE_OS_SDK_VERSION >= 21002
    // Shaders are not identified by GUID, so the spark debug info is skipped.
    compileSetup.options.optimize           = (GSH2_OPTFLAGS_NONE != 0) ? 1 : 0;
    compileSetup.options.skipSparkDebug     = 1;
    compileSetup.options.optFlags           = 1;
    compileSetup.optimizeFlags              = GSH2_OPTFLAGS_NONE;
    #if CAFE_OS_SDK_VERSION >= 21104
    GSH2_OPTFLAG_SETBIT(compileSetup.optimizeFlags, GSH2_OPTFLAG_LIMITARRAYSYMS);
    #endif

    compileSetup.vs_source_filename         = (const char*)"";
    compileSetup.ps_source_filename         = (const char*)"";
    compileSetup.gs_source_filename         = (const char*)"";
    compileSetup.spark_output_dir           = (const char*)NULL;
#endif

    // Each OpenMP thread uses the compiler instance with its own index.
    const u32 tidx = omp_get_thread_num();

    // Refuse to run on a slot that does not exist or whose instance was never
    // created (gshInitializMulti leaves such a slot with a NULL handle).
    if ( tidx >= g_NumThreads || !g_HShaderUtils[tidx] )
    {
        printf("No shader compiler instance for thread %u\n", tidx);
        Destroy(&sourceInfo);
        return (EC_COMPILEFAILED);
    }

#if CAFE_OS_SDK_VERSION >= 21104
    if ( g_FpGSH2[tidx].CompileProgram3( g_HShaderUtils[tidx], &compileSetup, &compileOutput ) )
#elif CAFE_OS_SDK_VERSION >= 21002
    if (g_FpGSH2[tidx].CompileProgram2(g_HShaderUtils[tidx], &compileSetup, &compileOutput))
#else
    if (g_FpGSH2[tidx].CompileProgram(g_HShaderUtils[tidx], &compileSetup, &compileOutput))
#endif
    {
        #if CAFE_OS_SDK_VERSION >= 21002
        // Fetch the 16-byte GUID appended to the end of the vertex shader binary.
        if ( !compileSetup.options.skipSparkDebug )
        {
            GX2VertexShader* pCompiledVs = &compileOutput.gx2Program.vs;
            void* pGuid = (void*)((u64)pCompiledVs->shaderPtr+(pCompiledVs->shaderSize-16));
            memcpy( (void *)&shaderTbl->shaderID, pGuid, 16 );
        }
        #endif

        // Keep copies of the shaders; duplicates are resolved later by
        // gshRemoveDuplicateShader.
        GX2VertexShader* vsh = GFDCopyBlockVSH(&compileOutput.gx2Program.vs);
        GX2PixelShader*  psh = GFDCopyBlockPSH(&compileOutput.gx2Program.ps);
        if (vsh && psh)
        {
            shaderTbl->SetVertexShader(vsh);
            shaderTbl->SetPixelShader(psh);
            retCode = EC_SUCCESS;
        }
    }
    else
    {
        retCode = EC_COMPILEFAILED;
    }

    // we expect to have warning/error message in infoLog
    if ( compileOutput.pInfoLog  )
    {
        shaderTbl->SetLog(compileOutput.pInfoLog);
        if ( retCode == EC_SUCCESS ) retCode = EC_OUTPUTWARNING;
    }

    // Cleanup
#if CAFE_OS_SDK_VERSION >= 21104
    g_FpGSH2[tidx].DestroyGX2Program3(g_HShaderUtils[tidx], &compileOutput.gx2Program);
#else
    g_FpGSH2[tidx].DestroyGX2Program(g_HShaderUtils[tidx], &compileOutput.gx2Program);
#endif
    Destroy(&sourceInfo);

    return retCode;
}

/**
 * Assigns vertex / pixel shader IDs to every table entry by running its stored
 * shaders through the collector's duplicate check.
 *
 * @param shaderTbl   Array of shader table entries.
 * @param numShaders  Number of entries in shaderTbl.
 *
 * Entries without a vertex (or pixel) shader keep their current vertex (or
 * pixel) shader ID.
 */
void gshRemoveDuplicateShader( ShaderTable* shaderTbl, u32 numShaders )
{
    u32 entryIdx = 0;
    while ( entryIdx < numShaders )
    {
        ShaderTable& entry = shaderTbl[entryIdx];

        if ( entry.GetVertexShader() != NULL )
        {
            entry.SetVertexShaderId( collectShader.CheckDupVertexShader( entry.GetVertexShader() ) );
        }

        if ( entry.GetPixelShader() != NULL )
        {
            entry.SetPixelShaderId( collectShader.CheckDupPixelShader( entry.GetPixelShader() ) );
        }

        ++entryIdx;
    }
}

void DeleteDlls( const char *exePath )
{
    for ( u32 i = 0; i < g_NumThreads; i++ )
    {
        wchar_t path[512];
        size_t returnValue;
        setlocale( LC_CTYPE, "" );
        mbstowcs_s(&returnValue, path, 512, exePath, 511);
        wcscat_s(path, 512, g_DllNameArray[i] );

        DeleteFile( path );
    }
}

void gshFinalizeMulti( const char *multiPath )
{
    for ( u32 i = 0; i < g_NumThreads; i++ )
    {
        FreeDLLsMulti( &g_ShaderUtilDLL[i] );
    }
    DeleteDlls( multiPath );

    {
        wchar_t path[512];
        size_t returnValue;
        setlocale( LC_CTYPE, "" );
        mbstowcs_s(&returnValue, path, 512, multiPath, 511);

        RemoveDirectory(path);
    }
}
#endif

//---------------------------------------------------------------------------
// Generates the gsh shader binary.
// Thin wrapper around collectShader.GenerateGsh().
//---------------------------------------------------------------------------
void gshGenerateShaderBinary()
{
    collectShader.GenerateGsh();
}

//---------------------------------------------------------------------------
// Returns the size of the gsh shader binary.
// Thin wrapper around collectShader.GetGshBinarySize().
//---------------------------------------------------------------------------
u32 gshGetgshBinarySize()
{
    return collectShader.GetGshBinarySize();
}

//---------------------------------------------------------------------------
// Returns the gsh shader binary.
// Thin wrapper around collectShader.GetGshBinary().
//---------------------------------------------------------------------------
char*  gshGetgshBinary()
{
    return collectShader.GetGshBinary();
}

//---------------------------------------------------------------------------
// Pushes a vertex shader structure into the shader collector.
// Returns whatever collectShader.PushVertexShader() returns.
//---------------------------------------------------------------------------
u32  gshPushVertexShader( GX2VertexShader* vsh )
{
    return collectShader.PushVertexShader( vsh );
}

//---------------------------------------------------------------------------
// Pushes a pixel shader structure into the shader collector.
// Returns whatever collectShader.PushPixelShader() returns.
//---------------------------------------------------------------------------
u32  gshPushPixelShader( GX2PixelShader* fsh )
{
    return collectShader.PushPixelShader( fsh );
}

//---------------------------------------------------------------------------
// Checks whether the src and dst vertex shaders are the same.
//
// Returns true when the two shaders DIFFER in any compared field and false
// when every compared field matches (the sense is inverted relative to what
// the name might suggest; callers in this file negate the result).
//
// srcShader / dstShader : shaders to compare; both are dereferenced
//                         unconditionally and must not be NULL.
//---------------------------------------------------------------------------
bool CollectShaderBinary::CheckVertexShader( GX2VertexShader* srcShader,  GX2VertexShader* dstShader )
{
    // Shader binary: the sizes must agree before the bytes can be compared.
    if ( srcShader->shaderSize != dstShader->shaderSize )
    {
        return true;
    }
    if ( memcmp( (void *)srcShader->shaderPtr, (void *)dstShader->shaderPtr, srcShader->shaderSize ) != 0 )
    {
        return true;
    }

    // Scalar members of the GX2VertexShader structure. The element counts
    // checked here are what make the per-array loops below safe to index
    // into both shaders with the same bound.
    if ( ( srcShader->shaderMode       != dstShader->shaderMode       ) ||
         ( srcShader->numUniformBlocks != dstShader->numUniformBlocks ) ||
         ( srcShader->numUniforms      != dstShader->numUniforms      ) ||
         ( srcShader->numInitialValues != dstShader->numInitialValues ) ||
         ( srcShader->_numLoops        != dstShader->_numLoops        ) ||
         ( srcShader->numSamplers      != dstShader->numSamplers      ) ||
         ( srcShader->numAttribs       != dstShader->numAttribs       ) ||
         ( srcShader->ringItemsize     != dstShader->ringItemsize     ) ||
         ( srcShader->hasStreamOut     != dstShader->hasStreamOut     ) )
    {
        return true;
    }

    // Registers. memcmp takes a byte count, so compare the whole array
    // rather than passing the element count as if it were a size.
    // NOTE(review): assumes _regs is an array member embedded in the struct.
    if ( memcmp( srcShader->_regs, dstShader->_regs, sizeof( srcShader->_regs ) ) != 0 )
    {
        return true;
    }

    // Uniform blocks. Names are compared as complete strings; a
    // length-limited memcmp would treat "abc" and "abcd" as equal.
    for ( u32 cnt = 0; cnt < srcShader->numUniformBlocks; cnt++ )
    {
        if ( ( srcShader->uniformBlocks[cnt].size     != dstShader->uniformBlocks[cnt].size     ) ||
             ( srcShader->uniformBlocks[cnt].location != dstShader->uniformBlocks[cnt].location ) ||
             ( strcmp( srcShader->uniformBlocks[cnt].name, dstShader->uniformBlocks[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Uniform variables.
    for ( u32 cnt = 0; cnt < srcShader->numUniforms; cnt++ )
    {
        if ( ( srcShader->uniformVars[cnt].type       != dstShader->uniformVars[cnt].type       ) ||
             ( srcShader->uniformVars[cnt].arrayCount != dstShader->uniformVars[cnt].arrayCount ) ||
             ( srcShader->uniformVars[cnt].blockIndex != dstShader->uniformVars[cnt].blockIndex ) ||
             ( strcmp( srcShader->uniformVars[cnt].name, dstShader->uniformVars[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Initial values.
    for ( u32 cnt = 0; cnt < srcShader->numInitialValues; cnt++ )
    {
        if ( ( srcShader->initialValues[cnt].value[0] != dstShader->initialValues[cnt].value[0] ) ||
             ( srcShader->initialValues[cnt].value[1] != dstShader->initialValues[cnt].value[1] ) ||
             ( srcShader->initialValues[cnt].value[2] != dstShader->initialValues[cnt].value[2] ) ||
             ( srcShader->initialValues[cnt].value[3] != dstShader->initialValues[cnt].value[3] ) ||
             ( srcShader->initialValues[cnt].offset   != dstShader->initialValues[cnt].offset   ) )
        {
            return true;
        }
    }

    // Loop variables (only present when the loop count is non-zero; the
    // counts were already verified to be equal above).
    if ( srcShader->_numLoops != 0 )
    {
        if ( memcmp( srcShader->_loopVars, dstShader->_loopVars, dstShader->_numLoops * sizeof(GFDLoopVar) ) != 0 )
        {
            return true;
        }
    }

    // Sampler variables.
    for ( u32 cnt = 0; cnt < srcShader->numSamplers; cnt++ )
    {
        if ( ( srcShader->samplerVars[cnt].type     != dstShader->samplerVars[cnt].type     ) ||
             ( srcShader->samplerVars[cnt].location != dstShader->samplerVars[cnt].location ) ||
             ( strcmp( srcShader->samplerVars[cnt].name, dstShader->samplerVars[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Attribute variables. The name comparison uses the attribute's own
    // name; indexing uniformVars here could run past that array whenever
    // numAttribs exceeds numUniforms.
    for ( u32 cnt = 0; cnt < srcShader->numAttribs; cnt++ )
    {
        if ( ( srcShader->attribVars[cnt].type       != dstShader->attribVars[cnt].type       ) ||
             ( srcShader->attribVars[cnt].arrayCount != dstShader->attribVars[cnt].arrayCount ) ||
             ( srcShader->attribVars[cnt].location   != dstShader->attribVars[cnt].location   ) ||
             ( strcmp( srcShader->attribVars[cnt].name, dstShader->attribVars[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Stream-out vertex strides: compare the whole array in bytes.
    // NOTE(review): assumes streamOutVertexStride is an array member embedded in the struct.
    if ( memcmp( srcShader->streamOutVertexStride, dstShader->streamOutVertexStride, sizeof( srcShader->streamOutVertexStride ) ) != 0 )
    {
        return true;
    }

    // Shader program resource description.
    if ( ( srcShader->shaderProgram.resourceFlags != dstShader->shaderProgram.resourceFlags ) ||
         ( srcShader->shaderProgram.elementSize   != dstShader->shaderProgram.elementSize   ) ||
         ( srcShader->shaderProgram.elementCount  != dstShader->shaderProgram.elementCount  ) )
    {
        return true;
    }

    return false;
}

//---------------------------------------------------------------------------
// Checks whether the src and dst pixel shaders are the same.
//
// Returns true when the two shaders DIFFER in any compared field and false
// when every compared field matches (the sense is inverted relative to what
// the name might suggest; callers in this file negate the result).
//
// srcShader / dstShader : shaders to compare; both are dereferenced
//                         unconditionally and must not be NULL.
//---------------------------------------------------------------------------
bool CollectShaderBinary::CheckPixelShader( GX2PixelShader* srcShader,  GX2PixelShader* dstShader )
{
    // Shader binary: the sizes must agree before the bytes can be compared.
    if ( srcShader->shaderSize != dstShader->shaderSize )
    {
        return true;
    }
    if ( memcmp( (void *)srcShader->shaderPtr, (void *)dstShader->shaderPtr, srcShader->shaderSize ) != 0 )
    {
        return true;
    }

    // Scalar members of the GX2PixelShader structure. The element counts
    // checked here are what make the per-array loops below safe to index
    // into both shaders with the same bound.
    if ( ( srcShader->shaderMode       != dstShader->shaderMode       ) ||
         ( srcShader->numUniformBlocks != dstShader->numUniformBlocks ) ||
         ( srcShader->numUniforms      != dstShader->numUniforms      ) ||
         ( srcShader->numInitialValues != dstShader->numInitialValues ) ||
         ( srcShader->_numLoops        != dstShader->_numLoops        ) ||
         ( srcShader->numSamplers      != dstShader->numSamplers      ) )
    {
        return true;
    }

    // Registers. Size the comparison from the pixel shader's own array
    // instead of the vertex-shader register count, and in bytes rather
    // than elements.
    // NOTE(review): assumes _regs is an array member embedded in the struct.
    if ( memcmp( srcShader->_regs, dstShader->_regs, sizeof( srcShader->_regs ) ) != 0 )
    {
        return true;
    }

    // Uniform blocks. A mismatch in size or location is a difference in its
    // own right, not just a reason to skip the name check. Names are
    // compared as complete strings; a length-limited memcmp would treat
    // "abc" and "abcd" as equal.
    for ( u32 cnt = 0; cnt < srcShader->numUniformBlocks; cnt++ )
    {
        if ( ( srcShader->uniformBlocks[cnt].size     != dstShader->uniformBlocks[cnt].size     ) ||
             ( srcShader->uniformBlocks[cnt].location != dstShader->uniformBlocks[cnt].location ) ||
             ( strcmp( srcShader->uniformBlocks[cnt].name, dstShader->uniformBlocks[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Uniform variables.
    for ( u32 cnt = 0; cnt < srcShader->numUniforms; cnt++ )
    {
        if ( ( srcShader->uniformVars[cnt].type       != dstShader->uniformVars[cnt].type       ) ||
             ( srcShader->uniformVars[cnt].arrayCount != dstShader->uniformVars[cnt].arrayCount ) ||
             ( srcShader->uniformVars[cnt].blockIndex != dstShader->uniformVars[cnt].blockIndex ) ||
             ( strcmp( srcShader->uniformVars[cnt].name, dstShader->uniformVars[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Initial values.
    for ( u32 cnt = 0; cnt < srcShader->numInitialValues; cnt++ )
    {
        if ( ( srcShader->initialValues[cnt].value[0] != dstShader->initialValues[cnt].value[0] ) ||
             ( srcShader->initialValues[cnt].value[1] != dstShader->initialValues[cnt].value[1] ) ||
             ( srcShader->initialValues[cnt].value[2] != dstShader->initialValues[cnt].value[2] ) ||
             ( srcShader->initialValues[cnt].value[3] != dstShader->initialValues[cnt].value[3] ) ||
             ( srcShader->initialValues[cnt].offset   != dstShader->initialValues[cnt].offset   ) )
        {
            return true;
        }
    }

    // Loop variables (only present when the loop count is non-zero; the
    // counts were already verified to be equal above).
    if ( srcShader->_numLoops != 0 )
    {
        if ( memcmp( srcShader->_loopVars, dstShader->_loopVars, dstShader->_numLoops * sizeof(GFDLoopVar) ) != 0 )
        {
            return true;
        }
    }

    // Sampler variables.
    for ( u32 cnt = 0; cnt < srcShader->numSamplers; cnt++ )
    {
        if ( ( srcShader->samplerVars[cnt].type     != dstShader->samplerVars[cnt].type     ) ||
             ( srcShader->samplerVars[cnt].location != dstShader->samplerVars[cnt].location ) ||
             ( strcmp( srcShader->samplerVars[cnt].name, dstShader->samplerVars[cnt].name ) != 0 ) )
        {
            return true;
        }
    }

    // Shader program resource description.
    if ( ( srcShader->shaderProgram.resourceFlags != dstShader->shaderProgram.resourceFlags ) ||
         ( srcShader->shaderProgram.elementSize   != dstShader->shaderProgram.elementSize   ) ||
         ( srcShader->shaderProgram.elementCount  != dstShader->shaderProgram.elementCount  ) )
    {
        return true;
    }

    return false;
}

//---------------------------------------------------------------------------
// Looks for a duplicate of the given vertex shader. When an already stored
// shader matches, its index is returned; otherwise the shader is appended
// and the index of the new entry is returned.
//---------------------------------------------------------------------------
u32 CollectShaderBinary::CheckDupVertexShader( GX2VertexShader*   vertexShader )
{
    for ( u32 index = 0; index < mVertexCnt; index++ )
    {
        // CheckVertexShader() reports "different", so a false result is a match.
        const bool differs = CheckVertexShader( vertexShader, mVertexShaderArray[index] );
        if ( !differs )
        {
            return index;
        }
    }

    // No match: store the pointer itself (the shader is not copied).
    // NOTE(review): no capacity check on mVertexShaderArray is visible here.
    const u32 appendedIndex = mVertexCnt;
    mVertexShaderArray[appendedIndex] = vertexShader;
    mVertexCnt++;
    return appendedIndex;
}

//---------------------------------------------------------------------------
// Looks for a duplicate of the given pixel shader. When an already stored
// shader matches, its index is returned; otherwise the shader is appended
// and the index of the new entry is returned.
//---------------------------------------------------------------------------
u32 CollectShaderBinary::CheckDupPixelShader( GX2PixelShader*   pixelShader )
{
    for ( u32 index = 0; index < mPixelCnt; index++ )
    {
        // CheckPixelShader() reports "different", so a false result is a match.
        const bool differs = CheckPixelShader( pixelShader, mPixelShaderArray[index] );
        if ( !differs )
        {
            return index;
        }
    }

    // No match: store the pointer itself (the shader is not copied).
    // NOTE(review): no capacity check on mPixelShaderArray is visible here.
    const u32 appendedIndex = mPixelCnt;
    mPixelShaderArray[appendedIndex] = pixelShader;
    mPixelCnt++;
    return appendedIndex;
}

//---------------------------------------------------------------------------
// Generates the gsh file image in memory.
//
// Builds one GFD shader entry per collected vertex shader followed by one
// per collected pixel shader, then hands the table to GFDWriteMemShader2()
// (or GFDWriteMemShader() on older SDKs), which receives &mGshBinaryPointer
// and &mGshBinarySize as its output arguments.
//
// Returns false when the GFD writer reports failure, true otherwise.
//---------------------------------------------------------------------------
bool CollectShaderBinary::GenerateGsh()
{
    // Temporary entry table; zero-filled so the unused shader pointers of
    // every entry are NULL.
    #if CAFE_OS_SDK_VERSION >= 21104
    GFDShaders2*  gfdShaders = new GFDShaders2[mVertexCnt + mPixelCnt];
    memset( gfdShaders, 0, sizeof(GFDShaders2) * (mVertexCnt + mPixelCnt) );
    #else
    GFDShaders*  gfdShaders = new GFDShaders[mVertexCnt + mPixelCnt];
    memset( gfdShaders, 0, sizeof(GFDShaders) * (mVertexCnt + mPixelCnt) );
    #endif

    u32 numShader = 0;

    // Vertex shaders occupy the first mVertexCnt entries.
    for ( u32 i = 0; i <  mVertexCnt; i++, numShader++ )
    {
        gfdShaders[numShader].pVertexShader = mVertexShaderArray[i];

        #if CAFE_OS_SDK_VERSION >= 21104
        gfdShaders[numShader].abiVersion = GFD_DLL_ABI_VERSION;
        #endif
    }

    // Pixel shaders follow in the next mPixelCnt entries.
    for ( u32 i = 0; i <  mPixelCnt; i++, numShader++ )
    {
        gfdShaders[numShader].pPixelShader = mPixelShaderArray[i];

        #if CAFE_OS_SDK_VERSION >= 21104
        gfdShaders[numShader].abiVersion = GFD_DLL_ABI_VERSION;
        #endif
    }

    ShaderSourceInfo   sourceInfo;
    memset(&sourceInfo,0,sizeof(sourceInfo));
    sourceInfo.gpu                = DEFAULT_GPU;
    sourceInfo.endianbugfix       = DEFAULT_ENDIANBUGFIX;
    sourceInfo.outfilealign       = true;
//    sourceInfo.outfilealign       = DEFAULT_ALIGNMODE;

#if CAFE_OS_SDK_VERSION >= 21104
    if (!GFDWriteMemShader2(&mGshBinaryPointer,
                            (sourceInfo.gpu == GPU_VERSION_1) ? GFD_GPU_VERSION_1 : GFD_GPU_VERSION_GPU7,
                            (sourceInfo.endianbugfix) ? GFD_ENDIAN_SWAP_MODE_8_IN_32 : GFD_ENDIAN_SWAP_MODE_DEFAULT,
                            (sourceInfo.outfilealign) ? GFD_ALIGN_MODE_ENABLE : GFD_ALIGN_MODE_DISABLE,
                            numShader,
                            gfdShaders,
                            &mGshBinarySize))
#else
    if (!GFDWriteMemShader(&mGshBinaryPointer,
                            (sourceInfo.gpu == GPU_VERSION_1) ? GFD_GPU_VERSION_1 : GFD_GPU_VERSION_GPU7,
                            (sourceInfo.endianbugfix) ? GFD_ENDIAN_SWAP_MODE_8_IN_32 : GFD_ENDIAN_SWAP_MODE_DEFAULT,
                            (sourceInfo.outfilealign) ? GFD_ALIGN_MODE_ENABLE : GFD_ALIGN_MODE_DISABLE,
                            numShader,
                            gfdShaders,
                            &mGshBinarySize))
#endif
    {
        // The table was allocated with new[], so it must be released with delete[].
        delete[] gfdShaders;
        return false;
    }

    // Disabled round-trip debugging aid, kept as-is: reads the shaders back
    // out of the generated image, compares them with the originals, and
    // dumps both images to files.
#if 0 // for test
    u32 vshNum = GDFGetVertexShaderNum32( mGshBinaryPointer );
    u32 fshNum = GDFGetPixelShaderNum32( mGshBinaryPointer );

    GFDShaders2*  gfdShaders_temp = new GFDShaders2[vshNum + fshNum];
    memset( gfdShaders_temp, 0, sizeof(GFDShaders2) * (vshNum + fshNum) );

    FILE* fp = fopen("test.log", "w" );
    fprintf(fp, "vsh %d fsh %d orgNumShader %d\n", vshNum, fshNum, numShader);
    numShader = 0;
#if 1
    for ( u32 i = 0; i < vshNum; i++, numShader++ )
    {
        GX2VertexShader* vsh = GDFGetVertexShader32To64( i, mGshBinaryPointer );

        bool isFindVer = ( CheckVertexShader(vsh, mVertexShaderArray[i] ) == true ) ? false : true;
        if ( isFindVer == false )
            fprintf( fp, "the both of vertex shaders are different\n");
        else
        {
            fprintf( fp, "the both of vertex shaders are the same\n");
            gfdShaders_temp[numShader].pVertexShader = vsh;
            gfdShaders_temp[numShader].abiVersion = GFD_DLL_ABI_VERSION;
        }
    }
#endif
#if 1
    for ( u32 i = 0; i < fshNum; i++, numShader++ )
    {
        GX2PixelShader* fsh = GDFGetPixelShader32To64( i, mGshBinaryPointer );

        bool isFindFrag = ( CheckPixelShader(fsh, mPixelShaderArray[i] ) == true ) ? false : true;
        if ( isFindFrag == false )
            fprintf(fp,"the both of pixel shaders are different\n");
        else
       {
            fprintf(fp,"the both of pixel shaders are the same\n");
            gfdShaders_temp[numShader].pPixelShader = fsh;
            gfdShaders_temp[numShader].abiVersion = GFD_DLL_ABI_VERSION;
        }
    }
#endif
    char* shaderPtr = NULL;
    u32 shaderSize;
    if (!GFDWriteMemShader2(&shaderPtr,
                            (sourceInfo.gpu == GPU_VERSION_1) ? GFD_GPU_VERSION_1 : GFD_GPU_VERSION_GPU7,
                            (sourceInfo.endianbugfix) ? GFD_ENDIAN_SWAP_MODE_8_IN_32 : GFD_ENDIAN_SWAP_MODE_DEFAULT,
                            (sourceInfo.outfilealign) ? GFD_ALIGN_MODE_ENABLE : GFD_ALIGN_MODE_DISABLE,
                            numShader,
                            gfdShaders_temp,
                            &shaderSize))
    {
        fprintf( fp, "GFDWriteMemShader2 failed\n" );
    }

    fprintf( fp, "new shaderSize %d new numShader %d\n", shaderSize, numShader );
    fclose(fp);

    fp = fopen("convert.gsh", "w" );
    fwrite( mGshBinaryPointer, mGshBinarySize, 1, fp );
    fclose(fp);
    fp = fopen("reConvert.gsh", "w" );
    fwrite( shaderPtr, shaderSize, 1, fp );
    fclose(fp);

    for ( u32 i = 0; i < numShader; i++ )
    {
        if ( gfdShaders_temp[i].pVertexShader != NULL ) free( gfdShaders_temp[i].pVertexShader );
        if ( gfdShaders_temp[i].pPixelShader != NULL ) free( gfdShaders_temp[i].pPixelShader );
    }

#endif
    // The table was allocated with new[], so it must be released with delete[].
    delete[] gfdShaders;
    return true;
}

// Registers a vertex shader by delegating to CheckDupVertexShader() and
// returns the index that call yields.
u32  CollectShaderBinary::PushVertexShader( GX2VertexShader* vsh )
{
    const u32 shaderIndex = CheckDupVertexShader( vsh );
    return shaderIndex;
}

// Registers a pixel shader by delegating to CheckDupPixelShader() and
// returns the index that call yields.
u32  CollectShaderBinary::PushPixelShader( GX2PixelShader* fsh )
{
    const u32 shaderIndex = CheckDupPixelShader( fsh );
    return shaderIndex;
}

#ifdef __cplusplus
}
#endif // __cplusplus
