﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

// g3d_GX2Misc.h
// This header has the same content as gx2Misc.h, with some parts modified for Android builds.
//
// Declares misc function prototypes for gx2 library.

#ifndef _CAFE_GX2_MISC_H_
#define _CAFE_GX2_MISC_H_

#if !(defined _WIN32 || defined _WIN64 || defined ANDROID || defined __APPLE__)
#include <cafe/mem.h>
#endif

#include <stdint.h> // uintptr_t

#if defined(ANDROID) || defined(__APPLE__)
#include <assert.h>
#define ASSERT assert // android
#endif

#ifdef __cplusplus
extern "C"
{
#endif // __cplusplus

// -----------------
// Types that are used in multiple areas

/// Packed component (swap/routing) selection set.
///
/// Four 8-bit component selectors are packed into one u32 value:
/// X_R in bits 31-24, Y_G in bits 23-16, Z_B in bits 15-8 and W_A in bits 7-0
/// (this is the layout used by GX2_GET_COMP_SEL and the GX2_GET_COMPONENT_* macros).
///
/// Use the macros in GX2UTComponentGroup to create or decode a selection.
/// The GX2_COMP_SEL_* macros provide predefined values for common selections.
///
typedef u32 GX2CompSel;

/// Macro to get X_R component (bits 31-24) from a component selection set
///
#define GX2_GET_COMPONENT_X_R(v)  (((v)>>24)&0xff)

/// Macro to get Y_G component (bits 23-16) from a component selection set
///
#define GX2_GET_COMPONENT_Y_G(v)  (((v)>>16)&0xff)

/// Macro to get Z_B component (bits 15-8) from a component selection set
///
#define GX2_GET_COMPONENT_Z_B(v)  (((v)>> 8)&0xff)

/// Macro to get W_A component (bits 7-0) from a component selection set
///
#define GX2_GET_COMPONENT_W_A(v)  (((v)>> 0)&0xff)

/// Macro to assemble a component selection set from individual components
///
/// Each component is truncated to its low 8 bits before being placed in its
/// byte (x: bits 31-24, y: bits 23-16, z: bits 15-8, w: bits 7-0).
///
/// The masks are unsigned on purpose: masking with a plain 0xff leaves a signed
/// int, and shifting a signed int with bit 7 set left by 24 overflows, which is
/// undefined behavior in C.  With 0xffu the shifts operate on unsigned values.
///
#define GX2_GET_COMP_SEL(x,y,z,w) ((((x)&0xffu)<<24)|(((y)&0xffu)<<16)|(((z)&0xffu)<<8)|((w)&0xffu))

// Each GX2_COMP_SEL_* macro below expands to a plain expression (no trailing
// semicolon), so it can be used in initializers, argument lists and larger
// expressions.

/// Predefined component select (0,0,0,1) or (0,0,0,1)
#define GX2_COMP_SEL_NONE GX2_GET_COMP_SEL(GX2_COMPONENT_C_0,GX2_COMPONENT_C_0,GX2_COMPONENT_C_0,GX2_COMPONENT_C_1)

/// Predefined component select (X,0,0,1) or (R,0,0,1)
#define GX2_COMP_SEL_X001 GX2_GET_COMP_SEL(GX2_COMPONENT_X_R,GX2_COMPONENT_C_0,GX2_COMPONENT_C_0,GX2_COMPONENT_C_1)

/// Predefined component select (X,Y,0,1) or (R,G,0,1)
#define GX2_COMP_SEL_XY01 GX2_GET_COMP_SEL(GX2_COMPONENT_X_R,GX2_COMPONENT_Y_G,GX2_COMPONENT_C_0,GX2_COMPONENT_C_1)

/// Predefined component select (X,Y,Z,1) or (R,G,B,1)
#define GX2_COMP_SEL_XYZ1 GX2_GET_COMP_SEL(GX2_COMPONENT_X_R,GX2_COMPONENT_Y_G,GX2_COMPONENT_Z_B,GX2_COMPONENT_C_1)

/// Predefined component select (X,Y,Z,W) or (R,G,B,A)
#define GX2_COMP_SEL_XYZW GX2_GET_COMP_SEL(GX2_COMPONENT_X_R,GX2_COMPONENT_Y_G,GX2_COMPONENT_Z_B,GX2_COMPONENT_W_A)

/// Predefined component select (X,X,X,X) or (R,R,R,R)
#define GX2_COMP_SEL_XXXX GX2_GET_COMP_SEL(GX2_COMPONENT_X_R,GX2_COMPONENT_X_R,GX2_COMPONENT_X_R,GX2_COMPONENT_X_R)

/// Predefined component select (Y,Y,Y,Y) or (G,G,G,G)
#define GX2_COMP_SEL_YYYY GX2_GET_COMP_SEL(GX2_COMPONENT_Y_G,GX2_COMPONENT_Y_G,GX2_COMPONENT_Y_G,GX2_COMPONENT_Y_G)

/// Predefined component select (Z,Z,Z,Z) or (B,B,B,B)
#define GX2_COMP_SEL_ZZZZ GX2_GET_COMP_SEL(GX2_COMPONENT_Z_B,GX2_COMPONENT_Z_B,GX2_COMPONENT_Z_B,GX2_COMPONENT_Z_B)

/// Predefined component select (W,W,W,W) or (A,A,A,A)
#define GX2_COMP_SEL_WWWW GX2_GET_COMP_SEL(GX2_COMPONENT_W_A,GX2_COMPONENT_W_A,GX2_COMPONENT_W_A,GX2_COMPONENT_W_A)

/// Predefined component select (W,Z,Y,X) or (A,B,G,R)
#define GX2_COMP_SEL_WZYX GX2_GET_COMP_SEL(GX2_COMPONENT_W_A,GX2_COMPONENT_Z_B,GX2_COMPONENT_Y_G,GX2_COMPONENT_X_R)

/// Predefined component select (W,X,Y,Z) or (A,R,G,B)
#define GX2_COMP_SEL_WXYZ GX2_GET_COMP_SEL(GX2_COMPONENT_W_A,GX2_COMPONENT_X_R,GX2_COMPONENT_Y_G,GX2_COMPONENT_Z_B)

// -----------------
// Utility #defines

/// Swap byte order in a 32-bit int.
///
/// Note: x is evaluated more than once, so avoid arguments with side effects.
///
#define GX2_SWAP_8_IN_32(x) \
    ( (((x) >> 24) & 0xff) | (((x) >> 8) & 0xff00) | (((x) << 8) & 0xff0000) | (((x) << 24) & 0xff000000) )

/// Swap byte order in a 16-bit int.
///
/// Note: x must already be cast to a 16-bit int-type.
///
/// Both halves are masked so that the result always fits in 16 bits.  A 16-bit
/// operand is promoted to int before shifting, so without the masks the high
/// byte of x would also remain in bits 16-23 of the result
/// (e.g. 0x1234 would give 0x123412 instead of 0x3412).
///
/// x is evaluated more than once, so avoid arguments with side effects.
///
#define GX2_SWAP_8_IN_16(x) \
    ( (((x) >> 8) & 0xff) | (((x) << 8) & 0xff00) )

/// Swap 16-bit word order in a 32-bit int.
///
/// Note: x must already be cast to a 32-bit int-type.
///
/// x is evaluated more than once, so avoid arguments with side effects.
///
#define GX2_SWAP_16_IN_32(x) \
    ( ((x) >> 16) | ((x) << 16) )

/// Swap byte order in a 64-bit int.
///
/// 0xffull is a 64-bit constant = 0xff
///
/// x is evaluated more than once, so avoid arguments with side effects.
#define GX2_SWAP_8_IN_64(x) \
    ((((x)>>56)&(0xffull<< 0))|(((x)>>40)&(0xffull<< 8))|(((x)>>24)&(0xffull<<16))|(((x)>> 8)&(0xffull<<24))| \
     (((x)<< 8)&(0xffull<<32))|(((x)<<24)&(0xffull<<40))|(((x)<<40)&(0xffull<<48))|(((x)<<56)&(0xffull<<56)))

/// Utility function to copy and endian-swap a given range of bytes.
/// It performs a 8-in-32 swap while copying u32 units from source to dest.
/// For best results, you should also call DCZeroRange() on the dest first
/// (this can only be done when the range is 32-byte aligned in location and
/// size, though).
///
/// On builds other than Windows/Android/Apple the swap is done by storing
/// through a __bytereversed pointer; on those three platforms each word is
/// swapped with GX2_SWAP_8_IN_32.
///
/// dst     Start of the given dest buffer range to copy and swap
/// src     Start of the given src buffer range (only read, never written)
/// byteLen Length of the given range specified in bytes. Must be a multiple of 4
///         (asserted).
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE void GX2CopyEndianSwap(void *dst, const void *src, u32 byteLen)
{
    ASSERT((byteLen % sizeof(u32))==0);
#if !(defined _WIN32 || defined _WIN64 || defined ANDROID || defined __APPLE__)
    u32 i;
    // Keep the const qualifier of src instead of casting it away.
    const u32 *csrc = (const u32 *) src;
    __bytereversed u32 *cdst = (__bytereversed u32 *) dst;
    for(i=0; i<(byteLen>>2); i++) cdst[i] = csrc[i];
#else
    {
        // Keep the const qualifier of src instead of casting it away.
        const u32* csrc = (const u32*)src;
        u32* cdst = (u32*)dst;
        const u32 wordCount = byteLen >> 2;
        u32 i;
        for (i = 0; i < wordCount; i++)
        {
            // Load the word once: GX2_SWAP_8_IN_32 evaluates its argument
            // several times.
            const u32 word = csrc[i];
            cdst[i] = GX2_SWAP_8_IN_32(word);
        }
    }
#endif
}

/// In-place 8-in-32 endian swap of a range of bytes.
///
/// The length is expressed in bytes to mirror other APIs that operate on ranges.
///
/// ptr     Start of the range to swap; the range is overwritten with the result
/// byteLen Length of the range in bytes. Must be a multiple of 4 (asserted).
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE void GX2EndianSwap(void *ptr, u32 byteLen)
{
    ASSERT((byteLen % sizeof(u32))==0);
#if !(defined _WIN32 || defined _WIN64 || defined ANDROID || defined __APPLE__)
    u32 idx;
    u32 *plain = (u32 *) ptr;
    // Same memory viewed through a byte-reversed pointer: each store swaps the word.
    __bytereversed u32 *reversed = (__bytereversed u32 *) ptr;
    for (idx = 0; idx < (byteLen >> 2); ++idx)
    {
        reversed[idx] = plain[idx];
    }
#else
    {
        u32* word = (u32*)ptr;
        u32* const end = word + (byteLen >> 2);
        for (; word != end; ++word)
        {
            *word = GX2_SWAP_8_IN_32(*word);
        }
    }
#endif
}

/// Round a value up to the next multiple of a power-of-2 size.
///
/// The result is only meaningful when size is a power of 2; this is asserted.
/// Note that the assert does not reject size == 0.
/// The addition is done in u32 arithmetic, so it wraps if value + (size - 1)
/// does not fit in a u32.
///
/// value    the input value to be rounded up
/// size     a power of 2 multiple to round value up to
/// return         value rounded up to a multiple of size
///
/// donotcall threadsafe devonly enddonotcall
///
GX2_INLINE u32 GX2RoundUp(u32 value, u32 size)
{
    // For a power of 2, size - 1 is the mask of the bits below the size bit.
    const u32 lowBits = size - 1;

    ASSERT((size & lowBits) == 0);
    return (value + lowBits) & ~lowBits;
}

/// Type used for default GX2 memory allocator
///
/// NOTE(review): parameter meanings below are inferred from their names;
/// confirm against the GX2 reference.
/// userArg    caller-defined argument
/// byteCount  number of bytes to allocate
/// alignment  requested alignment of the allocation
/// return     pointer to the allocated memory
typedef void* (*GX2DefaultAllocateFunc)(u32 userArg, u32 byteCount, u32 alignment);

/// Type used for default GX2 memory free function
///
/// NOTE(review): parameter meanings below are inferred from their names;
/// confirm against the GX2 reference.
/// userArg    caller-defined argument
/// pMem       memory block to free
typedef void (*GX2DefaultFreeFunc)(u32 userArg, void* pMem);

/// Set default functions to use for memory allocation/freeing.
///
/// These will be used by GX2R regular & debug features as well as by the debug PM4 capture
/// unless other allocators are specifically set using other APIs.  These functions can also
/// be easily used by the perf APIs by passing in GX2MEMDefaultAllocator.
///
/// If not set by the user, these will just call MEMAllocFromDefaultHeap/MEMFreeToDefaultHeap.
///
/// pfnAlloc pointer to allocator function
/// pfnFree  pointer to free function
///
/// donotcall fgonly notthreadsafe notinterrupt notexception devonly enddonotcall
///
void GX2API GX2SetDefaultAllocator(GX2DefaultAllocateFunc pfnAlloc, GX2DefaultFreeFunc pfnFree);

/// Get default functions to use for memory allocation/freeing.
///
/// ppfnAlloc pointer that receives the allocator function pointer
/// ppfnFree  pointer that receives the free function pointer
///
/// donotcall threadsafe devonly enddonotcall
///
void GX2API GX2GetDefaultAllocator(GX2DefaultAllocateFunc *ppfnAlloc, GX2DefaultFreeFunc *ppfnFree);

#if !(defined _WIN32 || defined _WIN64 || defined ANDROID || defined __APPLE__)
/// A pre-defined allocator that will call the default GX2 allocator/free functions
/// that are set up using GX2SetDefaultAllocator.
///
/// Not declared on Windows, Android or Apple builds, where <cafe/mem.h>
/// (and therefore MEMAllocator) is not included by this header.
extern MEMAllocator GX2MEMDefaultAllocator;
#endif

#ifdef _DEBUG
/// Tell GX2 about graphics memory allocation.
///
/// While not required, calling this function when allocating GPU-accessed
/// graphics data may help the GPU debugger PM4 frame capture feature
/// work properly or more efficiently.
///
/// addr   start of the allocated memory
/// size   size passed with the notification
/// align  alignment passed with the notification
///
/// donotcall fgonly notthreadsafe notinterrupt notexception devonly enddonotcall
///
void GX2API GX2NotifyMemAlloc(void* addr, u32 size, u32 align);

/// Tell GX2 about graphics memory free.
///
/// While not required, calling this function when freeing GPU-accessed
/// graphics data may help the GPU debugger PM4 frame capture feature
/// work properly or more efficiently.
///
/// addr   start of the memory being freed
///
/// donotcall fgonly notthreadsafe notinterrupt notexception devonly enddonotcall
///
void GX2API GX2NotifyMemFree(void* addr);
#else
/// Non-debug stub for GX2NotifyMemAlloc: does nothing.
///
/// NOTE(review): this stub takes const void* while the _DEBUG prototype takes
/// void*, so a call passing a const pointer only compiles in non-debug builds.
GX2_INLINE void GX2NotifyMemAlloc(const void* addr, u32 size, u32 align){
    // Explicitly discard the arguments to avoid unused-parameter warnings.
    // (A self-assignment such as addr=addr triggers -Wself-assign on clang.)
    (void)addr;
    (void)size;
    (void)align;
}

/// Non-debug stub for GX2NotifyMemFree: does nothing.
///
/// NOTE(review): same const void* / void* difference as GX2NotifyMemAlloc.
GX2_INLINE void GX2NotifyMemFree(const void* addr){
    // Explicitly discard the argument to avoid an unused-parameter warning.
    (void)addr;
}
#endif

/// Invalidate GPU input caches or flush GPU output caches/pipelines.
///
/// This function invalidates GPU input caches or flushes GPU output caches.
/// For CPU ranges, it calls DCFlushRange() (i.e., it flushes the data cache).
///
/// Every memory range to be read by the GPU must be flushed or
/// invalidated from the CPU cache before use.  For display lists,
/// you should call GX2Invalidate() on the memory *before* writing
/// to it with GX2 commands.  For any vertex, texture, shader, or
/// uniform buffer data written by the CPU, you should call
/// GX2Invalidate() *after* writing it.
///
/// For rendering to textures, this function must be called twice:
/// - Once to flush the color buffer (or depth buffer) write pipe
/// - Once to invalidate the texture read cache
///
/// For GPU input cache flushes, you may specify a range of 0x0 - 0xffffffff.
/// This can make resource management easier for the CPU, though it may
/// cause some unnecessary invalidation.
/// For GPU output cache flushes, you may also specify a range of 0x0 - 0xffffffff.
/// This can make resource management easier for the CPU, though it may cause some
/// unnecessary invalidation.  In addition, the GPU has optimizations to avoid
/// unnecessary stalling when it is given ranges that match the ones it knows about.
///
/// invType  Flags indicating which invalidations/flushes to perform
/// ptr      start of cached data area
/// size     size of cached data area in bytes
///
/// donotcall gx2_typical enddonotcall
///
/// writesgpu
/// writesgpu{if the invType contains any GPU invalidation bit}
///
void GX2API GX2Invalidate( GX2InvalidateType invType, void *ptr, u32 size );

// -----------------
// Debugging Helpers

/// Enable or disable debug drawing modes
///
/// - "Flush per draw" makes sure that each draw command is submitted to the
/// hardware as soon as possible.
/// - "Done per flush" waits for each submit (of any type) to complete before
/// continuing.
///
/// This function only operates in debug builds of GX2.  In non-debug
/// builds, the function still exists, but does not do anything.
///
/// dm  the debug mode(s) to use
///
/// donotcall fgonly notthreadsafe devonly enddonotcall
///
void GX2API GX2SetDebugMode(GX2DebugMode dm);

/// Sets the maximum amount of time to wait for the GPU to
/// perform certain actions before regarding it as hung.
///
/// This value affects functions such as GX2WaitTimeStamp and
/// GX2WaitForVsync. When GX2 thinks the GPU might be hung,
/// it can report the state of some GPU registers and determine which
/// GPU blocks are busy.
///
/// This API can change the timeout value for debugging purposes.  Note
/// that you should avoid setting the timeout to less than 17 msec, or
/// some functions will most likely time out.  The default timeout
/// is given by GX2_DEFAULT_GPU_TIMEOUT_IN_MILLISEC.
///
/// millisec amount of time (in milliseconds) before GX2 considers GPU to be hung
///
/// donotcall fgonly notthreadsafe devonly enddonotcall
///
void GX2API GX2SetGPUTimeout(u32 millisec);

/// Returns the value set by GX2SetGPUTimeout
///
/// donotcall fgonly threadsafe devonly enddonotcall
///
u32 GX2API GX2GetGPUTimeout(void);

/// Sets the maximum number of interrupts allowed during
/// a single frame time (1/60 of a second) before a warning is issued.
///
/// In debug builds only, a warning is printed out if this limit is
/// exceeded.  This is to help alert the user of unexpected processing
/// that can interfere with normal operations.  The default limit is
/// given by GX2_DEFAULT_INTERRUPT_COUNT_LIMIT.
///
/// limit The interrupt count limit during each 1/60 sec frame.
///
/// donotcall fgonly notthreadsafe devonly enddonotcall
///
void GX2API GX2SetInterruptCountLimit(u32 limit);

/// Helper function to see current GPU status.
///
/// warning This should not be used in production code.
///
/// donotcall gx2_typical enddonotcall
///
void GX2API GX2PrintGPUStatus(void);

/// Function used to set GX2Log-related settings.
///
/// attrib  the log attribute to set
/// value   the value to assign to that attribute
///
/// donotcall threadsafe devonly enddonotcall
///
void GX2API GX2LogSetMisc(GX2LogAttrib attrib, u32 value);


/// Function to write a user string to the PM4 capture
///
/// userTagType      A semantic tag written with the string for tool use
/// formatString     printf-style format string; it and the following arguments
///                  are written to the captured command buffer
///
/// donotcall gx2_typical enddonotcall
///
/// writesgpu
/// alwayswritesgpu
///
void GX2API GX2DebugTagUserString(GX2DebugTagUserStringType userTagType, const char* formatString, ...);

/// Use this version of GX2DebugTagUserString if you want to forward arguments
/// from your own varargs wrapper.
///
/// NOTE(review): this header does not include <stdarg.h> directly; va_list
/// must already be declared by an earlier include.
///
/// userTagType      A semantic tag written with the string for tool use
/// formatString     printf-style format string, written to the captured command buffer
/// args             Extract from varargs "..." using C standard library va_start()/va_end()
///
/// donotcall gx2_typical enddonotcall
///
/// writesgpu
/// alwayswritesgpu
///
void GX2API GX2DebugTagUserStringVA(GX2DebugTagUserStringType userTagType, const char* formatString, va_list args);

/// Initialize the capture system.
///
/// The remote Capture button in the Spark GPU debugger will be enabled
/// after this call.
///
/// You must link with gx2spark.a to use this function.
///
/// NOTE(review): unlike the other prototypes in this header, this one is
/// declared without GX2API - confirm this is intentional.
///
/// initAttribs  Reserved for future use.
///
/// donotcall fgonly notthreadsafe notinterrupt notexception notcallback devonly enddonotcall
///
void GX2DebugCaptureInit(u32 *initAttribs);

/// Captures the PM4 command sequence and associated buffers to a capture file
/// for analysis with pm4parse tool or Spark GPU debugger.
///
/// The capture will start at the next GX2SwapScanBuffers and will end at
/// the subsequent occurrence of that function.
///
/// This is the officially supported API for debug captures.
///
/// filename Name of the .4mp output file in data/save/common
///
/// donotcall fgonly notthreadsafe notinterrupt notexception devonly enddonotcall
///
void GX2API GX2DebugCaptureFrame(const char* filename);

/// Similar to GX2DebugCaptureFrame, but handles multiple frames.
///
/// This function is deprecated.  Please use GX2DebugCaptureFrame instead.
///
/// filename  Name of the .4mp output file in data/save
/// numFrames Number of frames to capture
///
/// donotcall fgonly notthreadsafe notinterrupt notexception devonly enddonotcall
///
void GX2API GX2DebugCaptureFrames(const char* filename, u32 numFrames);

/// Start capturing the PM4 command sequence and associated buffers to a capture file.
///
/// This API is for advanced usage.  Capturing more or less than a full single frame
/// may present debugger support issues not present otherwise (we may not support such
/// usage).  It should be fine for viewing with pm4parse, however.
///
/// filename     Name of the .4mp output file in data/save/common
/// optionFlags  See GX2DebugCaptureOptions
///
/// donotcall nomulticore gx2_dl enddonotcall
///
/// writesgpu
/// writesgpu{if GX2_DEBUG_CAPTURE_NO_FLUSH is not set.}
///
void GX2API GX2DebugCaptureStart(const char* filename, GX2DebugCaptureOptions optionFlags);

/// Stops a PM4 capture.
///
/// This API is for advanced usage.  Capturing more or less than a full single frame
/// may present debugger support issues not present otherwise (we may not support such
/// usage).  It should be fine for viewing with pm4parse, however.
///
/// optionFlags  See GX2DebugCaptureOptions
///
/// donotcall nomulticore gx2_dl enddonotcall
///
/// writesgpu
/// writesgpu{if GX2_DEBUG_CAPTURE_NO_FLUSH is not set.}
///
void GX2API GX2DebugCaptureEnd(GX2DebugCaptureOptions optionFlags);

/// Compile-time assert
///
/// Declares an array typedef named static_assert_failed_<message> whose size is
/// 1 when e is true and -1 (a compile error) when e is false.
/// message must be a valid identifier fragment, not a string.
#define GX2_STATIC_ASSERT(e, message)   typedef int static_assert_failed_##message[(e) ? 1 : -1]

/// Turn on every bit of bits in *pFlags; other bits are left untouched.
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE void GX2SetBitFlags(u32* pFlags, u32 bits)
{
    *pFlags = *pFlags | bits;
}

/// Turn off every bit of bits in *pFlags; other bits are left untouched.
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE void GX2ClearBitFlags(u32* pFlags, u32 bits)
{
    *pFlags = *pFlags & ~bits;
}

/// Keep only the bits of *pFlags that are also set in mask.
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE void GX2MaskBitFlags(u32* pFlags, u32 mask)
{
    *pFlags = *pFlags & mask;
}

/// Replace the bits of *pFlags selected by mask with the corresponding bits
/// of bits; bits outside mask are left untouched.
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE void GX2ReplaceBitFlags(u32* pFlags, u32 mask, u32 bits)
{
    const u32 kept     = *pFlags & ~mask;
    const u32 replaced = bits & mask;

    *pFlags = kept | replaced;
}

/// Returns GX2_TRUE if at least one of the bits is set in flags
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE GX2Boolean GX2TestBitFlagsAny(u32 flags, u32 bits)
{
    const u32 common = flags & bits;

    return (GX2Boolean)(common != 0);
}

/// Returns GX2_TRUE if every one of the bits is set in flags
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE GX2Boolean GX2TestBitFlagsAll(u32 flags, u32 bits)
{
    const u32 common = flags & bits;

    return (GX2Boolean)(common == bits);
}

/// Returns the smaller of a and b
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE u32  GX2Min(u32 a, u32 b)
{
    if (a < b)
    {
        return a;
    }
    return b;
}

/// Returns the larger of a and b
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE u32  GX2Max(u32 a, u32 b)
{
    if (a > b)
    {
        return a;
    }
    return b;
}

/// Utility function, returns GX2_TRUE if the pointer meets the given alignment
///
/// The check compares the address with itself rounded up by GX2RoundUp, so
/// alignment must be a power of 2 (GX2RoundUp asserts this).
///
/// The pointer is converted through uintptr_t before being narrowed to u32.
/// A direct (u32)p cast is rejected or warned about by compilers on 64-bit
/// targets.  Only the low 32 bits of the address are needed here because
/// alignment itself is a u32.
///
/// p          pointer to test
/// alignment  power-of-2 alignment in bytes
/// return     GX2_TRUE if p is a multiple of alignment, GX2_FALSE otherwise
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE GX2Boolean GX2IsAligned(const void *p, u32 alignment)
{
    const u32 addrLow = (u32)(uintptr_t)p;

    return (GX2Boolean)(addrLow == GX2RoundUp(addrLow, alignment));
}

/// Returns true if the specified format supports the fast fixed-function box MSAA resolve.
///
/// format  surface format to query
/// return  GX2_TRUE for the formats listed in the switch below, GX2_FALSE for
///         every other value
///
/// donotcall threadsafe enddonotcall
///
GX2_INLINE GX2Boolean GX2IsResolveSupported(GX2SurfaceFormat format)
{
    switch(format)
    {
        //Only the following formats support fixed function (box-filter) MSAA resolve
        case GX2_SURFACE_FORMAT_TC_R32_G32_B32_A32_FLOAT:

        case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_FLOAT:
        case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_UNORM:
        case GX2_SURFACE_FORMAT_TC_R16_G16_B16_A16_SNORM:
        case GX2_SURFACE_FORMAT_TC_R32_G32_FLOAT:

        case GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_UNORM:
        case GX2_SURFACE_FORMAT_TCS_R8_G8_B8_A8_SRGB:
        case GX2_SURFACE_FORMAT_TC_R8_G8_B8_A8_SNORM:
        case GX2_SURFACE_FORMAT_TCS_R10_G10_B10_A2_UNORM:
        case GX2_SURFACE_FORMAT_TCS_A2_B10_G10_R10_UNORM:
        case GX2_SURFACE_FORMAT_TC_R11_G11_B10_FLOAT:
        case GX2_SURFACE_FORMAT_TC_R16_G16_FLOAT:
        case GX2_SURFACE_FORMAT_TC_R16_G16_UNORM:
        case GX2_SURFACE_FORMAT_TC_R16_G16_SNORM:
        case GX2_SURFACE_FORMAT_TCD_R32_FLOAT:

        case GX2_SURFACE_FORMAT_TCS_R5_G6_B5_UNORM:
        case GX2_SURFACE_FORMAT_TC_R5_G5_B5_A1_UNORM:
        case GX2_SURFACE_FORMAT_TC_A1_B5_G5_R5_UNORM:
        case GX2_SURFACE_FORMAT_TC_R4_G4_B4_A4_UNORM:
        case GX2_SURFACE_FORMAT_TC_R8_G8_UNORM:
        case GX2_SURFACE_FORMAT_TC_R8_G8_SNORM:
        case GX2_SURFACE_FORMAT_TC_R16_FLOAT:
        case GX2_SURFACE_FORMAT_TCD_R16_UNORM:
        case GX2_SURFACE_FORMAT_TC_R16_SNORM:

        case GX2_SURFACE_FORMAT_TC_R8_UNORM:
        case GX2_SURFACE_FORMAT_TC_R8_SNORM:
        case GX2_SURFACE_FORMAT_T_R4_G4_UNORM:
            return GX2_TRUE;

        // Explicit default: every format not listed above is unsupported.
        // It also keeps compilers from warning about unhandled enumerators
        // (-Wswitch), so no compiler-specific diagnostic pragmas are needed.
        default:
            break;
    }

    return GX2_FALSE;
}

/// Context state data size in bytes.
///
#define GX2_CONTEXT_STATE_DATA_SIZE 41216

/// Context state
///
/// Opaque buffer of GX2_CONTEXT_STATE_DATA_SIZE bytes, stored as an array of
/// u32 words.  It is initialized with GX2SetupContextState /
/// GX2SetupContextStateEx and made current with GX2SetContextState.
typedef struct _GX2ContextState {
    u32 data[GX2_CONTEXT_STATE_DATA_SIZE/sizeof(u32)];  // size in bytes converted to a u32 element count
} GX2ContextState;

/// Call other GX2 commands to set up the default render state. See GX2SetDefaultStateSect.
///
/// donotcall gx2_typical enddonotcall
///
/// clobberstate
///
/// writesgpu
/// alwayswritesgpu
///
void GX2API GX2SetDefaultState(void);

/// Initialize the given context state memory with the default state and set it in use.
///
/// When profiling is disabled, GX2 will not override any state based on the profile mode or
/// toss stage that was set up during initialization.
///
/// This function will normally create an internal display list as an optimization to speed
/// up calls to GX2SetupContextState.  Since it must call GX2BeginDisplayList in order to
/// create the internal display list, this means you cannot call this function while
/// making a display list.
///
/// pState          pointer to a GX2ContextState buffer.
/// enableProfiling enable HW profiling for this context state.
///
/// donotcall gx2_typical enddonotcall
///
/// enablesstateshadow
/// clobberstate
///
/// writesgpu
/// alwayswritesgpu
///
void GX2API GX2SetupContextStateEx(GX2ContextState* pState, GX2Boolean enableProfiling);

/// Initialize the given context state memory with the default state and set it in use.
///
/// Thin wrapper: forwards to GX2SetupContextStateEx with enableProfiling set
/// to GX2_ENABLE, so HW profiling will be enabled for the context state.
///
/// pState  pointer to a GX2ContextState buffer.
///
/// donotcall gx2_typical enddonotcall
///
/// enablesstateshadow if pState is non-NULL.
/// disablesstateshadow if pState is NULL.
/// clobberstate
///
/// writesgpu
/// alwayswritesgpu
///
GX2_INLINE void GX2SetupContextState(GX2ContextState* pState)
{
    GX2SetupContextStateEx(pState, GX2_ENABLE);
}

/// Loads the given context state and makes it current for future changes.
///
/// If pState is NULL the previous context state is preserved
/// and future state changes will not be saved to memory.
/// (I.e., this disables state shadowing.)
///
/// Normally this function will just call or copy an internal display list that
/// contains all the necessary load commands.  A call is used unless we are in
/// the process of creating a display list, in which case a copy is done instead.
///
/// This function invokes a full pipeline flush; it takes approx. 23 usecs.
///
/// pState  pointer to an already-initialized GX2ContextState buffer, or NULL.
///
/// donotcall gx2_typical enddonotcall
///
/// clobberstate
/// enablesstateshadow State shadowing is enabled if pState is non-NULL.
/// disablesstateshadow State shadowing is disabled if pState is NULL.
/// notincompute
///
/// writesgpu
/// alwayswritesgpu
///
void GX2API GX2SetContextState(const GX2ContextState* pState);

/// Returns the pointer and size for the internal display list found in a GX2ContextState.
///
/// To improve the performance of GX2SetContextState, when you call GX2SetupContextState,
/// we create an internal display list that performs all the necessary register loads.
/// Then, when you call GX2SetContextState, we either make a call to that display list (if
/// you are currently writing to a command buffer), or we copy that display list (if you
/// are currently writing to a display list).  The latter is done to avoid having too many
/// nested display list calls.  However, if you know you will not exceed the nesting level,
/// you can call the display list directly by getting its parameters with this API.
///
/// pState         pointer to an already-initialized GX2ContextState buffer.
/// ppDisplayList  pointer to receive the display list pointer.
/// pByteSize      pointer to receive the display list size in bytes.
///
/// donotcall threadsafe devonly enddonotcall
///
void GX2API GX2GetContextStateDisplayList(const GX2ContextState *pState, void **ppDisplayList, u32 *pByteSize);

#ifdef __cplusplus
}
#endif // __cplusplus

#endif // _CAFE_GX2_MISC_H_
