﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#if defined(__APPLE__)
#include <TargetConditionals.h>
#endif

#if defined(ANDROID)
#include <android/log.h>
#elif TARGET_OS_IPHONE
#include <cstdio>
#else
#include <Windows.h>
#endif


#include <string.h> // for memcpy

#include <winext/cafe/os.h>
#include <winext/cafe/ai.h>
#include "AXPrivate.h"

#include "AXUpsample.h"
// build options
#define DSP_DOES_MULTI_SRC
#define AX_IST_MODE

#ifndef APP_IO_MODE
#define APP_IO_MODE FALSE
#endif

namespace nw {
    namespace internal {
        namespace winext {

// File-local configuration for this translation unit.
namespace {
// Per-device enable flags; only the first entry starts out enabled.
// NOTE(review): the initializer lists exactly three values, so this presumably
// relies on AX_MAX_NUM_DEVICES being 3 -- confirm against the header.
bool s_IsEnableDevice[AX_MAX_NUM_DEVICES] = {true, false, false};

// Number of DRC outputs to process.
// DRC processing is disabled (count of 0) on Android and iPhone targets;
// every other target processes all AX_MAX_NUM_DRCS outputs.
#if defined(ANDROID) || TARGET_OS_IPHONE
    const int s_ProcessDrcCount = 0;
#else
    const int s_ProcessDrcCount = AX_MAX_NUM_DRCS;
#endif
}

void AISetFormatChangeState(AI_CHANNEL channel);
/*---------------------------------------------------------------------------*
    local vars
 *---------------------------------------------------------------------------*/
// 32k -> ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXOutBuffer[3][AX_OUT_SAMPLES_PER_FRAME * 2]);

// Samples per channel in one output frame at 48kHz (144 samples = 3ms).
#define AX_OUT_48K_SAMPLES_PER_FRAME 144
// Size in bytes of one 16-bit output sample.
#define BYTES_PER_SAMP 2
// Interleaved 16-bit output buffers. The leading [3] dimension provides up to
// three frame slots; "* 2" leaves room for two interleaved channels.
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXOutBuffer[3][AX_OUT_48K_SAMPLES_PER_FRAME * 2]);
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXOutDRCAI2Buffer[3][AX_OUT_48K_SAMPLES_PER_FRAME * 2]);
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXOutAI2Buffer[3][AX_OUT_48K_SAMPLES_PER_FRAME * 2]);
// 32-bit scratch buffer sized for two channels at the input frame length.
ALIGNED_VAR(static s32, PPC_IO_BUFFER_ALIGN, __AXOutSBuffer[AX_IN_SAMPLES_PER_FRAME * 2]);

// Interleaved 6 channel 16-bit output, one slot per frame index
// (__AXSendTvFrame writes into __AXOut6chBuffer[which_frame]).
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXOut6chBuffer[3][AX_OUT_48K_SAMPLES_PER_FRAME * 6]);
// Per-device, per-frame-slot, per-channel 32-bit mix buffers at the input frame
// length. __AXOutNewFrame hands the [__AXOutFrame][AX_CH_LEFT] rows of these to
// __AXNextFrame.
ALIGNED_VAR(static s32, PPC_IO_BUFFER_ALIGN, __AXOutTVBuffer[AX_MAX_NUM_TVS][3][AX_MAX_NUM_TV_CHS][AX_IN_SAMPLES_PER_FRAME]);
ALIGNED_VAR(static s32, PPC_IO_BUFFER_ALIGN, __AXOutDRCBuffer[AX_MAX_NUM_DRCS][3][AX_MAX_NUM_DRC_CHS][AX_IN_SAMPLES_PER_FRAME]);

// The following two are used for the final mix callback at 48kHz
// (one row of 32-bit samples per device channel).
ALIGNED_VAR(static s32, PPC_IO_BUFFER_ALIGN, __AXTVFinalMixBuffer[AX_MAX_NUM_TVS][AX_MAX_NUM_TV_CHS][AX_OUT_48K_SAMPLES_PER_FRAME ]);
ALIGNED_VAR(static s32, PPC_IO_BUFFER_ALIGN, __AXDRCFinalMixBuffer[AX_MAX_NUM_DRCS][AX_MAX_NUM_DRC_CHS][AX_OUT_48K_SAMPLES_PER_FRAME]);

// The following two hold the final 16-bit output at 48kHz
// (flat arrays sized for every channel of every device).
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXTVFinalOutputBuffer[AX_MAX_NUM_TVS * AX_MAX_NUM_TV_CHS * AX_OUT_48K_SAMPLES_PER_FRAME ]);
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXDRCFinalOutputBuffer[AX_MAX_NUM_DRCS * AX_MAX_NUM_DRC_CHS * AX_OUT_48K_SAMPLES_PER_FRAME]);

// History values for the devices, one s32 per device channel.
// NOTE(review): presumably the last sample carried between frames by the
// upsampler -- confirm against the code that reads them.
static s32 __AXTVHistories[AX_MAX_NUM_TVS][AX_MAX_NUM_TV_CHS];
static s32 __AXDRCHistories[AX_MAX_NUM_DRCS][AX_MAX_NUM_DRC_CHS];

// Upsampler state, one AXUpsampleStruct per device channel.
static AXUpsampleStruct __AXTVUpsampleStruct[AX_MAX_NUM_TVS][AX_MAX_NUM_TV_CHS];
static AXUpsampleStruct __AXDRCUpsampleStruct[AX_MAX_NUM_DRCS][AX_MAX_NUM_DRC_CHS];

// Per-device setting related to upsampling after the final mix callback
// ("PostFCB"). NOTE(review): exact meaning of the stored value is not visible
// in this part of the file.
static u32 __AXDevicePostFCBUpsample[AX_MAX_NUM_DEVICES];

// Forward declaration; the definition is further down in this file.
static void __AXHandleFinalMixCallbackStage(void);
// Per-device flags selecting the linear upsampler.
static BOOL __AXTVLinearUpsampler[AX_MAX_NUM_TVS];
static BOOL __AXDRCLinearUpsampler[AX_MAX_NUM_DRCS];

// Debug trace words. The disabled IST code copies __debugTrace into
// __debugTrace_b once per frame in _DEBUG builds.
ALIGNED_VAR(u16, PPC_IO_BUFFER_ALIGN, __debugTrace[32]) = { 0 };
ALIGNED_VAR(u16, PPC_IO_BUFFER_ALIGN, __debugTrace_b[32]) = { 0 };

// C4305 (truncation of the initializer constants to f32) is silenced for the
// coefficient table only and restored right after it.
#pragma warning(push)
#pragma warning(disable:4305) // possible loss of data
// Upsampling FIR filter coefficients. The array initializer is opened here, its
// contents are pulled in from the included header, and the closing brace
// follows the #include.
ALIGNED_VAR(static f32, PPC_IO_BUFFER_ALIGN, __AXUpsampleFiltCoeffs[] = { )
#include "upsamplingFIRcoeffs_66.h"
};
#pragma warning(pop) // C4305

// "volstat" spells "static volatile" in the qualifier order each compiler
// accepts (__ghs__ is the Green Hills compiler macro).
#ifdef __ghs__
#define volstat static volatile
#else
#define volstat volatile static
#endif
// Forward declaration; defined elsewhere.
void __AXAI2_push_frame(void);

// Index of the current output frame slot. __AXOutNewFrame increments it once per
// frame and wraps it modulo 3 (triple buffering) or modulo 2 (otherwise).
volstat u32             __AXOutFrame;
volstat u32             __AXOutAI2Frame;
volstat u32             __AXAiDmaFrame;
// Set to AX_DSP_FRAME_COMPLETE once the frame's mixing is finished.
volstat u32             __AXOutDspReady;
// Small FIFO plus in/out positions.
// NOTE(review): by name it passes DRC frames to AI2 -- confirm with the users.
volstat u32             __DRC2AI2_FIFO[4];
volstat u32             __DRC2AI2_in;
volstat u32             __DRC2AI2_out;

// Variables for the remix done at the output stage.
// (1->6) input channels going to (2,6) output channels for TV
// (1->4) input channels going to (1,2) output channels for DRC
// so a maximum of 12 combinations per device.
#define AX_MAX_REMIX_MATRICES 12
// One remix matrix per device and channel-count combination.
AXDeviceRemixStruct __AXDeviceRemixMatrix[AX_MAX_NUM_DEVICES][AX_MAX_REMIX_MATRICES];
// Channel count of each device at the final mix stage.
u16					__AXDeviceFinalMixOutChannels[AX_MAX_NUM_DEVICES];
// Channel count of each device at its output.
u16					__AXDeviceOutChannels[AX_MAX_NUM_DEVICES];

// Ramp up and ramp down variables used during init/shutdown etc.
/* All the following numbers are calculated for 60ms (20 frames) @48kHz. */
#define AX_RAMP_FRAMES 20
#pragma warning(push)
#pragma warning(disable:4305) // possible loss of data
// NOTE(review): 20*log10(0.996169712514941) is about -0.0333dB, i.e. -96dB over
// 20 frames x 144 samples (2880 steps). A "0.05dB/sample" figure would
// correspond to 20 frames x 96 samples instead, so the labels below are
// corrected to match the constants.
static const f32 __AXRampDownRate = 0.996169712514941;   // about -0.0333dB per step
static const f32 __AXRampUpRate = 1.00384501499789;  // about +0.0333dB per step
static const f32 __AXRampDownVolumeStart = 1.0 ;  // 0dB
static const f32 __AXRampUpVolumeStart = 0.0000158489319246111 ; //-96dB
#pragma warning(pop) // C4305
// Rate and current volume of the ramp in progress.
static f32 __AXRampRate, __AXRampCurVolume;
// Frames remaining in the ramp in progress.
static u32 __AXFramesToGo;
// Current ramp state; starts out with no ramp in progress.
AX_RAMP_STATE_TYPE __AXRampState = AX_RAMP_DONE;

// User frame callback; __AXOutNewFrame invokes it once per frame when set.
static  AXUserCallback  __AXUserFrameCallback;

// Final mix callbacks: one callback and one argument structure per device,
// plus flat tables of per-channel buffer pointers for TV and DRC.
static AXUserFinalMixCallback __AXDeviceFinalMixCallback[AX_MAX_NUM_DEVICES];
static AX_FINAL_MIX_CB_STRUCT __AXDeviceFinalMixCBStruct[AX_MAX_NUM_DEVICES];
static s32*                   __AXTVFinalMixPointers[AX_MAX_NUM_TVS*AX_MAX_NUM_TV_CHS];
static s32*                   __AXDRCFinalMixPointers[AX_MAX_NUM_DRCS*AX_MAX_NUM_DRC_CHS];

#ifdef AX_SUPPORT_RMT_FINAL_MIX
// The remote pointers are double buffered because remote data is handled
// differently: it is put into a queue, and by the time the final mix runs the
// previous queue position has been lost, so two sets of pointers are kept.
#define AX_PRIV_RMT_FINAL_MIX_SIZE 2
static s32*                   __AXRMTFinalMixPointers[AX_PRIV_RMT_FINAL_MIX_SIZE][AX_MAX_NUM_RMTS*AX_MAX_NUM_RMT_CHS];
static u16                    __AXRMTFinalMixReadPos = 0;
static u16                    __AXRMTFinalMixWritePos = 0;
#endif // AX_SUPPORT_RMT_FINAL_MIX

// Previously installed AI DMA callbacks.
// NOTE(review): presumably saved so they can be restored later -- confirm.
static  AIDCallback     __AX_OldAIDMACallback = NULL;
static  AIDCallback     __AX_OldAI2DMACallback = NULL;
        // Profile of the frame in progress; __AXOutNewFrame zeroes it at the
        // start of each frame and copies it to the current user profile entry.
        AXPROFILE       __AXLocalProfile;
        // Profile entry most recently returned by __AXGetCurrentProfile().
        AXPROFILE      *__AXLastProfile = NULL;

volstat BOOL            __AXDSPInitFlag;
volstat BOOL            __AXDSPDoneFlag;
volstat BOOL            __AXServicingFrameFlag;

volstat u32             __AXDebugSteppingMode;

//static OSThreadQueue    __AXOutThreadQueue;

// Compared against AX_OUTPUT_BUFFER_TRIPLE to choose the frame index wrap.
static u32              __AXOutputBufferMode;
       // Bit mask tested with AXMIX_DSP_ENABLE / AXMIX_PPC_ENABLE; zero is the
       // stopped single-step mixer debug state.
       u32              __AXMixingMode    = AXMIX_DSP_ENABLE;
// PPC mixer entry point; called with the command list address.
static AXMixerCallback  __AXMixerCallback = NULL;
// Command list address obtained for the current frame.
static u16             *__last_cl;
static u32              __AXDefaultMixer  = AX_PB_RENDERER_SELECT_DSP_OR_PPC;

// Optional callback taking a channel count, a 16-bit sample buffer, a sample
// count and a sample rate. NOTE(review): the caller is not visible in this
// part of the file.
void (*__AINextCallback)(
    int channels,
    signed short* buffer,
    unsigned long samples,
    int sampleRate) = NULL;

// Settings for the IST (interrupt service thread) and IO service threads.
// Stack sizes are in bytes (16KB each).
#define AX_IST_THREAD_STACKSIZE        (4096*4)
#define AX_IO_THREAD_STACKSIZE         (4096*4)
// Message queue depths (only referenced by commented-out declarations here).
#define AX_IST_MAX_CALLS                     16
#define AX_IO_MAX_CALLS                      16
// Message types handled by the IST thread (AXISTMessage::type).
#define AXIST_QUIT                            0
#define AXIST_RUN                             1
#define AXIST_POST                            2
#define AXIST_AI2_EMPTY                       3
#define AXIST_AI_EARLY                        4
// Message types handled by the IO thread.
#define AXIO_QUIT                             0
#define AXIO_RUN                              1
// Message exchanged with the AX service threads.
struct _AXISTMessage
{
    void*  data;   // optional payload pointer
    u32    type;   // one of the AXIST_* / AXIO_* message types
    OSTime time;   // time stamp taken when the message was created
};
typedef struct _AXISTMessage AXISTMessage;

// Stack storage for the IST thread.
static u8             __AXIstThreadStack[AX_IST_THREAD_STACKSIZE];
// Thread objects: the emulated OSThread on Android/iPhone, a Win32 HANDLE
// otherwise.
#if defined(ANDROID) || TARGET_OS_IPHONE
static OSThread       __AXIstThread;
static OSThread       __AXIoThread;
#else
static HANDLE         __AXIstThread;
static HANDLE         __AXIoThread;
#endif

// NOTE(review): the only definition of __AXIstThreadFunc visible in this part
// of the file sits inside an "#if 0" region.
static int            __AXIstThreadFunc(int argc, void* argv);
//static OSMessage      __AXIstMessage[AX_IST_MAX_CALLS];
//static OSMessageQueue __AXIstMessageQueue;
// static int            __AXIoThreadFunc(int argc, void* argv);
// Stack storage for the IO thread.
static u8             __AXIoThreadStack[AX_IO_THREAD_STACKSIZE];
//static OSMessage      __AXIoMessage[AX_IO_MAX_CALLS];
//static OSMessageQueue __AXIoMessageQueue;
// Application IO mode flag; accessed through __AXSetAppIoMode() and
// __AXGetAppIoMode().
static BOOL           __ax_app_io_mode ;

static int __AXOutIsInForeground = 0;

// Pointer to a function taking no arguments and returning int.
typedef int (*t___OSIntVoidFunc)(void);
t___OSIntVoidFunc __fp__OSIsInterruptEnabled;

// for remote speakers
// ***TBD***!!! revisit cache/DDR line alignment AND PADDING!
//              also implicates DSP Remote Final DMA length

// Per remote channel sample storage holding AX_RMT_MAX_BLOCKS frames, used as a
// ring: __AXRmtDspPtr advances by AX_RMT_SAMPLES_PER_FRAME each frame and
// wraps to 0 when it reaches __AXRmtBuffLen.
ALIGNED_VAR(static s16, PPC_IO_BUFFER_ALIGN, __AXRmtOutBuffer[AX_RMT_CHANNELS][AX_RMT_SAMPLES_PER_FRAME * AX_RMT_MAX_BLOCKS]);
static s32 __AXRmtBuffLen;
// Sample offset where the next frame is written.
static s32 __AXRmtDspPtr;
// Sample offset of the reader; moved forward when the writer is about to
// overwrite the frame it points into.
static s32 __AXRmtCpuPtr;

// for debug
static AXExceedCallback    __AXExceedCallback;
static OSTime              __axDSPStart         = 0;
/*---------------------------------------------------------------------------*
    DSP Task information
 *---------------------------------------------------------------------------*/
// The DSP task declarations below are compiled out ("#if 0").
#if 0
#include "axDspSlave.h"
#include "axDspSlaveBalanced.h"

#define AX_DSP_YIELD_DATA_LENGTH    0x0040          // 64 bytes (= 32 words)
#define AX_DSP_YIELD_DATA_ADDR      0x0cd2          // param area
#define INIT_RAM_DATA_LENGTH 0x1000
#define INIT_RAM_DATA_ADDR 0x0000

// Look at dspSlave32.map for these
static u16 axDspInitVector = 0x0010;
static u16 axDspResumeVector = 0x0037;

// // static u8 __AXDramImage[AX_DSP_YIELD_DATA_LENGTH] ATTRIBUTE_ALIGN(32); // for saving DSP DRAM context
//          static u8      __AXDramImage[INIT_RAM_DATA_LENGTH] ATTRIBUTE_ALIGN(32) = { 0 }; // for saving DSP DRAM context
// Buffer for saving the DSP DRAM context.
ALIGNED_VAR(static u8, PPC_IO_BUFFER_ALIGN, __AXDramImage[INIT_RAM_DATA_LENGTH] = { 0 });
static DSPTaskInfo __AXDSPTask;

#endif


// List based application frame callbacks.
// The first __AXActiveAppFrameCbs entries of __AXAppFrameCallbacks are called
// in order once per frame by __AXOutNewFrame, after __AXUserFrameCallback.
#define AX_MAX_APP_FRAME_CBS    64
AXUserCallback __AXAppFrameCallbacks[AX_MAX_APP_FRAME_CBS];
u32    __AXActiveAppFrameCbs;

// Lock for the callback list; the type depends on the platform.
// NOTE(review): __AXOutNewFrame walks the list without taking this lock --
// confirm that the registration code makes that safe.
#if defined(ANDROID) || TARGET_OS_IPHONE
OSMutex __AXAppFrameCBMutex;
#else
CRITICAL_SECTION __AXAppFrameCBMutex;
#endif


// Forward declarations of the output stage helpers defined later in this file.
static void __AXSendTvFrame(u32 which_frame);
static u32 __AXSendDRCFrame(u32 which_frame);
static void __AXHandleCompressor(void);
static void __AXHandleDeviceModeReMix(void);
/*---------------------------------------------------------------------------*
 *---------------------------------------------------------------------------*/
// Stores a new application IO mode flag.
//   mode    : the value to store.
//   returns : the value that was stored before this call.
BOOL __AXSetAppIoMode(BOOL mode)
{
    const BOOL previous = __ax_app_io_mode;
    __ax_app_io_mode = mode;
    return previous;
}
/*---------------------------------------------------------------------------*
 *---------------------------------------------------------------------------*/
// Reports the application IO mode flag currently stored.
// The parameter is ignored; it is only part of the signature.
BOOL __AXGetAppIoMode(BOOL /* mode */)
{
    const BOOL current = __ax_app_io_mode;
    return current;
}
#if 0
/*---------------------------------------------------------------------------*
 Perform the PB Sync and PPC Mixdown portions of __AXOutNewFrame()
 but in the IST Thread context.
 *---------------------------------------------------------------------------*/
// NOTE: this function lies inside an "#if 0" region and is not compiled.
//
// Run phase of a frame: syncs the voice parameter blocks, hands the command
// list to the DSP and/or the PPC mixer callback, services dropped-voice
// callbacks and the AUX buses, runs (or queues) the user frame callbacks,
// advances the remote speaker ring and builds the next command list.
//   queue   : IST message queue; an AXIST_POST message is sent to it when the
//             DSP is not enabled.
//   returns : the lateness estimate computed from AIGetDMABytesLeft().
static u32 __AXIstRun(OSMessageQueue *queue)
{
    u32         cl;
    AXRmtBuffs  rbuffer;
    s32         next;
    u32         lostDspCycles;
    AXPB       *root_dsp;    // root to run on the DSP
    AXPB       *root_ppc;    // root to run on the PPC

    // start a fresh profile record for this frame
    memset(&__AXLocalProfile, 0, sizeof(AXPROFILE));
    __AXLocalProfile.axFrameStart = OSGetTime();

// Debug DSP Renderer
#ifdef _DEBUG
    DCInvalidateRange(__debugTrace, 32*sizeof(u16));
    memcpy(&__debugTrace_b[0], &__debugTrace[0], 32*sizeof(u16));
#endif

    // Sync PBs with the ones DSP processed to keep context from last frame

    // TBD this was capturing lateness based on DSP cycle times
    // we need to adjust to fractional percentages for the Cafe way
    lostDspCycles = (AX_OUT_SAMPLES_PER_FRAME - (AIGetDMABytesLeft() >> 2)) * AX_CYCLES_PER_SAMPLE;

    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        __AXSyncPBs(0, &root_dsp, &root_ppc);
    }
    else
    {
        // the first arg represents the cost of this run phase when only double-buffered
        // since this will be the only mode
        // this cost is now to be included in the AX base cost of doing business
        // while we're at it, get back the active voice list roots
        __AXSyncPBs(AX_OUT_CONSUMED_OFFSET * AX_CYCLES_PER_USEC, &root_dsp, &root_ppc);
    }

    // print the studio parameter block (i.e. initial buss channel depop states)
    __AXPrintStudio();

#if (0)
    // tell the command list module it's time to make another frame for the next DMA cycle
    // OSReport("of = %u\n", __AXOutFrame);
    __AXNextFrame(&__AXOutTVBuffer[AX_TV_ID0][__AXOutFrame][AX_CH_LEFT][0],
                  &rbuffer, root_dsp, root_ppc,
                  &__AXOutDRCBuffer[AX_DRC_ID0][__AXOutFrame][AX_CH_LEFT][0],
                  &__AXOutDRCBuffer[AX_DRC_ID1][__AXOutFrame][AX_CH_LEFT][0]);
trace(AI_BUILD_CL, __AXOutFrame)
#endif
    // update the Command List built last frame with
    // this frame's actual voice list roots
    // and get its address to pass to the Renderers
    cl = __AXGetCommandListAddress(root_dsp, root_ppc);
    // stash it for the AXIstPost phase for the PPC Renderer
    __last_cl = (u16*)cl;

    if (__AXMixingMode&AXMIX_DSP_ENABLE)
    {
        u32 cl_pe = (u32)OSEffectiveToPhysical((u32*)cl);

// set in AI Interrupt!!!
//      __AXOutDspReady = AX_DSP_FRAME_NOT_COMPLETE;

#if PLATFORM==CAFE
        {
            // set up the Segment Offset register for the DSP to get the cl
            u16 mmem_seg_offset = (u16)((cl_pe >> 29)&0x7);
            OSWriteRegister16(mmem_seg_offset, OS_DSP, DSP_MEM_4GB_ADDR_OFFSET_IDX);
    //        __dsp_p_DSPRegs[DSP_MEM_4GB_ADDR_OFFSET_IDX] = mmem_seg_offset;
        }
#endif

trace(AI_ISSUE_CL0, 0)
        // give the DSP a new command list
        DSPSendMailToDSP(0xBABE0000 | AX_HOST_CL_BYTES);

trace(AI_ISSUE_CL1, 0)
        // busy-wait until the DSP has taken the mail
        while (DSPCheckMailToDSP()) ;

trace(AI_ISSUE_CL2, 0)
        DSPSendMailToDSP(cl_pe);

trace(AI_ISSUE_CL3, 0)
        while (DSPCheckMailToDSP()) ;
//        __AXLocalProfile.axDSPStart = OSGetTime();
        __axDSPStart = OSGetTime();
trace(AI_ISSUE_CL, __AXOutFrame^1)
    }

    if ((__AXMixingMode&AXMIX_PPC_ENABLE) && __AXMixerCallback)
    {
        __AXLocalProfile.axPPCStart = OSGetTime();
        (*__AXMixerCallback)((u16*)cl, FALSE);
        __AXLocalProfile.axPPCEnd = OSGetTime();
        // the post pass is timed later, in __AXIstPost
        __AXLocalProfile.axPPCPostStart = 0LL;
        __AXLocalProfile.axPPCPostEnd = 0LL;
    }

    // in a stopped single step mixer debug state
    // but have to keep AI handler running
    // so ..
    if (!__AXMixingMode)
    {
        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
    }

    // first lets do callbacks for voices that might have been dropped due to
    // dsp cycles
    __AXServiceCallbackStack();

    // process the AUX buses
    __AXLocalProfile.userAuxProcStart = OSGetTime();
    __AXProcessAux();
    __AXLocalProfile.userAuxProcEnd = OSGetTime();

    // Perform the call back to let the user know another frame has gone by,
    // this should cause the user to give us some more audio events. These
    // will be in the form of allocating and setting voice PB parameters, and
    // the voices to be mixed will get put into an active PB stack by the
    // AXVPB code module.
    if (__ax_app_io_mode)
    {
        // app IO mode: the callback is not run here; an AXIO_RUN message is
        // sent to the IO message queue instead
        if (__AXUserFrameCallback )
        {
            AXISTMessage msg;

            msg.data = NULL;
            msg.type = AXIO_RUN;
            msg.time = OSGetTime();

            if (!OSSendMessage(&__AXIoMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK))
            {
                //OSReport("%s: OSSendMessage AXIO_RUN Failed\n", __FUNCTION__);
            }

    //        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
        }
    }
    else
    {
        __AXLocalProfile.userCallbackStart = OSGetTime();
        if (__AXUserFrameCallback) {
            (*__AXUserFrameCallback)();
        }
        // also go through the new APP frame callbacks
        for(u32 i = 0; i < __AXActiveAppFrameCbs; i++)
        {
            (*__AXAppFrameCallbacks[i])();
        }

        __AXLocalProfile.userCallbackEnd   = OSGetTime();
    }

    // remote speaker destinations for this frame, at the current write offset
    rbuffer.rmt0 = &__AXRmtOutBuffer[0][__AXRmtDspPtr];
    rbuffer.rmt1 = &__AXRmtOutBuffer[1][__AXRmtDspPtr];
    rbuffer.rmt2 = &__AXRmtOutBuffer[2][__AXRmtDspPtr];
    rbuffer.rmt3 = &__AXRmtOutBuffer[3][__AXRmtDspPtr];

#ifdef AX_SUPPORT_RMT_FINAL_MIX
    u32 chCnt, dev_id;
    for(dev_id = 0; dev_id < AX_MAX_NUM_RMTS; dev_id++)
    {
        for(chCnt = 0; chCnt < AX_MAX_NUM_RMT_CHS ; chCnt++)
        {
            __AXRMTFinalMixPointers[__AXRMTFinalMixWritePos][dev_id*AX_MAX_NUM_RMT_CHS + chCnt] = &__AXRmtOutBuffer[dev_id][__AXRmtDspPtr];
        }
    }
    // toggle between the two pointer sets
    __AXRMTFinalMixWritePos = (__AXRMTFinalMixWritePos + 1) & (0x1);
#endif //     AX_SUPPORT_RMT_FINAL_MIX
    // advance the write offset by one frame, wrapping at the end of the ring
    next = __AXRmtDspPtr + AX_RMT_SAMPLES_PER_FRAME;
    if (next >= __AXRmtBuffLen)
    {
        next = 0;
    }

    // if the read offset lies inside the frame about to be written, move it on
    if ((__AXRmtCpuPtr >= __AXRmtDspPtr) && (__AXRmtCpuPtr < __AXRmtDspPtr + AX_RMT_SAMPLES_PER_FRAME))
    {
        __AXRmtCpuPtr = next;
    }

    __AXRmtDspPtr = next;
#if (1)
    // tell the command list module it's time to make another frame for the next DMA cycle
    // OSReport("of = %u\n", __AXOutFrame);
    __AXNextFrame(&__AXOutTVBuffer[AX_TV_ID0][__AXOutFrame][AX_CH_LEFT][0],
                  &rbuffer, root_dsp, root_ppc,
                  &__AXOutDRCBuffer[AX_DRC_ID0][__AXOutFrame][AX_CH_LEFT][0],
                  &__AXOutDRCBuffer[AX_DRC_ID1][__AXOutFrame][AX_CH_LEFT][0]);
trace(AI_BUILD_CL, __AXOutFrame)
#endif

    // if DSP is not running we invoke PPC Post processing here
    if (!(__AXMixingMode&AXMIX_DSP_ENABLE))
    {
        AXISTMessage msg;

        msg.data = NULL;
        msg.type = AXIST_POST;
        msg.time = OSGetTime();

        if (!OSSendMessage(queue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK))
        {
            //OSReport("%s: OSSendMessage AXIST_POST Failed\n", __FUNCTION__);
        }

        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
    }


    __AXLateMonitorAX(&__AXLocalProfile);

    return lostDspCycles;
}

/*---------------------------------------------------------------------------*
 Perform the PPC Post portions of __AXOutNewFrame() / __AXDSPResumeCallback
 but in the IST Thread context.
 *---------------------------------------------------------------------------*/
// NOTE: this function lies inside an "#if 0" region and is not compiled.
//
// Post phase of a frame: runs the PPC mixer post pass, flips the output frame
// index, runs the final mix / ramp / compressor / remix stages, sends the
// frame to the TV and DRC outputs and publishes the frame profile.
//   queue   : IST message queue (not used by this function).
//   returns : the value returned by __AXSendDRCFrame().
static u32 __AXIstPost(OSMessageQueue *queue)
{
    AXPROFILE   *profile;
    u32          bPushedAI = FALSE;


    if (__AXMixerCallback)
    {
        // second (post) pass of the PPC mixer over the stashed command list
        __AXLocalProfile.axPPCPostStart = OSGetTime();
        (*__AXMixerCallback)((u16*)__last_cl, TRUE);
        __AXLocalProfile.axPPCPostEnd = OSGetTime();
    }


    // push the buffer along
    __AXOutFrame++;
    __AXOutFrame &= 1;

/*
    This is the place where all the final mixes for the devices are done.
    Time to call the user functions that modify the final mixes.
    IMPORTANT: these callbacks have to run after __AXOutFrame is updated above.
*/

    /* --------------------------------------------
     * The order implemented as of now is:
     *      - Upsample to 48kHz,
     *      - Final Mix Callbacks
     *      - Ramp
     *      - Compressor
     *      - Upsample/ Down sample
     *      - format for the device mode
     *---------------------------------------------*/
    // call the final mix callback stage function
    // Do final Mix for both TV & DRC
    __AXLocalProfile.userFinalMixStart = OSGetTime();
    __AXHandleFinalMixCallbackStage();
    __AXLocalProfile.userFinalMixEnd = OSGetTime();

   // the output now is in s32 Q0; at 48kHz

    // take care of ramping the outputs for fg/bg transitions
    __AXHandleRamp();

    // Apply compressor on all the outputs
    __AXHandleCompressor();

    // Remix the outputs as per device mode
    __AXHandleDeviceModeReMix();

    // Format/pack and send to devices
    __AXSendTvFrame(__AXOutFrame);
    bPushedAI = __AXSendDRCFrame(__AXOutFrame);


    __AXLocalProfile.axFrameEnd     = OSGetTime();
    __AXLocalProfile.axNumVoices     = __AXGetNumVoices();
    __AXLocalProfile.axNumDspVoices  = __AXGetNumDspVoices();

    profile = __AXGetCurrentProfile();
    __AXLastProfile = profile;

    if (profile)
    {
        // byte-wise copy of the local profile into the current profile entry
        u8 *src, *dest;
        u32 i;

        i = sizeof(AXPROFILE);

        dest   = (u8*)profile;
        src    = (u8*)&__AXLocalProfile;

        while (i)
        {
            *dest = *src;
            dest++;
            src++;
            i--;
        }
    }
    __AXLateMonitorAX(&__AXLocalProfile);

    return bPushedAI;
}

#if 0
/*---------------------------------------------------------------------------*
 AX IO Service Thread
 This handles the AXUserFrameCallback only
 *---------------------------------------------------------------------------*/
// NOTE: this function lies inside nested "#if 0" regions and is not compiled.
//
// Thread body of the AX IO thread. Blocks on the message queue and, for every
// AXIO_RUN message, calls the user frame callback, recording its start and end
// times in the last published profile entry. Leaves the loop on AXIO_QUIT.
//   argc    : unused.
//   argv    : pointer to the OSMessageQueue to receive from.
//   returns : always 0.
static int __AXIoThreadFunc(int argc, void* argv)
{
    OSMessageQueue *queue = (OSMessageQueue *)argv;
    OSMessage       msg;
    AXISTMessage   *res;
    u32             done = 0;

#ifdef _DEBUG
    OSReport("AX IO Thread running on Core %d\n", OSGetCoreId());
#endif

    while (!done)
    {
        if (FALSE == OSReceiveMessage(queue, &msg, OS_MESSAGE_BLOCK))
        {
            OSHalt("Error: __AXIoThreadFunc failed to receive message.\n");
        }
        // the received OSMessage is reinterpreted as an AXISTMessage
        res = (AXISTMessage*) &msg;
        switch (res->type)
        {
        case AXIO_RUN:
            if (__AXLastProfile)
                __AXLastProfile->userCallbackStart = OSGetTime();
            if (__AXUserFrameCallback)
                (*__AXUserFrameCallback)();
            if (__AXLastProfile)
                __AXLastProfile->userCallbackEnd = OSGetTime();
            break;

        case AXIO_QUIT:
            done = 1;
            break;
        }
    }
    return 0;
}
#endif

/*---------------------------------------------------------------------------*
 AX Interrupt Service Thread
 All lengthy service for AI and DSP interrupts is handled in this Thread.
 It is a Driver Thread running at highest priority in the core.
 *---------------------------------------------------------------------------*/
// NOTE: this function lies inside an "#if 0" region and is not compiled.
//
// Thread body of the AX interrupt service thread. Blocks on the message queue
// and dispatches on the message type until AXIST_QUIT arrives.
//   argc    : unused.
//   argv    : pointer to the OSMessageQueue to receive from.
//   returns : always 0.
static int __AXIstThreadFunc(int argc, void* argv)
{
    OSMessageQueue *queue = (OSMessageQueue *)argv;
    OSMessage       msg;
    AXISTMessage   *res;
    u32             done = 0;
    u32             aiEarly = 0;
    // NOTE(review): only assigned in the AXIST_RUN case. An AXIST_POST handled
    // after AXIST_AI_EARLY without an AXIST_RUN in between would pass an
    // uninitialized value to the exceed callback.
    u32             lostDspCycles;
    u32             postPushedAI = 0;

#ifdef _DEBUG
    OSReport("AX IST Thread running on Core %d with Priority %d\n", OSGetCoreId(), OSGetThreadPriority(OSGetCurrentThread()));
#endif

    while (!done)
    {
        if (FALSE == OSReceiveMessage(queue, &msg, OS_MESSAGE_BLOCK))
        {
            OSHalt("Error: __AXIstThreadFunc failed to receive message.\n");
        }

        // the received OSMessage is reinterpreted as an AXISTMessage
        res = (AXISTMessage*) &msg;

        // holds the message type while servicing (non-zero for everything but
        // AXIST_QUIT); cleared again after the switch
        __AXServicingFrameFlag = res->type; // TRUE;

        switch (res->type)
        {
        case AXIST_RUN:
            // latency = time between message creation and its handling
            __AXLocalProfile.istLatency = OSGetTime() - res->time;
            trace(AX_IST_RUN, 0);
            lostDspCycles = __AXIstRun(queue);
            break;

        case AXIST_POST:
            __AXLocalProfile.istPostLatency = OSGetTime() - res->time;
            trace(AX_IST_POST, 0);
            postPushedAI = __AXIstPost(queue);
            // for debug
            if (aiEarly)
            {
                aiEarly = 0;
                if (__AXExceedCallback)
                {
                    (*__AXExceedCallback)(lostDspCycles);
                }
            }
            break;

        case AXIST_AI2_EMPTY:
            if (!postPushedAI) // (__DRC2AI2_empty)
            {
               // AI2 still needs us to push to AI2 DMA
                __AXAI2_push_frame();
            }
            break;

        case AXIST_AI_EARLY:
            // remembered so that the next AXIST_POST reports the lateness
            aiEarly = 1;
            // for debug
            if (__AXExceedCallback)
            {
                (*__AXExceedCallback)(0);
            }
            if (PPC_ONLY)
            {
                __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
            }
            // may want to note ticks for servicing latency at some point

            break;

        case AXIST_QUIT:
            done = 1;
            break;

        default:
            OSReport("%s: unsupported function type (%d)\n", __FUNCTION__, res->type);
            OSHalt("Error: unsupported function type.\n");
            break;
        }
        __AXServicingFrameFlag = FALSE;

//        OSRestoreInterrupts(old);
        trace(AX_IST_OUT, __fp__OSIsInterruptEnabled());
    }

    return 0;
}
#endif
/*---------------------------------------------------------------------------*
    This function sends mail to the DSP for the next frame and causes another
    commandlist to be generated by the runtime code. This is normally called
    by the AI callback, however if the DSP has not finished in time it will
    be called by the DSP callback.

    Not called when built in IST mode.
 *---------------------------------------------------------------------------*/
// Runs one AX frame:
//   1. resets the local profile and records the frame start time,
//   2. fetches the command list and runs the PPC mixer callback (both passes
//      when the DSP is not enabled),
//   3. services dropped-voice callbacks, the AUX buses and the user / app
//      frame callbacks,
//   4. advances the remote speaker ring and builds the next command list,
//   5. advances the output frame index and, outside triple-buffer mode, sends
//      the frame to the DRC and TV outputs,
//   6. copies the local profile into the current user profile entry, if any.
//
// The PB sync and DSP mail steps are compiled out ("#if 0"), so the voice list
// roots passed on are always NULL here.
//
// Returns: lostDspCycles, which is always 0 because the lateness computation is
// commented out.
u32 __AXOutNewFrame(void)
{
    u32         cl;
    AXPROFILE   *profile;
    AXRmtBuffs  rbuffer;
    s32         next;
    u32         lostDspCycles = 0;
    AXPB       *root_dsp = NULL;    // root to run on the DSP
    AXPB       *root_ppc = NULL;    // root to run on the PPC

    // start a fresh profile record for this frame
    memset(&__AXLocalProfile, 0, sizeof(AXPROFILE));
    __AXLocalProfile.axFrameStart = OSGetTime();

    // Sync PBs with the ones DSP processed to keep context from last frame

//    lostDspCycles = (AX_OUT_SAMPLES_PER_FRAME - (AIGetDMABytesLeft() >> 2)) * AX_CYCLES_PER_SAMPLE;

#if 0
    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        __AXSyncPBs(0, &root_dsp, &root_ppc);
    }
    else
    {
        __AXSyncPBs(AX_OUT_CONSUMED_OFFSET * AX_CYCLES_PER_USEC, &root_dsp, &root_ppc);
    }
#endif

    // print the studio parameter block
    __AXPrintStudio();

    // get the command list address and stash it for later use
    cl = __AXGetCommandListAddress(root_dsp, root_ppc);
    __last_cl = (u16*)cl;

#if 0
    if (__AXMixingMode&AXMIX_DSP_ENABLE)
    {
        u32 cl_pe = (u32)OSEffectiveToPhysical((u32*)cl);

#if PLATFORM==CAFE
        {
            // set up the Segment Offset register for the DSP to get the cl
            u16 mmem_seg_offset = (u16)((cl_pe >> 29)&0x7);
            OSWriteRegister16(mmem_seg_offset, OS_DSP, DSP_MEM_4GB_ADDR_OFFSET_IDX);
            //        __dsp_p_DSPRegs[DSP_MEM_4GB_ADDR_OFFSET_IDX] = mmem_seg_offset;
        }
#endif

        // give the DSP a new command list
        DSPSendMailToDSP(0xBABE0000 | 128);

        while (DSPCheckMailToDSP()) ;

        DSPSendMailToDSP(cl_pe);

        while (DSPCheckMailToDSP()) ;
        //        __AXLocalProfile.axDSPStart = OSGetTime();
        __axDSPStart = OSGetTime();
        trace(AI_ISSUE_CL, __AXOutFrame^1)
    }
#endif

    if ((__AXMixingMode&AXMIX_PPC_ENABLE) && __AXMixerCallback)
    {
        // first pass of the PPC mixer
        __AXLocalProfile.axPPCStart = OSGetTime();
        (*__AXMixerCallback)((u16*)cl, FALSE);
        __AXLocalProfile.axPPCEnd = OSGetTime();
        if (!(__AXMixingMode&AXMIX_DSP_ENABLE))
        {
            // no DSP: run the post pass right away and mark the frame complete
            __AXLocalProfile.axPPCPostStart = OSGetTime();
            (*__AXMixerCallback)((u16*)cl, TRUE);
            __AXLocalProfile.axPPCPostEnd = OSGetTime();
            __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
        }
        else
        {
            __AXLocalProfile.axPPCPostStart = 0LL;
            __AXLocalProfile.axPPCPostEnd = 0LL;
        }
    }

    // in a stopped single step mixer debug state
    // but have to keep AI handler running
    // so ..
    if (!__AXMixingMode)
    {
        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
    }

    // first lets do callbacks for voices that might have been dropped due to
    // dsp cycles
    __AXServiceCallbackStack();

    // process the AUX buses
    __AXLocalProfile.userAuxProcStart = OSGetTime();
    __AXProcessAux();
    __AXLocalProfile.userAuxProcEnd = OSGetTime();

    // Perform the call back to let the user know another frame has gone by,
    // this should cause the user to give us some more audio events. These
    // will be in the form of allocating and setting voice PB parameters, and
    // the voices to be mixed will get put into an active PB stack by the
    // AXVPB code module.
    __AXLocalProfile.userCallbackStart = OSGetTime();
    if (__AXUserFrameCallback)
        (*__AXUserFrameCallback)();
    // also go through the new APP frame callbacks
    for(u32 i = 0; i < __AXActiveAppFrameCbs; i++)
    {
        (*__AXAppFrameCallbacks[i])();
    }
    __AXLocalProfile.userCallbackEnd   = OSGetTime();

    // remote speaker destinations for this frame, at the current write offset
    rbuffer.rmt0 = &__AXRmtOutBuffer[0][__AXRmtDspPtr];
    rbuffer.rmt1 = &__AXRmtOutBuffer[1][__AXRmtDspPtr];
    rbuffer.rmt2 = &__AXRmtOutBuffer[2][__AXRmtDspPtr];
    rbuffer.rmt3 = &__AXRmtOutBuffer[3][__AXRmtDspPtr];

    // advance the write offset by one frame, wrapping at the end of the ring
    next = __AXRmtDspPtr + AX_RMT_SAMPLES_PER_FRAME;
    if (next >= __AXRmtBuffLen)
    {
        next = 0;
    }

    // if the read offset lies inside the frame about to be written, move it on
    if ((__AXRmtCpuPtr >= __AXRmtDspPtr) && (__AXRmtCpuPtr < __AXRmtDspPtr + AX_RMT_SAMPLES_PER_FRAME))
    {
        __AXRmtCpuPtr = next;
    }

    __AXRmtDspPtr = next;

    // tell the command list module it's time to make another frame for the next DMA cycle
    __AXNextFrame(&__AXOutTVBuffer[AX_TV_ID0][__AXOutFrame][AX_CH_LEFT][0],
                  &rbuffer, root_dsp, root_ppc,
                  &__AXOutDRCBuffer[AX_DRC_ID0][__AXOutFrame][AX_CH_LEFT][0],
                  &__AXOutDRCBuffer[AX_DRC_ID1][__AXOutFrame][AX_CH_LEFT][0]);
//trace(AI_BUILD_CL, __AXOutFrame)

    // push the buffer along
    __AXOutFrame++;

    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        __AXOutFrame %= 3;
    }
    else
    {
        __AXOutFrame &= 1;
        // tell AI to DMA the frame the DSP just completed
        // LPCM 4.0 - moved into __AXSendTvFrame
        // AIInitDMA((u32)&__AXOutBuffer[__AXOutFrame], AX_OUT_SAMPLES_PER_FRAME * 4);
#ifdef DSP_DOES_MULTI_SRC // should not uprate until we KNOW DSP has completed
        if (!(__AXMixingMode&AXMIX_DSP_ENABLE) || AXGetMode() < AX_MODE_DPL2)
#endif
        {
            __AXSendDRCFrame(__AXOutFrame);
            __AXSendTvFrame(__AXOutFrame);
        }
//        trace(AI_INIT_DMA, __AXOutFrame)
    }

    __AXLocalProfile.axNumVoices     = AXGetNumVoices();
    __AXLocalProfile.axNumDspVoices  = AXGetNumDspVoices();

    profile = __AXGetCurrentProfile();
    __AXLastProfile = profile;

    if (profile)
    {
        // publish this frame's profile; a raw byte copy, exactly what the
        // former hand-written byte loop did
        memcpy(profile, &__AXLocalProfile, sizeof(AXPROFILE));
    }

//    __AXLateMonitorAX(&__AXLocalProfile);

    return lostDspCycles;
}

/*---------------------------------------------------------------------------*
    Uprate output to 48K and 6 channels if in DPL2 mode, else stereo at 32K
 *---------------------------------------------------------------------------*/
#ifndef max
#define max(a,b) (((a)>(b)) ? (a) : (b))
#define min(a,b) (((a)>(b)) ? (b) : (a))
#endif
#define clamp_sample(sample) max(-32768, min(32767, (sample)))

// Converts one frame of the TV final mix to interleaved s16 samples and passes
// the resulting buffer to AIInitDMA().
//
//   which_frame : index of the output buffer to fill and send.
//
// AX_MODE_6CHAN  : writes R, L, FC, LFE, SR, SL per sample into __AXOut6chBuffer.
// AX_MODE_STEREO : writes R, L per sample into __AXOutBuffer.
// Any other TV device mode writes nothing and starts no DMA.
static void __AXSendTvFrame(u32 which_frame)
{
    if (__AXDeviceMode[AX_DEVICE_TV] == AX_MODE_6CHAN)
    {
        // The mix buffers are produced on the PPC side, so no cache
        // invalidation is required before reading them.
        const s32 *frontLeft     = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_LEFT][0];
        const s32 *frontRight    = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_RIGHT][0];
        const s32 *surroundLeft  = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_SUR_LEFT][0];
        const s32 *surroundRight = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_SUR_RIGHT][0];
        const s32 *center        = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_FC][0];
        const s32 *lfe           = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_LFE][0];

        s16 *packed = &__AXOut6chBuffer[which_frame][0];

        // Saturate each channel and store it at its slot in the 6-sample group.
        for (s32 idx = 0; idx < AX_OUT_48K_SAMPLES_PER_FRAME; ++idx)
        {
            packed[0] = (s16)clamp_sample(frontRight[idx]);     // front right
            packed[1] = (s16)clamp_sample(frontLeft[idx]);      // front left
            packed[2] = (s16)clamp_sample(center[idx]);         // FC
            packed[3] = (s16)clamp_sample(lfe[idx]);            // LFE
            packed[4] = (s16)clamp_sample(surroundRight[idx]);  // rear right
            packed[5] = (s16)clamp_sample(surroundLeft[idx]);   // rear left
            packed += 6;
        }

        // AIInitDMA will flush the cache.
        AIInitDMA((u32)&__AXOut6chBuffer[which_frame], AX_OUT_48K_SAMPLES_PER_FRAME * 6 * sizeof(s16));
    }
    else if (__AXDeviceMode[AX_DEVICE_TV] == AX_MODE_STEREO) // DPL2 aka 4CHAN TBD
    {
        const s32 *left  = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_LEFT][0];
        const s32 *right = &__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_RIGHT][0];

        s16 *interleaved = &__AXOutBuffer[which_frame][0];

        // Right sample first, then left, for every frame position.
        for (s32 idx = 0; idx < AX_OUT_48K_SAMPLES_PER_FRAME; ++idx)
        {
            interleaved[2 * idx]     = (s16)clamp_sample(right[idx]);
            interleaved[2 * idx + 1] = (s16)clamp_sample(left[idx]);
        }

        // AIInitDMA will flush the cache.
        AIInitDMA((u32)&__AXOutBuffer[which_frame], AX_OUT_48K_SAMPLES_PER_FRAME * 2 * sizeof(s16));
    }
}
/*---------------------------------------------------------------------------*
    Returns the current value of __AXServicingFrameFlag, so a caller can tell
    whether it is running while a frame is being serviced (ISR context) or at
    thread level.
    Within this part of the file the flag is only written by the AI/DSP
    callbacks, which are currently compiled out.
 *---------------------------------------------------------------------------*/
BOOL __AXServicingFrame(void)
{
    return __AXServicingFrameFlag;
}
/*---------------------------------------------------------------------------*
    This is the callback for the AI interrupt, handled in the ISR,
    this version invokes the Interrupt Service Thread for servicing.
    First we check to make sure the DSP has completed the last frame.
    If it has we will increment the output buffer then make the next commandlist.
 *---------------------------------------------------------------------------*/
#if 0
// NOTE: this function is inside an `#if 0` region and is not compiled.
// It posts AXIST_RUN or AXIST_AI_EARLY to __AXIstMessageQueue instead of
// building the frame directly.
void __AXIstAiCallback(void)
{
    AXISTMessage msg;

    // chain to the callback saved in __AX_OldAIDMACallback, if there is one
    if (__AX_OldAIDMACallback) (*__AX_OldAIDMACallback)();

//trace(AI_INT, __AXOutFrame)
    // nothing to do once the DSP task has signalled that it is done
    if (__AXDSPDoneFlag == TRUE)
    {
//trace(AI_END, __AXOutFrame)
        return;
    }

    // if the DSP has completed the last frame we can go ahead and make a new
    // frame here or else mark the __AXOutDspReady flag to tell the DSP
    // callback to make the next frame
    if (__AXOutDspReady == AX_DSP_FRAME_COMPLETE)
    {
        __AXOutDspReady = AX_DSP_FRAME_NOT_COMPLETE;

        msg.data = NULL;
        msg.type = AXIST_RUN;
        msg.time = OSGetTime();

        // Note: __OSSendMessageInternal must be called with interrupts
        //       disabled, because it does not disable interrupts itself.
        //       This is an optimization: OSDisableInterrupts and
        //       OSRestoreInterrupts are system calls that cost cycles, and
        //       they are unnecessary inside an ISR, where interrupts are
        //       already disabled.
        if (!__OSSendMessageInternal(&__AXIstMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK))
        {
            // a failed send is silently ignored
            //OSReport("%s: OSSendMessage AXIST_RUN Failed\n", __FUNCTION__);
        }
    }
    else
    {
        // The DSP is late! So when the DSP actually finishes, the "Resume"
        // callback should drive generation of the next audio frame.
        __AXOutDspReady = AX_DSP_FRAME_MAKEFRAME;

        msg.data = NULL;
        msg.type = AXIST_AI_EARLY;
        msg.time = OSGetTime();

        // Note: __OSSendMessageInternal must be called with interrupts
        //       disabled, because it does not disable interrupts itself.
        //       This is an optimization: OSDisableInterrupts and
        //       OSRestoreInterrupts are system calls that cost cycles, and
        //       they are unnecessary inside an ISR, where interrupts are
        //       already disabled.
        if (!__OSSendMessageInternal(&__AXIstMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK))
        {
            // a failed send is silently ignored
            //OSReport("%s: OSSendMessage AXIST_AI_EARLY Failed\n", __FUNCTION__);
        }

        // for debug
        if (__AXExceedCallback)
        {
            (*__AXExceedCallback)(0);
        }

        // Since the audio task is running at highest priority, we can be
        // 'rude' and assert the task! This will:
        //
        // - Send an interrupt to the current DSP task, telling it to yield
        // - Mark the audio task as the next task to run, no matter what
        //
        // If the DSP audio task is already running but is late because it
        // had too much to do, DSPAssertTask() will ensure that the audio
        // task keeps control of the DSP and does NOT yield.
        DSPAssertTask(&__AXDSPTask);
    }

    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        u32 nextFrame;

        // tell AI to DMA the next frame
        AIInitDMA((u32)(&__AXOutBuffer[__AXAiDmaFrame]), AX_OUT_SAMPLES_PER_FRAME * 4);

        // advance the DMA frame index modulo 3, but never onto the frame that
        // __AXOutFrame currently points at
        nextFrame  = __AXAiDmaFrame + 1;
        nextFrame %= 3;
        if (nextFrame != __AXOutFrame)
        {
            __AXAiDmaFrame = nextFrame;
        }
    }
trace(AI_END, 0)
}

/*---------------------------------------------------------------------------*
    This is the callback for the AI interrupt, handled in the ISR.
    First we check to make sure the DSP has completed the last frame.
    If it has, we build the next frame right here via __AXOutNewFrame();
    otherwise we flag the DSP resume callback to build it when the DSP is done.

    Not called when built in IST mode.
    NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
void __AXOutAiCallback(void)
{
    u32 nextFrame;

    // chain to the callback saved in __AX_OldAIDMACallback, if there is one
    if (__AX_OldAIDMACallback) (*__AX_OldAIDMACallback)();

trace(AI_INT, __AXOutFrame)
    // nothing to do once the DSP task has signalled that it is done
    if (__AXDSPDoneFlag == TRUE)
    {
trace(AI_END, __AXOutFrame)
        return;
    }

    // visible through __AXServicingFrame() until cleared at the end
    __AXServicingFrameFlag = TRUE;

    // if the DSP has completed the last frame we can go ahead and make a new
    // frame here or else mark the __AXOutDspReady flag to tell the DSP
    // callback to make the next frame
    if (__AXOutDspReady == AX_DSP_FRAME_COMPLETE)
    {
        __AXOutDspReady = AX_DSP_FRAME_NOT_COMPLETE;
        __AXOutNewFrame();
    }
    else
    {
        // The DSP is late! So when the DSP actually finishes, the "Resume"
        // callback should drive generation of the next audio frame.
        __AXOutDspReady = AX_DSP_FRAME_MAKEFRAME;
        // for debug
        if (__AXExceedCallback)
        {
            (*__AXExceedCallback)(0);
        }

        // Since the audio task is running at highest priority, we can be
        // 'rude' and assert the task! This will:
        //
        // - Send an interrupt to the current DSP task, telling it to yield
        // - Mark the audio task as the next task to run, no matter what
        //
        // If the DSP audio task is already running but is late because it
        // had too much to do, DSPAssertTask() will ensure that the audio
        // task keeps control of the DSP and does NOT yield.
        DSPAssertTask(&__AXDSPTask);
    }

    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        // tell AI to DMA the next frame
        AIInitDMA((u32)(&__AXOutBuffer[__AXAiDmaFrame]), AX_OUT_SAMPLES_PER_FRAME * 4);

        // advance the DMA frame index modulo 3, but never onto the frame that
        // __AXOutFrame currently points at
        nextFrame  = __AXAiDmaFrame + 1;
        nextFrame %= 3;
        if (nextFrame != __AXOutFrame)
        {
            __AXAiDmaFrame = nextFrame;
        }
    }
    __AXServicingFrameFlag = FALSE;
trace(AI_END, 0)
}

/*---------------------------------------------------------------------------*
    This is the AI2 DRC Audio DMA handler.
    It pops the next buffer index from __DRC2AI2_FIFO to figure out which DRC
    block we should send next.

    If the DSP is really late the FIFO will be empty,
    so flag the DSP to call this function when it is ready.

    When the DSP calls here the FIFO is guaranteed not to be empty
    because it just put one in.

    NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
void __AXAI2_push_frame(void)
{
    u32  next_frame;
    // read the __DRC2AI2_FIFO for the next safe buffer index
    // (the FIFO is empty when the in and out indices are equal)
    if (__DRC2AI2_in != __DRC2AI2_out)
    {
        OSSwapAtomic(&__DRC2AI2_empty, FALSE);
        next_frame = __DRC2AI2_FIFO[__DRC2AI2_out];
trace(AI2_POP, __DRC2AI2_out)
        // the out index wraps within 0..3
        __DRC2AI2_out = (__DRC2AI2_out + 1) & 3;

        // copy current DRC buffer into next AI2 buffer
        memcpy(&__AXOutAI2Buffer[__AXOutAI2Frame], &__AXOutDRCAI2Buffer[next_frame], AX_OUT_48K_SAMPLES_PER_FRAME * 4);
        DCFlushRange((&__AXOutAI2Buffer[__AXOutAI2Frame]), AX_OUT_48K_SAMPLES_PER_FRAME * 4);
trace(AI2_COPY, __AXOutAI2Frame)

        // set it up to go out next to DRC
        AI2InitDMA((u32)(&__AXOutAI2Buffer[__AXOutAI2Frame]), AX_OUT_48K_SAMPLES_PER_FRAME * 4);
trace(AI2_INIT_DMA, __AXOutAI2Frame)
        // alternate between AI2 buffer 0 and 1
        __AXOutAI2Frame ^= 1;
    }
    else
    {
        // DSP has to catch up, let it do this
        OSSwapAtomic(&__DRC2AI2_empty, TRUE);
        // __DRC2AI2_empty = TRUE;
trace(AI2_EMPTY,__DRC2AI2_out)
    }
}
/*---------------------------------------------------------------------------*
    This is the AI2 DRC Audio callback.
    It chains to the callback saved in __AX_OldAI2DMACallback (if any) and
    then lets __AXAI2_push_frame() queue the next DRC block.

    NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
void __AXOutAi2Callback(void)
{
    if (__AX_OldAI2DMACallback) (*__AX_OldAI2DMACallback)();

trace(AI2_INT,__AXOutFrame)
    __AXAI2_push_frame();
trace(AI2_END, 0)
}
/*---------------------------------------------------------------------------*
    This callback is invoked when the DSP audio task has initialized for the
    first time.  It only raises __AXDSPInitFlag, which __AXOutInitDSP() polls.
    The task argument is unused.

    NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
static void __AXDSPInitCallback(void *task __attribute__((unused)))
{
// #pragma unused(task)

    __AXDSPInitFlag = TRUE;

} // end __AXDSPInitCallback()

#endif


/*---------------------------------------------------------------------------*
    Set debug stepping mode.
    Stores i in __AXDebugSteppingMode.  In this part of the file the value is
    only tested for non-zero, and only by the compiled-out DSP resume callbacks
    in _DEBUG builds.
 *---------------------------------------------------------------------------*/
void AXSetStepMode(u32 i)
{
    __AXDebugSteppingMode = i;
}

/*---------------------------------------------------------------------------*
    Select the default mixer (used when acquiring voices).

    mixerSelection : requested mixer.  It is honoured only when
                     __AXMixingMode is neither exactly AXMIX_PPC_ENABLE nor
                     exactly AXMIX_DSP_ENABLE; in those two cases the PPC or
                     DSP mixer is forced instead.

    Returns the default mixer that was in effect before the call.
 *---------------------------------------------------------------------------*/
u32 AXSetDefaultRenderer(u32 mixerSelection)
{
    const u32 previousMixer = __AXDefaultMixer;
    u32 chosenMixer = mixerSelection;

    // A single-renderer mixing mode overrides the caller's request.
    if (AXMIX_PPC_ENABLE == __AXMixingMode)
    {
        chosenMixer = AX_PB_MIXER_SELECT_PPC;
    }
    else if (AXMIX_DSP_ENABLE == __AXMixingMode)
    {
        chosenMixer = AX_PB_MIXER_SELECT_DSP;
    }

    __AXDefaultMixer = chosenMixer;
    return previousMixer;
}
/*---------------------------------------------------------------------------*
    Returns the current default mixer (__AXDefaultMixer), as last stored by
    AXSetDefaultRenderer().
 *---------------------------------------------------------------------------*/
u32     AXGetDefaultRenderer(void)
{
    return __AXDefaultMixer;
}

/*---------------------------------------------------------------------------*
    This is the callback for the DSP to CPU interrupt in IST mode, this is simply to
    mark that the DSP has completed the last frame. Hopefully we get this
    before the next AI interrupt.
 *---------------------------------------------------------------------------*/
#if 0
// NOTE: this function is inside an `#if 0` region and is not compiled.
// It always posts AXIST_POST to __AXIstMessageQueue, and additionally posts
// AXIST_RUN when the AI callback asked for the next frame to be made here
// (AX_DSP_FRAME_MAKEFRAME).  The task argument is unused.
static void __AXIstDSPResumeCallback(void *task __attribute__((unused)))
{
    AXISTMessage msg;

#ifdef _DEBUG
    // voices
    // in stepping mode just mark the frame complete and send no messages
    if (__AXDebugSteppingMode)
    {
        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
        return;
    }
#endif

trace(DSP_INT, __AXOutFrame)

    // record DSP start/end times in the profile of the last frame, if any
    if (__AXLastProfile)
    {
        __AXLastProfile->axDSPStart = __axDSPStart;
        __AXLastProfile->axDSPEnd   = OSGetTime();
    }

    msg.data = NULL;
    msg.type = AXIST_POST;
    msg.time = OSGetTime();

    // Note: __OSSendMessageInternal must be called with interrupts disabled,
    //       because it does not disable interrupts itself.
    //       This is an optimization: OSDisableInterrupts and
    //       OSRestoreInterrupts are system calls that cost cycles, and they
    //       are unnecessary inside an ISR, where interrupts are already
    //       disabled.
    if (!__OSSendMessageInternal(&__AXIstMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK))
    {
        // a failed send is silently ignored
        //OSReport("%s: OSSendMessage AXIST_POST Failed\n", __FUNCTION__);
    }


    // see if we should make the next frame in here or simply mark that the
    // dsp has completed
    if (__AXOutDspReady == AX_DSP_FRAME_MAKEFRAME)
    {
        __AXOutDspReady = AX_DSP_FRAME_NOT_COMPLETE;

        msg.data = NULL;
        msg.type = AXIST_RUN;
        msg.time = OSGetTime();

        // Note: __OSSendMessageInternal must be called with interrupts
        //       disabled, because it does not disable interrupts itself.
        //       This is an optimization: OSDisableInterrupts and
        //       OSRestoreInterrupts are system calls that cost cycles, and
        //       they are unnecessary inside an ISR, where interrupts are
        //       already disabled.

        if (!__OSSendMessageInternal(&__AXIstMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK))
        {
            // a failed send is silently ignored
            //OSReport("%s: OSSendMessage AXIST_RUN Failed\n", __FUNCTION__);
        }

        // for debug
        if (__AXExceedCallback)
        {
            (*__AXExceedCallback)(1);
        }
    }
    else
    {
        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
    }

    __AXServicingFrameFlag = FALSE;
trace(DSP_END, 0)
} // end __AXIstDSPResumeCallback()

/*---------------------------------------------------------------------------*
    This is the callback for the DSP to CPU interrupt; it marks that the DSP
    has completed the last frame. Hopefully we get this before the next AI
    interrupt.  If the AI interrupt already came (AX_DSP_FRAME_MAKEFRAME), the
    next frame is built here instead.  The task argument is unused.

    Not called when built in IST mode.
    NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
static void __AXDSPResumeCallback(void *task __attribute__((unused)))
{
#ifdef _DEBUG
    // voices
    // in stepping mode just mark the frame complete and do nothing else
    if (__AXDebugSteppingMode)
    {
        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
        return;
    }
#endif

trace(DSP_INT, __AXOutFrame)
    // record DSP start/end times in the profile of the last frame, if any
    if (__AXLastProfile)
    {
        __AXLastProfile->axDSPStart = __axDSPStart;
        __AXLastProfile->axDSPEnd   = OSGetTime();
    }
    __AXServicingFrameFlag = TRUE;

    // Perform PPC mixer post-process
    // this merges its outputs with the DSP outputs
    if ((__AXMixingMode&AXMIX_PPC_ENABLE) && __AXMixerCallback)
    {
        // same call in both branches; the first also timestamps it
        if (__AXLastProfile)
        {
            __AXLastProfile->axPPCPostStart = OSGetTime();
            __AXMixerCallback((u16*)__last_cl, TRUE);
            __AXLastProfile->axPPCPostEnd = OSGetTime();
        }
        else
        {
            __AXMixerCallback((u16*)__last_cl, TRUE);
        }
    }
#ifdef DSP_DOES_MULTI_SRC // should not uprate until we KNOW DSP has completed
    if (AXGetMode() >= AX_MODE_DPL2)
    {
        __AXSendTvFrame(__AXOutFrame);
    }
#endif
    // see if we should make the next frame in here or simply mark that the
    // dsp has completed
    if (__AXOutDspReady == AX_DSP_FRAME_MAKEFRAME)
    {
        u32 lostDspCycles;

        __AXOutDspReady = AX_DSP_FRAME_NOT_COMPLETE;

        lostDspCycles = __AXOutNewFrame();

        // for debug
        if (__AXExceedCallback)
        {
            (*__AXExceedCallback)(lostDspCycles);
        }
    }
    else
    {
        __AXOutDspReady = AX_DSP_FRAME_COMPLETE;
    }

    __AXServicingFrameFlag = FALSE;
trace(DSP_END, 0)
} // end __AXDSPResumeCallback()

/*---------------------------------------------------------------------------*
  This callback is invoked when the DSP audio task is completely DONE.
  It raises __AXDSPDoneFlag and wakes any thread sleeping on
  __AXOutThreadQueue.  According to the original note it is used when
  AXOutQuit is called.  The task argument is unused.

  NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
static void __AXDSPDoneCallback(void *task __attribute__((unused)))
{
//#pragma unused(task)

    __AXDSPDoneFlag = TRUE;

    OSWakeupThread(&__AXOutThreadQueue);

} // end __AXDSPDoneCallback()

/*---------------------------------------------------------------------------*
  This callback is invoked when the DSP audio task has output final AI data.
  Intentionally empty: per the original note it is never used, and the
  corresponding interrupt was removed from the DSP code.  It exists so that
  __AXDSPTask.req_cb can be given a handler.

  NOTE: this function is inside an `#if 0` region and is not compiled.
 *---------------------------------------------------------------------------*/
static void __AXDSPRequestCallback(void *task __attribute__((unused)))
{
//#pragma unused(task)

} // end __AXDSPRequestCallback()

#endif
/*---------------------------------------------------------------------------*
    DSP program image (pointer and length) that __AXOutInitDSP() downloads to
    the DSP when it is called from AXOutInit().  Both stay NULL / 0 here:
    the only code that assigns or reads them (__AXInitOutBalanced,
    __AXInitOutLegacy, __AXOutInitDSP) is currently compiled out.
 *---------------------------------------------------------------------------*/
u16 *slaveData = NULL;
u32 slaveLength = 0;

#if 0
// Selects the "balanced" DSP image (axDspSlaveBalanced) as the program that
// __AXOutInitDSP() will download.  This is also the image __AXOutInitDSP()
// picks when none has been selected.
// NOTE: inside an `#if 0` region; not compiled.
void __AXInitOutBalanced(void)
{
    slaveData = axDspSlaveBalanced;
    slaveLength = axDspSlaveBalancedLength;
}

// Selects the "legacy" DSP image (axDspSlave) as the program that
// __AXOutInitDSP() will download.
// NOTE: inside an `#if 0` region; not compiled.
void __AXInitOutLegacy(void)
{
    slaveData = axDspSlave;
    slaveLength = axDspSlaveLength;
}

// Fills in __AXDSPTask (program image, data image, vectors, callbacks,
// priority), resets the synchronization flags, adds the task to the DSP and
// then busy-waits until __AXDSPInitCallback() raises __AXDSPInitFlag.
// NOTE: inside an `#if 0` region; not compiled.
void __AXOutInitDSP(void)
{
    // default to the balanced image when no image was selected beforehand
    if (!slaveData)
    {
        __AXInitOutBalanced();
    }

    // initialize and download program code to DSP, this part will be put
    // into a lib later so we don't have to deal with this here... I told
    // you to wear your seatbelt!
    __AXDSPTask.iram_mmem_addr     = (u16*)OSEffectiveToPhysical(slaveData);
    __AXDSPTask.iram_length        = slaveLength;
    __AXDSPTask.iram_addr          = 0x00000000;
    __AXDSPTask.dram_mmem_addr     = (u16*)OSEffectiveToPhysical(__AXDramImage);
    __AXDSPTask.dram_length        = INIT_RAM_DATA_LENGTH;
    __AXDSPTask.dram_addr          = INIT_RAM_DATA_ADDR;
//    __AXDSPTask.dram_length        = AX_DSP_YIELD_DATA_LENGTH;
//    __AXDSPTask.dram_addr          = AX_DSP_YIELD_DATA_ADDR;

    __AXDSPTask.dsp_init_vector    = axDspInitVector;
    __AXDSPTask.dsp_resume_vector  = axDspResumeVector;

    __AXDSPTask.init_cb            = __AXDSPInitCallback;
    // the resume callback depends on whether the IST build option is set
#ifdef AX_IST_MODE
    __AXDSPTask.res_cb             = __AXIstDSPResumeCallback;
#else
    __AXDSPTask.res_cb             = __AXDSPResumeCallback;
#endif
    __AXDSPTask.done_cb            = __AXDSPDoneCallback;
    __AXDSPTask.req_cb             = __AXDSPRequestCallback;

    __AXDSPTask.priority           = 0;    // this is highest priority!


    // initialize synchronization flags
    __AXDSPInitFlag        = FALSE;
    __AXDSPDoneFlag        = FALSE;
    __AXServicingFrameFlag = FALSE;

    OSInitThreadQueue(&__AXOutThreadQueue);

    if (FALSE == DSPCheckInit())
    {
        DSPInit();              // later, move this outside of AX
    }

    DSPAddTask(&__AXDSPTask);

    // this must be called with interrupts enabled!
    while (FALSE == __AXDSPInitFlag)
    {
        // do nothing
        // yes, we will hang if the DSP never initializes...
    }

} // end __AXOutInitDSP()

#endif

/*---------------------------------------------------------------------------*
    Brings the AX output module into the foreground state.

    - asserts that the module is not already in the foreground and that the
      output buffers satisfy PPC_IO_BUFFER_ALIGN (address and size)
    - resets the frame indices, the DRC->AI2 FIFO and the debug stepping mode
    - zeroes every output buffer
    - initializes the per-device final mix callback structs
    - generates the first frame with no voices via __AXNextFrame() and marks
      the DSP as ready
    - sends the first TV frame via __AXSendTvFrame()
    - sets __AXOutIsInForeground

    Thread creation, DSP start-up, AI callback registration and AVM mode
    detection are present but compiled out (`#if 0`).

    The original note says this is called by AXInit() to initialize the
    output buffer.
 *---------------------------------------------------------------------------*/

void __AXOutOnForeground()
{
    AXRmtBuffs  rbuffer;

    ASSERT(!__AXOutIsInForeground);

#ifdef _DEBUG
    OSReport("Initializing AXOut code module build date %s %s\n", __DATE__, __TIME__);
#endif
#if 0
    if (OSSwapAtomic(&__AIInitialized, TRUE))
    {
        OSReport("AXOut Module Already Initialized\n");
        return;
    }
#endif

    // every buffer must start on a PPC_IO_BUFFER_ALIGN boundary ...
    ASSERT(((u32)&__AXOutBuffer[0][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutBuffer[1][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutBuffer[2][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutSBuffer[0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOut6chBuffer[0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutDRCBuffer[0][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutDRCBuffer[1][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutDRCBuffer[2][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutAI2Buffer[0][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutAI2Buffer[1][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT(((u32)&__AXOutAI2Buffer[2][0] & (PPC_IO_BUFFER_ALIGN-1)) == 0);

    // ... and have a size that is a multiple of PPC_IO_BUFFER_ALIGN
    ASSERT((sizeof(__AXOutBuffer[0]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutBuffer[1]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutBuffer[2]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutSBuffer) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOut6chBuffer[0]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutDRCAI2Buffer[0]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutDRCAI2Buffer[1]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutDRCAI2Buffer[2]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutAI2Buffer[0]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutAI2Buffer[1]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);
    ASSERT((sizeof(__AXOutAI2Buffer[2]) & (PPC_IO_BUFFER_ALIGN-1)) == 0);


    __ax_app_io_mode     = APP_IO_MODE;

    // initialize local params
    // (the DRC->AI2 FIFO starts with one entry queued: in = 1, out = 0)
    __DRC2AI2_in = 1;
    __DRC2AI2_out = 0;
    __DRC2AI2_FIFO[0] = 0;
    __DRC2AI2_FIFO[1] = 0;
    __DRC2AI2_FIFO[2] = 0;
    __DRC2AI2_FIFO[3] = 0;
//    OSSwapAtomic(&__DRC2AI2_empty, FALSE);
    // __DRC2AI2_empty = 0;
    __AXOutFrame = 0;
    __AXOutAI2Frame = 0;
    __AXAiDmaFrame = 0;
    __AXDebugSteppingMode = 0;

    // zero the output buffers ... actually this should already be 0 except
    // if someone quits and then reinitializes this code module, so lets 0
    // these bits just incase and don't forget to wear your seatbelt...
    // nag nag nag

    memset(&__AXOutBuffer, 0, sizeof(__AXOutBuffer));
//    DCFlushRange(&__AXOutBuffer, sizeof(__AXOutBuffer));

    memset(&__AXOut6chBuffer, 0, sizeof(__AXOut6chBuffer));
//    DCFlushRange(&__AXOut6chBuffer, sizeof(__AXOut6chBuffer));

    // zero the DRC buffer also
    memset(&__AXOutDRCBuffer, 0, sizeof(__AXOutDRCBuffer));
 //   DCFlushRange(&__AXOutDRCBuffer, sizeof(__AXOutDRCBuffer));

    // zero the AI2 Interleave buffer also
    memset(&__AXOutDRCAI2Buffer, 0, sizeof(__AXOutDRCAI2Buffer));
 //   DCFlushRange(&__AXOutDRCAI2Buffer, sizeof(__AXOutDRCAI2Buffer));

    // zero the AI2 buffer also
    memset(&__AXOutAI2Buffer, 0, sizeof(__AXOutAI2Buffer));
//    DCFlushRange(&__AXOutAI2Buffer, sizeof(__AXOutAI2Buffer));

    // zero the surround buffer also
    memset(&__AXOutSBuffer, 0, sizeof(__AXOutSBuffer));
//    DCFlushRange(&__AXOutSBuffer, sizeof(__AXOutSBuffer));

    // zero the remote buffers also
    memset(&__AXRmtOutBuffer, 0, sizeof(__AXRmtOutBuffer));
 //   DCFlushRange(&__AXRmtOutBuffer, sizeof(__AXRmtOutBuffer));



    // initialize the CB structs
    // for TV
    __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].data = NULL;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].numChnsIn = AX_MAX_NUM_TV_CHS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].numChnsOut = AX_MAX_NUM_TV_CHS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].numDevices = AX_MAX_NUM_TVS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].numSamples = AX_IN_SAMPLES_PER_FRAME;

    // for DRC
    __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].data = NULL;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].numChnsIn = AX_MAX_NUM_DRC_CHS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].numChnsOut = AX_MAX_NUM_DRC_CHS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].numDevices = AX_MAX_NUM_DRCS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].numSamples = AX_IN_SAMPLES_PER_FRAME;

    // for RMT
    __AXDeviceFinalMixCBStruct[AX_DEVICE_RMT].data = NULL;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_RMT].numChnsIn = AX_MAX_NUM_RMT_CHS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_RMT].numChnsOut = AX_MAX_NUM_RMT_CHS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_RMT].numDevices = AX_MAX_NUM_RMTS;
    __AXDeviceFinalMixCBStruct[AX_DEVICE_RMT].numSamples = AX_IN_SAMPLES_PER_FRAME;
    // for debug
    __AXExceedCallback = NULL;

#if 0
    // start up the IST Thread
    OSInitMessageQueue(&__AXIstMessageQueue, __AXIstMessage, AX_IST_MAX_CALLS);
    if (!__OSCreateThreadType(&__AXIstThread,
                       __AXIstThreadFunc,
                       0, &__AXIstMessageQueue,
                       __AXIstThreadStack + sizeof __AXIstThreadStack,
                       sizeof __AXIstThreadStack,
                       OS_DRVR_PRIORITY_DEFAULT-2, // slightly higher than default for now
                       0,
                       OSTHREAD_TYPE_DRVR))
    {
        OSReport("OSCreateThreadType failed!\n");
    }
    else
    {
        OSSetThreadName(&__AXIstThread, "{SYS AX IST}");
        OSResumeThread(&__AXIstThread);
    }


    OSInitMessageQueue(&__AXIoMessageQueue, __AXIoMessage, AX_IO_MAX_CALLS);
    if (!OSCreateThreadType(&__AXIoThread,
                       __AXIoThreadFunc,
                       0, &__AXIoMessageQueue,
                       __AXIoThreadStack + sizeof __AXIoThreadStack,
                       sizeof __AXIoThreadStack,
                       OS_APP_IO_PRIORITY_DEFAULT, // just #15, may need to be 0
                       0,
                       OSTHREAD_TYPE_APP_IO))
    {
        OSReport("OSCreateThreadType failed!\n");
    }
    else
    {
        OSSetThreadName(&__AXIoThread, "{SYS AX IO}");
        OSResumeThread(&__AXIoThread);
    }

    // start up the DSP
    __AXOutInitDSP();


    // register callback for AI-FIFO DMA interupt
#ifdef AX_IST_MODE
    __AX_OldAIDMACallback = AIRegisterDMACallback(&__AXIstAiCallback);
#else
    __AX_OldAIDMACallback = AIRegisterDMACallback(&__AXOutAiCallback);
#endif
    __AX_OldAI2DMACallback = AI2RegisterDMACallback(&__AXOutAi2Callback);

    #endif

    rbuffer.rmt0 = &__AXRmtOutBuffer[0][0];
    rbuffer.rmt1 = &__AXRmtOutBuffer[1][0];
    rbuffer.rmt2 = &__AXRmtOutBuffer[2][0];
    rbuffer.rmt3 = &__AXRmtOutBuffer[3][0];

    __AXRmtCpuPtr  = AX_RMT_SAMPLES_PER_FRAME;
    __AXRmtDspPtr  = AX_RMT_SAMPLES_PER_FRAME;
    __AXRmtBuffLen = AX_RMT_SAMPLES_PER_FRAME * AX_RMT_MAX_BLOCKS;

#ifdef AX_SUPPORT_RMT_FINAL_MIX
    // Both loop counters are declared here so that this block compiles on its
    // own when AX_SUPPORT_RMT_FINAL_MIX is defined; this function has no
    // other declaration of dev_id.
    for(u32 dev_id = 0; dev_id < AX_MAX_NUM_RMTS; dev_id++)
    {
        for(u32 chCnt = 0; chCnt < AX_MAX_NUM_RMT_CHS ; chCnt++)
        {
            __AXRMTFinalMixPointers[__AXRMTFinalMixWritePos][dev_id*AX_MAX_NUM_RMT_CHS + chCnt] = &__AXRmtOutBuffer[dev_id][__AXRmtDspPtr];
        }
    }
    // toggle the write position between 0 and 1
    __AXRMTFinalMixWritePos = (__AXRMTFinalMixWritePos + 1) & (0x1);
#endif // AX_SUPPORT_RMT_FINAL_MIX

   // sending from init
    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        // generate a frame for the first AI interrupt this will put the output
        // buffer on __AXOutBuffer[2]
        __AXNextFrame(&__AXOutTVBuffer[AX_TV_ID0][2][AX_CH_LEFT][0],
                      &rbuffer, NULL, NULL, // no voices processed this time
                      &__AXOutDRCBuffer[AX_DRC_ID0][2],
                      &__AXOutDRCBuffer[AX_DRC_ID1][2]);
    }
    else
    {
        // double-buffer mode: the first generated frame targets buffer 1
        __AXNextFrame(&__AXOutTVBuffer[AX_TV_ID0][1][AX_CH_LEFT][0],
                      &rbuffer, NULL, NULL, // no voices processed this time
                      &__AXOutDRCBuffer[AX_DRC_ID0][1],
                      &__AXOutDRCBuffer[AX_DRC_ID1][1]);
    }

    // mark the DSP ready for the first frame as the __AXNextFrame() sets it
    // to not ready by default and there's no DSP interrupt for the first
    // frame
    __AXOutDspReady = AX_DSP_FRAME_COMPLETE;


#if 0
    // check the AVM mode and set the audio mode
    AVMTVAudioMode avmTVMode;
    if(AVMGetTVAudioMode(&avmTVMode) )
    {
        switch(avmTVMode)
        {
            case AVM_TV_AUDIO_STEREO:
            case AVM_TV_AUDIO_ANALOG_SURROUND:
                __AXSetMode(AX_MODE_STEREO);
                AISetFormatChangeState(AI_CHANNEL_STEREO);
                AISetChannel(AI_CHANNEL_STEREO, FALSE);
                 //AISetDSPSampleRate(AI_SAMPLERATE_32KHZ);
                AISetDSPSampleRate(AI_SAMPLERATE_48KHZ);

                __AXDeviceOutChannels[AX_DEVICE_TV] = AX_STEREO_CHANNELS;
                break;
            case AVM_TV_AUDIO_HDMI_SURROUND:

                // Please do not change the order.
                // Order in which AI functions are called matters
                __AXSetMode(AX_MODE_6CHAN);
                AISetDSPSampleRate(AI_SAMPLERATE_48KHZ);
                AISetFormatChangeState(AI_CHANNEL_MULTI_PCM);
                AISetChannel(AI_CHANNEL_MULTI_PCM, FALSE);

                __AXDeviceOutChannels[AX_DEVICE_TV] = AX_SURROUND_CHANNELS;
                break;
            case AVM_TV_AUDIO_HDMI_BITSTREAM:
                OSReport("Unsupported TV Audio mode");
                break;

        } // switch(avmTVMode)
    }
    else
    {
        OSReport("Couldnt read AVM TV Audio Mode\n");
    }

    AVMDRCAudioMode avmDRCMode;
    if(AVMGetDRCAudioMode(&avmDRCMode) )
    {
        switch(avmDRCMode.format)
        {
            case AVM_DRC_AUDIO_FORMAT_STEREO:
                AXSetDeviceMode(AX_DEVICE_DRC, AX_MODE_STEREO);
                __AXDeviceOutChannels[AX_DEVICE_DRC] = AX_STEREO_CHANNELS;
                break;
            case AVM_DRC_AUDIO_FORMAT_NONE:
                break;
            default:
                OSReport("Unsupported DRC Audio mode : %d \n", avmDRCMode.format);
                break;

        } // switch(avmDRCMode)
    }
    else
    {
        OSReport("Couldnt read AVM DRC Audio Mode\n");
    }
#endif
    // set DRC sampling rate to 48k always
//    AI2SetDSPSampleRate(AI_SAMPLERATE_48KHZ);

    if (__AXOutputBufferMode == AX_OUTPUT_BUFFER_TRIPLE)
    {
        // Ok, lets assume the DSP program is up and running
        // LPCM 4.0 - moved into __AXSendTvFrame
        // AIInitDMA((u32)(&__AXOutBuffer[__AXAiDmaFrame]), AX_OUT_SAMPLES_PER_FRAME * 4);
        __AXSendTvFrame(__AXAiDmaFrame);

        __AXAiDmaFrame++;
    }
    else
    {
        // LPCM 4.0 - moved into __AXSendTvFrame
        // AIInitDMA((u32)(&__AXOutBuffer[__AXOutFrame]), AX_OUT_SAMPLES_PER_FRAME * 4);
        __AXSendTvFrame(__AXOutFrame);
    }
    // DRC always 32K for now
//    AI2InitDMA((u32)(&__AXOutAI2Buffer[0]), AX_OUT_48K_SAMPLES_PER_FRAME * 4);

    // tell the AI to start
 //   AIStartDMA();
//    AI2StartDMA();

//    __AXLateMonitorAILauncher(&__AXOutBuffer[0][0], sizeof(__AXOutBuffer[0]));
//
    // pairs with the ASSERT at the top of this function
    __AXOutIsInForeground = 1;
//    OSMemoryBarrier();

} // __AXOutOnForeground()

// part of the initialization and quit of AXOutI
void __AXOutClearCallbacks(void)
{
    u32 dev_id;
    // clear user callbacks

    __AXUserFrameCallback = NULL;

    // clear the app frame callbacks
    for(u32 id = 0 ; id < AX_MAX_APP_FRAME_CBS ; id++)
    {
        __AXAppFrameCallbacks[id] = NULL;
    }
    __AXActiveAppFrameCbs = 0;
#if defined(ANDROID) || TARGET_OS_IPHONE
    OSLockMutex(&__AXAppFrameCBMutex);
#else
    ::InitializeCriticalSection( &__AXAppFrameCBMutex );
#endif

    // clear final mix callbacks
    for(dev_id = 0; dev_id < AX_MAX_NUM_DEVICES; dev_id++)
    {
        __AXDeviceFinalMixCallback[dev_id] = NULL;
    }


}
/*=============================================================================
 * Function: __AXUpsamplerInit
 * Arguments: upsampleStruct : upsampler state to reset
 * Outputs:   none (the state is modified in place)
 * Description:
 *   Zeroes the filter memory and resets every index/counter of the state.
 *   The filter length is derived from the size of __AXUpsampleFiltCoeffs and
 *   the over-sampling factor is AX_UPSAMPLE_OVERRATE.
=============================================================================*/
static void __AXUpsamplerInit(AXUpsampleStruct *upsampleStruct)
{
    AXUpsampleStruct &state = *upsampleStruct;

    // filter geometry
    state.filterLength  = sizeof(__AXUpsampleFiltCoeffs)/sizeof(f32);
    state.overRateSkip  = AX_UPSAMPLE_OVERRATE;
    state.filtMemLength = state.filterLength / state.overRateSkip;

    // circular buffer positions
    state.startIdx      = 0;
    state.nextSampleIdx = 0;
    state.inIdxCount    = 0;

    // phase counter: starts at 2 so that the first increment reaches the
    // over-sampling factor and a new input sample is fetched right away
    state.outIdxCount = 2;
    state.outIdxInc   = (32*state.overRateSkip)/48; // must be 4

    // history starts out silent
    memset(state.filtMem, 0, sizeof(state.filtMem));
} // end of void __AXUpsamplerInit()
/*---------------------------------------------------------------------------*
    Called by AXInit() to bring up the output stage: resets the per-device
    upsampling / remix / final-mix state, clears all callbacks, stores the
    requested output buffer mode and runs the foreground start-up path.

    outputBufferMode : value stored into __AXOutputBufferMode

    NOTE(review): __AXAppFrameCBMutex is only initialized at the very end of
    this function, after __AXOutClearCallbacks() has already touched it.
    Confirm that this ordering is intended.
 *---------------------------------------------------------------------------*/
void __AXOutInit(u32 outputBufferMode)
{
    for (u32 device = 0; device < AX_MAX_NUM_DEVICES; ++device)
    {
        // upsampling is done before the final mix callback (pre FCB)
        __AXDevicePostFCBUpsample[device] = 0;

        // no remix matrix is installed yet
        for (u32 matrixIdx = 0; matrixIdx < AX_MAX_REMIX_MATRICES; ++matrixIdx)
        {
            __AXDeviceRemixMatrix[device][matrixIdx].numInChns  = 0;
            __AXDeviceRemixMatrix[device][matrixIdx].numOutChns = 0;
            __AXDeviceRemixMatrix[device][matrixIdx].matrix     = NULL;
        }
    }

    // by default the final mix produces the maximum channel count
    __AXDeviceFinalMixOutChannels[AX_DEVICE_TV]  = AX_MAX_NUM_TV_CHS;
    __AXDeviceFinalMixOutChannels[AX_DEVICE_DRC] = AX_MAX_NUM_DRC_CHS;

    // linear upsampler histories start out silent
    memset(&__AXTVHistories, 0, sizeof(__AXTVHistories));
    memset(&__AXDRCHistories, 0, sizeof(__AXDRCHistories));

    // TV outputs: reset the filter upsampler state, select the linear one
    for (u32 tvIdx = 0; tvIdx < AX_MAX_NUM_TVS; ++tvIdx)
    {
        for (u32 channel = 0; channel < AX_MAX_NUM_TV_CHS; ++channel)
        {
            __AXUpsamplerInit(&__AXTVUpsampleStruct[tvIdx][channel]);
        }

        //__AXTVLinearUpsampler[tvIdx] = FALSE;
        __AXTVLinearUpsampler[tvIdx] = TRUE;
    }

    // DRC outputs: same treatment (s_ProcessDrcCount is 0 where DRC
    // processing is disabled, so this loop is then skipped)
    for (u32 drcIdx = 0; drcIdx < s_ProcessDrcCount; ++drcIdx)
    {
        for (u32 channel = 0; channel < AX_MAX_NUM_DRC_CHS; ++channel)
        {
            __AXUpsamplerInit(&__AXDRCUpsampleStruct[drcIdx][channel]);
        }

        //__AXDRCLinearUpsampler[drcIdx] = FALSE;
        __AXDRCLinearUpsampler[drcIdx] = TRUE;
    }

    // final mix buffers start out silent
    memset(&__AXTVFinalMixBuffer, 0, sizeof(__AXTVFinalMixBuffer));
    memset(&__AXDRCFinalMixBuffer, 0, sizeof(__AXDRCFinalMixBuffer));

    // forget any previously registered callbacks
    __AXOutClearCallbacks();

    // remember the requested output buffer mode
    __AXOutputBufferMode = outputBufferMode;

    // run the foreground start-up path; it is expected to raise the flag
    __AXOutIsInForeground = 0;
    __AXOutOnForeground();
    ASSERT(__AXOutIsInForeground);

#if defined(ANDROID) || TARGET_OS_IPHONE
    OSInitMutex(&__AXAppFrameCBMutex);
#else
    ::InitializeCriticalSection( &__AXAppFrameCBMutex );
#endif
} // end of void __AXOutInit()

/*---------------------------------------------------------------------------*
    Tear-down counterpart of __AXOutInit() (presumably called from AXQuit()).
    The foreground shutdown call is currently commented out, so the only
    effect is that every registered callback is cleared.
 *---------------------------------------------------------------------------*/
void __AXOutQuit()
{
    if (0 != __AXOutIsInForeground)
    {
        // foreground shutdown (stop DMA/DSP etc) is disabled for now
 //      __AXOutOnBackground();
    }

    // drop all registered callbacks
    __AXOutClearCallbacks();
} // end of void __AXOutQuit()

/*---------------------------------------------------------------------------*
    This function is called by the AXQuit() to stop the output buffer.

    NOTE: the whole function is compiled out (#if 0); the only reference to
    it in __AXOutQuit() is commented out as well.  It is kept as a record of
    the intended shutdown sequence:
      1. ask the IST and IO threads to quit and join them
      2. with interrupts disabled: cancel/halt/reset the DSP, unregister the
         AI DMA callbacks, stop the AI DMA, clear the exceed callback
      3. shut down DSP and AI and clear the foreground flag
 *---------------------------------------------------------------------------*/
#if 0
void __AXOutOnBackground(void)
{
    int old;            // interrupt state returned by OSDisableInterrupts()
    AXISTMessage msg;   // quit message, reused for both worker threads

    ASSERT(__AXOutIsInForeground);

#ifdef _DEBUG
    OSReport("Shutting down AXOut code module\n");
#endif

    msg.data = NULL;
    msg.time = OSGetTime();

    // ask the IST thread to quit and wait for it, then do the same for the
    // IO thread
    msg.type = AXIST_QUIT;
    OSJamMessage(&__AXIstMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK);
    OSJoinThread(&__AXIstThread, NULL);
    msg.type = AXIO_QUIT;
    OSJamMessage(&__AXIoMessageQueue, (OSMessage*)&msg, OS_MESSAGE_NOBLOCK);
    OSJoinThread(&__AXIoThread, NULL);
    old = OSDisableInterrupts();

    // cancel DSP task (& wait)
    {
        DSPCancelTask(&__AXDSPTask);
        // the wait branch is hard-wired off, so the DSP is always halted and
        // reset immediately
        if (0) // (__AXOutDspReady != AX_DSP_FRAME_COMPLETE)
        {
            OSSleepThread(&__AXOutThreadQueue);
        }
        else
        {
            DSPHalt();
            DSPReset();
        }
    }

    // clear callback for AI-FIFO DMA interrupt
    AIRegisterDMACallback(NULL);
    AI2RegisterDMACallback(NULL);

    // stop the AI DMA
    AIStopDMA();
    AI2StopDMA();

    // for debug
    __AXExceedCallback = NULL;

    // mark the AI as no longer initialized
    OSSwapAtomic(&__AIInitialized, FALSE);

    OSRestoreInterrupts(old);
    DSPQuit();
    AIQuit();

    // output stage is now in the background
    __AXOutIsInForeground = 0;
    OSMemoryBarrier();
}
#endif

/*---------------------------------------------------------------------------*
    exposed functions
 *---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*
    Deprecated entry point for registering the per-audio-frame callback.
    Reports the deprecation and forwards to AXRegisterFrameCallback();
    the return value is whatever that function returns.
 *---------------------------------------------------------------------------*/
AXUserCallback AXRegisterCallback(AXUserCallback callback)
{
    // report first so the log names the deprecated entry point
    OSReport("%s is deprecated; Use AXRegisterFrameCallback instead\n", __FUNCTION__);

    const AXUserCallback previousCallback = AXRegisterFrameCallback(callback);
    return previousCallback;
}

/*---------------------------------------------------------------------------*
    Deprecated DRC-named entry point for registering the per-audio-frame
    callback.  Reports the deprecation and forwards to
    AXRegisterFrameCallback(); the return value is whatever that function
    returns.
 *---------------------------------------------------------------------------*/
AXUserCallback AXRegisterDRCCallback(AXUserCallback callback)
{
    // report first so the log names the deprecated entry point
    OSReport("%s is deprecated; Use AXRegisterFrameCallback instead\n", __FUNCTION__);

    const AXUserCallback previousCallback = AXRegisterFrameCallback(callback);
    return previousCallback;
}

/*---------------------------------------------------------------------------*
    Installs a new mixer callback together with its mixing mode and returns
    the callback that was installed before.  For AXMIX_PPC_ENABLE and
    AXMIX_DSP_ENABLE the matching default renderer is selected as well.
    The whole update runs with interrupts disabled.
 *---------------------------------------------------------------------------*/
AXMixerCallback __AXRegisterMixerMode(AXMixerCallback mix_cb, u32 mix_mode)
{
    const int irqState = OSDisableInterrupts();

    // swap in the new callback / mode, remembering the previous callback
    const AXMixerCallback previousCb = __AXMixerCallback;
    __AXMixerCallback = mix_cb;
    __AXMixingMode    = mix_mode;

    // keep the default renderer in step with the mixing mode
    if (mix_mode == AXMIX_PPC_ENABLE)
    {
        AXSetDefaultRenderer(AX_PB_MIXER_SELECT_PPC);
    }
    if (mix_mode == AXMIX_DSP_ENABLE)
    {
        AXSetDefaultRenderer(AX_PB_MIXER_SELECT_DSP);
    }

    OSRestoreInterrupts(irqState);
    return previousCb;
}
/*---------------------------------------------------------------------------*
    Returns how many samples lie between the CPU read position
    (__AXRmtCpuPtr) and the DSP position (__AXRmtDspPtr) moved back by two
    frames (2 * AX_RMT_SAMPLES_PER_FRAME), with both differences wrapped
    into the buffer length __AXRmtBuffLen.
 *---------------------------------------------------------------------------*/

s32 AXRmtGetSamplesLeft(void)
{
    // DSP position, held back by two frames
    s32 remaining = __AXRmtDspPtr - 2 * AX_RMT_SAMPLES_PER_FRAME;
    if (remaining < 0)
    {
        remaining += __AXRmtBuffLen;
    }

    // distance from the CPU read position, wrapped around the ring
    remaining -= __AXRmtCpuPtr;
    if (remaining < 0)
    {
        remaining += __AXRmtBuffLen;
    }

    return remaining;
}

/*---------------------------------------------------------------------------*
    Copies up to `samples` samples of channel `chan` from the ring buffer
    __AXRmtOutBuffer into `buffer`, starting at the CPU read position.
    The count is limited to AXRmtGetSamplesLeft().  The read position itself
    is not advanced here (see AXRmtAdvancePtr()).

    Returns the number of samples copied.
 *---------------------------------------------------------------------------*/

s32 AXRmtGetSamples(s32 chan, s16* buffer, s32 samples)
{
    const s32 available = AXRmtGetSamplesLeft();
    const s32 count     = (samples > available) ? available : samples;
    s32 readPos         = __AXRmtCpuPtr;

#if 0
    if (DSP_ONLY)
    {
        DCInvalidateRange(&__AXRmtOutBuffer[chan][0], (u32)__AXRmtBuffLen * sizeof(s16));
    }
#endif

    for (s32 copied = 0; copied < count; ++copied)
    {
        buffer[copied] = __AXRmtOutBuffer[chan][readPos];

        // step to the next sample, wrapping at the end of the ring
        ++readPos;
        if (readPos >= __AXRmtBuffLen)
        {
            readPos = 0;
        }
    }

    return count;
}

/*---------------------------------------------------------------------------*
    Advances the CPU read position __AXRmtCpuPtr by up to `samples` samples
    (limited to AXRmtGetSamplesLeft()), wrapping at __AXRmtBuffLen.

    Returns the number of samples the position was actually advanced by.
 *---------------------------------------------------------------------------*/
s32 AXRmtAdvancePtr(s32 samples)
{
    const s32 available = AXRmtGetSamplesLeft();

    // never step past what is available
    s32 step = samples;
    if (step > available)
    {
        step = available;
    }

    __AXRmtCpuPtr += step;
    if (__AXRmtCpuPtr >= __AXRmtBuffLen)
    {
        __AXRmtCpuPtr -= __AXRmtBuffLen;
    }

    return step;
}

/*---------------------------------------------------------------------------*
    Stores `callback` into __AXExceedCallback.  The assignment is made with
    interrupts disabled; the previous value is not returned.
 *---------------------------------------------------------------------------*/
void  AXRegisterExceedCallback(AXExceedCallback callback)
{
    const BOOL irqState = OSDisableInterrupts();
    __AXExceedCallback = callback;
    OSRestoreInterrupts(irqState);
}

//#ifdef _DEBUG
// expose some statics to the test application if it wants them
//
// Each entry pairs a 32-bit value with a fixed-size, human readable name.
// Entry 0 describes the record layout itself (its value is the record size).
// Entries whose value is 0 here are placeholders; the currently disabled
// AXGetDebugInfo() fills them in BY INDEX, so the order of the entries must
// stay exactly as it is.  The table ends with an all-zero entry.
struct name_value_pair { u32 value; char name[60]; };
struct name_value_pair ax_debug_info[] = {
    { sizeof(struct name_value_pair), "u32 value, char name[60]"},  // [0]  record size / layout
    { (u32)             0, "__AXOutTVBuffer" },                     // [1]
    { (u32)             0, "__AXCommandList" }, // DO NOT change the offset of these fields
    { (u32)             0, "__AXPB" },                              // [3]
    { (u32) sizeof(AXVPB), "AXVPB_SIZE" },                          // [4]
    { (u32) sizeof(AXPB) , "AXPB_SIZE" },                           // [5]
    { (u32)             0, "__AXOutFrame" },                        // [6]
    { (u32)             0, "__AXVPB" },                             // [7]
    { (u32)             0, "__debugTrace_b" },                      // [8]
    { (u32)             0, "__s_u_AXPB" },                          // [9]
    { (u32)             0, "__AXOutDRCBuffer" },                    // [10]
    { (u32)             0, "__AXOutBuffer" },                       // [11]
    { 0 }                                                           // terminator
};

// NOTE: compiled out (#if 0).  When enabled, AXGetDebugInfo() fills the
// placeholder entries of ax_debug_info with the addresses of the matching
// internal objects and returns the table.  The numeric indices below must
// match the entry order of ax_debug_info.  The addresses are stored through
// (u32) casts, i.e. truncated to 32 bits.
#if 0
AXPB* __AXGetUPBs(void);
struct name_value_pair *AXGetDebugInfo(void);
struct name_value_pair *AXGetDebugInfo(void)
{
    // left channel of the current output frame for TV 0 / DRC 0
    ax_debug_info[1].value = (u32)&__AXOutTVBuffer[AX_TV_ID0][__AXOutFrame][AX_CH_LEFT][0];
    ax_debug_info[10].value = (u32)&__AXOutDRCBuffer[AX_DRC_ID0][__AXOutFrame][AX_CH_LEFT][0];
    ax_debug_info[3].value = (u32)__AXGetPBs();
    ax_debug_info[6].value = (u32)&__AXOutFrame;
    ax_debug_info[7].value = (u32)__AXGetVPBs();
    ax_debug_info[8].value = (u32)&__debugTrace_b[0];
    ax_debug_info[9].value = (u32)__AXGetUPBs();
    ax_debug_info[11].value = (u32)__AXOutBuffer; // use *__AXOutFrame for which frame is current
    return ax_debug_info;
}
#endif
//#endif


/*******************************************************************************
 * Function AXRegisterFrameCallback
 * Arguments
 *          fcb     :    (in) callback function pointer to install
 * Output:
 *          Returns the frame callback function previously registered
 * The swap is performed with interrupts disabled.
*******************************************************************************/
AXUserCallback AXRegisterFrameCallback(AXUserCallback fcb)
{
    const BOOL irqState = OSDisableInterrupts();

    // exchange the stored callback for the new one
    const AXUserCallback previousCb = __AXUserFrameCallback;
    __AXUserFrameCallback = fcb;

    OSRestoreInterrupts(irqState);

    return previousCb;
}

/*******************************************************************************
 * Function AXRegisterDeviceFinalMixCallback
 * Arguments
 *          device  :    (in) device for which the callback is registered
 *          fcb     :    (in) final mix callback to install
 * Output:
 *          AXPB_ERROR_NONE on success, AXPB_ERROR_DEVICE_TYPE when the device
 *          is neither AX_DEVICE_TV nor AX_DEVICE_DRC
 * Registers the callback for the final mix on a particular device type.
 * The update is performed with interrupts disabled.
*******************************************************************************/
AXPB_ERROR_CODE AXRegisterDeviceFinalMixCallback(AXPBDeviceType device,  AXUserFinalMixCallback fcb)
{
    AXPB_ERROR_CODE status = AXPB_ERROR_NONE;

    const BOOL irqState = OSDisableInterrupts();

    // only TV & DRC are supported as of now
    if( (AX_DEVICE_TV != device) && (AX_DEVICE_DRC != device) )
    {
        status = AXPB_ERROR_DEVICE_TYPE;
    }
    else
    {
        __AXDeviceFinalMixCallback[device] = fcb;
    }

    OSRestoreInterrupts(irqState);
    return status;
} // end of AXRegisterDeviceFinalMixCallback
/*******************************************************************************
 * Function AXGetDeviceFinalMixCallback
 * Arguments
 *          device  :    (in)  device whose callback is queried
 *          *fcb    :    (out) receives the currently registered callback;
 *                             it is dereferenced without a NULL check
 * Output:
 *          AXPB_ERROR_NONE on success, AXPB_ERROR_DEVICE_TYPE when the device
 *          is neither AX_DEVICE_TV nor AX_DEVICE_DRC (*fcb is then untouched)
 * Retrieves the final mix callback of a particular device type.
*******************************************************************************/
AXPB_ERROR_CODE AXGetDeviceFinalMixCallback(AXPBDeviceType device,  AXUserFinalMixCallback *fcb)
{
    // only TV & DRC are supported as of now
    if( (AX_DEVICE_TV != device) && (AX_DEVICE_DRC != device) )
    {
        return AXPB_ERROR_DEVICE_TYPE;
    }

    *fcb = __AXDeviceFinalMixCallback[device];
    return AXPB_ERROR_NONE;
} // end of AXGetDeviceFinalMixCallback


/*==============================================================================
 * Function:    __AXApplyFinalRampOneChannel
 * Arguments:   chPtr:       samples of one channel, scaled in place
 *              *curVol:     current volume; updated after every sample
 *              rampRate:    factor the volume is multiplied by per sample
 *              numSamples:  number of samples to process
 * Description:
 *          Multiplies each sample by the current volume and then advances
 *          the volume geometrically (volume *= rampRate).
==============================================================================*/
static void __AXApplyFinalRampOneChannel(s32 *chPtr, f32 *curVol, f32 rampRate, u32 numSamples)
{
    for (u32 idx = 0; idx < numSamples; ++idx)
    {
        // scale in floating point, store back truncated to integer
        const f32 scaled = ((f32) chPtr[idx]) * (*curVol);
        chPtr[idx] = (s32) scaled;

        // move the volume one step along the ramp
        *curVol = (*curVol) * rampRate;
    }
} // end of __AXApplyFinalRampOneChannel
/*
 * Function:    __AXApplyFinalRamp
 * Arguments:   *curVol:     volume at the start of the frame; on return it
 *                           holds the volume reached at the end of the frame
 *              rampRate:    factor the volume is multiplied by per sample
 * Description:
 *          Applies one frame (AX_OUT_48K_SAMPLES_PER_FRAME samples) of the
 *          volume ramp to every channel of the first TV and the first DRC
 *          final mix buffer.  Every channel starts from the same volume.
 * */
static void __AXApplyFinalRamp( f32 *curVol, f32 rampRate)
{
    const u32 samplesPerFrame = AX_OUT_48K_SAMPLES_PER_FRAME;
    f32 channelVol;   // per-channel working copy of the volume

    // TV channels
    for (u32 tvCh = 0; tvCh < AX_MAX_NUM_TV_CHS; ++tvCh)
    {
        channelVol = *curVol;
        __AXApplyFinalRampOneChannel(&__AXTVFinalMixBuffer[AX_TV_ID0][tvCh][0],
                                     &channelVol, rampRate, samplesPerFrame);
    }

    // DRC channels
    for (u32 drcCh = 0; drcCh < AX_MAX_NUM_DRC_CHS; ++drcCh)
    {
        channelVol = *curVol;
        __AXApplyFinalRampOneChannel(&__AXDRCFinalMixBuffer[AX_DRC_ID0][drcCh][0],
                                     &channelVol, rampRate, samplesPerFrame);
    }

    // publish the volume reached after one full frame
    *curVol = channelVol;

} // end of __AXApplyFinalRamp


/*
 * Function:    __AXHandleRamp
 * Arguments:   none
 * Description:
 *          While ramp frames remain (__AXFramesToGo != 0), applies one frame
 *          of the volume ramp and counts the frame down.  When the last
 *          frame has been processed the ramp state becomes AX_RAMP_DONE.
 * */
void __AXHandleRamp( void )
{
    // nothing to do when no ramp is in progress
    if(0 == __AXFramesToGo)
    {
        return;
    }

    __AXApplyFinalRamp(&__AXRampCurVolume, __AXRampRate);

    // one frame less to go
    --__AXFramesToGo;
    if(0 == __AXFramesToGo)
    {
        __AXRampState = AX_RAMP_DONE;
    }
} // end of void __AXHandleRamp(void)

/*
 * Function:  __AXSetRampState
 * Arguments: rampState:    state to set; AX_RAMP_UP or AX_RAMP_DOWN
 * Description:
 *      Arms a ramp: loads the start volume and rate of the requested
 *      direction and sets the number of frames to AX_RAMP_FRAMES.
 *      Any other state is reported through OSReport.  In every case the
 *      state is stored in __AXRampState; __AXHandleRamp() switches it to
 *      AX_RAMP_DONE once the ramp has finished.
 * */
void __AXSetRampState(AX_RAMP_STATE_TYPE  rampState)
{
    if (AX_RAMP_UP == rampState)
    {
        __AXRampCurVolume = __AXRampUpVolumeStart;
        __AXRampRate      = __AXRampUpRate;
        __AXFramesToGo    = AX_RAMP_FRAMES;
    }
    else if (AX_RAMP_DOWN == rampState)
    {
        __AXRampCurVolume = __AXRampDownVolumeStart;
        __AXRampRate      = __AXRampDownRate;
        __AXFramesToGo    = AX_RAMP_FRAMES;
    }
    else
    {
        OSReport("unexpected state in %s : %d\n", __FUNCTION__,rampState);
    }

    // the requested state is recorded even when it was unexpected
    __AXRampState = rampState;
}

/* Returns the current ramp state (the value of __AXRampState). */
AX_RAMP_STATE_TYPE __AXGetRampState(void)
{
    const AX_RAMP_STATE_TYPE currentState = __AXRampState;
    return currentState;
}


/*=============================================================================
 * Function: __AXUpsample32To48
 * Arguments: input     : pointer to the input buffer (Q -8 or Q0 format);
 *                        channel c is read starting at input + numSamples*c
 *            output    : pointer to the output buffer (Q 0 format);
 *                        channel c is written starting at
 *                        output + ((numSamples*48)/32)*c
 *            upSampleStruct : filter state (filter memory, buffer indices and
 *                        phase counter); it is updated by this call
 *            numSamples: number of samples per channel in the input buffer
 *            downshift : bool to specify whether input is in Q-8 or Q0;
 *                        when set the result is shifted right by 8 bits
 *            numChns   : number of channels to process
 * Outputs:   returns the number of output samples written per channel
 * Description:
 *   FIR-filter based upsampler.  Every loop iteration advances a phase
 *   counter, pulls a new input sample into the circular filter memory when
 *   the counter reaches the over-sampling factor, and then produces one
 *   output sample per channel by convolving the filter memory with every
 *   overRateSkip-th coefficient of __AXUpsampleFiltCoeffs, starting at the
 *   current phase.
 *
 *   NOTE(review): the channel loops below step by two and always touch
 *   channel cCnt+1, and convOut holds AX_MAX_NUM_TV_CHS entries, so numChns
 *   is presumably expected to be even and not larger than AX_MAX_NUM_TV_CHS
 *   - confirm against the callers.
=============================================================================*/
static u32 __AXUpsample32To48(s32 *input, s32 *output, AXUpsampleStruct *upSampleStruct, u32 numSamples, BOOL downshift, u32 numChns)
{
    // basic algo
    //  check if we need to fetch a new input sample to filter memory
    //  update startIdx, convolution, and output...
    u32 outCnt=0;                               // output samples produced so far
    u32 shift_val = (downshift? 8 : 0);         // right shift applied to each output
    u32 inSamples = numSamples;                 // input samples still to consume
    u32 outNumSamples = (numSamples*48)/32;     // per-channel stride in the output buffer

    // local copies of the filter geometry
    u32 filtMemLength = upSampleStruct->filtMemLength;
    u32 outIdxInc = upSampleStruct->outIdxInc;
    u32 overRateSkip = upSampleStruct->overRateSkip;


    // one output sample (per channel) is produced per iteration; the loop
    // ends once the last input sample has been consumed
    while(inSamples >0)
    {
        // increment the outIdxCount
        upSampleStruct->outIdxCount += outIdxInc;

        // if the outIdxCount >= overRate, then we need to bring in the new sample
        if(upSampleStruct->outIdxCount >= overRateSkip)
        {
            // get the sample for all the channels into filter memory
            for(u32 chCnt =0 ; chCnt < numChns; chCnt++)
            {
                upSampleStruct->filtMem[chCnt][upSampleStruct->nextSampleIdx] = *(input+numSamples*chCnt);
            }
            input++;

            // reduce the number of samples to process
            inSamples--;

            // the newest sample is where the convolution starts; the write
            // position moves backwards through the buffer and wraps at 0
            upSampleStruct->startIdx = upSampleStruct->nextSampleIdx;
            if(upSampleStruct->nextSampleIdx == 0)
            {
                upSampleStruct->nextSampleIdx = filtMemLength-1;
            }
            else
            {
                upSampleStruct->nextSampleIdx = upSampleStruct->nextSampleIdx - 1;
            }

            // adjust outIdxCount
            upSampleStruct->outIdxCount = upSampleStruct->outIdxCount - overRateSkip;
        }

        // convolutions now..
        f32 convOut[AX_MAX_NUM_TV_CHS] = {0};           // per-channel accumulators
        u32 filtIdx = upSampleStruct->outIdxCount;      // first coefficient (current phase)
        u32 sIdx = upSampleStruct->startIdx;            // newest sample in filter memory
        u32 firstCnt = filtMemLength - sIdx;            // taps from sIdx to the end of the buffer
        u32 secondCnt = sIdx;                           // remaining taps after wrapping to index 0
        s32 sample1, sample2;
        f32 coeff;
        u32 sCnt, cCnt;

        f32 *filtPtr = &__AXUpsampleFiltCoeffs[filtIdx];
        s32 *sampPtr = &upSampleStruct->filtMem[0][sIdx];


        // first part: from the newest sample up to the end of the buffer
        for(sCnt=0; sCnt < firstCnt ; sCnt++)
        {
            coeff = *filtPtr;
            filtPtr = filtPtr + overRateSkip;

            // two channels per step; channel c is reached through an offset
            // of c*filtMemLength from channel 0
            // NOTE(review): assumes each filtMem row is filtMemLength
            // elements long - TODO confirm against AXUpsampleStruct
            for(cCnt = 0; cCnt < numChns; cCnt+=2)
            {
                sample1 = sampPtr[cCnt*filtMemLength];
                sample2 = sampPtr[ (cCnt+1)*filtMemLength];
                convOut[cCnt] = convOut[cCnt] + coeff * (f32) sample1;
                convOut[cCnt+1] = convOut[cCnt+1] + coeff * (f32) sample2;
            }

            sampPtr++;
        }

        // second part: wrap around and continue from the start of the buffer
        sampPtr = &upSampleStruct->filtMem[0][0];

        for(sCnt=0; sCnt < secondCnt ; sCnt++)
        {
            coeff = *filtPtr;
            filtPtr = filtPtr + overRateSkip;

            for(cCnt = 0; cCnt < numChns; cCnt+=2)
            {
                sample1 = sampPtr[cCnt*filtMemLength];
                sample2 = sampPtr[ (cCnt+1)*filtMemLength];
                convOut[cCnt] = convOut[cCnt] + coeff * (f32) sample1;
                convOut[cCnt+1] = convOut[cCnt+1] + coeff * (f32) sample2;
            }


            sampPtr++;
        }


        // scale by the over-sampling factor, convert to integer, apply the
        // optional down shift and store one sample per channel
        // (this cCnt shadows the one declared above)
        for(u32 cCnt =0 ; cCnt < numChns ; cCnt++)
        {
            *(output+ cCnt*outNumSamples) = (((s32) (convOut[cCnt] * overRateSkip)) >> shift_val);
        }
        output++;

        outCnt++;

    } // end of all samples

    return outCnt;
} // end of __AXUpsample32To48
 /*=============================================================================
 * Function: __AXUpsample32To48_linear
 * Arguments: input     : input samples, channels stored back to back
 *            output    : output samples, channels stored back to back
 *            history   : one value per channel: the last input sample of the
 *                        previous call; updated on return
 *            numSamples: number of input samples per channel; consumed in
 *                        pairs (numSamples/2 pairs per channel)
 *            downshift : when set, every output is shifted right by 8 bits
 *            numChns   : number of channels to process
 * Outputs:   returns the number of output samples written for the last
 *            processed channel (three per input pair)
 * Description:
 *   Linear-interpolation upsampler: every pair of input samples yields three
 *   output samples.
=============================================================================*/
 u32 __AXUpsample32To48_linear(s32 *input, s32 *output, s32 *history, u32 numSamples, BOOL downshift, u32 numChns)
{
    static const f32 kTwoThirds = (f32)2.0/(f32)3.0;
    static const f32 kOneThird  = (f32)1.0/(f32)3.0;

    const u32 shiftBits       = (downshift? 8 : 0);
    const u32 pairsPerChannel = numSamples/2;

    u32 produced = 0;

    for(u32 channel = 0; channel < numChns; ++channel)
    {
        // last input sample seen for this channel
        f32 previous = (f32) history[channel];

        produced = 0;
        for(u32 pair = 0; pair < pairsPerChannel; ++pair)
        {
            // 1st output: 2/3 of the previous sample + 1/3 of the first one
            const f32 previousPart = previous * kTwoThirds;
            f32 current            = (f32) input[0];
            const f32 firstThird   = current * kOneThird;
            output[0] = ((s32)(previousPart + firstThird) >> shiftBits);

            // 2nd output: exactly the first sample of the pair
            output[1] = input[0]>> shiftBits;

            // 3rd output: 2/3 of the second sample + 1/3 of the first one
            current = (f32) input[1];
            const f32 secondPart = current * kTwoThirds;
            output[2] = ((s32)(secondPart + firstThird) >> shiftBits);

            // the second sample becomes the history of the next pair
            previous = current;

            // advance to the next input pair / output triple
            input    += 2;
            output   += 3;
            produced += 3;
        }

        history[channel] = (s32) previous;
    }

    return produced;
} // upsample32To48


/*=============================================================================
 * Function: __AXHandleFinalMixCallbackStage
 * Arguments:
 * Outputs:
 * Description:
 *   Function to handle the final mix callback stage of output stage
 *   It does ( for both TV & DRC)
 *      - upsamples the input to 48kHz
 *      - calls the final mix callback functions
 *      - output of this stage is in s32 Q0 format
=============================================================================*/
static void __AXHandleFinalMixCallbackStage(void)
{
    // determine if we need to do upsampling pre-final mix call back or after
    u16 chCnt, deviceCnt;
    u16 numOutSamples =AX_IN_SAMPLES_PER_FRAME;
    BOOL downshift;

    if( !__AXDevicePostFCBUpsample[AX_DEVICE_TV])
    {
    	downshift = 1;

        // if pre final mix callback requested, upsample first
        for(deviceCnt=0 ; deviceCnt<AX_MAX_NUM_TVS ; deviceCnt++)
        {
            if(__AXTVLinearUpsampler[deviceCnt])
            {
                numOutSamples = __AXUpsample32To48_linear(&__AXOutTVBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXTVFinalMixBuffer[deviceCnt][0][0],
                                   &__AXTVHistories[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_TV_CHS);
            }
            else
            {

               numOutSamples = __AXUpsample32To48( &__AXOutTVBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXTVFinalMixBuffer[deviceCnt][0][0],
                                   &__AXTVUpsampleStruct[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_TV_CHS);
            }

            for(chCnt=0 ; chCnt < AX_MAX_NUM_TV_CHS ; chCnt++)
            {
                __AXTVFinalMixPointers[deviceCnt*AX_MAX_NUM_TV_CHS + chCnt] = &__AXTVFinalMixBuffer[deviceCnt][chCnt][0];
            }
        }
    } // end of pre FCB upsample
    else
    {

        // lets assign the pointers to the frame buffers
        // for TV
        for(deviceCnt=0 ; deviceCnt<AX_MAX_NUM_TVS ; deviceCnt++)
        {
            for(chCnt=0 ; chCnt < AX_MAX_NUM_TV_CHS ; chCnt++)
            {
            	// input is in Q-8 format, change it to Q0
            	for(u32 sCnt=0; sCnt < AX_IN_SAMPLES_PER_FRAME; sCnt++)
            	{
            		__AXOutTVBuffer[deviceCnt][__AXOutFrame][chCnt][sCnt] = __AXOutTVBuffer[deviceCnt][__AXOutFrame][chCnt][sCnt] >> 8;
            	}
                __AXTVFinalMixPointers[deviceCnt*AX_MAX_NUM_TV_CHS + chCnt] = &__AXOutTVBuffer[deviceCnt][__AXOutFrame][chCnt][0];
            }
        }
    } // end of post/pre upsample case

    // now set the data to the final mix poitners
        __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].data = __AXTVFinalMixPointers;
        __AXDeviceFinalMixCBStruct[AX_DEVICE_TV].numSamples = numOutSamples ;

        // At this stage: TV final mix data is either at 32k or at 48k, final mix pointers are all set
        // properly, ready to be sent to final mix callback
        // numSamples is also properly set

        numOutSamples =AX_IN_SAMPLES_PER_FRAME;
    if( !__AXDevicePostFCBUpsample[AX_DEVICE_DRC])
    {

    	downshift = 1;
        // if pre final mix callback requested, upsample first
        //for(deviceCnt=0 ; deviceCnt<AX_MAX_NUM_DRCS ; deviceCnt++)
        for(deviceCnt=0 ; deviceCnt<s_ProcessDrcCount; deviceCnt++)
        {


            if(__AXDRCLinearUpsampler[deviceCnt])
            {

               numOutSamples = __AXUpsample32To48_linear( &__AXOutDRCBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXDRCFinalMixBuffer[deviceCnt][0][0],
                                   &__AXDRCHistories[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_DRC_CHS);
            }
            else
            {

               numOutSamples = __AXUpsample32To48( &__AXOutDRCBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXDRCFinalMixBuffer[deviceCnt][0][0],
                                   &__AXDRCUpsampleStruct[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_DRC_CHS);
            }

            for(chCnt=0 ; chCnt < AX_MAX_NUM_DRC_CHS ; chCnt++)
            {
            __AXDRCFinalMixPointers[deviceCnt*AX_MAX_NUM_DRC_CHS + chCnt] = &__AXDRCFinalMixBuffer[deviceCnt][chCnt][0];
            }
        }
    } // end of pre FCB upsample
    else
    {

        // lets assign the pointers to the frame buffers
        // for DRC
        for(deviceCnt=0 ; deviceCnt<s_ProcessDrcCount; deviceCnt++)
        {
            for(chCnt=0 ; chCnt < AX_MAX_NUM_DRC_CHS ; chCnt++)
            {
            	// input is in Q-8 format, change it to Q0
            	for(u32 sCnt=0; sCnt < AX_IN_SAMPLES_PER_FRAME; sCnt++)
            	{
            		__AXOutDRCBuffer[deviceCnt][__AXOutFrame][chCnt][sCnt] = __AXOutDRCBuffer[deviceCnt][__AXOutFrame][chCnt][sCnt] >> 8;
            	}
                __AXDRCFinalMixPointers[deviceCnt*AX_MAX_NUM_DRC_CHS + chCnt] = &__AXOutDRCBuffer[deviceCnt][__AXOutFrame][chCnt][0];
            }
        }
    } // end of post/pre upsample case

    // now set the data to the final mix poitners
        __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].data = __AXDRCFinalMixPointers;
        __AXDeviceFinalMixCBStruct[AX_DEVICE_DRC].numSamples = numOutSamples ;

        // At this stage: DRC final mix data is either at 32k or at 48k, final mix pointers are all set
        // properly, ready to be sent to final mix callback
        // numSamples is also properly set

    // for RMT, we already filled earlier; so just use with the read position
#ifdef AX_SUPPORT_RMT_FINAL_MIX
    __AXDeviceFinalMixCBStruct[AX_DEVICE_RMT].data = &__AXRMTFinalMixPointers[__AXRMTFinalMixReadPos];
    __AXRMTFinalMixReadPos = (__AXRMTFinalMixReadPos+1) & 0x1;
#endif // AX_SUPPORT_RMT_FINAL_MIX


    // ready to call final mix callbacks
    for(deviceCnt = 0 ; deviceCnt < AX_MAX_NUM_DEVICES; deviceCnt++)
    {
        if(NULL != __AXDeviceFinalMixCallback[deviceCnt])
        {
            // flush out the values to memory before doing anything
            for(u16 chCnt = 0; chCnt < __AXDeviceFinalMixCBStruct[deviceCnt].numChnsIn; chCnt++)
            {
//                DCFlushRange(__AXDeviceFinalMixCBStruct[deviceCnt].data[chCnt], __AXDeviceFinalMixCBStruct[deviceCnt].numSamples*sizeof(s32));
            }

            (*__AXDeviceFinalMixCallback[deviceCnt])(&__AXDeviceFinalMixCBStruct[deviceCnt]);

            // update the output channels from FMCB
            __AXDeviceFinalMixOutChannels[deviceCnt] = __AXDeviceFinalMixCBStruct[deviceCnt].numChnsOut;

            // for now doing numChnsIn; later we could just do numChnsOut
            for(u16 chCnt = 0; chCnt < __AXDeviceFinalMixCBStruct[deviceCnt].numChnsIn; chCnt++)
            {
 //               DCFlushRange(__AXDeviceFinalMixCBStruct[deviceCnt].data[chCnt], __AXDeviceFinalMixCBStruct[deviceCnt].numSamples*sizeof(s32));
            }
        }
    } // for all the devices


    // if the upsampling needs to be done before the final mixcb, we are done at this point.
    // now look into post final mixcb upsampling
    // if we are in prefinalmix upsampling, then we are done. otherwise, upsample that particular device out
    if( __AXDevicePostFCBUpsample[AX_DEVICE_DRC])
    {

    	downshift = 0;
        // if pre final mix callback requested, upsample first
        for(deviceCnt=0 ; deviceCnt<s_ProcessDrcCount; deviceCnt++)
        {
            if(__AXDRCLinearUpsampler[deviceCnt])
            {

               numOutSamples = __AXUpsample32To48_linear( &__AXOutDRCBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXDRCFinalMixBuffer[deviceCnt][0][0],
                                   &__AXDRCHistories[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_DRC_CHS);
            }
            else
            {

               numOutSamples = __AXUpsample32To48( &__AXOutDRCBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXDRCFinalMixBuffer[deviceCnt][0][0],
                                   &__AXDRCUpsampleStruct[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_DRC_CHS);
            }

        }
    } // end of pre FCB upsample

    if( __AXDevicePostFCBUpsample[AX_DEVICE_TV])
    {
    	downshift = 0;

        // if pre final mix callback requested, upsample first
        for(deviceCnt=0 ; deviceCnt<AX_MAX_NUM_TVS ; deviceCnt++)
        {
            if(__AXTVLinearUpsampler[deviceCnt])
            {
                numOutSamples = __AXUpsample32To48_linear(&__AXOutTVBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXTVFinalMixBuffer[deviceCnt][0][0],
                                   &__AXTVHistories[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_TV_CHS);
            }
            else
            {

               numOutSamples = __AXUpsample32To48( &__AXOutTVBuffer[deviceCnt][__AXOutFrame][0][0],
                                   &__AXTVFinalMixBuffer[deviceCnt][0][0],
                                   &__AXTVUpsampleStruct[deviceCnt][0],
                                   AX_IN_SAMPLES_PER_FRAME,
                                   downshift, AX_MAX_NUM_TV_CHS);
            }

        }
    } // end of pre FCB upsample
}//static u32 __AXHandleFinalMixCallbackStage(void)
/*=============================================================================
 * Function: __AXHandleCompressor
 * Arguments:
 * Outputs:
 * Description:
 *   Function to handle the compressor stage of output stage
 *   It does ( for both TV & DRC)
 *      - applies compressor on the devices
=============================================================================*/
static void __AXHandleCompressor(void)
{
    // handle compresor for TV
    __AXApplyDeviceCompressor(&__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_LEFT][0],
            AX_DEVICE_TV, AX_TV_ID0);

    // handle compressor for DRCs
    __AXApplyDeviceCompressor(&__AXDRCFinalMixBuffer[AX_DRC_ID0][AX_CH_LEFT][0],
            AX_DEVICE_DRC, AX_DRC_ID0);

    // supressing compressor for second DRC until we can handle it.
#if 0
    __AXApplyDeviceCompressor(&__AXDRCFinalMixBuffer[AX_DRC_ID1][AX_CH_LEFT][0],
            AX_DEVICE_DRC, AX_DRC_ID1);
#endif

    return;
} // static void __AXHandleCompressor(void)
/*=============================================================================
 * Function: __AXSendDRCFrame
 * Arguments:
 * Outputs:
 * Description:
 *   Function to handle sending data to DRC on frame basis
=============================================================================*/
static u32 __AXSendDRCFrame(u32 which_frame)
{
    // Stays FALSE for as long as the AI2 push path at the bottom is compiled out.
    u32 pushedToAI = FALSE;

    // Interleave the final mix of the first DRC into the 16-bit output buffer
    // selected by which_frame.
    // (assume single Stereo only initially, S chans already mixed to front)
    if (__AXDeviceMode[AX_DEVICE_DRC] == AX_MODE_STEREO) // DPL2 aka 4CHAN TBD
    {
        s32* left  = &__AXDRCFinalMixBuffer[AX_DRC_ID0][AX_CH_LEFT][0];
        s32* right = &__AXDRCFinalMixBuffer[AX_DRC_ID0][AX_CH_RIGHT][0];
        s16* dst   = &__AXOutDRCAI2Buffer[which_frame][0];
#if 0
        if (DSP_ONLY)
        {
            DCInvalidateRange(left, AX_OUT_48K_SAMPLES_PER_FRAME *sizeof(s32));
            DCInvalidateRange(right, AX_OUT_48K_SAMPLES_PER_FRAME *sizeof(s32));
        }
#endif
        // Each output pair is written right sample first, then left sample.
        for (s32 i = 0; i < AX_OUT_48K_SAMPLES_PER_FRAME; i++)
        {
            dst[2 * i]     = clamp_sample(right[i]);
            dst[2 * i + 1] = clamp_sample(left[i]);
        }
    }
    else if (__AXDeviceMode[AX_DEVICE_DRC] == AX_MODE_MONO) // DPL2 aka 4CHAN TBD
    {
        s32* left  = &__AXDRCFinalMixBuffer[AX_DRC_ID0][AX_CH_LEFT][0];
        s32* right = &__AXDRCFinalMixBuffer[AX_DRC_ID0][AX_CH_RIGHT][0];
        s16* dst   = &__AXOutDRCAI2Buffer[which_frame][0];
#if 0
        if (DSP_ONLY)
        {
            DCInvalidateRange(left, AX_OUT_48K_SAMPLES_PER_FRAME *sizeof(s32));
            DCInvalidateRange(right, AX_OUT_48K_SAMPLES_PER_FRAME *sizeof(s32));
        }
#endif
        // Mono: average left and right, clamp, and write the same value to
        // both slots of the output pair.
        for (s32 i = 0; i < AX_OUT_48K_SAMPLES_PER_FRAME; i++)
        {
            s32 mixed = right[i];
            mixed += left[i];
            mixed = mixed / 2;
            mixed = clamp_sample(mixed);
            dst[2 * i]     = mixed;
            dst[2 * i + 1] = mixed;
        }
    }
    // Any other device mode leaves the output buffer untouched.

#if 0
    // put the DRC buffer index we just completed into the __DRC2AI2_FIFO
    // DRC always 32K for now
    __DRC2AI2_FIFO[__DRC2AI2_in] = __AXOutFrame;
//    trace(AI_PUSH, __DRC2AI2_in)
        __DRC2AI2_in = (__DRC2AI2_in + 1) & 3;
    if (OSCompareAndSwapAtomic(&__DRC2AI2_empty, TRUE, FALSE)) // (__DRC2AI2_empty)
    {
        // AI2 needs us to push to AI2 DMA
        __AXAI2_push_frame();
        pushedToAI = TRUE;
    }
#endif
    return pushedToAI;
} //static u32 __AXSendDRCFrame(u32 which_frame)


/*=============================================================================
 * Function: __AXApplyRemixMatrix
 * Arguments:
 * Outputs:
 * Description:
 *   Applies remix matrix to the input. Equation is
 *   	O = M * I;
 *   		O is a vector of numOutChns;
 *   		M is a matrix of numOutChns x numInChns
 *   		I is a vector of numInChns
=============================================================================*/
static void __AXApplyRemixMatrix(s32 *finalMixPtr, f32 const *matrix, u32 const numInChns,
                                 u32 const numOutChns, u32 const numSamples)
{
    // finalMixPtr is treated as channel-major storage: channel c, sample s lives
    // at finalMixPtr[c * numSamples + s]. The remix is done in place, one sample
    // position at a time, so every input channel is read into a scratch frame
    // before any output channel is overwritten.
    f32 inFrame[AX_MAX_NUM_TV_CHS];
    s32 outFrame[AX_MAX_NUM_TV_CHS];

    for (u32 s = 0; s < numSamples; s++)
    {
        // gather this sample position from every input channel
        for (u32 in = 0; in < numInChns; in++)
        {
            inFrame[in] = (f32) finalMixPtr[in * numSamples + s];
        }

        // O = M * I, with M stored row-major as numOutChns rows of numInChns coefficients
        for (u32 out = 0; out < numOutChns; out++)
        {
            f32 const *row = matrix + out * numInChns;

            outFrame[out] = 0;
            for (u32 in = 0; in < numInChns; in++)
            {
                // each product is truncated to an integer before it is accumulated
                f32 const coef = row[in];
                outFrame[out] += (s32) (inFrame[in] * coef);
            }
        }

        // scatter the results back over the first numOutChns channels
        for (u32 out = 0; out < numOutChns; out++)
        {
            finalMixPtr[out * numSamples + s] = outFrame[out];
        }
    } // forall samples
}// end of __AXApplyRemixMatrix

/*=============================================================================
 * Function: __AXHandleDeviceModeReMix
 * Arguments:
 * Outputs:
 * Description:
 *   Handles applying the remix matrix for the devices (TV & DRC)
 *   Checks the output channels from FinalMixCallback and the output channels
 *   to AI for the device and apply the corresponding matrix
=============================================================================*/
static void __AXHandleDeviceModeReMix(void)
{
    // lets look at TV first

    BOOL bFound=FALSE;
    u32 device = AX_DEVICE_TV;
    f32 const *matrixPtr;
    for(u32 mCnt=0; mCnt < AX_MAX_REMIX_MATRICES; mCnt++)
    {
        if( (__AXDeviceRemixMatrix[device][mCnt].numInChns == __AXDeviceFinalMixOutChannels[device] ) &&
            (__AXDeviceRemixMatrix[device][mCnt].numOutChns == __AXDeviceOutChannels[device]) &&
            (__AXDeviceRemixMatrix[device][mCnt].matrix != NULL)

            )
        {
            bFound = TRUE;
            matrixPtr = __AXDeviceRemixMatrix[device][mCnt].matrix;
            break;
        }
    } // end of for loop

    if(bFound)
    {
        __AXApplyRemixMatrix(&__AXTVFinalMixBuffer[AX_TV_ID0][AX_CH_LEFT][0], matrixPtr,
                __AXDeviceFinalMixOutChannels[device],
                __AXDeviceOutChannels[device],
                AX_OUT_48K_SAMPLES_PER_FRAME);
    } // apply DeviceModeReMix for TV
    else
    {
        //OSReport("by passed remix for TV in=%d out=%d\n", __AXDeviceFinalMixOutChannels[device], __AXDeviceOutChannels[device]);
    }


    // now lets do it for DRC
    bFound=FALSE;
    device = AX_DEVICE_DRC;
    for(u32 mCnt=0; mCnt < AX_MAX_REMIX_MATRICES; mCnt++)
    {
        if( (__AXDeviceRemixMatrix[device][mCnt].numInChns == __AXDeviceFinalMixOutChannels[device] ) &&
            (__AXDeviceRemixMatrix[device][mCnt].numOutChns == __AXDeviceOutChannels[device]) &&
            (__AXDeviceRemixMatrix[device][mCnt].matrix != NULL)
            )
        {
            bFound = TRUE;
            matrixPtr = __AXDeviceRemixMatrix[device][mCnt].matrix;
            break;
        }
    } // end of for loop

    if(bFound)
    {
        for(u32 deviceId=0; deviceId < s_ProcessDrcCount; deviceId++)
        {
        __AXApplyRemixMatrix(&__AXDRCFinalMixBuffer[deviceId][AX_CH_LEFT][0], matrixPtr,
                __AXDeviceFinalMixOutChannels[device],
                __AXDeviceOutChannels[device],
                AX_OUT_48K_SAMPLES_PER_FRAME);
        }
    } // apply DeviceModeReMix for TV

}// end of __AXHandleDeviceModeReMix

/*=============================================================================
 * Function: AXSetDeviceRemixMatrix
 * Arguments:
 * 				device		: the device to which this matrix should be applied
 * 				numInChns	: number of input channels for which this matrix is applied
 * 				numOutChns	: number of output channels produced by applying the matrix
 * 				matrix		: pointer to an array of f32[numOutChns][numInChns]
 * Outputs:		AXPB_ERROR_CODE
 * Description:
 *   This function registers a remix matrix for the device for a given (numInChns, numOutChns) pair
=============================================================================*/
AXPB_ERROR_CODE AXSetDeviceRemixMatrix(AXPBDeviceType device, u32 const numInChns, u32 const numOutChns, f32 const *matrixPtr)
{
    AXPB_ERROR_CODE retErr = AXPB_ERROR_NONE;

    // Validate the channel counts against what the device type accepts.
    // When both counts are illegal, the out-channel error is the one reported.
    switch(device)
    {

    case AX_DEVICE_TV:
        if(numInChns > AX_MAX_NUM_TV_CHS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_IN_CHANNELS;
        }
        if(numOutChns != AX_STEREO_CHANNELS && numOutChns != AX_SURROUND_CHANNELS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_OUT_CHANNELS;
        }
        break;
    case AX_DEVICE_DRC:
        if (numInChns > AX_MAX_NUM_DRC_CHS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_IN_CHANNELS;
        }
        if(numOutChns != AX_STEREO_CHANNELS && numOutChns != AX_MONO_CHANNELS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_OUT_CHANNELS;
        }
        break;
    case AX_DEVICE_RMT:
        // Remix is not supported for remotes: report it and fail, instead of
        // falling through, registering a matrix, and returning success.
        OSReport("%s not supported for Remote\n", __FUNCTION__);
        retErr = AXPB_ERROR_DEVICE_TYPE;
        break;
    default:
        retErr = AXPB_ERROR_DEVICE_TYPE;
        break;

    } // end of switch(device)

    if(retErr != AXPB_ERROR_NONE)
    {
        return retErr;
    }

    // First choice: a slot already allotted to this (in, out) pair; only its
    // matrix pointer is replaced.
    BOOL bFound=FALSE;
    for(u32 mCnt=0; mCnt < AX_MAX_REMIX_MATRICES; mCnt++)
    {
        if( (__AXDeviceRemixMatrix[device][mCnt].numInChns == numInChns) &&
            (__AXDeviceRemixMatrix[device][mCnt].numOutChns == numOutChns)
            )
        {
            bFound = TRUE;

            // the table is written with interrupts disabled
            BOOL enable = OSDisableInterrupts();
            __AXDeviceRemixMatrix[device][mCnt].matrix = matrixPtr;
            OSRestoreInterrupts(enable);
            break;
        }
    } // end of for loop

    if(!bFound)
    {
        // Otherwise claim the first unused slot; a slot counts as unused when
        // both of its channel counts are zero.
        for(u32 mCnt=0; mCnt < AX_MAX_REMIX_MATRICES; mCnt++)
        {
            if( (__AXDeviceRemixMatrix[device][mCnt].numInChns == 0) &&
                (__AXDeviceRemixMatrix[device][mCnt].numOutChns == 0)
                )
            {
                bFound = TRUE;

                BOOL enable = OSDisableInterrupts();
                __AXDeviceRemixMatrix[device][mCnt].matrix = matrixPtr;
                __AXDeviceRemixMatrix[device][mCnt].numInChns = numInChns;
                __AXDeviceRemixMatrix[device][mCnt].numOutChns = numOutChns;
                OSRestoreInterrupts(enable);
                break;
            }
        } // end of for loop

    } // endof if()

    // the following should never happen, but precautionary: every slot is in
    // use by some other (in, out) pair
    if(!bFound)
    {
        retErr = AXPB_ERRIR_REMIX_EXCEED_MAX_ALLOWED;
    }

    return retErr;
} // end of AXSetDeviceRemixMatrix()

/*=============================================================================
 * Function: AXGetDeviceRemixMatrix
 * Arguments:
 * 				device		: the device to which this matrix should be applied
 * 				numInChns	: number of input channels for which this matrix is applied
 * 				numOutChns	: number of output channels produced by applying the matrix
 * 				matrix		: [out] receives the pointer to the registered f32[numOutChns][numInChns] matrix
 * Outputs:		AXPB_ERROR_CODE
 * Description:
 *   Gets the matrix registered for the device for a given (numInChns, numOutChns) pair
=============================================================================*/
AXPB_ERROR_CODE AXGetDeviceRemixMatrix(AXPBDeviceType device,  u32 const numInChns,  u32 const numOutChns, f32 const **matrixPtr)
{
    AXPB_ERROR_CODE retErr = AXPB_ERROR_NONE;

    // Validate the channel counts against what the device type accepts.
    // When both counts are illegal, the out-channel error is the one reported.
    switch(device)
    {

    case AX_DEVICE_TV:
        if(numInChns > AX_MAX_NUM_TV_CHS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_IN_CHANNELS;
        }
        if(numOutChns != AX_STEREO_CHANNELS && numOutChns != AX_SURROUND_CHANNELS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_OUT_CHANNELS;
        }
        break;
    case AX_DEVICE_DRC:
        if (numInChns > AX_MAX_NUM_DRC_CHS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_IN_CHANNELS;
        }
        if(numOutChns != AX_STEREO_CHANNELS && numOutChns != AX_MONO_CHANNELS)
        {
            retErr = AXPB_ERROR_REMIX_ILLEGAL_OUT_CHANNELS;
        }
        break;
    case AX_DEVICE_RMT:
        // Remix is not supported for remotes: report it and fail, instead of
        // falling through to the table lookup.
        OSReport("%s not supported for Remote\n", __FUNCTION__);
        retErr = AXPB_ERROR_DEVICE_TYPE;
        break;
    default:
        retErr = AXPB_ERROR_DEVICE_TYPE;
        break;

    } // end of switch(device)

    if(retErr != AXPB_ERROR_NONE)
    {
        return retErr;
    }

    // Look up the slot allotted to this (in, out) pair. *matrixPtr is written
    // only when a slot is found; the stored pointer may itself be NULL.
    BOOL bFound=FALSE;
    for(u32 mCnt=0; mCnt < AX_MAX_REMIX_MATRICES; mCnt++)
    {
        if( (__AXDeviceRemixMatrix[device][mCnt].numInChns == numInChns) &&
            (__AXDeviceRemixMatrix[device][mCnt].numOutChns == numOutChns)
            )
        {
            bFound = TRUE;
            *matrixPtr = __AXDeviceRemixMatrix[device][mCnt].matrix;
            break;
        }
    } // end of for loop

    // no slot has been registered for this pair
    if(!bFound)
    {
        retErr = AXPB_ERRIR_REMIX_MATRIX_UNINITIALIZED;
    }

    return retErr;
} // end of AXGetDeviceRemixMatrix()
/*=============================================================================
 * Function: AXSetDeviceUpsampleStage()
 * Arguments:
 * 				device		: the device to which this matrix should be applied
 * 				post		: 1, if upsample stage after final mix callback
 * 							  0, if upsample stage before final mix callback
 * Outputs:		AXPB_ERROR_CODE
 * Description:
 *   Sets whether upsampling is done after the final mix callback or before
=============================================================================*/
AXPB_ERROR_CODE AXSetDeviceUpsampleStage(AXPBDeviceType device, BOOL post)
{
    // Only TV and DRC are accepted; every other device type is rejected.
    const bool isTvOrDrc = (device == AX_DEVICE_TV) || (device == AX_DEVICE_DRC);
    if(!isTvOrDrc)
    {
        return AXPB_ERROR_DEVICE_TYPE;
    }

    // The flag is written with interrupts disabled.
    BOOL enable = OSDisableInterrupts();
    __AXDevicePostFCBUpsample[device] = post;
    OSRestoreInterrupts(enable);

    return AXPB_ERROR_NONE;
} // end of AXSetDeviceUpsampleStage
/*=============================================================================
 * Function: AXGetDeviceUpsampleStage()
 * Arguments:
 * 				device		: the device whose upsample stage is queried
 * 				post		: [out] receives 1, if upsample stage after final mix callback
 * 							  0, if upsample stage before final mix callback
 * Outputs:		AXPB_ERROR_CODE
 * Description:
 *   Gets whether upsampling is done after the final mix callback or before
=============================================================================*/
AXPB_ERROR_CODE AXGetDeviceUpsampleStage(AXPBDeviceType device, BOOL *post)
{
    // Only TV and DRC are accepted; *post is left untouched for anything else.
    const bool isTvOrDrc = (device == AX_DEVICE_TV) || (device == AX_DEVICE_DRC);
    if(!isTvOrDrc)
    {
        return AXPB_ERROR_DEVICE_TYPE;
    }

    // Hand back the flag currently stored for this device type.
    *post = __AXDevicePostFCBUpsample[device];
    return AXPB_ERROR_NONE;
} // end of AXGetDeviceUpsampleStage

/*=============================================================================
 * Function: AXGetDeviceFinalOutput()
 * Arguments:
 * 				device		              : the device to which this matrix should be applied
 * 				dataPtr                   :
 * 				size                      :
 * 			    axDeviceFinalOutputStruct :
 * Outputs:		AXPB_ERROR_CODE
=============================================================================*/
AXPB_ERROR_CODE AXGetDeviceFinalOutput(AXPBDeviceType device, s16* dataPtr, u32 const /* size */, AXDEVICEFINALOUTPUTSTRUCT* axDeviceFinalOutputStruct)
{
    // These two fields are filled in before the device type is checked, so they
    // are written even when the call ends up failing.
    axDeviceFinalOutputStruct->format   = 0; // TODO: fix once the SDK supports this.
    axDeviceFinalOutputStruct->sampFreq = AI_SAMPLERATE_48KHZ;

    if(device != AX_DEVICE_DRC && device != AX_DEVICE_TV)
    {
        return AXPB_ERROR_DEVICE_TYPE;
    }

    if(device == AX_DEVICE_DRC)
    {
        // Known DRC modes report 2 channels; anything else reports none.
        if (__AXDeviceMode[AX_DEVICE_DRC] == AX_MODE_MONO ||
            __AXDeviceMode[AX_DEVICE_DRC] == AX_MODE_STEREO)
        {
            axDeviceFinalOutputStruct->numChannels = 2;
        }
        else
        {
            axDeviceFinalOutputStruct->numChannels = 0;
        }

        unsigned int sampleNum = axDeviceFinalOutputStruct->numChannels * AX_OUT_48K_SAMPLES_PER_FRAME;

        // NOTE(review): the size argument is ignored; dataPtr must have room
        // for sampleNum samples.
        for ( unsigned int idx = 0; idx != sampleNum; ++idx )
        {
            dataPtr[idx] = __AXDRCFinalOutputBuffer[idx];
        }

        axDeviceFinalOutputStruct->length     = sampleNum * 2;
        axDeviceFinalOutputStruct->numDevices = 0;
    }
    else
    {
        // Known TV modes report 2 channels; anything else reports none.
        if (__AXDeviceMode[AX_DEVICE_TV] == AX_MODE_MONO ||
            __AXDeviceMode[AX_DEVICE_TV] == AX_MODE_STEREO ||
            __AXDeviceMode[AX_DEVICE_TV] == AX_MODE_SURROUND ||
            __AXDeviceMode[AX_DEVICE_TV] == AX_MODE_6CHAN ||
            __AXDeviceMode[AX_DEVICE_TV] == AX_MODE_DPL2)
        {
            axDeviceFinalOutputStruct->numChannels = 2;
        }
        else
        {
            axDeviceFinalOutputStruct->numChannels = 0;
        }

        unsigned int sampleNum = axDeviceFinalOutputStruct->numChannels * AX_OUT_48K_SAMPLES_PER_FRAME;

        // NOTE(review): the size argument is ignored; dataPtr must have room
        // for sampleNum samples.
        for ( unsigned int idx = 0; idx != sampleNum; ++idx )
        {
            dataPtr[idx] = __AXTVFinalOutputBuffer[idx];
        }

        axDeviceFinalOutputStruct->length     = sampleNum * 2;
        axDeviceFinalOutputStruct->numDevices = 1;
    }

    return AXPB_ERROR_NONE;
}

/*=============================================================================
 * Function: AXSetDeviceLinearUpsampler()
 * Arguments:
 * 				device		: the device to which this matrix should be applied
 * 				deviceId    : id of the device type to which func applies
 * 			    flag        : TRUE  -> use linear upsampling
 * 			                  FALSE -> use polyphase upsampling
 * Outputs:		AXPB_ERROR_CODE
 * Description:
 *   Selects whether the given device instance uses linear or polyphase upsampling
=============================================================================*/
AXPB_ERROR_CODE AXSetDeviceLinearUpsampler(AXPBDeviceType device, u32 deviceId, BOOL flag)
{
    // Start from the answer for unsupported device types (RMT and anything
    // unknown); the TV and DRC branches overwrite it.
    AXPB_ERROR_CODE errCode = AXPB_ERROR_DEVICE_TYPE;

    // The whole update runs with interrupts disabled.
    BOOL enable = OSDisableInterrupts();

    if(device == AX_DEVICE_TV)
    {
        if(deviceId < AX_MAX_NUM_TVS)
        {
            __AXTVLinearUpsampler[deviceId] = flag;
            errCode = AXPB_ERROR_NONE;
        }
        else
        {
            errCode = AXPB_ERROR_DEVICE_ID;
        }
    }
    else if(device == AX_DEVICE_DRC)
    {
        if(deviceId < AX_MAX_NUM_DRCS)
        {
            __AXDRCLinearUpsampler[deviceId] = flag;
            errCode = AXPB_ERROR_NONE;
        }
        else
        {
            errCode = AXPB_ERROR_DEVICE_ID;
        }
    }

    OSRestoreInterrupts(enable);

    return errCode;
}

// -----------------------------------------------------------------------------
void __AXProcessBusMix(BusBuffer* busBuffer)
{
    ASSERT(busBuffer != NULL);

    // NOTE: AX's official mixing method has not been confirmed. Here every AUX
    // bus in busBuffer is scaled by its return volume and added into the main
    // bus of the same device.

    // ---- TV ----
    {
        // per-AUX gain: return volume / 32768
        f32 tvGains[AX_AUX_ID_MAX_NUM] = {0.0f};
        for (u32 aux = 0; aux < AX_AUX_ID_MAX_NUM; aux++)
        {
            u16 vol = 0;
            AXPB_ERROR_CODE err = AXGetAuxReturnVolume(AX_DEVICE_TV, 0, aux, &vol);
            ASSERT(err == AXPB_ERROR_NONE);

            tvGains[aux] = (f32)(vol) / (32768);
        }

        for (u32 ch = 0; ch < AX_MAX_NUM_TV_CHS; ch++)
        {
            s32* mainBus = busBuffer->buffers[ch];
            for (u32 aux = 0; aux < AX_AUX_ID_MAX_NUM; aux++)
            {
                // AUX bus k of channel ch sits (k+1) channel groups after the main bus
                s32* auxBus = busBuffer->buffers[ch + AX_MAX_NUM_TV_CHS * (aux+1)];
                for (u32 n = 0; n < AX_IN_SAMPLES_PER_FRAME; n++)
                {
                    mainBus[n] += (s32)(auxBus[n] * tvGains[aux]);
                }
            }
        }
    }

    // ---- DRC ----
    // DRC buffers follow all TV buffers; each DRC owns MIX_BUFFER_COUNT_DRC entries.
    u32 firstDrcBuffer = AX_MAX_NUM_TV_CHS * AX_MAX_NUM_BUSES;
    for (u32 drc = 0; drc < s_ProcessDrcCount; drc++)
    {
        f32 drcGains[AX_AUX_ID_MAX_NUM] = {0.0f};
        for (u32 aux = 0; aux < AX_AUX_ID_MAX_NUM; aux++)
        {
            u16 vol = 0;
            AXPB_ERROR_CODE err = AXGetAuxReturnVolume(AX_DEVICE_DRC, drc, aux, &vol);
            ASSERT(err == AXPB_ERROR_NONE);

            drcGains[aux] = (f32)(vol) / (32768);
        }

        u32 drcBase = firstDrcBuffer + drc * MIX_BUFFER_COUNT_DRC;
        for (u32 ch = 0; ch < AX_MAX_NUM_DRC_CHS; ch++)
        {
            s32* mainBus = busBuffer->buffers[ch + drcBase];
            for (u32 aux = 0; aux < AX_AUX_ID_MAX_NUM; aux++)
            {
                s32 auxIndex = ch + AX_MAX_NUM_DRC_CHS * (aux+1) + drcBase;
                s32* auxBus = busBuffer->buffers[auxIndex];
                for (u32 n = 0; n < AX_IN_SAMPLES_PER_FRAME; n++)
                {
                    mainBus[n] += (s32)(auxBus[n] * drcGains[aux]);
                }
            }
        }
    }
}



// -----------------------------------------------------------------------------
// Wave-out callback: renders one AX frame and writes it to `buffer` as
// interleaved 16-bit left/right pairs.
//
//   channels   : forwarded to __AINextCallback and __AXSyncPBs; also used to size
//                the copy into __AXTVFinalOutputBuffer. The mix below always
//                writes two samples (left, right) per output frame.
//   buffer     : destination for `samples` left/right pairs.
//   samples    : number of output frames to produce.
//   sampleRate : 48000 selects the final-mix-callback path, which reads the
//                final mix buffers; any other value reads the 32 kHz out buffers
//                directly and shifts them down by 8 bits.
//
// The whole frame is processed between OSDisableInterrupts() and
// OSEnableInterrupts().
void __AXWaveOutCallbackFunc(
    int channels,
    signed short* buffer,
    unsigned long samples,
    int sampleRate
)
{
    // Give the chained AI callback, if any, first access to the buffer.
    if(__AINextCallback != NULL)
    {
        __AINextCallback(channels, buffer, samples, sampleRate);
    }

    OSDisableInterrupts();

    // Start the frame from silence on every output and AUX bus.
    memset(__AXOutTVBuffer, 0, sizeof(__AXOutTVBuffer) );
    memset(__AXOutDRCBuffer, 0, sizeof(__AXOutDRCBuffer) );
    memset(__AXRmtOutBuffer, 0, sizeof(__AXRmtOutBuffer) );
    __AXClearAuxBus();

    BusBuffer busBuffer;
    {
        // For TV (6 ch x 4 buses)
        for (u32 ch = 0; ch < AX_MAX_NUM_TV_CHS; ch++)
        {
            // main bus
            busBuffer.buffers[ch] = __AXOutTVBuffer[AX_TV_ID0][__AXOutFrame][ch];

            // AUX A to C
            busBuffer.buffers[ch + AX_MAX_NUM_TV_CHS * AX_AUXA_BUS] = __AXGetAuxBus(AX_DEVICE_TV, 0, AX_AUX_ID_A, ch);
            busBuffer.buffers[ch + AX_MAX_NUM_TV_CHS * AX_AUXB_BUS] = __AXGetAuxBus(AX_DEVICE_TV, 0, AX_AUX_ID_B, ch);
            busBuffer.buffers[ch + AX_MAX_NUM_TV_CHS * AX_AUXC_BUS] = __AXGetAuxBus(AX_DEVICE_TV, 0, AX_AUX_ID_C, ch);
        }

        // For DRC (4 ch x 4 buses x 2 devices)
        u32 chOffset = AX_MAX_NUM_TV_CHS * AX_MAX_NUM_BUSES;
        for (u32 id = 0; id < s_ProcessDrcCount; id++)
        {
            for (u32 ch = 0; ch < AX_MAX_NUM_DRC_CHS; ch++)
            {
                u32 base = chOffset + ch + id * MIX_BUFFER_COUNT_DRC;
                // main bus
                busBuffer.buffers[base] = __AXOutDRCBuffer[id][__AXOutFrame][ch];

                // AUX A to C
                busBuffer.buffers[base + AX_MAX_NUM_DRC_CHS * AX_AUXA_BUS] = __AXGetAuxBus(AX_DEVICE_DRC, id, AX_AUX_ID_A, ch);
                busBuffer.buffers[base + AX_MAX_NUM_DRC_CHS * AX_AUXB_BUS] = __AXGetAuxBus(AX_DEVICE_DRC, id, AX_AUX_ID_B, ch);
                busBuffer.buffers[base + AX_MAX_NUM_DRC_CHS * AX_AUXC_BUS] = __AXGetAuxBus(AX_DEVICE_DRC, id, AX_AUX_ID_C, ch);
            }
        }

#if 0 // TODO
        // For RMT: 1 ch x 1 bus x 4 units
        chOffset += AX_MAX_NUM_DRCS * AX_MAX_NUM_DRC_CHS * AX_MAX_NUM_BUSES;
        for (u32 id = 0; id < AX_MAX_NUM_RMTS; id++)
        {
            busBuffer.buffers[chOffset + id] = __AXRmtOutBuffer[id][__AXRmtDspPtr];
        }
#endif
    }

    // Voice rendering
    int voiceCount = __AXSyncPBs(
        channels,
        &busBuffer,
        AX_IN_SAMPLES_PER_FRAME,
        32000
    );


    // AUX processing
    __AXProcessAux();

    // Bus mix
    __AXProcessBusMix(&busBuffer);

    // Final mix
    const signed int* srcTv[AX_MAX_NUM_TV_CHS] = {NULL};
    const signed int* srcDrcs[AX_MAX_NUM_DRCS][AX_MAX_NUM_DRC_CHS] = {NULL};
    signed short* destp = buffer;

    if ( sampleRate == 48000 )
    {
        __AXHandleFinalMixCallbackStage();

        for (u32 ch = 0; ch < AX_MAX_NUM_TV_CHS; ch++)
        {
            srcTv[ch] = __AXTVFinalMixBuffer[AX_TV_ID0][ch];
        }
        for (u32 id = 0; id < s_ProcessDrcCount; id++)
        {
            for (u32 ch = 0; ch < AX_MAX_NUM_DRC_CHS; ch++)
            {
                srcDrcs[id][ch] = __AXDRCFinalMixBuffer[id][ch];
            }
        }

        // The PC can only output 2 channels, so downmix roughly.
        // (The formula below is for TV. DRC is mixed in a similar way.
        //  RMT is ignored.)
        // L = srcL + srcSL + (srcFC / 2) + (srcLFE / 2)
        // R = srcR + srcSR + (srcFC / 2) + (srcLFE / 2)
        for ( unsigned int i = 0; i < samples; i++ )
        {
            // TV
            s32 tv[AX_MAX_NUM_TV_CHS];
            for (u32 ch = 0; ch < AX_MAX_NUM_TV_CHS; ch++)
            {
                tv[ch] = *srcTv[ch]++;
                if ( tv[ch] < -32768 ) tv[ch] = -32768;
                else if ( tv[ch] > 32767 ) tv[ch] = 32767;
            }
            s32 tv_sub = (tv[AX_CH_FC] + tv[AX_CH_LFE]) / 2;
            s32 tv_left  = tv[AX_CH_LEFT]  + tv[AX_CH_SUR_LEFT] + tv_sub;
            s32 tv_right = tv[AX_CH_RIGHT] + tv[AX_CH_SUR_RIGHT] + tv_sub;

            // DRC
            s32 drcs[AX_MAX_NUM_DRCS][AX_MAX_NUM_DRC_CHS];
            for (u32 id = 0; id < s_ProcessDrcCount; id++)
            {
                for (u32 ch = 0; ch < AX_MAX_NUM_DRC_CHS; ch++)
                {
                    drcs[id][ch] = *srcDrcs[id][ch]++;
                    if ( drcs[id][ch] < -32768 ) drcs[id][ch] = -32768;
                    else if ( drcs[id][ch] > 32767 ) drcs[id][ch] = 32767;
                }
            }
            s32 drc_left = 0;
            s32 drc_right = 0;
            for (u32 id = 0; id < s_ProcessDrcCount; id++)
            {
                drc_left += drcs[id][AX_CH_LEFT] + drcs[id][AX_CH_SUR_LEFT];
                drc_right += drcs[id][AX_CH_RIGHT] + drcs[id][AX_CH_SUR_RIGHT];
            }

            // Only devices enabled in s_IsEnableDevice contribute to the output.
            s32 left = 0;
            s32 right = 0;
            if (s_IsEnableDevice[AX_DEVICE_TV]) {
                left += tv_left;
                right += tv_right;
            }
            if (s_IsEnableDevice[AX_DEVICE_DRC]) {
                left += drc_left;
                right += drc_right;
            }
            // MIX -> write to the output buffer
            *destp++= static_cast<s16>(clamp_sample(left));
            *destp++= static_cast<s16>(clamp_sample(right));

        }
    }
    else
    {
        // No final mix stage: read the out buffers directly.
        for (u32 ch = 0; ch < AX_MAX_NUM_TV_CHS; ch++)
        {
            srcTv[ch] = __AXOutTVBuffer[AX_TV_ID0][__AXOutFrame][ch];
        }
        for (u32 id = 0; id < s_ProcessDrcCount; id++)
        {
            for (u32 ch = 0; ch < AX_MAX_NUM_DRC_CHS; ch++)
            {
                srcDrcs[id][ch] = __AXOutDRCBuffer[id][__AXOutFrame][ch];
            }
        }

        for ( unsigned int i = 0; i < samples; i++ )
        {
            // TV
            s32 tv[AX_MAX_NUM_TV_CHS];
            for (u32 ch = 0; ch < AX_MAX_NUM_TV_CHS; ch++)
            {
                tv[ch] = *srcTv[ch]++;
                // shift down by 8 bits before clamping to the 16-bit range
                tv[ch] = tv[ch] >> 8;
                if ( tv[ch] < -32768 ) tv[ch] = -32768;
                else if ( tv[ch] > 32767 ) tv[ch] = 32767;
            }
            s32 tv_sub = (tv[AX_CH_FC] + tv[AX_CH_LFE]) / 2;
            s32 tv_left  = tv[AX_CH_LEFT]  + tv[AX_CH_SUR_LEFT] + tv_sub;
            s32 tv_right = tv[AX_CH_RIGHT] + tv[AX_CH_SUR_RIGHT] + tv_sub;

            // DRC
            s32 drcs[AX_MAX_NUM_DRCS][AX_MAX_NUM_DRC_CHS];
            for (u32 id = 0; id < s_ProcessDrcCount; id++)
            {
                for (u32 ch = 0; ch < AX_MAX_NUM_DRC_CHS; ch++)
                {
                    drcs[id][ch] = *srcDrcs[id][ch]++;
                    drcs[id][ch] = drcs[id][ch] >> 8;
                    if ( drcs[id][ch] < -32768 ) drcs[id][ch] = -32768;
                    else if ( drcs[id][ch] > 32767 ) drcs[id][ch] = 32767;
                }
            }
            s32 drc_left = 0;
            s32 drc_right = 0;
            for (u32 id = 0; id < s_ProcessDrcCount; id++)
            {
                drc_left += drcs[id][AX_CH_LEFT] + drcs[id][AX_CH_SUR_LEFT];
                drc_right += drcs[id][AX_CH_RIGHT] + drcs[id][AX_CH_SUR_RIGHT];
            }

            // MIX -> write to the output buffer.
            // The right output takes the DRC right channel, and the sums are
            // saturated to 16 bits rather than truncated, as in the 48 kHz path.
            // NOTE(review): unlike the 48 kHz path, this path does not consult
            // s_IsEnableDevice - confirm whether that is intended.
            *destp++= static_cast<s16>(clamp_sample(tv_left + drc_left));
            *destp++= static_cast<s16>(clamp_sample(tv_right + drc_right));
        }
    }

    // Keep a copy of what was just produced in __AXTVFinalOutputBuffer.
    {
        signed short* srcp  = buffer;
        signed short* destp = __AXTVFinalOutputBuffer;

        for ( unsigned int i = 0; i < samples * channels; i++ )
        {
            *destp++ = *srcp++;
        }
    }

    if (__AXUserFrameCallback)
        (*__AXUserFrameCallback)();

    // also go through the new APP frame callbacks
    for(u32 i = 0; i < __AXActiveAppFrameCbs; i++)
    {
        (*__AXAppFrameCallbacks[i])();
    }

    __AXLocalProfile.axNumVoices     = AXGetNumVoices();
    __AXLocalProfile.axNumDspVoices  = AXGetNumDspVoices();

    AXPROFILE* profile = __AXGetCurrentProfile();
    __AXLastProfile = profile;

    // Copy the local profile byte by byte into the current profile slot, if any.
    if (profile)
    {
        u8 *src, *dest;
        u32 i;

        i = sizeof(AXPROFILE);

        dest   = (u8*)profile;
        src    = (u8*)&__AXLocalProfile;

        while (i)
        {
            *dest = *src;
            dest++;
            src++;
            i--;
        }
    }

    OSEnableInterrupts();
}


/* ===========================================================================
 *     FUNCTION:  AXRegisterAppFrameCallback
 *  Description:  Adds a frame callback to the list of frame callbacks
 *                registered with AX
 *                The list is already managed, the first empty slot is used
 *       Output:  AXPB_ERROR_CODE
 * ==========================================================================*/
 AXPB_ERROR_CODE  AXRegisterAppFrameCallback(AXUserCallback fcb)
 {
    // check if app sent us a null pointer
    if (NULL == fcb)
    {
        return AXPB_ERROR_FRAMECB_NULL_FUNC;
    }

    // Try to take the callback-list lock without blocking; if it is already
    // held, tell the app to try again.
#if defined(ANDROID) || TARGET_OS_IPHONE
    BOOL mutexLocked = OSTryLockMutex(&__AXAppFrameCBMutex);
#else
    BOOL mutexLocked = ::TryEnterCriticalSection(&__AXAppFrameCBMutex); //OSTryLockMutex(&__AXAppFrameCBMutex);
#endif

    if(!mutexLocked)
    {
        // someone is using, tell app to try again
        return AXPB_ERROR_FRAMECB_TRY_AGAIN;
    }

    // From here on the lock is held: every path must fall through to the
    // unlock below. An early return on a full list would leave the lock held.
    AXPB_ERROR_CODE errCode = AXPB_ERROR_NONE;

    // check if the max depth is already reached
    if( AX_MAX_APP_FRAME_CBS == __AXActiveAppFrameCbs)
    {
        errCode = AXPB_ERROR_FRAMECB_MAX_DEPTH_REACHED;
    }
    else
    {
        BOOL enable = OSDisableInterrupts();

        // there is room: append the callback at the end of the list
        __AXAppFrameCallbacks[__AXActiveAppFrameCbs] = fcb;
        __AXActiveAppFrameCbs++;

        OSRestoreInterrupts(enable);
    }

#if defined(ANDROID) || TARGET_OS_IPHONE
    OSUnlockMutex(&__AXAppFrameCBMutex);
#else
    ::LeaveCriticalSection(&__AXAppFrameCBMutex); // OSUnlockMutex(&__AXAppFrameCBMutex);
#endif

    return errCode;
 } // end of AXRegisterAppFrameCallback

 /* ===========================================================================
 *     FUNCTION:  AXDeregisterAppFrameCallback
 *  Description:  Removes a previously registered frame callback from the
 *                list of application frame callbacks registered with AX.
 *                The remaining entries are shifted down so the list stays
 *                densely packed, and the vacated last slot is cleared.
 *        Input:  fcb - callback to remove; must not be NULL
 *       Output:  AXPB_ERROR_NONE                on success
 *                AXPB_ERROR_FRAMECB_NULL_FUNC   if fcb is NULL
 *                AXPB_ERROR_FRAMECB_NOT_FOUND   if fcb is not registered
 *                AXPB_ERROR_FRAMECB_TRY_AGAIN   if the list lock could not be
 *                                               taken right now
 * ==========================================================================*/
 AXPB_ERROR_CODE  AXDeregisterAppFrameCallback(AXUserCallback fcb)
 {
    // check if app sent us a null pointer
    if (NULL == fcb)
    {
        return AXPB_ERROR_FRAMECB_NULL_FUNC;
    }

    // Non-blocking acquire: if the lock is already held we report
    // "try again" instead of waiting.
#if defined(ANDROID) || TARGET_OS_IPHONE
    BOOL mutexLocked = OSTryLockMutex(&__AXAppFrameCBMutex);
#else
    BOOL mutexLocked = ::TryEnterCriticalSection(&__AXAppFrameCBMutex); // OSTryLockMutex(&__AXAppFrameCBMutex);
#endif

    if (!mutexLocked)
    {
        // someone is using, tell app to try again
        return AXPB_ERROR_FRAMECB_TRY_AGAIN;
    }

    AXPB_ERROR_CODE errCode = AXPB_ERROR_FRAMECB_NOT_FOUND;

    // Only the first __AXActiveAppFrameCbs slots hold live entries, so the
    // search is limited to them.
    u32 location = 0;
    while (location < __AXActiveAppFrameCbs &&
           __AXAppFrameCallbacks[location] != fcb)
    {
        location++;
    }

    if (location < __AXActiveAppFrameCbs)
    {
        // found the location of the previously registered function;
        // take it out of the list and close the gap.
        BOOL enable = OSDisableInterrupts();

        // Shift the tail down by one. The bound is "i + 1 < count" so the
        // loop never reads the slot past the last active entry, which would
        // be outside the array when the list is full (assumes the array has
        // AX_MAX_APP_FRAME_CBS entries, as the Register capacity check implies).
        for (u32 i = location; i + 1 < __AXActiveAppFrameCbs; i++)
        {
            __AXAppFrameCallbacks[i] = __AXAppFrameCallbacks[i + 1];
        }

        // Clear the now-unused last active slot, then shrink the list.
        __AXAppFrameCallbacks[__AXActiveAppFrameCbs - 1] = NULL;
        __AXActiveAppFrameCbs--;

        OSRestoreInterrupts(enable);

        errCode = AXPB_ERROR_NONE;
    }

#if defined(ANDROID) || TARGET_OS_IPHONE
    OSUnlockMutex(&__AXAppFrameCBMutex);
#else
    ::LeaveCriticalSection(&__AXAppFrameCBMutex); // OSUnlockMutex(&__AXAppFrameCBMutex);
#endif

    return errCode;
 } // end of AXDeregisterAppFrameCallback

// -----------------------------------------------------------------------------
// Stores the enable flag for the given device type in s_IsEnableDevice.
// `type` is used directly as the array index and is range-checked by ASSERT.
void AXSetEnableDevice(AXPBDeviceType type, bool enable)
{
    ASSERT(0 <= type && type < AX_MAX_NUM_DEVICES);

    bool& deviceFlag = s_IsEnableDevice[type];
    deviceFlag = enable;
}

// -----------------------------------------------------------------------------
// Returns the enable flag currently stored for the given device type.
// `type` is used directly as the array index and is range-checked by ASSERT.
bool AXGetEnableDevice(AXPBDeviceType type)
{
    ASSERT(0 <= type && type < AX_MAX_NUM_DEVICES);

    const bool isEnabled = s_IsEnableDevice[type];
    return isEnabled;
}

        } // namespace winext
    } // namespace internal
} // namespace nw
