﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <cmath>

#include <nn/nn_Common.h>
#include <nn/nn_Macro.h>
#include <nn/nn_SdkAssert.h>
#include "../common/audio_BuildDefinition.h"

#include <nn/result/result_HandlingUtility.h>

#include <nn/audio/audio_Common.h>
#include <nn/audio/audio_PolyphaseResampler.h>
#include <nn/audio/audio_Result.h>
#include "audio_PolyphaseResampler.h"

namespace nn { namespace audio {

namespace {

    /*
     * Parameters of the window used to build the prototype filter.
     * All fields are filled in by initializeWindowState().
     */
    struct WindowStateType
    {
        double      scaleFactor;            // 1 / I0(piAlpha); normalizes window values
        double      oneOverNMinusOne;       // 1 / (numberOfTaps - 1)
        double      piAlpha;                // pi * beta
        double      downScaleFactor;        // divisor applied to the sinc argument
        double      initialArgumentValue;   // -(numberOfTaps - 1) / 2; sinc argument offset of tap 0
        double      gain;                   // sum each filter phase is normalized to
        uint32_t    numberOfTaps;           // taps covered by the window; indices beyond it yield 0
    };

    /*
     * One entry of the table built by makePhaseAndCounterMap().
     */
    struct PhaseAndCountType
    {
        uint8_t stuffCount;     // difference between consecutive values of (index * m) / l
        uint8_t phaseIndex;     // (index * m) % l
    };

    /*
     * Bookkeeping that decides, per output frame, how many input frames to consume
     * and which filter kernel to apply. Initialized by prepareDda(); the field order
     * is kept exactly as it was.
     */
    struct DdaStateType
    {
        uint8_t     interpolationFactor;    // reduced interpolation factor (stored truncated to 8 bits)
        uint8_t     decimationFactor;       // reduced decimation factor (stored truncated to 8 bits)
        uint8_t     stuffIndex;             // input frames consumed so far for the pending output
        uint8_t     stuffCount;             // input frames to consume before the next output
        uint8_t     noStuffCount;           // zeroed by prepareDda(); not referenced by the generic filters
        uint8_t     interpolationIndex;     // kernel used for the next output; wraps at interpolationFactor
        uint8_t     minStuff;               // smaller of the two stuffCount values
        uint8_t     maxStuff;               // larger of the two stuffCount values
        int16_t     ddaTerm;                // added to phase once per output frame
        int16_t     phase;                  // running accumulator
        int16_t     spare0;                 // not referenced in the code visible here
        int16_t     spare1;                 // not referenced in the code visible here
    };

    /* Declared up front so the filter callback type can name the state it operates on. */
    struct _ResampleState;
    typedef struct _ResampleState ResampleStateType;

    /*
     * Signature shared by the filter routines: (state, input, output, input frame count).
     * The generic filters return the number of output frames they wrote.
     */
    extern "C"
    {
        typedef uint32_t (*pfnFilterFunction)(ResampleStateType*, const int16_t*, int16_t*, uint32_t);
    }

    /*
     * Complete resampler instance.
     */
    struct _ResampleState
    {
        DdaStateType        ddaState;           // input/output pacing state
        int16_t**           pFilterKernels;     // one kernel (numberOfTaps coefficients) per interpolation index
        int16_t**           pDelayLine;         // one history buffer (numberOfTaps samples) per channel
        uint32_t            numberOfTaps;       // length of each kernel and each delay line
        uint32_t            numberOfChannels;   // samples per interleaved frame
        pfnFilterFunction   pfnApplyFilter;     // routine selected by prepareDda()
    };

    constexpr double piApproximateValue             = 3.1415926535897932384626433832795;

    /* Derating values; initializePolyphaseResampler() picks one pair according to the quality flag. */
    constexpr double highQualityDecimatorDerating   = 1.075;
    constexpr double lowQualityDecimatorDerating    = 1.1;
    constexpr double highQualityInterpolatorDerating = 1.089;
    constexpr double lowQualityInterpolatorDerating = 1.11;

    /* calculateI0() stops once successive series terms differ by no more than this. */
    constexpr double epsilon                        = 0.0000001;
    /* Below this |argument| sinc() uses 1 - x^2/6 instead of sin(x)/x. */
    constexpr double sincApproximationThreshold     = 0.01;

    /* Taps per kernel = quality multiplier * filterSizeGranularity (16 or 32). */
    constexpr uint32_t filterSizeGranularity        = 16;
    constexpr uint32_t lowQualityFilterMultiplier   = 1;    /* must be set to 1 */
    constexpr uint32_t highQualityFilterMultiplier  = 2;    /* must be set to 2 */

    /* Presumably the window beta per quality level; the use is outside the code visible here. */
    constexpr double lowQualityBeta                 = 2.12;
    constexpr double highQualityBeta                = 3.384;

    /* Coefficients are quantized by multiplying with 2^15. */
    constexpr double S15_SCALE_FACTOR               = static_cast<double>(1 << 15);
    constexpr int32_t INT_S15_SCALE_FACTOR          = (1 << 15);

    /* Reduced interpolation/decimation factors must be strictly below this. */
    constexpr uint32_t maximumInterpolationFactor = 256;
    constexpr uint32_t outputBufferSizeEstimationPadding = 2;       // Overestimate the output buffer size by two samples

    /*
     * Sums the series  sum_k ((arg / 2)^k / k!)^2  (the zeroth-order modified Bessel
     * function of the first kind). At least four terms are summed; after that the
     * loop ends once two successive terms differ by no more than epsilon.
     */
    double  calculateI0(double arg) NN_NOEXCEPT
    {
        const double    halfArg = 0.5 * arg;
        double          sum = 0.0;
        double          term = 1.0;
        uint32_t        k = 1;

        for(;;)
        {
            sum += term;

            const double previousTerm = term;
            const double quotient = halfArg / static_cast<double>(k);
            term *= quotient * quotient;
            ++k;

            const double change = term - previousTerm;
            const double magnitude = (change < 0.0) ? -change : change;
            // Written as !(a > b) so a NaN difference still terminates, as before.
            if((k >= 5) && !(magnitude > epsilon))
            {
                break;
            }
        }
        return sum;
    }

    /*
     * Fills every field of *pState from the window shape (beta), the sinc argument
     * divisor (downScaleFactor), the tap count and the target gain.
     */
    void    initializeWindowState(WindowStateType* pState, double beta, double downScaleFactor, uint32_t numberOfTaps, double gain) NN_NOEXCEPT
    {
        const double piAlpha = piApproximateValue * beta;
        const double tapCount = static_cast<double>(numberOfTaps);

        pState->gain = gain;
        pState->downScaleFactor = downScaleFactor;
        pState->numberOfTaps = numberOfTaps;
        pState->piAlpha = piAlpha;
        // Reciprocal of the window's peak value so the window tops out at 1.
        pState->scaleFactor = 1.0 / calculateI0(piAlpha);
        pState->oneOverNMinusOne = 1.0 / static_cast<double>(numberOfTaps - 1);
        // Centers the sinc argument on the middle of the tap range.
        pState->initialArgumentValue = -(tapCount - 1.0) / 2.0;
    }

    /*
     * Window value for tap `index`:
     *     I0(piAlpha * sqrt(1 - (2 * index / (N - 1) - 1)^2)) / I0(piAlpha)
     * (the Kaiser window formula). Indices at or beyond numberOfTaps yield 0.
     */
    double  calculateWindowCoefficient(WindowStateType* pState, uint32_t index) NN_NOEXCEPT
    {
        if(index >= pState->numberOfTaps)
        {
            return 0.0;
        }

        // Position of the tap mapped onto [-1, 1].
        const double position = (static_cast<double>(2 * index) * pState->oneOverNMinusOne) - 1.0;
        const double radicand = 1.0 - (position * position);
        return calculateI0(pState->piAlpha * sqrt(radicand)) * pState->scaleFactor;
    }

    /*
     * Normalized sinc: sin(pi * x) / (pi * x).
     * Close to zero the quotient is replaced by the polynomial 1 - (pi * x)^2 / 6,
     * which also avoids the 0 / 0 at x == 0.
     */
    double  sinc(double normalizedArg) NN_NOEXCEPT
    {
        const double arg = piApproximateValue * normalizedArg;
        const double magnitude = (arg < 0.0) ? -arg : arg;

        if(magnitude < sincApproximationThreshold)
        {
            return 1.0 - (arg * arg / 6.0);
        }
        return sin(arg) / arg;
    }

    /*
     * Coefficient `index` of the prototype filter: window value times the sinc
     * evaluated at (initialArgumentValue + index) / downScaleFactor.
     */
    double  calculateFilterElement(WindowStateType* pState, uint32_t index) NN_NOEXCEPT
    {
        const double sincArgument = (pState->initialArgumentValue + static_cast<double>(index)) / pState->downScaleFactor;
        return calculateWindowCoefficient(pState, index) * sinc(sincArgument);
    }

    /*
     * Pushes one interleaved input frame into the delay lines: every channel's
     * history moves down by one sample and input[channel] becomes the newest entry.
     */
    void        shiftDelayLine(struct _ResampleState* pState, const int16_t* input) NN_NOEXCEPT
    {
        // Signed on purpose: a tap count of zero gives -1 and the copy loop is skipped.
        const int32_t newestTap = static_cast<int32_t>(pState->numberOfTaps - 1);

        for(uint32_t channel = 0; channel < pState->numberOfChannels; ++channel)
        {
            int16_t* const  line = pState->pDelayLine[channel];
            int32_t         tap = 0;

            while(tap < newestTap)
            {
                line[tap] = line[tap + 1];
                ++tap;
            }
            line[tap] = input[channel];
        }
    }

    /*
     * Carries the input frames a filter call has consumed into the delay lines.
     * delayIndex is the number of frames at `input` to carry over:
     *  - fewer than numberOfTaps: they are shifted in one frame at a time;
     *  - otherwise: each delay line is overwritten with numberOfTaps de-interleaved
     *    samples starting at `input`.
     */
    void    refillDelayLine(struct _ResampleState* pState, const int16_t* input, uint32_t delayIndex) NN_NOEXCEPT
    {
        const uint32_t channelCount = pState->numberOfChannels;

        if(delayIndex >= pState->numberOfTaps)
        {
            for(uint32_t channel = 0; channel < channelCount; ++channel)
            {
                int16_t* const line = pState->pDelayLine[channel];

                for(uint32_t tap = 0; tap < pState->numberOfTaps; ++tap)
                {
                    line[tap] = input[(tap * channelCount) + channel];
                }
            }
            return;
        }

        for(uint32_t frame = 0; frame < delayIndex; ++frame)
        {
            shiftDelayLine(pState, input);
            input = &input[channelCount];
        }
    }
    /*
     * Generic filter that prepareDda() installs when interpolationFactor > decimationFactor
     * (in NEON builds only for tap/channel combinations without a specialized routine).
     *
     * one or more output samples are computed for each input sample.
     * stuffIndex is always zero (this function never modifies it), so stuffCount
     * acts as a "consume one input frame before the next output" flag.
     * one output sample is computed per iteration.
     *
     * input and output are indexed as interleaved frames of numberOfChannels samples.
     * Returns the number of output frames written.
     */
    extern "C" uint32_t interpolatingFilter(struct _ResampleState* pState, const int16_t* input, int16_t* output, uint32_t inputSampleCount) NN_NOEXCEPT
    {
        uint32_t        rval=0;
        DdaStateType*   pDda;
        uint32_t        delayIndex;

        pDda = &pState->ddaState;
        // delayIndex = number of frames at `input` that are part of the filter window.
        // The window is pDelay[delayIndex .. numberOfTaps) followed by input[0 .. delayIndex).
        delayIndex = 0;
        while(inputSampleCount) {
            int32_t     accumulator;
            int16_t*    pKernel;
            int16_t*    pDelay;
            uint32_t    index;
            uint32_t    cIndex;
            uint32_t    kIndex;
            uint32_t    sIndex;
            int32_t     product;

            // Both counters are compared as signed 8-bit values.
            if(static_cast<int8_t>(pDda->stuffIndex) < static_cast<int8_t>(pDda->stuffCount))
            {
                // Take one more input frame into the window: grow it until it lies
                // entirely inside `input`, afterwards slide the input pointer instead.
                if(delayIndex < pState->numberOfTaps)
                {
                    delayIndex++;
                }
                else
                {
                    input = &input[pState->numberOfChannels];
                }
                inputSampleCount--;
            }
            // Advance the accumulator; on reaching interpolationFactor (unsigned compare)
            // wrap it and request an input frame before the next output.
            pDda->phase += pDda->ddaTerm;
            if(static_cast<uint16_t>(pDda->phase) >= static_cast<uint16_t>(pDda->interpolationFactor))
            {
                pDda->phase = static_cast<int16_t>(static_cast<uint16_t>(pDda->phase) - static_cast<uint16_t>(pDda->interpolationFactor));
                pDda->stuffCount = 1;
            }
            else
            {
                pDda->stuffCount = 0;
            }

            // Kernel for the current interpolation index, applied to every channel.
            pKernel = pState->pFilterKernels[pDda->interpolationIndex];
            for(cIndex = 0; cIndex < pState->numberOfChannels; cIndex++)
            {
                accumulator = 0;
                kIndex=0;
                pDelay = pState->pDelayLine[cIndex];
                // Older part of the window: what is still needed from the delay line.
                for(index = delayIndex; index < pState->numberOfTaps; index++)
                {
                    product = static_cast<int32_t>(pDelay[index]) * static_cast<int32_t>(pKernel[kIndex]);
                    accumulator += product;
                    kIndex++;
                }
                // Newer part of the window: this channel's samples from the interleaved input.
                sIndex = cIndex;
                for(index = 0; index < delayIndex; index++)
                {
                    product = static_cast<int32_t>(input[sIndex]) * static_cast<int32_t>(pKernel[kIndex]);
                    accumulator += product;
                    kIndex++;
                    sIndex += pState->numberOfChannels;
                }
                // Coefficients were scaled by 2^15; shift back. The result is not saturated.
                output[cIndex] = static_cast<int16_t>(accumulator >> 15);
            }
            output = &output[pState->numberOfChannels];
            rval++;

            // Cycle through the kernels 0 .. interpolationFactor - 1.
            pDda->interpolationIndex++;
            if(pDda->interpolationIndex == pDda->interpolationFactor)
            {
                pDda->interpolationIndex = 0;
            }
        }
        // Keep the consumed frames as history for the next call.
        refillDelayLine(pState, input, delayIndex);
        return rval;
    }
    /*
     * Generic filter that prepareDda() installs when decimationFactor > interpolationFactor
     * (in NEON builds only for tap/channel combinations without a specialized routine).
     *
     * per output sample, at least one input sample must be consumed: before each
     * output, stuffCount input frames are taken, where stuffCount alternates between
     * minStuff and maxStuff under control of the phase accumulator.
     *
     * input and output are indexed as interleaved frames of numberOfChannels samples.
     * If the input runs out while frames are still owed for the next output, the
     * partial progress stays in stuffIndex and the call returns; this includes a call
     * with inputSampleCount == 0.
     * Returns the number of output frames written.
     */
    extern "C" uint32_t decimatingFilter(struct _ResampleState* pState, const int16_t* input, int16_t* output, uint32_t inputSampleCount) NN_NOEXCEPT
    {
        uint32_t        rval=0;
        DdaStateType*   pDda;
        uint32_t        delayIndex;

        pDda = &pState->ddaState;
        // delayIndex = number of frames at `input` that are part of the filter window.
        // The window is pDelay[delayIndex .. numberOfTaps) followed by input[0 .. delayIndex).
        delayIndex = 0;
        do {
            int32_t     accumulator;
            int16_t*    pKernel;
            int16_t*    pDelay;
            uint32_t    index;
            uint32_t    cIndex;
            uint32_t    kIndex;
            uint32_t    sIndex;
            int32_t     product;

            // Both counters are compared as signed 8-bit values.
            while(static_cast<int8_t>(pDda->stuffIndex) < static_cast<int8_t>(pDda->stuffCount))
            {
                if(inputSampleCount == 0)
                {
                    // Only reachable when the function is entered without any input:
                    // consuming a frame here would wrap the unsigned counter and read
                    // past the end of the input buffer.
                    refillDelayLine(pState, input, delayIndex);
                    return rval;
                }
                // Take one more input frame into the window: grow it until it lies
                // entirely inside `input`, afterwards slide the input pointer instead.
                if(delayIndex < pState->numberOfTaps)
                {
                    delayIndex++;
                }
                else
                {
                    input = &input[pState->numberOfChannels];
                }
                pDda->stuffIndex++;
                inputSampleCount--;
                if((inputSampleCount == 0) && (pDda->stuffIndex != pDda->stuffCount))
                {
                    // Input exhausted before the next output is due; resume on the next call.
                    refillDelayLine(pState, input, delayIndex);
                    return rval;
                }
            }
            // Advance the accumulator; a negative phase is wrapped by adding
            // interpolationFactor and selects minStuff, otherwise maxStuff is used.
            pDda->phase += pDda->ddaTerm;
            if(pDda->phase < 0)
            {
                pDda->phase = static_cast<int16_t>(static_cast<uint16_t>(pDda->phase) + static_cast<uint16_t>(pDda->interpolationFactor));
                pDda->stuffCount = pDda->minStuff;
            }
            else
            {
                pDda->stuffCount = pDda->maxStuff;
            }
            pDda->stuffIndex = 0;

            // Kernel for the current interpolation index, applied to every channel.
            pKernel = pState->pFilterKernels[pDda->interpolationIndex];
            for(cIndex = 0; cIndex < pState->numberOfChannels; cIndex++)
            {
                accumulator = 0;
                kIndex=0;
                pDelay = pState->pDelayLine[cIndex];
                // Older part of the window: what is still needed from the delay line.
                for(index = delayIndex; index < pState->numberOfTaps; index++)
                {
                    product = static_cast<int32_t>(pDelay[index]) * static_cast<int32_t>(pKernel[kIndex]);
                    accumulator += product;
                    kIndex++;
                }
                // Newer part of the window: this channel's samples from the interleaved input.
                sIndex = cIndex;
                for(index = 0; index < delayIndex; index++)
                {
                    product = static_cast<int32_t>(input[sIndex]) * static_cast<int32_t>(pKernel[kIndex]);
                    accumulator += product;
                    kIndex++;
                    sIndex += pState->numberOfChannels;
                }
                // Coefficients were scaled by 2^15; shift back. The result is not saturated.
                output[cIndex] = static_cast<int16_t>(accumulator >> 15);
            }
            output = &output[pState->numberOfChannels];
            rval++;

            // Cycle through the kernels 0 .. interpolationFactor - 1.
            pDda->interpolationIndex++;
            if(pDda->interpolationIndex == pDda->interpolationFactor)
            {
                pDda->interpolationIndex = 0;
            }
        } while(inputSampleCount);
        // Keep the consumed frames as history for the next call.
        refillDelayLine(pState, input, delayIndex);
        return rval;
    }

    /*
     * Filter installed by prepareDda() when both factors are equal: it touches
     * neither buffer and reports zero output frames.
     * NOTE(review): pfnFilterFunction and the other two generic filters are declared
     * extern "C" while this function is not - confirm whether that is intentional.
     */
    uint32_t    nopFilter(struct _ResampleState* pState, const int16_t* input, int16_t* output, uint32_t inputCount) NN_NOEXCEPT
    {
        // All parameters exist only to match the callback signature.
        NN_UNUSED(inputCount);
        NN_UNUSED(output);
        NN_UNUSED(input);
        NN_UNUSED(pState);

        return 0;
    }

    /*
     * Resets the pacing state for the given (already reduced) factors and selects
     * the filter routine:
     *  - interpolationFactor > decimationFactor: an interpolating filter,
     *  - decimationFactor > interpolationFactor: a decimating filter,
     *  - equal factors: nopFilter.
     * In NEON builds, specialized routines are chosen for 16/32 taps with 1, 2 or 6
     * channels; everything else uses the generic C++ filters.
     *
     * Returns 1 when a resampling filter was installed and 0 for the equal-factor case
     * (in which minStuff, maxStuff and ddaTerm are left untouched).
     */
    int32_t     prepareDda(ResampleStateType* pState, uint8_t startStuffCount,
                           uint32_t interpolationFactor, uint32_t decimationFactor) NN_NOEXCEPT
    {
        DdaStateType& dda = pState->ddaState;

        dda.interpolationFactor = static_cast<uint8_t>(interpolationFactor);
        dda.decimationFactor = static_cast<uint8_t>(decimationFactor);
        dda.stuffIndex = 0;
        dda.stuffCount = startStuffCount;
        dda.noStuffCount = 0;
        dda.interpolationIndex = 0;
        dda.phase = 0;

        if(interpolationFactor == decimationFactor)
        {
            pState->pfnApplyFilter = nopFilter;
            return 0;
        }

#if defined(NN_AUDIO_ENABLE_AARCH64_NEON)
        const bool has16Taps = (pState->numberOfTaps == (lowQualityFilterMultiplier * filterSizeGranularity));
        const bool has32Taps = (pState->numberOfTaps == (highQualityFilterMultiplier * filterSizeGranularity));
#endif

        if(interpolationFactor > decimationFactor)
        {
            // At most one input frame is consumed per output frame.
            dda.minStuff = 0;
            dda.maxStuff = 1;
            dda.ddaTerm = static_cast<int16_t>(decimationFactor);

            pfnFilterFunction selected = interpolatingFilter;
#if defined(NN_AUDIO_ENABLE_AARCH64_NEON)
            if(has16Taps)
            {
                switch(pState->numberOfChannels)
                {
                case 1:
                    selected = reinterpret_cast<pfnFilterFunction>(interpolating1Ch16TapFilter);
                    break;
                case 2:
                    selected = reinterpret_cast<pfnFilterFunction>(interpolating2Ch16TapFilter);
                    break;
                case 6:
                    selected = reinterpret_cast<pfnFilterFunction>(interpolating6Ch16TapFilter);
                    break;
                default:
                    break;
                }
            }
            else if(has32Taps)
            {
                switch(pState->numberOfChannels)
                {
                case 1:
                    selected = reinterpret_cast<pfnFilterFunction>(interpolating1Ch32TapFilter);
                    break;
                case 2:
                    selected = reinterpret_cast<pfnFilterFunction>(interpolating2Ch32TapFilter);
                    break;
                case 6:
                    selected = reinterpret_cast<pfnFilterFunction>(interpolating6Ch32TapFilter);
                    break;
                default:
                    break;
                }
            }
#endif
            pState->pfnApplyFilter = selected;
            return 1;
        }

        // decimationFactor > interpolationFactor:
        // floor and ceiling of decimationFactor / interpolationFactor input frames per output.
        const uint32_t minStuff = decimationFactor / interpolationFactor;
        const uint32_t maxStuff = (decimationFactor + interpolationFactor - 1) / interpolationFactor;

        dda.ddaTerm = static_cast<int16_t>(decimationFactor - (maxStuff * interpolationFactor));
        dda.minStuff = static_cast<uint8_t>(minStuff);
        dda.maxStuff = static_cast<uint8_t>(maxStuff);

        pfnFilterFunction selected = decimatingFilter;
#if defined(NN_AUDIO_ENABLE_AARCH64_NEON)
        if(has16Taps)
        {
            switch(pState->numberOfChannels)
            {
            case 1:
                selected = reinterpret_cast<pfnFilterFunction>(decimating1Ch16TapFilter);
                break;
            case 2:
                selected = reinterpret_cast<pfnFilterFunction>(decimating2Ch16TapFilter);
                break;
            case 6:
                selected = reinterpret_cast<pfnFilterFunction>(decimating6Ch16TapFilter);
                break;
            default:
                break;
            }
        }
        else if(has32Taps)
        {
            switch(pState->numberOfChannels)
            {
            case 1:
                selected = reinterpret_cast<pfnFilterFunction>(decimating1Ch32TapFilter);
                break;
            case 2:
                selected = reinterpret_cast<pfnFilterFunction>(decimating2Ch32TapFilter);
                break;
            case 6:
                selected = reinterpret_cast<pfnFilterFunction>(decimating6Ch32TapFilter);
                break;
            default:
                break;
            }
        }
#endif
        pState->pfnApplyFilter = selected;
        return 1;
    }

    /*
     * Fills map[0 .. l): for entry i, phaseIndex = (i * m) % l and stuffCount is the
     * increase of (i * m) / l since the previous entry. Entry 0 finally receives the
     * wrap-around step m - ((l - 1) * m) / l; this write happens even when l is 0.
     * Both values are stored truncated to 8 bits.
     */
    void    makePhaseAndCounterMap(PhaseAndCountType* map, uint32_t l, uint32_t m) NN_NOEXCEPT
    {
        uint32_t previousQuotient = 0;

        for(uint32_t entry = 0; entry < l; ++entry)
        {
            const uint32_t scaled = entry * m;
            const uint32_t quotient = scaled / l;

            map[entry].stuffCount = static_cast<uint8_t>(quotient - previousQuotient);
            map[entry].phaseIndex = static_cast<uint8_t>(scaled % l);
            previousQuotient = quotient;
        }
        map[0].stuffCount = static_cast<uint8_t>(m - previousQuotient);
    }

    /*
     * Builds one quantized kernel per interpolation phase.
     * For phase p, the kernel takes every interpolationFactor-th prototype coefficient
     * starting at map[p].phaseIndex, stored in reverse order, normalized so the taps
     * sum to pState->gain, then scaled by 2^15 and clamped to [-32767, 32767]
     * (a value of exactly -32768 is left as is).
     * The scratch buffer holds maximumInterpolationFactor entries, so numberOfTaps
     * must not exceed that.
     */
    void    calculateFilterPhases(WindowStateType* pState, int16_t** pFilters, PhaseAndCountType* map,
                                  uint32_t interpolationFactor, uint32_t numberOfTaps) NN_NOEXCEPT
    {
        for(uint32_t phase = 0; phase < interpolationFactor; ++phase)
        {
            double      taps[maximumInterpolationFactor];
            uint32_t    prototypeIndex = map[phase].phaseIndex;

            // Fill from the last tap down to the first.
            for(uint32_t tap = numberOfTaps; tap-- > 0; )
            {
                taps[tap] = calculateFilterElement(pState, prototypeIndex);
                prototypeIndex += interpolationFactor;
            }

            // Summed in ascending tap order to keep the floating-point result unchanged.
            double sum = 0.0;
            for(uint32_t tap = 0; tap < numberOfTaps; ++tap)
            {
                sum += taps[tap];
            }
            const double normalization = pState->gain / sum;

            int16_t* const kernel = pFilters[phase];
            for(uint32_t tap = 0; tap < numberOfTaps; ++tap)
            {
                taps[tap] *= normalization;

                int32_t quantized = static_cast<int32_t>((S15_SCALE_FACTOR * taps[tap]) + 0.5);
                if(quantized >= INT_S15_SCALE_FACTOR)
                {
                    quantized = INT_S15_SCALE_FACTOR - 1;
                }
                else if(quantized < -INT_S15_SCALE_FACTOR)
                {
                    quantized = 1 - INT_S15_SCALE_FACTOR;
                }
                kernel[tap] = static_cast<int16_t>(quantized);
            }
        }
    }

    /*
     * One step of Euclid's algorithm: the remainder of greaterInt / lesserInt.
     * lesserInt must not be zero.
     */
    uint64_t    oneIteration(uint64_t greaterInt, uint64_t lesserInt) NN_NOEXCEPT
    {
        return greaterInt % lesserInt;
    }

    /*
     * Greatest common factor of value1 and value2 (Euclid's algorithm).
     * A zero operand is handled explicitly instead of dividing by it:
     * euclidGcf(x, 0) == euclidGcf(0, x) == x, and euclidGcf(0, 0) == 0.
     */
    uint64_t    euclidGcf(uint64_t value1, uint64_t value2) NN_NOEXCEPT
    {
        uint64_t    greaterInt = (value1 > value2) ? value1 : value2;
        uint64_t    lesserInt = (value1 > value2) ? value2 : value1;

        // Invariant: gcf(greaterInt, lesserInt) is constant; it ends as gcf(g, 0) == g.
        while(lesserInt != 0)
        {
            const uint64_t remainder = oneIteration(greaterInt, lesserInt);
            greaterInt = lesserInt;
            lesserInt = remainder;
        }
        return greaterInt;
    }

    /*
     * Divides *value1 and *value2 by their greatest common factor and returns that
     * factor. When both inputs are zero there is nothing to reduce: the values are
     * left unchanged and 0 is returned.
     */
    uint64_t    makeRelativePrime64Bit(uint64_t* value1, uint64_t* value2) NN_NOEXCEPT
    {
        const uint64_t gcf = euclidGcf(*value1, *value2);

        if(gcf != 0)
        {
            *value1 /= gcf;
            *value2 /= gcf;
        }
        return gcf;
    }

    /* findLmFromDouble() only accepts ratios inside [lowerBound, upperBound]. */
    constexpr double lowerBound = 0.1;
    constexpr double upperBound = 10.0;
    /* Product of the primes up to 17 (510510); findApproximateLM() scales by 1000 times its square. */
    constexpr double multiplier = (2.0 * 3.0 * 5.0 * 7.0 * 11.0 * 13.0 * 17.0);
    /* Upper limit for the +/- search window of searchForSmallRelativePrime(). */
    constexpr int32_t maximumSearchRange = 48;

    /* A pair of integers; findApproximateLM() produces it to approximate a ratio. */
    struct ratioType
    {
        uint64_t    value1;
        uint64_t    value2;
    };

    /*
     * Approximates startValue by value1 / value2 with both integers below 256.
     * The ratio is first expressed over the fixed denominator 1000 * multiplier^2 and
     * reduced; then both numbers are halved until they fit, and reduced once more.
     */
    static  void    findApproximateLM(ratioType* pApproximation, double startValue) NN_NOEXCEPT
    {
        uint64_t& numerator = pApproximation->value1;
        uint64_t& denominator = pApproximation->value2;

        denominator = static_cast<uint64_t>(1000.0 * multiplier * multiplier);
        numerator = static_cast<uint64_t>(1000.0 * multiplier * multiplier * startValue);
        makeRelativePrime64Bit(&numerator, &denominator);

        // Halving both keeps the ratio roughly intact while shrinking the integers.
        while(!((numerator < 256) && (denominator < 256)))
        {
            numerator >>= 1;
            denominator >>= 1;
        }
        makeRelativePrime64Bit(&numerator, &denominator);
    }

    /*
     * Finds small integers with *pL / *pM close to startValue.
     * Two candidates are built - one from startValue, one from its reciprocal
     * (used inverted) - and the one with the smaller absolute error wins.
     * Returns false and zeroes both outputs when startValue lies outside
     * [lowerBound, upperBound] (a NaN also ends up here).
     */
    bool    findLmFromDouble(uint32_t* pL, uint32_t* pM, double startValue) NN_NOEXCEPT
    {
        if(!((startValue >= lowerBound) && (startValue <= upperBound)))
        {
            *pL = 0;
            *pM = 0;
            return false;
        }

        ratioType   direct;
        ratioType   inverted;
        findApproximateLM(&direct, startValue);
        findApproximateLM(&inverted, 1.0 / startValue);

        double directError = startValue - (static_cast<double>(direct.value1) / static_cast<double>(direct.value2));
        double invertedError = startValue - (static_cast<double>(inverted.value2) / static_cast<double>(inverted.value1));
        if(directError < 0.0)
        {
            directError = -directError;
        }
        if(invertedError < 0.0)
        {
            invertedError = -invertedError;
        }

        const bool useDirect = (directError < invertedError);
        *pL = static_cast<uint32_t>(useDirect ? direct.value1 : inverted.value2);
        *pM = static_cast<uint32_t>(useDirect ? direct.value2 : inverted.value1);
        return true;
    }

    /*
     * 32-bit front end of makeRelativePrime64Bit(): reduces *value1 and *value2 by
     * their greatest common factor and returns that factor.
     */
    uint32_t    makeRelativePrime(uint32_t* value1, uint32_t* value2) NN_NOEXCEPT
    {
        uint64_t    first = *value1;
        uint64_t    second = *value2;

        const uint64_t gcf = makeRelativePrime64Bit(&first, &second);

        *value1 = static_cast<uint32_t>(first);
        *value2 = static_cast<uint32_t>(second);
        return static_cast<uint32_t>(gcf);
    }

    /*
     * Replaces a factor pair that does not fit below maximumInterpolationFactor by a
     * nearby pair that does.
     *
     * The starting point is the approximation from findLmFromDouble(). It is then
     * compared against pairs obtained by nudging one factor by up to +/- searchRange
     * (1% of the larger factor, clamped to [1, maximumSearchRange]) and reducing the
     * result; the pair whose ratio is closest to the requested one is written back.
     *
     * If the requested ratio is outside the range findLmFromDouble() supports, both
     * outputs are set to 0. This is the result the function has always produced in
     * that case, now without running the search on a NaN error value.
     */
    void        searchForSmallRelativePrime(uint32_t* interpolationFactor, uint32_t* decimationFactor) NN_NOEXCEPT
    {
        const uint32_t  requestedInterpolation = *interpolationFactor;
        const uint32_t  requestedDecimation = *decimationFactor;
        const double    ratio = static_cast<double>(requestedInterpolation) / static_cast<double>(requestedDecimation);

        uint32_t    bestInterpolationFactor = requestedInterpolation;
        uint32_t    bestDecimationFactor = requestedDecimation;
        if(!findLmFromDouble(&bestInterpolationFactor, &bestDecimationFactor, ratio))
        {
            *interpolationFactor = 0;
            *decimationFactor = 0;
            return;
        }

        double delta = (static_cast<double>(bestInterpolationFactor) / static_cast<double>(bestDecimationFactor)) - ratio;
        if(delta < 0.0)
        {
            delta = -delta;
        }

        const uint32_t greaterValue = (requestedInterpolation < requestedDecimation) ? requestedDecimation : requestedInterpolation;
        int32_t searchRange = static_cast<int32_t>(greaterValue / 100);
        if(searchRange > maximumSearchRange)
        {
            searchRange = maximumSearchRange;
        }
        else if(searchRange == 0)
        {
            searchRange = 1;
        }

        // Evaluates one nudged pair and keeps it when it is strictly better than the current best.
        auto considerCandidate = [&](int64_t candidateInterpolation, int64_t candidateDecimation)
        {
            // A factor of zero (or below) is not a usable ratio and must not reach the gcd.
            if((candidateInterpolation <= 0) || (candidateDecimation <= 0))
            {
                return;
            }

            uint32_t trialInterpolationFactor = static_cast<uint32_t>(candidateInterpolation);
            uint32_t trialDecimationFactor = static_cast<uint32_t>(candidateDecimation);
            makeRelativePrime(&trialInterpolationFactor, &trialDecimationFactor);
            if(!((trialInterpolationFactor < maximumInterpolationFactor) && (trialDecimationFactor < maximumInterpolationFactor)))
            {
                return;
            }

            double trialDelta = (static_cast<double>(trialInterpolationFactor) / static_cast<double>(trialDecimationFactor)) - ratio;
            if(trialDelta < 0.0)
            {
                trialDelta = -trialDelta;
            }
            if(trialDelta < delta)
            {
                delta = trialDelta;
                bestInterpolationFactor = trialInterpolationFactor;
                bestDecimationFactor = trialDecimationFactor;
            }
        };

        // First vary the interpolation factor, then the decimation factor (same order as before).
        for(int32_t index = -searchRange; index <= searchRange; index++)
        {
            considerCandidate(static_cast<int64_t>(requestedInterpolation) + index, static_cast<int64_t>(requestedDecimation));
        }
        for(int32_t index = -searchRange; index <= searchRange; index++)
        {
            considerCandidate(static_cast<int64_t>(requestedInterpolation), static_cast<int64_t>(requestedDecimation) + index);
        }

        *interpolationFactor = bestInterpolationFactor;
        *decimationFactor = bestDecimationFactor;
    }
}

/*
 * Output sample rate the resampler will really produce for the requested pair of rates.
 * If the reduced ratio outputSampleRate / inputSampleRate has both terms below
 * maximumInterpolationFactor, the requested output rate is returned unchanged.
 * Otherwise the ratio is replaced by the approximation from
 * searchForSmallRelativePrime() and the rate implied by that approximation is returned.
 */
double  computeAcutalOutputSampleRate(int inputSampleRate, int outputSampleRate) NN_NOEXCEPT
{
    uint32_t    interpolationFactor = static_cast<uint32_t>(outputSampleRate);
    uint32_t    decimationFactor = static_cast<uint32_t>(inputSampleRate);

    makeRelativePrime(&interpolationFactor, &decimationFactor);

    const bool exactRatioFits = (interpolationFactor < maximumInterpolationFactor) && (decimationFactor < maximumInterpolationFactor);
    if(exactRatioFits)
    {
        return static_cast<double>(outputSampleRate);
    }

    searchForSmallRelativePrime(&interpolationFactor, &decimationFactor);
    return static_cast<double>(interpolationFactor) * static_cast<double>(inputSampleRate) / static_cast<double>(decimationFactor);
}

/*
 * Runs the filter routine stored in the resampler state on one block of input.
 * `filter` must point to a ResampleStateType; the return value is whatever the
 * selected routine returns (for the generic filters, the number of output frames).
 */
uint32_t polyphaseResample(void* filter, const int16_t* input, int16_t* output, uint32_t inputSampleCount) NN_NOEXCEPT
{
    ResampleStateType* const resampler = static_cast<ResampleStateType*>(filter);

    return resampler->pfnApplyFilter(resampler, input, output, inputSampleCount);
}

/*
 * Number of bytes a resampler instance needs for the given configuration.
 * The factors are reduced (and approximated if they do not fit below
 * maximumInterpolationFactor) before the size is computed.
 *
 * The total is accumulated in size_t, so the size_t-typed terms are no longer
 * narrowed into a 32-bit variable; every term has the same value as before.
 */
size_t requiredPolyphaseResamplerMemory(uint32_t numberOfChannels, uint32_t interpolationFactor, uint32_t decimationFactor, bool highQuality) NN_NOEXCEPT
{
    const uint32_t filterMultiplier = (highQuality) ? highQualityFilterMultiplier : lowQualityFilterMultiplier;
    const uint32_t numberOfTaps = filterMultiplier * filterSizeGranularity;

    makeRelativePrime(&interpolationFactor, &decimationFactor);
    if(!((interpolationFactor < maximumInterpolationFactor) && (decimationFactor < maximumInterpolationFactor)))
    {
        searchForSmallRelativePrime(&interpolationFactor, &decimationFactor);
    }

    // Tap count rounded up to a multiple of four samples.
    const size_t paddedTaps = (static_cast<size_t>(numberOfTaps) + 3) & ~static_cast<size_t>(3);
    // Channel count rounded up to an even number of pointers.
    const size_t paddedChannels = (static_cast<size_t>(numberOfChannels) + 1) & ~static_cast<size_t>(1);

    size_t allocationSize = (sizeof(ResampleStateType) + 7) & ~static_cast<size_t>(7);     /* state, rounded up to 8 bytes */
    allocationSize += paddedTaps * sizeof(int16_t) * numberOfChannels;                      /* delay lines */
    allocationSize += paddedChannels * sizeof(int16_t*);                                    /* pDelayLine */
    allocationSize += paddedTaps * interpolationFactor * sizeof(int16_t);                   /* all filter phases */
    allocationSize += static_cast<size_t>(interpolationFactor) * sizeof(int16_t*);          /* array of ptrs to filter kernel phases */
    return allocationSize;
}

/*
 * Estimates the output buffer size in bytes for inputBufferSize bytes of input.
 * `state` must point to an initialized ResampleStateType.
 *
 * The input size is rounded up to whole frames, scaled by
 * interpolationFactor / decimationFactor (rounded up) and converted back to bytes;
 * with applyPadding, outputBufferSizeEstimationPadding extra frames are added.
 *
 * All intermediate values are 64-bit, so large buffers no longer overflow the
 * frames * interpolationFactor product. Returns 0 if the state describes zero
 * channels or a zero decimation factor (no meaningful estimate exists and the
 * computation would divide by zero).
 */
size_t  estimatePolyphaseResamplerOutputBufferSize(void* state, size_t inputBufferSize, bool applyPadding) NN_NOEXCEPT
{
    const ResampleStateType* const pState = static_cast<const ResampleStateType*>(state);

    const uint64_t interpolationFactor = pState->ddaState.interpolationFactor;
    const uint64_t decimationFactor = pState->ddaState.decimationFactor;
    const uint64_t sampleSize = static_cast<uint64_t>(pState->numberOfChannels) * sizeof(int16_t);     // bytes per frame

    if((sampleSize == 0) || (decimationFactor == 0))
    {
        return 0;
    }

    const uint64_t inputFrames = (static_cast<uint64_t>(inputBufferSize) + sampleSize - 1) / sampleSize;
    uint64_t outputFrames = ((inputFrames * interpolationFactor) + decimationFactor - 1) / decimationFactor;
    if(applyPadding)
    {
        outputFrames += outputBufferSizeEstimationPadding;
    }
    return static_cast<size_t>(outputFrames * sampleSize);
}

// Estimates the number of output samples produced for `inputSampleCount` input
// samples: ceil(inputSampleCount * interpolationFactor / decimationFactor),
// with the product formed in 64 bits. When `applyPadding` is true,
// outputBufferSizeEstimationPadding is added to the result.
int estimatePolyphaseResamplerOutputSampleCount(void* state, int inputSampleCount, bool applyPadding) NN_NOEXCEPT
{
    const ResampleStateType* resampler = static_cast<ResampleStateType*>(state);
    const int upFactor = static_cast<int>(resampler->ddaState.interpolationFactor);
    const int downFactor = static_cast<int>(resampler->ddaState.decimationFactor);

    /* widen before multiplying so the product cannot wrap an int */
    const int64_t scaledCount = static_cast<int64_t>(inputSampleCount) * upFactor;
    int estimate = static_cast<int>((scaledCount + downFactor - 1) / downFactor);

    if(!applyPadding)
    {
        return estimate;
    }
    estimate += outputBufferSizeEstimationPadding;
    return estimate;
}

// Builds a complete resampler state inside the caller-supplied buffer `state`.
//
// The interpolation/decimation pair is first reduced with makeRelativePrime();
// if either factor is still not below maximumInterpolationFactor, a smaller
// pair is obtained from searchForSmallRelativePrime(). `gain` is clamped to
// [0.0, 1.0]. `highQuality` selects the filter multiplier, derating values and
// beta used to design the filter.
//
// Buffer layout written by this function, in order:
//   1. ResampleStateType, rounded up to a multiple of 8 bytes
//   2. numberOfChannels pointers to the per-channel delay lines
//   3. the delay lines (taps per phase rounded up to a multiple of 4, zeroed)
//   4. interpolationFactor pointers to the filter kernel phases
//   5. the kernel phases (same rounded length, zeroed, then filled in by
//      calculateFilterPhases())
//
// NOTE(review): the buffer is presumably sized with
// requiredPolyphaseResamplerMemory() - confirm against the caller.
void initializePolyphaseResampler(void* state, uint32_t interpolationFactor, uint32_t decimationFactor, uint32_t numberOfChannels, bool highQuality, float gain) NN_NOEXCEPT
{
    /* reduce the conversion ratio to factors the phase map can hold */
    makeRelativePrime(&interpolationFactor, &decimationFactor);
    if((interpolationFactor >= maximumInterpolationFactor) || (decimationFactor >= maximumInterpolationFactor))
    {
        searchForSmallRelativePrime(&interpolationFactor, &decimationFactor);
    }

    /* quality dependent filter design parameters */
    uint32_t tapMultiplier = (highQuality) ? highQualityFilterMultiplier : lowQualityFilterMultiplier;
    double interpolatorDerating = (highQuality) ? highQualityInterpolatorDerating : lowQualityInterpolatorDerating;
    double decimatorDerating = (highQuality) ? highQualityDecimatorDerating : lowQualityDecimatorDerating;
    double windowBeta = (highQuality) ? highQualityBeta : lowQualityBeta;

    /* clamp gain to [0, 1] */
    if(gain > 1.0f)
    {
        gain = 1.0f;
    }
    if(gain < 0.0f)
    {
        gain = 0.0f;
    }

    /* scale by the larger of the two factors (decimating vs. interpolating) */
    double filterScale;
    if(interpolationFactor < decimationFactor)
    {
        filterScale = decimatorDerating * static_cast<double>(decimationFactor);
    }
    else
    {
        filterScale = interpolatorDerating * static_cast<double>(interpolationFactor);
    }

    uint32_t totalTaps = tapMultiplier * filterSizeGranularity * interpolationFactor;
    uint32_t tapsPerPhase = (totalTaps + (interpolationFactor - 1)) / interpolationFactor;
    uint32_t paddedTapsPerPhase = ((tapsPerPhase + 3) & ~3);
    uint32_t paddedHeaderSize = ((sizeof(ResampleStateType) + 7) & ~7);

    ResampleStateType* pState = static_cast<ResampleStateType*>(state);
    int8_t* rawBytes = static_cast<int8_t*>(state);

    PhaseAndCountType phaseMap[maximumInterpolationFactor];
    WindowStateType windowState;
    makePhaseAndCounterMap(phaseMap, interpolationFactor, decimationFactor);
    initializeWindowState(&windowState, windowBeta, filterScale, totalTaps, static_cast<double>(gain));

    pState->numberOfTaps = tapsPerPhase;
    pState->numberOfChannels = numberOfChannels;
    prepareDda(pState, phaseMap[0].stuffCount, interpolationFactor, decimationFactor);

    /* delay line pointer table directly after the header, lines follow it */
    pState->pDelayLine = reinterpret_cast<int16_t**>(&rawBytes[paddedHeaderSize]);
    int16_t* cursor = reinterpret_cast<int16_t*>(&pState->pDelayLine[numberOfChannels]);
    for(uint32_t channel = 0; channel < numberOfChannels; channel++)
    {
        pState->pDelayLine[channel] = cursor;
        for(uint32_t tap = 0; tap < paddedTapsPerPhase; tap++)
        {
            *cursor++ = 0;
        }
    }

    /* kernel pointer table directly after the delay lines, kernels follow it */
    pState->pFilterKernels = reinterpret_cast<int16_t**>(cursor);
    cursor = reinterpret_cast<int16_t*>(&pState->pFilterKernels[interpolationFactor]);
    for(uint32_t phase = 0; phase < interpolationFactor; phase++)
    {
        pState->pFilterKernels[phase] = cursor;
        for(uint32_t tap = 0; tap < paddedTapsPerPhase; tap++)
        {
            *cursor++ = 0;
        }
    }

    calculateFilterPhases(&windowState, pState->pFilterKernels, phaseMap, interpolationFactor, tapsPerPhase);
}

// ----------------------------------------------------------------------------

namespace {

// Flag bit (bit 7) that InitializePolyphaseResampler() ORs into
// PolyphaseResamplerType::_channelCount. The public entry points test this bit
// before using the work buffer, and mask it off (marker - 1) to recover the
// real channel count.
const int polyphaseChannelMarker = 0x80;

}

// Returns the output sample rate computed by computeAcutalOutputSampleRate()
// for the requested input/output rate pair.
//
// Preconditions (SDK checks): both rates are greater than 0 and
// outputSampleRate / inputSampleRate lies within
// [GetConversionRatioMin(), GetConversionRatioMax()].
double CalculatePolyphaseResamplerActualOutputSampleRate(int inputSampleRate, int outputSampleRate) NN_NOEXCEPT
{
    NN_SDK_REQUIRES(inputSampleRate > 0, "Input sample rate must be greater than 0");
    NN_SDK_REQUIRES(outputSampleRate > 0, "Output sample rate must be greater than 0");

    // The ratio is only consumed by the range checks below.
    const float ratioInFloat = static_cast<float>(outputSampleRate) / static_cast<float>(inputSampleRate);
    NN_SDK_ASSERT_LESS_EQUAL(ratioInFloat, PolyphaseResamplerType::GetConversionRatioMax());
    NN_SDK_ASSERT_GREATER_EQUAL(ratioInFloat, PolyphaseResamplerType::GetConversionRatioMin());
    NN_UNUSED(ratioInFloat);

    return computeAcutalOutputSampleRate(inputSampleRate, outputSampleRate);
}

// Returns the work buffer size, in bytes, needed to initialize a polyphase
// resampler for the given configuration. The size reported by
// requiredPolyphaseResamplerMemory() is rounded up to
// nn::audio::BufferAlignSize.
//
// Preconditions (SDK checks):
//   - inputSampleRate and outputSampleRate are greater than 0 (the ratio below
//     divides by the input rate and both rates are converted to uint32_t);
//   - 0 < channelCount <= PolyphaseResamplerType::ChannelCountMax;
//   - outputSampleRate / inputSampleRate lies within
//     [GetConversionRatioMin(), GetConversionRatioMax()].
size_t GetRequiredBufferSizeForPolyphaseResampler(int inputSampleRate, int outputSampleRate, int channelCount, bool highQuality) NN_NOEXCEPT
{
    // Same rate checks as the other public entry points taking sample rates.
    NN_SDK_REQUIRES(inputSampleRate > 0, "Input sample rate must be greater than 0");
    NN_SDK_REQUIRES(outputSampleRate > 0, "Output sample rate must be greater than 0");
    NN_SDK_REQUIRES(channelCount > 0, "Channel count must be greater than 0");
    NN_SDK_REQUIRES(channelCount <= PolyphaseResamplerType::ChannelCountMax, "Channel count must be less than or equal to ChannelCountMax");
    float ratioInFloat = static_cast<float>(outputSampleRate) / static_cast<float>(inputSampleRate);
    NN_SDK_ASSERT_LESS_EQUAL(ratioInFloat, PolyphaseResamplerType::GetConversionRatioMax());
    NN_SDK_ASSERT_GREATER_EQUAL(ratioInFloat, PolyphaseResamplerType::GetConversionRatioMin());
    NN_UNUSED(ratioInFloat);

    // The output rate is passed as the interpolation factor and the input rate
    // as the decimation factor, matching InitializePolyphaseResampler().
    size_t polyphaseResamplerStateSize = requiredPolyphaseResamplerMemory(static_cast<uint32_t>(channelCount),
                                                                          static_cast<uint32_t>(outputSampleRate),
                                                                          static_cast<uint32_t>(inputSampleRate),
                                                                          highQuality);
    polyphaseResamplerStateSize = nn::util::align_up(polyphaseResamplerStateSize, nn::audio::BufferAlignSize);
    return polyphaseResamplerStateSize;
}

// Returns an output buffer size, in bytes, for `inputBufferSize` bytes of
// input: the padded estimate from estimatePolyphaseResamplerOutputBufferSize()
// rounded up to nn::audio::BufferAlignSize.
//
// Preconditions (SDK checks): pResampler is non-null and carries the
// polyphaseChannelMarker bit in _channelCount.
size_t GetPolyphaseResampledOutBufferSize(const PolyphaseResamplerType* pResampler, size_t inputBufferSize) NN_NOEXCEPT
{
    NN_SDK_REQUIRES_NOT_NULL(pResampler);
    NN_SDK_REQUIRES(pResampler->_channelCount & polyphaseChannelMarker);

    const size_t paddedEstimate = estimatePolyphaseResamplerOutputBufferSize(pResampler->_workBuffer, inputBufferSize, true);
    const size_t alignedSize = nn::util::align_up(paddedEstimate, nn::audio::BufferAlignSize);
    return alignedSize;
}

// Returns the padded estimate of how many output samples the resampler
// produces for `inputSampleCount` input samples, or 0 when inputSampleCount is
// not positive.
//
// Preconditions (SDK checks): pResampler is non-null and, whenever its work
// buffer is consulted, carries the polyphaseChannelMarker bit in _channelCount.
int GetPolyphaseResamplerOutputSampleCount(const PolyphaseResamplerType* pResampler, int inputSampleCount) NN_NOEXCEPT
{
    NN_SDK_REQUIRES_NOT_NULL(pResampler);
    if (inputSampleCount <= 0)
    {
        return 0;
    }
    // The work buffer is about to be interpreted as polyphase resampler state,
    // so apply the same marker check as the other accessors. It sits after the
    // early return so the "nothing to convert" path behaves exactly as before.
    NN_SDK_REQUIRES(pResampler->_channelCount & polyphaseChannelMarker);
    return estimatePolyphaseResamplerOutputSampleCount(pResampler->_workBuffer, inputSampleCount, true);
}

// Initializes `pResampler` to convert from inputSampleRate to outputSampleRate
// using `buffer` as its work memory.
//
// Records the configuration in the resampler object, tags _channelCount with
// polyphaseChannelMarker, and builds the filter state inside `buffer` through
// initializePolyphaseResampler(). Always returns success.
//
// Preconditions (SDK checks): non-null pResampler and buffer; both sample rates
// greater than 0; conversion ratio within
// [GetConversionRatioMin(), GetConversionRatioMax()];
// 0 < channelCount <= ChannelCountMax; bufferSize at least
// GetRequiredBufferSizeForPolyphaseResampler() for the same configuration.
Result InitializePolyphaseResampler(PolyphaseResamplerType* pResampler, void* buffer, size_t bufferSize, int inputSampleRate, int outputSampleRate, int channelCount, bool highQuality, float gain) NN_NOEXCEPT
{
    NN_SDK_REQUIRES_NOT_NULL(pResampler);
    NN_SDK_REQUIRES_NOT_NULL(buffer);
    NN_SDK_REQUIRES(inputSampleRate > 0, "Input sample rate must be greater than 0");
    NN_SDK_REQUIRES(outputSampleRate > 0, "Output sample rate must be greater than 0");

    // The ratio is only consumed by the range checks.
    const float ratioInFloat = static_cast<float>(outputSampleRate) / static_cast<float>(inputSampleRate);
    NN_SDK_ASSERT_LESS_EQUAL(ratioInFloat, PolyphaseResamplerType::GetConversionRatioMax());
    NN_SDK_ASSERT_GREATER_EQUAL(ratioInFloat, PolyphaseResamplerType::GetConversionRatioMin());
    NN_UNUSED(ratioInFloat);

    NN_SDK_REQUIRES(channelCount > 0, "Channel count must be greater than 0");
    NN_SDK_REQUIRES(channelCount <= PolyphaseResamplerType::ChannelCountMax, "Channel count must be less than or equal to ChannelCountMax");

    const size_t requiredBufferSize = GetRequiredBufferSizeForPolyphaseResampler(inputSampleRate, outputSampleRate, channelCount, highQuality);
    NN_SDK_ASSERT_GREATER_EQUAL(bufferSize, requiredBufferSize);
    NN_UNUSED(requiredBufferSize);

    // Remember the configuration; the marker bit flags the object as a
    // polyphase resampler for the other entry points.
    pResampler->_workBuffer = buffer;
    pResampler->_bufferSize = bufferSize;
    pResampler->_inSampleRate = inputSampleRate;
    pResampler->_outSampleRate = outputSampleRate;
    pResampler->_channelCount = channelCount | polyphaseChannelMarker;

    // Output rate acts as the interpolation factor, input rate as the
    // decimation factor.
    const uint32_t interpolationFactor = static_cast<uint32_t>(outputSampleRate);
    const uint32_t decimationFactor = static_cast<uint32_t>(inputSampleRate);
    initializePolyphaseResampler(buffer, interpolationFactor, decimationFactor, static_cast<uint32_t>(channelCount), highQuality, gain);

    NN_RESULT_SUCCESS;
}

// Resamples `inputSampleCount` samples from pInputBuffer into pOutputBuffer and
// stores the number of samples produced in *pOutputSampleCount.
//
// Returns ResultInsufficientBuffer when outputBufferSize is smaller than the
// unpadded estimate from estimatePolyphaseResamplerOutputBufferSize();
// otherwise returns success after calling polyphaseResample().
//
// Preconditions (SDK checks): non-null pResampler, pOutputBuffer, pInputBuffer
// and pOutputSampleCount; pResampler carries the polyphaseChannelMarker bit;
// inputSampleCount is not negative.
Result ProcessPolyphaseResamplerBuffer(PolyphaseResamplerType* pResampler, int* pOutputSampleCount, int16_t* pOutputBuffer, size_t outputBufferSize, const int16_t* pInputBuffer, int inputSampleCount) NN_NOEXCEPT
{
    NN_SDK_REQUIRES_NOT_NULL(pResampler);
    NN_SDK_REQUIRES(pResampler->_channelCount & polyphaseChannelMarker);
    NN_SDK_REQUIRES_NOT_NULL(pOutputBuffer);
    NN_SDK_REQUIRES_NOT_NULL(pInputBuffer);
    NN_SDK_REQUIRES_NOT_NULL(pOutputSampleCount);
    NN_SDK_REQUIRES(inputSampleCount >= 0, "Input sample count must not be negative");

    // Strip the marker bit to recover the real channel count.
    const size_t channelCount = static_cast<size_t>(pResampler->_channelCount & (polyphaseChannelMarker - 1));

    // Widen to size_t before multiplying: an int * int product can overflow
    // (undefined behaviour) for large sample counts.
    const size_t inputBufferSize = static_cast<size_t>(inputSampleCount) * channelCount * sizeof(int16_t);

    NN_RESULT_THROW_UNLESS(estimatePolyphaseResamplerOutputBufferSize(pResampler->_workBuffer, inputBufferSize, false) <= outputBufferSize, ResultInsufficientBuffer());
    *pOutputSampleCount = polyphaseResample(pResampler->_workBuffer, pInputBuffer, pOutputBuffer, inputSampleCount);
    NN_RESULT_SUCCESS;
}

}}
