﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#pragma once

#include <arm_neon.h>

#include "../audio_DspCommon.h"
#include "../audio_EffectReverb.h"
#include "../audio_DspUtility.h"
#include "../audio_EffectReverbCoefficients.h"


namespace nn { namespace audio { namespace detail {

// Single-channel reverb entry point.
//
// No processing is implemented in this header: every argument is ignored and
// the body only raises NN_AUDIO_DSP_ASSERT with a false condition and the
// message "Not implemented.". Nothing is written to pOutData1.
//
//   reverb      - unused
//   state       - unused
//   sampleCount - unused
//   pInData1    - unused
//   pOutData1   - unused (left untouched)
NN_FORCEINLINE void ApplyReverbEffect1ch(
    const ReverbParameter * reverb,
    ReverbState* state,
    const int sampleCount,
    const int32_t* pInData1,
    int32_t* pOutData1)
{
    // Explicitly discard the arguments; this stub has nothing to do with them.
    static_cast<void>(reverb);
    static_cast<void>(state);
    static_cast<void>(sampleCount);
    static_cast<void>(pInData1);
    static_cast<void>(pOutData1);

    NN_AUDIO_DSP_ASSERT(false, "Not implemented.");
}

// Two-channel reverb entry point.
//
// The only live statement is the NN_AUDIO_DSP_ASSERT below; all parameters are
// currently unused and nothing is written to pOutData0 / pOutData1.
//
// Everything between `#if 0` and `#endif` is removed by the preprocessor. It is
// an unfinished NEON draft of the per-sample reverb loop and is kept only as a
// starting point.
//
// NOTE(review): the draft cannot be enabled as-is. Visible problems:
//   - It uses names that are not declared in this function: outputs,
//     earlyDelay, earlyTap, earlyCoef, lateTap, temps2, fdnFeedback, fdnDelay,
//     fdnLowPassHistory, fdnLowPassCoef, allPass, __TapOutAssignments2ch
//     (some may come from the included headers - TODO confirm).
//   - Samples are indexed with `j`, but the outer loop variable is `i` (and the
//     inner loops declare their own `i`, shadowing it).
//   - It reads pInData1/pInData2 and writes pOutData1/pOutData2, whereas this
//     function's parameters are pInData0/pInData1 and pOutData0/pOutData1.
//   - A stray `unrolled` token follows the early-reflection for-header.
//   - Both `reverb->_reverbGain` and `reverb->reverbGain` are referenced.
NN_FORCEINLINE void ApplyReverbEffect2ch(const ReverbParameter* reverb, ReverbState* state, int sampleCount, const int32_t* pInData0, const int32_t* pInData1, int32_t* pOutData0, int32_t* pOutData1)
{
    NN_AUDIO_DSP_ASSERT(false, "Not implemented.");
#if 0
    // ---- disabled draft: one iteration per output sample ----
    for (int i = 0; i < sampleCount; ++i)
    {
        outputs[0] = outputs[1] = QF_CONST(0.0f); // zero outputs

        // early reflections
        // Each tap of the early delay line is scaled by its coefficient and
        // accumulated into the output slot chosen by __TapOutAssignments2ch.
        // NOTE(review): `unrolled` after the for-header is not valid C++ unless
        // it is a macro defined elsewhere.
        for (int i = 0; i < ReverbParameter::EarlyReflectionTaps; ++i) unrolled
        {
            outputs[__TapOutAssignments2ch[i]] += MultiQfQf(DelayLineTapOut(earlyDelay, earlyTap[i]), earlyCoef[i]);
        }
        // get input samples (as int32_t)
        // NOTE(review): index `j` and pointer `pInData2` do not exist in this
        // function; the two terms also use differently spelled gain members.
        qf in_samp = MultiQfQf(reverb->_reverbGain, Q8ToQf(pInData1[j])) + MultiQfQf(reverb->reverbGain, Q8ToQf(pInData2[j])); // Mix input channels
        DelayLineTick(earlyDelay, in_samp);

        // late reverb
        qf late_in = MultiQfQf(DelayLineTapOut(earlyDelay, lateTap), reverb->_lateGain);

        // $$$$$$$$$$$$$$$$$$ start vectorizing from here$$$$$$$$$$$$$$$$$$$$$$$
        // get last samples out of delay lines, multiply by attenuation coef, and low-pass filter
        ProcessFeedback(temps2, fdnFeedback, fdnDelay, fdnLowPassHistory, fdnLowPassCoef);

        // Two-lane working registers; each loop iteration below handles the
        // delay-network lines i and i + 1 side by side.
        int32x2_t tempsVec = vdup_n_s32(0);
        int32x2_t lateInVec = vdup_n_s32(late_in);
        int32x2_t coeffVec = vdup_n_s32(0);
        int64x2_t  multiplyVec;
        int32x2_t outVec = vdup_n_s32(0);
        int32x2_t outVec01 = vdup_n_s32(0);
        // add inputs, then all-pass, then feedback to delays
        for (int i = 0; i < ReverbParameter::FeedbackDelayNetworkCount; i += 2)
        {
           // Scalar reference that this loop body vectorizes:
           // temps[i] = AllPassTick(&allPass[i], late_in + temps2[i]);
           // qf AllPassTick(FXDelayLine *allpass, qf in)
           //    {
           //       qf temp, out;
           //       qf coef = allpass->coef;
           //       temp = in - MultiQfQf(coef, DelayLineNextOut(allpass));
           //       out = MultiQfQf(coef, temp) + DelayLineTick(allpass, temp);
           //        return out;
           //   }
            // in = late_in + temps2[i], for lanes i and i + 1 (saturating add).
            // NOTE(review): lateInVec is overwritten with the sum, so on the
            // next iteration it still contains the previous pair's temps2
            // values; the scalar reference adds to the plain late_in each time.
            tempsVec = vld1_s32(&temps2[i]);
            lateInVec = vqadd_s32(lateInVec, tempsVec);

            // Load the sample at each all-pass line's `p` pointer, one per lane
            // (presumably the scalar DelayLineNextOut value - TODO confirm).
            outVec = vld1_lane_s32((&allPass[i])->p, outVec, 0);
            outVec = vld1_lane_s32((&allPass[i + 1])->p, outVec, 1);

            // Per-line all-pass coefficients, one per lane.
            coeffVec = vld1_lane_s32(&((&allPass[i])->coef), coeffVec, 0);
            coeffVec = vld1_lane_s32(&((&allPass[i + 1])->coef), coeffVec, 1);

            // temp = in - coef * nextOut: widening 32x32->64 multiply, then a
            // rounding narrowing shift by the fractional bit count.
            multiplyVec = vmull_s32(outVec, coeffVec);
            tempsVec = vrshrn_n_s64(multiplyVec, (QF_FRACTIONAL_BIT_COUNT));
            tempsVec =   vqsub_s32(lateInVec, tempsVec);

            // Push temp into each all-pass line. The return value is ignored;
            // the previously loaded outVec stands in for it below (equivalent
            // to the scalar form only if DelayLineTick returns the sample that
            // was at `p` - TODO confirm).
            DelayLineTick(&allPass[i], vget_lane_s32(tempsVec, 0));
            DelayLineTick(&allPass[i + 1], vget_lane_s32(tempsVec, 1));

            // out = coef * temp + nextOut
            multiplyVec = vmull_s32(tempsVec, coeffVec);
            tempsVec = vrshrn_n_s64(multiplyVec, (QF_FRACTIONAL_BIT_COUNT));
            outVec = vqadd_s32(outVec, tempsVec);
            // Keep the first pair's result so it can be summed with the last
            // pair after the loop.
            if (i==0) { outVec01 = outVec;}

            // Feed the all-pass outputs back into the delay-network lines.
            DelayLineTick(&fdnDelay[i], vget_lane_s32(outVec, 0));
            DelayLineTick(&fdnDelay[i + 1], vget_lane_s32(outVec, 1));
        }

        // send outputs
        //outputs[0] += (temps[0] + temps[2]); // Left + LeftSur
        //outputs[1] += (temps[1] + temps[3]); // Right + RightSur
        // outVec holds the last pair processed and outVec01 the first pair;
        // this matches the scalar lines above only if
        // FeedbackDelayNetworkCount is 4 - TODO confirm.
        outVec = vqadd_s32(outVec, outVec01);
        outVec01 = vld1_s32(&outputs[0]);
        outVec = vqadd_s32(outVec, outVec01);

        //pOutData1[j] = QfToQ8(MultiQfQf(outputs[0], reverb->_outGain)) + MultiQfQf(reverb->_dryGain, pInData1[j]);
        //pOutData2[j] = QfToQ8(MultiQfQf(outputs[1], reverb->_outGain)) + MultiQfQf(reverb->_dryGain, pInData2[j]);
        // Wet path: multiply by _outGain and narrow with one combined shift
        // (presumably the MultiQfQf shift plus the QfToQ8 conversion folded
        // together - TODO confirm).
        coeffVec = vdup_n_s32(reverb->_outGain);
        multiplyVec = vmull_s32(outVec, coeffVec);
        outVec = vrshrn_n_s64(multiplyVec, (QF_FRACTIONAL_BIT_COUNT + (QF_FRACTIONAL_BIT_COUNT - 8)));
        // Dry path: raw input samples scaled by _dryGain, then added to the wet
        // result with a saturating add.
        coeffVec = vdup_n_s32(reverb->_dryGain);
        tempsVec = vld1_lane_s32(&pInData1[j], tempsVec, 0);
        tempsVec = vld1_lane_s32(&pInData2[j], tempsVec, 1);
        multiplyVec = vmull_s32(tempsVec, coeffVec);
        tempsVec = vrshrn_n_s64(multiplyVec, (QF_FRACTIONAL_BIT_COUNT));
        outVec = vqadd_s32(outVec, tempsVec);

        // NOTE(review): pOutData2 and `j` are not declared in this function.
        pOutData1[j] = vget_lane_s32(outVec, 0);
        pOutData2[j] = vget_lane_s32(outVec, 1);
    }
#endif
}

// Four-channel reverb entry point.
//
// No processing is implemented in this header: every argument is ignored and
// the body only raises NN_AUDIO_DSP_ASSERT with a false condition and the
// message "Not implemented.". None of the output buffers are written.
//
//   reverb               - unused
//   state                - unused
//   sampleCount          - unused
//   pInData0..pInData3   - unused
//   pOutData0..pOutData3 - unused (left untouched)
NN_FORCEINLINE void ApplyReverbEffect4ch(
    const ReverbParameter * reverb,
    ReverbState* state,
    int sampleCount,
    const int32_t* pInData0,
    const int32_t* pInData1,
    const int32_t* pInData2,
    const int32_t* pInData3,
    int32_t* pOutData0,
    int32_t* pOutData1,
    int32_t* pOutData2,
    int32_t* pOutData3)
{
    // Explicitly discard the arguments; this stub has nothing to do with them.
    static_cast<void>(reverb);
    static_cast<void>(state);
    static_cast<void>(sampleCount);
    static_cast<void>(pInData0);
    static_cast<void>(pInData1);
    static_cast<void>(pInData2);
    static_cast<void>(pInData3);
    static_cast<void>(pOutData0);
    static_cast<void>(pOutData1);
    static_cast<void>(pOutData2);
    static_cast<void>(pOutData3);

    NN_AUDIO_DSP_ASSERT(false, "Not implemented.");
}

// Reverb bypass entry point.
//
// No processing is implemented in this header: every argument is ignored and
// the body only raises NN_AUDIO_DSP_ASSERT with a false condition and the
// message "Not implemented.". Nothing is copied from ppInData to ppOutData.
//
//   ppInData     - unused
//   ppOutData    - unused (left untouched)
//   channelCount - unused
//   sampleCount  - unused
NN_FORCEINLINE void ApplyReverbEffectBypass(
    int32_t **ppInData,
    int32_t **ppOutData,
    int channelCount,
    int sampleCount)
{
    // Explicitly discard the arguments; this stub has nothing to do with them.
    static_cast<void>(ppInData);
    static_cast<void>(ppOutData);
    static_cast<void>(channelCount);
    static_cast<void>(sampleCount);

    NN_AUDIO_DSP_ASSERT(false, "Not implemented.");
}

}}} // namespace nn::audio::detail
