﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#pragma once

#include <arm_neon.h>
#include <nn/nn_Macro.h>

namespace nn { namespace audio { namespace dsp { namespace detail {

/**
 * @brief Runs a second-order (biquad) recursive filter over a block of 32-bit samples
 *        using NEON 64-bit accumulators.
 *
 * For every sample the following fixed-point recurrence is evaluated
 * (s1 = state[0], s2 = state[1], both kept at full 64-bit precision):
 *
 *   out = saturate_to_int32(round((s1 + numerator[0] * in) >> 14))
 *   s1  = s2 + numerator[1] * in + denominator[0] * out
 *   s2  =      numerator[2] * in + denominator[1] * out
 *
 * The 14-bit shift means the coefficients are effectively treated as Q14 fixed-point
 * values. The feedback products are ADDED to the state, so the denominator
 * coefficients must already carry whatever sign the caller's filter design requires.
 * NOTE(review): presumably callers pass the negated a1/a2 terms - confirm at call sites.
 *
 * Each input sample is read before the corresponding output sample is stored.
 *
 * @param[out]    output       Receives sampleCount filtered samples.
 * @param[in]     input        sampleCount samples to filter.
 * @param[in]     numerator    Three feed-forward coefficients; indices 0..2 are read.
 * @param[in]     denominator  Two feedback coefficients; indices 0..1 are read.
 * @param[in,out] state        Two 64-bit accumulators carried between calls; read on
 *                             entry and written back on exit.
 * @param[in]     sampleCount  Number of samples to process. Zero or a negative value
 *                             processes nothing and leaves output and state untouched.
 */
NN_FORCEINLINE void ApplyBiquadFilter(int32_t* output, const int32_t* input, const int16_t* numerator, const int16_t* denominator, int64_t* state, int sampleCount) NN_NOEXCEPT
{
    // Number of bits shifted out of the accumulator when producing an output sample.
    const int Q = 14;

    // Lane 0 = numerator[0], lane 1 = 0: the multiply-accumulate below then only
    // touches the first accumulator and leaves the second one unchanged.
    int32x2_t b0zero = vcreate_s32(0);
    b0zero = vset_lane_s32(numerator[0], b0zero, 0);

    // Feed-forward coefficients applied to the two state accumulators.
    int32x2_t b1b2 = vcreate_s32(0);
    b1b2 = vset_lane_s32(numerator[1], b1b2, 0);
    b1b2 = vset_lane_s32(numerator[2], b1b2, 1);

    // Feedback coefficients applied to the two state accumulators.
    int32x2_t a1a2 = vcreate_s32(0);
    a1a2 = vset_lane_s32(denominator[0], a1a2, 0);
    a1a2 = vset_lane_s32(denominator[1], a1a2, 1);

    // Pack both accumulators into one 128-bit register (lane 0 = s1, lane 1 = s2).
    // vcreate_s64 takes the raw 64-bit pattern, hence the explicit unsigned casts.
    int64x2_t s1s2 = vcombine_s64(vcreate_s64(static_cast<uint64_t>(state[0])),
                                  vcreate_s64(static_cast<uint64_t>(state[1])));

    // "> 0" rather than "!= 0": a negative count must not turn into a runaway loop
    // that walks past both buffers and overflows the signed counter.
    for (int remaining = sampleCount; remaining > 0; --remaining)
    {
        const int32_t in = *input++;

        // Lane 0 becomes s1 + numerator[0] * in; lane 1 (s2) is unchanged.
        s1s2 = vmlal_n_s32(s1s2, b0zero, in);

        // Rounding, saturating shift back to 32 bits; only lane 0 is the output sample.
        const int32x2_t narrowed = vqrshrn_n_s64(s1s2, Q);
        const int32_t out = vget_lane_s32(narrowed, 0);

        // Shift the delay line: the old s2 moves into lane 0 and lane 1 restarts at zero,
        // then both lanes pick up their feed-forward and feedback contributions.
        s1s2 = vcombine_s64(vget_high_s64(s1s2), vcreate_s64(0));
        s1s2 = vmlal_n_s32(s1s2, b1b2, in);
        s1s2 = vmlal_n_s32(s1s2, a1a2, out);

        *output++ = out;
    }

    // Persist the accumulators so the next call continues the same filter.
    state[0] = vgetq_lane_s64(s1s2, 0);
    state[1] = vgetq_lane_s64(s1s2, 1);
}

}}}}  // namespace nn::audio::dsp::detail
