﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#pragma once

#include <arm_neon.h>
#include <nn/nn_Macro.h>

namespace nn { namespace audio { namespace dsp { namespace detail {

/**
 * @brief Mixes a geometrically decaying signal into @p output using NEON, two samples per step.
 *
 * The decay is computed on the magnitude of @p startSample and the sign is re-applied when
 * mixing, so that the truncating right shifts behave identically for positive and negative
 * start values. With m = |startSample|, the mixed sequence is
 *   x[0]   = (m    * factor) >> 15
 *   x[1]   = (x[0] * factor) >> 15
 *   x[k+2] = saturate_s32((x[k] * factor * factor) >> 30)
 * and output[k] receives +x[k] (startSample > 0) or -x[k] (startSample <= 0) with a
 * saturating add/subtract.
 *
 * Declared inline because this function is defined in a header; without it, including the
 * header from more than one translation unit yields multiple definitions.
 *
 * @param output       Buffer of at least @p sampleCount int32 samples to mix into.
 * @param startSample  Value the decay starts from. Must not be INT32_MIN (it is negated).
 * @param factor       Per-sample gain with 15 fractional bits. factor * factor is evaluated in
 *                     32 bits, so |factor| must not exceed 46340 (a gain of 1.0 == 32768 is fine).
 * @param sampleCount  Number of samples to process. Odd counts are handled with a one-sample
 *                     tail; counts <= 0 leave @p output untouched.
 * @return The signed value that was mixed into the last processed sample, or @p startSample
 *         unchanged when @p sampleCount <= 0.
 */
inline int32_t ApplyDepopMix2(int32_t* output, int32_t startSample, int32_t factor, int sampleCount) NN_NOEXCEPT
{
    const int Q = 15;

    // Nothing to mix: do not touch the buffer and hand the state back unchanged.
    if (sampleCount <= 0)
    {
        return startSample;
    }

    // Zero is routed through the "negative" path; it contributes nothing either way.
    const bool isNegative = !(startSample > 0);
    const int32_t magnitude = isNegative ? -startSample : startSample;

    // The first two decay steps are computed in scalar code to seed both vector lanes.
    const auto x0 = static_cast<int32_t>((static_cast<int64_t>(magnitude) * factor) >> Q);
    const auto x1 = static_cast<int32_t>((static_cast<int64_t>(x0)        * factor) >> Q);

    // Each vector step advances both lanes by two samples, hence factor^2 and a shift of 2 * Q.
    const int32x2_t factor2 = vdup_n_s32(factor * factor);
    int32x2_t current = vset_lane_s32(x1, vdup_n_s32(x0), 1);  // lanes: (x0, x1)
    int32x2_t applied = current;                               // last pair actually mixed

    int remaining = sampleCount;
    for (; remaining >= 2; remaining -= 2)
    {
        const int32x2_t mixed = vld1_s32(output);
        vst1_s32(output, isNegative ? vqsub_s32(mixed, current) : vqadd_s32(mixed, current));
        output += 2;

        applied = current;
        current = vqshrn_n_s64(vmull_s32(current, factor2), 2 * Q);
    }

    int32_t last = vget_lane_s32(applied, 1);

    // Odd sample count: mix lane 0 of the next pair into the single remaining sample,
    // reading and writing exactly one element so the buffer is never overrun.
    if (remaining == 1)
    {
        const int32x2_t mixed = vld1_dup_s32(output);
        vst1_lane_s32(output, isNegative ? vqsub_s32(mixed, current) : vqadd_s32(mixed, current), 0);
        last = vget_lane_s32(current, 0);
    }

    return isNegative ? -last : last;
}


}}}}  // namespace nn::audio::dsp::detail
