﻿/*--------------------------------------------------------------------------------*
  Copyright (C)Nintendo All rights reserved.

  These coded instructions, statements, and computer programs contain proprietary
  information of Nintendo and/or its licensed developers and are protected by
  national and international copyright laws. They may not be disclosed to third
  parties or copied or duplicated in any form, in whole or in part, without the
  prior written consent of Nintendo.

  The content herein is highly confidential and should be handled accordingly.
 *--------------------------------------------------------------------------------*/

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdlib>  // TODO: should remove malloc/free
#include <vector>

#include <nn/codec/codec_AdpcmCommon.h>
#include "codec_AdpcmCommonInternal.h"
#include "codec_AdpcmEncoderCodebook.h"
#include "codec_AdpcmEncoderDesign.h"
#include "codec_AdpcmEncoderEstimate.h"

namespace nn { namespace codec {

namespace {

// Capacity, in frames, of the staging buffer that CalculateCoefficients() copies
// the input through: the buffer holds InputBufferSize * AdpcmFrameSampleCount samples.
const int InputBufferSize = 1024;
// Predictor order used by CalculateCoefficients(); also the number of
// coefficients stored per entry (row stride) in the table it writes.
const int Order = AdpcmPredictionOrder;

}

void CalculateCoefficients(int16_t* input, int sampleCount, AdpcmInfo *cxt)
{
    double  thresh          = 10.0;
    int     order           = Order;
    int     refine_iter     = 2;
    int     frame_size      = AdpcmFrameSampleCount;
    int     log2_tablesize;
    int     inoffset, insize;
    int16_t *calc_buffer, *in_buffer;
    double  *ac, *ref, e2, **a, coef;
    double  **codebook, **training;
    double  *dir;

    int32_t nframes, aframecnt, framecnt, samplecnt;
    int32_t actual_size, n_entries;
    int32_t *indx;
    int32_t overflow = 0;

    int16_t *inputBuffer;

    int16_t *coeftable = (int16_t*)cxt;

    nframes = ( sampleCount + frame_size - 1 ) / frame_size;

    for (log2_tablesize = 1; (1 << log2_tablesize) < 8; ++log2_tablesize)
    {
    }

    // Initialize codebook storage
    codebook = (double **) malloc((size_t)(1llu << log2_tablesize) * sizeof(double *));

    for (auto i = 0; i < (1 << log2_tablesize); ++i)
    {
        codebook[i] = (double *) malloc((order + 1) * sizeof(double));
    }

    // Splitting direction
    dir = (double *) malloc((order + 1) * sizeof(double));

    in_buffer = (int16_t *)malloc(InputBufferSize * frame_size * sizeof(int16_t));

    calc_buffer = (int16_t *) malloc(2 * frame_size * sizeof(int16_t));

    for (auto i = 0; i < 2 * frame_size; ++i)
    {
        calc_buffer[i] = 0;
    }

    ac  = (double *) malloc((order + 1) * sizeof(double));
    ref = (double *) malloc((order + 1) * sizeof(double));

    // For matrix method
    a = (double **) malloc((order + 1) * sizeof(double *));

    for (auto i = 0; i <= order; ++i)
    {
        a[i] = (double *) malloc((order + 1) * sizeof(double));
    }

    indx = (int32_t *) malloc((order + 1) * sizeof(int32_t));

    // Reserve storage for the training data
    training  = (double **) malloc(nframes * sizeof(double *));
    framecnt  = 0;
    aframecnt = 0;  // active frame counter
    samplecnt = sampleCount;

//    printf("Nframes = %d\n", nframes);

    while(samplecnt > 0)
    {

        // Read PCM data from input file
        if (samplecnt > InputBufferSize * frame_size)
        {
            insize      =   InputBufferSize * frame_size;
            samplecnt   -=  InputBufferSize * frame_size;
        }
        else
        {
            insize = samplecnt;

            for (auto i = 0; i < frame_size; ++i)
            {
                if (insize + i >= InputBufferSize * frame_size)
                {
                    break;
                }

                in_buffer[insize + i] = 0;
            }

            samplecnt = 0;
        }

//        fread(in_buffer, insize, sizeof(int16_t), infile);

        inputBuffer = in_buffer;

        for (auto i = 0; i < insize; ++i)
        {
            *inputBuffer++ = *input++;
        }

        inoffset = 0;

        // Statistic data sampling Loop
        while (inoffset < insize)
        {
            // Set up samples for calculation
            for (auto i = 0; i < frame_size; ++i)
            {
                calc_buffer[i] = calc_buffer[i + frame_size];
            }

            for (auto i = 0; i < frame_size; ++i)
            {
                calc_buffer[frame_size + i] = in_buffer[inoffset];
                inoffset ++;
            }

            //printf("Frame no. %d  ",framecnt); // Debug
            CalculateAutoCorrelationVector(calc_buffer + frame_size, order, frame_size, ac);

            if (fabs(ac[0]) > thresh)
            {
                CalculateAutoCorrelationMatrix(calc_buffer + frame_size, order, frame_size, a);

                // Lower-upper decomposition
                if (ExecuteLuComposition(a, order, indx) == 0)
                {
                    // Put solution in ac
                    SolveEquationWithLuDecomposition(a, order, indx, ac);
                    ac[0] = 1.0;

                    // Convert to reflection coefficients - reject unstable vectors
                    if (!CalculateReflectionFromTap(ac, ref, order))
                    {
                        // The training data is stored as tap values
                        training[aframecnt] = (double *) malloc((order + 1) * sizeof(double));

                        training[aframecnt][0] = 1.0;

                        for (auto i = 1; i <= order; ++i)
                        {
                            //Stabilize the filter

                            if (ref[i] >= 1.0)
                            {
                                ref[i] = 1.0 - AdpcmEpsilon;
                            }
                            if (ref[i] <= -1.0)
                            {
                                ref[i] = -1.0 + AdpcmEpsilon;
                            }
                        }

                        CalculateTapFromReflection(ref, training[aframecnt], order);

                        //printf("a1:%f  a2:%f ", training[aframecnt][1], training[aframecnt][2]); // Debug
                        ++aframecnt;
                    }

                }  // if (lud(a, order, indx, &d)==0)

            }  // if (fabs(ac[0]) > thresh)

            ++framecnt;
            //printf("\n");  // Debug

        }  // while ( inoffset < insize )
    }  // while( samplecnt > 0 )

//    printf("Available frames = %d\n", aframecnt);

    // To start things off find the average auto-correlation over
    // the complete data set.
    ac[0] = 1.0;

    for (auto j = 1; j <= order; ++j)
    {
        ac[j] = 0;
    }

    for (auto i = 0; i < aframecnt; ++i)
    {
        CalculateAutoCorrelationFromTap(training[i], order, codebook[0]);

        for (auto j = 1; j <= order; ++j)
        {
            ac[j] += codebook[0][j];
        }
    }

    if (aframecnt > 0)  // to avoid zero division
    {
        for (auto j = 1; j <= order; ++j)
        {
            ac[j] = ac[j] / aframecnt;
        }
    }

    // The average model
    ExecuteDurbin(ac, order, ref, codebook[0], &e2);

    // Stabilize - could put this in durbin
    for (auto j = 1; j <= order; ++j)
    {
        if (ref[j] >= 1.0)
        {
            ref[j] = 1.0 - AdpcmEpsilon;
        }

        if (ref[j] <= -1.0)
        {
            ref[j] = -1.0 + AdpcmEpsilon;
        }
    }

    CalculateTapFromReflection(ref, codebook[0], order);

    //printf("Av.  a1:%f  a2:%f\n", codebook[0][1], codebook[0][2]); // Debug

    actual_size = 0;

    while (actual_size < log2_tablesize)
    {
        n_entries = 1 << actual_size;

        // Split each codebook template into
        // two - the original and a shifted version
        for (auto i = 0; i <= order; ++i)
        {
            dir[i] = 0;
        }

        dir[order - 1] = -1.0;

        SplitCodebook(codebook, dir, order, n_entries, 0.01);

        // Iterative refinement of templates
        actual_size++;
        RefineCodebook(codebook, order, 1 << actual_size, training, aframecnt, refine_iter, 0);
    }

    n_entries = 1 << actual_size;

    // Let's see what it looks like
//    fprintf(stdout,"order = %d  entry = %d\n",order,n_entries);

    for (auto i = 0; i < n_entries; ++i)
    {
//        printf("Predictor %d : ", i);

        for (auto j = 0; j < order; ++j)
        {
            coef = - codebook[i][j + 1] * AdpcmCoefficientScaling;

            if (coef > 0)
            {
                if (coef > 32767)
                {
                    coeftable[i * Order + j] = 32767;
                }
                else
                {
                    coeftable[i * Order + j] = (int16_t)(coef + 0.5);
                }
            }
            else
            {
                if (coef < -32768)
                {
                    coeftable[i * Order + j] = -32768;
                }
                else
                {
                    coeftable[i * Order + j] = (int16_t)(coef - 0.5);
                }
            }

//            printf("a%d = %f[0x%04X] ", j + 1, -codebook[i][j+1], coeftable[i*Order+j]);
        }

//        printf("\n");
    }


    // Free working memories
    for (auto i = 0; i < (1 << log2_tablesize); ++i)
    {
        free(codebook[i]);
    }

    for (auto i = 0; i < aframecnt; ++i)
    {
        free(training[i]);
    }

    for (auto i = 0; i <= order; ++i)
    {
        free(a[i]);
    }

    free(codebook);
    free(training);
    free(a);
    free(in_buffer);
    free(calc_buffer);
    free(ac);
    free(ref);
    free(dir);
    free(indx);
}  // NOLINT(impl/function_size)

}}  // namespace nn::codec
