/*
 * MPEG Audio decoder
 * Copyright (c) 2001, 2002 Fabrice Bellard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * MPEG Audio decoder
 */

#ifdef AVM_FFMPEG_EXT_TABLES
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <signal.h>
#include <unistd.h>
#endif


#include "libavutil/attributes.h"
#include "libavutil/avassert.h"
#include "libavutil/channel_layout.h"
#include "libavutil/float_dsp.h"
#include "libavutil/libm.h"
#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "mathops.h"
#include "mpegaudiodsp.h"

/*
 * TODO:
 *  - test lsf / mpeg25 extensively.
 */

#include "mpegaudio.h"
#include "mpegaudiodecheader.h"

#define BACKSTEP_SIZE 512
#define EXTRABYTES 24
#define LAST_BUF_SIZE 2 * BACKSTEP_SIZE + EXTRABYTES

/* layer 3 "granule" */
typedef struct GranuleDef {
    uint8_t scfsi;
    int part2_3_length;
    int big_values;
    int global_gain;
    int scalefac_compress;
    uint8_t block_type;
    uint8_t switch_point;
    uint8_t scalefac_scale;
    uint8_t count1table_select;
    int table_select[3];
    int subblock_gain[3];
    int region_size[3]; /* number of huffman codes in each region */
    int preflag;
    int short_start, long_end; /* long/short band indexes */
    uint8_t scale_factors[40];
    DECLARE_ALIGNED(16, INTFLOAT, sb_hybrid)[SBLIMIT * 18]; /* 576 samples */
} GranuleDef;

typedef struct MPADecodeContext {
    MPA_DECODE_HEADER
    uint8_t last_buf[LAST_BUF_SIZE];
    int last_buf_size;
    int extrasize;
    /* next header (used in free format parsing) */
    uint32_t free_format_next_header;
    GetBitContext gb;
    GetBitContext in_gb;
    DECLARE_ALIGNED(32, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512 * 2];
    int synth_buf_offset[MPA_MAX_CHANNELS];
    DECLARE_ALIGNED(32, INTFLOAT, sb_samples)[MPA_MAX_CHANNELS][36][SBLIMIT];
    INTFLOAT mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
    GranuleDef granules[2][2]; /* Used in Layer 3 */
    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
    int dither_state;
    int err_recognition;
    AVCodecContext* avctx;
    MPADSPContext mpadsp;
    AVFloatDSPContext *fdsp;
    AVFrame *frame;
} MPADecodeContext;

#define HEADER_SIZE 4

#include "mpegaudiodata.h"
#include "mpegaudiodectab.h"

/* vlc structure for decoding layer 3 huffman tables */
static VLC huff_vlc[16];
static VLC_TYPE huff_vlc_tables[
    0 + 128 + 128 + 128 + 130 + 128 + 154 + 166 +
  142 + 204 + 190 + 170 + 542 + 460 + 662 + 414
  ][2];
static const int huff_vlc_tables_sizes[16] = {
    0,  128,  128,  128,  130,  128,  154,  166,
  142,  204,  190,  170,  542,  460,  662,  414
};
static VLC huff_quad_vlc[2];
static VLC_TYPE  huff_quad_vlc_tables[128+16][2];
static const int huff_quad_vlc_tables_sizes[2] = { 128, 16 };
/* computed from band_size_long */
static uint16_t band_index_long[9][23];
#include "mpegaudio_tablegen.h"
/* intensity stereo coef table */
static INTFLOAT is_table[2][16];
static INTFLOAT is_table_lsf[2][2][16];
static INTFLOAT csa_table[8][4];

/* HINT: if you change number or name of devision_tabs, also adjust mpegaudiodec_read_tbls () */
/* and mpegaudiodec_write_tbls () */
static int16_t division_tab3[1<<6 ];
static int16_t division_tab5[1<<8 ];
static int16_t division_tab9[1<<11];

static int16_t * const division_tabs[4] = {
    division_tab3, division_tab5, NULL, division_tab9
};

/* lower 2 bits: modulo 3, higher bits: shift */
static uint16_t scale_factor_modshift[64];
/* [i][j]:  2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
static int32_t scale_factor_mult[15][3];
/* mult table for layer 2 group quantization */

#define SCALE_GEN(v) \
{ FIXR_OLD(1.0 * (v)), FIXR_OLD(0.7937005259 * (v)), FIXR_OLD(0.6299605249 * (v)) }

static const int32_t scale_factor_mult2[3][3] = {
    SCALE_GEN(4.0 / 3.0), /* 3 steps */
    SCALE_GEN(4.0 / 5.0), /* 5 steps */
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
};

#ifdef AVM_FFMPEG_EXT_TABLES
static int mpegaudiodec_read_tbls (void);
#endif

/**
 * Convert region offsets to region sizes and truncate
 * size to big_values.
 */
static void region_offset2size(GranuleDef *g)
{
    int i, k, j = 0;
    g->region_size[2] = 576 / 2;
    for (i = 0; i < 3; i++) {
        k = FFMIN(g->region_size[i], g->big_values);
        g->region_size[i] = k - j;
        j = k;
    }
}

static void init_short_region(MPADecodeContext *s, GranuleDef *g)
{
    if (g->block_type == 2) {
        if (s->sample_rate_index != 8)
            g->region_size[0] = (36 / 2);
        else
            g->region_size[0] = (72 / 2);
    } else {
        if (s->sample_rate_index <= 2)
            g->region_size[0] = (36 / 2);
        else if (s->sample_rate_index != 8)
            g->region_size[0] = (54 / 2);
        else
            g->region_size[0] = (108 / 2);
    }
    g->region_size[1] = (576 / 2);
}

static void init_long_region(MPADecodeContext *s, GranuleDef *g,
                             int ra1, int ra2)
{
    int l;
    g->region_size[0] = band_index_long[s->sample_rate_index][ra1 + 1] >> 1;
    /* should not overflow */
    l = FFMIN(ra1 + ra2 + 2, 22);
    g->region_size[1] = band_index_long[s->sample_rate_index][      l] >> 1;
}

static void compute_band_indexes(MPADecodeContext *s, GranuleDef *g)
{
    if (g->block_type == 2) {
        if (g->switch_point) {
            if(s->sample_rate_index == 8)
                avpriv_request_sample(s->avctx, "switch point in 8khz");
            /* if switched mode, we handle the 36 first samples as
                long blocks.  For 8000Hz, we handle the 72 first
                exponents as long blocks */
            if (s->sample_rate_index <= 2)
                g->long_end = 8;
            else
                g->long_end = 6;

            g->short_start = 3;
        } else {
            g->long_end    = 0;
            g->short_start = 0;
        }
    } else {
        g->short_start = 13;
        g->long_end    = 22;
    }
}

/* layer 1 unscaling */
/* n = number of bits of the mantissa minus 1 */
static inline int l1_unscale(int n, int mant, int scale_factor)
{
    int shift, mod;
    int64_t val;

    shift   = scale_factor_modshift[scale_factor];
    mod     = shift & 3;
    shift >>= 2;
    val     = MUL64((int)(mant + (-1U << n) + 1), scale_factor_mult[n-1][mod]);
    shift  += n;
    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
    return (int)((val + (1LL << (shift - 1))) >> shift);
}

static inline int l2_unscale_group(int steps, int mant, int scale_factor)
{
    int shift, mod, val;

    shift   = scale_factor_modshift[scale_factor];
    mod     = shift & 3;
    shift >>= 2;

    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
    /* NOTE: at this point, 0 <= shift <= 21 */
    if (shift > 0)
        val = (val + (1 << (shift - 1))) >> shift;
    return val;
}

/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
static inline int l3_unscale(int value, int exponent)
{
    unsigned int m;
    int e;

    e  = table_4_3_exp  [4 * value + (exponent & 3)];
    m  = table_4_3_value[4 * value + (exponent & 3)];
    e -= exponent >> 2;
#ifdef DEBUG
    if(e < 1)
        av_log(NULL, AV_LOG_WARNING, "l3_unscale: e is %d\n", e);
#endif
    if (e > 31)
        return 0;
    m = (m + (1 << (e - 1))) >> e;

    return m;
}

static av_cold void decode_init_static(
#ifdef AVM_FFMPEG_EXT_TABLES
    unsigned int mode
#else
    void
#endif
    )
{
    int i, j;
    int offset;
#ifdef AVM_FFMPEG_EXT_TABLES
    int ext_tbls_ok;
#endif

    /* scale factors table for layer 1/2 */
    for (i = 0; i < 64; i++) {
        int shift, mod;
        /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
        shift = i / 3;
        mod   = i % 3;
        scale_factor_modshift[i] = mod | (shift << 2);
    }

    /* scale factor multiply for layer 1 */
    for (i = 0; i < 15; i++) {
        int n, norm;
        n = i + 2;
        norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
        scale_factor_mult[i][0] = MULLx(norm, FIXR(1.0          * 2.0), FRAC_BITS);
        scale_factor_mult[i][1] = MULLx(norm, FIXR(0.7937005259 * 2.0), FRAC_BITS);
        scale_factor_mult[i][2] = MULLx(norm, FIXR(0.6299605249 * 2.0), FRAC_BITS);
#ifndef AVM_FFMPEG_SILENT
        ff_dlog(NULL, "%d: norm=%x s=%x %x %x\n", i, norm,
                scale_factor_mult[i][0],
                scale_factor_mult[i][1],
                scale_factor_mult[i][2]);
#endif
    }

    RENAME(ff_mpa_synth_init)(RENAME(ff_mpa_synth_window));

    /* huffman decode tables */
    offset = 0;
    for (i = 1; i < 16; i++) {
        const HuffTable *h = &mpa_huff_tables[i];
        int xsize, x, y;
        uint8_t  tmp_bits [512] = { 0 };
        uint16_t tmp_codes[512] = { 0 };

        xsize = h->xsize;

        j = 0;
        for (x = 0; x < xsize; x++) {
            for (y = 0; y < xsize; y++) {
                tmp_bits [(x << 5) | y | ((x&&y)<<4)]= h->bits [j  ];
                tmp_codes[(x << 5) | y | ((x&&y)<<4)]= h->codes[j++];
            }
        }

        /* XXX: fail test */
        huff_vlc[i].table = huff_vlc_tables+offset;
        huff_vlc[i].table_allocated = huff_vlc_tables_sizes[i];
        init_vlc(&huff_vlc[i], 7, 512,
                 tmp_bits, 1, 1, tmp_codes, 2, 2,
                 INIT_VLC_USE_NEW_STATIC);
        offset += huff_vlc_tables_sizes[i];
    }
    av_assert0(offset == FF_ARRAY_ELEMS(huff_vlc_tables));

    offset = 0;
    for (i = 0; i < 2; i++) {
        huff_quad_vlc[i].table = huff_quad_vlc_tables+offset;
        huff_quad_vlc[i].table_allocated = huff_quad_vlc_tables_sizes[i];
        init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
                 mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1,
                 INIT_VLC_USE_NEW_STATIC);
        offset += huff_quad_vlc_tables_sizes[i];
    }
    av_assert0(offset == FF_ARRAY_ELEMS(huff_quad_vlc_tables));

    for (i = 0; i < 9; i++) {
        int k = 0;
        for (j = 0; j < 22; j++) {
            band_index_long[i][j] = k;
            k += band_size_long[i][j];
        }
        band_index_long[i][22] = k;
    }

    /* compute n ^ (4/3) and store it in mantissa/exp format */

#ifdef AVM_FFMPEG_EXT_TABLES
    ext_tbls_ok = -1;
    if (mode == 0) {
        ext_tbls_ok = mpegaudiodec_read_tbls ();
    }
    if (ext_tbls_ok < 0) {
        av_log(NULL, AV_LOG_WARNING, "decode_init_static, failed to read ext tbl (%d), calc tbls", 
            ext_tbls_ok);
#endif
        mpegaudio_tableinit();
    /*--- AV_LOG_SAMPLE_DUMP (0, (unsigned char *)table_4_3_exp, sizeof (table_4_3_exp[0]), TABLE_4_3_SIZE); ---*/  /* testing!!! */
    /*--- AV_LOG_SAMPLE_DUMP (0, (unsigned char *)table_4_3_value, sizeof (table_4_3_value[0]), TABLE_4_3_SIZE); ---*/  /* testing!!! */

        for (i = 0; i < 4; i++) {
            if (ff_mpa_quant_bits[i] < 0) {
                for (j = 0; j < (1 << (-ff_mpa_quant_bits[i]+1)); j++) {
                    int val1, val2, val3, steps;
                    int val = j;
                    steps   = ff_mpa_quant_steps[i];
                    val1    = val % steps;
                    val    /= steps;
                    val2    = val % steps;
                    val3    = val / steps;
                    division_tabs[i][j] = val1 + (val2 << 4) + (val3 << 8);
                }
            }
        }


        for (i = 0; i < 7; i++) {
            float f;
            INTFLOAT v;
            if (i != 6) {
                f = tan((double)i * M_PI / 12.0);
                v = FIXR(f / (1.0 + f));
            } else {
                v = FIXR(1.0);
            }
            is_table[0][    i] = v;
            is_table[1][6 - i] = v;
        }
        /* invalid values */
        for (i = 7; i < 16; i++)
            is_table[0][i] = is_table[1][i] = 0.0;

        for (i = 0; i < 16; i++) {
            double f;
            int e, k;

            for (j = 0; j < 2; j++) {
                e = -(j + 1) * ((i + 1) >> 1);
                f = exp2(e / 4.0);
                k = i & 1;
                is_table_lsf[j][k ^ 1][i] = FIXR(f);
                is_table_lsf[j][k    ][i] = FIXR(1.0);
#ifndef AVM_FFMPEG_SILENT
                ff_dlog(NULL, "is_table_lsf %d %d: %f %f\n",
                        i, j, (float) is_table_lsf[j][0][i],
                        (float) is_table_lsf[j][1][i]);
#endif
            }
        }

        for (i = 0; i < 8; i++) {
#ifdef FF_V0_4_9_MATH
            float ci, cs, ca;
#else
            double ci, cs, ca;
#endif
            ci = ci_table[i];
            cs = 1.0 / sqrt(1.0 + ci * ci);
            ca = cs * ci;
#if !USE_FLOATS
            csa_table[i][0] = FIXHR(cs/4);
            csa_table[i][1] = FIXHR(ca/4);
            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
#else
            csa_table[i][0] = cs;
            csa_table[i][1] = ca;
            csa_table[i][2] = ca + cs;
            csa_table[i][3] = ca - cs;
#endif
        }
#ifdef AVM_FFMPEG_EXT_TABLES
    }
#endif
}

#ifdef AVM_FFMPEG_EXT_TABLES

#define AVM_FFMPEG_EXT_TABLE_MAGIC 0xbeadface
#define AVM_FFMPEG_EXT_TABLE_NAME "/var/tmp/ffmpeg_mp3.tables"
#define AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX ".part"

static int mpegaudiodec_open_tbls_for_read (void)
{
    int retry = 15, fd = -1;
    sigset_t sall;
    sigset_t sold;

    sigfillset (&sall);
    sigemptyset (&sold);
    sigprocmask (SIG_BLOCK, &sall, &sold);
    while (retry) {
        fd = open (AVM_FFMPEG_EXT_TABLE_NAME, O_RDONLY);
        if (fd >= 0) {
            break;
        }
        sleep (2);
        retry--;
    }
    sigprocmask (SIG_SETMASK, &sold, &sall);
    if (fd < 0) {
        av_log(NULL, AV_LOG_ERROR, "mpegaudiodec_open_tbls_for_read, failed to open: %d",
            errno);
    }
    return fd;
}

static int mpegaudiodec_read_tbl (int fd, void *tbl, int size)
{
    int ret = 0;
    unsigned int magic;

    if (read (fd, tbl, size) != size) {
      ret = -1;
    } else if (read (fd, &magic, sizeof (magic)) != sizeof (magic)) {
      ret = -2;
    } else if (magic != AVM_FFMPEG_EXT_TABLE_MAGIC) {
      ret = -3;
    }
    return ret;
}

static int mpegaudiodec_read_tbls (void)
{ 
    int fd_tbls, ret = 0;

    errno = 0;
    if ((fd_tbls = mpegaudiodec_open_tbls_for_read ()) >= 0) {
        /* read tables */
        if (mpegaudiodec_read_tbl (fd_tbls, table_4_3_value, sizeof (table_4_3_value))) {
          ret = -1;
        } else if (mpegaudiodec_read_tbl (fd_tbls, table_4_3_exp, sizeof (table_4_3_exp))) {
          ret = -2;
        } else if (mpegaudiodec_read_tbl (fd_tbls, expval_table_fixed, sizeof (expval_table_fixed))) {
          ret = -3;
        } else if (mpegaudiodec_read_tbl (fd_tbls, exp_table_fixed, sizeof (exp_table_fixed))) {
          ret = -4;
        } else if (mpegaudiodec_read_tbl (fd_tbls, division_tab3, sizeof (division_tab3))) {
          ret = -5;
        } else if (mpegaudiodec_read_tbl (fd_tbls, division_tab5, sizeof (division_tab5))) {
          ret = -6;
        } else if (mpegaudiodec_read_tbl (fd_tbls, division_tab9, sizeof (division_tab9))) {
          ret = -7;
        } else if (mpegaudiodec_read_tbl (fd_tbls, is_table, sizeof (is_table))) {
          ret = -8;
        } else if (mpegaudiodec_read_tbl (fd_tbls, is_table_lsf, sizeof (is_table_lsf))) {
          ret = -9;
        } else if (mpegaudiodec_read_tbl (fd_tbls, csa_table, sizeof (csa_table))) {
          ret = -10;
#ifdef undef
        } else if (mpegaudiodec_read_tbl (fd_tbls, ff_mdct_win_fixed, sizeof (ff_mdct_win_fixed))) {
          ret = -11;
#endif
        }
        close (fd_tbls);
    } else {
      ret = errno;
    }
    return ret;
}

static int mpegaudiodec_open_tbls_for_write (void)
{
    int fd = -1;

    /* exclusive: pre app could have been started parallel */
    fd = open (AVM_FFMPEG_EXT_TABLE_NAME AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX, 
        O_WRONLY | O_CREAT | O_EXCL | O_TRUNC,
        S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (fd < 0) {
        av_log(NULL, AV_LOG_ERROR, "mpegaudiodec_open_tbls_for_write, failed to open: %d",
            errno);
    }
    return fd;
}

static int mpegaudiodec_write_tbl (int fd, void *tbl, int size)
{
    int ret = 0;
    unsigned int magic = AVM_FFMPEG_EXT_TABLE_MAGIC;

    if (write (fd, tbl, size) != size) {
      ret = -1;
    } else if (write (fd, &magic, sizeof (magic)) != sizeof (magic)) {
      ret = -2;
    }
    return ret;
}


static av_cold int mpegaudiodec_write_tbls (void)
{
    int fd_tbls, ret = 0;

    decode_init_static (1); /* calculate tbls for being written */
    errno = 0;
    if ((fd_tbls = mpegaudiodec_open_tbls_for_write ()) >= 0) {
        if (mpegaudiodec_write_tbl (fd_tbls, table_4_3_value, sizeof (table_4_3_value))) {
            ret = -1;
        } else if (mpegaudiodec_write_tbl (fd_tbls, table_4_3_exp, sizeof (table_4_3_exp))) {
            ret = -2;
        } else if (mpegaudiodec_write_tbl (fd_tbls, expval_table_fixed, sizeof (expval_table_fixed))) {
            ret = -3;
        } else if (mpegaudiodec_write_tbl (fd_tbls, exp_table_fixed, sizeof (exp_table_fixed))) {
            ret = -4;
        } else if (mpegaudiodec_write_tbl (fd_tbls, division_tab3, sizeof (division_tab3))) {
            ret = -5;
        } else if (mpegaudiodec_write_tbl (fd_tbls, division_tab5, sizeof (division_tab5))) {
            ret = -6;
        } else if (mpegaudiodec_write_tbl (fd_tbls, division_tab9, sizeof (division_tab9))) {
            ret = -7;
        } else if (mpegaudiodec_write_tbl (fd_tbls, is_table, sizeof (is_table))) {
            ret = -8;
        } else if (mpegaudiodec_write_tbl (fd_tbls, is_table_lsf, sizeof (is_table_lsf))) {
            ret = -9;
        } else if (mpegaudiodec_write_tbl (fd_tbls, csa_table, sizeof (csa_table))) {
            ret = -10;
#ifdef undef
        } else if (mpegaudiodec_write_tbl (fd_tbls, ff_mdct_win_fixed, sizeof (ff_mdct_win_fixed))) {
            ret = -11;
#endif
        }
        close (fd_tbls);

        if (ret < 0) {
            av_log (NULL, AV_LOG_ERROR, "mpegaudiodec_write_tbls, write failed (%d), rm %s",
                ret, AVM_FFMPEG_EXT_TABLE_NAME AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX);
            unlink (AVM_FFMPEG_EXT_TABLE_NAME AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX);
        } else if (rename (AVM_FFMPEG_EXT_TABLE_NAME AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX, AVM_FFMPEG_EXT_TABLE_NAME) == -1) {
            av_log (NULL, AV_LOG_ERROR, "mpegaudiodec_write_tbls, rename failed, errno %u, rm %s",
                errno, AVM_FFMPEG_EXT_TABLE_NAME AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX);
            unlink (AVM_FFMPEG_EXT_TABLE_NAME AVM_FFMPEG_EXT_TABLE_TMP_SUFFIX);
        } else {
            av_log (NULL, AV_LOG_INFO, "mpegaudiodec_write_tbls, %s written",
                AVM_FFMPEG_EXT_TABLE_NAME);
        }
    } else {
        ret = errno;
    }
    return ret;
}

#endif

#if USE_FLOATS
static av_cold int decode_close(AVCodecContext * avctx)
{
    MPADecodeContext *s = avctx->priv_data;
    av_freep(&s->fdsp);

    return 0;
}
#endif

static av_cold int decode_init(AVCodecContext * avctx)
{
    static int initialized_tables = 0;
    MPADecodeContext *s = avctx->priv_data;

    if (!initialized_tables) {
        decode_init_static(
#ifdef AVM_FFMPEG_EXT_TABLES
            0   /* read */
#endif
            );
        initialized_tables = 1;
    }

    s->avctx = avctx;

#if USE_FLOATS
    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
    if (!s->fdsp)
        return AVERROR(ENOMEM);
#endif

    ff_mpadsp_init(&s->mpadsp);

    if (
#ifndef AVM_FFMPEG_REMOVE_UNNEEDED
        /* It took me a whole week to understand that decoding problems when porting from a very */
        /* early 0.4.9 version of ffmpeg to this one are due to this new sample_fmt option with */
        /* different default behaviour. */
        avctx->request_sample_fmt == OUT_FMT && 
#endif /* AVM_FFMPEG_REMOVE_UNNEEDED */
        avctx->codec_id != AV_CODEC_ID_MP3ON4)
        avctx->sample_fmt = OUT_FMT;
    else
        avctx->sample_fmt = OUT_FMT_P;
    s->err_recognition = avctx->err_recognition;

    if (avctx->codec_id == AV_CODEC_ID_MP3ADU)
        s->adu_mode = 1;

    s->frame_count = 0;

    return 0;
}

#define C3 FIXHR(0.86602540378443864676/2)
#define C4 FIXHR(0.70710678118654752439/2) //0.5 / cos(pi*(9)/36)
#define C5 FIXHR(0.51763809020504152469/2) //0.5 / cos(pi*(5)/36)
#define C6 FIXHR(1.93185165257813657349/4) //0.5 / cos(pi*(15)/36)

/* 12 points IMDCT. We compute it "by hand" by factorizing obvious
   cases. */
static void imdct12(INTFLOAT *out, INTFLOAT *in)
{
    INTFLOAT in0, in1, in2, in3, in4, in5, t1, t2;

    in0  = in[0*3];
    in1  = in[1*3] + in[0*3];
    in2  = in[2*3] + in[1*3];
    in3  = in[3*3] + in[2*3];
    in4  = in[4*3] + in[3*3];
    in5  = in[5*3] + in[4*3];
    in5 += in3;
    in3 += in1;

    in2  = MULH3(in2, C3, 2);
    in3  = MULH3(in3, C3, 4);

    t1   = in0 - in4;
    t2   = MULH3(in1 - in5, C4, 2);

    out[ 7] =
    out[10] = t1 + t2;
    out[ 1] =
    out[ 4] = t1 - t2;

    in0    += SHR(in4, 1);
    in4     = in0 + in2;
    in5    += 2*in1;
    in1     = MULH3(in5 + in3, C5, 1);
    out[ 8] =
    out[ 9] = in4 + in1;
    out[ 2] =
    out[ 3] = in4 - in1;

    in0    -= in2;
    in5     = MULH3(in5 - in3, C6, 2);
    out[ 0] =
    out[ 5] = in0 - in5;
    out[ 6] =
    out[11] = in0 + in5;
}

/* return the number of decoded frames */
static int mp_decode_layer1(MPADecodeContext *s)
{
    int bound, i, v, n, ch, j, mant;
    uint8_t allocation[MPA_MAX_CHANNELS][SBLIMIT];
    uint8_t scale_factors[MPA_MAX_CHANNELS][SBLIMIT];

    if (s->mode == MPA_JSTEREO)
        bound = (s->mode_ext + 1) * 4;
    else
        bound = SBLIMIT;

    /* allocation bits */
    for (i = 0; i < bound; i++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            allocation[ch][i] = get_bits(&s->gb, 4);
        }
    }
    for (i = bound; i < SBLIMIT; i++)
        allocation[0][i] = get_bits(&s->gb, 4);

    /* scale factors */
    for (i = 0; i < bound; i++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            if (allocation[ch][i])
                scale_factors[ch][i] = get_bits(&s->gb, 6);
        }
    }
    for (i = bound; i < SBLIMIT; i++) {
        if (allocation[0][i]) {
            scale_factors[0][i] = get_bits(&s->gb, 6);
            scale_factors[1][i] = get_bits(&s->gb, 6);
        }
    }

    /* compute samples */
    for (j = 0; j < 12; j++) {
        for (i = 0; i < bound; i++) {
            for (ch = 0; ch < s->nb_channels; ch++) {
                n = allocation[ch][i];
                if (n) {
                    mant = get_bits(&s->gb, n + 1);
                    v = l1_unscale(n, mant, scale_factors[ch][i]);
                } else {
                    v = 0;
                }
                s->sb_samples[ch][j][i] = v;
            }
        }
        for (i = bound; i < SBLIMIT; i++) {
            n = allocation[0][i];
            if (n) {
                mant = get_bits(&s->gb, n + 1);
                v = l1_unscale(n, mant, scale_factors[0][i]);
                s->sb_samples[0][j][i] = v;
                v = l1_unscale(n, mant, scale_factors[1][i]);
                s->sb_samples[1][j][i] = v;
            } else {
                s->sb_samples[0][j][i] = 0;
                s->sb_samples[1][j][i] = 0;
            }
        }
    }
    return 12;
}

static int mp_decode_layer2(MPADecodeContext *s)
{
    int sblimit; /* number of used subbands */
    const unsigned char *alloc_table;
    int table, bit_alloc_bits, i, j, ch, bound, v;
    unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT];
    unsigned char scale_code[MPA_MAX_CHANNELS][SBLIMIT];
    unsigned char scale_factors[MPA_MAX_CHANNELS][SBLIMIT][3], *sf;
    int scale, qindex, bits, steps, k, l, m, b;

    /* select decoding table */
    table = ff_mpa_l2_select_table(s->bit_rate / 1000, s->nb_channels,
                                   s->sample_rate, s->lsf);
    sblimit     = ff_mpa_sblimit_table[table];
    alloc_table = ff_mpa_alloc_tables[table];

    if (s->mode == MPA_JSTEREO)
        bound = (s->mode_ext + 1) * 4;
    else
        bound = sblimit;

    ff_dlog(s->avctx, "bound=%d sblimit=%d\n", bound, sblimit);

    /* sanity check */
    if (bound > sblimit)
        bound = sblimit;

    /* parse bit allocation */
    j = 0;
    for (i = 0; i < bound; i++) {
        bit_alloc_bits = alloc_table[j];
        for (ch = 0; ch < s->nb_channels; ch++)
            bit_alloc[ch][i] = get_bits(&s->gb, bit_alloc_bits);
        j += 1 << bit_alloc_bits;
    }
    for (i = bound; i < sblimit; i++) {
        bit_alloc_bits = alloc_table[j];
        v = get_bits(&s->gb, bit_alloc_bits);
        bit_alloc[0][i] = v;
        bit_alloc[1][i] = v;
        j += 1 << bit_alloc_bits;
    }

    /* scale codes */
    for (i = 0; i < sblimit; i++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            if (bit_alloc[ch][i])
                scale_code[ch][i] = get_bits(&s->gb, 2);
        }
    }

    /* scale factors */
    for (i = 0; i < sblimit; i++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            if (bit_alloc[ch][i]) {
                sf = scale_factors[ch][i];
                switch (scale_code[ch][i]) {
                default:
                case 0:
                    sf[0] = get_bits(&s->gb, 6);
                    sf[1] = get_bits(&s->gb, 6);
                    sf[2] = get_bits(&s->gb, 6);
                    break;
                case 2:
                    sf[0] = get_bits(&s->gb, 6);
                    sf[1] = sf[0];
                    sf[2] = sf[0];
                    break;
                case 1:
                    sf[0] = get_bits(&s->gb, 6);
                    sf[2] = get_bits(&s->gb, 6);
                    sf[1] = sf[0];
                    break;
                case 3:
                    sf[0] = get_bits(&s->gb, 6);
                    sf[2] = get_bits(&s->gb, 6);
                    sf[1] = sf[2];
                    break;
                }
            }
        }
    }

    /* samples */
    for (k = 0; k < 3; k++) {
        for (l = 0; l < 12; l += 3) {
            j = 0;
            for (i = 0; i < bound; i++) {
                bit_alloc_bits = alloc_table[j];
                for (ch = 0; ch < s->nb_channels; ch++) {
                    b = bit_alloc[ch][i];
                    if (b) {
                        scale = scale_factors[ch][i][k];
                        qindex = alloc_table[j+b];
                        bits = ff_mpa_quant_bits[qindex];
                        if (bits < 0) {
                            int v2;
                            /* 3 values at the same time */
                            v = get_bits(&s->gb, -bits);
                            v2 = division_tabs[qindex][v];
                            steps  = ff_mpa_quant_steps[qindex];

                            s->sb_samples[ch][k * 12 + l + 0][i] =
                                l2_unscale_group(steps,  v2       & 15, scale);
                            s->sb_samples[ch][k * 12 + l + 1][i] =
                                l2_unscale_group(steps, (v2 >> 4) & 15, scale);
                            s->sb_samples[ch][k * 12 + l + 2][i] =
                                l2_unscale_group(steps,  v2 >> 8      , scale);
                        } else {
                            for (m = 0; m < 3; m++) {
                                v = get_bits(&s->gb, bits);
                                v = l1_unscale(bits - 1, v, scale);
                                s->sb_samples[ch][k * 12 + l + m][i] = v;
                            }
                        }
                    } else {
                        s->sb_samples[ch][k * 12 + l + 0][i] = 0;
                        s->sb_samples[ch][k * 12 + l + 1][i] = 0;
                        s->sb_samples[ch][k * 12 + l + 2][i] = 0;
                    }
                }
                /* next subband in alloc table */
                j += 1 << bit_alloc_bits;
            }
            /* XXX: find a way to avoid this duplication of code */
            for (i = bound; i < sblimit; i++) {
                bit_alloc_bits = alloc_table[j];
                b = bit_alloc[0][i];
                if (b) {
                    int mant, scale0, scale1;
                    scale0 = scale_factors[0][i][k];
                    scale1 = scale_factors[1][i][k];
                    qindex = alloc_table[j+b];
                    bits = ff_mpa_quant_bits[qindex];
                    if (bits < 0) {
                        /* 3 values at the same time */
                        v = get_bits(&s->gb, -bits);
                        steps = ff_mpa_quant_steps[qindex];
                        mant = v % steps;
                        v = v / steps;
                        s->sb_samples[0][k * 12 + l + 0][i] =
                            l2_unscale_group(steps, mant, scale0);
                        s->sb_samples[1][k * 12 + l + 0][i] =
                            l2_unscale_group(steps, mant, scale1);
                        mant = v % steps;
                        v = v / steps;
                        s->sb_samples[0][k * 12 + l + 1][i] =
                            l2_unscale_group(steps, mant, scale0);
                        s->sb_samples[1][k * 12 + l + 1][i] =
                            l2_unscale_group(steps, mant, scale1);
                        s->sb_samples[0][k * 12 + l + 2][i] =
                            l2_unscale_group(steps, v, scale0);
                        s->sb_samples[1][k * 12 + l + 2][i] =
                            l2_unscale_group(steps, v, scale1);
                    } else {
                        for (m = 0; m < 3; m++) {
                            mant = get_bits(&s->gb, bits);
                            s->sb_samples[0][k * 12 + l + m][i] =
                                l1_unscale(bits - 1, mant, scale0);
                            s->sb_samples[1][k * 12 + l + m][i] =
                                l1_unscale(bits - 1, mant, scale1);
                        }
                    }
                } else {
                    s->sb_samples[0][k * 12 + l + 0][i] = 0;
                    s->sb_samples[0][k * 12 + l + 1][i] = 0;
                    s->sb_samples[0][k * 12 + l + 2][i] = 0;
                    s->sb_samples[1][k * 12 + l + 0][i] = 0;
                    s->sb_samples[1][k * 12 + l + 1][i] = 0;
                    s->sb_samples[1][k * 12 + l + 2][i] = 0;
                }
                /* next subband in alloc table */
                j += 1 << bit_alloc_bits;
            }
            /* fill remaining samples to zero */
            for (i = sblimit; i < SBLIMIT; i++) {
                for (ch = 0; ch < s->nb_channels; ch++) {
                    s->sb_samples[ch][k * 12 + l + 0][i] = 0;
                    s->sb_samples[ch][k * 12 + l + 1][i] = 0;
                    s->sb_samples[ch][k * 12 + l + 2][i] = 0;
                }
            }
        }
    }
    return 3 * 12;
}

#define SPLIT(dst,sf,n)             \
    if (n == 3) {                   \
        int m = (sf * 171) >> 9;    \
        dst   = sf - 3 * m;         \
        sf    = m;                  \
    } else if (n == 4) {            \
        dst  = sf & 3;              \
        sf >>= 2;                   \
    } else if (n == 5) {            \
        int m = (sf * 205) >> 10;   \
        dst   = sf - 5 * m;         \
        sf    = m;                  \
    } else if (n == 6) {            \
        int m = (sf * 171) >> 10;   \
        dst   = sf - 6 * m;         \
        sf    = m;                  \
    } else {                        \
        dst = 0;                    \
    }

static av_always_inline void lsf_sf_expand(int *slen, int sf, int n1, int n2,
                                           int n3)
{
#ifdef FF_V0_4_9_MATH
    if (n3) {
        slen[3] = sf % n3;
        sf /= n3;
    } else {
        slen[3] = 0;
    }
    if (n2) {
        slen[2] = sf % n2;
        sf /= n2;
    } else {
        slen[2] = 0;
    }
    slen[1] = sf % n1;
    sf /= n1;
#else
    SPLIT(slen[3], sf, n3)
    SPLIT(slen[2], sf, n2)
    SPLIT(slen[1], sf, n1)
#endif
    slen[0] = sf;
}

static void exponents_from_scale_factors(MPADecodeContext *s, GranuleDef *g,
                                         int16_t *exponents)
{
    const uint8_t *bstab, *pretab;
    int len, i, j, k, l, v0, shift, gain, gains[3];
    int16_t *exp_ptr;

    exp_ptr = exponents;
    gain    = g->global_gain - 210;
    shift   = g->scalefac_scale + 1;

    bstab  = band_size_long[s->sample_rate_index];
    pretab = mpa_pretab[g->preflag];
    for (i = 0; i < g->long_end; i++) {
        v0 = gain - ((g->scale_factors[i] + pretab[i]) << shift) + 400;
        len = bstab[i];
        for (j = len; j > 0; j--)
            *exp_ptr++ = v0;
    }

    if (g->short_start < 13) {
        bstab    = band_size_short[s->sample_rate_index];
        gains[0] = gain - (g->subblock_gain[0] << 3);
        gains[1] = gain - (g->subblock_gain[1] << 3);
        gains[2] = gain - (g->subblock_gain[2] << 3);
        k        = g->long_end;
        for (i = g->short_start; i < 13; i++) {
            len = bstab[i];
            for (l = 0; l < 3; l++) {
                v0 = gains[l] - (g->scale_factors[k++] << shift) + 400;
                for (j = len; j > 0; j--)
                    *exp_ptr++ = v0;
            }
        }
    }
}

static void switch_buffer(MPADecodeContext *s, int *pos, int *end_pos,
                          int *end_pos2)
{
    if (s->in_gb.buffer && *pos >= s->gb.size_in_bits 
#ifndef FF_V0_4_9_EXTRASIZE
        - s->extrasize * 8
#endif
        ) {
        s->gb           = s->in_gb;
        s->in_gb.buffer = NULL;
#ifndef FF_V0_4_9_EXTRASIZE
        s->extrasize    = 0;
#endif
        av_assert2((get_bits_count(&s->gb) & 7) == 0);
        skip_bits_long(&s->gb, *pos - *end_pos);
        *end_pos2 =
        *end_pos  = *end_pos2 + get_bits_count(&s->gb) - *pos;
        *pos      = get_bits_count(&s->gb);
    }
}

/* Following is an optimized code for
            INTFLOAT v = *src
            if(get_bits1(&s->gb))
                v = -v;
            *dst = v;
*/
#if USE_FLOATS
#define READ_FLIP_SIGN(dst,src)                     \
    v = AV_RN32A(src) ^ (get_bits1(&s->gb) << 31);  \
    AV_WN32A(dst, v);
#else
#define READ_FLIP_SIGN(dst,src)     \
    v      = -get_bits1(&s->gb);    \
    *(dst) = (*(src) ^ v) - v;
#endif

static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
                          int16_t *exponents, int end_pos2)
{
    int s_index;
    int i;
    int last_pos, bits_left;
    VLC *vlc;
    int end_pos = FFMIN(end_pos2, s->gb.size_in_bits 
#ifndef FF_V0_4_9_EXTRASIZE
        - s->extrasize * 8
#endif
        );

    /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&end_pos2, sizeof (end_pos2), 1); ---*/ /* testing!!! */
    /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&s->gb.size_in_bits, sizeof (s->gb.size_in_bits), 1); ---*/ /* testing!!! */
    /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&end_pos, sizeof (end_pos), 1); ---*/ /* testing!!! */
    /*--- AV_LOG_SAMPLE_DUMP (3, (unsigned char *)expval_table_fixed, sizeof (expval_table_fixed), 1); ---*/ /* testing!!! */

    /* low frequencies (called big values) */
    s_index = 0;
    for (i = 0; i < 3; i++) {
        int j, k, l, linbits;
        j = g->region_size[i];
        if (j == 0)
            continue;
        /* select vlc table */
        k       = g->table_select[i];
        l       = mpa_huff_data[k][0];
        linbits = mpa_huff_data[k][1];
        vlc     = &huff_vlc[l];
        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&k, sizeof (k), 1); ---*/ /* testing!!! */
        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&l, sizeof (l), 1); ---*/ /* testing!!! */
        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&linbits, sizeof (linbits), 1); ---*/ /* testing!!! */

        if (!l) {
            memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid) * 2 * j);
            s_index += 2 * j;
            continue;
        }

        /* read huffcode and compute each couple */
        for (; j > 0; j--) {
            int exponent, x, y;
            int v;
            int pos = get_bits_count(&s->gb);

        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&pos, sizeof (pos), 1); ---*/ /* testing!!! */
            if (pos >= end_pos){
                switch_buffer(s, &pos, &end_pos, &end_pos2);
                if (pos >= end_pos)
                    break;
            }
            y = get_vlc2(&s->gb, vlc->table, 7, 3);
        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&y, sizeof (y), 1); ---*/ /* testing!!! */

            if (!y) {
                g->sb_hybrid[s_index  ] =
                g->sb_hybrid[s_index+1] = 0;
                s_index += 2;
                continue;
            }

            exponent= exponents[s_index];

#ifndef AVM_FFMPEG_SILENT
            ff_dlog(s->avctx, "region=%d n=%d x=%d y=%d exp=%d\n",
                    i, g->region_size[i] - j, x, y, exponent);
#endif
            if (y & 16) {
                x = y >> 5;
                y = y & 0x0f;
                if (x < 15) {
                    READ_FLIP_SIGN(g->sb_hybrid + s_index, RENAME(expval_table)[exponent] + x)
                } else {
                    x += get_bitsz(&s->gb, linbits);
                    v  = l3_unscale(x, exponent);
                    if (get_bits1(&s->gb))
                        v = -v;
                    g->sb_hybrid[s_index] = v;
                }
                if (y < 15) {
                    READ_FLIP_SIGN(g->sb_hybrid + s_index + 1, RENAME(expval_table)[exponent] + y)
                } else {
                    y += get_bitsz(&s->gb, linbits);
                    v  = l3_unscale(y, exponent);
                    if (get_bits1(&s->gb))
                        v = -v;
                    g->sb_hybrid[s_index+1] = v;
                }
            } else {
                x = y >> 5;
                y = y & 0x0f;
                x += y;
                if (x < 15) {
                    READ_FLIP_SIGN(g->sb_hybrid + s_index + !!y, RENAME(expval_table)[exponent] + x)
                } else {
                    x += get_bitsz(&s->gb, linbits);
                    v  = l3_unscale(x, exponent);
                    if (get_bits1(&s->gb))
                        v = -v;
                    g->sb_hybrid[s_index+!!y] = v;
                }
                g->sb_hybrid[s_index + !y] = 0;
            }
            s_index += 2;
        }
    }
    /*--- AV_LOG_SAMPLE_DUMP(6, (unsigned char *)g->sb_hybrid, sizeof (uint32_t), 576); ---*/

    /* high frequencies */
    vlc = &huff_quad_vlc[g->count1table_select];
    last_pos = 0;
    while (s_index <= 572) {
        int pos, code;
        pos = get_bits_count(&s->gb);
        if (pos >= end_pos) {
            if (pos > end_pos2 && last_pos) {
                /* some encoders generate an incorrect size for this
                   part. We must go back into the data */
                s_index -= 4;
                skip_bits_long(&s->gb, last_pos - pos);
                av_log(s->avctx, AV_LOG_INFO, "overread, skip %d enddists: %d %d", last_pos - pos, end_pos-pos, end_pos2-pos);
                if(s->err_recognition & (AV_EF_BITSTREAM|AV_EF_COMPLIANT))
                    s_index=0;
                break;
            }
            switch_buffer(s, &pos, &end_pos, &end_pos2);
            if (pos >= end_pos)
                break;
        }
        last_pos = pos;

        code = get_vlc2(&s->gb, vlc->table, vlc->bits, 1);
#ifndef AVM_FFMPEG_SILENT
        ff_dlog(s->avctx, "t=%d code=%d\n", g->count1table_select, code);
#endif
        g->sb_hybrid[s_index+0] =
        g->sb_hybrid[s_index+1] =
        g->sb_hybrid[s_index+2] =
        g->sb_hybrid[s_index+3] = 0;
        while (code) {
            static const int idxtab[16] = { 3,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0 };
            int v;

            pos = s_index + idxtab[code];
            code   ^= 8 >> idxtab[code];
            READ_FLIP_SIGN(g->sb_hybrid + pos, RENAME(exp_table)+exponents[pos])
        }
        s_index += 4;
    }
    /*--- AV_LOG_SAMPLE_DUMP(6, (unsigned char *)g->sb_hybrid, sizeof (uint32_t), 576); ---*/
    /* skip extension bits */
    bits_left = end_pos2 - get_bits_count(&s->gb);
    if (bits_left < 0 && (s->err_recognition & (AV_EF_BUFFER|AV_EF_COMPLIANT))) {
        av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d", bits_left);
        s_index=0;
    } else if (bits_left > 0 && (s->err_recognition & (AV_EF_BUFFER|AV_EF_AGGRESSIVE))) {
        av_log(s->avctx, AV_LOG_ERROR, "bits_left=%d", bits_left);
        s_index = 0;
    }
    memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid) * (576 - s_index));
    skip_bits_long(&s->gb, bits_left);

    i = get_bits_count(&s->gb);
    switch_buffer(s, &i, &end_pos, &end_pos2);

    return 0;
}

/* Reorder short blocks from bitstream order to interleaved order. It
   would be faster to do it in parsing, but the code would be far more
   complicated */
static void reorder_block(MPADecodeContext *s, GranuleDef *g)
{
    int i, j, len;
    INTFLOAT *ptr, *dst, *ptr1;
    INTFLOAT tmp[576];

    if (g->block_type != 2)
        return;

    if (g->switch_point) {
        if (s->sample_rate_index != 8)
            ptr = g->sb_hybrid + 36;
        else
            ptr = g->sb_hybrid + 72;
    } else {
        ptr = g->sb_hybrid;
    }

    for (i = g->short_start; i < 13; i++) {
        len  = band_size_short[s->sample_rate_index][i];
        ptr1 = ptr;
        dst  = tmp;
        for (j = len; j > 0; j--) {
            *dst++ = ptr[0*len];
            *dst++ = ptr[1*len];
            *dst++ = ptr[2*len];
            ptr++;
        }
        ptr += 2 * len;
        memcpy(ptr1, tmp, len * 3 * sizeof(*ptr1));
    }
}

#define ISQRT2 FIXR(0.70710678118654752440)

static void compute_stereo(MPADecodeContext *s, GranuleDef *g0, GranuleDef *g1)
{
    int i, j, k, l;
    int sf_max, sf, len, non_zero_found;
    INTFLOAT (*is_tab)[16], *tab0, *tab1, tmp0, tmp1, v1, v2;
    int non_zero_found_short[3];

    /* intensity stereo */
    if (s->mode_ext & MODE_EXT_I_STEREO) {
        if (!s->lsf) {
            is_tab = is_table;
            sf_max = 7;
        } else {
            is_tab = is_table_lsf[g1->scalefac_compress & 1];
            sf_max = 16;
        }

        tab0 = g0->sb_hybrid + 576;
        tab1 = g1->sb_hybrid + 576;

        non_zero_found_short[0] = 0;
        non_zero_found_short[1] = 0;
        non_zero_found_short[2] = 0;
        k = (13 - g1->short_start) * 3 + g1->long_end - 3;
        for (i = 12; i >= g1->short_start; i--) {
            /* for last band, use previous scale factor */
            if (i != 11)
                k -= 3;
            len = band_size_short[s->sample_rate_index][i];
            for (l = 2; l >= 0; l--) {
                tab0 -= len;
                tab1 -= len;
                if (!non_zero_found_short[l]) {
                    /* test if non zero band. if so, stop doing i-stereo */
                    for (j = 0; j < len; j++) {
                        if (tab1[j] != 0) {
                            non_zero_found_short[l] = 1;
                            goto found1;
                        }
                    }
                    sf = g1->scale_factors[k + l];
                    if (sf >= sf_max)
                        goto found1;

                    v1 = is_tab[0][sf];
                    v2 = is_tab[1][sf];
                    for (j = 0; j < len; j++) {
                        tmp0    = tab0[j];
                        tab0[j] = MULLx(tmp0, v1, FRAC_BITS);
                        tab1[j] = MULLx(tmp0, v2, FRAC_BITS);
                    }
                } else {
found1:
                    if (s->mode_ext & MODE_EXT_MS_STEREO) {
                        /* lower part of the spectrum : do ms stereo
                           if enabled */
                        for (j = 0; j < len; j++) {
                            tmp0    = tab0[j];
                            tmp1    = tab1[j];
                            tab0[j] = MULLx(tmp0 + tmp1, ISQRT2, FRAC_BITS);
                            tab1[j] = MULLx(tmp0 - tmp1, ISQRT2, FRAC_BITS);
                        }
                    }
                }
            }
        }

        non_zero_found = non_zero_found_short[0] |
                         non_zero_found_short[1] |
                         non_zero_found_short[2];

        for (i = g1->long_end - 1;i >= 0;i--) {
            len   = band_size_long[s->sample_rate_index][i];
            tab0 -= len;
            tab1 -= len;
            /* test if non zero band. if so, stop doing i-stereo */
            if (!non_zero_found) {
                for (j = 0; j < len; j++) {
                    if (tab1[j] != 0) {
                        non_zero_found = 1;
                        goto found2;
                    }
                }
                /* for last band, use previous scale factor */
                k  = (i == 21) ? 20 : i;
                sf = g1->scale_factors[k];
                if (sf >= sf_max)
                    goto found2;
                v1 = is_tab[0][sf];
                v2 = is_tab[1][sf];
                for (j = 0; j < len; j++) {
                    tmp0    = tab0[j];
                    tab0[j] = MULLx(tmp0, v1, FRAC_BITS);
                    tab1[j] = MULLx(tmp0, v2, FRAC_BITS);
                }
            } else {
found2:
                if (s->mode_ext & MODE_EXT_MS_STEREO) {
                    /* lower part of the spectrum : do ms stereo
                       if enabled */
                    for (j = 0; j < len; j++) {
                        tmp0    = tab0[j];
                        tmp1    = tab1[j];
                        tab0[j] = MULLx(tmp0 + tmp1, ISQRT2, FRAC_BITS);
                        tab1[j] = MULLx(tmp0 - tmp1, ISQRT2, FRAC_BITS);
                    }
                }
            }
        }
    } else if (s->mode_ext & MODE_EXT_MS_STEREO) {
        /* ms stereo ONLY */
        /* NOTE: the 1/sqrt(2) normalization factor is included in the
           global gain */
#if USE_FLOATS
       s->fdsp->butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576);
#else
        tab0 = g0->sb_hybrid;
        tab1 = g1->sb_hybrid;
        for (i = 0; i < 576; i++) {
            tmp0    = tab0[i];
            tmp1    = tab1[i];
            tab0[i] = tmp0 + tmp1;
            tab1[i] = tmp0 - tmp1;
        }
#endif
    }
}

#if USE_FLOATS
#if HAVE_MIPSFPU
#   include "mips/compute_antialias_float.h"
#endif /* HAVE_MIPSFPU */
#else
#if HAVE_MIPSDSP
#   include "mips/compute_antialias_fixed.h"
#endif /* HAVE_MIPSDSP */
#endif /* USE_FLOATS */

#ifndef compute_antialias
#if USE_FLOATS
#define AA(j) do {                                                      \
        float tmp0 = ptr[-1-j];                                         \
        float tmp1 = ptr[   j];                                         \
        ptr[-1-j] = tmp0 * csa_table[j][0] - tmp1 * csa_table[j][1];    \
        ptr[   j] = tmp0 * csa_table[j][1] + tmp1 * csa_table[j][0];    \
    } while (0)
#else
#define AA(j) do {                                              \
        int tmp0 = ptr[-1-j];                                   \
        int tmp1 = ptr[   j];                                   \
        int tmp2 = MULH(tmp0 + tmp1, csa_table[j][0]);          \
        ptr[-1-j] = 4 * (tmp2 - MULH(tmp1, csa_table[j][2]));   \
        ptr[   j] = 4 * (tmp2 + MULH(tmp0, csa_table[j][3]));   \
    } while (0)
#endif

static void compute_antialias(MPADecodeContext *s, GranuleDef *g)
{
    INTFLOAT *ptr;
    int n, i;

    /* we antialias only "long" bands */
    if (g->block_type == 2) {
        if (!g->switch_point)
            return;
        /* XXX: check this for 8000Hz case */
        n = 1;
    } else {
        n = SBLIMIT - 1;
    }

    ptr = g->sb_hybrid + 18;
    for (i = n; i > 0; i--) {
        AA(0);
        AA(1);
        AA(2);
        AA(3);
        AA(4);
        AA(5);
        AA(6);
        AA(7);

        ptr += 18;
    }
}
#endif /* compute_antialias */

static void compute_imdct(MPADecodeContext *s, GranuleDef *g,
                          INTFLOAT *sb_samples, INTFLOAT *mdct_buf)
{
    INTFLOAT *win, *out_ptr, *ptr, *buf, *ptr1;
    INTFLOAT out2[12];
    int i, j, mdct_long_end, sblimit;

    /* find last non zero block */
    ptr  = g->sb_hybrid + 576;
    ptr1 = g->sb_hybrid + 2 * 18;
    while (ptr >= ptr1) {
        int32_t *p;
        ptr -= 6;
        p    = (int32_t*)ptr;
        if (p[0] | p[1] | p[2] | p[3] | p[4] | p[5])
            break;
    }
    sblimit = ((ptr - g->sb_hybrid) / 18) + 1;

    if (g->block_type == 2) {
        /* XXX: check for 8000 Hz */
        if (g->switch_point)
            mdct_long_end = 2;
        else
            mdct_long_end = 0;
    } else {
        mdct_long_end = sblimit;
    }
    /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&g->block_type, sizeof (g->block_type), 1); ---*/  /* testing!!! */
    /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&mdct_long_end, sizeof (mdct_long_end), 1); ---*/  /* testing!!! */

#ifdef FF_V0_4_9_IMDCT
    buf = mdct_buf;
    ptr = g->sb_hybrid;
    for(j=0;j<mdct_long_end;j++) {
        int *win1;

        /* apply window & overlap with previous buffer */
        out_ptr = sb_samples + j;
        /* select window */
        if (g->switch_point && j < 2)
            win1 = ff_mdct_win_fixed[0];
        else
            win1 = ff_mdct_win_fixed[g->block_type];
        /* select frequency inversion */
        win = win1 + ((4 * 36) & -(j & 1));
        ff_imdct36_fixed(out_ptr, buf, ptr, win);
        out_ptr += 18*SBLIMIT;
        ptr += 18;
        buf += 18;
    }
#else
    s->mpadsp.RENAME(imdct36_blocks)(sb_samples, mdct_buf, g->sb_hybrid,
                                     mdct_long_end, g->switch_point,
                                     g->block_type);

    buf = mdct_buf + 4*18*(mdct_long_end >> 2) + (mdct_long_end & 3);
    ptr = g->sb_hybrid + 18 * mdct_long_end;
#endif

    for (j = mdct_long_end; j < sblimit; j++) {
        /* select frequency inversion */

#ifdef FF_V0_4_9_IMDCT
        win = ff_mdct_win_fixed[2] + ((4 * 36) & -(j & 1));
#else
        win     = RENAME(ff_mdct_win)[2 + (4  & -(j & 1))];
#endif
        out_ptr = sb_samples + j;

        for (i = 0; i < 6; i++) {
#ifdef FF_V0_4_9_IMDCT
            *out_ptr = buf[i];
#else
            *out_ptr = buf[4*i];
#endif
            out_ptr += SBLIMIT;
        }
        imdct12(out2, ptr + 0);
        for (i = 0; i < 6; i++) {
#ifdef FF_V0_4_9_IMDCT
            *out_ptr = MULH3(out2[i], win[i], 1) + buf[i + 6*1];
            buf[i + 6*2] = MULH3(out2[i + 6], win[i + 6], 1);
#else
            *out_ptr     = MULH3(out2[i    ], win[i    ], 1) + buf[4*(i + 6*1)];
            buf[4*(i + 6*2)] = MULH3(out2[i + 6], win[i + 6], 1);
#endif
            out_ptr += SBLIMIT;
        }
        imdct12(out2, ptr + 1);
        for (i = 0; i < 6; i++) {
#ifdef FF_V0_4_9_IMDCT
            *out_ptr = MULH3(out2[i], win[i], 1) + buf[i + 6*2];
            buf[i + 6*0] = MULH3(out2[i + 6], win[i + 6], 1);
#else
            *out_ptr     = MULH3(out2[i    ], win[i    ], 1) + buf[4*(i + 6*2)];
            buf[4*(i + 6*0)] = MULH3(out2[i + 6], win[i + 6], 1);
#endif
            out_ptr += SBLIMIT;
        }
        imdct12(out2, ptr + 2);
        for (i = 0; i < 6; i++) {
#ifdef FF_V0_4_9_IMDCT
            buf[i + 6*0] = MULH3(out2[i], win[i], 1) + buf[i + 6*0];
            buf[i + 6*1] = MULH3(out2[i + 6], win[i + 6], 1);
            buf[i + 6*2] = 0;
#else
            buf[4*(i + 6*0)] = MULH3(out2[i    ], win[i    ], 1) + buf[4*(i + 6*0)];
            buf[4*(i + 6*1)] = MULH3(out2[i + 6], win[i + 6], 1);
            buf[4*(i + 6*2)] = 0;
#endif
        }
        ptr += 18;
#ifdef FF_V0_4_9_IMDCT
        buf += 18;
#else
        buf += (j&3) != 3 ? 1 : (4*18-3);
#endif
    }
    /* zero bands */
    for (j = sblimit; j < SBLIMIT; j++) {
        /* overlap */
        out_ptr = sb_samples + j;
        for (i = 0; i < 18; i++) {
#ifdef FF_V0_4_9_IMDCT
            *out_ptr = buf[i];
            buf[i] = 0;
#else
            *out_ptr = buf[4*i];
            buf[4*i]   = 0;
#endif
            out_ptr += SBLIMIT;
        }
#ifdef FF_V0_4_9_IMDCT
        buf += 18;
#else
        buf += (j&3) != 3 ? 1 : (4*18-3);
#endif
    }
}

/* main layer3 decoding function */
static int mp_decode_layer3(MPADecodeContext *s)
{
    int nb_granules, main_data_begin;
    int gr, ch, blocksplit_flag, i, j, k, n, bits_pos;
    GranuleDef *g;
    int16_t exponents[576]; //FIXME try INTFLOAT

    /* read side info */
    if (s->lsf) {
        main_data_begin = get_bits(&s->gb, 8);
        skip_bits(&s->gb, s->nb_channels);
        nb_granules = 1;
    } else {
        main_data_begin = get_bits(&s->gb, 9);
        if (s->nb_channels == 2) {
            skip_bits(&s->gb, 3);
        } else {
            skip_bits(&s->gb, 5);
        }
        nb_granules = 2;
        for (ch = 0; ch < s->nb_channels; ch++) {
            s->granules[ch][0].scfsi = 0;/* all scale factors are transmitted */
            s->granules[ch][1].scfsi = get_bits(&s->gb, 4);
        }
    }

    for (gr = 0; gr < nb_granules; gr++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
#ifndef AVM_FFMPEG_SILENT
            ff_dlog(s->avctx, "gr=%d ch=%d: side_info\n", gr, ch);
#endif /* !AVM_FFMPEG_SILENT */
            g = &s->granules[ch][gr];
            g->part2_3_length = get_bits(&s->gb, 12);
            g->big_values     = get_bits(&s->gb,  9);
            if (g->big_values > 288) {
                av_log(s->avctx, AV_LOG_ERROR, "big_values too big\n");
                return AVERROR_INVALIDDATA;
            }

            g->global_gain = get_bits(&s->gb, 8);
            /* if MS stereo only is selected, we precompute the
               1/sqrt(2) renormalization factor */
            if ((s->mode_ext & (MODE_EXT_MS_STEREO | MODE_EXT_I_STEREO)) ==
                MODE_EXT_MS_STEREO)
                g->global_gain -= 2;
            if (s->lsf)
                g->scalefac_compress = get_bits(&s->gb, 9);
            else
                g->scalefac_compress = get_bits(&s->gb, 4);
            blocksplit_flag = get_bits1(&s->gb);
            if (blocksplit_flag) {
                g->block_type = get_bits(&s->gb, 2);
                if (g->block_type == 0) {
                    av_log(s->avctx, AV_LOG_ERROR, "invalid block type\n");
                    return AVERROR_INVALIDDATA;
                }
                g->switch_point = get_bits1(&s->gb);
                for (i = 0; i < 2; i++)
                    g->table_select[i] = get_bits(&s->gb, 5);
                for (i = 0; i < 3; i++)
                    g->subblock_gain[i] = get_bits(&s->gb, 3);
                init_short_region(s, g);
            } else {
                int region_address1, region_address2;
                g->block_type = 0;
                g->switch_point = 0;
                for (i = 0; i < 3; i++)
                    g->table_select[i] = get_bits(&s->gb, 5);
                /* compute huffman coded region sizes */
                region_address1 = get_bits(&s->gb, 4);
                region_address2 = get_bits(&s->gb, 3);
#ifndef AVM_FFMPEG_SILENT
                ff_dlog(s->avctx, "region1=%d region2=%d\n",
                        region_address1, region_address2);
#endif
                init_long_region(s, g, region_address1, region_address2);
            }
            region_offset2size(g);
            compute_band_indexes(s, g);

            g->preflag = 0;
            if (!s->lsf)
                g->preflag = get_bits1(&s->gb);
            g->scalefac_scale     = get_bits1(&s->gb);
            g->count1table_select = get_bits1(&s->gb);
            ff_dlog(s->avctx, "block_type=%d switch_point=%d\n",
                    g->block_type, g->switch_point);
        }
    }

    /*--- AV_LOG_SAMPLE_DUMP (0, (unsigned char *)&s->adu_mode, sizeof (s->adu_mode), 1); ---*/  /* testing!!! */
    if (!s->adu_mode) {
#ifndef FF_V0_4_9_EXTRASIZE
        int skip;
#endif

        const uint8_t *ptr = s->gb.buffer + (get_bits_count(&s->gb)>>3);
#ifndef FF_V0_4_9_EXTRASIZE
        s->extrasize = av_clip((get_bits_left(&s->gb) >> 3) - s->extrasize, 0,
                               FFMAX(0, LAST_BUF_SIZE - s->last_buf_size));
        av_assert1((get_bits_count(&s->gb) & 7) == 0);
        /* now we get bits from the main_data_begin offset */
        ff_dlog(s->avctx, "seekback:%d, lastbuf:%d\n",
                main_data_begin, s->last_buf_size);
#endif

        memcpy(s->last_buf + s->last_buf_size, ptr,
#ifdef FF_V0_4_9_EXTRASIZE
            EXTRABYTES 
#else
            s->extrasize
#endif
            );
        s->in_gb = s->gb;
        init_get_bits(&s->gb, s->last_buf, (s->last_buf_size
#ifndef FF_V0_4_9_EXTRASIZE
            + s->extrasize
#endif
            ) * 8);
        /*--- AV_LOG_SAMPLE_DUMP(0, (unsigned char *)&s->last_buf_size, sizeof (s->last_buf_size), 1); ---*/  /* testing!!! */
        /*--- AV_LOG_SAMPLE_DUMP(0, s->last_buf, 1, s->last_buf_size); ---*/  /* testing!!! */
#ifdef FF_V0_4_9_EXTRASIZE
        skip_bits_long(&s->gb, 8*(s->last_buf_size - main_data_begin));
#else
        s->last_buf_size <<= 3;
        for (gr = 0; gr < nb_granules && (s->last_buf_size >> 3) < main_data_begin; gr++) {
            for (ch = 0; ch < s->nb_channels; ch++) {
                g = &s->granules[ch][gr];
                s->last_buf_size += g->part2_3_length;
                memset(g->sb_hybrid, 0, sizeof(g->sb_hybrid));
                compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
            }
        }
        skip = s->last_buf_size - 8 * main_data_begin;
        if (skip >= s->gb.size_in_bits - s->extrasize * 8 && s->in_gb.buffer) {
            skip_bits_long(&s->in_gb, skip - s->gb.size_in_bits + s->extrasize * 8);
            s->gb           = s->in_gb;
            s->in_gb.buffer = NULL;
            s->extrasize    = 0;
        } else {
            skip_bits_long(&s->gb, skip);
        }
#endif
    } else {
        gr = 0;
        s->extrasize = 0;
    }

    for (
#ifdef FF_V0_4_9_EXTRASIZE
         gr=0
#endif
             ; gr < nb_granules; gr++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            g = &s->granules[ch][gr];
            /*--- AV_LOG_SAMPLE_DUMP(0, (unsigned char *)&s->lsf, sizeof (s->lsf), 1); ---*/
            /*--- AV_LOG_SAMPLE_DUMP(0, (unsigned char *)&g->block_type, sizeof (g->block_type), 1); ---*/
            /*--- AV_LOG_SAMPLE_DUMP(0, (unsigned char *)g, sizeof (GranuleDef ), 1); ---*/
            bits_pos = get_bits_count(&s->gb);
            /*--- AV_LOG_SAMPLE_DUMP(6, (unsigned char *)&bits_pos, sizeof (bits_pos), 1); ---*/  /* testing!!! */
            /*--- AV_LOG_SAMPLE_DUMP(6, (unsigned char *)&g->part2_3_length, sizeof (g->part2_3_length), 1); ---*/  /* testing!!! */
#ifdef FF_V0_4_9_EXTRASIZE
            if (bits_pos < 0) {
/*== AVM/JBR 20080522 AVM_FFMPEG_SILENT ==*/
                av_log(NULL, AV_LOG_ERROR, "mdb:%d, lastbuf:%d skipping granule %d\n",
                                            main_data_begin, s->last_buf_size, gr);
                skip_bits_long(&s->gb, g->part2_3_length);
                memset(g->sb_hybrid, 0, sizeof(g->sb_hybrid));
                if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->in_gb.buffer){
                    skip_bits_long(&s->in_gb, get_bits_count(&s->gb) - s->gb.size_in_bits);
                    s->gb= s->in_gb;
                    s->in_gb.buffer=NULL;
                }
                continue;
            }
#endif

            if (!s->lsf) {
                uint8_t *sc;
                int slen, slen1, slen2;

                /* MPEG-1 scale factors */
                slen1 = slen_table[0][g->scalefac_compress];
                slen2 = slen_table[1][g->scalefac_compress];
                ff_dlog(s->avctx, "slen1=%d slen2=%d\n", slen1, slen2);
                if (g->block_type == 2) {
                    n = g->switch_point ? 17 : 18;
                    j = 0;
                    if (slen1) {
                        for (i = 0; i < n; i++) {
                            g->scale_factors[j++] = get_bits(&s->gb, slen1);
                        }
                    } else {
                        for (i = 0; i < n; i++) {
                            g->scale_factors[j++] = 0;
                        }
                    }
                    if (slen2) {
                        for (i = 0; i < 18; i++) {
                            g->scale_factors[j++] = get_bits(&s->gb, slen2);
                        }
                        for (i = 0; i < 3; i++) {
                            g->scale_factors[j++] = 0;
                        }
                    } else {
                        for (i = 0; i < 21; i++) {
                            g->scale_factors[j++] = 0;
                        }
                    }
                } else {
                    sc = s->granules[ch][0].scale_factors;
                    j = 0;
                    for (k = 0; k < 4; k++) {
                        n = k == 0 ? 6 : 5;
                        if ((g->scfsi & (0x8 >> k)) == 0) {
                            slen = (k < 2) ? slen1 : slen2;
                            if (slen) {
                                for (i = 0; i < n; i++) {
                                    g->scale_factors[j++] = get_bits(&s->gb, slen);
                                }
                            } else {
                                for (i = 0; i < n; i++) {
                                    g->scale_factors[j++] = 0;
                                }
                            }
                        } else {
                            /* simply copy from last granule */
                            for (i = 0; i < n; i++, j++) {
                                g->scale_factors[j] = sc[j];
                            }
                        }
                    }
                    g->scale_factors[j++] = 0;
                }
            } else {
                int tindex, tindex2, slen[4], sl, sf;

                /* LSF scale factors */
                if (g->block_type == 2)
                    tindex = g->switch_point ? 2 : 1;
                else
                    tindex = 0;

                sf = g->scalefac_compress;
                if ((s->mode_ext & MODE_EXT_I_STEREO) && ch == 1) {
                    /* intensity stereo case */
                    sf >>= 1;
                    if (sf < 180) {
                        lsf_sf_expand(slen, sf, 6, 6, 0);
                        tindex2 = 3;
                    } else if (sf < 244) {
                        lsf_sf_expand(slen, sf - 180, 4, 4, 0);
                        tindex2 = 4;
                    } else {
                        lsf_sf_expand(slen, sf - 244, 3, 0, 0);
                        tindex2 = 5;
                    }
                } else {
                    /* normal case */
                    if (sf < 400) {
                        lsf_sf_expand(slen, sf, 5, 4, 4);
                        tindex2 = 0;
                    } else if (sf < 500) {
                        lsf_sf_expand(slen, sf - 400, 5, 4, 0);
                        tindex2 = 1;
                    } else {
                        lsf_sf_expand(slen, sf - 500, 3, 0, 0);
                        tindex2 = 2;
                        g->preflag = 1;
                    }
                }

                j = 0;
                for (k = 0; k < 4; k++) {
                    n  = lsf_nsf_table[tindex2][tindex][k];
                    sl = slen[k];
                    if (sl) {
                        for (i = 0; i < n; i++)
                            g->scale_factors[j++] = get_bits(&s->gb, sl);
                    } else {
                        for (i = 0; i < n; i++)
                            g->scale_factors[j++] = 0;
                    }
                }
                /* XXX: should compute exact size */
                for (; j < 40; j++)
                    g->scale_factors[j] = 0;
            }

            exponents_from_scale_factors(s, g, exponents);

            /* read Huffman coded residue */
            /*--- AV_LOG_SAMPLE_DUMP(1, (unsigned char*)exponents, sizeof (uint16_t), 576); ---*/
            /*--- AV_LOG_SAMPLE_DUMP(2, (unsigned char *)g, sizeof (GranuleDef ), 1); ---*/
            huffman_decode(s, g, exponents, bits_pos + g->part2_3_length);
            /*--- AV_LOG_SAMPLE_DUMP(4, (unsigned char *)g->sb_hybrid, sizeof (uint32_t), 576); ---*/  /* testing!!! */
            /*--- AV_LOG_SAMPLE_DUMP(6, (unsigned char *)g, sizeof (GranuleDef ), 1); ---*/
        } /* ch */

        if (s->mode == MPA_JSTEREO)
            compute_stereo(s, &s->granules[0][gr], &s->granules[1][gr]);

        for (ch = 0; ch < s->nb_channels; ch++) {
            g = &s->granules[ch][gr];

            reorder_block(s, g);
            compute_antialias(s, g);
            /*--- AV_LOG_SAMPLE_DUMP (5, (unsigned char *)g, sizeof (GranuleDef ), 1); ---*/
            compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
            /*--- AV_LOG_SAMPLE_DUMP (1, (unsigned char *)s->mdct_buf[ch], sizeof (uint32_t), 576); ---*/   /* testing!!! */
            /*--- AV_LOG_SAMPLE_DUMP (3, (unsigned char *)s->sb_samples[ch][18 * gr], sizeof (uint32_t), 576); ---*/    /* testing!!! */
        }
    } /* gr */
    if (get_bits_count(&s->gb) < 0)
        skip_bits_long(&s->gb, -get_bits_count(&s->gb));
    return nb_granules * 18;
}

static int mp_decode_frame(MPADecodeContext *s, OUT_INT **samples,
                           const uint8_t *buf, int buf_size)
{
    int i, nb_frames, ch, ret;
    OUT_INT *samples_ptr;

    init_get_bits(&s->gb, buf + HEADER_SIZE, (buf_size - HEADER_SIZE) * 8);

    /* skip error protection field */
    if (s->error_protection)
        skip_bits(&s->gb, 16);

    switch(s->layer) {
    case 1:
        s->avctx->frame_size = 384;
        nb_frames = mp_decode_layer1(s);
        break;
    case 2:
        s->avctx->frame_size = 1152;
        nb_frames = mp_decode_layer2(s);
        break;
    case 3:
        s->avctx->frame_size = s->lsf ? 576 : 1152;
        /* fall through */
    default:
        nb_frames = mp_decode_layer3(s);

        s->last_buf_size=0;
        if (s->in_gb.buffer) {
            align_get_bits(&s->gb);
            i = (get_bits_left(&s->gb) >> 3)
#ifndef FF_V0_4_9_EXTRASIZE
                 - s->extrasize
#endif
                 ;
            if (i >= 0 && i <= BACKSTEP_SIZE) {
                memmove(s->last_buf, s->gb.buffer + (get_bits_count(&s->gb)>>3), i);
                s->last_buf_size=i;
            } else {
                av_log(s->avctx, AV_LOG_ERROR, "invalid old backstep %d", i);
            }
#ifndef FF_V0_4_9_EXTRASIZE
            s->extrasize    = 0;
#endif
            s->gb           = s->in_gb;
            s->in_gb.buffer = NULL;
        }

        align_get_bits(&s->gb);
        av_assert1((get_bits_count(&s->gb) & 7) == 0);
        i = (get_bits_left(&s->gb) >> 3)
#ifndef FF_V0_4_9_EXTRASIZE
            - s->extrasize
#endif
            ;
        if (i < 0 || i > BACKSTEP_SIZE || nb_frames < 0) {
            if (i < 0)
                av_log(s->avctx, AV_LOG_ERROR, "invalid new backstep %d", i);
            i = FFMIN(BACKSTEP_SIZE, buf_size - HEADER_SIZE);
        }
        av_assert1(i <= buf_size - HEADER_SIZE && i >= 0);
        memcpy(s->last_buf + s->last_buf_size, s->gb.buffer + buf_size - HEADER_SIZE - i, i);
        s->last_buf_size += i;
    }

    if(nb_frames < 0)
        return nb_frames;

    /* get output buffer */
    if (!samples) {
        av_assert0(s->frame);
        s->frame->nb_samples = s->avctx->frame_size;
        if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0)
            return ret;
        samples = (OUT_INT **)s->frame->extended_data;
    }

    /* apply the synthesis filter */
    for (ch = 0; ch < s->nb_channels; ch++) {
        int sample_stride;
        if (s->avctx->sample_fmt == OUT_FMT_P) {
            samples_ptr   = samples[ch];
            sample_stride = 1;
        } else {
            samples_ptr   = samples[0] + ch;
            sample_stride = s->nb_channels;
        }
        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&s->avctx->sample_fmt, sizeof (s->avctx->sample_fmt), 1); ---*/  /* testing!!! */
        /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&sample_stride, sizeof (sample_stride), 1); ---*/  /* testing!!! */
        /*--- AV_LOG_SAMPLE_DUMP (4, (unsigned char *)s->sb_samples[ch], sizeof (s->sb_samples[ch][0]), nb_frames); ---*/ /* testing!!! */
        for (i = 0; i < nb_frames; i++) {
            /*--- AV_LOG_SAMPLE_DUMP (6, (unsigned char *)&s->dither_state, sizeof (s->dither_state), 1); ---*/
            RENAME(ff_mpa_synth_filter)(&s->mpadsp, s->synth_buf[ch],
                                        &(s->synth_buf_offset[ch]),
                                        RENAME(ff_mpa_synth_window),
                                        &s->dither_state, samples_ptr,
                                        sample_stride, s->sb_samples[ch][i]);
            samples_ptr += 32 * sample_stride;
        }
    }
    s->frame_count++;
    /*--- AV_LOG_SAMPLE_DUMP (0, (unsigned char *)(samples[0]), sizeof (*samples_ptr), ---*/ 
        /*--- nb_frames * 32 * s->nb_channels); ---*/ /* testing!!! */

    return nb_frames * 32 * sizeof(OUT_INT) * s->nb_channels;
}

static int decode_frame(AVCodecContext * avctx, 
    void *data, int *got_frame_ptr, AVPacket *avpkt)
{
#ifdef AVM_FFMPEG_REMOVE_UNNEEDED
  /* The whole threading and framing for output buffer management does not fit to our application. 
     To my opinion it should be part of the application to care for threading (if necessary at 
     all) and should not be the focus of the decoder lib. The decoder lib should care about 
     mathematical aspects and related performance issues by correcting and optimizing math (as it 
     is done here also - thank you for this). Because this specific math for codec xyz is something
     the ordinary software engineer might not be very familiar with. 
     In the end I do not want unnecessary copying and buffer management for simple mp3 decoding 
     and simple non planar output. By the way: why is non planar output not the default? It took 
     me a whole week to understand that decoding problems when porting from a very early 0.4.9 
     version of ffmpeg to this one are due to this change of policy.
     I also think it is no good idea to change interface getting incompatible every few years or 
     so, because changing application structure is something that might be too expensive. */
    OUT_INT *out_samples = data;
#endif
    const uint8_t *buf  = avpkt->data;
    int buf_size        = avpkt->size;
    MPADecodeContext *s = avctx->priv_data;
    uint32_t header;
    int ret;
    int skipped = 0;
#ifdef AVM_FFMPEG_SYNC_LOOKAHEAD
    int __buf_size = 0;

retry:
#endif /* AVM_FFMPEG_SYNC_LOOKAHEAD */
    while(buf_size && !*buf){
        buf++;
        buf_size--;
        skipped++;
    }
    if (buf_size < HEADER_SIZE) {
        /* == AVM/JB 20080505 new_errorcode ==
         * New error return value -2 for cases, where not enough
         * buffer space is supplied. FFMPEG-developpers say this case should
         * be a real error (aka -1, see bug #128), but i disagree.
         */
        return -2;
    }
    header = AV_RB32(buf);
#ifdef AVM_FFMPEG_SYNC_LOOKAHEAD
    if(ff_mpa_check_header(header) < 0){
prepare_retry:
        buf++;
        buf_size--;
        //av_log(avctx, AV_LOG_ERROR, "Header missing (0x%x), skipping one byte",
        //  header);
        skipped++;
        goto retry;
    }
#endif	/* AVM_FFMPEG_SYNC_LOOKAHEAD */

    if (header>>8 == AV_RB32("TAG")>>8) {
        av_log(avctx, AV_LOG_DEBUG, "discarding ID3 tag\n");
        return buf_size;
    }
    ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Header missing: 0x%x", header);
        return -1;
    } else if (ret == 1) {
        /* free format: prepare to compute frame size */
#ifdef AVM_FFMPEG_NO_FREEFORMAT
        goto prepare_retry;
#endif
        s->frame_size = -1;
        return -1;
    }
    /* update codec info */
    avctx->channels       = s->nb_channels;
    avctx->channel_layout = s->nb_channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
    if (!avctx->bit_rate)
        avctx->bit_rate = s->bit_rate;

#ifdef AVM_FFMPEG_SYNC_LOOKAHEAD
    /* save buffer original buffer size */
    __buf_size = buf_size;
#endif	/* AVM_FFMPEG_SYNC_LOOKAHEAD */

    if ((s->frame_size <= 0) 
        /* the following condition was not part of original code, but it is vital for us */
        || (s->frame_size > buf_size)
        ) {
#ifndef AVM_FFMPEG_SILENT
        av_log(avctx, AV_LOG_WARNING, "incomplete frame: size %d, buf_size %d, skip %d", 
            s->frame_size, buf_size, skipped);
#endif /* !AVM_FFMPEG_SILENT */
        /* == AVM/JB 20080505 new_errorcode == */
        return -2;
    } else if (s->frame_size < buf_size) {
#ifndef AVM_FFMPEG_SILENT
        av_log(avctx, AV_LOG_DEBUG, "frame size %d of %d bytes available, skip %d",
            s->frame_size, buf_size, skipped);
#endif /* !AVM_FFMPEG_SILENT */
        buf_size= s->frame_size;
    } else {
        av_log(avctx, AV_LOG_DEBUG, "incomplete frame size: %d > %d, chancnt %d, sample_rate %d",
            s->frame_size, buf_size, avctx->channels, s->sample_rate);
    }

#ifdef AVM_FFMPEG_SYNC_LOOKAHEAD
    /* == AVM/JB 20080402 better_sync ==
     * This fixes decoding problem with some webradio-stations,
     * that send garbage at the start of the stream.
     *
     * To increase sync'ing behavior, we look after the end of
     * the frame and check for the following sync-word. If no
     * sync word is found we assume a false-positive and
     * skip the frame.
     */
    if (__buf_size >= (skipped+(s->frame_size)+4)) {
        uint32_t nextheader = 0;
        nextheader = AV_RB32(buf+(s->frame_size));

        av_log(avctx, AV_LOG_DEBUG, "%s: %d: check for next sync %08x/%08x, s fsze %d",
            __func__, __LINE__, header, nextheader, s->frame_size);

        if (ff_mpa_check_header(nextheader) < 0) {
            av_log(avctx, AV_LOG_INFO, "%s: %d: skipping MAD frame: %08x/%08x",
                      __func__, __LINE__, header, nextheader);
            buf_size = __buf_size;
            goto prepare_retry;
        } else {
            av_log(avctx, AV_LOG_DEBUG, "%s: %d: sane frame", __func__, __LINE__);
        }
    } else {
        av_log(avctx, AV_LOG_INFO, "%s: %d: buffer too small for next-header-check", __func__, __LINE__);
    }
#endif /* AVM_FFMPEG_SYNC_LOOKAHEAD */

#ifndef AVM_FFMPEG_REMOVE_UNNEEDED
    /* see comment above */
    s->frame = data;
#endif /* AVM_FFMPEG_REMOVE_UNNEEDED */

    ret = mp_decode_frame(s, &out_samples, buf, buf_size);

#ifdef undef
    if (s->frame_count >= 10) {
      AV_LOG_CLOSE_SAMPLE_DUMP ();
    }
#endif
    if (ret >= 0) {
#ifndef AVM_FFMPEG_REMOVE_UNNEEDED
        /* see comment above */
        s->frame->nb_samples = avctx->frame_size;
        *got_frame_ptr       = 1;
#else
        *got_frame_ptr = ret;
#endif /* AVM_FFMPEG_REMOVE_UNNEEDED */
        avctx->sample_rate   = s->sample_rate;
        //FIXME maybe move the other codec info stuff from above here too
    } else {
        av_log(avctx, AV_LOG_ERROR, "Error while decoding MPEG audio frame.\n");
        /* Only return an error if the bad frame makes up the whole packet or
         * the error is related to buffer management.
         * If there is more data in the packet, just consume the bad frame
         * instead of returning an error, which would discard the whole
         * packet. */
        *got_frame_ptr = 0;
        if (buf_size == avpkt->size || ret != AVERROR_INVALIDDATA)
            return -1;
    }
    s->frame_size = 0;
    return buf_size + skipped;
}

static void mp_flush(MPADecodeContext *ctx)
{
    memset(ctx->synth_buf, 0, sizeof(ctx->synth_buf));
    memset(ctx->mdct_buf, 0, sizeof(ctx->mdct_buf));
    ctx->last_buf_size = 0;
    ctx->dither_state = 0;
}

static void flush(AVCodecContext *avctx)
{
    mp_flush(avctx->priv_data);
}

#if CONFIG_MP3ADU_DECODER || CONFIG_MP3ADUFLOAT_DECODER
static int decode_frame_adu(AVCodecContext *avctx, void *data,
                            int *got_frame_ptr, AVPacket *avpkt)
{
    const uint8_t *buf  = avpkt->data;
    int buf_size        = avpkt->size;
    MPADecodeContext *s = avctx->priv_data;
    uint32_t header;
    int len, ret;
    int av_unused out_size;

    len = buf_size;

    // Discard too short frames
    if (buf_size < HEADER_SIZE) {
        av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
        return AVERROR_INVALIDDATA;
    }


    if (len > MPA_MAX_CODED_FRAME_SIZE)
        len = MPA_MAX_CODED_FRAME_SIZE;

    // Get header and restore sync word
    header = AV_RB32(buf) | 0xffe00000;

    ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)s, header);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Invalid frame header\n");
        return ret;
    }
    /* update codec info */
    avctx->sample_rate = s->sample_rate;
    avctx->channels    = s->nb_channels;
    avctx->channel_layout = s->nb_channels == 1 ? AV_CH_LAYOUT_MONO : AV_CH_LAYOUT_STEREO;
    if (!avctx->bit_rate)
        avctx->bit_rate = s->bit_rate;

    s->frame_size = len;

    s->frame = data;

    ret = mp_decode_frame(s, NULL, buf, buf_size);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Error while decoding MPEG audio frame.\n");
        return ret;
    }

    *got_frame_ptr = 1;

    return buf_size;
}
#endif /* CONFIG_MP3ADU_DECODER || CONFIG_MP3ADUFLOAT_DECODER */

#if CONFIG_MP3ON4_DECODER || CONFIG_MP3ON4FLOAT_DECODER

/**
 * Context for MP3On4 decoder
 */
typedef struct MP3On4DecodeContext {
    int frames;                     ///< number of mp3 frames per block (number of mp3 decoder instances)
    int syncword;                   ///< syncword patch
    const uint8_t *coff;            ///< channel offsets in output buffer
    MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
} MP3On4DecodeContext;

#include "mpeg4audio.h"

/* Next 3 arrays are indexed by channel config number (passed via codecdata) */

/* number of mp3 decoder instances */
static const uint8_t mp3Frames[8] = { 0, 1, 1, 2, 3, 3, 4, 5 };

/* offsets into output buffer, assume output order is FL FR C LFE BL BR SL SR */
static const uint8_t chan_offset[8][5] = {
    { 0             },
    { 0             },  // C
    { 0             },  // FLR
    { 2, 0          },  // C FLR
    { 2, 0, 3       },  // C FLR BS
    { 2, 0, 3       },  // C FLR BLRS
    { 2, 0, 4, 3    },  // C FLR BLRS LFE
    { 2, 0, 6, 4, 3 },  // C FLR BLRS BLR LFE
};

/* mp3on4 channel layouts */
static const int16_t chan_layout[8] = {
    0,
    AV_CH_LAYOUT_MONO,
    AV_CH_LAYOUT_STEREO,
    AV_CH_LAYOUT_SURROUND,
    AV_CH_LAYOUT_4POINT0,
    AV_CH_LAYOUT_5POINT0,
    AV_CH_LAYOUT_5POINT1,
    AV_CH_LAYOUT_7POINT1
};

static av_cold int decode_close_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    int i;

    for (i = 0; i < s->frames; i++)
        av_freep(&s->mp3decctx[i]);

    return 0;
}


static av_cold int decode_init_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    MPEG4AudioConfig cfg;
    int i;

    if ((avctx->extradata_size < 2) || !avctx->extradata) {
        av_log(avctx, AV_LOG_ERROR, "Codec extradata missing or too short.\n");
        return AVERROR_INVALIDDATA;
    }

    avpriv_mpeg4audio_get_config(&cfg, avctx->extradata,
                                 avctx->extradata_size * 8, 1);
    if (!cfg.chan_config || cfg.chan_config > 7) {
        av_log(avctx, AV_LOG_ERROR, "Invalid channel config number.\n");
        return AVERROR_INVALIDDATA;
    }
    s->frames             = mp3Frames[cfg.chan_config];
    s->coff               = chan_offset[cfg.chan_config];
    avctx->channels       = ff_mpeg4audio_channels[cfg.chan_config];
    avctx->channel_layout = chan_layout[cfg.chan_config];

    if (cfg.sample_rate < 16000)
        s->syncword = 0xffe00000;
    else
        s->syncword = 0xfff00000;

    /* Init the first mp3 decoder in standard way, so that all tables get builded
     * We replace avctx->priv_data with the context of the first decoder so that
     * decode_init() does not have to be changed.
     * Other decoders will be initialized here copying data from the first context
     */
    // Allocate zeroed memory for the first decoder context
    s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
    if (!s->mp3decctx[0])
        goto alloc_fail;
    // Put decoder context in place to make init_decode() happy
    avctx->priv_data = s->mp3decctx[0];
    decode_init(avctx);
    // Restore mp3on4 context pointer
    avctx->priv_data = s;
    s->mp3decctx[0]->adu_mode = 1; // Set adu mode

    /* Create a separate codec/context for each frame (first is already ok).
     * Each frame is 1 or 2 channels - up to 5 frames allowed
     */
    for (i = 1; i < s->frames; i++) {
        s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
        if (!s->mp3decctx[i])
            goto alloc_fail;
        s->mp3decctx[i]->adu_mode = 1;
        s->mp3decctx[i]->avctx = avctx;
        s->mp3decctx[i]->mpadsp = s->mp3decctx[0]->mpadsp;
        s->mp3decctx[i]->fdsp = s->mp3decctx[0]->fdsp;
    }

    return 0;
alloc_fail:
    decode_close_mp3on4(avctx);
    return AVERROR(ENOMEM);
}


static void flush_mp3on4(AVCodecContext *avctx)
{
    int i;
    MP3On4DecodeContext *s = avctx->priv_data;

    for (i = 0; i < s->frames; i++)
        mp_flush(s->mp3decctx[i]);
}


static int decode_frame_mp3on4(AVCodecContext *avctx, void *data,
                               int *got_frame_ptr, AVPacket *avpkt)
{
    AVFrame *frame         = data;
    const uint8_t *buf     = avpkt->data;
    int buf_size           = avpkt->size;
    MP3On4DecodeContext *s = avctx->priv_data;
    MPADecodeContext *m;
    int fsize, len = buf_size, out_size = 0;
    uint32_t header;
    OUT_INT **out_samples;
    OUT_INT *outptr[2];
    int fr, ch, ret;

    /* get output buffer */
    frame->nb_samples = MPA_FRAME_SIZE;
    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
        return ret;
    out_samples = (OUT_INT **)frame->extended_data;

    // Discard too short frames
    if (buf_size < HEADER_SIZE)
        return AVERROR_INVALIDDATA;

    avctx->bit_rate = 0;

    ch = 0;
    for (fr = 0; fr < s->frames; fr++) {
        fsize = AV_RB16(buf) >> 4;
        fsize = FFMIN3(fsize, len, MPA_MAX_CODED_FRAME_SIZE);
        m     = s->mp3decctx[fr];
        av_assert1(m);

        if (fsize < HEADER_SIZE) {
            av_log(avctx, AV_LOG_ERROR, "Frame size smaller than header size\n");
            return AVERROR_INVALIDDATA;
        }
        header = (AV_RB32(buf) & 0x000fffff) | s->syncword; // patch header

        ret = avpriv_mpegaudio_decode_header((MPADecodeHeader *)m, header);
        if (ret < 0) {
            av_log(avctx, AV_LOG_ERROR, "Bad header, discard block\n");
            return AVERROR_INVALIDDATA;
        }

        if (ch + m->nb_channels > avctx->channels ||
            s->coff[fr] + m->nb_channels > avctx->channels) {
            av_log(avctx, AV_LOG_ERROR, "frame channel count exceeds codec "
                                        "channel count\n");
            return AVERROR_INVALIDDATA;
        }
        ch += m->nb_channels;

        outptr[0] = out_samples[s->coff[fr]];
        if (m->nb_channels > 1)
            outptr[1] = out_samples[s->coff[fr] + 1];

        if ((ret = mp_decode_frame(m, outptr, buf, fsize)) < 0) {
            av_log(avctx, AV_LOG_ERROR, "failed to decode channel %d\n", ch);
            memset(outptr[0], 0, MPA_FRAME_SIZE*sizeof(OUT_INT));
            if (m->nb_channels > 1)
                memset(outptr[1], 0, MPA_FRAME_SIZE*sizeof(OUT_INT));
            ret = m->nb_channels * MPA_FRAME_SIZE*sizeof(OUT_INT);
        }

        out_size += ret;
        buf      += fsize;
        len      -= fsize;

        avctx->bit_rate += m->bit_rate;
    }
    if (ch != avctx->channels) {
        av_log(avctx, AV_LOG_ERROR, "failed to decode all channels\n");
        return AVERROR_INVALIDDATA;
    }

    /* update codec info */
    avctx->sample_rate = s->mp3decctx[0]->sample_rate;

    frame->nb_samples = out_size / (avctx->channels * sizeof(OUT_INT));
    *got_frame_ptr    = 1;

    return buf_size;
}
#endif /* CONFIG_MP3ON4_DECODER || CONFIG_MP3ON4FLOAT_DECODER */