lame/libmp3lame/encoder.c

   1 /*
   2  *      LAME MP3 encoding engine
   3  *
   4  *      Copyright (c) 1999 Mark Taylor
   5  *      Copyright (c) 2000-2002 Takehiro Tominaga
   6  *      Copyright (c) 2000-2011 Robert Hegemann
   7  *      Copyright (c) 2001 Gabriel Bouvigne
   8  *      Copyright (c) 2001 John Dahlstrom
   9  *
  10  * This library is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Library General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2 of the License, or (at your option) any later version.
  14  *
  15  * This library is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Library General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Library General Public
  21  * License along with this library; if not, write to the
  22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  23  * Boston, MA 02111-1307, USA.
  24  */
  25
  26 /* $Id: encoder.c,v 1.114 2017/08/26 10:54:57 robert Exp $ */
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include <config.h>
  30 #endif
  31
  32
  33 #include "lame.h"
  34 #include "machine.h"
  35 #include "encoder.h"
  36 #include "util.h"
  37 #include "lame_global_flags.h"
  38 #include "newmdct.h"
  39 #include "psymodel.h"
  40 #include "lame-analysis.h"
  41 #include "bitstream.h"
  42 #include "VbrTag.h"
  43 #include "quantize.h"
  44 #include "quantize_pvt.h"
  45
  46
  47
  48 /*
  49  * auto-adjust of ATH, useful for low volume
  50  * Gabriel Bouvigne 3 feb 2001
  51  *
  52  * modifies some values in
  53  *   gfp->internal_flags->ATH
  54  *   (gfc->ATH)
  55  */
  56 static void
  57 adjust_ATH(lame_internal_flags const *const gfc)
  58 {
  59     SessionConfig_t const *const cfg = &gfc->cfg;
  60     FLOAT   gr2_max, max_pow;
  61
  62     if (gfc->ATH->use_adjust == 0) {
  63         gfc->ATH->adjust_factor = 1.0; /* no adjustment */
  64         return;
  65     }
  66
  67     /* jd - 2001 mar 12, 27, jun 30 */
  68     /* loudness based on equal loudness curve; */
  69     /* use granule with maximum combined loudness */
  70     max_pow = gfc->ov_psy.loudness_sq[0][0];
  71     gr2_max = gfc->ov_psy.loudness_sq[1][0];
  72     if (cfg->channels_out == 2) {
  73         max_pow += gfc->ov_psy.loudness_sq[0][1];
  74         gr2_max += gfc->ov_psy.loudness_sq[1][1];
  75     }
  76     else {
  77         max_pow += max_pow;
  78         gr2_max += gr2_max;
  79     }
  80     if (cfg->mode_gr == 2) {
  81         max_pow = Max(max_pow, gr2_max);
  82     }
  83     max_pow *= 0.5;     /* max_pow approaches 1.0 for full band noise */
  84
  85     /* jd - 2001 mar 31, jun 30 */
  86     /* user tuning of ATH adjustment region */
  87     max_pow *= gfc->ATH->aa_sensitivity_p;
  88
  89     /*  adjust ATH depending on range of maximum value
  90      */
  91
  92     /* jd - 2001 feb27, mar12,20, jun30, jul22 */
  93     /* continuous curves based on approximation */
  94     /* to GB's original values. */
  95     /* For an increase in approximate loudness, */
  96     /* set ATH adjust to adjust_limit immediately */
  97     /* after a delay of one frame. */
  98     /* For a loudness decrease, reduce ATH adjust */
  99     /* towards adjust_limit gradually. */
 100     /* max_pow is a loudness squared or a power. */
 101     if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
 102         if (gfc->ATH->adjust_factor >= 1.0) {
 103             gfc->ATH->adjust_factor = 1.0;
 104         }
 105         else {
 106             /* preceding frame has lower ATH adjust; */
 107             /* ascend only to the preceding adjust_limit */
 108             /* in case there is leading low volume */
 109             if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
 110                 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
 111             }
 112         }
 113         gfc->ATH->adjust_limit = 1.0;
 114     }
 115     else {              /* adjustment curve */
 116         /* about 32 dB maximum adjust (0.000625) */
 117         FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
 118         if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
 119             gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
 120             if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
 121                 gfc->ATH->adjust_factor = adj_lim_new;
 122             }
 123         }
 124         else {          /* ascend */
 125             if (gfc->ATH->adjust_limit >= adj_lim_new) {
 126                 gfc->ATH->adjust_factor = adj_lim_new;
 127             }
 128             else {      /* preceding frame has lower ATH adjust; */
 129                 /* ascend only to the preceding adjust_limit */
 130                 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
 131                     gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
 132                 }
 133             }
 134         }
 135         gfc->ATH->adjust_limit = adj_lim_new;
 136     }
 137 }
 138
 139 /***********************************************************************
 140  *
 141  *  some simple statistics
 142  *
 143  *  bitrate index 0: free bitrate -> not allowed in VBR mode
 144  *  : bitrates, kbps depending on MPEG version
 145  *  bitrate index 15: forbidden
 146  *
 147  *  mode_ext:
 148  *  0:  LR
 149  *  1:  LR-i
 150  *  2:  MS
 151  *  3:  MS-i
 152  *
 153  ***********************************************************************/
 154
 155 static void
 156 updateStats(lame_internal_flags * const gfc)
 157 {
 158     SessionConfig_t const *const cfg = &gfc->cfg;
 159     EncResult_t *eov = &gfc->ov_enc;
 160     int     gr, ch;
 161     assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
 162     assert(0 <= eov->mode_ext && eov->mode_ext < 4);
 163
 164     /* count bitrate indices */
 165     eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
 166     eov->bitrate_channelmode_hist[15][4]++;
 167
 168     /* count 'em for every mode extension in case of 2 channel encoding */
 169     if (cfg->channels_out == 2) {
 170         eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
 171         eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
 172     }
 173     for (gr = 0; gr < cfg->mode_gr; ++gr) {
 174         for (ch = 0; ch < cfg->channels_out; ++ch) {
 175             int     bt = gfc->l3_side.tt[gr][ch].block_type;
 176             if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
 177                 bt = 4;
 178             eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
 179             eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
 180             eov->bitrate_blocktype_hist[15][bt]++;
 181             eov->bitrate_blocktype_hist[15][5]++;
 182         }
 183     }
 184 }
 185
 186
 187
 188
 189 static void
 190 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
 191 {
 192     SessionConfig_t const *const cfg = &gfc->cfg;
 193
 194     int     ch, gr;
 195
 196     if (gfc->lame_encode_frame_init == 0) {
 197         sample_t primebuff0[286 + 1152 + 576];
 198         sample_t primebuff1[286 + 1152 + 576];
 199         int const framesize = 576 * cfg->mode_gr;
 200         /* prime the MDCT/polyphase filterbank with a short block */
 201         int     i, j;
 202         gfc->lame_encode_frame_init = 1;
 203         memset(primebuff0, 0, sizeof(primebuff0));
 204         memset(primebuff1, 0, sizeof(primebuff1));
 205         for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
 206             if (i < framesize) {
 207                 primebuff0[i] = 0;
 208                 if (cfg->channels_out == 2)
 209                     primebuff1[i] = 0;
 210             }
 211             else {
 212                 primebuff0[i] = inbuf[0][j];
 213                 if (cfg->channels_out == 2)
 214                     primebuff1[i] = inbuf[1][j];
 215                 ++j;
 216             }
 217         }
 218         /* polyphase filtering / mdct */
 219         for (gr = 0; gr < cfg->mode_gr; gr++) {
 220             for (ch = 0; ch < cfg->channels_out; ch++) {
 221                 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
 222             }
 223         }
 224         mdct_sub48(gfc, primebuff0, primebuff1);
 225
 226         /* check FFT will not use a negative starting offset */
 227 #if 576 < FFTOFFSET
 228 # error FFTOFFSET greater than 576: FFT uses a negative offset
 229 #endif
 230         /* check if we have enough data for FFT */
 231         assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
 232         /* check if we have enough data for polyphase filterbank */
 233         assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
 234     }
 235
 236 }
 237
 238
 239
 240
 241
 242
 243
 244 /************************************************************************
 245 *
 246 * encodeframe()           Layer 3
 247 *
 248 * encode a single frame
 249 *
 250 ************************************************************************
 251 lame_encode_frame()
 252
 253
 254                        gr 0            gr 1
 255 inbuf:           |--------------|--------------|--------------|
 256
 257
 258 Polyphase (18 windows, each shifted 32)
 259 gr 0:
 260 window1          <----512---->
 261 window18                 <----512---->
 262
 263 gr 1:
 264 window1                         <----512---->
 265 window18                                <----512---->
 266
 267
 268
 269 MDCT output:  |--------------|--------------|--------------|
 270
 271 FFT's                    <---------1024---------->
 272                                          <---------1024-------->
 273
 274
 275
 276     inbuf = buffer of PCM data size=MP3 framesize
 277     encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
 278     so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
 279
 280     psy-model FFT has a 1 granule delay, so we feed it data for the
 281     next granule.
 282     FFT is centered over granule:  224+576+224
 283     So FFT starts at:   576-224-MDCTDELAY
 284
 285     MPEG2:  FFT ends at:  BLKSIZE+576-224-MDCTDELAY      (1328)
 286     MPEG1:  FFT ends at:  BLKSIZE+2*576-224-MDCTDELAY    (1904)
 287
 288     MPEG2:  polyphase first window:  [0..511]
 289                       18th window:   [544..1055]          (1056)
 290     MPEG1:            36th window:   [1120..1631]         (1632)
 291             data needed:  512+framesize-32
 292
 293     A close look at newmdct.c shows that the polyphase filterbank
 294     only uses data from [0..510] for each window.  Perhaps because the window
 295     used by the filterbank is zero for the last point, so Takehiro's
 296     code doesn't bother to compute with it.
 297
 298     FFT starts at 576-224-MDCTDELAY (304)  = 576-FFTOFFSET
 299
 300 */
 301
 302 typedef FLOAT chgrdata[2][2]; /* 2x2 array of FLOAT. NOTE(review): no use of this type is visible in this file; the name hints at per-channel/per-granule data -- confirm before relying on or removing it. */
 303
 304
 305 int
 306 lame_encode_mp3_frame(       /* Output */
 307                          lame_internal_flags * gfc, /* Context */
 308                          sample_t const *inbuf_l, /* Input */
 309                          sample_t const *inbuf_r, /* Input */
 310                          unsigned char *mp3buf, /* Output */
 311                          int mp3buf_size)
 312 {                       /* Output */
 313     SessionConfig_t const *const cfg = &gfc->cfg;
 314     int     mp3count;
 315     III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
 316     III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
 317     const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
 318     const sample_t *inbuf[2];
 319
 320     FLOAT   tot_ener[2][4];
 321     FLOAT   ms_ener_ratio[2] = { .5, .5 };
 322     FLOAT   pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
 323     0., 0.}, {
 324     0., 0.}};
 325     FLOAT (*pe_use)[2];
 326
 327     int     ch, gr;
 328
 329     inbuf[0] = inbuf_l;
 330     inbuf[1] = inbuf_r;
 331
 332     if (gfc->lame_encode_frame_init == 0) {
 333         /*first run? */
 334         lame_encode_frame_init(gfc, inbuf);
 335
 336     }
 337
 338
 339     /********************** padding *****************************/
 340     /* padding method as described in
 341      * "MPEG-Layer3 / Bitstream Syntax and Decoding"
 342      * by Martin Sieler, Ralph Sperschneider
 343      *
 344      * note: there is no padding for the very first frame
 345      *
 346      * Robert Hegemann 2000-06-22
 347      */
 348     gfc->ov_enc.padding = FALSE;
 349     if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
 350         gfc->sv_enc.slot_lag += cfg->samplerate_out;
 351         gfc->ov_enc.padding = TRUE;
 352     }
 353
 354
 355
 356     /****************************************
 357     *   Stage 1: psychoacoustic model       *
 358     ****************************************/
 359
 360     {
 361         /* psychoacoustic model
 362          * psy model has a 1 granule (576) delay that we must compensate for
 363          * (mt 6/99).
 364          */
 365         int     ret;
 366         const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
 367         int     blocktype[2];
 368
 369         for (gr = 0; gr < cfg->mode_gr; gr++) {
 370
 371             for (ch = 0; ch < cfg->channels_out; ch++) {
 372                 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
 373             }
 374             ret = L3psycho_anal_vbr(gfc, bufp, gr,
 375                                     masking_LR, masking_MS,
 376                                     pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
 377             if (ret != 0)
 378                 return -4;
 379
 380             if (cfg->mode == JOINT_STEREO) {
 381                 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
 382                 if (ms_ener_ratio[gr] > 0)
 383                     ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
 384             }
 385
 386             /* block type flags */
 387             for (ch = 0; ch < cfg->channels_out; ch++) {
 388                 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
 389                 cod_info->block_type = blocktype[ch];
 390                 cod_info->mixed_block_flag = 0;
 391             }
 392         }
 393     }
 394
 395
 396     /* auto-adjust of ATH, useful for low volume */
 397     adjust_ATH(gfc);
 398
 399
 400     /****************************************
 401     *   Stage 2: MDCT                       *
 402     ****************************************/
 403
 404     /* polyphase filtering / mdct */
 405     mdct_sub48(gfc, inbuf[0], inbuf[1]);
 406
 407
 408     /****************************************
 409     *   Stage 3: MS/LR decision             *
 410     ****************************************/
 411
 412     /* Here will be selected MS or LR coding of the 2 stereo channels */
 413     gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
 414
 415     if (cfg->force_ms) {
 416         gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
 417     }
 418     else if (cfg->mode == JOINT_STEREO) {
 419         /* ms_ratio = is scaled, for historical reasons, to look like
 420            a ratio of side_channel / total.
 421            0 = signal is 100% mono
 422            .5 = L & R uncorrelated
 423          */
 424
 425         /* [0] and [1] are the results for the two granules in MPEG-1,
 426          * in MPEG-2 it's only a faked averaging of the same value
 427          * _prev is the value of the last granule of the previous frame
 428          * _next is the value of the first granule of the next frame
 429          */
 430
 431         FLOAT   sum_pe_MS = 0;
 432         FLOAT   sum_pe_LR = 0;
 433         for (gr = 0; gr < cfg->mode_gr; gr++) {
 434             for (ch = 0; ch < cfg->channels_out; ch++) {
 435                 sum_pe_MS += pe_MS[gr][ch];
 436                 sum_pe_LR += pe[gr][ch];
 437             }
 438         }
 439
 440         /* based on PE: M/S coding would not use much more bits than L/R */
 441         if (sum_pe_MS <= 1.00 * sum_pe_LR) {
 442
 443             gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
 444             gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
 445
 446             if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
 447
 448                 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
 449             }
 450         }
 451     }
 452
 453     /* bit and noise allocation */
 454     if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
 455         masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
 456         pe_use = pe_MS;
 457     }
 458     else {
 459         masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
 460         pe_use = pe;
 461     }
 462
 463
 464     /* copy data for MP3 frame analyzer */
 465     if (cfg->analysis && gfc->pinfo != NULL) {
 466         for (gr = 0; gr < cfg->mode_gr; gr++) {
 467             for (ch = 0; ch < cfg->channels_out; ch++) {
 468                 gfc->pinfo->ms_ratio[gr] = 0;
 469                 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
 470                 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
 471                 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
 472                 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
 473                 /* in psymodel, LR and MS data was stored in pinfo.
 474                    switch to MS data: */
 475                 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
 476                     gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
 477                     memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
 478                            sizeof(gfc->pinfo->energy[gr][ch]));
 479                 }
 480             }
 481         }
 482     }
 483
 484
 485     /****************************************
 486     *   Stage 4: quantization loop          *
 487     ****************************************/
 488
 489     if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
 490         static FLOAT const fircoef[9] = {
 491             -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
 492             7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
 493             0.187098 * 5
 494         };
 495
 496         int     i;
 497         FLOAT   f;
 498
 499         for (i = 0; i < 18; i++)
 500             gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
 501
 502         f = 0.0;
 503         for (gr = 0; gr < cfg->mode_gr; gr++)
 504             for (ch = 0; ch < cfg->channels_out; ch++)
 505                 f += pe_use[gr][ch];
 506         gfc->sv_enc.pefirbuf[18] = f;
 507
 508         f = gfc->sv_enc.pefirbuf[9];
 509         for (i = 0; i < 9; i++)
 510             f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
 511
 512         f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
 513         for (gr = 0; gr < cfg->mode_gr; gr++) {
 514             for (ch = 0; ch < cfg->channels_out; ch++) {
 515                 pe_use[gr][ch] *= f;
 516             }
 517         }
 518     }
 519     switch (cfg->vbr)
 520     {
 521     default:
 522     case vbr_off:
 523         CBR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
 524         break;
 525     case vbr_abr:
 526         ABR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
 527         break;
 528     case vbr_rh:
 529         VBR_old_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
 530         break;
 531     case vbr_mt:
 532     case vbr_mtrh:
 533         VBR_new_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
 534         break;
 535     }
 536
 537
 538     /****************************************
 539     *   Stage 5: bitstream formatting       *
 540     ****************************************/
 541
 542
 543     /*  write the frame to the bitstream  */
 544     (void) format_bitstream(gfc);
 545
 546     /* copy mp3 bit buffer into array */
 547     mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
 548
 549
 550     if (cfg->write_lame_tag) {
 551         AddVbrFrame(gfc);
 552     }
 553
 554     if (cfg->analysis && gfc->pinfo != NULL) {
 555         int     framesize = 576 * cfg->mode_gr;
 556         for (ch = 0; ch < cfg->channels_out; ch++) {
 557             int     j;
 558             for (j = 0; j < FFTOFFSET; j++)
 559                 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
 560             for (j = FFTOFFSET; j < 1600; j++) {
 561                 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
 562             }
 563         }
 564         gfc->sv_qnt.masking_lower = 1.0;
 565
 566         set_frame_pinfo(gfc, masking);
 567     }
 568
 569     ++gfc->ov_enc.frame_number;
 570
 571     updateStats(gfc);
 572
 573     return mp3count;
 574 }